commit e3b481f74e4b345854ebd6db657ec40a5aac18d3 Author: Jake Walker Date: Wed Oct 25 13:53:43 2023 +0100 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a3b5645 --- /dev/null +++ b/.gitignore @@ -0,0 +1,259 @@ +### JupyterNotebooks ### +# gitignore template for Jupyter Notebooks +# website: http://jupyter.org/ + +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# IPython +profile_default/ +ipython_config.py + +# Remove previous ipynb_checkpoints +# git rm -r .ipynb_checkpoints/ + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### macOS Patch ### +# iCloud generated files +*.icloud + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook + +# IPython + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +HF DeepRL Course/ml-agents +HF DeepRL Course/results diff --git a/HF DeepRL Course/Unit1 - Intro.ipynb b/HF DeepRL Course/Unit1 - Intro.ipynb new file mode 100644 index 0000000..408e52f --- /dev/null +++ b/HF DeepRL Course/Unit1 - Intro.ipynb @@ -0,0 +1,19 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"njb_ProuHiOe"},"source":["# Unit 1: Train your first Deep Reinforcement Learning Agent 
๐Ÿค–\n","\n","![Cover](https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit1/thumbnail.jpg)\n","\n","In this notebook, you'll train your **first Deep Reinforcement Learning agent** a Lunar Lander agent that will learn to **land correctly on the Moon ๐ŸŒ•**. Using [Stable-Baselines3](https://stable-baselines3.readthedocs.io/en/master/) a Deep Reinforcement Learning library, share them with the community, and experiment with different configurations\n","\n","โฌ‡๏ธ Here is an example of what **you will achieve in just a couple of minutes.** โฌ‡๏ธ\n","\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PF46MwbZD00b"},"outputs":[],"source":["%%html\n",""]},{"cell_type":"markdown","source":["### The environment ๐ŸŽฎ\n","\n","- [LunarLander-v2](https://gymnasium.farama.org/environments/box2d/lunar_lander/)\n","\n","### The library used ๐Ÿ“š\n","\n","- [Stable-Baselines3](https://stable-baselines3.readthedocs.io/en/master/)"],"metadata":{"id":"x7oR6R-ZIbeS"}},{"cell_type":"markdown","source":["We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the Github Repo](https://github.com/huggingface/deep-rl-class/issues)."],"metadata":{"id":"OwEcFHe9RRZW"}},{"cell_type":"markdown","metadata":{"id":"4i6tjI2tHQ8j"},"source":["## Objectives of this notebook ๐Ÿ†\n","\n","At the end of the notebook, you will:\n","\n","- Be able to use **Gymnasium**, the environment library.\n","- Be able to use **Stable-Baselines3**, the deep reinforcement learning library.\n","- Be able to **push your trained agent to the Hub** with a nice video replay and an evaluation score ๐Ÿ”ฅ.\n","\n","\n"]},{"cell_type":"markdown","source":["## This notebook is from Deep Reinforcement Learning Course\n","\n","\"Deep"],"metadata":{"id":"Ff-nyJdzJPND"}},{"cell_type":"markdown","metadata":{"id":"6p5HnEefISCB"},"source":["In this free course, you will:\n","\n","- ๐Ÿ“– Study Deep 
Reinforcement Learning in **theory and practice**.\n","- ๐Ÿง‘โ€๐Ÿ’ป Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- ๐Ÿค– Train **agents in unique environments**\n","- ๐ŸŽ“ **Earn a certificate of completion** by completing 80% of the assignments.\n","\n","And more!\n","\n","Check ๐Ÿ“š the syllabus ๐Ÿ‘‰ https://simoninithomas.github.io/deep-rl-course\n","\n","Donโ€™t forget to **sign up to the course** (we are collecting your email to be able toย **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","The best way to keep in touch and ask questions is **to join our discord server** to exchange with the community and with us ๐Ÿ‘‰๐Ÿป https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"Y-mo_6rXIjRi"},"source":["## Prerequisites ๐Ÿ—๏ธ\n","\n","Before diving into the notebook, you need to:\n","\n","๐Ÿ”ฒ ๐Ÿ“ **[Read Unit 0](https://huggingface.co/deep-rl-course/unit0/introduction)** that gives you all the **information about the course and helps you to onboard** ๐Ÿค—\n","\n","๐Ÿ”ฒ ๐Ÿ“š **Develop an understanding of the foundations of Reinforcement learning** (MC, TD, Rewards hypothesis...) by [reading Unit 1](https://huggingface.co/deep-rl-course/unit1/introduction)."]},{"cell_type":"markdown","source":["## A small recap of Deep Reinforcement Learning ๐Ÿ“š\n","\n","\"The"],"metadata":{"id":"HoeqMnr5LuYE"}},{"cell_type":"markdown","metadata":{"id":"xcQYx9ynaFMD"},"source":["Let's do a small recap on what we learned in the first Unit:\n","\n","- Reinforcement Learning is a **computational approach to learning from actions**. 
We build an agent that learns from the environment by **interacting with it through trial and error** and receiving rewards (negative or positive) as feedback.\n","\n","- The goal of any RL agent is to **maximize its expected cumulative reward** (also called expected return) because RL is based on the _reward hypothesis_, which is that all goals can be described as the maximization of an expected cumulative reward.\n","\n","- The RL process is a **loop that outputs a sequence of state, action, reward, and next state**.\n","\n","- To calculate the expected cumulative reward (expected return), **we discount the rewards**: the rewards that come sooner (at the beginning of the game) are more likely to happen since they are more predictable than the long-term future reward.\n","\n","- To solve an RL problem, you want to **find an optimal policy**; the policy is the \"brain\" of your AI that will tell us what action to take given a state. The optimal one is the one that gives you the actions that maximize the expected return.\n","\n","There are **two** ways to find your optimal policy:\n","\n","- By **training your policy directly**: policy-based methods.\n","- By **training a value function** that tells us the expected return the agent will get at each state and use this function to define our policy: value-based methods.\n","\n","- Finally, we spoke about Deep RL because **we introduce deep neural networks to estimate the action to take (policy-based) or to estimate the value of a state (value-based) hence the name \"deep.\"**"]},{"cell_type":"markdown","source":["# Let's train our first Deep Reinforcement Learning agent and upload it to the Hub 🚀\n","\n","## Get a certificate 🎓\n","\n","To validate this hands-on for the [certification process](https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process), you need to push your trained model to the Hub and **get a result of >= 200**.\n","\n","To find your result, go to the 
[leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) and find your model, **the result = mean_reward - std of reward**\n","\n","For more information about the certification process, check this section ๐Ÿ‘‰ https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"],"metadata":{"id":"qDploC3jSH99"}},{"cell_type":"markdown","source":["## Set the GPU ๐Ÿ’ช\n","\n","- To **accelerate the agent's training, we'll use a GPU**. To do that, go to `Runtime > Change Runtime type`\n","\n","\"GPU"],"metadata":{"id":"HqzznTzhNfAC"}},{"cell_type":"markdown","metadata":{"id":"38HBd3t1SHJ8"},"source":["- `Hardware Accelerator > GPU`\n","\n","\"GPU"]},{"cell_type":"markdown","metadata":{"id":"jeDAH0h0EBiG"},"source":["## Install dependencies and create a virtual screen ๐Ÿ”ฝ\n","\n","The first step is to install the dependencies, weโ€™ll install multiple ones.\n","\n","- `gymnasium[box2d]`: Contains the LunarLander-v2 environment ๐ŸŒ›\n","- `stable-baselines3[extra]`: The deep reinforcement learning library.\n","- `huggingface_sb3`: Additional code for Stable-baselines3 to load and upload models from the Hugging Face ๐Ÿค— Hub.\n","\n","To make things easier, we created a script to install all these dependencies."]},{"cell_type":"code","source":["!apt install swig cmake"],"metadata":{"id":"yQIGLPDkGhgG"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"9XaULfDZDvrC"},"outputs":[],"source":["!pip install -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt"]},{"cell_type":"markdown","source":["During the notebook, we'll need to generate a replay video. 
To do so, with colab, **we need to have a virtual screen to be able to render the environment** (and thus record the frames).\n","\n","Hence the following cell will install virtual screen libraries and create and run a virtual screen ๐Ÿ–ฅ"],"metadata":{"id":"BEKeXQJsQCYm"}},{"cell_type":"code","source":["!sudo apt-get update\n","!sudo apt-get install -y python3-opengl\n","!apt install ffmpeg\n","!apt install xvfb\n","!pip3 install pyvirtualdisplay"],"metadata":{"id":"j5f2cGkdP-mb"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["To make sure the new installed libraries are used, **sometimes it's required to restart the notebook runtime**. The next cell will force the **runtime to crash, so you'll need to connect again and run the code starting from here**. Thanks to this trick, **we will be able to run our virtual screen.**"],"metadata":{"id":"TCwBTAwAW9JJ"}},{"cell_type":"code","source":["import os\n","os.kill(os.getpid(), 9)"],"metadata":{"id":"cYvkbef7XEMi"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Virtual display\n","from pyvirtualdisplay import Display\n","\n","virtual_display = Display(visible=0, size=(1400, 900))\n","virtual_display.start()"],"metadata":{"id":"BE5JWP5rQIKf"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"wrgpVFqyENVf"},"source":["## Import the packages ๐Ÿ“ฆ\n","\n","One additional library we import is huggingface_hub **to be able to upload and download trained models from the hub**.\n","\n","\n","The Hugging Face Hub ๐Ÿค— works as a central place where anyone can share and explore models and datasets. 
It has versioning, metrics, visualizations and other features that will allow you to easily collaborate with others.\n","\n","You can see here all the Deep reinforcement Learning models available here๐Ÿ‘‰ https://huggingface.co/models?pipeline_tag=reinforcement-learning&sort=downloads\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cygWLPGsEQ0m"},"outputs":[],"source":["import gymnasium\n","\n","from huggingface_sb3 import load_from_hub, package_to_hub\n","from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.\n","\n","from stable_baselines3 import PPO\n","from stable_baselines3.common.env_util import make_vec_env\n","from stable_baselines3.common.evaluation import evaluate_policy\n","from stable_baselines3.common.monitor import Monitor"]},{"cell_type":"markdown","metadata":{"id":"MRqRuRUl8CsB"},"source":["## Understand Gymnasium and how it works ๐Ÿค–\n","\n","๐Ÿ‹ The library containing our environment is called Gymnasium.\n","**You'll use Gymnasium a lot in Deep Reinforcement Learning.**\n","\n","Gymnasium is the **new version of Gym library** [maintained by the Farama Foundation](https://farama.org/).\n","\n","The Gymnasium library provides two things:\n","\n","- An interface that allows you to **create RL environments**.\n","- A **collection of environments** (gym-control, atari, box2D...).\n","\n","Let's look at an example, but first let's recall the RL loop.\n","\n","\"The"]},{"cell_type":"markdown","metadata":{"id":"-TzNN0bQ_j-3"},"source":["At each step:\n","- Our Agent receivesย a **state (S0)**ย from theย **Environment**ย โ€” we receive the first frame of our game (Environment).\n","- Based on thatย **state (S0),**ย the Agent takes anย **action (A0)**ย โ€” our Agent will move to the right.\n","- The environment transitions to aย **new**ย **state (S1)**ย โ€” new frame.\n","- The environment gives someย **reward (R1)**ย to the Agent โ€” weโ€™re not deadย *(Positive 
Reward +1)*.\n","\n","\n","With Gymnasium:\n","\n","1️⃣ We create our environment using `gymnasium.make()`\n","\n","2️⃣ We reset the environment to its initial state with `observation, info = env.reset()`\n","\n","At each step:\n","\n","3️⃣ Get an action using our model (in our example we take a random action)\n","\n","4️⃣ Using `env.step(action)`, we perform this action in the environment and get\n","- `observation`: The new state (st+1)\n","- `reward`: The reward we get after executing the action\n","- `terminated`: Indicates if the episode terminated (the agent reached a terminal state)\n","- `truncated`: Introduced with this new version, it indicates that a time limit was reached or that the agent went out of bounds of the environment, for instance.\n","- `info`: A dictionary that provides additional information (depends on the environment).\n","\n","For more explanations check this 👉 https://gymnasium.farama.org/api/env/#gymnasium.Env.step\n","\n","If the episode is terminated:\n","- We reset the environment to its initial state with `observation, info = env.reset()`\n","\n","**Let's look at an example!** Make sure to read the code\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"w7vOFlpA_ONz"},"outputs":[],"source":["import gymnasium as gym\n","\n","# First, we create our environment called LunarLander-v2\n","env = gym.make(\"LunarLander-v2\")\n","\n","# Then we reset this environment\n","observation, info = env.reset()\n","\n","for _ in range(20):\n"," # Take a random action\n"," action = env.action_space.sample()\n"," print(\"Action taken:\", action)\n","\n"," # Do this action in the environment and get\n"," # next_state, reward, terminated, truncated and info\n"," observation, reward, terminated, truncated, info = env.step(action)\n","\n"," # If the game is terminated (in our case we land, crashed) or truncated (timeout)\n"," if terminated or truncated:\n"," # Reset the environment\n"," print(\"Environment is reset\")\n"," observation, info = 
env.reset()\n","\n","env.close()"]},{"cell_type":"markdown","metadata":{"id":"XIrKGGSlENZB"},"source":["## Create the LunarLander environment ๐ŸŒ› and understand how it works\n","\n","### [The environment ๐ŸŽฎ](https://gymnasium.farama.org/environments/box2d/lunar_lander/)\n","\n","In this first tutorial, weโ€™re going to train our agent, a [Lunar Lander](https://gymnasium.farama.org/environments/box2d/lunar_lander/), **to land correctly on the moon**. To do that, the agent needs to learn **to adapt its speed and position (horizontal, vertical, and angular) to land correctly.**\n","\n","---\n","\n","\n","๐Ÿ’ก A good habit when you start to use an environment is to check its documentation\n","\n","๐Ÿ‘‰ https://gymnasium.farama.org/environments/box2d/lunar_lander/\n","\n","---\n"]},{"cell_type":"markdown","metadata":{"id":"poLBgRocF9aT"},"source":["Let's see what the Environment looks like:\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZNPG0g_UGCfh"},"outputs":[],"source":["# We create our environment with gym.make(\"\")\n","env = gym.make(\"LunarLander-v2\")\n","env.reset()\n","print(\"_____OBSERVATION SPACE_____ \\n\")\n","print(\"Observation Space Shape\", env.observation_space.shape)\n","print(\"Sample observation\", env.observation_space.sample()) # Get a random observation"]},{"cell_type":"markdown","metadata":{"id":"2MXc15qFE0M9"},"source":["We see with `Observation Space Shape (8,)` that the observation is a vector of size 8, where each value contains different information about the lander:\n","- Horizontal pad coordinate (x)\n","- Vertical pad coordinate (y)\n","- Horizontal speed (x)\n","- Vertical speed (y)\n","- Angle\n","- Angular speed\n","- If the left leg contact point has touched the land (boolean)\n","- If the right leg contact point has touched the land (boolean)\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"We5WqOBGLoSm"},"outputs":[],"source":["print(\"\\n _____ACTION SPACE_____ \\n\")\n","print(\"Action 
Space Shape\", env.action_space.n)\n","print(\"Action Space Sample\", env.action_space.sample()) # Take a random action"]},{"cell_type":"markdown","metadata":{"id":"MyxXwkI2Magx"},"source":["The action space (the set of possible actions the agent can take) is discrete with 4 actions available 🎮:\n","\n","- Action 0: Do nothing,\n","- Action 1: Fire left orientation engine,\n","- Action 2: Fire the main engine,\n","- Action 3: Fire right orientation engine.\n","\n","Reward function (the function that gives a reward at each timestep) 💰:\n","\n","After every step a reward is granted. The total reward of an episode is the **sum of the rewards for all the steps within that episode**.\n","\n","For each step, the reward:\n","\n","- Is increased/decreased the closer/further the lander is to the landing pad.\n","- Is increased/decreased the slower/faster the lander is moving.\n","- Is decreased the more the lander is tilted (angle not horizontal).\n","- Is increased by 10 points for each leg that is in contact with the ground.\n","- Is decreased by 0.03 points each frame a side engine is firing.\n","- Is decreased by 0.3 points each frame the main engine is firing.\n","\n","The episode receives an **additional reward of -100 or +100 points for crashing or landing safely respectively.**\n","\n","An episode is **considered a solution if it scores at least 200 points.**"]},{"cell_type":"markdown","metadata":{"id":"dFD9RAFjG8aq"},"source":["#### Vectorized Environment\n","\n","- We create a vectorized environment (a method for stacking multiple independent environments into a single environment) of 16 environments; this way, **we'll have more diverse experiences during the training.**"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"99hqQ_etEy1N"},"outputs":[],"source":["# Create the environment\n","env = make_vec_env('LunarLander-v2', n_envs=16)"]},{"cell_type":"markdown","metadata":{"id":"VgrE86r5E5IK"},"source":["## Create the Model 🤖\n","- We 
have studied our environment and we understood the problem: **being able to land the Lunar Lander to the Landing Pad correctly by controlling left, right and main orientation engine**. Now let's build the algorithm we're going to use to solve this Problem ๐Ÿš€.\n","\n","- To do so, we're going to use our first Deep RL library, [Stable Baselines3 (SB3)](https://stable-baselines3.readthedocs.io/en/master/).\n","\n","- SB3 is a set of **reliable implementations of reinforcement learning algorithms in PyTorch**.\n","\n","---\n","\n","๐Ÿ’ก A good habit when using a new library is to dive first on the documentation: https://stable-baselines3.readthedocs.io/en/master/ and then try some tutorials.\n","\n","----"]},{"cell_type":"markdown","source":["\"Stable"],"metadata":{"id":"HLlClRW37Q7e"}},{"cell_type":"markdown","metadata":{"id":"HV4yiUM_9_Ka"},"source":["To solve this problem, we're going to use SB3 **PPO**. [PPO (aka Proximal Policy Optimization) is one of the SOTA (state of the art) Deep Reinforcement Learning algorithms that you'll study during this course](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#example%5D).\n","\n","PPO is a combination of:\n","- *Value-based reinforcement learning method*: learning an action-value function that will tell us the **most valuable action to take given a state and action**.\n","- *Policy-based reinforcement learning method*: learning a policy that will **give us a probability distribution over actions**."]},{"cell_type":"markdown","metadata":{"id":"5qL_4HeIOrEJ"},"source":["Stable-Baselines3 is easy to set up:\n","\n","1๏ธโƒฃ You **create your environment** (in our case it was done above)\n","\n","2๏ธโƒฃ You define the **model you want to use and instantiate this model** `model = PPO(\"MlpPolicy\")`\n","\n","3๏ธโƒฃ You **train the agent** with `model.learn` and define the number of training timesteps\n","\n","```\n","# Create environment\n","env = gym.make('LunarLander-v2')\n","\n","# Instantiate the 
agent\n","model = PPO('MlpPolicy', env, verbose=1)\n","# Train the agent\n","model.learn(total_timesteps=int(2e5))\n","```\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"nxI6hT1GE4-A"},"outputs":[],"source":["# TODO: Define a PPO MlpPolicy architecture\n","# We use MultiLayerPerceptron (MLPPolicy) because the input is a vector,\n","# if we had frames as input we would use CnnPolicy\n","model ="]},{"cell_type":"markdown","metadata":{"id":"QAN7B0_HCVZC"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"543OHYDfcjK4"},"outputs":[],"source":["# SOLUTION\n","# We added some parameters to accelerate the training\n","model = PPO(\n"," policy = 'MlpPolicy',\n"," env = env,\n"," n_steps = 1024,\n"," batch_size = 64,\n"," n_epochs = 4,\n"," gamma = 0.999,\n"," gae_lambda = 0.98,\n"," ent_coef = 0.01,\n"," verbose=1)"]},{"cell_type":"markdown","metadata":{"id":"ClJJk88yoBUi"},"source":["## Train the PPO agent ๐Ÿƒ\n","- Let's train our agent for 1,000,000 timesteps, don't forget to use GPU on Colab. 
It will take approximately ~20min, but you can use fewer timesteps if you just want to try it out.\n","- During the training, take a โ˜• break you deserved it ๐Ÿค—"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"qKnYkNiVp89p"},"outputs":[],"source":["# TODO: Train it for 1,000,000 timesteps\n","\n","# TODO: Specify file name for model and save the model to file\n","model_name = \"\"\n"]},{"cell_type":"markdown","metadata":{"id":"1bQzQ-QcE3zo"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"poBCy9u_csyR"},"outputs":[],"source":["# SOLUTION\n","# Train it for 1,000,000 timesteps\n","model.learn(total_timesteps=1000000)\n","# Save the model\n","model_name = \"ppo-LunarLander-v2\"\n","model.save(model_name)"]},{"cell_type":"markdown","metadata":{"id":"BY_HuedOoISR"},"source":["## Evaluate the agent ๐Ÿ“ˆ\n","- Remember to wrap the environment in a [Monitor](https://stable-baselines3.readthedocs.io/en/master/common/monitor.html).\n","- Now that our Lunar Lander agent is trained ๐Ÿš€, we need to **check its performance**.\n","- Stable-Baselines3 provides a method to do that: `evaluate_policy`.\n","- To fill that part you need to [check the documentation](https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#basic-usage-training-saving-loading)\n","- In the next step, we'll see **how to automatically evaluate and share your agent to compete in a leaderboard, but for now let's do it ourselves**\n","\n","\n","๐Ÿ’ก When you evaluate your agent, you should not use your training environment but create an evaluation environment."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"yRpno0glsADy"},"outputs":[],"source":["# TODO: Evaluate the agent\n","# Create a new environment for evaluation\n","eval_env =\n","\n","# Evaluate the model with 10 evaluation episodes and deterministic=True\n","mean_reward, std_reward =\n","\n","# Print the 
results\n","\n"]},{"cell_type":"markdown","metadata":{"id":"BqPKw3jt_pG5"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zpz8kHlt_a_m"},"outputs":[],"source":["#@title\n","eval_env = Monitor(gym.make(\"LunarLander-v2\"))\n","mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n","print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")"]},{"cell_type":"markdown","metadata":{"id":"reBhoODwcXfr"},"source":["- In my case, I got a mean reward of `200.20 +/- 20.80` after training for 1 million steps, which means that our lunar lander agent is ready to land on the moon 🌛🥳."]},{"cell_type":"markdown","metadata":{"id":"IK_kR78NoNb2"},"source":["## Publish our trained model on the Hub 🔥\n","Now that we saw we got good results after the training, we can publish our trained model on the hub 🤗 with one line of code.\n","\n","📚 The library's documentation 👉 https://github.com/huggingface/huggingface_sb3/tree/main#hugging-face--x-stable-baselines3-v20\n","\n","Here's an example of a Model Card (with Space Invaders):"]},{"cell_type":"markdown","metadata":{"id":"Gs-Ew7e1gXN3"},"source":["By using `package_to_hub` **you evaluate, record a replay, generate a model card of your agent and push it to the hub**.\n","\n","This way:\n","- You can **showcase your work** 🔥\n","- You can **visualize your agent playing** 👀\n","- You can **share with the community an agent that others can use** 💾\n","- You can **access a leaderboard 🏆 to see how well your agent is performing compared to your classmates** 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard\n"]},{"cell_type":"markdown","metadata":{"id":"JquRrWytA6eo"},"source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1️⃣ (If it's not already done) create an account on Hugging Face ➡ 
https://huggingface.co/join\n","\n","2๏ธโƒฃ Sign in and then, you need to store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n","\n","\"Create\n","\n","- Copy the token\n","- Run the cell below and paste the token"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"GZiFBBlzxzxY"},"outputs":[],"source":["notebook_login()\n","!git config --global credential.helper store"]},{"cell_type":"markdown","metadata":{"id":"_tsf2uv0g_4p"},"source":["If you don't want to use a Google Colab or a Jupyter Notebook, you need to use this command instead: `huggingface-cli login`"]},{"cell_type":"markdown","metadata":{"id":"FGNh9VsZok0i"},"source":["3๏ธโƒฃ We're now ready to push our trained agent to the ๐Ÿค— Hub ๐Ÿ”ฅ using `package_to_hub()` function"]},{"cell_type":"markdown","metadata":{"id":"Ay24l6bqFF18"},"source":["Let's fill the `package_to_hub` function:\n","- `model`: our trained model.\n","- `model_name`: the name of the trained model that we defined in `model_save`\n","- `model_architecture`: the model architecture we used, in our case PPO\n","- `env_id`: the name of the environment, in our case `LunarLander-v2`\n","- `eval_env`: the evaluation environment defined in eval_env\n","- `repo_id`: the name of the Hugging Face Hub Repository that will be created/updated `(repo_id = {username}/{repo_name})`\n","\n","๐Ÿ’ก **A good name is {username}/{model_architecture}-{env_id}**\n","\n","- `commit_message`: message of the commit"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"JPG7ofdGIHN8"},"outputs":[],"source":["import gymnasium as gym\n","from stable_baselines3.common.vec_env import DummyVecEnv\n","from stable_baselines3.common.env_util import make_vec_env\n","\n","from huggingface_sb3 import package_to_hub\n","\n","## TODO: Define a repo_id\n","## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = 
{organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n","repo_id =\n","\n","# TODO: Define the name of the environment\n","env_id =\n","\n","# Create the evaluation env and set the render_mode=\"rgb_array\"\n","eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode=\"rgb_array\"))])\n","\n","\n","# TODO: Define the model architecture we used\n","model_architecture = \"\"\n","\n","## TODO: Define the commit message\n","commit_message = \"\"\n","\n","# method save, evaluate, generate a model card and record a replay video of your agent before pushing the repo to the hub\n","package_to_hub(model=model, # Our trained model\n"," model_name=model_name, # The name of our trained model\n"," model_architecture=model_architecture, # The model architecture we used: in our case PPO\n"," env_id=env_id, # Name of the environment\n"," eval_env=eval_env, # Evaluation Environment\n"," repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n"," commit_message=commit_message)"]},{"cell_type":"markdown","metadata":{"id":"Avf6gufJBGMw"},"source":["#### Solution\n"]},{"cell_type":"code","source":["import gymnasium as gym\n","\n","from stable_baselines3 import PPO\n","from stable_baselines3.common.vec_env import DummyVecEnv\n","from stable_baselines3.common.env_util import make_vec_env\n","\n","from huggingface_sb3 import package_to_hub\n","\n","# PLACE the variables you've just defined two cells above\n","# Define the name of the environment\n","env_id = \"LunarLander-v2\"\n","\n","# TODO: Define the model architecture we used\n","model_architecture = \"PPO\"\n","\n","## Define a repo_id\n","## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n","## CHANGE WITH YOUR REPO ID\n","repo_id = \"ThomasSimonini/ppo-LunarLander-v2\" # Change with your repo 
id, you can't push with mine ๐Ÿ˜„\n","\n","## Define the commit message\n","commit_message = \"Upload PPO LunarLander-v2 trained agent\"\n","\n","# Create the evaluation env and set the render_mode=\"rgb_array\"\n","eval_env = DummyVecEnv([lambda: gym.make(env_id, render_mode=\"rgb_array\")])\n","\n","# PLACE the package_to_hub function you've just filled here\n","package_to_hub(model=model, # Our trained model\n"," model_name=model_name, # The name of our trained model\n"," model_architecture=model_architecture, # The model architecture we used: in our case PPO\n"," env_id=env_id, # Name of the environment\n"," eval_env=eval_env, # Evaluation Environment\n"," repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n"," commit_message=commit_message)\n"],"metadata":{"id":"I2E--IJu8JYq"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["Congrats ๐Ÿฅณ you've just trained and uploaded your first Deep Reinforcement Learning agent. The script above should have displayed a link to a model repository such as https://huggingface.co/osanseviero/test_sb3. 
When you go to this link, you can:\n","* See a video preview of your agent at the right.\n","* Click \"Files and versions\" to see all the files in the repository.\n","* Click \"Use in stable-baselines3\" to get a code snippet that shows how to load the model.\n","* A model card (`README.md` file) which gives a description of the model\n","\n","Under the hood, the Hub uses git-based repositories (don't worry if you don't know what git is), which means you can update the model with new versions as you experiment and improve your agent.\n","\n","Compare the results of your LunarLander-v2 with your classmates using the leaderboard ๐Ÿ† ๐Ÿ‘‰ https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard"],"metadata":{"id":"T79AEAWEFIxz"}},{"cell_type":"markdown","metadata":{"id":"9nWnuQHRfFRa"},"source":["## Load a saved LunarLander model from the Hub ๐Ÿค—\n","Thanks to [ironbar](https://github.com/ironbar) for the contribution.\n","\n","Loading a saved model from the Hub is really easy.\n","\n","You go to https://huggingface.co/models?library=stable-baselines3 to see the list of all the Stable-baselines3 saved models.\n","1. You select one and copy its repo_id\n","\n","\"Copy-id\"/"]},{"cell_type":"markdown","metadata":{"id":"hNPLJF2bfiUw"},"source":["2. 
Then we just need to use load_from_hub with:\n","- The repo_id\n","- The filename: the saved model inside the repo and its extension (*.zip)"]},{"cell_type":"markdown","source":["Because the model I download from the Hub was trained with Gym (the former version of Gymnasium) we need to install shimmy a API conversion tool that will help us to run the environment correctly.\n","\n","Shimmy Documentation: https://github.com/Farama-Foundation/Shimmy"],"metadata":{"id":"bhb9-NtsinKB"}},{"cell_type":"code","source":["!pip install shimmy"],"metadata":{"id":"03WI-bkci1kH"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"oj8PSGHJfwz3"},"outputs":[],"source":["from huggingface_sb3 import load_from_hub\n","repo_id = \"Classroom-workshop/assignment2-omar\" # The repo_id\n","filename = \"ppo-LunarLander-v2.zip\" # The model filename.zip\n","\n","# When the model was trained on Python 3.8 the pickle protocol is 5\n","# But Python 3.6, 3.7 use protocol 4\n","# In order to get compatibility we need to:\n","# 1. Install pickle5 (we done it at the beginning of the colab)\n","# 2. 
Create a custom empty object we pass as parameter to PPO.load()\n","custom_objects = {\n"," \"learning_rate\": 0.0,\n"," \"lr_schedule\": lambda _: 0.0,\n"," \"clip_range\": lambda _: 0.0,\n","}\n","\n","checkpoint = load_from_hub(repo_id, filename)\n","model = PPO.load(checkpoint, custom_objects=custom_objects, print_system_info=True)"]},{"cell_type":"markdown","metadata":{"id":"Fs0Y-qgPgLUf"},"source":["Let's evaluate this agent:"]},{"cell_type":"code","source":["#@title\n","eval_env = Monitor(gym.make(\"LunarLander-v2\"))\n","mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)\n","print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")"],"metadata":{"id":"PAEVwK-aahfx"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"BQAwLnYFPk-s"},"source":["## Some additional challenges ๐Ÿ†\n","The best way to learn **is to try things by your own**! As you saw, the current agent is not doing great. As a first suggestion, you can train for more steps. With 1,000,000 steps, we saw some great results!\n","\n","In the [Leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) you will find your agents. Can you get to the top?\n","\n","Here are some ideas to achieve so:\n","* Train more steps\n","* Try different hyperparameters for `PPO`. You can see them at https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#parameters.\n","* Check the [Stable-Baselines3 documentation](https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html) and try another model such as DQN.\n","* **Push your new trained model** on the Hub ๐Ÿ”ฅ\n","\n","**Compare the results of your LunarLander-v2 with your classmates** using the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) ๐Ÿ†\n","\n","Is moon landing too boring for you? 
Try to **change the environment**, why not use MountainCar-v0, CartPole-v1 or CarRacing-v0? Check how they work [using the gym documentation](https://www.gymlibrary.dev/) and have fun ๐ŸŽ‰."]},{"cell_type":"markdown","metadata":{"id":"9lM95-dvmif8"},"source":["________________________________________________________________________\n","Congrats on finishing this chapter! That was the biggest one, **and there was a lot of information.**\n","\n","If youโ€™re still feel confused with all these elements...it's totally normal! **This was the same for me and for all people who studied RL.**\n","\n","Take time to really **grasp the material before continuing and try the additional challenges**. Itโ€™s important to master these elements and have a solid foundations.\n","\n","Naturally, during the course, weโ€™re going to dive deeper into these concepts but **itโ€™s better to have a good understanding of them now before diving into the next chapters.**\n","\n"]},{"cell_type":"markdown","metadata":{"id":"BjLhT70TEZIn"},"source":["Next time, in the bonus unit 1, you'll train Huggy the Dog to fetch the stick.\n","\n","\"Huggy\"/\n","\n","## Keep learning, stay awesome ๐Ÿค—"]}], "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + },"nbformat":4,"nbformat_minor":0} diff --git a/HF DeepRL Course/Unit2 - Q-Learning.ipynb b/HF DeepRL Course/Unit2 - Q-Learning.ipynb new file mode 100644 index 0000000..98e83fa --- /dev/null +++ b/HF DeepRL Course/Unit2 - Q-Learning.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"njb_ProuHiOe"},"source":["# Unit 2: Q-Learning with FrozenLake-v1 โ›„ and Taxi-v3 ๐Ÿš•\n","\n","\"Unit\n","\n","In this 
notebook, **you'll code your first Reinforcement Learning agent from scratch** to play FrozenLake โ„๏ธ using Q-Learning, share it with the community, and experiment with different configurations.\n","\n","โฌ‡๏ธ Here is an example of what **you will achieve in just a couple of minutes.** โฌ‡๏ธ\n"]},{"cell_type":"markdown","metadata":{"id":"vRU_vXBrl1Jx"},"source":["\"Environments\"/"]},{"cell_type":"markdown","source":["###๐ŸŽฎ Environments:\n","\n","- [FrozenLake-v1](https://gymnasium.farama.org/environments/toy_text/frozen_lake/)\n","- [Taxi-v3](https://gymnasium.farama.org/environments/toy_text/taxi/)\n","\n","###๐Ÿ“š RL-Library:\n","\n","- Python and NumPy\n","- [Gymnasium](https://gymnasium.farama.org/)\n","\n","We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the GitHub Repo](https://github.com/huggingface/deep-rl-class/issues)."],"metadata":{"id":"DPTBOv9HYLZ2"}},{"cell_type":"markdown","metadata":{"id":"4i6tjI2tHQ8j"},"source":["## Objectives of this notebook ๐Ÿ†\n","\n","At the end of the notebook, you will:\n","\n","- Be able to use **Gymnasium**, the environment library.\n","- Be able to code a Q-Learning agent from scratch.\n","- Be able to **push your trained agent and the code to the Hub** with a nice video replay and an evaluation score ๐Ÿ”ฅ.\n","\n","\n"]},{"cell_type":"markdown","source":["## This notebook is from the Deep Reinforcement Learning Course\n","\n","\"Deep"],"metadata":{"id":"viNzVbVaYvY3"}},{"cell_type":"markdown","metadata":{"id":"6p5HnEefISCB"},"source":["In this free course, you will:\n","\n","- ๐Ÿ“– Study Deep Reinforcement Learning in **theory and practice**.\n","- ๐Ÿง‘โ€๐Ÿ’ป Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- ๐Ÿค– Train **agents in unique environments**\n","\n","And more check ๐Ÿ“š the syllabus ๐Ÿ‘‰ 
https://simoninithomas.github.io/deep-rl-course\n","\n","Donโ€™t forget to **sign up to the course** (we are collecting your email to be able toย **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","\n","The best way to keep in touch is to join our discord server to exchange with the community and with us ๐Ÿ‘‰๐Ÿป https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"Y-mo_6rXIjRi"},"source":["## Prerequisites ๐Ÿ—๏ธ\n","\n","Before diving into the notebook, you need to:\n","\n","๐Ÿ”ฒ ๐Ÿ“š **Study [Q-Learning by reading Unit 2](https://huggingface.co/deep-rl-course/unit2/introduction)** ๐Ÿค— "]},{"cell_type":"markdown","metadata":{"id":"f2ONOODsyrMU"},"source":["## A small recap of Q-Learning"]},{"cell_type":"markdown","metadata":{"id":"V68VveLacfxJ"},"source":["*Q-Learning* **is the RL algorithm that**:\n","\n","- Trains *Q-Function*, an **action-value function** that encoded, in internal memory, by a *Q-table* **that contains all the state-action pair values.**\n","\n","- Given a state and action, our Q-Function **will search the Q-table for the corresponding value.**\n"," \n","\"Q\n","\n","- When the training is done,**we have an optimal Q-Function, so an optimal Q-Table.**\n"," \n","- And if we **have an optimal Q-function**, we\n","have an optimal policy, since we **know for, each state, the best action to take.**\n","\n","\"Link\n","\n","\n","But, in the beginning,ย our **Q-Table is useless since it gives arbitrary value for each state-action pairย (most of the time we initialize the Q-Table to 0 values)**. 
But, as weโ€™llย explore the environment and update our Q-Table it will give us better and better approximations\n","\n","\"q-learning.jpeg\"\n","\n","This is the Q-Learning pseudocode:\n","\n","\"Q-Learning\"\n"]},{"cell_type":"markdown","source":["# Let's code our first Reinforcement Learning algorithm ๐Ÿš€"],"metadata":{"id":"HEtx8Y8MqKfH"}},{"cell_type":"markdown","source":["To validate this hands-on for the [certification process](https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process), you need to push your trained Taxi model to the Hub and **get a result of >= 4.5**.\n","\n","To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) and find your model, **the result = mean_reward - std of reward**\n","\n","For more information about the certification process, check this section ๐Ÿ‘‰ https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"],"metadata":{"id":"Kdxb1IhzTn0v"}},{"cell_type":"markdown","source":["## Install dependencies and create a virtual display ๐Ÿ”ฝ\n","\n","In the notebook, we'll need to generate a replay video. To do so, with Colab, **we need to have a virtual screen to render the environment** (and thus record the frames).\n","\n","Hence the following cell will install the libraries and create and run a virtual screen ๐Ÿ–ฅ\n","\n","Weโ€™ll install multiple ones:\n","\n","- `gymnasium`: Contains the FrozenLake-v1 โ›„ and Taxi-v3 ๐Ÿš• environments.\n","- `pygame`: Used for the FrozenLake-v1 and Taxi-v3 UI.\n","- `numpy`: Used for handling our Q-table.\n","\n","The Hugging Face Hub ๐Ÿค— works as a central place where anyone can share and explore models and datasets. 
It has versioning, metrics, visualizations and other features that will allow you to easily collaborate with others.\n","\n","You can see here all the Deep RL models available (if they use Q Learning) here ๐Ÿ‘‰ https://huggingface.co/models?other=q-learning"],"metadata":{"id":"4gpxC1_kqUYe"}},{"cell_type":"code","execution_count":25,"metadata":{"id":"9XaULfDZDvrC","executionInfo":{"status":"ok","timestamp":1696092114072,"user_tz":-60,"elapsed":7348,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["!pip install -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit2/requirements-unit2.txt"]},{"cell_type":"code","source":["!sudo apt-get update\n","!sudo apt-get install -y python3-opengl\n","!apt install ffmpeg xvfb\n","!pip3 install pyvirtualdisplay"],"metadata":{"id":"n71uTX7qqzz2","executionInfo":{"status":"ok","timestamp":1696092131729,"user_tz":-60,"elapsed":17662,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"execution_count":26,"outputs":[]},{"cell_type":"markdown","source":["To make sure the new installed libraries are used, **sometimes it's required to restart the notebook runtime**. The next cell will force the **runtime to crash, so you'll need to connect again and run the code starting from here**. 
Thanks to this trick, **we will be able to run our virtual screen.**"],"metadata":{"id":"K6XC13pTfFiD"}},{"cell_type":"code","source":["import os\n","os.kill(os.getpid(), 9)"],"metadata":{"id":"3kuZbWAkfHdg"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Virtual display\n","from pyvirtualdisplay import Display\n","\n","virtual_display = Display(visible=0, size=(1400, 900))\n","virtual_display.start()"],"metadata":{"id":"DaY1N4dBrabi","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696092139881,"user_tz":-60,"elapsed":400,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"e7e3ed15-9247-49da-d437-2bd4abcb4772"},"execution_count":1,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{},"execution_count":1}]},{"cell_type":"markdown","metadata":{"id":"W-7f-Swax_9x"},"source":["## Import the packages ๐Ÿ“ฆ\n","\n","In addition to the installed libraries, we also use:\n","\n","- `random`: To generate random numbers (that will be useful for epsilon-greedy policy).\n","- `imageio`: To generate a replay video."]},{"cell_type":"code","execution_count":2,"metadata":{"id":"VcNvOAQlysBJ","executionInfo":{"status":"ok","timestamp":1696092143580,"user_tz":-60,"elapsed":485,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["import numpy as np\n","import gymnasium as gym\n","import random\n","import imageio\n","import os\n","import tqdm\n","\n","import pickle5 as pickle\n","from tqdm.notebook import tqdm"]},{"cell_type":"markdown","metadata":{"id":"xp4-bXKIy1mQ"},"source":["We're now ready to code our Q-Learning algorithm ๐Ÿ”ฅ"]},{"cell_type":"markdown","metadata":{"id":"xya49aNJWVvv"},"source":["# Part 1: Frozen Lake โ›„ (non slippery version)"]},{"cell_type":"markdown","metadata":{"id":"NAvihuHdy9tw"},"source":["## Create and understand [FrozenLake environment 
โ›„]((https://gymnasium.farama.org/environments/toy_text/frozen_lake/)\n","---\n","\n","๐Ÿ’ก A good habit when you start to use an environment is to check its documentation\n","\n","๐Ÿ‘‰ https://gymnasium.farama.org/environments/toy_text/frozen_lake/\n","\n","---\n","\n","We're going to train our Q-Learning agent **to navigate from the starting state (S) to the goal state (G) by walking only on frozen tiles (F) and avoid holes (H)**.\n","\n","We can have two sizes of environment:\n","\n","- `map_name=\"4x4\"`: a 4x4 grid version\n","- `map_name=\"8x8\"`: a 8x8 grid version\n","\n","\n","The environment has two modes:\n","\n","- `is_slippery=False`: The agent always moves **in the intended direction** due to the non-slippery nature of the frozen lake (deterministic).\n","- `is_slippery=True`: The agent **may not always move in the intended direction** due to the slippery nature of the frozen lake (stochastic)."]},{"cell_type":"markdown","metadata":{"id":"UaW_LHfS0PY2"},"source":["For now let's keep it simple with the 4x4 map and non-slippery.\n","We add a parameter called `render_mode` that specifies how the environment should be visualised. In our case because we **want to record a video of the environment at the end, we need to set render_mode to rgb_array**.\n","\n","As [explained in the documentation](https://gymnasium.farama.org/api/env/#gymnasium.Env.render) โ€œrgb_arrayโ€: Return a single frame representing the current state of the environment. 
A frame is a np.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"IzJnb8O3y8up"},"outputs":[],"source":["# Create the FrozenLake-v1 environment using 4x4 map and non-slippery version and render_mode=\"rgb_array\"\n","env = gym.make(\"FrozenLake-v1\", map_name=\"4x4\", is_slippery=False, render_mode=\"rgb_array\")"]},{"cell_type":"markdown","metadata":{"id":"Ji_UrI5l2zzn"},"source":["### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jNxUbPMP0akP"},"outputs":[],"source":["env = gym.make(\"FrozenLake-v1\", map_name=\"4x4\", is_slippery=False, render_mode=\"rgb_array\")"]},{"cell_type":"markdown","metadata":{"id":"KASNViqL4tZn"},"source":["You can create your own custom grid like this:\n","\n","```python\n","desc=[\"SFFF\", \"FHFH\", \"FFFH\", \"HFFG\"]\n","gym.make('FrozenLake-v1', desc=desc, is_slippery=True)\n","```\n","\n","but we'll use the default environment for now."]},{"cell_type":"markdown","metadata":{"id":"SXbTfdeJ1Xi9"},"source":["### Let's see what the Environment looks like:\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ZNPG0g_UGCfh","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696079802454,"user_tz":-60,"elapsed":209,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"6e22a837-f2de-4e30-924d-851cbac15f7d"},"outputs":[{"output_type":"stream","name":"stdout","text":["_____OBSERVATION SPACE_____ \n","\n","Observation Space Discrete(16)\n","Sample observation 6\n"]}],"source":["# We create our environment with gym.make(\"\")- `is_slippery=False`: The agent always moves in the intended direction due to the non-slippery nature of the frozen lake (deterministic).\n","print(\"_____OBSERVATION SPACE_____ \\n\")\n","print(\"Observation Space\", env.observation_space)\n","print(\"Sample observation\", env.observation_space.sample()) # Get a random 
observation"]},{"cell_type":"markdown","metadata":{"id":"2MXc15qFE0M9"},"source":["We see with `Observation Space Shape Discrete(16)` that the observation is an integer representing the **agentโ€™s current position as current_row * ncols + current_col (where both the row and col start at 0)**.\n","\n","For example, the goal position in the 4x4 map can be calculated as follows: 3 * 4 + 3 = 15. The number of possible observations is dependent on the size of the map. **For example, the 4x4 map has 16 possible observations.**\n","\n","\n","For instance, this is what state = 0 looks like:\n","\n","\"FrozenLake\""]},{"cell_type":"code","execution_count":null,"metadata":{"id":"We5WqOBGLoSm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696079806433,"user_tz":-60,"elapsed":209,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"9eee93fb-dee1-47a0-a646-f1f3fe0aebba"},"outputs":[{"output_type":"stream","name":"stdout","text":["\n"," _____ACTION SPACE_____ \n","\n","Action Space Shape 4\n","Action Space Sample 1\n"]}],"source":["print(\"\\n _____ACTION SPACE_____ \\n\")\n","print(\"Action Space Shape\", env.action_space.n)\n","print(\"Action Space Sample\", env.action_space.sample()) # Take a random action"]},{"cell_type":"markdown","metadata":{"id":"MyxXwkI2Magx"},"source":["The action space (the set of possible actions the agent can take) is discrete with 4 actions available ๐ŸŽฎ:\n","- 0: GO LEFT\n","- 1: GO DOWN\n","- 2: GO RIGHT\n","- 3: GO UP\n","\n","Reward function ๐Ÿ’ฐ:\n","- Reach goal: +1\n","- Reach hole: 0\n","- Reach frozen: 0"]},{"cell_type":"markdown","metadata":{"id":"1pFhWblk3Awr"},"source":["## Create and Initialize the Q-table ๐Ÿ—„๏ธ\n","\n","(๐Ÿ‘€ Step 1 of the pseudocode)\n","\n","\"Q-Learning\"\n","\n","\n","It's time to initialize our Q-table! To know how many rows (states) and columns (actions) to use, we need to know the action and observation space. 
We already know their values from before, but we'll want to obtain them programmatically so that our algorithm generalizes for different environments. Gym provides us a way to do that: `env.action_space.n` and `env.observation_space.n`\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"y3ZCdluj3k0l","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696079808848,"user_tz":-60,"elapsed":229,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"a6729547-3ea2-4848-a685-fabdf79f243f"},"outputs":[{"output_type":"stream","name":"stdout","text":["There are 16 possible states\n","There are 4 possible actions\n"]}],"source":["state_space = env.observation_space.n\n","print(\"There are \", state_space, \" possible states\")\n","\n","action_space = env.action_space.n\n","print(\"There are \", action_space, \" possible actions\")"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"rCddoOXM3UQH","executionInfo":{"status":"ok","timestamp":1696092287710,"user_tz":-60,"elapsed":395,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["# Let's create our Qtable of size (state_space, action_space) and initialized each values at 0 using np.zeros. 
np.zeros needs a tuple (a,b)\n","def initialize_q_table(state_space, action_space):\n"," Qtable = np.zeros((state_space, action_space))\n"," return Qtable"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"9YfvrqRt3jdR"},"outputs":[],"source":["Qtable_frozenlake = initialize_q_table(state_space, action_space)"]},{"cell_type":"markdown","metadata":{"id":"67OdoKL63eDD"},"source":["### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"HuTKv3th3ohG"},"outputs":[],"source":["state_space = env.observation_space.n\n","print(\"There are \", state_space, \" possible states\")\n","\n","action_space = env.action_space.n\n","print(\"There are \", action_space, \" possible actions\")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lnrb_nX33fJo"},"outputs":[],"source":["# Let's create our Qtable of size (state_space, action_space) and initialized each values at 0 using np.zeros\n","def initialize_q_table(state_space, action_space):\n"," Qtable = np.zeros((state_space, action_space))\n"," return Qtable"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Y0WlgkVO3Jf9"},"outputs":[],"source":["Qtable_frozenlake = initialize_q_table(state_space, action_space)"]},{"cell_type":"markdown","metadata":{"id":"Atll4Z774gri"},"source":["## Define the greedy policy ๐Ÿค–\n","\n","Remember we have two policies since Q-Learning is an **off-policy** algorithm. This means we're using a **different policy for acting and updating the value function**.\n","\n","- Epsilon-greedy policy (acting policy)\n","- Greedy-policy (updating policy)\n","\n","The greedy policy will also be the final policy we'll have when the Q-learning agent completes training. 
The greedy policy is used to select an action using the Q-table.\n","\n","\"Q-Learning\"\n"]},{"cell_type":"code","execution_count":8,"metadata":{"id":"E3SCLmLX5bWG","executionInfo":{"status":"ok","timestamp":1696092291068,"user_tz":-60,"elapsed":277,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def greedy_policy(Qtable, state):\n"," # Exploitation: take the action with the highest state, action value\n"," action = np.argmax(Qtable[state][:])\n","\n"," return action"]},{"cell_type":"markdown","metadata":{"id":"B2_-8b8z5k54"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"se2OzWGW5kYJ"},"outputs":[],"source":["def greedy_policy(Qtable, state):\n"," # Exploitation: take the action with the highest state, action value\n"," action = np.argmax(Qtable[state][:])\n","\n"," return action"]},{"cell_type":"markdown","metadata":{"id":"flILKhBU3yZ7"},"source":["##Define the epsilon-greedy policy ๐Ÿค–\n","\n","Epsilon-greedy is the training policy that handles the exploration/exploitation trade-off.\n","\n","The idea with epsilon-greedy:\n","\n","- With *probability 1โ€Š-โ€Šษ›* : **we do exploitation** (i.e. 
our agent selects the action with the highest state-action pair value).\n","\n","- With *probability ษ›*: we do **exploration** (trying a random action).\n","\n","As the training continues, we progressively **reduce the epsilon value since we will need less and less exploration and more exploitation.**\n","\n","\"Q-Learning\"\n"]},{"cell_type":"code","execution_count":9,"metadata":{"id":"6Bj7x3in3_Pq","executionInfo":{"status":"ok","timestamp":1696092293376,"user_tz":-60,"elapsed":294,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def epsilon_greedy_policy(Qtable, state, epsilon):\n"," # Randomly generate a number between 0 and 1\n"," random_num = random.uniform(0, 1)\n"," # if random_num > greater than epsilon --> exploitation\n"," if random_num > epsilon:\n"," # Take the action with the highest value given a state\n"," # np.argmax can be useful here\n"," action = greedy_policy(Qtable, state)\n"," # else --> exploration\n"," else:\n"," action = env.action_space.sample() # Take a random action\n","\n"," return action"]},{"cell_type":"markdown","metadata":{"id":"8R5ej1fS4P2V"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cYxHuckr4LiG"},"outputs":[],"source":["def epsilon_greedy_policy(Qtable, state, epsilon):\n"," # Randomly generate a number between 0 and 1\n"," random_num = random.uniform(0,1)\n"," # if random_num > greater than epsilon --> exploitation\n"," if random_num > epsilon:\n"," # Take the action with the highest value given a state\n"," # np.argmax can be useful here\n"," action = greedy_policy(Qtable, state)\n"," # else --> exploration\n"," else:\n"," action = env.action_space.sample()\n","\n"," return action"]},{"cell_type":"markdown","metadata":{"id":"hW80DealcRtu"},"source":["## Define the hyperparameters โš™๏ธ\n","\n","The exploration related hyperparamters are some of the most important ones.\n","\n","- We need to make sure that our agent **explores enough 
of the state space** to learn a good value approximation. To do that, we need to have progressive decay of the epsilon.\n","- If you decrease epsilon too fast (too high decay_rate), **you take the risk that your agent will be stuck**, since your agent didn't explore enough of the state space and hence can't solve the problem."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Y1tWn0tycWZ1"},"outputs":[],"source":["# Training parameters\n","n_training_episodes = 10000 # Total training episodes\n","learning_rate = 0.7 # Learning rate\n","\n","# Evaluation parameters\n","n_eval_episodes = 100 # Total number of test episodes\n","\n","# Environment parameters\n","env_id = \"FrozenLake-v1\" # Name of the environment\n","max_steps = 99 # Max steps per episode\n","gamma = 0.95 # Discounting rate\n","eval_seed = [] # The evaluation seed of the environment\n","\n","# Exploration parameters\n","max_epsilon = 1.0 # Exploration probability at start\n","min_epsilon = 0.05 # Minimum exploration probability\n","decay_rate = 0.0002 # Exponential decay rate for exploration prob"]},{"cell_type":"markdown","metadata":{"id":"cDb7Tdx8atfL"},"source":["## Create the training loop method\n","\n","\"Q-Learning\"\n","\n","The training loop goes like this:\n","\n","```\n","For episode in the total of training episodes:\n","\n","Reduce epsilon (since we need less and less exploration)\n","Reset the environment\n","\n"," For step in max timesteps: \n"," Choose the action At using epsilon greedy policy\n"," Take the action (a) and observe the outcome state(s') and reward (r)\n"," Update the Q-value Q(s,a) using Bellman equation Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n"," If done, finish the episode\n"," Our next state is the new state\n","```"]},{"cell_type":"code","execution_count":32,"metadata":{"id":"paOynXy3aoJW","executionInfo":{"status":"ok","timestamp":1696092879553,"user_tz":-60,"elapsed":300,"user":{"displayName":"Jake 
Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable, lr):\n"," for episode in tqdm(range(n_training_episodes)):\n"," # Reduce epsilon (because we need less and less exploration)\n"," epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)\n"," # Reset the environment\n"," state, info = env.reset()\n"," step = 0\n"," terminated = False\n"," truncated = False\n","\n"," # repeat\n"," for step in range(max_steps):\n"," # Choose the action At using epsilon greedy policy\n"," action = epsilon_greedy_policy(Qtable, state, epsilon)\n","\n"," # Take action At and observe Rt+1 and St+1\n"," # Take the action (a) and observe the outcome state(s') and reward (r)\n"," new_state, reward, terminated, truncated, info = env.step(action)\n","\n"," # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n"," Qtable[state][action] = Qtable[state][action] + lr * (reward + gamma * np.max(Qtable[new_state]) - Qtable[state][action])\n","\n"," # If terminated or truncated finish the episode\n"," if terminated or truncated:\n"," break\n","\n"," # Our next state is the new state\n"," state = new_state\n"," return Qtable"]},{"cell_type":"markdown","metadata":{"id":"Pnpk2ePoem3r"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"IyZaYbUAeolw"},"outputs":[],"source":["def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable):\n"," for episode in tqdm(range(n_training_episodes)):\n"," # Reduce epsilon (because we need less and less exploration)\n"," epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)\n"," # Reset the environment\n"," state, info = env.reset()\n"," step = 0\n"," terminated = False\n"," truncated = False\n","\n"," # repeat\n"," for step in range(max_steps):\n"," # Choose the action At using epsilon greedy policy\n"," action = 
epsilon_greedy_policy(Qtable, state, epsilon)\n","\n"," # Take action At and observe Rt+1 and St+1\n"," # Take the action (a) and observe the outcome state(s') and reward (r)\n"," new_state, reward, terminated, truncated, info = env.step(action)\n","\n"," # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n"," Qtable[state][action] = Qtable[state][action] + learning_rate * (reward + gamma * np.max(Qtable[new_state]) - Qtable[state][action])\n","\n"," # If terminated or truncated finish the episode\n"," if terminated or truncated:\n"," break\n","\n"," # Our next state is the new state\n"," state = new_state\n"," return Qtable"]},{"cell_type":"markdown","metadata":{"id":"WLwKQ4tUdhGI"},"source":["## Train the Q-Learning agent ๐Ÿƒ"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"DPBxfjJdTCOH","colab":{"base_uri":"https://localhost:8080/","height":49,"referenced_widgets":["e6f0cdea90a54a409d1b6bb5da149fa2","1c2b811eaf5c46bdbae02a2c61a51ffd","840f4495c39f4dfca4ae71602ccd41e9","3de082bd7cbd40b9be4aec0e684becba","9833920632dd4f988b6f18e8caccbc5d","828041fe06b74a6a9ea122ff94a4e2a4","c0a0ecce9da741c6884684ac9224e388","41af6d560ff648b58032316a7367e001","8ac7eea521864e47bc77533e33a5990e","98ac2fcbdebd41e8a0465564c7f9f73c","fb6a4a28f32c417f88ae9b485051f39e"]},"executionInfo":{"status":"ok","timestamp":1696079839206,"user_tz":-60,"elapsed":3370,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"29414ef8-dc8f-4460-f557-2c9c540d13d2"},"outputs":[{"output_type":"display_data","data":{"text/plain":[" 0%| | 0/10000 [00:00\n"]},{"cell_type":"markdown","metadata":{"id":"kv0k1JQjpMq3"},"source":["Under the hood, the Hub uses git-based repositories (don't worry if you don't know what git is), which means you can update the model with new versions as you experiment and improve your agent."]},{"cell_type":"markdown","metadata":{"id":"QZ5LrR-joIHD"},"source":["#### Do not modify this 
code"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"Jex3i9lZ8ksX","executionInfo":{"status":"ok","timestamp":1696092305316,"user_tz":-60,"elapsed":664,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["from huggingface_hub import HfApi, snapshot_download\n","from huggingface_hub.repocard import metadata_eval_result, metadata_save\n","\n","from pathlib import Path\n","import datetime\n","import json"]},{"cell_type":"code","execution_count":13,"metadata":{"id":"Qo57HBn3W74O","executionInfo":{"status":"ok","timestamp":1696092305970,"user_tz":-60,"elapsed":353,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def record_video(env, Qtable, out_directory, fps=1):\n"," \"\"\"\n"," Generate a replay video of the agent\n"," :param env\n"," :param Qtable: Qtable of our agent\n"," :param out_directory\n"," :param fps: how many frame per seconds (with taxi-v3 and frozenlake-v1 we use 1)\n"," \"\"\"\n"," images = []\n"," terminated = False\n"," truncated = False\n"," state, info = env.reset(seed=random.randint(0,500))\n"," img = env.render()\n"," images.append(img)\n"," while not terminated or truncated:\n"," # Take the action (index) that have the maximum expected future reward given that state\n"," action = np.argmax(Qtable[state][:])\n"," state, reward, terminated, truncated, info = env.step(action) # We directly put next_state = state for recording logic\n"," img = env.render()\n"," images.append(img)\n"," imageio.mimsave(out_directory, [np.array(img) for i, img in enumerate(images)], fps=fps)"]},{"cell_type":"code","source":["def push_to_hub(\n"," repo_id, model, env, video_fps=1, local_repo_path=\"hub\"\n","):\n"," \"\"\"\n"," Evaluate, Generate a video and Upload a model to Hugging Face Hub.\n"," This method does the complete pipeline:\n"," - It evaluates the model\n"," - It generates the model card\n"," - It generates a replay video of the agent\n"," - It pushes 
everything to the Hub\n","\n"," :param repo_id: repo_id: id of the model repository from the Hugging Face Hub\n"," :param env\n"," :param video_fps: how many frame per seconds to record our video replay\n"," (with taxi-v3 and frozenlake-v1 we use 1)\n"," :param local_repo_path: where the local repository is\n"," \"\"\"\n"," _, repo_name = repo_id.split(\"/\")\n","\n"," eval_env = env\n"," api = HfApi()\n","\n"," # Step 1: Create the repo\n"," repo_url = api.create_repo(\n"," repo_id=repo_id,\n"," exist_ok=True,\n"," )\n","\n"," # Step 2: Download files\n"," repo_local_path = Path(snapshot_download(repo_id=repo_id))\n","\n"," # Step 3: Save the model\n"," if env.spec.kwargs.get(\"map_name\"):\n"," model[\"map_name\"] = env.spec.kwargs.get(\"map_name\")\n"," if env.spec.kwargs.get(\"is_slippery\", \"\") == False:\n"," model[\"slippery\"] = False\n","\n"," # Pickle the model\n"," with open((repo_local_path) / \"q-learning.pkl\", \"wb\") as f:\n"," pickle.dump(model, f)\n","\n"," # Step 4: Evaluate the model and build JSON with evaluation metrics\n"," mean_reward, std_reward = evaluate_agent(\n"," eval_env, model[\"max_steps\"], model[\"n_eval_episodes\"], model[\"qtable\"], model[\"eval_seed\"]\n"," )\n","\n"," evaluate_data = {\n"," \"env_id\": model[\"env_id\"],\n"," \"mean_reward\": mean_reward,\n"," \"n_eval_episodes\": model[\"n_eval_episodes\"],\n"," \"eval_datetime\": datetime.datetime.now().isoformat()\n"," }\n","\n"," # Write a JSON file called \"results.json\" that will contain the\n"," # evaluation results\n"," with open(repo_local_path / \"results.json\", \"w\") as outfile:\n"," json.dump(evaluate_data, outfile)\n","\n"," # Step 5: Create the model card\n"," env_name = model[\"env_id\"]\n"," if env.spec.kwargs.get(\"map_name\"):\n"," env_name += \"-\" + env.spec.kwargs.get(\"map_name\")\n","\n"," if env.spec.kwargs.get(\"is_slippery\", \"\") == False:\n"," env_name += \"-\" + \"no_slippery\"\n","\n"," metadata = {}\n"," metadata[\"tags\"] = [env_name, 
\"q-learning\", \"reinforcement-learning\", \"custom-implementation\"]\n","\n"," # Add metrics\n"," eval = metadata_eval_result(\n"," model_pretty_name=repo_name,\n"," task_pretty_name=\"reinforcement-learning\",\n"," task_id=\"reinforcement-learning\",\n"," metrics_pretty_name=\"mean_reward\",\n"," metrics_id=\"mean_reward\",\n"," metrics_value=f\"{mean_reward:.2f} +/- {std_reward:.2f}\",\n"," dataset_pretty_name=env_name,\n"," dataset_id=env_name,\n"," )\n","\n"," # Merges both dictionaries\n"," metadata = {**metadata, **eval}\n","\n"," model_card = f\"\"\"\n"," # **Q-Learning** Agent playing1 **{env_id}**\n"," This is a trained model of a **Q-Learning** agent playing **{env_id}** .\n","\n"," ## Usage\n","\n"," ```python\n","\n"," model = load_from_hub(repo_id=\"{repo_id}\", filename=\"q-learning.pkl\")\n","\n"," # Don't forget to check if you need to add additional attributes (is_slippery=False etc)\n"," env = gym.make(model[\"env_id\"])\n"," ```\n"," \"\"\"\n","\n"," evaluate_agent(env, model[\"max_steps\"], model[\"n_eval_episodes\"], model[\"qtable\"], model[\"eval_seed\"])\n","\n"," readme_path = repo_local_path / \"README.md\"\n"," readme = \"\"\n"," print(readme_path.exists())\n"," if readme_path.exists():\n"," with readme_path.open(\"r\", encoding=\"utf8\") as f:\n"," readme = f.read()\n"," else:\n"," readme = model_card\n","\n"," with readme_path.open(\"w\", encoding=\"utf-8\") as f:\n"," f.write(readme)\n","\n"," # Save our metrics to Readme metadata\n"," metadata_save(readme_path, metadata)\n","\n"," # Step 6: Record a video\n"," video_path = repo_local_path / \"replay.mp4\"\n"," record_video(env, model[\"qtable\"], video_path, video_fps)\n","\n"," # Step 7. Push everything to the Hub\n"," api.upload_folder(\n"," repo_id=repo_id,\n"," folder_path=repo_local_path,\n"," path_in_repo=\".\",\n"," )\n","\n"," print(\"Your model is pushed to the Hub. 
You can view your model here: \", repo_url)"],"metadata":{"id":"U4mdUTKkGnUd","executionInfo":{"status":"ok","timestamp":1696092307631,"user_tz":-60,"elapsed":432,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"execution_count":14,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"81J6cet_ogSS"},"source":["### .\n","\n","By using `push_to_hub` **you evaluate, record a replay, generate a model card of your agent and push it to the Hub**.\n","\n","This way:\n","- You can **showcase your work** 🔥\n","- You can **visualize your agent playing** 👀\n","- You can **share an agent with the community that others can use** 💾\n","- You can **access a leaderboard 🏆 to see how well your agent is performing compared to your classmates** 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard\n"]},{"cell_type":"markdown","metadata":{"id":"cWnFC0iZooTw"},"source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1️⃣ (If it's not already done) create an account on HF ➡ https://huggingface.co/join\n","\n","2️⃣ Sign in, then store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write 
role**\n","\n","\n","\"Create\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"QB5nIcxR8paT","colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["4119e481cf774e14a39e3480bf3dee23","c55f984de9d0425792199bb0204b2a88","5ae9e2d6dc10461f8d1f99b5ed460946","cd98d11981f34cdcb8209cbaaf74f61e","e9244f63c5f5486898935cd4ba84fed8","9496a0198bc64e3ea7180059fe35cb58","747b9c6d02e54bef8cc875a06ee0caae","166beba22ad74a7fbf21d101156b9f28","d9ad51d7a6524f9884c85b6ae14d7212","8591dbbbc0fe4a20ab143914dd849b4c","bde8da2b29d64d5cbc158a34697acb7b","cfa99abd6ac34994bd48b5919375c96b","ce25d48725e84931b5a1ad3f0c5cd057","ebc0a564f44c4428ae315638657f15de","79760d8954834265a6c7f637a5f5f57b","ee7230b508824636aa1de3aa9e547537","4dbdec8615eb43728988b1e92868f7bf","6fc271597c73420385a1011461269bb8","75744d5a04a0483fa6b19711b287a59c","440f9915f04e4972b963258808503a9c","918c9e6775bf4f319dbc4e68057f2ce0","b73a1998a2e74c17a2dcdff3bfcdff5c","48437e2d495c4646a31c7647820e8f77","e4db486e42914c94bf41f4f521e7085a","04e81cad97634c9a830d38da5a026e25","6ab23d487ce94e8cb628096346ae7f10","52d901fc9d0b4c21805aa432737c1009","bc43bc4504004b2285d7c087b92f6d6e","4534b7cb3f374ec594312a73a63cb2aa","e9f846e8823241f680c4f077cabbedac","548d1565762546889e4af50c2002619b","201a94eea14741e1a2c19072205df7f7"]},"executionInfo":{"status":"ok","timestamp":1696079866404,"user_tz":-60,"elapsed":233,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"cf6d9422-012d-4d92-a248-9e1feea240b2"},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='
\n"]},{"cell_type":"code","execution_count":3,"metadata":{"id":"gL0wpeO8gpej","executionInfo":{"status":"ok","timestamp":1696092196201,"user_tz":-60,"elapsed":279,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["env = gym.make(\"Taxi-v3\", render_mode=\"rgb_array\")"]},{"cell_type":"markdown","metadata":{"id":"gBOaXgtsrmtT"},"source":["There are **500 discrete states since there are 25 taxi positions, 5 possible locations of the passenger** (including the case when the passenger is in the taxi), and **4 destination locations.**\n"]},{"cell_type":"code","execution_count":4,"metadata":{"id":"_TPNaGSZrgqA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696092200923,"user_tz":-60,"elapsed":277,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"0660ae9e-2983-4dd5-80c9-ef3b6dee80cd"},"outputs":[{"output_type":"stream","name":"stdout","text":["There are 500 possible states\n"]}],"source":["state_space = env.observation_space.n\n","print(\"There are \", state_space, \" possible states\")"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"CdeeZuokrhit","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696092203051,"user_tz":-60,"elapsed":336,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"5c91177f-6b05-4dab-a02a-3be9cdcc719a"},"outputs":[{"output_type":"stream","name":"stdout","text":["There are 6 possible actions\n"]}],"source":["action_space = env.action_space.n\n","print(\"There are \", action_space, \" possible actions\")"]},{"cell_type":"markdown","metadata":{"id":"R1r50Advrh5Q"},"source":["The action space (the set of possible actions the agent can take) is discrete with **6 actions available ๐ŸŽฎ**:\n","\n","- 0: move south\n","- 1: move north\n","- 2: move east\n","- 3: move west\n","- 4: pickup passenger\n","- 5: drop off passenger\n","\n","Reward function 
๐Ÿ’ฐ:\n","\n","- -1 per step unless other reward is triggered.\n","- +20 delivering passenger.\n","- -10 executing โ€œpickupโ€ and โ€œdrop-offโ€ actions illegally."]},{"cell_type":"code","execution_count":15,"metadata":{"id":"US3yDXnEtY9I","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696092315666,"user_tz":-60,"elapsed":7,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"7064388e-0185-4a00-f4df-dde50786fcfb"},"outputs":[{"output_type":"stream","name":"stdout","text":["[[0. 0. 0. 0. 0. 0.]\n"," [0. 0. 0. 0. 0. 0.]\n"," [0. 0. 0. 0. 0. 0.]\n"," ...\n"," [0. 0. 0. 0. 0. 0.]\n"," [0. 0. 0. 0. 0. 0.]\n"," [0. 0. 0. 0. 0. 0.]]\n","Q-table shape: (500, 6)\n"]}],"source":["# Create our Q table with state_size rows and action_size columns (500x6)\n","Qtable_taxi = initialize_q_table(state_space, action_space)\n","print(Qtable_taxi)\n","print(\"Q-table shape: \", Qtable_taxi .shape)"]},{"cell_type":"markdown","metadata":{"id":"gUMKPH0_LJyH"},"source":["## Define the hyperparameters โš™๏ธ\n","\n","โš  DO NOT MODIFY EVAL_SEED: the eval_seed array **allows us to evaluate your agent with the same taxi starting positions for every classmate**"]},{"cell_type":"code","execution_count":52,"metadata":{"id":"AB6n__hhg7YS","executionInfo":{"status":"ok","timestamp":1696093093649,"user_tz":-60,"elapsed":295,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["# Training parameters\n","n_training_episodes = 2500000 # Total training episodes\n","learning_rate = 0.7 # Learning rate\n","\n","# Evaluation parameters\n","n_eval_episodes = 100 # Total number of test episodes\n","\n","# DO NOT MODIFY EVAL_SEED\n","eval_seed = [16,54,165,177,191,191,120,80,149,178,48,38,6,125,174,73,50,172,100,148,146,6,25,40,68,148,49,167,9,97,164,176,61,7,54,55,\n"," 
161,131,184,51,170,12,120,113,95,126,51,98,36,135,54,82,45,95,89,59,95,124,9,113,58,85,51,134,121,169,105,21,30,11,50,65,12,43,82,145,152,97,106,55,31,85,38,\n"," 112,102,168,123,97,21,83,158,26,80,63,5,81,32,11,28,148] # Evaluation seed, this ensures that all classmates agents are trained on the same taxi starting position\n"," # Each seed has a specific starting state\n","\n","# Environment parameters\n","env_id = \"Taxi-v3\" # Name of the environment\n","max_steps = 99 # Max steps per episode\n","gamma = 0.95 # Discounting rate\n","\n","# Exploration parameters\n","max_epsilon = 1.0 # Exploration probability at start\n","min_epsilon = 0.05 # Minimum exploration probability\n","decay_rate = 0.005 # Exponential decay rate for exploration prob\n"]},{"cell_type":"markdown","metadata":{"id":"1TMORo1VLTsX"},"source":["## Train our Q-Learning agent ๐Ÿƒ"]},{"cell_type":"code","execution_count":53,"metadata":{"id":"WwP3Y2z2eS-K","colab":{"base_uri":"https://localhost:8080/","height":281,"referenced_widgets":["f1aba0ec8e784c42b112a74472267f0f","f258e37b90e64527b3fb4cc87c110bfb","e4d2875db2a441d1b1f9a63b43b3cdcd","d84cd9996c784094af24f27013b5c970","405c66fe1e5f4ebda88959e965fa0c17","f4eb5fcc8fb64019a0a4a3984d338473","0230faf9f7314e90aceeeb14735f80fd","aa4fcdf1ebc249a58f810fb36006bdb8","c1052fddbce24e67811b2e3c84801460","830018b1d3324418a7e8b90fcc4535c1","e27e7697b17543c4a14e2a9d35f32f6e"]},"executionInfo":{"status":"ok","timestamp":1696094912922,"user_tz":-60,"elapsed":1817873,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"69a04b7d-349d-4427-854d-bda536baaf66"},"outputs":[{"output_type":"display_data","data":{"text/plain":[" 0%| | 0/2500000 [00:00"]},{"cell_type":"markdown","metadata":{"id":"bzgIO70c0bu2"},"source":["# Part 3: Load from Hub ๐Ÿ”ฝ\n","\n","What's amazing with Hugging Face Hub ๐Ÿค— is that you can easily load powerful models from the community.\n","\n","Loading a saved model from the Hub is really easy:\n","\n","1. 
You go https://huggingface.co/models?other=q-learning to see the list of all the q-learning saved models.\n","2. You select one and copy its repo_id\n","\n","\"Copy"]},{"cell_type":"markdown","metadata":{"id":"gTth6thRoC6X"},"source":["3. Then we just need to use `load_from_hub` with:\n","- The repo_id\n","- The filename: the saved model inside the repo."]},{"cell_type":"markdown","metadata":{"id":"EtrfoTaBoNrd"},"source":["#### Do not modify this code"]},{"cell_type":"code","execution_count":57,"metadata":{"id":"Eo8qEzNtCaVI","executionInfo":{"status":"ok","timestamp":1696095062038,"user_tz":-60,"elapsed":234,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["from urllib.error import HTTPError\n","\n","from huggingface_hub import hf_hub_download\n","\n","\n","def load_from_hub(repo_id: str, filename: str) -> str:\n"," \"\"\"\n"," Download a model from Hugging Face Hub.\n"," :param repo_id: id of the model repository from the Hugging Face Hub\n"," :param filename: name of the model zip file from the repository\n"," \"\"\"\n"," # Get the model from the Hub, download and cache the model on your local disk\n"," pickle_model = hf_hub_download(\n"," repo_id=repo_id,\n"," filename=filename\n"," )\n","\n"," with open(pickle_model, 'rb') as f:\n"," downloaded_model_file = pickle.load(f)\n","\n"," return downloaded_model_file"]},{"cell_type":"markdown","metadata":{"id":"b_sM2gNioPZH"},"source":["### 
."]},{"cell_type":"code","execution_count":59,"metadata":{"id":"JUm9lz2gCQcU","colab":{"base_uri":"https://localhost:8080/","height":351,"referenced_widgets":["db4eb369c18749fab4b992626b0d7ee2","eee8dc2819414ec8b385cdaf9ca31679","d8300151209b4bf69dd40293c4829c68","78ac941b16744bba8546042f2e1be9fa","a96ade3a25cb4fab9696e00f1d56249b","8679d7521cdf41c6bc090635f507e9d3","2545f2ad186d45f7a779f423959ebfa5","1ff751ff2be5462a825a3a85a991c9c2","033b057020714f4b8e8d93fb450fb5e2","b8e75d3f9be8427a82c61a6480bc7f78","348c3154cb1344fb9a6db7f5be8d3e72","f818282ac4ef46fe8f073e997e2364e5","4407635f19394c4cab829aa77324d44f","cf18d12d46a04fd4a99fe80819ba7a7b","60c019c2adf745c8aa616a7269f03ac5","3a86d2cd279049b38ba189824cafeae6","cd38950d068f46e38cb085847661515d","615e0eed539c4b2c88449b9cf9a698d8","7e97a0cc37b9427d820a184b88ffe64e","34ab33b6a93147c6a776f2c8e41198c2","3f6d00d90d384d768a7712cff9adf03f","a3136bd489b94a91a248eb08faddf549"]},"executionInfo":{"status":"ok","timestamp":1696095092963,"user_tz":-60,"elapsed":671,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"006cc6a5-403c-4345-bd60-7cd5898987d6"},"outputs":[{"output_type":"display_data","data":{"text/plain":["Downloading (โ€ฆ)/main/q-learning.pkl: 0%| | 0.00/24.6k [00:00\n","\n","Doom is a large environment with a huge state space (millions of different states). Creating and updating a Q-table for that environment would not be efficient.\n","\n","That's why we'll study Deep Q-Learning in the next unit, an algorithm **where we use a neural network that approximates, given a state, the different Q-values for each action.**\n","\n","\"Environments\"/\n"]},{"cell_type":"markdown","metadata":{"id":"BjLhT70TEZIn"},"source":["See you in Unit 3! 
๐Ÿ”ฅ\n","\n","## Keep learning, stay awesome ๐Ÿค—"]}],"metadata":{"colab":{"provenance":[{"file_id":"https://github.com/huggingface/deep-rl-class/blob/master/notebooks/unit2/unit2.ipynb","timestamp":1696078055017}],"collapsed_sections":["Ji_UrI5l2zzn","67OdoKL63eDD","B2_-8b8z5k54","8R5ej1fS4P2V","Pnpk2ePoem3r"]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"widgets":{"application/vnd.jupyter.widget-state+json":{"e6f0cdea90a54a409d1b6bb5da149fa2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_1c2b811eaf5c46bdbae02a2c61a51ffd","IPY_MODEL_840f4495c39f4dfca4ae71602ccd41e9","IPY_MODEL_3de082bd7cbd40b9be4aec0e684becba"],"layout":"IPY_MODEL_9833920632dd4f988b6f18e8caccbc5d"}},"1c2b811eaf5c46bdbae02a2c61a51ffd":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_828041fe06b74a6a9ea122ff94a4e2a4","placeholder":"โ€‹","style":"IPY_MODEL_c0a0ecce9da741c6884684ac9224e388","value":"100%"}},"840f4495c39f4dfca4ae71602ccd41e9":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1
.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_41af6d560ff648b58032316a7367e001","max":10000,"min":0,"orientation":"horizontal","style":"IPY_MODEL_8ac7eea521864e47bc77533e33a5990e","value":10000}},"3de082bd7cbd40b9be4aec0e684becba":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_98ac2fcbdebd41e8a0465564c7f9f73c","placeholder":"โ€‹","style":"IPY_MODEL_fb6a4a28f32c417f88ae9b485051f39e","value":" 10000/10000 [00:03<00:00, 3824.86it/s]"}},"9833920632dd4f988b6f18e8caccbc5d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"828041fe06b74a6a9ea122ff94a4e2a4":{"model_module":"@jupyter-widgets/base","model_name":
"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c0a0ecce9da741c6884684ac9224e388":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"41af6d560ff648b58032316a7367e001":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_ar
eas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8ac7eea521864e47bc77533e33a5990e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"98ac2fcbdebd41e8a0465564c7f9f73c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fb6a4a28f32c417f88ae9b485051f39e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0
","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9288c30c30e14f8e964256563722f0f1":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_fdd5af398f844d6199f7be41e14b90d9","IPY_MODEL_45712c72b8024eda976dfe6791af84d7","IPY_MODEL_e0effc981c2d4baeb1a6cf6224b22fc4"],"layout":"IPY_MODEL_04878d4e8ebf4d5d86fef0edc22b03e7"}},"fdd5af398f844d6199f7be41e14b90d9":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6bb9b7084bf74043914f4f4b6d35469f","placeholder":"โ€‹","style":"IPY_MODEL_e521204e0833433b825b3036d14d02ee","value":"100%"}},"45712c72b8024eda976dfe6791af84d7":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_810a3f5035d648779b5550f82f64264c","max":100,"min":0,"orientation":"horizontal"
,"style":"IPY_MODEL_0a231d4af7cb4fe29f2be2bce9a0eb8c","value":100}},"e0effc981c2d4baeb1a6cf6224b22fc4":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d70c52144c824b6ab1133576ed52c635","placeholder":"โ€‹","style":"IPY_MODEL_21ea34cbd4724403b10cff3a41461e06","value":" 100/100 [00:00<00:00, 56.15it/s]"}},"04878d4e8ebf4d5d86fef0edc22b03e7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6bb9b7084bf74043914f4f4b6d35469f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/ba
se","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e521204e0833433b825b3036d14d02ee":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"810a3f5035d648779b5550f82f64264c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_w
idth":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0a231d4af7cb4fe29f2be2bce9a0eb8c":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d70c52144c824b6ab1133576ed52c635":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"21ea34cbd4724403b10cff3a41461e06":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1
.2.0","_view_name":"StyleView","description_width":""}},"4119e481cf774e14a39e3480bf3dee23":{"model_module":"@jupyter-widgets/controls","model_name":"VBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_918c9e6775bf4f319dbc4e68057f2ce0","IPY_MODEL_b73a1998a2e74c17a2dcdff3bfcdff5c","IPY_MODEL_48437e2d495c4646a31c7647820e8f77","IPY_MODEL_e4db486e42914c94bf41f4f521e7085a"],"layout":"IPY_MODEL_747b9c6d02e54bef8cc875a06ee0caae"}},"c55f984de9d0425792199bb0204b2a88":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_166beba22ad74a7fbf21d101156b9f28","placeholder":"โ€‹","style":"IPY_MODEL_d9ad51d7a6524f9884c85b6ae14d7212","value":"

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
"}},"5ae9e2d6dc10461f8d1f99b5ed460946":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_8591dbbbc0fe4a20ab143914dd849b4c","placeholder":"โ€‹","style":"IPY_MODEL_bde8da2b29d64d5cbc158a34697acb7b","value":""}},"cd98d11981f34cdcb8209cbaaf74f61e":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_cfa99abd6ac34994bd48b5919375c96b","style":"IPY_MODEL_ce25d48725e84931b5a1ad3f0c5cd057","value":true}},"e9244f63c5f5486898935cd4ba84fed8":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_ebc0a564f44c4428ae315638657f15de","style":"IPY_MODEL_79760d8954834265a6c7f637a5f5f57b","tooltip":""}},"9496a0198bc64e3ea7180059fe35cb58":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ee7230b508824636aa1de3aa9e547537","placeholder":"โ€‹","style":"IPY_MODEL_4dbdec8615eb43728988b1e92868f7bf","value":"\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks.
"}},"747b9c6d02e54bef8cc875a06ee0caae":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"166beba22ad74a7fbf21d101156b9f28":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d9ad51d7a6524f9884c85b6ae14d7212":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8591dbbbc0fe4a20ab143914dd849b4c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bde8da2b29d64d5cbc158a34697acb7b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"cfa99abd6ac34994bd48b5919375c96b":{"model_module":"@
jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ce25d48725e84931b5a1ad3f0c5cd057":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ebc0a564f44c4428ae315638657f15de":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":nul
l,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"79760d8954834265a6c7f637a5f5f57b":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"ee7230b508824636aa1de3aa9e547537":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4dbdec8615eb43728988b1e92868f7bf":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","
model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6fc271597c73420385a1011461269bb8":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_75744d5a04a0483fa6b19711b287a59c","placeholder":"โ€‹","style":"IPY_MODEL_440f9915f04e4972b963258808503a9c","value":"Connecting..."}},"75744d5a04a0483fa6b19711b287a59c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"440f9915f04e4972b963258808503a9c":{"model_module":"@jupyter-widgets/controls","model_nam
e":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"918c9e6775bf4f319dbc4e68057f2ce0":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_04e81cad97634c9a830d38da5a026e25","placeholder":"โ€‹","style":"IPY_MODEL_6ab23d487ce94e8cb628096346ae7f10","value":"Token is valid (permission: write)."}},"b73a1998a2e74c17a2dcdff3bfcdff5c":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_52d901fc9d0b4c21805aa432737c1009","placeholder":"โ€‹","style":"IPY_MODEL_bc43bc4504004b2285d7c087b92f6d6e","value":"Your token has been saved in your configured git credential helpers 
(store)."}},"48437e2d495c4646a31c7647820e8f77":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4534b7cb3f374ec594312a73a63cb2aa","placeholder":"โ€‹","style":"IPY_MODEL_e9f846e8823241f680c4f077cabbedac","value":"Your token has been saved to /root/.cache/huggingface/token"}},"e4db486e42914c94bf41f4f521e7085a":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_548d1565762546889e4af50c2002619b","placeholder":"โ€‹","style":"IPY_MODEL_201a94eea14741e1a2c19072205df7f7","value":"Login 
successful"}},"04e81cad97634c9a830d38da5a026e25":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6ab23d487ce94e8cb628096346ae7f10":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"52d901fc9d0b4c21805aa432737c1009":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto
_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bc43bc4504004b2285d7c087b92f6d6e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4534b7cb3f374ec594312a73a63cb2aa":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e9f846e8823241f680c4f077cabbedac":{"model_module":"@j
upyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"548d1565762546889e4af50c2002619b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"201a94eea14741e1a2c19072205df7f7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"6363a5b1c65749c6977f97c2b00c55ec":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/co
ntrols","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b54c8a87ab0d4850aaf8f9e6e5af5de3","IPY_MODEL_196e8c478db34d4c949ada9697d87f2d","IPY_MODEL_518fa64f8c3846e886468ac731edbfaa"],"layout":"IPY_MODEL_b2b2baeb9d4c415bab801855351d4ce1"}},"b54c8a87ab0d4850aaf8f9e6e5af5de3":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c376e2f1509947b0b0115160bbd121d2","placeholder":"โ€‹","style":"IPY_MODEL_2b2ed6fb86f344a69a2cbc5e0f4fd479","value":"Fetching 1 files: 100%"}},"196e8c478db34d4c949ada9697d87f2d":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e3cc65bebe4a4d0cbd765bb826cae2df","max":1,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c4abebed7d954ac9b12d7f02240a0895","value":1}},"518fa64f8c3846e886468ac731edbfaa":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView"
,"description":"","description_tooltip":null,"layout":"IPY_MODEL_b4503a457c044fd2aa5c58391427571c","placeholder":"โ€‹","style":"IPY_MODEL_c60c04f8a206433f9a75cb9f3568bf20","value":" 1/1 [00:00<00:00, 3.83it/s]"}},"b2b2baeb9d4c415bab801855351d4ce1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c376e2f1509947b0b0115160bbd121d2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justi
fy_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2b2ed6fb86f344a69a2cbc5e0f4fd479":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e3cc65bebe4a4d0cbd765bb826cae2df":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c4abebed7d954ac9b12d7f02240a0895":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel",
"_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b4503a457c044fd2aa5c58391427571c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c60c04f8a206433f9a75cb9f3568bf20":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ae5a832a46d94ce68c4a4c9e2636a562":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","
children":["IPY_MODEL_c0a9f2753fb24bd899dccc94e914a296","IPY_MODEL_7098717195b54ad481f7bbdfd1f988d9","IPY_MODEL_c59b3e51fc78441488eb0823f4482730"],"layout":"IPY_MODEL_f5d23de601f643158c10b9a5df7914aa"}},"c0a9f2753fb24bd899dccc94e914a296":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d9f2b2d47c6b4249a89a38d7f9e6ff67","placeholder":"โ€‹","style":"IPY_MODEL_807481d4c14b43b7b03f75f4f546b9ff","value":"100%"}},"7098717195b54ad481f7bbdfd1f988d9":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ff70d642a053434eaf8500872709e56f","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_abcb725b1d74471c8d7bba61ad2de760","value":100}},"c59b3e51fc78441488eb0823f4482730":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5cd1832337c14c3cac847142c0d2a18e","placeholder":"โ€‹","style":"IPY_MODEL_bb44cc6e87654874a0a1bec09b5a54f4","value":" 100/100 [00:00<00:00, 
2229.58it/s]"}},"f5d23de601f643158c10b9a5df7914aa":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d9f2b2d47c6b4249a89a38d7f9e6ff67":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"807481d4c14b43b7b03f75f4f546b9ff":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ff70d642a053434eaf8500872709e56f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"abcb725b1d74471c8d7bba61ad2de760":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"5cd1832337c14c3cac847142c0d2a18e":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bb44cc6e87654874a0a1bec09b5a54f4":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"73702d5503374a0d9787365c45c5da4c":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0134a25d0993417a8cac06177939e642","IPY_MODEL_c503c61dac1c405991dd693ded8f0947","IPY_MODEL_8e75b304812a4ca0b3c6e963a41f2560"],"layout":"IPY_MODEL_416154146a6d41fbb8231b334c26e0
2e"}},"0134a25d0993417a8cac06177939e642":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f8aed36c53c84744a02b1f6578c56233","placeholder":"โ€‹","style":"IPY_MODEL_72b575b0d9b041b2b0465509d2c4158d","value":"100%"}},"c503c61dac1c405991dd693ded8f0947":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e22407fa6ac54bca94ffdfc76be62b5f","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_12120b5c42b0445fb9afcd651bdd075e","value":100}},"8e75b304812a4ca0b3c6e963a41f2560":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d125a8de2fdb4478b08a47ce232db088","placeholder":"โ€‹","style":"IPY_MODEL_f7c5020bc4ac43bba38e70cfe7379283","value":" 100/100 [00:00<00:00, 
2111.43it/s]"}},"416154146a6d41fbb8231b334c26e02e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f8aed36c53c84744a02b1f6578c56233":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"72b575b0d9b041b2b0465509d2c4158d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e22407fa6ac54bca94ffdfc76be62b5f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"12120b5c42b0445fb9afcd651bdd075e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d125a8de2fdb4478b08a47ce232db088":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f7c5020bc4ac43bba38e70cfe7379283":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0ae97a99f86540079c48b842681c6460":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e5356c548e84454585a645b66ffe84e0","IPY_MODEL_728955de7f2940229f35febdf2a52fee","IPY_MODEL_e6a280b5696b4c7e934d162cbb9f47b2"],"layout":"IPY_MODEL_15eb4d2b8b124ddf96d07bf9ccdf64
10"}},"e5356c548e84454585a645b66ffe84e0":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5546de085a3841f78067d8c4a694c6b3","placeholder":"โ€‹","style":"IPY_MODEL_d9326170d94d4a5db9a2262ef2299149","value":"q-learning.pkl: 100%"}},"728955de7f2940229f35febdf2a52fee":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_fd1d7bf7b736412bb6b45ae5446bb5dc","max":914,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9a5506c45ccd451695ec028e35a85560","value":914}},"e6a280b5696b4c7e934d162cbb9f47b2":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0d9bed76678d42e9b9e707edda2fb334","placeholder":"โ€‹","style":"IPY_MODEL_dbb8148d047847de9a0b5c72004b821a","value":" 914/914 [00:00<00:00, 
2.75kB/s]"}},"15eb4d2b8b124ddf96d07bf9ccdf6410":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5546de085a3841f78067d8c4a694c6b3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d9326170d94d4a5db9a2262ef2299149":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fd1d7bf7b736412bb6b45ae5446bb5dc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9a5506c45ccd451695ec028e35a85560":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"0d9bed76678d42e9b9e707edda2fb334":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dbb8148d047847de9a0b5c72004b821a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f1aba0ec8e784c42b112a74472267f0f":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_f258e37b90e64527b3fb4cc87c110bfb","IPY_MODEL_e4d2875db2a441d1b1f9a63b43b3cdcd","IPY_MODEL_d84cd9996c784094af24f27013b5c970"],"layout":"IPY_MODEL_405c66fe1e5f4ebda88959e965fa0c17"
}},"f258e37b90e64527b3fb4cc87c110bfb":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f4eb5fcc8fb64019a0a4a3984d338473","placeholder":"โ€‹","style":"IPY_MODEL_0230faf9f7314e90aceeeb14735f80fd","value":"100%"}},"e4d2875db2a441d1b1f9a63b43b3cdcd":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_aa4fcdf1ebc249a58f810fb36006bdb8","max":2500000,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c1052fddbce24e67811b2e3c84801460","value":2500000}},"d84cd9996c784094af24f27013b5c970":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_830018b1d3324418a7e8b90fcc4535c1","placeholder":"โ€‹","style":"IPY_MODEL_e27e7697b17543c4a14e2a9d35f32f6e","value":" 2500000/2500000 [30:17<00:00, 
1220.58it/s]"}},"405c66fe1e5f4ebda88959e965fa0c17":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f4eb5fcc8fb64019a0a4a3984d338473":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0230faf9f7314e90aceeeb14735f80fd":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"aa4fcdf1ebc249a58f810fb36006bdb8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c1052fddbce24e67811b2e3c84801460":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"830018b1d3324418a7e8b90fcc4535c1":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e27e7697b17543c4a14e2a9d35f32f6e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9d44bf64d8714ac285554a47ade9d4db":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_09d306353c6249d282c7093d30dd2b2c","IPY_MODEL_de2a248a1f2b4c6f94d9ae1ba7accb82","IPY_MODEL_54393e0489ec442a9e36942546d281b2"],"layout":"IPY_MODEL_849dfdd2321c4bbfa90e8aa8e21c38
a7"}},"09d306353c6249d282c7093d30dd2b2c":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_ef2d2e9fabc6494ba0648b669d6a91d2","placeholder":"โ€‹","style":"IPY_MODEL_4457995c30a04396872f866184bd8151","value":"100%"}},"de2a248a1f2b4c6f94d9ae1ba7accb82":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_0e566cd2a6ed45bf8c0feabef523ee59","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_72a3cf500fd34d9bade3055360bc3d8e","value":100}},"54393e0489ec442a9e36942546d281b2":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b6aacba6d11d49cabac66c42049319bc","placeholder":"โ€‹","style":"IPY_MODEL_614bc7497c84455197017396a30e5501","value":" 100/100 [00:00<00:00, 
963.02it/s]"}},"849dfdd2321c4bbfa90e8aa8e21c38a7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ef2d2e9fabc6494ba0648b669d6a91d2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4457995c30a04396872f866184bd8151":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0e566cd2a6ed45bf8c0feabef523ee59":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"72a3cf500fd34d9bade3055360bc3d8e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b6aacba6d11d49cabac66c42049319bc":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"614bc7497c84455197017396a30e5501":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e697bc9541244dcaa063bce1596f27a4":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4dda7398d8594ccc9056276f7086abb3","IPY_MODEL_fa7f5b5dea4b4f77b8b28f646b54df4d","IPY_MODEL_713789465a484c75858710654d5f34d9"],"layout":"IPY_MODEL_ba9ec78d22d14e4a8c1d9c8a0c35f69
6"}},"4dda7398d8594ccc9056276f7086abb3":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e3c765f08dd84843a56884e7c347f70a","placeholder":"โ€‹","style":"IPY_MODEL_0bc6d042f62e406992a811fe915c99af","value":"Fetching 1 files: 100%"}},"fa7f5b5dea4b4f77b8b28f646b54df4d":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_4ac8c0103af949bb9f6bd1d7cdf34031","max":1,"min":0,"orientation":"horizontal","style":"IPY_MODEL_5d38797e81ad4a6dab19d67e0a24e0cf","value":1}},"713789465a484c75858710654d5f34d9":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d02d6c4b5f754706b83563becedbc163","placeholder":"โ€‹","style":"IPY_MODEL_f6756d58b1854092a11dc0648bb62ec3","value":" 1/1 [00:00<00:00, 
1.64it/s]"}},"ba9ec78d22d14e4a8c1d9c8a0c35f696":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e3c765f08dd84843a56884e7c347f70a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0bc6d042f62e406992a811fe915c99af":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4ac8c0103af949bb9f6bd1d7cdf34031":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5d38797e81ad4a6dab19d67e0a24e0cf":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"d02d6c4b5f754706b83563becedbc163":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f6756d58b1854092a11dc0648bb62ec3":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2455e834526a41e798e7368d26b1e47a":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0bf9e3901a1d43b9ae5ec1d7c91dec3e","IPY_MODEL_ecc1104e2cd94b77b04e3a05ed024a8d","IPY_MODEL_213cc41ca4434c1fab2a80b9fe20dbc4"],"layout":"IPY_MODEL_b75c940c972a48298593633c7647771c"
}},"0bf9e3901a1d43b9ae5ec1d7c91dec3e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4d855e8be73b44699a632b7b1c913824","placeholder":"โ€‹","style":"IPY_MODEL_625bba259547431e906659738a263c54","value":"Downloading (โ€ฆ)94191/.gitattributes: 100%"}},"ecc1104e2cd94b77b04e3a05ed024a8d":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ca610db4b8c04bfeb043050345454fab","max":1519,"min":0,"orientation":"horizontal","style":"IPY_MODEL_eeb4422d56d14b70a3b6b3253f851003","value":1519}},"213cc41ca4434c1fab2a80b9fe20dbc4":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e37dcb6119d04d11adc2efd051b43ecd","placeholder":"โ€‹","style":"IPY_MODEL_309b664e615e43298fe1d15f733f6ba1","value":" 1.52k/1.52k [00:00<00:00, 
45.7kB/s]"}},"b75c940c972a48298593633c7647771c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4d855e8be73b44699a632b7b1c913824":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"625bba259547431e906659738a263c54":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ca610db4b8c04bfeb043050345454fab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eeb4422d56d14b70a3b6b3253f851003":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e37dcb6119d04d11adc2efd051b43ecd":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"309b664e615e43298fe1d15f733f6ba1":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a5f463ba70bb477f9f407e9cd0f66bb2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_f113595cdc3f4968800334139d28cde8","IPY_MODEL_f3ef64aa553840feab4b0a1fcc288d86","IPY_MODEL_f3d8b3510494430484097dd72b9fa95d"],"layout":"IPY_MODEL_dcb89e796a5c43f7b5899ea12243235d"
}},"f113595cdc3f4968800334139d28cde8":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_eab72c7103c14e8d9027dd2651bdbd3c","placeholder":"โ€‹","style":"IPY_MODEL_b992492d93b5486ebcfd33126a56400d","value":"100%"}},"f3ef64aa553840feab4b0a1fcc288d86":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_a9e83f7bef3843b999f120332f791c85","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_002591ac452b4a4683a798a95b5bcc4f","value":100}},"f3d8b3510494430484097dd72b9fa95d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f8fac9672368448db9ed4c7d04f067af","placeholder":"โ€‹","style":"IPY_MODEL_bfb9222136364ef793db561b60b8dcf5","value":" 100/100 [00:00<00:00, 
858.08it/s]"}},"dcb89e796a5c43f7b5899ea12243235d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eab72c7103c14e8d9027dd2651bdbd3c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b992492d93b5486ebcfd33126a56400d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"a9e83f7bef3843b999f120332f791c85":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"002591ac452b4a4683a798a95b5bcc4f":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f8fac9672368448db9ed4c7d04f067af":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bfb9222136364ef793db561b60b8dcf5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2e1ca4c549d249d5b5043ef9c1c3f919":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_9af46f486d1942388114bd9d795c1211","IPY_MODEL_8923229253dd4919937143b0cab1138e","IPY_MODEL_517c081d6608445bbc0b645865c85680"],"layout":"IPY_MODEL_9f4d341ac2b04623a80cb87de091c8e
7"}},"9af46f486d1942388114bd9d795c1211":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5efb90d5297f4e06b1425d3bd9cef568","placeholder":"โ€‹","style":"IPY_MODEL_eff5618b6647405996bb4a46d8c0b767","value":"100%"}},"8923229253dd4919937143b0cab1138e":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_fbb4ab140ce24c69a9f19f3e10bae77d","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d2c972960c194d88bbb7b3018331aec3","value":100}},"517c081d6608445bbc0b645865c85680":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c2983191da0e4b6f9f8857455200aa83","placeholder":"โ€‹","style":"IPY_MODEL_f5228016014a4413af718462fa11b3f3","value":" 100/100 [00:00<00:00, 
1353.43it/s]"}},"9f4d341ac2b04623a80cb87de091c8e7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5efb90d5297f4e06b1425d3bd9cef568":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eff5618b6647405996bb4a46d8c0b767":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fbb4ab140ce24c69a9f19f3e10bae77d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d2c972960c194d88bbb7b3018331aec3":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c2983191da0e4b6f9f8857455200aa83":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f5228016014a4413af718462fa11b3f3":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fe714ee300f141fd9f7a517f1c289cef":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_84961d5178fe419db1e61d8e128e73a9","IPY_MODEL_fdcdc72928f343128f27143d8e01eeab","IPY_MODEL_7100dbb5532a4e268b510e81a828dabc"],"layout":"IPY_MODEL_1615b15e28b34dbb955e26d3050202
5d"}},"84961d5178fe419db1e61d8e128e73a9":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_dfbb1f86a79c4f97bc59dc276ed9c79c","placeholder":"โ€‹","style":"IPY_MODEL_7f20fc7b659c40cdbfc4919f22d1be7e","value":"q-learning.pkl: 100%"}},"fdcdc72928f343128f27143d8e01eeab":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_115600bc5a1b41a1899ce6c955f2feff","max":24572,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d5872793d51541289fbfb88601525d22","value":24572}},"7100dbb5532a4e268b510e81a828dabc":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_97321a1444e14dcda5ca10bfd6ae8fb1","placeholder":"โ€‹","style":"IPY_MODEL_023d6586ca674d3d9557b965c4c270cc","value":" 24.6k/24.6k [00:00<00:00, 
50.3kB/s]"}},"1615b15e28b34dbb955e26d30502025d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dfbb1f86a79c4f97bc59dc276ed9c79c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7f20fc7b659c40cdbfc4919f22d1be7e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"115600bc5a1b41a1899ce6c955f2feff":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d5872793d51541289fbfb88601525d22":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"97321a1444e14dcda5ca10bfd6ae8fb1":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"023d6586ca674d3d9557b965c4c270cc":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"db4eb369c18749fab4b992626b0d7ee2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_eee8dc2819414ec8b385cdaf9ca31679","IPY_MODEL_d8300151209b4bf69dd40293c4829c68","IPY_MODEL_78ac941b16744bba8546042f2e1be9fa"],"layout":"IPY_MODEL_a96ade3a25cb4fab9696e00f1d56249b"
}},"eee8dc2819414ec8b385cdaf9ca31679":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_8679d7521cdf41c6bc090635f507e9d3","placeholder":"โ€‹","style":"IPY_MODEL_2545f2ad186d45f7a779f423959ebfa5","value":"Downloading (โ€ฆ)/main/q-learning.pkl: 100%"}},"d8300151209b4bf69dd40293c4829c68":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1ff751ff2be5462a825a3a85a991c9c2","max":24589,"min":0,"orientation":"horizontal","style":"IPY_MODEL_033b057020714f4b8e8d93fb450fb5e2","value":24589}},"78ac941b16744bba8546042f2e1be9fa":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b8e75d3f9be8427a82c61a6480bc7f78","placeholder":"โ€‹","style":"IPY_MODEL_348c3154cb1344fb9a6db7f5be8d3e72","value":" 24.6k/24.6k [00:00<00:00, 
1.18MB/s]"}},"a96ade3a25cb4fab9696e00f1d56249b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8679d7521cdf41c6bc090635f507e9d3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2545f2ad186d45f7a779f423959ebfa5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1ff751ff2be5462a825a3a85a991c9c2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"033b057020714f4b8e8d93fb450fb5e2":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"b8e75d3f9be8427a82c61a6480bc7f78":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"348c3154cb1344fb9a6db7f5be8d3e72":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f818282ac4ef46fe8f073e997e2364e5":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4407635f19394c4cab829aa77324d44f","IPY_MODEL_cf18d12d46a04fd4a99fe80819ba7a7b","IPY_MODEL_60c019c2adf745c8aa616a7269f03ac5"],"layout":"IPY_MODEL_3a86d2cd279049b38ba189824cafeae6"
}},"4407635f19394c4cab829aa77324d44f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_cd38950d068f46e38cb085847661515d","placeholder":"โ€‹","style":"IPY_MODEL_615e0eed539c4b2c88449b9cf9a698d8","value":"100%"}},"cf18d12d46a04fd4a99fe80819ba7a7b":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_7e97a0cc37b9427d820a184b88ffe64e","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_34ab33b6a93147c6a776f2c8e41198c2","value":100}},"60c019c2adf745c8aa616a7269f03ac5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3f6d00d90d384d768a7712cff9adf03f","placeholder":"โ€‹","style":"IPY_MODEL_a3136bd489b94a91a248eb08faddf549","value":" 100/100 [00:00<00:00, 
1102.31it/s]"}},"3a86d2cd279049b38ba189824cafeae6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cd38950d068f46e38cb085847661515d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"615e0eed539c4b2c88449b9cf9a698d8":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7e97a0cc37b9427d820a184b88ffe64e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"34ab33b6a93147c6a776f2c8e41198c2":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"3f6d00d90d384d768a7712cff9adf03f":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a3136bd489b94a91a248eb08faddf549":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"981ab71773204dccb4e8a323dd97efc0":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e8ccf2360f654f92941de06d5b98cb3f","IPY_MODEL_e16a5839a4f4449abe265f3f2cfa6261","IPY_MODEL_429e7232f611491385805a4dd1c92e7a"],"layout":"IPY_MODEL_e5e43e14975a4b2685da1fada90ce1
01"}},"e8ccf2360f654f92941de06d5b98cb3f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_dc5b85f0ca2a41b6914ccda37b6e0438","placeholder":"โ€‹","style":"IPY_MODEL_b987cbd3a33a4110b5c9d7b3ce333b45","value":"Downloading q-learning.pkl: 100%"}},"e16a5839a4f4449abe265f3f2cfa6261":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_c7dbd12a1e3a4691a956c7e7476d4912","max":933,"min":0,"orientation":"horizontal","style":"IPY_MODEL_af27db0b6e5546028c4518df44ef6449","value":933}},"429e7232f611491385805a4dd1c92e7a":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4b4697fc763a48c592f8f494f37b558d","placeholder":"โ€‹","style":"IPY_MODEL_4e7e917d38f64b1e8ef08c224c0ddee7","value":" 933/933 [00:00<00:00, 
50.0kB/s]"}},"e5e43e14975a4b2685da1fada90ce101":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dc5b85f0ca2a41b6914ccda37b6e0438":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b987cbd3a33a4110b5c9d7b3ce333b45":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c7dbd12a1e3a4691a956c7e7476d4912":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"af27db0b6e5546028c4518df44ef6449":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"4b4697fc763a48c592f8f494f37b558d":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4e7e917d38f64b1e8ef08c224c0ddee7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5876625ed27c45bf9e15631657f13565":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_8099cffb2b204cfb92cddd328965e152","IPY_MODEL_804fbde59727408590095db9c5bdad35","IPY_MODEL_b97d7581248344998347aa3bc62b4f4d"],"layout":"IPY_MODEL_ff9c6ed16e2545aabdcecfeb620f2d5d"
}},"8099cffb2b204cfb92cddd328965e152":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_dbe820d70e8f480babd7b15e80c3356b","placeholder":"โ€‹","style":"IPY_MODEL_fc0477020d8b4448aa6ddbb35acdb942","value":"100%"}},"804fbde59727408590095db9c5bdad35":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1b9185b2dd6447b8b5324029374ea8c8","max":100,"min":0,"orientation":"horizontal","style":"IPY_MODEL_74f997c338c844b28d6eb5279634d779","value":100}},"b97d7581248344998347aa3bc62b4f4d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_047d00fb6d06450d9e72081111324ac8","placeholder":"โ€‹","style":"IPY_MODEL_02936f5cd8ea48c8af1eb0cee49ddcd3","value":" 100/100 [00:00<00:00, 
2042.39it/s]"}},"ff9c6ed16e2545aabdcecfeb620f2d5d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dbe820d70e8f480babd7b15e80c3356b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":nul
l,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fc0477020d8b4448aa6ddbb35acdb942":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1b9185b2dd6447b8b5324029374ea8c8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"74f997c338c844b28d6eb5279634d779":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"047d00fb6d06450d9e72081111324ac8":{"mode
l_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"02936f5cd8ea48c8af1eb0cee49ddcd3":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} \ No newline at end of file diff --git a/HF DeepRL Course/Unit3 - Deep Q-Learning.ipynb b/HF DeepRL Course/Unit3 - Deep Q-Learning.ipynb new file mode 100644 index 0000000..39a6308 --- /dev/null +++ b/HF DeepRL Course/Unit3 - Deep Q-Learning.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"k7xBVPzoXxOg"},"source":["# Unit 3: Deep Q-Learning with Atari Games ๐Ÿ‘พ using RL Baselines3 Zoo\n","\n","\"Unit\n","\n","In this notebook, **you'll train a Deep Q-Learning agent** playing Space Invaders using [RL Baselines3 
Zoo](https://github.com/DLR-RM/rl-baselines3-zoo), a training framework based on [Stable-Baselines3](https://stable-baselines3.readthedocs.io/en/master/) that provides scripts for training, evaluating agents, tuning hyperparameters, plotting results and recording videos.\n","\n","We're using the [RL-Baselines-3 Zoo integration, a vanilla version of Deep Q-Learning](https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html) with no extensions such as Double-DQN, Dueling-DQN, and Prioritized Experience Replay.\n","\n","โฌ‡๏ธ Here is an example of what **you will achieve** โฌ‡๏ธ"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"J9S713biXntc","colab":{"base_uri":"https://localhost:8080/","height":231},"executionInfo":{"status":"ok","timestamp":1696584508361,"user_tz":-60,"elapsed":7,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"16b8d973-4da5-4222-efb5-ad0f924a53ef"},"outputs":[{"output_type":"display_data","data":{"text/plain":[""],"text/html":["\n"]},"metadata":{}}],"source":["%%html\n",""]},{"cell_type":"markdown","source":["### ๐ŸŽฎ Environments:\n","\n","- [SpacesInvadersNoFrameskip-v4](https://gymnasium.farama.org/environments/atari/space_invaders/)\n","\n","You can see the difference between Space Invaders versions here ๐Ÿ‘‰ https://gymnasium.farama.org/environments/atari/space_invaders/#variants\n","\n","### ๐Ÿ“š RL-Library:\n","\n","- [RL-Baselines3-Zoo](https://github.com/DLR-RM/rl-baselines3-zoo)"],"metadata":{"id":"ykJiGevCMVc5"}},{"cell_type":"markdown","metadata":{"id":"wciHGjrFYz9m"},"source":["## Objectives of this notebook ๐Ÿ†\n","At the end of the notebook, you will:\n","- Be able to understand deeper **how RL Baselines3 Zoo works**.\n","- Be able to **push your trained agent and the code to the Hub** with a nice video replay and an evaluation score ๐Ÿ”ฅ.\n","\n","\n"]},{"cell_type":"markdown","source":["## This notebook is from Deep Reinforcement Learning 
Course\n","\"Deep"],"metadata":{"id":"TsnP0rjxMn1e"}},{"cell_type":"markdown","metadata":{"id":"nw6fJHIAZd-J"},"source":["In this free course, you will:\n","\n","- ๐Ÿ“– Study Deep Reinforcement Learning in **theory and practice**.\n","- ๐Ÿง‘โ€๐Ÿ’ป Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- ๐Ÿค– Train **agents in unique environments**\n","\n","And more check ๐Ÿ“š the syllabus ๐Ÿ‘‰ https://simoninithomas.github.io/deep-rl-course\n","\n","Donโ€™t forget to **sign up to the course** (we are collecting your email to be able toย **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","\n","The best way to keep in touch is to join our discord server to exchange with the community and with us ๐Ÿ‘‰๐Ÿป https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"0vgANIBBZg1p"},"source":["## Prerequisites ๐Ÿ—๏ธ\n","Before diving into the notebook, you need to:\n","\n","๐Ÿ”ฒ ๐Ÿ“š **[Study Deep Q-Learning by reading Unit 3](https://huggingface.co/deep-rl-course/unit3/introduction)** ๐Ÿค—"]},{"cell_type":"markdown","source":["We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the Github Repo](https://github.com/huggingface/deep-rl-class/issues)."],"metadata":{"id":"7kszpGFaRVhq"}},{"cell_type":"markdown","metadata":{"id":"QR0jZtYreSI5"},"source":["# Let's train a Deep Q-Learning agent playing Atari' Space Invaders ๐Ÿ‘พ and upload it to the Hub.\n","\n","We strongly recommend students **to use Google Colab for the hands-on exercises instead of running them on their personal computers**.\n","\n","By using Google Colab, **you can focus on learning and experimenting without worrying about the technical aspects of setting up your environments**.\n","\n","To validate this hands-on for the certification process, you need to push your trained model to 
the Hub and **get a result of >= 200**.\n","\n","To find your result, go to the leaderboard and find your model, **the result = mean_reward - std of reward**\n","\n","For more information about the certification process, check this section ๐Ÿ‘‰ https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"]},{"cell_type":"markdown","source":["## An advice ๐Ÿ’ก\n","It's better to run this colab in a copy on your Google Drive, so that **if it timeouts** you still have the saved notebook on your Google Drive and do not need to fill everything from scratch.\n","\n","To do that you can either do `Ctrl + S` or `File > Save a copy in Google Drive.`\n","\n","Also, we're going to **train it for 90 minutes with 1M timesteps**. By typing `!nvidia-smi` will tell you what GPU you're using.\n","\n","And if you want to train more such 10 million steps, this will take about 9 hours, potentially resulting in Colab timing out. In that case, I recommend running this on your local computer (or somewhere else). Just click on: `File>Download`."],"metadata":{"id":"Nc8BnyVEc3Ys"}},{"cell_type":"code","source":["!nvidia-smi"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"RA9lvEMQUdKi","executionInfo":{"status":"ok","timestamp":1696584561299,"user_tz":-60,"elapsed":1143,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"0bc66424-a1b1-40bf-c3a5-8e2d70917136"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Fri Oct 6 09:29:19 2023 \n","+-----------------------------------------------------------------------------+\n","| NVIDIA-SMI 525.105.17 Driver Version: 525.105.17 CUDA Version: 12.0 |\n","|-------------------------------+----------------------+----------------------+\n","| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n","| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n","| | | MIG M. 
|\n","|===============================+======================+======================|\n","| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n","| N/A 51C P8 9W / 70W | 0MiB / 15360MiB | 0% Default |\n","| | | N/A |\n","+-------------------------------+----------------------+----------------------+\n"," \n","+-----------------------------------------------------------------------------+\n","| Processes: |\n","| GPU GI CI PID Type Process name GPU Memory |\n","| ID ID Usage |\n","|=============================================================================|\n","| No running processes found |\n","+-----------------------------------------------------------------------------+\n"]}]},{"cell_type":"markdown","source":["## Set the GPU ๐Ÿ’ช\n","- To **accelerate the agent's training, we'll use a GPU**. To do that, go to `Runtime > Change Runtime type`\n","\n","\"GPU"],"metadata":{"id":"PU4FVzaoM6fC"}},{"cell_type":"markdown","source":["- `Hardware Accelerator > GPU`\n","\n","\"GPU"],"metadata":{"id":"KV0NyFdQM9ZG"}},{"cell_type":"markdown","source":["# Install RL-Baselines3 Zoo and its dependencies ๐Ÿ“š\n","\n","If you see `ERROR: pip's dependency resolver does not currently take into account all the packages that are installed.` **this is normal and it's not a critical error** there's a conflict of version. 
But the packages we need are installed."],"metadata":{"id":"wS_cVefO-aYg"}},{"cell_type":"code","source":["# For now we install this update of RL-Baselines3 Zoo\n","!pip install git+https://github.com/DLR-RM/rl-baselines3-zoo@update/hf"],"metadata":{"id":"hLTwHqIWdnPb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696584719529,"user_tz":-60,"elapsed":143277,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"939d6eff-14d2-4d66-d51a-b33b10af71d4"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting git+https://github.com/DLR-RM/rl-baselines3-zoo@update/hf\n"," Cloning https://github.com/DLR-RM/rl-baselines3-zoo (to revision update/hf) to /tmp/pip-req-build-p3lz3yfc\n"," Running command git clone --filter=blob:none --quiet https://github.com/DLR-RM/rl-baselines3-zoo /tmp/pip-req-build-p3lz3yfc\n"," Running command git checkout -b update/hf --track origin/update/hf\n"," Switched to a new branch 'update/hf'\n"," Branch 'update/hf' set up to track remote branch 'update/hf' from 'origin'.\n"," Resolved https://github.com/DLR-RM/rl-baselines3-zoo to commit 7dcbff7e74e7a12c052452181ff353a4dbed313a\n"," Running command git submodule update --init --recursive -q\n"," Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n"," Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n"," Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","Collecting sb3-contrib>=2.0.0a9 (from rl-zoo3==2.0.0a9)\n"," Downloading sb3_contrib-2.1.0-py3-none-any.whl (80 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m80.3/80.3 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting gym==0.26.2 (from rl-zoo3==2.0.0a9)\n"," Downloading gym-0.26.2.tar.gz (721 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m721.7/721.7 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n"," Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n"," Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n","Collecting huggingface-sb3>=2.2.1 (from rl-zoo3==2.0.0a9)\n"," Downloading huggingface_sb3-3.0-py3-none-any.whl (9.7 kB)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from rl-zoo3==2.0.0a9) (4.66.1)\n","Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from rl-zoo3==2.0.0a9) (13.6.0)\n","Collecting optuna (from rl-zoo3==2.0.0a9)\n"," Downloading optuna-3.3.0-py3-none-any.whl (404 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m404.2/404.2 kB\u001b[0m \u001b[31m29.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from rl-zoo3==2.0.0a9) (6.0.1)\n","Collecting pytablewriter~=0.64 (from rl-zoo3==2.0.0a9)\n"," Downloading pytablewriter-0.64.2-py3-none-any.whl (106 kB)\n","\u001b[2K 
\u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m106.6/106.6 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy>=1.18.0 in /usr/local/lib/python3.10/dist-packages (from gym==0.26.2->rl-zoo3==2.0.0a9) (1.23.5)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gym==0.26.2->rl-zoo3==2.0.0a9) (2.2.1)\n","Requirement already satisfied: gym-notices>=0.0.4 in /usr/local/lib/python3.10/dist-packages (from gym==0.26.2->rl-zoo3==2.0.0a9) (0.0.8)\n","Collecting huggingface-hub~=0.8 (from huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9)\n"," Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: wasabi in /usr/local/lib/python3.10/dist-packages (from huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (1.1.2)\n","Requirement already satisfied: setuptools>=38.3.0 in /usr/local/lib/python3.10/dist-packages (from pytablewriter~=0.64->rl-zoo3==2.0.0a9) (67.7.2)\n","Collecting DataProperty<2,>=0.55.0 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n"," Downloading DataProperty-1.0.1-py3-none-any.whl (27 kB)\n","Collecting mbstrdecoder<2,>=1.0.0 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n"," Downloading mbstrdecoder-1.1.3-py3-none-any.whl (7.8 kB)\n","Collecting pathvalidate<3,>=2.3.0 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n"," Downloading pathvalidate-2.5.2-py3-none-any.whl (20 kB)\n","Collecting tabledata<2,>=1.3.0 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n"," Downloading tabledata-1.3.3-py3-none-any.whl (11 kB)\n","Collecting tcolorpy<1,>=0.0.5 (from 
pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n"," Downloading tcolorpy-0.1.4-py3-none-any.whl (7.9 kB)\n","Collecting typepy[datetime]<2,>=1.2.0 (from pytablewriter~=0.64->rl-zoo3==2.0.0a9)\n"," Downloading typepy-1.3.2-py3-none-any.whl (31 kB)\n","Collecting stable-baselines3>=2.1.0 (from sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9)\n"," Downloading stable_baselines3-2.1.0-py3-none-any.whl (178 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m178.7/178.7 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting alembic>=1.5.0 (from optuna->rl-zoo3==2.0.0a9)\n"," Downloading alembic-1.12.0-py3-none-any.whl (226 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m226.0/226.0 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting cmaes>=0.10.0 (from optuna->rl-zoo3==2.0.0a9)\n"," Downloading cmaes-0.10.0-py3-none-any.whl (29 kB)\n","Collecting colorlog (from optuna->rl-zoo3==2.0.0a9)\n"," Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from optuna->rl-zoo3==2.0.0a9) (23.2)\n","Requirement already satisfied: sqlalchemy>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from optuna->rl-zoo3==2.0.0a9) (2.0.21)\n","Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->rl-zoo3==2.0.0a9) (3.0.0)\n","Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->rl-zoo3==2.0.0a9) (2.16.1)\n","Collecting Mako (from alembic>=1.5.0->optuna->rl-zoo3==2.0.0a9)\n"," Downloading Mako-1.2.4-py3-none-any.whl (78 kB)\n","\u001b[2K 
\u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m78.7/78.7 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: typing-extensions>=4 in /usr/local/lib/python3.10/dist-packages (from alembic>=1.5.0->optuna->rl-zoo3==2.0.0a9) (4.5.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (3.12.4)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (2023.6.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (2.31.0)\n","Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->rl-zoo3==2.0.0a9) (0.1.2)\n","Requirement already satisfied: chardet<6,>=3.0.4 in /usr/local/lib/python3.10/dist-packages (from mbstrdecoder<2,>=1.0.0->pytablewriter~=0.64->rl-zoo3==2.0.0a9) (5.2.0)\n","Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy>=1.3.0->optuna->rl-zoo3==2.0.0a9) (3.0.0)\n","Collecting gymnasium<0.30,>=0.28.1 (from stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9)\n"," Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m49.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: torch>=1.13 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (2.0.1+cu118)\n","Requirement already 
satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (1.5.3)\n","Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (3.7.1)\n","Requirement already satisfied: python-dateutil<3.0.0,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from typepy[datetime]<2,>=1.2.0->pytablewriter~=0.64->rl-zoo3==2.0.0a9) (2.8.2)\n","Requirement already satisfied: pytz>=2018.9 in /usr/local/lib/python3.10/dist-packages (from typepy[datetime]<2,>=1.2.0->pytablewriter~=0.64->rl-zoo3==2.0.0a9) (2023.3.post1)\n","Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9)\n"," Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3.0.0,>=2.8.0->typepy[datetime]<2,>=1.2.0->pytablewriter~=0.64->rl-zoo3==2.0.0a9) (1.16.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (3.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (3.1.2)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (2.0.0)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) 
(3.27.6)\n","Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (17.0.2)\n","Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from Mako->alembic>=1.5.0->optuna->rl-zoo3==2.0.0a9) (2.1.3)\n","Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (1.1.1)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (0.12.0)\n","Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (4.43.0)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (1.4.5)\n","Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (9.4.0)\n","Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (3.1.1)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from 
requests->huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (2.0.6)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface-sb3>=2.2.1->rl-zoo3==2.0.0a9) (2023.7.22)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13->stable-baselines3>=2.1.0->sb3-contrib>=2.0.0a9->rl-zoo3==2.0.0a9) (1.3.0)\n","Building wheels for collected packages: rl-zoo3, gym\n"," Building wheel for rl-zoo3 (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for rl-zoo3: filename=rl_zoo3-2.0.0a9-py3-none-any.whl size=76401 sha256=e9df8ae954f8408f3ca58502acb807a37aad1aff44f6723c00d77a0e02868346\n"," Stored in directory: /tmp/pip-ephem-wheel-cache-uqr6j5hl/wheels/fc/36/d5/2ef574649d85327de098075c8523da50be2612f3e5807261f7\n"," Building wheel for gym (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for gym: filename=gym-0.26.2-py3-none-any.whl size=827621 sha256=7d6a0506ad85e4f801ff76aaa17b47c3404f04d8fb7966e4c9c78c73b7e9cf13\n"," Stored in directory: /root/.cache/pip/wheels/b9/22/6d/3e7b32d98451b4cd9d12417052affbeeeea012955d437da1da\n","Successfully built rl-zoo3 gym\n","Installing collected packages: farama-notifications, tcolorpy, pathvalidate, mbstrdecoder, Mako, gymnasium, gym, colorlog, cmaes, typepy, huggingface-hub, alembic, optuna, huggingface-sb3, DataProperty, tabledata, pytablewriter, stable-baselines3, sb3-contrib, rl-zoo3\n"," Attempting uninstall: gym\n"," Found existing installation: gym 0.25.2\n"," Uninstalling gym-0.25.2:\n"," Successfully uninstalled gym-0.25.2\n","\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n","dopamine-rl 4.0.6 requires gym<=0.25.2, but you have gym 0.26.2 which is incompatible.\u001b[0m\u001b[31m\n","\u001b[0mSuccessfully installed DataProperty-1.0.1 Mako-1.2.4 alembic-1.12.0 cmaes-0.10.0 colorlog-6.7.0 farama-notifications-0.0.4 gym-0.26.2 gymnasium-0.29.1 huggingface-hub-0.17.3 huggingface-sb3-3.0 mbstrdecoder-1.1.3 optuna-3.3.0 pathvalidate-2.5.2 pytablewriter-0.64.2 rl-zoo3-2.0.0a9 sb3-contrib-2.1.0 stable-baselines3-2.1.0 tabledata-1.3.3 tcolorpy-0.1.4 typepy-1.3.2\n"]}]},{"cell_type":"markdown","source":["IF AND ONLY IF THE VERSION ABOVE DOES NOT EXIST ANYMORE. UNCOMMENT AND INSTALL THE ONE BELOW"],"metadata":{"id":"p0xe2sJHdtHy"}},{"cell_type":"code","source":["#!pip install rl_zoo3==2.0.0a9"],"metadata":{"id":"N0d6wy-F-f39"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["!apt-get install swig cmake ffmpeg"],"metadata":{"id":"8_MllY6Om1eI","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696584785636,"user_tz":-60,"elapsed":6952,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"3405c67f-4e42-4f75-bd6a-cd4b86450068"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Reading package lists... Done\n","Building dependency tree... Done\n","Reading state information... 
Done\n","cmake is already the newest version (3.22.1-1ubuntu1.22.04.1).\n","ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).\n","The following additional packages will be installed:\n"," swig4.0\n","Suggested packages:\n"," swig-doc swig-examples swig4.0-examples swig4.0-doc\n","The following NEW packages will be installed:\n"," swig swig4.0\n","0 upgraded, 2 newly installed, 0 to remove and 18 not upgraded.\n","Need to get 1,116 kB of archives.\n","After this operation, 5,542 kB of additional disk space will be used.\n","Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig4.0 amd64 4.0.2-1ubuntu1 [1,110 kB]\n","Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig all 4.0.2-1ubuntu1 [5,632 B]\n","Fetched 1,116 kB in 2s (697 kB/s)\n","Selecting previously unselected package swig4.0.\n","(Reading database ... 120879 files and directories currently installed.)\n","Preparing to unpack .../swig4.0_4.0.2-1ubuntu1_amd64.deb ...\n","Unpacking swig4.0 (4.0.2-1ubuntu1) ...\n","Selecting previously unselected package swig.\n","Preparing to unpack .../swig_4.0.2-1ubuntu1_all.deb ...\n","Unpacking swig (4.0.2-1ubuntu1) ...\n","Setting up swig4.0 (4.0.2-1ubuntu1) ...\n","Setting up swig (4.0.2-1ubuntu1) ...\n","Processing triggers for man-db (2.10.2-1) ...\n"]}]},{"cell_type":"markdown","metadata":{"id":"4S9mJiKg6SqC"},"source":["To be able to use Atari games in Gymnasium we need to install atari package. 
And accept-rom-license to download the rom files (games files)."]},{"cell_type":"code","source":["!pip install gymnasium[atari]\n","!pip install gymnasium[accept-rom-license]"],"metadata":{"id":"NsRP-lX1_2fC","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696584805712,"user_tz":-60,"elapsed":20086,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"2a748c7e-dde5-49d6-bc7d-03ad11cf56d1"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: gymnasium[atari] in /usr/local/lib/python3.10/dist-packages (0.29.1)\n","Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[atari]) (1.23.5)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[atari]) (2.2.1)\n","Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[atari]) (4.5.0)\n","Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium[atari]) (0.0.4)\n","Collecting shimmy[atari]<1.0,>=0.1.0 (from gymnasium[atari])\n"," Downloading Shimmy-0.2.1-py3-none-any.whl (25 kB)\n","Collecting ale-py~=0.8.1 (from shimmy[atari]<1.0,>=0.1.0->gymnasium[atari])\n"," Downloading ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m18.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from ale-py~=0.8.1->shimmy[atari]<1.0,>=0.1.0->gymnasium[atari]) (6.1.0)\n","Installing collected packages: ale-py, shimmy\n","Successfully installed ale-py-0.8.1 
shimmy-0.2.1\n","Requirement already satisfied: gymnasium[accept-rom-license] in /usr/local/lib/python3.10/dist-packages (0.29.1)\n","Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[accept-rom-license]) (1.23.5)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[accept-rom-license]) (2.2.1)\n","Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium[accept-rom-license]) (4.5.0)\n","Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium[accept-rom-license]) (0.0.4)\n","Collecting autorom[accept-rom-license]~=0.4.2 (from gymnasium[accept-rom-license])\n"," Downloading AutoROM-0.4.2-py3-none-any.whl (16 kB)\n","Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (8.1.7)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (2.31.0)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (4.66.1)\n","Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license])\n"," Downloading AutoROM.accept-rom-license-0.6.1.tar.gz (434 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m434.7/434.7 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n"," Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n"," Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (2.0.6)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.4.2->gymnasium[accept-rom-license]) (2023.7.22)\n","Building wheels for collected packages: AutoROM.accept-rom-license\n"," Building wheel for AutoROM.accept-rom-license (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for AutoROM.accept-rom-license: filename=AutoROM.accept_rom_license-0.6.1-py3-none-any.whl size=446660 sha256=fd1a9e89131c49b793e331349960b5f3eefb0d31da169f9c487807a8dcb1a296\n"," Stored in directory: /root/.cache/pip/wheels/6b/1b/ef/a43ff1a2f1736d5711faa1ba4c1f61be1131b8899e6a057811\n","Successfully built AutoROM.accept-rom-license\n","Installing collected packages: AutoROM.accept-rom-license, autorom\n","Successfully installed AutoROM.accept-rom-license-0.6.1 autorom-0.4.2\n"]}]},{"cell_type":"markdown","source":["## Create a virtual display ๐Ÿ”ฝ\n","\n","During the notebook, we'll need to generate a replay video. 
To do so, with Colab, **we need to have a virtual screen to be able to render the environment** (and thus record the frames).\n","\n","Hence the following cell will install the libraries and create and run a virtual screen 🖥"],"metadata":{"id":"bTpYcVZVMzUI"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"jV6wjQ7Be7p5"},"outputs":[],"source":["%%capture\n","!apt install python-opengl\n","!apt install ffmpeg\n","!apt install xvfb\n","!pip3 install pyvirtualdisplay"]},{"cell_type":"code","source":["# Virtual display\n","from pyvirtualdisplay import Display\n","\n","virtual_display = Display(visible=0, size=(1400, 900))\n","virtual_display.start()"],"metadata":{"id":"BE5JWP5rQIKf","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696584826034,"user_tz":-60,"elapsed":1592,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"62c5a796-5942-4eab-a115-a45548801126"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"5iPgzluo9z-u"},"source":["## Train our Deep Q-Learning Agent to Play Space Invaders 👾\n","\n","To train an agent with RL-Baselines3-Zoo, we just need to do two things:\n","\n","1. 
Create a hyperparameter config file called `dqn.yml` that will contain our training hyperparameters.\n","\n","This is a template example:\n","\n","```\n","SpaceInvadersNoFrameskip-v4:\n"," env_wrapper:\n"," - stable_baselines3.common.atari_wrappers.AtariWrapper\n"," frame_stack: 4\n"," policy: 'CnnPolicy'\n"," n_timesteps: !!float 1e7\n"," buffer_size: 100000\n"," learning_rate: !!float 1e-4\n"," batch_size: 32\n"," learning_starts: 100000\n"," target_update_interval: 1000\n"," train_freq: 4\n"," gradient_steps: 1\n"," exploration_fraction: 0.1\n"," exploration_final_eps: 0.01\n"," # If True, you need to deactivate handle_timeout_termination\n"," # in the replay_buffer_kwargs\n"," optimize_memory_usage: False\n","```"]},{"cell_type":"markdown","metadata":{"id":"_VjblFSVDQOj"},"source":["Here we see that:\n","- We use the `AtariWrapper`, which preprocesses the input (frame reduction, grayscale, stacking 4 frames)\n","- We use `CnnPolicy`, since we use convolutional layers to process the frames\n","- We train it for 10 million `n_timesteps`\n","- The memory (Experience Replay) size is 100000, i.e. the number of experience steps that are saved and reused to train your agent.\n","\n","💡 My advice is to **reduce the training timesteps to 1M,** which will take about 90 minutes on a P100. `!nvidia-smi` will tell you what GPU you're using. At 10 million steps, this will take about 9 hours, which could likely result in Colab timing out. I recommend running this on your local computer (or somewhere else). 
Just click on: `File>Download`."]},{"cell_type":"markdown","metadata":{"id":"5qTkbWrkECOJ"},"source":["In terms of hyperparameter optimization, my advice is to focus on these 3 hyperparameters:\n","- `learning_rate`\n","- `buffer_size` (Experience Memory size)\n","- `batch_size`\n","\n","As a good practice, you need to **check the documentation to understand what each hyperparameter does**: https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html#parameters\n","\n"]},{"cell_type":"markdown","metadata":{"id":"Hn8bRTHvERRL"},"source":["2. We start the training and save the models in the `logs` folder 📁\n","\n","- Specify the algorithm after `--algo`, the folder where the model is saved after `-f`, and the hyperparameter config file after `-c`."]},{"cell_type":"code","execution_count":9,"metadata":{"id":"Xr1TVW4xfbz3","colab":{"base_uri":"https://localhost:8080/"},"outputId":"fba1f572-48ab-4870-d486-c99cdf4ec09a","executionInfo":{"status":"ok","timestamp":1696589371144,"user_tz":-60,"elapsed":2041815,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[{"output_type":"stream","name":"stdout","text":["\u001b[1;30;43mStreaming output truncated to the last 5000 lines.\u001b[0m\n","| n_updates | 101638 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.9e+03 |\n","| ep_rew_mean | 353 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2560 |\n","| fps | 254 |\n","| time_elapsed | 1994 |\n","| total_timesteps | 507359 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0151 |\n","| n_updates | 101839 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.91e+03 |\n","| ep_rew_mean | 356 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2564 |\n","| fps | 254 |\n","| time_elapsed | 2000 |\n","| total_timesteps | 508616 |\n","| train/ | |\n","| learning_rate | 0.0001 
|\n","| loss | 0.0174 |\n","| n_updates | 102153 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.9e+03 |\n","| ep_rew_mean | 352 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2568 |\n","| fps | 254 |\n","| time_elapsed | 2004 |\n","| total_timesteps | 509632 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00915 |\n","| n_updates | 102407 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.91e+03 |\n","| ep_rew_mean | 355 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2572 |\n","| fps | 254 |\n","| time_elapsed | 2008 |\n","| total_timesteps | 510699 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0112 |\n","| n_updates | 102674 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.91e+03 |\n","| ep_rew_mean | 355 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2576 |\n","| fps | 254 |\n","| time_elapsed | 2014 |\n","| total_timesteps | 511805 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0338 |\n","| n_updates | 102951 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.92e+03 |\n","| ep_rew_mean | 358 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2580 |\n","| fps | 254 |\n","| time_elapsed | 2017 |\n","| total_timesteps | 512491 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0318 |\n","| n_updates | 103122 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.93e+03 |\n","| ep_rew_mean | 356 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2584 |\n","| fps | 254 |\n","| time_elapsed | 2022 |\n","| total_timesteps | 513724 |\n","| train/ | |\n","| 
learning_rate | 0.0001 |\n","| loss | 0.0167 |\n","| n_updates | 103430 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.94e+03 |\n","| ep_rew_mean | 358 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2588 |\n","| fps | 254 |\n","| time_elapsed | 2025 |\n","| total_timesteps | 514581 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.029 |\n","| n_updates | 103645 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.95e+03 |\n","| ep_rew_mean | 362 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2592 |\n","| fps | 253 |\n","| time_elapsed | 2030 |\n","| total_timesteps | 515544 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0355 |\n","| n_updates | 103885 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.95e+03 |\n","| ep_rew_mean | 361 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2596 |\n","| fps | 253 |\n","| time_elapsed | 2032 |\n","| total_timesteps | 516320 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0251 |\n","| n_updates | 104079 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.96e+03 |\n","| ep_rew_mean | 363 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2600 |\n","| fps | 253 |\n","| time_elapsed | 2038 |\n","| total_timesteps | 517705 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0274 |\n","| n_updates | 104426 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.96e+03 |\n","| ep_rew_mean | 362 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2604 |\n","| fps | 253 |\n","| time_elapsed | 2043 |\n","| total_timesteps | 518964 |\n","| train/ 
| |\n","| learning_rate | 0.0001 |\n","| loss | 0.0375 |\n","| n_updates | 104740 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.97e+03 |\n","| ep_rew_mean | 363 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2608 |\n","| fps | 253 |\n","| time_elapsed | 2048 |\n","| total_timesteps | 520042 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0122 |\n","| n_updates | 105010 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.96e+03 |\n","| ep_rew_mean | 362 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2612 |\n","| fps | 253 |\n","| time_elapsed | 2051 |\n","| total_timesteps | 520838 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0251 |\n","| n_updates | 105209 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.96e+03 |\n","| ep_rew_mean | 363 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2616 |\n","| fps | 253 |\n","| time_elapsed | 2056 |\n","| total_timesteps | 522069 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0218 |\n","| n_updates | 105517 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.99e+03 |\n","| ep_rew_mean | 368 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2620 |\n","| fps | 253 |\n","| time_elapsed | 2060 |\n","| total_timesteps | 523170 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0512 |\n","| n_updates | 105792 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3e+03 |\n","| ep_rew_mean | 370 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2624 |\n","| fps | 253 |\n","| time_elapsed | 2063 |\n","| total_timesteps | 523962 |\n","| 
train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0352 |\n","| n_updates | 105990 |\n","----------------------------------\n","Eval num_timesteps=525000, episode_reward=430.00 +/- 118.57\n","Episode length: 3479.40 +/- 763.89\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 3.48e+03 |\n","| mean_reward | 430 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 525000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0284 |\n","| n_updates | 106249 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.01e+03 |\n","| ep_rew_mean | 371 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2628 |\n","| fps | 252 |\n","| time_elapsed | 2079 |\n","| total_timesteps | 525273 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0263 |\n","| n_updates | 106318 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.02e+03 |\n","| ep_rew_mean | 371 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2632 |\n","| fps | 252 |\n","| time_elapsed | 2085 |\n","| total_timesteps | 526692 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0138 |\n","| n_updates | 106672 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.04e+03 |\n","| ep_rew_mean | 377 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2636 |\n","| fps | 252 |\n","| time_elapsed | 2089 |\n","| total_timesteps | 527835 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0881 |\n","| n_updates | 106958 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.04e+03 |\n","| ep_rew_mean | 377 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2640 |\n","| fps | 252 
|\n","| time_elapsed | 2095 |\n","| total_timesteps | 529111 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0247 |\n","| n_updates | 107277 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.05e+03 |\n","| ep_rew_mean | 380 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2644 |\n","| fps | 252 |\n","| time_elapsed | 2100 |\n","| total_timesteps | 530435 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0463 |\n","| n_updates | 107608 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.06e+03 |\n","| ep_rew_mean | 382 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2648 |\n","| fps | 252 |\n","| time_elapsed | 2105 |\n","| total_timesteps | 531667 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0137 |\n","| n_updates | 107916 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.04e+03 |\n","| ep_rew_mean | 377 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2652 |\n","| fps | 252 |\n","| time_elapsed | 2111 |\n","| total_timesteps | 533041 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0133 |\n","| n_updates | 108260 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.05e+03 |\n","| ep_rew_mean | 378 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2656 |\n","| fps | 252 |\n","| time_elapsed | 2115 |\n","| total_timesteps | 534137 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0234 |\n","| n_updates | 108534 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.05e+03 |\n","| ep_rew_mean | 377 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2660 |\n","| 
fps | 252 |\n","| time_elapsed | 2119 |\n","| total_timesteps | 535101 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0122 |\n","| n_updates | 108775 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.05e+03 |\n","| ep_rew_mean | 379 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2664 |\n","| fps | 252 |\n","| time_elapsed | 2124 |\n","| total_timesteps | 536139 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0282 |\n","| n_updates | 109034 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.04e+03 |\n","| ep_rew_mean | 376 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2668 |\n","| fps | 252 |\n","| time_elapsed | 2127 |\n","| total_timesteps | 537007 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.025 |\n","| n_updates | 109251 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.04e+03 |\n","| ep_rew_mean | 379 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2672 |\n","| fps | 252 |\n","| time_elapsed | 2133 |\n","| total_timesteps | 538511 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0569 |\n","| n_updates | 109627 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.05e+03 |\n","| ep_rew_mean | 382 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2676 |\n","| fps | 252 |\n","| time_elapsed | 2136 |\n","| total_timesteps | 539141 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00827 |\n","| n_updates | 109785 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.02e+03 |\n","| ep_rew_mean | 376 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 
2680 |\n","| fps | 252 |\n","| time_elapsed | 2139 |\n","| total_timesteps | 539981 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0265 |\n","| n_updates | 109995 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.03e+03 |\n","| ep_rew_mean | 379 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2684 |\n","| fps | 252 |\n","| time_elapsed | 2145 |\n","| total_timesteps | 541286 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0467 |\n","| n_updates | 110321 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.07e+03 |\n","| ep_rew_mean | 387 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2688 |\n","| fps | 252 |\n","| time_elapsed | 2152 |\n","| total_timesteps | 543180 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0222 |\n","| n_updates | 110794 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.07e+03 |\n","| ep_rew_mean | 384 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2692 |\n","| fps | 252 |\n","| time_elapsed | 2156 |\n","| total_timesteps | 544149 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0196 |\n","| n_updates | 111037 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.09e+03 |\n","| ep_rew_mean | 388 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2696 |\n","| fps | 252 |\n","| time_elapsed | 2164 |\n","| total_timesteps | 546057 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0163 |\n","| n_updates | 111514 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.12e+03 |\n","| ep_rew_mean | 393 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| 
episodes | 2700 |\n","| fps | 252 |\n","| time_elapsed | 2171 |\n","| total_timesteps | 547897 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.05 |\n","| n_updates | 111974 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.13e+03 |\n","| ep_rew_mean | 397 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2704 |\n","| fps | 252 |\n","| time_elapsed | 2176 |\n","| total_timesteps | 548981 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0233 |\n","| n_updates | 112245 |\n","----------------------------------\n","Eval num_timesteps=550000, episode_reward=492.00 +/- 235.45\n","Episode length: 4043.20 +/- 1726.44\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.04e+03 |\n","| mean_reward | 492 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 550000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0169 |\n","| n_updates | 112499 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.16e+03 |\n","| ep_rew_mean | 400 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2708 |\n","| fps | 250 |\n","| time_elapsed | 2193 |\n","| total_timesteps | 550490 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0357 |\n","| n_updates | 112622 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.17e+03 |\n","| ep_rew_mean | 404 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2712 |\n","| fps | 250 |\n","| time_elapsed | 2197 |\n","| total_timesteps | 551305 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0193 |\n","| n_updates | 112826 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.18e+03 |\n","| ep_rew_mean 
| 407 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2716 |\n","| fps | 250 |\n","| time_elapsed | 2203 |\n","| total_timesteps | 552842 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0302 |\n","| n_updates | 113210 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.2e+03 |\n","| ep_rew_mean | 410 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2720 |\n","| fps | 250 |\n","| time_elapsed | 2207 |\n","| total_timesteps | 553871 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0118 |\n","| n_updates | 113467 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.21e+03 |\n","| ep_rew_mean | 412 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2724 |\n","| fps | 250 |\n","| time_elapsed | 2211 |\n","| total_timesteps | 554791 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0117 |\n","| n_updates | 113697 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.22e+03 |\n","| ep_rew_mean | 416 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2728 |\n","| fps | 250 |\n","| time_elapsed | 2217 |\n","| total_timesteps | 556161 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0135 |\n","| n_updates | 114040 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.23e+03 |\n","| ep_rew_mean | 416 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2732 |\n","| fps | 250 |\n","| time_elapsed | 2222 |\n","| total_timesteps | 557507 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0237 |\n","| n_updates | 114376 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.23e+03 |\n","| 
ep_rew_mean | 419 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2736 |\n","| fps | 250 |\n","| time_elapsed | 2229 |\n","| total_timesteps | 559166 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0167 |\n","| n_updates | 114791 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.24e+03 |\n","| ep_rew_mean | 420 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2740 |\n","| fps | 250 |\n","| time_elapsed | 2233 |\n","| total_timesteps | 560218 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0171 |\n","| n_updates | 115054 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.25e+03 |\n","| ep_rew_mean | 422 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2744 |\n","| fps | 250 |\n","| time_elapsed | 2237 |\n","| total_timesteps | 561216 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00583 |\n","| n_updates | 115303 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.25e+03 |\n","| ep_rew_mean | 421 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2748 |\n","| fps | 250 |\n","| time_elapsed | 2242 |\n","| total_timesteps | 562157 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0293 |\n","| n_updates | 115539 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.26e+03 |\n","| ep_rew_mean | 418 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2752 |\n","| fps | 250 |\n","| time_elapsed | 2254 |\n","| total_timesteps | 565085 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0196 |\n","| n_updates | 116271 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 
3.33e+03 |\n","| ep_rew_mean | 424 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2756 |\n","| fps | 250 |\n","| time_elapsed | 2259 |\n","| total_timesteps | 566373 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0162 |\n","| n_updates | 116593 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.33e+03 |\n","| ep_rew_mean | 425 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2760 |\n","| fps | 250 |\n","| time_elapsed | 2263 |\n","| total_timesteps | 567494 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0151 |\n","| n_updates | 116873 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.33e+03 |\n","| ep_rew_mean | 425 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2764 |\n","| fps | 250 |\n","| time_elapsed | 2269 |\n","| total_timesteps | 568809 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0133 |\n","| n_updates | 117202 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.35e+03 |\n","| ep_rew_mean | 430 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2768 |\n","| fps | 250 |\n","| time_elapsed | 2273 |\n","| total_timesteps | 569830 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0314 |\n","| n_updates | 117457 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.33e+03 |\n","| ep_rew_mean | 432 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2772 |\n","| fps | 250 |\n","| time_elapsed | 2282 |\n","| total_timesteps | 572194 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0212 |\n","| n_updates | 118048 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| 
ep_len_mean | 3.34e+03 |\n","| ep_rew_mean | 436 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2776 |\n","| fps | 250 |\n","| time_elapsed | 2286 |\n","| total_timesteps | 573205 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0174 |\n","| n_updates | 118301 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.34e+03 |\n","| ep_rew_mean | 436 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2780 |\n","| fps | 250 |\n","| time_elapsed | 2291 |\n","| total_timesteps | 574391 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.097 |\n","| n_updates | 118597 |\n","----------------------------------\n","Eval num_timesteps=575000, episode_reward=340.00 +/- 40.50\n","Episode length: 2673.60 +/- 255.43\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 2.67e+03 |\n","| mean_reward | 340 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 575000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0154 |\n","| n_updates | 118749 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.37e+03 |\n","| ep_rew_mean | 440 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2784 |\n","| fps | 249 |\n","| time_elapsed | 2302 |\n","| total_timesteps | 575270 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0153 |\n","| n_updates | 118817 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.4e+03 |\n","| ep_rew_mean | 446 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2788 |\n","| fps | 249 |\n","| time_elapsed | 2309 |\n","| total_timesteps | 576931 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.11 |\n","| n_updates | 119232 
|\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.38e+03 |\n","| ep_rew_mean | 443 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2792 |\n","| fps | 249 |\n","| time_elapsed | 2316 |\n","| total_timesteps | 578588 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.029 |\n","| n_updates | 119646 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.41e+03 |\n","| ep_rew_mean | 447 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2796 |\n","| fps | 249 |\n","| time_elapsed | 2324 |\n","| total_timesteps | 580787 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0364 |\n","| n_updates | 120196 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.43e+03 |\n","| ep_rew_mean | 454 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2800 |\n","| fps | 249 |\n","| time_elapsed | 2329 |\n","| total_timesteps | 581943 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0293 |\n","| n_updates | 120485 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.45e+03 |\n","| ep_rew_mean | 455 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2804 |\n","| fps | 249 |\n","| time_elapsed | 2341 |\n","| total_timesteps | 584902 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.028 |\n","| n_updates | 121225 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.49e+03 |\n","| ep_rew_mean | 460 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2808 |\n","| fps | 249 |\n","| time_elapsed | 2345 |\n","| total_timesteps | 586118 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0175 |\n","| n_updates 
| 121529 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.5e+03 |\n","| ep_rew_mean | 460 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2812 |\n","| fps | 249 |\n","| time_elapsed | 2352 |\n","| total_timesteps | 587667 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0452 |\n","| n_updates | 121916 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.5e+03 |\n","| ep_rew_mean | 460 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2816 |\n","| fps | 249 |\n","| time_elapsed | 2356 |\n","| total_timesteps | 588842 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0314 |\n","| n_updates | 122210 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.55e+03 |\n","| ep_rew_mean | 467 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2820 |\n","| fps | 249 |\n","| time_elapsed | 2363 |\n","| total_timesteps | 590609 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0144 |\n","| n_updates | 122652 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.56e+03 |\n","| ep_rew_mean | 470 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2824 |\n","| fps | 249 |\n","| time_elapsed | 2368 |\n","| total_timesteps | 591694 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.13 |\n","| n_updates | 122923 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.6e+03 |\n","| ep_rew_mean | 474 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2828 |\n","| fps | 249 |\n","| time_elapsed | 2376 |\n","| total_timesteps | 593682 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0197 |\n","| 
n_updates | 123420 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.62e+03 |\n","| ep_rew_mean | 478 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2832 |\n","| fps | 249 |\n","| time_elapsed | 2388 |\n","| total_timesteps | 596695 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0225 |\n","| n_updates | 124173 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.66e+03 |\n","| ep_rew_mean | 485 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2836 |\n","| fps | 249 |\n","| time_elapsed | 2393 |\n","| total_timesteps | 598089 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0176 |\n","| n_updates | 124522 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.71e+03 |\n","| ep_rew_mean | 494 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2840 |\n","| fps | 249 |\n","| time_elapsed | 2399 |\n","| total_timesteps | 599779 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0249 |\n","| n_updates | 124944 |\n","----------------------------------\n","Eval num_timesteps=600000, episode_reward=353.00 +/- 136.55\n","Episode length: 2427.20 +/- 634.37\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 2.43e+03 |\n","| mean_reward | 353 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 600000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0197 |\n","| n_updates | 124999 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.72e+03 |\n","| ep_rew_mean | 495 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2844 |\n","| fps | 249 |\n","| time_elapsed | 2414 |\n","| total_timesteps | 601559 |\n","| train/ 
| |\n","| learning_rate | 0.0001 |\n","| loss | 0.0209 |\n","| n_updates | 125389 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.73e+03 |\n","| ep_rew_mean | 498 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2848 |\n","| fps | 249 |\n","| time_elapsed | 2418 |\n","| total_timesteps | 602519 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0205 |\n","| n_updates | 125629 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.75e+03 |\n","| ep_rew_mean | 503 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2852 |\n","| fps | 249 |\n","| time_elapsed | 2425 |\n","| total_timesteps | 604395 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0122 |\n","| n_updates | 126098 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.78e+03 |\n","| ep_rew_mean | 506 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2856 |\n","| fps | 249 |\n","| time_elapsed | 2431 |\n","| total_timesteps | 605999 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0392 |\n","| n_updates | 126499 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.79e+03 |\n","| ep_rew_mean | 514 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2860 |\n","| fps | 249 |\n","| time_elapsed | 2439 |\n","| total_timesteps | 607920 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0414 |\n","| n_updates | 126979 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.81e+03 |\n","| ep_rew_mean | 517 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2864 |\n","| fps | 249 |\n","| time_elapsed | 2445 |\n","| total_timesteps | 609371 
|\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0219 |\n","| n_updates | 127342 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.82e+03 |\n","| ep_rew_mean | 517 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2868 |\n","| fps | 249 |\n","| time_elapsed | 2449 |\n","| total_timesteps | 610480 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00944 |\n","| n_updates | 127619 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.82e+03 |\n","| ep_rew_mean | 521 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2872 |\n","| fps | 249 |\n","| time_elapsed | 2456 |\n","| total_timesteps | 612208 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.016 |\n","| n_updates | 128051 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.82e+03 |\n","| ep_rew_mean | 522 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2876 |\n","| fps | 249 |\n","| time_elapsed | 2462 |\n","| total_timesteps | 613548 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0268 |\n","| n_updates | 128386 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.85e+03 |\n","| ep_rew_mean | 524 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2880 |\n","| fps | 249 |\n","| time_elapsed | 2469 |\n","| total_timesteps | 615312 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0244 |\n","| n_updates | 128827 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.87e+03 |\n","| ep_rew_mean | 525 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2884 |\n","| fps | 249 |\n","| time_elapsed | 2477 |\n","| 
total_timesteps | 617261 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0365 |\n","| n_updates | 129315 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.88e+03 |\n","| ep_rew_mean | 528 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2888 |\n","| fps | 249 |\n","| time_elapsed | 2481 |\n","| total_timesteps | 618178 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0139 |\n","| n_updates | 129544 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.88e+03 |\n","| ep_rew_mean | 528 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2892 |\n","| fps | 249 |\n","| time_elapsed | 2485 |\n","| total_timesteps | 619349 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0205 |\n","| n_updates | 129837 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.9e+03 |\n","| ep_rew_mean | 531 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2896 |\n","| fps | 249 |\n","| time_elapsed | 2490 |\n","| total_timesteps | 620479 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0108 |\n","| n_updates | 130119 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.88e+03 |\n","| ep_rew_mean | 529 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2900 |\n","| fps | 249 |\n","| time_elapsed | 2495 |\n","| total_timesteps | 621666 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0827 |\n","| n_updates | 130416 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.9e+03 |\n","| ep_rew_mean | 531 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2904 |\n","| fps | 249 |\n","| time_elapsed | 2501 
|\n","| total_timesteps | 623259 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0249 |\n","| n_updates | 130814 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.89e+03 |\n","| ep_rew_mean | 528 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2908 |\n","| fps | 249 |\n","| time_elapsed | 2507 |\n","| total_timesteps | 624613 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0166 |\n","| n_updates | 131153 |\n","----------------------------------\n","Eval num_timesteps=625000, episode_reward=723.00 +/- 289.35\n","Episode length: 4884.20 +/- 1192.20\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.88e+03 |\n","| mean_reward | 723 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 625000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0235 |\n","| n_updates | 131249 |\n","----------------------------------\n","New best mean reward!\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.89e+03 |\n","| ep_rew_mean | 526 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2912 |\n","| fps | 247 |\n","| time_elapsed | 2525 |\n","| total_timesteps | 625653 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0108 |\n","| n_updates | 131413 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.88e+03 |\n","| ep_rew_mean | 527 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2916 |\n","| fps | 247 |\n","| time_elapsed | 2532 |\n","| total_timesteps | 627396 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0258 |\n","| n_updates | 131848 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.89e+03 |\n","| ep_rew_mean | 528 |\n","| exploration_rate | 
0.01 |\n","| time/ | |\n","| episodes | 2920 |\n","| fps | 247 |\n","| time_elapsed | 2538 |\n","| total_timesteps | 628901 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0133 |\n","| n_updates | 132225 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.91e+03 |\n","| ep_rew_mean | 531 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2924 |\n","| fps | 247 |\n","| time_elapsed | 2545 |\n","| total_timesteps | 630481 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00973 |\n","| n_updates | 132620 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.91e+03 |\n","| ep_rew_mean | 537 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2928 |\n","| fps | 247 |\n","| time_elapsed | 2551 |\n","| total_timesteps | 632043 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0268 |\n","| n_updates | 133010 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.91e+03 |\n","| ep_rew_mean | 536 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2932 |\n","| fps | 247 |\n","| time_elapsed | 2556 |\n","| total_timesteps | 633348 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0116 |\n","| n_updates | 133336 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.95e+03 |\n","| ep_rew_mean | 544 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2936 |\n","| fps | 247 |\n","| time_elapsed | 2562 |\n","| total_timesteps | 634774 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0158 |\n","| n_updates | 133693 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.96e+03 |\n","| ep_rew_mean | 543 |\n","| 
exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2940 |\n","| fps | 247 |\n","| time_elapsed | 2568 |\n","| total_timesteps | 636181 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0167 |\n","| n_updates | 134045 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.97e+03 |\n","| ep_rew_mean | 546 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2944 |\n","| fps | 247 |\n","| time_elapsed | 2575 |\n","| total_timesteps | 638228 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0103 |\n","| n_updates | 134556 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.96e+03 |\n","| ep_rew_mean | 546 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2948 |\n","| fps | 247 |\n","| time_elapsed | 2582 |\n","| total_timesteps | 639748 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0363 |\n","| n_updates | 134936 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.93e+03 |\n","| ep_rew_mean | 540 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2952 |\n","| fps | 247 |\n","| time_elapsed | 2585 |\n","| total_timesteps | 640540 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0188 |\n","| n_updates | 135134 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.93e+03 |\n","| ep_rew_mean | 543 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2956 |\n","| fps | 247 |\n","| time_elapsed | 2590 |\n","| total_timesteps | 641736 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0383 |\n","| n_updates | 135433 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.92e+03 |\n","| ep_rew_mean | 
538 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2960 |\n","| fps | 247 |\n","| time_elapsed | 2597 |\n","| total_timesteps | 643571 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0209 |\n","| n_updates | 135892 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.92e+03 |\n","| ep_rew_mean | 539 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2964 |\n","| fps | 247 |\n","| time_elapsed | 2605 |\n","| total_timesteps | 645596 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0468 |\n","| n_updates | 136398 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.97e+03 |\n","| ep_rew_mean | 544 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2968 |\n","| fps | 247 |\n","| time_elapsed | 2611 |\n","| total_timesteps | 647143 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0285 |\n","| n_updates | 136785 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.95e+03 |\n","| ep_rew_mean | 541 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2972 |\n","| fps | 247 |\n","| time_elapsed | 2617 |\n","| total_timesteps | 648568 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00841 |\n","| n_updates | 137141 |\n","----------------------------------\n","Eval num_timesteps=650000, episode_reward=538.00 +/- 137.43\n","Episode length: 3883.00 +/- 491.66\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 3.88e+03 |\n","| mean_reward | 538 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 650000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0481 |\n","| n_updates | 137499 |\n","----------------------------------\n","----------------------------------\n","| 
rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 544 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2976 |\n","| fps | 246 |\n","| time_elapsed | 2634 |\n","| total_timesteps | 650086 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00674 |\n","| n_updates | 137521 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 544 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2980 |\n","| fps | 246 |\n","| time_elapsed | 2639 |\n","| total_timesteps | 651309 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0175 |\n","| n_updates | 137827 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 543 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2984 |\n","| fps | 246 |\n","| time_elapsed | 2641 |\n","| total_timesteps | 651875 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0372 |\n","| n_updates | 137968 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 545 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2988 |\n","| fps | 246 |\n","| time_elapsed | 2646 |\n","| total_timesteps | 653196 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0185 |\n","| n_updates | 138298 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.97e+03 |\n","| ep_rew_mean | 543 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2992 |\n","| fps | 246 |\n","| time_elapsed | 2653 |\n","| total_timesteps | 654745 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0197 |\n","| n_updates | 138686 
|\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 546 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 2996 |\n","| fps | 246 |\n","| time_elapsed | 2657 |\n","| total_timesteps | 655848 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.021 |\n","| n_updates | 138961 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 549 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3000 |\n","| fps | 246 |\n","| time_elapsed | 2662 |\n","| total_timesteps | 657058 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.024 |\n","| n_updates | 139264 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 549 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3004 |\n","| fps | 246 |\n","| time_elapsed | 2668 |\n","| total_timesteps | 658475 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0303 |\n","| n_updates | 139618 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 551 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3008 |\n","| fps | 246 |\n","| time_elapsed | 2673 |\n","| total_timesteps | 659938 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00681 |\n","| n_updates | 139984 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.01e+03 |\n","| ep_rew_mean | 552 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3012 |\n","| fps | 246 |\n","| time_elapsed | 2679 |\n","| total_timesteps | 661162 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0103 |\n","| n_updates 
| 140290 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 553 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3016 |\n","| fps | 246 |\n","| time_elapsed | 2684 |\n","| total_timesteps | 662643 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0242 |\n","| n_updates | 140660 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.93e+03 |\n","| ep_rew_mean | 550 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3020 |\n","| fps | 246 |\n","| time_elapsed | 2688 |\n","| total_timesteps | 663644 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0145 |\n","| n_updates | 140910 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.94e+03 |\n","| ep_rew_mean | 552 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3024 |\n","| fps | 246 |\n","| time_elapsed | 2694 |\n","| total_timesteps | 665073 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0152 |\n","| n_updates | 141268 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.95e+03 |\n","| ep_rew_mean | 553 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3028 |\n","| fps | 246 |\n","| time_elapsed | 2699 |\n","| total_timesteps | 666238 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0335 |\n","| n_updates | 141559 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.93e+03 |\n","| ep_rew_mean | 544 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3032 |\n","| fps | 246 |\n","| time_elapsed | 2703 |\n","| total_timesteps | 667117 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0346 |\n","| 
n_updates | 141779 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.95e+03 |\n","| ep_rew_mean | 550 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3036 |\n","| fps | 246 |\n","| time_elapsed | 2708 |\n","| total_timesteps | 668512 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.014 |\n","| n_updates | 142127 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 555 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3040 |\n","| fps | 246 |\n","| time_elapsed | 2720 |\n","| total_timesteps | 671428 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0391 |\n","| n_updates | 142856 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 556 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3044 |\n","| fps | 246 |\n","| time_elapsed | 2725 |\n","| total_timesteps | 672642 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0508 |\n","| n_updates | 143160 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 560 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3048 |\n","| fps | 246 |\n","| time_elapsed | 2732 |\n","| total_timesteps | 674048 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.01 |\n","| n_updates | 143511 |\n","----------------------------------\n","Eval num_timesteps=675000, episode_reward=684.00 +/- 133.13\n","Episode length: 4844.60 +/- 1307.13\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.84e+03 |\n","| mean_reward | 684 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 675000 |\n","| train/ | 
|\n","| learning_rate | 0.0001 |\n","| loss | 0.0187 |\n","| n_updates | 143749 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 558 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3052 |\n","| fps | 245 |\n","| time_elapsed | 2752 |\n","| total_timesteps | 675997 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0408 |\n","| n_updates | 143999 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 555 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3056 |\n","| fps | 245 |\n","| time_elapsed | 2758 |\n","| total_timesteps | 677409 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0402 |\n","| n_updates | 144352 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.03e+03 |\n","| ep_rew_mean | 556 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3060 |\n","| fps | 245 |\n","| time_elapsed | 2761 |\n","| total_timesteps | 678212 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0775 |\n","| n_updates | 144552 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 554 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3064 |\n","| fps | 245 |\n","| time_elapsed | 2770 |\n","| total_timesteps | 680294 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00943 |\n","| n_updates | 145073 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 550 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3068 |\n","| fps | 245 |\n","| time_elapsed | 2775 |\n","| total_timesteps | 681600 |\n","| 
train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00632 |\n","| n_updates | 145399 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 549 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3072 |\n","| fps | 245 |\n","| time_elapsed | 2780 |\n","| total_timesteps | 682753 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0301 |\n","| n_updates | 145688 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.96e+03 |\n","| ep_rew_mean | 547 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3076 |\n","| fps | 245 |\n","| time_elapsed | 2783 |\n","| total_timesteps | 683588 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00575 |\n","| n_updates | 145896 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.95e+03 |\n","| ep_rew_mean | 545 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3080 |\n","| fps | 245 |\n","| time_elapsed | 2790 |\n","| total_timesteps | 685278 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00993 |\n","| n_updates | 146319 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.93e+03 |\n","| ep_rew_mean | 547 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3084 |\n","| fps | 245 |\n","| time_elapsed | 2799 |\n","| total_timesteps | 687727 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0194 |\n","| n_updates | 146931 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.94e+03 |\n","| ep_rew_mean | 547 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3088 |\n","| fps | 245 |\n","| time_elapsed | 2807 |\n","| total_timesteps | 
689640 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.014 |\n","| n_updates | 147409 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.91e+03 |\n","| ep_rew_mean | 543 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3092 |\n","| fps | 245 |\n","| time_elapsed | 2810 |\n","| total_timesteps | 690460 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0437 |\n","| n_updates | 147614 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.89e+03 |\n","| ep_rew_mean | 539 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3096 |\n","| fps | 245 |\n","| time_elapsed | 2817 |\n","| total_timesteps | 692225 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0196 |\n","| n_updates | 148056 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.83e+03 |\n","| ep_rew_mean | 531 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3100 |\n","| fps | 245 |\n","| time_elapsed | 2825 |\n","| total_timesteps | 694230 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0294 |\n","| n_updates | 148557 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.84e+03 |\n","| ep_rew_mean | 534 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3104 |\n","| fps | 245 |\n","| time_elapsed | 2829 |\n","| total_timesteps | 695205 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0437 |\n","| n_updates | 148801 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.85e+03 |\n","| ep_rew_mean | 538 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3108 |\n","| fps | 245 |\n","| time_elapsed | 2836 |\n","| 
total_timesteps | 696969 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0735 |\n","| n_updates | 149242 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.86e+03 |\n","| ep_rew_mean | 542 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3112 |\n","| fps | 245 |\n","| time_elapsed | 2842 |\n","| total_timesteps | 698432 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0814 |\n","| n_updates | 149607 |\n","----------------------------------\n","Eval num_timesteps=700000, episode_reward=586.00 +/- 59.28\n","Episode length: 4325.80 +/- 662.72\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.33e+03 |\n","| mean_reward | 586 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 700000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0493 |\n","| n_updates | 149999 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.84e+03 |\n","| ep_rew_mean | 537 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3116 |\n","| fps | 244 |\n","| time_elapsed | 2860 |\n","| total_timesteps | 700073 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0312 |\n","| n_updates | 150018 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.86e+03 |\n","| ep_rew_mean | 546 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3120 |\n","| fps | 244 |\n","| time_elapsed | 2866 |\n","| total_timesteps | 701480 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0123 |\n","| n_updates | 150369 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.86e+03 |\n","| ep_rew_mean | 545 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes 
| 3124 |\n","| fps | 244 |\n","| time_elapsed | 2872 |\n","| total_timesteps | 703094 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0804 |\n","| n_updates | 150773 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.86e+03 |\n","| ep_rew_mean | 543 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3128 |\n","| fps | 244 |\n","| time_elapsed | 2877 |\n","| total_timesteps | 704190 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0935 |\n","| n_updates | 151047 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.89e+03 |\n","| ep_rew_mean | 546 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3132 |\n","| fps | 244 |\n","| time_elapsed | 2887 |\n","| total_timesteps | 706766 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0505 |\n","| n_updates | 151691 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.89e+03 |\n","| ep_rew_mean | 546 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3136 |\n","| fps | 244 |\n","| time_elapsed | 2898 |\n","| total_timesteps | 709592 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00818 |\n","| n_updates | 152397 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.89e+03 |\n","| ep_rew_mean | 553 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3140 |\n","| fps | 244 |\n","| time_elapsed | 2907 |\n","| total_timesteps | 711726 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0425 |\n","| n_updates | 152931 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.92e+03 |\n","| ep_rew_mean | 558 |\n","| exploration_rate | 0.01 |\n","| time/ | 
|\n","| episodes | 3144 |\n","| fps | 244 |\n","| time_elapsed | 2914 |\n","| total_timesteps | 713647 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0287 |\n","| n_updates | 153411 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.89e+03 |\n","| ep_rew_mean | 554 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3148 |\n","| fps | 244 |\n","| time_elapsed | 2920 |\n","| total_timesteps | 714944 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00862 |\n","| n_updates | 153735 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.88e+03 |\n","| ep_rew_mean | 559 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3152 |\n","| fps | 244 |\n","| time_elapsed | 2927 |\n","| total_timesteps | 716917 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0322 |\n","| n_updates | 154229 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.88e+03 |\n","| ep_rew_mean | 564 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3156 |\n","| fps | 244 |\n","| time_elapsed | 2941 |\n","| total_timesteps | 720219 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0833 |\n","| n_updates | 155054 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.92e+03 |\n","| ep_rew_mean | 572 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3160 |\n","| fps | 244 |\n","| time_elapsed | 2947 |\n","| total_timesteps | 721611 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0287 |\n","| n_updates | 155402 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.91e+03 |\n","| ep_rew_mean | 573 |\n","| exploration_rate | 0.01 
|\n","| time/ | |\n","| episodes | 3164 |\n","| fps | 244 |\n","| time_elapsed | 2952 |\n","| total_timesteps | 722926 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0181 |\n","| n_updates | 155731 |\n","----------------------------------\n","Eval num_timesteps=725000, episode_reward=489.00 +/- 127.77\n","Episode length: 3926.00 +/- 1038.94\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 3.93e+03 |\n","| mean_reward | 489 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 725000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0232 |\n","| n_updates | 156249 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.92e+03 |\n","| ep_rew_mean | 576 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3168 |\n","| fps | 243 |\n","| time_elapsed | 2973 |\n","| total_timesteps | 725173 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00967 |\n","| n_updates | 156293 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.91e+03 |\n","| ep_rew_mean | 575 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3172 |\n","| fps | 243 |\n","| time_elapsed | 2980 |\n","| total_timesteps | 726826 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0322 |\n","| n_updates | 156706 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.91e+03 |\n","| ep_rew_mean | 574 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3176 |\n","| fps | 243 |\n","| time_elapsed | 2986 |\n","| total_timesteps | 728422 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0226 |\n","| n_updates | 157105 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 
3.92e+03 |\n","| ep_rew_mean | 571 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3180 |\n","| fps | 243 |\n","| time_elapsed | 2991 |\n","| total_timesteps | 729682 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0232 |\n","| n_updates | 157420 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.93e+03 |\n","| ep_rew_mean | 573 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3184 |\n","| fps | 243 |\n","| time_elapsed | 2999 |\n","| total_timesteps | 731839 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0598 |\n","| n_updates | 157959 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.93e+03 |\n","| ep_rew_mean | 576 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3188 |\n","| fps | 243 |\n","| time_elapsed | 3005 |\n","| total_timesteps | 733149 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0131 |\n","| n_updates | 158287 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.93e+03 |\n","| ep_rew_mean | 576 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3192 |\n","| fps | 243 |\n","| time_elapsed | 3013 |\n","| total_timesteps | 735318 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0224 |\n","| n_updates | 158829 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.95e+03 |\n","| ep_rew_mean | 577 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3196 |\n","| fps | 243 |\n","| time_elapsed | 3020 |\n","| total_timesteps | 736871 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0563 |\n","| n_updates | 159217 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| 
ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 581 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3200 |\n","| fps | 244 |\n","| time_elapsed | 3028 |\n","| total_timesteps | 739040 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0236 |\n","| n_updates | 159759 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 581 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3204 |\n","| fps | 244 |\n","| time_elapsed | 3036 |\n","| total_timesteps | 741177 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0414 |\n","| n_updates | 160294 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 581 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3208 |\n","| fps | 244 |\n","| time_elapsed | 3043 |\n","| total_timesteps | 742726 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0196 |\n","| n_updates | 160681 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 580 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3212 |\n","| fps | 244 |\n","| time_elapsed | 3050 |\n","| total_timesteps | 744633 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0318 |\n","| n_updates | 161158 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 587 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3216 |\n","| fps | 244 |\n","| time_elapsed | 3059 |\n","| total_timesteps | 746792 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0196 |\n","| n_updates | 161697 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | 
|\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 587 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3220 |\n","| fps | 244 |\n","| time_elapsed | 3065 |\n","| total_timesteps | 748345 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0306 |\n","| n_updates | 162086 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 586 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3224 |\n","| fps | 244 |\n","| time_elapsed | 3071 |\n","| total_timesteps | 749829 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0119 |\n","| n_updates | 162457 |\n","----------------------------------\n","Eval num_timesteps=750000, episode_reward=681.00 +/- 150.41\n","Episode length: 5224.80 +/- 1098.07\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 5.22e+03 |\n","| mean_reward | 681 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 750000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0138 |\n","| n_updates | 162499 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.03e+03 |\n","| ep_rew_mean | 595 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3228 |\n","| fps | 243 |\n","| time_elapsed | 3091 |\n","| total_timesteps | 751350 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0263 |\n","| n_updates | 162837 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.04e+03 |\n","| ep_rew_mean | 598 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3232 |\n","| fps | 243 |\n","| time_elapsed | 3099 |\n","| total_timesteps | 753191 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0133 |\n","| n_updates | 163297 
|\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.04e+03 |\n","| ep_rew_mean | 601 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3236 |\n","| fps | 243 |\n","| time_elapsed | 3107 |\n","| total_timesteps | 755109 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0159 |\n","| n_updates | 163777 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.05e+03 |\n","| ep_rew_mean | 600 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3240 |\n","| fps | 243 |\n","| time_elapsed | 3114 |\n","| total_timesteps | 757015 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0163 |\n","| n_updates | 164253 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.04e+03 |\n","| ep_rew_mean | 602 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3244 |\n","| fps | 243 |\n","| time_elapsed | 3119 |\n","| total_timesteps | 758223 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0436 |\n","| n_updates | 164555 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.04e+03 |\n","| ep_rew_mean | 603 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3248 |\n","| fps | 243 |\n","| time_elapsed | 3127 |\n","| total_timesteps | 760238 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0436 |\n","| n_updates | 165059 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.05e+03 |\n","| ep_rew_mean | 605 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3252 |\n","| fps | 243 |\n","| time_elapsed | 3134 |\n","| total_timesteps | 761857 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.093 |\n","| n_updates 
| 165464 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.06e+03 |\n","| ep_rew_mean | 604 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3256 |\n","| fps | 243 |\n","| time_elapsed | 3141 |\n","| total_timesteps | 763761 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0145 |\n","| n_updates | 165940 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.08e+03 |\n","| ep_rew_mean | 607 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3260 |\n","| fps | 243 |\n","| time_elapsed | 3149 |\n","| total_timesteps | 765639 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0132 |\n","| n_updates | 166409 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.08e+03 |\n","| ep_rew_mean | 607 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3264 |\n","| fps | 243 |\n","| time_elapsed | 3153 |\n","| total_timesteps | 766845 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0369 |\n","| n_updates | 166711 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.09e+03 |\n","| ep_rew_mean | 612 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3268 |\n","| fps | 243 |\n","| time_elapsed | 3159 |\n","| total_timesteps | 768079 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0692 |\n","| n_updates | 167019 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.11e+03 |\n","| ep_rew_mean | 615 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3272 |\n","| fps | 243 |\n","| time_elapsed | 3163 |\n","| total_timesteps | 769283 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0276 |\n","| 
n_updates | 167320 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.04e+03 |\n","| ep_rew_mean | 606 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3276 |\n","| fps | 243 |\n","| time_elapsed | 3171 |\n","| total_timesteps | 771038 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0223 |\n","| n_updates | 167759 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.05e+03 |\n","| ep_rew_mean | 608 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3280 |\n","| fps | 243 |\n","| time_elapsed | 3178 |\n","| total_timesteps | 772671 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0231 |\n","| n_updates | 168167 |\n","----------------------------------\n","Eval num_timesteps=775000, episode_reward=548.00 +/- 232.37\n","Episode length: 4043.00 +/- 1484.57\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.04e+03 |\n","| mean_reward | 548 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 775000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.019 |\n","| n_updates | 168749 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.07e+03 |\n","| ep_rew_mean | 614 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3284 |\n","| fps | 242 |\n","| time_elapsed | 3199 |\n","| total_timesteps | 775066 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.027 |\n","| n_updates | 168766 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.08e+03 |\n","| ep_rew_mean | 615 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3288 |\n","| fps | 242 |\n","| time_elapsed | 3206 |\n","| total_timesteps | 776442 |\n","| train/ | 
|\n","| learning_rate | 0.0001 |\n","| loss | 0.0503 |\n","| n_updates | 169110 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.1e+03 |\n","| ep_rew_mean | 616 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3292 |\n","| fps | 242 |\n","| time_elapsed | 3212 |\n","| total_timesteps | 778085 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00819 |\n","| n_updates | 169521 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.1e+03 |\n","| ep_rew_mean | 618 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3296 |\n","| fps | 242 |\n","| time_elapsed | 3215 |\n","| total_timesteps | 778957 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00844 |\n","| n_updates | 169739 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.1e+03 |\n","| ep_rew_mean | 618 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3300 |\n","| fps | 242 |\n","| time_elapsed | 3222 |\n","| total_timesteps | 780403 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0281 |\n","| n_updates | 170100 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.09e+03 |\n","| ep_rew_mean | 616 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3304 |\n","| fps | 242 |\n","| time_elapsed | 3226 |\n","| total_timesteps | 781474 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0193 |\n","| n_updates | 170368 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.12e+03 |\n","| ep_rew_mean | 623 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3308 |\n","| fps | 242 |\n","| time_elapsed | 3232 |\n","| total_timesteps | 783042 |\n","| 
train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0375 |\n","| n_updates | 170760 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.14e+03 |\n","| ep_rew_mean | 627 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3312 |\n","| fps | 242 |\n","| time_elapsed | 3239 |\n","| total_timesteps | 784673 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00975 |\n","| n_updates | 171168 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.17e+03 |\n","| ep_rew_mean | 632 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3316 |\n","| fps | 242 |\n","| time_elapsed | 3245 |\n","| total_timesteps | 786116 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00638 |\n","| n_updates | 171528 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.16e+03 |\n","| ep_rew_mean | 633 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3320 |\n","| fps | 242 |\n","| time_elapsed | 3250 |\n","| total_timesteps | 787625 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0191 |\n","| n_updates | 171906 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.15e+03 |\n","| ep_rew_mean | 635 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3324 |\n","| fps | 242 |\n","| time_elapsed | 3255 |\n","| total_timesteps | 788639 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0247 |\n","| n_updates | 172159 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.14e+03 |\n","| ep_rew_mean | 634 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3328 |\n","| fps | 242 |\n","| time_elapsed | 3260 |\n","| total_timesteps | 
789965 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0183 |\n","| n_updates | 172491 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.14e+03 |\n","| ep_rew_mean | 633 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3332 |\n","| fps | 242 |\n","| time_elapsed | 3265 |\n","| total_timesteps | 791338 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0475 |\n","| n_updates | 172834 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.16e+03 |\n","| ep_rew_mean | 639 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3336 |\n","| fps | 242 |\n","| time_elapsed | 3271 |\n","| total_timesteps | 792664 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0384 |\n","| n_updates | 173165 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.16e+03 |\n","| ep_rew_mean | 641 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3340 |\n","| fps | 242 |\n","| time_elapsed | 3277 |\n","| total_timesteps | 794360 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0856 |\n","| n_updates | 173589 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.16e+03 |\n","| ep_rew_mean | 640 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3344 |\n","| fps | 242 |\n","| time_elapsed | 3285 |\n","| total_timesteps | 796118 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0364 |\n","| n_updates | 174029 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.15e+03 |\n","| ep_rew_mean | 638 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3348 |\n","| fps | 242 |\n","| time_elapsed | 3289 |\n","| 
total_timesteps | 797059 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00957 |\n","| n_updates | 174264 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.16e+03 |\n","| ep_rew_mean | 641 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3352 |\n","| fps | 242 |\n","| time_elapsed | 3295 |\n","| total_timesteps | 798595 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0164 |\n","| n_updates | 174648 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.11e+03 |\n","| ep_rew_mean | 631 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3356 |\n","| fps | 242 |\n","| time_elapsed | 3301 |\n","| total_timesteps | 799987 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0217 |\n","| n_updates | 174996 |\n","----------------------------------\n","Eval num_timesteps=800000, episode_reward=657.00 +/- 252.72\n","Episode length: 4466.20 +/- 1595.76\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.47e+03 |\n","| mean_reward | 657 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 800000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0161 |\n","| n_updates | 174999 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.09e+03 |\n","| ep_rew_mean | 628 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3360 |\n","| fps | 241 |\n","| time_elapsed | 3322 |\n","| total_timesteps | 802037 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0136 |\n","| n_updates | 175509 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.09e+03 |\n","| ep_rew_mean | 626 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| 
episodes | 3364 |\n","| fps | 241 |\n","| time_elapsed | 3328 |\n","| total_timesteps | 803727 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0211 |\n","| n_updates | 175931 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.13e+03 |\n","| ep_rew_mean | 629 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3368 |\n","| fps | 241 |\n","| time_elapsed | 3335 |\n","| total_timesteps | 805415 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0342 |\n","| n_updates | 176353 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.1e+03 |\n","| ep_rew_mean | 624 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3372 |\n","| fps | 241 |\n","| time_elapsed | 3343 |\n","| total_timesteps | 807209 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.136 |\n","| n_updates | 176802 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.08e+03 |\n","| ep_rew_mean | 621 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3376 |\n","| fps | 241 |\n","| time_elapsed | 3349 |\n","| total_timesteps | 808739 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0267 |\n","| n_updates | 177184 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.08e+03 |\n","| ep_rew_mean | 623 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3380 |\n","| fps | 241 |\n","| time_elapsed | 3355 |\n","| total_timesteps | 810283 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0652 |\n","| n_updates | 177570 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.06e+03 |\n","| ep_rew_mean | 616 |\n","| exploration_rate | 0.01 |\n","| time/ | 
|\n","| episodes | 3384 |\n","| fps | 241 |\n","| time_elapsed | 3361 |\n","| total_timesteps | 811638 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0258 |\n","| n_updates | 177909 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.01e+03 |\n","| ep_rew_mean | 609 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3388 |\n","| fps | 241 |\n","| time_elapsed | 3370 |\n","| total_timesteps | 813994 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0198 |\n","| n_updates | 178498 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 608 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3392 |\n","| fps | 241 |\n","| time_elapsed | 3373 |\n","| total_timesteps | 814641 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0272 |\n","| n_updates | 178660 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.03e+03 |\n","| ep_rew_mean | 606 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3396 |\n","| fps | 241 |\n","| time_elapsed | 3382 |\n","| total_timesteps | 816812 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0185 |\n","| n_updates | 179202 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 605 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3400 |\n","| fps | 241 |\n","| time_elapsed | 3389 |\n","| total_timesteps | 818857 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0185 |\n","| n_updates | 179714 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.03e+03 |\n","| ep_rew_mean | 605 |\n","| exploration_rate | 0.01 
|\n","| time/ | |\n","| episodes | 3404 |\n","| fps | 241 |\n","| time_elapsed | 3394 |\n","| total_timesteps | 819855 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0107 |\n","| n_updates | 179963 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 602 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3408 |\n","| fps | 241 |\n","| time_elapsed | 3399 |\n","| total_timesteps | 821227 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0169 |\n","| n_updates | 180306 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.01e+03 |\n","| ep_rew_mean | 599 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3412 |\n","| fps | 241 |\n","| time_elapsed | 3407 |\n","| total_timesteps | 823111 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0154 |\n","| n_updates | 180777 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.01e+03 |\n","| ep_rew_mean | 597 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3416 |\n","| fps | 241 |\n","| time_elapsed | 3412 |\n","| total_timesteps | 824246 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0203 |\n","| n_updates | 181061 |\n","----------------------------------\n","Eval num_timesteps=825000, episode_reward=691.00 +/- 115.86\n","Episode length: 4365.80 +/- 357.58\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.37e+03 |\n","| mean_reward | 691 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 825000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0266 |\n","| n_updates | 181249 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 
4e+03 |\n","| ep_rew_mean | 599 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3420 |\n","| fps | 240 |\n","| time_elapsed | 3431 |\n","| total_timesteps | 825994 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.023 |\n","| n_updates | 181498 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 603 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3424 |\n","| fps | 240 |\n","| time_elapsed | 3438 |\n","| total_timesteps | 827933 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0126 |\n","| n_updates | 181983 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 597 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3428 |\n","| fps | 240 |\n","| time_elapsed | 3445 |\n","| total_timesteps | 829450 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0367 |\n","| n_updates | 182362 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.96e+03 |\n","| ep_rew_mean | 593 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3432 |\n","| fps | 240 |\n","| time_elapsed | 3448 |\n","| total_timesteps | 830232 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.042 |\n","| n_updates | 182557 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.95e+03 |\n","| ep_rew_mean | 596 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3436 |\n","| fps | 240 |\n","| time_elapsed | 3457 |\n","| total_timesteps | 832286 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0213 |\n","| n_updates | 183071 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean 
| 3.94e+03 |\n","| ep_rew_mean | 596 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3440 |\n","| fps | 240 |\n","| time_elapsed | 3462 |\n","| total_timesteps | 833626 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0661 |\n","| n_updates | 183406 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.93e+03 |\n","| ep_rew_mean | 595 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3444 |\n","| fps | 240 |\n","| time_elapsed | 3472 |\n","| total_timesteps | 836100 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0146 |\n","| n_updates | 184024 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 605 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3448 |\n","| fps | 240 |\n","| time_elapsed | 3483 |\n","| total_timesteps | 838950 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0226 |\n","| n_updates | 184737 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 609 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3452 |\n","| fps | 240 |\n","| time_elapsed | 3491 |\n","| total_timesteps | 841143 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0237 |\n","| n_updates | 185285 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 610 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3456 |\n","| fps | 240 |\n","| time_elapsed | 3498 |\n","| total_timesteps | 842627 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00735 |\n","| n_updates | 185656 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| 
ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 609 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3460 |\n","| fps | 240 |\n","| time_elapsed | 3509 |\n","| total_timesteps | 845402 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0107 |\n","| n_updates | 186350 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 612 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3464 |\n","| fps | 240 |\n","| time_elapsed | 3518 |\n","| total_timesteps | 847735 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0162 |\n","| n_updates | 186933 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 608 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3468 |\n","| fps | 240 |\n","| time_elapsed | 3525 |\n","| total_timesteps | 849428 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0189 |\n","| n_updates | 187356 |\n","----------------------------------\n","Eval num_timesteps=850000, episode_reward=712.00 +/- 222.18\n","Episode length: 4293.00 +/- 996.81\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.29e+03 |\n","| mean_reward | 712 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 850000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0506 |\n","| n_updates | 187499 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 610 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3472 |\n","| fps | 240 |\n","| time_elapsed | 3545 |\n","| total_timesteps | 851450 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0353 |\n","| n_updates | 187862 
|\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 607 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3476 |\n","| fps | 240 |\n","| time_elapsed | 3548 |\n","| total_timesteps | 852169 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0738 |\n","| n_updates | 188042 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 607 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3480 |\n","| fps | 240 |\n","| time_elapsed | 3555 |\n","| total_timesteps | 854045 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0388 |\n","| n_updates | 188511 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.01e+03 |\n","| ep_rew_mean | 605 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3484 |\n","| fps | 240 |\n","| time_elapsed | 3560 |\n","| total_timesteps | 855326 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0122 |\n","| n_updates | 188831 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 604 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3488 |\n","| fps | 240 |\n","| time_elapsed | 3566 |\n","| total_timesteps | 856870 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0174 |\n","| n_updates | 189217 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.01e+03 |\n","| ep_rew_mean | 599 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3492 |\n","| fps | 240 |\n","| time_elapsed | 3570 |\n","| total_timesteps | 857688 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0222 |\n","| n_updates | 
189421 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4e+03 |\n","| ep_rew_mean | 595 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3496 |\n","| fps | 240 |\n","| time_elapsed | 3582 |\n","| total_timesteps | 860609 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0549 |\n","| n_updates | 190152 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.03e+03 |\n","| ep_rew_mean | 599 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3500 |\n","| fps | 240 |\n","| time_elapsed | 3588 |\n","| total_timesteps | 862224 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0132 |\n","| n_updates | 190555 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.01e+03 |\n","| ep_rew_mean | 594 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3504 |\n","| fps | 240 |\n","| time_elapsed | 3595 |\n","| total_timesteps | 863893 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0165 |\n","| n_updates | 190973 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.01e+03 |\n","| ep_rew_mean | 592 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3508 |\n","| fps | 240 |\n","| time_elapsed | 3599 |\n","| total_timesteps | 864944 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0519 |\n","| n_updates | 191235 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.03e+03 |\n","| ep_rew_mean | 595 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3512 |\n","| fps | 240 |\n","| time_elapsed | 3610 |\n","| total_timesteps | 867707 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0316 |\n","| 
n_updates | 191926 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.03e+03 |\n","| ep_rew_mean | 596 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3516 |\n","| fps | 240 |\n","| time_elapsed | 3615 |\n","| total_timesteps | 869166 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.019 |\n","| n_updates | 192291 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.1e+03 |\n","| ep_rew_mean | 610 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3520 |\n","| fps | 240 |\n","| time_elapsed | 3626 |\n","| total_timesteps | 871767 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.054 |\n","| n_updates | 192941 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.1e+03 |\n","| ep_rew_mean | 609 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3524 |\n","| fps | 240 |\n","| time_elapsed | 3633 |\n","| total_timesteps | 873405 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0138 |\n","| n_updates | 193351 |\n","----------------------------------\n","Eval num_timesteps=875000, episode_reward=468.00 +/- 70.33\n","Episode length: 4099.20 +/- 585.87\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.1e+03 |\n","| mean_reward | 468 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 875000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0293 |\n","| n_updates | 193749 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.09e+03 |\n","| ep_rew_mean | 606 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3528 |\n","| fps | 239 |\n","| time_elapsed | 3654 |\n","| total_timesteps | 875825 |\n","| train/ | 
|\n","| learning_rate | 0.0001 |\n","| loss | 0.0113 |\n","| n_updates | 193956 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.07e+03 |\n","| ep_rew_mean | 607 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3532 |\n","| fps | 239 |\n","| time_elapsed | 3659 |\n","| total_timesteps | 877069 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0272 |\n","| n_updates | 194267 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.06e+03 |\n","| ep_rew_mean | 606 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3536 |\n","| fps | 239 |\n","| time_elapsed | 3667 |\n","| total_timesteps | 879063 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0115 |\n","| n_updates | 194765 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.11e+03 |\n","| ep_rew_mean | 618 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3540 |\n","| fps | 239 |\n","| time_elapsed | 3683 |\n","| total_timesteps | 882976 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0171 |\n","| n_updates | 195743 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.08e+03 |\n","| ep_rew_mean | 610 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3544 |\n","| fps | 239 |\n","| time_elapsed | 3693 |\n","| total_timesteps | 885287 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0118 |\n","| n_updates | 196321 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.07e+03 |\n","| ep_rew_mean | 607 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3548 |\n","| fps | 239 |\n","| time_elapsed | 3699 |\n","| total_timesteps | 886845 
|\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0201 |\n","| n_updates | 196711 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.07e+03 |\n","| ep_rew_mean | 610 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3552 |\n","| fps | 239 |\n","| time_elapsed | 3702 |\n","| total_timesteps | 887796 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.023 |\n","| n_updates | 196948 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.09e+03 |\n","| ep_rew_mean | 612 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3556 |\n","| fps | 239 |\n","| time_elapsed | 3708 |\n","| total_timesteps | 889154 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00807 |\n","| n_updates | 197288 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.09e+03 |\n","| ep_rew_mean | 612 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3560 |\n","| fps | 239 |\n","| time_elapsed | 3714 |\n","| total_timesteps | 890693 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0205 |\n","| n_updates | 197673 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.1e+03 |\n","| ep_rew_mean | 616 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3564 |\n","| fps | 239 |\n","| time_elapsed | 3721 |\n","| total_timesteps | 892254 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0189 |\n","| n_updates | 198063 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.1e+03 |\n","| ep_rew_mean | 618 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3568 |\n","| fps | 239 |\n","| time_elapsed | 3725 |\n","| total_timesteps 
| 893418 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0174 |\n","| n_updates | 198354 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.09e+03 |\n","| ep_rew_mean | 613 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3572 |\n","| fps | 239 |\n","| time_elapsed | 3731 |\n","| total_timesteps | 894741 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0195 |\n","| n_updates | 198685 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.06e+03 |\n","| ep_rew_mean | 613 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3576 |\n","| fps | 239 |\n","| time_elapsed | 3738 |\n","| total_timesteps | 896598 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.125 |\n","| n_updates | 199149 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.06e+03 |\n","| ep_rew_mean | 608 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3580 |\n","| fps | 239 |\n","| time_elapsed | 3744 |\n","| total_timesteps | 898012 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0132 |\n","| n_updates | 199502 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.07e+03 |\n","| ep_rew_mean | 608 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3584 |\n","| fps | 239 |\n","| time_elapsed | 3751 |\n","| total_timesteps | 899801 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00976 |\n","| n_updates | 199950 |\n","----------------------------------\n","Eval num_timesteps=900000, episode_reward=647.00 +/- 104.43\n","Episode length: 4145.80 +/- 554.99\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.15e+03 |\n","| mean_reward | 647 |\n","| rollout/ | |\n","| 
exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 900000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0142 |\n","| n_updates | 199999 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.08e+03 |\n","| ep_rew_mean | 606 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3588 |\n","| fps | 239 |\n","| time_elapsed | 3768 |\n","| total_timesteps | 901153 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0247 |\n","| n_updates | 200288 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.09e+03 |\n","| ep_rew_mean | 606 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3592 |\n","| fps | 239 |\n","| time_elapsed | 3772 |\n","| total_timesteps | 902086 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0134 |\n","| n_updates | 200521 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.07e+03 |\n","| ep_rew_mean | 602 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3596 |\n","| fps | 239 |\n","| time_elapsed | 3777 |\n","| total_timesteps | 903484 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00956 |\n","| n_updates | 200870 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 598 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3600 |\n","| fps | 239 |\n","| time_elapsed | 3784 |\n","| total_timesteps | 905031 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0129 |\n","| n_updates | 201257 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.03e+03 |\n","| ep_rew_mean | 606 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 
3604 |\n","| fps | 239 |\n","| time_elapsed | 3793 |\n","| total_timesteps | 907534 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0153 |\n","| n_updates | 201883 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.03e+03 |\n","| ep_rew_mean | 607 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3608 |\n","| fps | 239 |\n","| time_elapsed | 3802 |\n","| total_timesteps | 909784 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0233 |\n","| n_updates | 202445 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.06e+03 |\n","| ep_rew_mean | 617 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3612 |\n","| fps | 239 |\n","| time_elapsed | 3812 |\n","| total_timesteps | 912373 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0124 |\n","| n_updates | 203093 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.12e+03 |\n","| ep_rew_mean | 626 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3616 |\n","| fps | 239 |\n","| time_elapsed | 3822 |\n","| total_timesteps | 914593 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0277 |\n","| n_updates | 203648 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.11e+03 |\n","| ep_rew_mean | 625 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3620 |\n","| fps | 239 |\n","| time_elapsed | 3831 |\n","| total_timesteps | 916908 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0254 |\n","| n_updates | 204226 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.17e+03 |\n","| ep_rew_mean | 634 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| 
episodes | 3624 |\n","| fps | 239 |\n","| time_elapsed | 3836 |\n","| total_timesteps | 918250 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0114 |\n","| n_updates | 204562 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.18e+03 |\n","| ep_rew_mean | 635 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3628 |\n","| fps | 239 |\n","| time_elapsed | 3841 |\n","| total_timesteps | 919552 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0188 |\n","| n_updates | 204887 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.19e+03 |\n","| ep_rew_mean | 637 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3632 |\n","| fps | 239 |\n","| time_elapsed | 3851 |\n","| total_timesteps | 922050 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0388 |\n","| n_updates | 205512 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.2e+03 |\n","| ep_rew_mean | 640 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3636 |\n","| fps | 239 |\n","| time_elapsed | 3858 |\n","| total_timesteps | 923695 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0418 |\n","| n_updates | 205923 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.18e+03 |\n","| ep_rew_mean | 636 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3640 |\n","| fps | 239 |\n","| time_elapsed | 3862 |\n","| total_timesteps | 924770 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0213 |\n","| n_updates | 206192 |\n","----------------------------------\n","Eval num_timesteps=925000, episode_reward=838.00 +/- 302.86\n","Episode length: 5350.20 +/- 1324.31\n","----------------------------------\n","| eval/ | 
|\n","| mean_ep_length | 5.35e+03 |\n","| mean_reward | 838 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 925000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0133 |\n","| n_updates | 206249 |\n","----------------------------------\n","New best mean reward!\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.2e+03 |\n","| ep_rew_mean | 640 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3644 |\n","| fps | 238 |\n","| time_elapsed | 3883 |\n","| total_timesteps | 926094 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0319 |\n","| n_updates | 206523 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.23e+03 |\n","| ep_rew_mean | 652 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3648 |\n","| fps | 238 |\n","| time_elapsed | 3896 |\n","| total_timesteps | 929384 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00733 |\n","| n_updates | 207345 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.22e+03 |\n","| ep_rew_mean | 648 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3652 |\n","| fps | 238 |\n","| time_elapsed | 3900 |\n","| total_timesteps | 930461 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0299 |\n","| n_updates | 207615 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.21e+03 |\n","| ep_rew_mean | 646 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3656 |\n","| fps | 238 |\n","| time_elapsed | 3911 |\n","| total_timesteps | 932900 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0122 |\n","| n_updates | 208224 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| 
ep_len_mean | 4.27e+03 |\n","| ep_rew_mean | 652 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3660 |\n","| fps | 238 |\n","| time_elapsed | 3918 |\n","| total_timesteps | 934662 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0167 |\n","| n_updates | 208665 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.29e+03 |\n","| ep_rew_mean | 657 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3664 |\n","| fps | 238 |\n","| time_elapsed | 3926 |\n","| total_timesteps | 936786 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0537 |\n","| n_updates | 209196 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.29e+03 |\n","| ep_rew_mean | 652 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3668 |\n","| fps | 238 |\n","| time_elapsed | 3934 |\n","| total_timesteps | 938480 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0208 |\n","| n_updates | 209619 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.29e+03 |\n","| ep_rew_mean | 655 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3672 |\n","| fps | 238 |\n","| time_elapsed | 3939 |\n","| total_timesteps | 939806 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0129 |\n","| n_updates | 209951 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.28e+03 |\n","| ep_rew_mean | 660 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3676 |\n","| fps | 238 |\n","| time_elapsed | 3946 |\n","| total_timesteps | 941589 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00926 |\n","| n_updates | 210397 |\n","----------------------------------\n","----------------------------------\n","| 
rollout/ | |\n","| ep_len_mean | 4.25e+03 |\n","| ep_rew_mean | 658 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3680 |\n","| fps | 238 |\n","| time_elapsed | 3950 |\n","| total_timesteps | 942698 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0176 |\n","| n_updates | 210674 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.2e+03 |\n","| ep_rew_mean | 649 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3684 |\n","| fps | 238 |\n","| time_elapsed | 3961 |\n","| total_timesteps | 945368 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0196 |\n","| n_updates | 211341 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.19e+03 |\n","| ep_rew_mean | 649 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3688 |\n","| fps | 238 |\n","| time_elapsed | 3969 |\n","| total_timesteps | 947291 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0485 |\n","| n_updates | 211822 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.19e+03 |\n","| ep_rew_mean | 649 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3692 |\n","| fps | 238 |\n","| time_elapsed | 3977 |\n","| total_timesteps | 949315 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0173 |\n","| n_updates | 212328 |\n","----------------------------------\n","Eval num_timesteps=950000, episode_reward=810.00 +/- 405.36\n","Episode length: 4782.40 +/- 1120.77\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.78e+03 |\n","| mean_reward | 810 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 950000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0292 |\n","| n_updates | 212499 
|\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.17e+03 |\n","| ep_rew_mean | 645 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3696 |\n","| fps | 237 |\n","| time_elapsed | 3997 |\n","| total_timesteps | 950988 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0165 |\n","| n_updates | 212746 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.13e+03 |\n","| ep_rew_mean | 646 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3700 |\n","| fps | 237 |\n","| time_elapsed | 4009 |\n","| total_timesteps | 953836 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0135 |\n","| n_updates | 213458 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.14e+03 |\n","| ep_rew_mean | 648 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3704 |\n","| fps | 237 |\n","| time_elapsed | 4014 |\n","| total_timesteps | 955128 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0194 |\n","| n_updates | 213781 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.11e+03 |\n","| ep_rew_mean | 647 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3708 |\n","| fps | 237 |\n","| time_elapsed | 4020 |\n","| total_timesteps | 956522 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0521 |\n","| n_updates | 214130 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.1e+03 |\n","| ep_rew_mean | 643 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3712 |\n","| fps | 237 |\n","| time_elapsed | 4026 |\n","| total_timesteps | 958123 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0443 |\n","| n_updates 
| 214530 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.08e+03 |\n","| ep_rew_mean | 648 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3716 |\n","| fps | 237 |\n","| time_elapsed | 4035 |\n","| total_timesteps | 960402 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0183 |\n","| n_updates | 215100 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.04e+03 |\n","| ep_rew_mean | 644 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3720 |\n","| fps | 237 |\n","| time_elapsed | 4044 |\n","| total_timesteps | 962559 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0888 |\n","| n_updates | 215639 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.04e+03 |\n","| ep_rew_mean | 650 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3724 |\n","| fps | 238 |\n","| time_elapsed | 4053 |\n","| total_timesteps | 964982 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0175 |\n","| n_updates | 216245 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 647 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3728 |\n","| fps | 238 |\n","| time_elapsed | 4059 |\n","| total_timesteps | 966237 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0129 |\n","| n_updates | 216559 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 643 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3732 |\n","| fps | 238 |\n","| time_elapsed | 4064 |\n","| total_timesteps | 967595 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.025 |\n","| 
n_updates | 216898 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.96e+03 |\n","| ep_rew_mean | 638 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3736 |\n","| fps | 238 |\n","| time_elapsed | 4068 |\n","| total_timesteps | 968613 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0273 |\n","| n_updates | 217153 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.97e+03 |\n","| ep_rew_mean | 638 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3740 |\n","| fps | 238 |\n","| time_elapsed | 4077 |\n","| total_timesteps | 970741 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00721 |\n","| n_updates | 217685 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.97e+03 |\n","| ep_rew_mean | 639 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3744 |\n","| fps | 238 |\n","| time_elapsed | 4082 |\n","| total_timesteps | 972109 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0856 |\n","| n_updates | 218027 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.97e+03 |\n","| ep_rew_mean | 640 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3748 |\n","| fps | 238 |\n","| time_elapsed | 4091 |\n","| total_timesteps | 974231 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0242 |\n","| n_updates | 218557 |\n","----------------------------------\n","Eval num_timesteps=975000, episode_reward=598.00 +/- 123.92\n","Episode length: 4229.00 +/- 267.55\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 4.23e+03 |\n","| mean_reward | 598 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 975000 |\n","| train/ 
| |\n","| learning_rate | 0.0001 |\n","| loss | 0.0162 |\n","| n_updates | 218749 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 639 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3752 |\n","| fps | 237 |\n","| time_elapsed | 4109 |\n","| total_timesteps | 975702 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0118 |\n","| n_updates | 218925 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 639 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3756 |\n","| fps | 237 |\n","| time_elapsed | 4116 |\n","| total_timesteps | 977529 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0199 |\n","| n_updates | 219382 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 640 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3760 |\n","| fps | 237 |\n","| time_elapsed | 4121 |\n","| total_timesteps | 978602 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0945 |\n","| n_updates | 219650 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 639 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3764 |\n","| fps | 237 |\n","| time_elapsed | 4126 |\n","| total_timesteps | 979931 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.014 |\n","| n_updates | 219982 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.97e+03 |\n","| ep_rew_mean | 639 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3768 |\n","| fps | 237 |\n","| time_elapsed | 4138 |\n","| total_timesteps | 982790 
|\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0127 |\n","| n_updates | 220697 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.99e+03 |\n","| ep_rew_mean | 645 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3772 |\n","| fps | 237 |\n","| time_elapsed | 4143 |\n","| total_timesteps | 983990 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0215 |\n","| n_updates | 220997 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 646 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3776 |\n","| fps | 237 |\n","| time_elapsed | 4151 |\n","| total_timesteps | 985987 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0159 |\n","| n_updates | 221496 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.98e+03 |\n","| ep_rew_mean | 644 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3780 |\n","| fps | 237 |\n","| time_elapsed | 4159 |\n","| total_timesteps | 987854 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0193 |\n","| n_updates | 221963 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 654 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3784 |\n","| fps | 237 |\n","| time_elapsed | 4164 |\n","| total_timesteps | 989271 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0124 |\n","| n_updates | 222317 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 655 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3788 |\n","| fps | 237 |\n","| time_elapsed | 4170 |\n","| 
total_timesteps | 990627 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0372 |\n","| n_updates | 222656 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.01e+03 |\n","| ep_rew_mean | 649 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3792 |\n","| fps | 237 |\n","| time_elapsed | 4178 |\n","| total_timesteps | 992869 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0365 |\n","| n_updates | 223217 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 649 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3796 |\n","| fps | 237 |\n","| time_elapsed | 4183 |\n","| total_timesteps | 993914 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.00835 |\n","| n_updates | 223478 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.01e+03 |\n","| ep_rew_mean | 645 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3800 |\n","| fps | 237 |\n","| time_elapsed | 4189 |\n","| total_timesteps | 995535 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0113 |\n","| n_updates | 223883 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.02e+03 |\n","| ep_rew_mean | 648 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3804 |\n","| fps | 237 |\n","| time_elapsed | 4195 |\n","| total_timesteps | 996842 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0087 |\n","| n_updates | 224210 |\n","----------------------------------\n","----------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.03e+03 |\n","| ep_rew_mean | 653 |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| episodes | 3808 |\n","| fps | 237 |\n","| time_elapsed | 
4203 |\n","| total_timesteps | 999076 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.0299 |\n","| n_updates | 224768 |\n","----------------------------------\n","Eval num_timesteps=1000000, episode_reward=493.00 +/- 74.07\n","Episode length: 3349.00 +/- 598.13\n","----------------------------------\n","| eval/ | |\n","| mean_ep_length | 3.35e+03 |\n","| mean_reward | 493 |\n","| rollout/ | |\n","| exploration_rate | 0.01 |\n","| time/ | |\n","| total_timesteps | 1000000 |\n","| train/ | |\n","| learning_rate | 0.0001 |\n","| loss | 0.126 |\n","| n_updates | 224999 |\n","----------------------------------\n","Saving to logs//dqn/SpaceInvadersNoFrameskip-v4_1\n"]}],"source":["!python -m rl_zoo3.train --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/ -c dqn.yml"]},{"cell_type":"markdown","metadata":{"id":"SeChoX-3SZfP"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PuocgdokSab9"},"outputs":[],"source":["!python -m rl_zoo3.train --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/ -c dqn.yml"]},{"cell_type":"markdown","metadata":{"id":"_dLomIiMKQaf"},"source":["## Let's evaluate our agent ๐Ÿ‘€\n","- RL-Baselines3-Zoo provides `enjoy.py`, a python script to evaluate our agent. 
In most RL libraries, we call the evaluation script `enjoy.py`.\n","- Let's evaluate it for 5000 timesteps ๐Ÿ”ฅ"]},{"cell_type":"code","execution_count":11,"metadata":{"id":"co5um_KeKbBJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696590557990,"user_tz":-60,"elapsed":26635,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"4b4834a0-9a3b-4ea0-dc15-ec928e85b790"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-06 11:08:54.641826: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2023-10-06 11:08:55.861749: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Loading latest experiment, id=1\n","Loading logs/dqn/SpaceInvadersNoFrameskip-v4_1/SpaceInvadersNoFrameskip-v4.zip\n","A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n","[Powered by Stella]\n","Stacking 4 frames\n","Atari Episode Score: 1315.00\n","Atari Episode Length 6701\n","Atari Episode Score: 415.00\n","Atari Episode Length 3195\n","Atari Episode Score: 520.00\n","Atari Episode Length 3261\n","Atari Episode Score: 600.00\n","Atari Episode Length 3773\n"]}],"source":["!python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 --no-render --n-timesteps 5000 --folder logs/"]},{"cell_type":"markdown","metadata":{"id":"Q24K1tyWSj7t"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"P_uSmwGRSk0z"},"outputs":[],"source":["!python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 --no-render --n-timesteps 5000 --folder logs/"]},{"cell_type":"markdown","metadata":{"id":"liBeTltiHJtr"},"source":["## Publish our trained model on the Hub ๐Ÿš€\n","Now that we saw we 
got good results after training, we can publish our trained model on the hub 🤗 with one line of code.\n","\n","\"Space"]},{"cell_type":"markdown","metadata":{"id":"ezbHS1q3HYVV"},"source":["By using `rl_zoo3.push_to_hub` **you evaluate, record a replay, generate a model card of your agent and push it to the hub**.\n","\n","This way:\n","- You can **showcase your work** 🔥\n","- You can **visualize your agent playing** 👀\n","- You can **share with the community an agent that others can use** 💾\n","- You can **access a leaderboard 🏆 to see how well your agent is performing compared to your classmates** 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard"]},{"cell_type":"markdown","metadata":{"id":"XMSeZRBiHk6X"},"source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1️⃣ (If it's not already done) create an account on HF ➡ https://huggingface.co/join\n","\n","2️⃣ Sign in, then store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n","\n","\"Create"]},{"cell_type":"markdown","metadata":{"id":"9O6FI0F8HnzE"},"source":["- Copy the token\n","- Run the cell below and paste the
token"]},{"cell_type":"code","execution_count":12,"metadata":{"id":"Ppu9yePwHrZX","colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["8b26dd256d9046aabeb92f0d9f68e527","b63eda5abce34f8cad97d98a2290ad32","6be3c02ba56c4b5e8f378f01ed938646","f02cd1d54b2f4b039d787720a621a133","c30a027f58464d5c913fb68d8df9c0b6","3d9c6fe75541435998fcc7d634a7fa32","755efdbda0c843a397a544599fd1b3ae","1018363e5bfc4b8bb29257ad54066450","5c0c048031fe42bf90f27a7a5a67d475","ad3159ed419f4ce2bf40254b6cf09d57","ffa44fd8b43548f39fb3ea1e03a40616","ec5b669d8555490498c62d8a883ba7c2","27f4a1c6884542349b2da60f265024c0","7889c55f3174477bb3872459b0eac46c","eb8ba9da4bbc4b17b92fb85e57cb51bf","d6b0ac2d5f234a489d5a51a7ab6b46e1","000c81960541450e9d3c11dbb2a5e458","f7455c63e55b46a29b8359770d37e9eb","7b50fca49ac743a28bf376fa924a9843","40ac7b3245134b4dbc3070c3ac838d53","bbd19e48cbcd4eeaaaf38d8b870161e6","9732fa3d953c49a3ab36f78bee7657f9","22c51cc5261643bbb17901aa5ed74821","60ba997532ab4370a40da3ef4e6a27b2","465d5a47a4d140df89456520d98af3ac","7273ac76d5b042febcfa62905d9c19aa","e828447348a5436c897b583680db3c25","ffde39d25d034a3ea2ac9e13cbd7dc99","4017f6fe2976446bbb2b2a531f513a31","25c8eeeef3e04858b9c9408ec091fd07","e88faaa6609e42d4892556f4f70c9d7f","65b0b42963bf459b8692456d366b6425"]},"executionInfo":{"status":"ok","timestamp":1696590569406,"user_tz":-60,"elapsed":514,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"cae82ef9-2722-4aad-f4b2-07a0d9f6d792"},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='
"]},{"cell_type":"code","execution_count":13,"metadata":{"id":"Ygk2sEktTDEw","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696590680481,"user_tz":-60,"elapsed":62163,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"98aff527-8b0a-44aa-c0ae-989403af5e0d"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-06 11:10:21.746519: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2023-10-06 11:10:22.702495: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Loading latest experiment, id=1\n","Loading logs/dqn/SpaceInvadersNoFrameskip-v4_1/SpaceInvadersNoFrameskip-v4.zip\n","A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n","[Powered by Stella]\n","Stacking 4 frames\n","Wrapping the env in a VecTransposeImage.\n","Uploading to jake-walker/dqn-SpaceInvadersNoFrameskip-v4, make sure to have the rights\n","\u001b[38;5;4mโ„น This function will save, evaluate, generate a video of your agent,\n","create a model card and push everything to the hub. It might take up to some\n","minutes if video generation is activated. 
This is a work in progress: if you\n","encounter a bug, please open an issue.\u001b[0m\n","Cloning https://huggingface.co/jake-walker/dqn-SpaceInvadersNoFrameskip-v4 into local empty directory.\n","WARNING:huggingface_hub.repository:Cloning https://huggingface.co/jake-walker/dqn-SpaceInvadersNoFrameskip-v4 into local empty directory.\n","Saving model to: hub/dqn-SpaceInvadersNoFrameskip-v4/dqn-SpaceInvadersNoFrameskip-v4\n","/usr/local/lib/python3.10/dist-packages/gymnasium/utils/passive_env_checker.py:335: UserWarning: \u001b[33mWARN: No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.\u001b[0m\n"," logger.warn(\n","Saving video to /tmp/tmp_1ypghzz/-step-0-to-step-1000.mp4\n","Moviepy - Building video /tmp/tmp_1ypghzz/-step-0-to-step-1000.mp4.\n","Moviepy - Writing video /tmp/tmp_1ypghzz/-step-0-to-step-1000.mp4\n","\n","Moviepy - Done !\n","Moviepy - video ready /tmp/tmp_1ypghzz/-step-0-to-step-1000.mp4\n","ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers\n"," built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)\n"," configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis 
--enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared\n"," libavutil 56. 70.100 / 56. 70.100\n"," libavcodec 58.134.100 / 58.134.100\n"," libavformat 58. 76.100 / 58. 76.100\n"," libavdevice 58. 13.100 / 58. 13.100\n"," libavfilter 7.110.100 / 7.110.100\n"," libswscale 5. 9.100 / 5. 9.100\n"," libswresample 3. 9.100 / 3. 9.100\n"," libpostproc 55. 9.100 / 55. 9.100\n","Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/tmp/tmp_1ypghzz/-step-0-to-step-1000.mp4':\n"," Metadata:\n"," major_brand : isom\n"," minor_version : 512\n"," compatible_brands: isomiso2avc1mp41\n"," encoder : Lavf58.29.100\n"," Duration: 00:00:33.40, start: 0.000000, bitrate: 54 kb/s\n"," Stream #0:0(und): Video: h264 (High) (avc1 / 0x31637661), yuv420p, 160x210, 52 kb/s, 30 fps, 30 tbr, 15360 tbn, 60 tbc (default)\n"," Metadata:\n"," handler_name : VideoHandler\n"," vendor_id : [0][0][0][0]\n","Stream mapping:\n"," Stream #0:0 -> #0:0 (h264 (native) -> h264 (libx264))\n","Press [q] to stop, [?] 
for help\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0musing cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mprofile High, level 1.2, 4:2:0, 8-bit\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0m264 - core 163 r3060 5db6aa6 - H.264/MPEG-4 AVC codec - Copyleft 2003-2021 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=3 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00\n","Output #0, mp4, to 'hub/dqn-SpaceInvadersNoFrameskip-v4/replay.mp4':\n"," Metadata:\n"," major_brand : isom\n"," minor_version : 512\n"," compatible_brands: isomiso2avc1mp41\n"," encoder : Lavf58.76.100\n"," Stream #0:0(und): Video: h264 (avc1 / 0x31637661), yuv420p(progressive), 160x210, q=2-31, 30 fps, 15360 tbn (default)\n"," Metadata:\n"," handler_name : VideoHandler\n"," vendor_id : [0][0][0][0]\n"," encoder : Lavc58.134.100 libx264\n"," Side data:\n"," cpb: bitrate max/min/avg: 0/0/0 buffer size: 0 vbv_delay: N/A\n","frame= 1002 fps=0.0 q=-1.0 Lsize= 216kB time=00:00:33.30 bitrate= 53.3kbits/s speed=36.2x \n","video:206kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 5.143969%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mframe I:5 Avg QP:18.04 size: 2726\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mframe P:524 Avg QP:22.26 size: 329\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mframe B:473 Avg QP:29.62 size: 51\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mconsecutive B-frames: 
31.6% 13.4% 8.7% 46.3%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mmb I I16..4: 20.4% 40.3% 39.3%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mmb P I16..4: 0.6% 1.0% 0.9% P16..4: 6.9% 2.7% 1.5% 0.0% 0.0% skip:86.4%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mmb B I16..4: 0.2% 0.1% 0.1% B16..8: 10.1% 0.8% 0.1% direct: 0.2% skip:88.5% L0:46.0% L1:53.5% BI: 0.5%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0m8x8 transform intra:40.1% inter:5.6%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mcoded y,uvDC,uvAC intra: 21.6% 41.0% 37.4% inter: 1.4% 1.8% 1.5%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mi16 v,h,dc,p: 43% 50% 7% 0%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mi8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 26% 8% 65% 1% 0% 0% 0% 0% 0%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mi4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 32% 11% 45% 2% 2% 3% 2% 2% 1%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mi8c dc,h,v,p: 54% 31% 14% 1%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mWeighted P-Frames: Y:0.0% UV:0.0%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mref P L0: 75.2% 4.7% 11.2% 8.8%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mref B L0: 82.8% 13.9% 3.3%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mref B L1: 95.9% 4.1%\n","\u001b[1;36m[libx264 @ 0x5ae5fedc3b00] \u001b[0mkb/s:50.33\n","\u001b[38;5;4mโ„น Pushing repo dqn-SpaceInvadersNoFrameskip-v4 to the Hugging Face\n","Hub\u001b[0m\n","Upload file dqn-SpaceInvadersNoFrameskip-v4.zip: 0% 1.00/26.0M [00:00 main\n","\n","WARNING:huggingface_hub.repository:To https://huggingface.co/jake-walker/dqn-SpaceInvadersNoFrameskip-v4\n"," 1e04436..a73d6d8 main -> main\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4.zip: 100% 26.0M/26.0M [00:07<00:00, 3.88MB/s]\n","\n","Upload file train_eval_metrics.zip: 100% 35.5k/35.5k [00:07<00:00, 5.19kB/s]\n","\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/policy.pth: 100% 12.9M/12.9M [00:07<00:00, 
1.93MB/s]\n","\n","\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/pytorch_variables.pth: 100% 431/431 [00:07<00:00, 61.4B/s] \n","\n","\n","\n","\n","Upload file replay.mp4: 100% 216k/216k [00:07<00:00, 31.6kB/s]\n","\n","\n","\n","\n","\n","Upload file dqn-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth: 100% 12.9M/12.9M [00:07<00:00, 1.93MB/s]\n","\u001b[38;5;4mโ„น Your model is pushed to the hub. You can view your model here:\n","https://huggingface.co/jake-walker/dqn-SpaceInvadersNoFrameskip-v4\u001b[0m\n"]}],"source":["!python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 --repo-name dqn-SpaceInvadersNoFrameskip-v4 -orga jake-walker -f logs/"]},{"cell_type":"markdown","metadata":{"id":"otgpa0rhS9wR"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"_HQNlAXuEhci"},"outputs":[],"source":["!python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 --repo-name dqn-SpaceInvadersNoFrameskip-v4 -orga ThomasSimonini -f logs/"]},{"cell_type":"markdown","metadata":{"id":"0D4F5zsTTJ-L"},"source":["###."]},{"cell_type":"markdown","metadata":{"id":"ff89kd2HL1_s"},"source":["Congrats ๐Ÿฅณ you've just trained and uploaded your first Deep Q-Learning agent using RL-Baselines-3 Zoo. The script above should have displayed a link to a model repository such as https://huggingface.co/ThomasSimonini/dqn-SpaceInvadersNoFrameskip-v4. 
When you go to this link, you can:\n","\n","- See a **video preview of your agent** at the right.\n","- Click \"Files and versions\" to see all the files in the repository.\n","- Click \"Use in stable-baselines3\" to get a code snippet that shows how to load the model.\n","- A model card (`README.md` file) which gives a description of the model and the hyperparameters you used.\n","\n","Under the hood, the Hub uses git-based repositories (don't worry if you don't know what git is), which means you can update the model with new versions as you experiment and improve your agent.\n","\n","**Compare the results of your agents with your classmates** using the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) ๐Ÿ†"]},{"cell_type":"markdown","metadata":{"id":"fyRKcCYY-dIo"},"source":["## Load a powerful trained model ๐Ÿ”ฅ\n","- The Stable-Baselines3 team uploaded **more than 150 trained Deep Reinforcement Learning agents on the Hub**.\n","\n","You can find them here: ๐Ÿ‘‰ https://huggingface.co/sb3\n","\n","Some examples:\n","- Asteroids: https://huggingface.co/sb3/dqn-AsteroidsNoFrameskip-v4\n","- Beam Rider: https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4\n","- Breakout: https://huggingface.co/sb3/dqn-BreakoutNoFrameskip-v4\n","- Road Runner: https://huggingface.co/sb3/dqn-RoadRunnerNoFrameskip-v4\n","\n","Let's load an agent playing Beam Rider: https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4"]},{"cell_type":"code","execution_count":14,"metadata":{"id":"B-9QVFIROI5Y","colab":{"base_uri":"https://localhost:8080/","height":231},"executionInfo":{"status":"ok","timestamp":1696590746205,"user_tz":-60,"elapsed":1039,"user":{"displayName":"Jake 
Walker","userId":"16416010557495618096"}},"outputId":"cb55980e-9e1e-4616-c5d0-747fa1dc26fe"},"outputs":[{"output_type":"display_data","data":{"text/plain":[""],"text/html":["\n"]},"metadata":{}}],"source":["%%html\n",""]},{"cell_type":"markdown","metadata":{"id":"7ZQNY_r6NJtC"},"source":["1. We download the model using `rl_zoo3.load_from_hub`, and place it in a new folder that we can call `rl_trained`"]},{"cell_type":"code","execution_count":15,"metadata":{"id":"OdBNZHy0NGTR","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696590822816,"user_tz":-60,"elapsed":17577,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"43d0ceec-f667-4a84-dc19-2f33112077b0"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-06 11:13:28.520597: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2023-10-06 11:13:29.515283: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Downloading from https://huggingface.co/sb3/dqn-BreakoutNoFrameskip-v4\n","Downloading (โ€ฆ)utNoFrameskip-v4.zip: 100% 27.2M/27.2M [00:03<00:00, 8.26MB/s]\n","Downloading (โ€ฆ)olve/main/config.yml: 100% 548/548 [00:00<00:00, 2.98MB/s]\n","No normalization file\n","Downloading (โ€ฆ)esolve/main/args.yml: 100% 886/886 [00:00<00:00, 4.29MB/s]\n","Downloading (โ€ฆ)/main/env_kwargs.yml: 100% 3.00/3.00 [00:00<00:00, 12.9kB/s]\n","Downloading train_eval_metrics.zip: 100% 346k/346k [00:00<00:00, 369kB/s]\n","Saving to rl_trained/dqn/BreakoutNoFrameskip-v4_1\n"]}],"source":["# Download model and save it into the logs/ folder\n","!python -m rl_zoo3.load_from_hub --algo dqn --env BreakoutNoFrameskip-v4 -orga sb3 -f 
rl_trained/"]},{"cell_type":"markdown","metadata":{"id":"LFt6hmWsNdBo"},"source":["2. Let's evaluate it for 5000 timesteps"]},{"cell_type":"code","execution_count":16,"metadata":{"id":"aOxs0rNuN0uS","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696590862001,"user_tz":-60,"elapsed":26104,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"fe78ee25-82e2-49bd-8cb8-57f271d1f61b"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-06 11:13:57.837179: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2023-10-06 11:13:58.862053: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","Loading latest experiment, id=1\n","Loading rl_trained/dqn/BreakoutNoFrameskip-v4_1/BreakoutNoFrameskip-v4.zip\n","A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)\n","[Powered by Stella]\n","Stacking 4 frames\n","/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/save_util.py:166: UserWarning: Could not deserialize object exploration_schedule. Consider using `custom_objects` argument to replace this object.\n","Exception: 'bytes' object cannot be interpreted as an integer\n"," warnings.warn(\n","/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/vec_env/patch_gym.py:95: UserWarning: You loaded a model that was trained using OpenAI Gym.
We strongly recommend transitioning to Gymnasium by saving that model again.\n"," warnings.warn(\n","Atari Episode Score: 386.00\n","Atari Episode Length 7878\n","Atari Episode Score: 374.00\n","Atari Episode Length 9586\n"]}],"source":["!python -m rl_zoo3.enjoy --algo dqn --env BreakoutNoFrameskip-v4 -n 5000 -f rl_trained/ --no-render"]},{"cell_type":"markdown","metadata":{"id":"kxMDuDfPON57"},"source":["Why not try to train your own **Deep Q-Learning Agent playing BeamRiderNoFrameskip-v4? 🏆.**\n","\n","If you want to try, check https://huggingface.co/sb3/dqn-BeamRiderNoFrameskip-v4#hyperparameters **in the model card, you have the hyperparameters of the trained agent.**"]},{"cell_type":"markdown","metadata":{"id":"xL_ZtUgpOuY6"},"source":["But finding hyperparameters can be a daunting task. Fortunately, we'll see in the next Unit how we can **use Optuna for optimizing the Hyperparameters 🔥.**\n"]},{"cell_type":"markdown","metadata":{"id":"-pqaco8W-huW"},"source":["## Some additional challenges 🏆\n","The best way to learn **is to try things on your own**!\n","\n","In the [Leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) you will find your agents. Can you get to the top?\n","\n","Here's a list of environments you can try to train your agent with:\n","- BeamRiderNoFrameskip-v4\n","- BreakoutNoFrameskip-v4\n","- EnduroNoFrameskip-v4\n","- PongNoFrameskip-v4\n","\n","Also, **if you want to learn to implement Deep Q-Learning by yourself**, you definitely should look at the CleanRL implementation: https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/dqn_atari.py\n","\n","\"Environments\"/"]},{"cell_type":"markdown","metadata":{"id":"paS-XKo4-kmu"},"source":["________________________________________________________________________\n","Congrats on finishing this chapter!\n","\n","If you still feel confused by all these elements...it's totally normal!
**This was the same for me and for all people who studied RL.**\n","\n","Take time to really **grasp the material before continuing and try the additional challenges**. It’s important to master these elements and have a solid foundation.\n","\n","In the next unit, **we’re going to learn about [Optuna](https://optuna.org/)**. One of the most critical tasks in Deep Reinforcement Learning is to find a good set of training hyperparameters. And Optuna is a library that helps you to automate the search.\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"5WRx7tO7-mvC"},"source":["\n","\n","### This is a course built with you 👷🏿‍♀️\n","\n","Finally, we want to improve and update the course iteratively with your feedback. If you have some, please fill out this form 👉 https://forms.gle/3HgA7bEHwAmmLfwh9\n","\n","We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the GitHub Repo](https://github.com/huggingface/deep-rl-class/issues)."]},{"cell_type":"markdown","source":["See you on Bonus unit 2! 
๐Ÿ”ฅ"],"metadata":{"id":"Kc3udPT-RcXc"}},{"cell_type":"markdown","metadata":{"id":"fS3Xerx0fIMV"},"source":["### Keep Learning, Stay Awesome ๐Ÿค—"]}],"metadata":{"colab":{"provenance":[{"file_id":"https://github.com/huggingface/deep-rl-class/blob/main/notebooks/unit3/unit3.ipynb","timestamp":1696584471906}],"gpuType":"T4","collapsed_sections":["SeChoX-3SZfP","Q24K1tyWSj7t","otgpa0rhS9wR"]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.6"},"varInspector":{"cols":{"lenName":16,"lenType":16,"lenVar":40},"kernels_config":{"python":{"delete_cmd_postfix":"","delete_cmd_prefix":"del ","library":"var_list.py","varRefreshCmd":"print(var_dic_list())"},"r":{"delete_cmd_postfix":") ","delete_cmd_prefix":"rm(","library":"var_list.r","varRefreshCmd":"cat(var_dic_list()) "}},"types_to_exclude":["module","function","builtin_function_or_method","instance","_Feature"],"window_display":false},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"8b26dd256d9046aabeb92f0d9f68e527":{"model_module":"@jupyter-widgets/controls","model_name":"VBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":["IPY_MODEL_bbd19e48cbcd4eeaaaf38d8b870161e6","IPY_MODEL_9732fa3d953c49a3ab36f78bee7657f9","IPY_MODEL_22c51cc5261643bbb17901aa5ed74821","IPY_MODEL_60ba997532ab4370a40da3ef4e6a27b2"],"layout":"IPY_MODEL_755efdbda0c843a397a544599fd1b3ae"}},"b63eda5abce34f8cad97d98a2290ad32":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_modu
le":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1018363e5bfc4b8bb29257ad54066450","placeholder":"โ€‹","style":"IPY_MODEL_5c0c048031fe42bf90f27a7a5a67d475","value":"

Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
"}},"6be3c02ba56c4b5e8f378f01ed938646":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_ad3159ed419f4ce2bf40254b6cf09d57","placeholder":"โ€‹","style":"IPY_MODEL_ffa44fd8b43548f39fb3ea1e03a40616","value":""}},"f02cd1d54b2f4b039d787720a621a133":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_ec5b669d8555490498c62d8a883ba7c2","style":"IPY_MODEL_27f4a1c6884542349b2da60f265024c0","value":true}},"c30a027f58464d5c913fb68d8df9c0b6":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_7889c55f3174477bb3872459b0eac46c","style":"IPY_MODEL_eb8ba9da4bbc4b17b92fb85e57cb51bf","tooltip":""}},"3d9c6fe75541435998fcc7d634a7fa32":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d6b0ac2d5f234a489d5a51a7ab6b46e1","placeholder":"โ€‹","style":"IPY_MODEL_000c81960541450e9d3c11dbb2a5e458","value":"\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks.
"}},"755efdbda0c843a397a544599fd1b3ae":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"1018363e5bfc4b8bb29257ad54066450":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5c0c048031fe42bf90f27a7a5a67d475":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ad3159ed419f4ce2bf40254b6cf09d57":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ffa44fd8b43548f39fb3ea1e03a40616":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ec5b669d8555490498c62d8a883ba7c2":{"model_module":"@
jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"27f4a1c6884542349b2da60f265024c0":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7889c55f3174477bb3872459b0eac46c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":nul
l,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"eb8ba9da4bbc4b17b92fb85e57cb51bf":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"d6b0ac2d5f234a489d5a51a7ab6b46e1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"000c81960541450e9d3c11dbb2a5e458":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","
model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"f7455c63e55b46a29b8359770d37e9eb":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7b50fca49ac743a28bf376fa924a9843","placeholder":"โ€‹","style":"IPY_MODEL_40ac7b3245134b4dbc3070c3ac838d53","value":"Connecting..."}},"7b50fca49ac743a28bf376fa924a9843":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"40ac7b3245134b4dbc3070c3ac838d53":{"model_module":"@jupyter-widgets/controls","model_nam
e":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"bbd19e48cbcd4eeaaaf38d8b870161e6":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_465d5a47a4d140df89456520d98af3ac","placeholder":"โ€‹","style":"IPY_MODEL_7273ac76d5b042febcfa62905d9c19aa","value":"Token is valid (permission: write)."}},"9732fa3d953c49a3ab36f78bee7657f9":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e828447348a5436c897b583680db3c25","placeholder":"โ€‹","style":"IPY_MODEL_ffde39d25d034a3ea2ac9e13cbd7dc99","value":"Your token has been saved in your configured git credential helpers 
(store)."}},"22c51cc5261643bbb17901aa5ed74821":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4017f6fe2976446bbb2b2a531f513a31","placeholder":"โ€‹","style":"IPY_MODEL_25c8eeeef3e04858b9c9408ec091fd07","value":"Your token has been saved to /root/.cache/huggingface/token"}},"60ba997532ab4370a40da3ef4e6a27b2":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e88faaa6609e42d4892556f4f70c9d7f","placeholder":"โ€‹","style":"IPY_MODEL_65b0b42963bf459b8692456d366b6425","value":"Login 
successful"}},"465d5a47a4d140df89456520d98af3ac":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7273ac76d5b042febcfa62905d9c19aa":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e828447348a5436c897b583680db3c25":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto
_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ffde39d25d034a3ea2ac9e13cbd7dc99":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4017f6fe2976446bbb2b2a531f513a31":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"25c8eeeef3e04858b9c9408ec091fd07":{"model_module":"@j
upyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e88faaa6609e42d4892556f4f70c9d7f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"65b0b42963bf459b8692456d366b6425":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} \ No newline at end of file diff --git a/HF DeepRL Course/Unit4 - Policy Gradient.ipynb b/HF DeepRL Course/Unit4 - Policy Gradient.ipynb new file mode 100644 
index 0000000..d0bb970 --- /dev/null +++ b/HF DeepRL Course/Unit4 - Policy Gradient.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"CjRWziAVU2lZ"},"source":["# Unit 4: Code your first Deep Reinforcement Learning Algorithm with PyTorch: Reinforce. And test its robustness ๐Ÿ’ช\n","\n","\"thumbnail\"/\n","\n","\n","In this notebook, you'll code your first Deep Reinforcement Learning algorithm from scratch: Reinforce (also called Monte Carlo Policy Gradient).\n","\n","Reinforce is a *Policy-based method*: a Deep Reinforcement Learning algorithm that tries **to optimize the policy directly without using an action-value function**.\n","\n","More precisely, Reinforce is a *Policy-gradient method*, a subclass of *Policy-based methods* that aims **to optimize the policy directly by estimating the weights of the optimal policy using gradient ascent**.\n","\n","To test its robustness, we're going to train it in 2 different simple environments:\n","- Cartpole-v1\n","- PixelcopterEnv\n","\n","โฌ‡๏ธ Here is an example of what **you will achieve at the end of this notebook.** โฌ‡๏ธ"]},{"cell_type":"markdown","source":[" \"Environments\"/\n"],"metadata":{"id":"s4rBom2sbo7S"}},{"cell_type":"markdown","source":["### ๐ŸŽฎ Environments:\n","\n","- [CartPole-v1](https://www.gymlibrary.dev/environments/classic_control/cart_pole/)\n","- [PixelCopter](https://pygame-learning-environment.readthedocs.io/en/latest/user/games/pixelcopter.html)\n","\n","### ๐Ÿ“š RL-Library:\n","\n","- Python\n","- PyTorch\n","\n","\n","We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the GitHub Repo](https://github.com/huggingface/deep-rl-class/issues)."],"metadata":{"id":"BPLwsPajb1f8"}},{"cell_type":"markdown","metadata":{"id":"L_WSo0VUV99t"},"source":["## Objectives of this notebook ๐Ÿ†\n","At the end of the notebook, you will:\n","- Be able to **code from scratch a Reinforce algorithm using 
PyTorch.**\n","- Be able to **test the robustness of your agent using simple environments.**\n","- Be able to **push your trained agent to the Hub** with a nice video replay and an evaluation score ๐Ÿ”ฅ."]},{"cell_type":"markdown","metadata":{"id":"lEPrZg2eWa4R"},"source":["## This notebook is from the Deep Reinforcement Learning Course\n","\"Deep"]},{"cell_type":"markdown","metadata":{"id":"6p5HnEefISCB"},"source":["In this free course, you will:\n","\n","- ๐Ÿ“– Study Deep Reinforcement Learning in **theory and practice**.\n","- ๐Ÿง‘โ€๐Ÿ’ป Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- ๐Ÿค– Train **agents in unique environments**\n","\n","And more check ๐Ÿ“š the syllabus ๐Ÿ‘‰ https://simoninithomas.github.io/deep-rl-course\n","\n","Donโ€™t forget to **sign up to the course** (we are collecting your email to be able toย **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","\n","The best way to keep in touch is to join our discord server to exchange with the community and with us ๐Ÿ‘‰๐Ÿป https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"mjY-eq3eWh9O"},"source":["## Prerequisites ๐Ÿ—๏ธ\n","Before diving into the notebook, you need to:\n","\n","๐Ÿ”ฒ ๐Ÿ“š [Study Policy Gradients by reading Unit 4](https://huggingface.co/deep-rl-course/unit4/introduction)"]},{"cell_type":"markdown","source":["# Let's code Reinforce algorithm from scratch ๐Ÿ”ฅ\n","\n","\n","To validate this hands-on for the certification process, you need to push your trained models to the Hub.\n","\n","- Get a result of >= 350 for `Cartpole-v1`.\n","- Get a result of >= 5 for `PixelCopter`.\n","\n","To find your result, go to the leaderboard and find your model, **the result = mean_reward - std of reward**. 
**If you don't see your model on the leaderboard, go to the bottom of the leaderboard page and click on the refresh button**.\n","\n","For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process\n"],"metadata":{"id":"Bsh4ZAamchSl"}},{"cell_type":"markdown","source":["## A piece of advice 💡\n","It's better to run this colab in a copy on your Google Drive, so that **if it times out** you still have the saved notebook on your Google Drive and do not need to fill everything from scratch.\n","\n","To do that you can either do `Ctrl + S` or `File > Save a copy in Google Drive.`"],"metadata":{"id":"JoTC9o2SczNn"}},{"cell_type":"markdown","source":["## Set the GPU 💪\n","- To **accelerate the agent's training, we'll use a GPU**. To do that, go to `Runtime > Change Runtime type`\n","\n","\"GPU"],"metadata":{"id":"PU4FVzaoM6fC"}},{"cell_type":"markdown","source":["- `Hardware Accelerator > GPU`\n","\n","\"GPU"],"metadata":{"id":"KV0NyFdQM9ZG"}},{"cell_type":"markdown","source":["## Create a virtual display 🖥\n","\n","During the notebook, we'll need to generate a replay video. 
To do so, with colab, **we need to have a virtual screen to be able to render the environment** (and thus record the frames).\n","\n","Hence the following cell will install the libraries and create and run a virtual screen 🖥"],"metadata":{"id":"bTpYcVZVMzUI"}},{"cell_type":"code","execution_count":1,"metadata":{"id":"jV6wjQ7Be7p5","executionInfo":{"status":"ok","timestamp":1697189479738,"user_tz":-60,"elapsed":26147,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["%%capture\n","!apt install python-opengl\n","!apt install ffmpeg\n","!apt install xvfb\n","!pip install pyvirtualdisplay\n","!pip install pyglet==1.5.1"]},{"cell_type":"code","source":["# Virtual display\n","from pyvirtualdisplay import Display\n","\n","virtual_display = Display(visible=0, size=(1400, 900))\n","virtual_display.start()"],"metadata":{"id":"Sr-Nuyb1dBm0","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697189480454,"user_tz":-60,"elapsed":720,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"0824fafa-e5c9-4760-807c-22ac6f4a90dd"},"execution_count":2,"outputs":[{"output_type":"execute_result","data":{"text/plain":[""]},"metadata":{},"execution_count":2}]},{"cell_type":"markdown","metadata":{"id":"tjrLfPFIW8XK"},"source":["## Install the dependencies 🔽\n","The first step is to install the dependencies. We’ll install multiple ones:\n","\n","- `gym`\n","- `gym-games`: Extra gym environments made with PyGame.\n","- `huggingface_hub`: 🤗 works as a central place where anyone can share and explore models and datasets. It has versioning, metrics, visualizations, and other features that will allow you to easily collaborate with others.\n","\n","You may be wondering why we install gym and not gymnasium, a more recent version of gym? 
**Because the gym-games we are using are not updated yet with gymnasium**.\n","\n","The differences you'll encounter here:\n","- In `gym` we don't have `terminated` and `truncated` but only `done`.\n","- In `gym` using `env.step()` returns `state, reward, done, info`\n","\n","You can learn more about the differences between Gym and Gymnasium here ๐Ÿ‘‰ https://gymnasium.farama.org/content/migration-guide/\n","\n","\n","You can see here all the Reinforce models available ๐Ÿ‘‰ https://huggingface.co/models?other=reinforce\n","\n","And you can find all the Deep Reinforcement Learning models here ๐Ÿ‘‰ https://huggingface.co/models?pipeline_tag=reinforcement-learning\n"]},{"cell_type":"code","source":["!pip install -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt"],"metadata":{"id":"e8ZVi-uydpgL","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697189499517,"user_tz":-60,"elapsed":12635,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"4631edda-8e5a-4a6c-bff0-d486e8270076"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting git+https://github.com/ntasfi/PyGame-Learning-Environment.git (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 1))\n"," Cloning https://github.com/ntasfi/PyGame-Learning-Environment.git to /tmp/pip-req-build-pbury65s\n"," Running command git clone --filter=blob:none --quiet https://github.com/ntasfi/PyGame-Learning-Environment.git /tmp/pip-req-build-pbury65s\n"," Resolved https://github.com/ntasfi/PyGame-Learning-Environment.git to commit 3dbe79dc0c35559bb441b9359948aabf9bb3d331\n"," Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","Collecting git+https://github.com/simoninithomas/gym-games (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2))\n"," Cloning https://github.com/simoninithomas/gym-games to /tmp/pip-req-build-cjb_39h_\n"," Running command git clone --filter=blob:none --quiet https://github.com/simoninithomas/gym-games /tmp/pip-req-build-cjb_39h_\n"," Resolved https://github.com/simoninithomas/gym-games to commit f31695e4ba028400628dc054ee8a436f28193f0b\n"," Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting huggingface_hub (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3))\n"," Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: imageio-ffmpeg in /usr/local/lib/python3.10/dist-packages (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 4)) (0.4.9)\n","Collecting pyyaml==6.0 (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 5))\n"," Downloading PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (682 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m682.2/682.2 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from ple==0.0.1->-r 
https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 1)) (1.23.5)\n","Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from ple==0.0.1->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 1)) (9.4.0)\n","Requirement already satisfied: gym>=0.13.0 in /usr/local/lib/python3.10/dist-packages (from gym-games==1.0.4->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2)) (0.25.2)\n","Requirement already satisfied: setuptools>=65.5.1 in /usr/local/lib/python3.10/dist-packages (from gym-games==1.0.4->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2)) (67.7.2)\n","Requirement already satisfied: pygame>=1.9.6 in /usr/local/lib/python3.10/dist-packages (from gym-games==1.0.4->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2)) (2.5.2)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (3.12.4)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (2023.6.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (2.31.0)\n","Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r 
https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (4.66.1)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (4.5.0)\n","Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (23.2)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gym>=0.13.0->gym-games==1.0.4->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2)) (2.2.1)\n","Requirement already satisfied: gym-notices>=0.0.4 in /usr/local/lib/python3.10/dist-packages (from gym>=0.13.0->gym-games==1.0.4->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 2)) (0.0.8)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (2.0.6)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages 
(from requests->huggingface_hub->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit4/requirements-unit4.txt (line 3)) (2023.7.22)\n","Building wheels for collected packages: ple, gym-games\n"," Building wheel for ple (setup.py) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for ple: filename=ple-0.0.1-py3-none-any.whl size=50770 sha256=d084c36176f5d14eac837596716029b54f4c9e9fd2802b435fa1b61b6d2a1405\n"," Stored in directory: /tmp/pip-ephem-wheel-cache-g1e8y7l_/wheels/f8/31/ca/a64a7ce73540465412d82813780d062db53b90e3f42a4ecb7f\n"," Building wheel for gym-games (setup.py) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for gym-games: filename=gym_games-1.0.4-py3-none-any.whl size=17306 sha256=fd314c50edfecfb70d2cab842c3149422f794de3d81be5548d42ef762b6f352d\n"," Stored in directory: /tmp/pip-ephem-wheel-cache-g1e8y7l_/wheels/ca/bf/6b/7d631626202ebb033c908a688d1862ff4d948c34cf621d7dc9\n","Successfully built ple gym-games\n","Installing collected packages: pyyaml, ple, huggingface_hub, gym-games\n"," Attempting uninstall: pyyaml\n"," Found existing installation: PyYAML 6.0.1\n"," Uninstalling PyYAML-6.0.1:\n"," Successfully uninstalled PyYAML-6.0.1\n","Successfully installed gym-games-1.0.4 huggingface_hub-0.18.0 ple-0.0.1 pyyaml-6.0\n"]}]},{"cell_type":"markdown","metadata":{"id":"AAHAq6RZW3rn"},"source":["## Import the packages ๐Ÿ“ฆ\n","In addition to import the installed libraries, we also import:\n","\n","- `imageio`: A library that will help us to generate a replay video\n","\n"]},{"cell_type":"code","execution_count":4,"metadata":{"id":"V8oadoJSWp7C","executionInfo":{"status":"ok","timestamp":1697189513399,"user_tz":-60,"elapsed":7285,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["import numpy as np\n","\n","from collections import deque\n","\n","import matplotlib.pyplot as plt\n","%matplotlib inline\n","\n","# PyTorch\n","import torch\n","import torch.nn as nn\n","import 
torch.nn.functional as F\n","import torch.optim as optim\n","from torch.distributions import Categorical\n","\n","# Gym\n","import gym\n","import gym_pygame\n","\n","# Hugging Face Hub\n","from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.\n","import imageio"]},{"cell_type":"markdown","source":["## Check if we have a GPU\n","\n","- Let's check if we have a GPU\n","- If it's the case you should see `cuda:0`"],"metadata":{"id":"RfxJYdMeeVgv"}},{"cell_type":"code","execution_count":5,"metadata":{"id":"kaJu5FeZxXGY","executionInfo":{"status":"ok","timestamp":1697189516797,"user_tz":-60,"elapsed":238,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"U5TNYa14aRav","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697189519400,"user_tz":-60,"elapsed":208,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"a0ed2a82-0d96-4208-ff3a-c9aa2940a735"},"outputs":[{"output_type":"stream","name":"stdout","text":["cuda:0\n"]}],"source":["print(device)"]},{"cell_type":"markdown","metadata":{"id":"PBPecCtBL_pZ"},"source":["We're now ready to implement our Reinforce algorithm 🔥"]},{"cell_type":"markdown","metadata":{"id":"8KEyKYo2ZSC-"},"source":["# First agent: Playing CartPole-v1 🤖"]},{"cell_type":"markdown","metadata":{"id":"haLArKURMyuF"},"source":["## Create the CartPole environment and understand how it works\n","### [The environment 🎮](https://www.gymlibrary.dev/environments/classic_control/cart_pole/)\n"]},{"cell_type":"markdown","metadata":{"id":"AH_TaLKFXo_8"},"source":["### Why do we use a simple environment like CartPole-v1?\n","As explained in [Reinforcement Learning Tips and 
Tricks](https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html), when you implement your agent from scratch you need **to be sure that it works correctly and find bugs with easy environments before going deeper**, since finding bugs will be much easier in simple environments.\n","\n","\n","> Try to have some “sign of life” on toy problems\n","\n","\n","> Validate the implementation by making it run on harder and harder envs (you can compare results against the RL zoo). You usually need to run hyperparameter optimization for that step.\n","___\n","### The CartPole-v1 environment\n","\n","> A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track. The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces in the left and right direction on the cart.\n","\n","\n","\n","So, we start with CartPole-v1. The goal is to push the cart left or right **so that the pole stays in the equilibrium.**\n","\n","The episode ends if:\n","- The pole Angle is greater than ±12°\n","- Cart Position is greater than ±2.4\n","- Episode length is greater than 500\n","\n","We get a reward 💰 of +1 every timestep the Pole stays in the equilibrium."]},{"cell_type":"code","execution_count":141,"metadata":{"id":"POOOk15_K6KA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697194221950,"user_tz":-60,"elapsed":219,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"779b7b4e-5a21-430f-c7d4-622eed7d1451"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/gym/core.py:317: DeprecationWarning: \u001b[33mWARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. 
This will be the default behaviour in future.\u001b[0m\n"," deprecation(\n","/usr/local/lib/python3.10/dist-packages/gym/wrappers/step_api_compatibility.py:39: DeprecationWarning: \u001b[33mWARN: Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.\u001b[0m\n"," deprecation(\n"]}],"source":["env_id = \"CartPole-v1\"\n","# Create the env\n","env = gym.make(env_id)\n","\n","# Create the evaluation env\n","eval_env = gym.make(env_id)\n","\n","# Get the state space and action space\n","s_size = env.observation_space.shape[0]\n","a_size = env.action_space.n"]},{"cell_type":"code","execution_count":142,"metadata":{"id":"FMLFrjiBNLYJ","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697194222821,"user_tz":-60,"elapsed":3,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"bedcfa58-1a35-42e5-cb4b-e9f9c3016f1f"},"outputs":[{"output_type":"stream","name":"stdout","text":["_____OBSERVATION SPACE_____ \n","\n","The State Space is: 4\n","Sample observation [-2.7834890e+00 -4.5419460e+37 -7.4647829e-02 -1.0727393e+37]\n"]}],"source":["print(\"_____OBSERVATION SPACE_____ \\n\")\n","print(\"The State Space is: \", s_size)\n","print(\"Sample observation\", env.observation_space.sample()) # Get a random observation"]},{"cell_type":"code","execution_count":143,"metadata":{"id":"Lu6t4sRNNWkN","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697194223482,"user_tz":-60,"elapsed":2,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"ebd08bdc-e037-4cd9-ef4d-e46da1d370db"},"outputs":[{"output_type":"stream","name":"stdout","text":["\n"," _____ACTION SPACE_____ \n","\n","The Action Space is: 2\n","Action Space Sample 1\n"]}],"source":["print(\"\\n _____ACTION SPACE_____ \\n\")\n","print(\"The Action Space is: \", 
a_size)\n","print(\"Action Space Sample\", env.action_space.sample()) # Take a random action"]},{"cell_type":"markdown","metadata":{"id":"7SJMJj3WaFOz"},"source":["## Let's build the Reinforce Architecture\n","This implementation is based on two implementations:\n","- [PyTorch official Reinforcement Learning example](https://github.com/pytorch/examples/blob/main/reinforcement_learning/reinforce.py)\n","- [Udacity Reinforce](https://github.com/udacity/deep-reinforcement-learning/blob/master/reinforce/REINFORCE.ipynb)\n","- [Improvement of the integration by Chris1nexus](https://github.com/huggingface/deep-rl-class/pull/95)\n","\n","\"Reinforce\"/"]},{"cell_type":"markdown","metadata":{"id":"49kogtxBODX8"},"source":["So we want:\n","- Two fully connected layers (fc1 and fc2).\n","- Using ReLU as activation function of fc1\n","- Using Softmax to output a probability distribution over actions"]},{"cell_type":"code","execution_count":144,"metadata":{"id":"w2LHcHhVZvPZ","executionInfo":{"status":"ok","timestamp":1697194225352,"user_tz":-60,"elapsed":211,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["class Policy(nn.Module):\n"," def __init__(self, s_size, a_size, h_size):\n"," super(Policy, self).__init__()\n"," # Create two fully connected layers\n"," self.fc1 = nn.Linear(s_size, h_size)\n"," self.fc2 = nn.Linear(h_size, a_size)\n","\n"," def forward(self, x):\n"," # Define the forward pass\n"," # state goes to fc1 then we apply ReLU activation function\n"," x = F.relu(self.fc1(x))\n"," # fc1 outputs goes to fc2\n"," x = self.fc2(x)\n"," # We output the softmax\n"," return F.softmax(x, dim=1)\n","\n"," def act(self, state):\n"," \"\"\"\n"," Given a state, take action\n"," \"\"\"\n"," state = torch.from_numpy(state).float().unsqueeze(0).to(device)\n"," probs = self.forward(state).cpu()\n"," m = Categorical(probs)\n"," action = m.sample()\n"," return action.item(), 
m.log_prob(action)"]},{"cell_type":"markdown","metadata":{"id":"rOMrdwSYOWSC"},"source":["### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jGdhRSVrOV4K"},"outputs":[],"source":["class Policy(nn.Module):\n"," def __init__(self, s_size, a_size, h_size):\n"," super(Policy, self).__init__()\n"," self.fc1 = nn.Linear(s_size, h_size)\n"," self.fc2 = nn.Linear(h_size, a_size)\n","\n"," def forward(self, x):\n"," x = F.relu(self.fc1(x))\n"," x = self.fc2(x)\n"," return F.softmax(x, dim=1)\n","\n"," def act(self, state):\n"," state = torch.from_numpy(state).float().unsqueeze(0).to(device)\n"," probs = self.forward(state).cpu()\n"," m = Categorical(probs)\n"," action = np.argmax(m)\n"," return action.item(), m.log_prob(action)"]},{"cell_type":"markdown","metadata":{"id":"ZTGWL4g2eM5B"},"source":["I make a mistake, can you guess where?\n","\n","- To find out let's make a forward pass:"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lwnqGBCNePor"},"outputs":[],"source":["debug_policy = Policy(s_size, a_size, 64).to(device)\n","debug_policy.act(env.reset())"]},{"cell_type":"markdown","metadata":{"id":"14UYkoxCPaor"},"source":["- Here we see that the error says `ValueError: The value argument to log_prob must be a Tensor`\n","\n","- It means that `action` in `m.log_prob(action)` must be a Tensor **but it's not.**\n","\n","- Do you know why? Check the act function and try to see why it does not work.\n","\n","Advice ๐Ÿ’ก: Something is wrong in this implementation. 
Remember that in the act function **we want to sample an action from the probability distribution over actions**.\n"]},{"cell_type":"markdown","metadata":{"id":"gfGJNZBUP7Vn"},"source":["### (Real) Solution"]},{"cell_type":"code","execution_count":145,"metadata":{"id":"Ho_UHf49N9i4","executionInfo":{"status":"ok","timestamp":1697194227568,"user_tz":-60,"elapsed":234,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["class Policy(nn.Module):\n"," def __init__(self, s_size, a_size, h_size):\n"," super(Policy, self).__init__()\n"," self.fc1 = nn.Linear(s_size, h_size)\n"," self.fc2 = nn.Linear(h_size, a_size)\n","\n"," def forward(self, x):\n"," x = F.relu(self.fc1(x))\n"," x = self.fc2(x)\n"," return F.softmax(x, dim=1)\n","\n"," def act(self, state):\n"," state = torch.from_numpy(state).float().unsqueeze(0).to(device)\n"," probs = self.forward(state).cpu()\n"," m = Categorical(probs)\n"," action = m.sample()\n"," return action.item(), m.log_prob(action)"]},{"cell_type":"markdown","metadata":{"id":"rgJWQFU_eUYw"},"source":["By using CartPole, it was easier to debug since **we know that the bug comes from our integration and not from our simple environment**."]},{"cell_type":"markdown","source":["- Since **we want to sample an action from the probability distribution over actions**, we can't use `action = np.argmax(m)` since it will always output the action that has the highest probability.\n","\n","- We need to replace it with `action = m.sample()` that will sample an action from the probability distribution P(.|s)"],"metadata":{"id":"c-20i7Pk0l1T"}},{"cell_type":"markdown","metadata":{"id":"4MXoqetzfIoW"},"source":["### Let's build the Reinforce Training Algorithm\n","This is the Reinforce algorithm pseudocode:\n","\n","\"Policy\n"," "]},{"cell_type":"markdown","source":["- When we calculate the return Gt (line 6) we see that we calculate the sum of discounted rewards **starting at timestep t**.\n","\n","- Why? 
Because our policy should only **reinforce actions on the basis of the consequences**: so rewards obtained before taking an action are useless (since they were not caused by the action), **only the ones that come after the action matter**.\n","\n","- Before coding this you should read this section [don't let the past distract you](https://spinningup.openai.com/en/latest/spinningup/rl_intro3.html#don-t-let-the-past-distract-you) that explains why we use reward-to-go policy gradient.\n","\n","We use an interesting technique coded by [Chris1nexus](https://github.com/Chris1nexus) to **compute the return at each timestep efficiently**. The comments explain the procedure. Also, don't hesitate [to check the PR explanation](https://github.com/huggingface/deep-rl-class/pull/95).\n","But overall the idea is to **compute the return at each timestep efficiently**."],"metadata":{"id":"QmcXG-9i2Qu2"}},{"cell_type":"markdown","metadata":{"id":"O554nUGPpcoq"},"source":["The second question you may ask is **why do we minimize the loss**? You talked about Gradient Ascent not Gradient Descent?\n","\n","- We want to maximize our utility function $J(\\theta)$ but in PyTorch like in TensorFlow it's better to **minimize an objective function.**\n"," - So let's say we want to reinforce action 3 at a certain timestep. 
Before training this action P is 0.25.\n"," - So we want to modify $\\theta$ such that $\\pi_\\theta(a_3|s; \\theta) > 0.25$\n"," - Because all P must sum to 1, max $\\pi_\\theta(a_3|s; \\theta)$ will **minimize other action probability.**\n"," - So we should tell PyTorch **to min $1 - \\pi_\\theta(a_3|s; \\theta)$.**\n"," - This loss function approaches 0 as $\\pi_\\theta(a_3|s; \\theta)$ nears 1.\n"," - So we are encouraging the gradient to max $\\pi_\\theta(a_3|s; \\theta)$\n"]},{"cell_type":"code","execution_count":14,"metadata":{"id":"iOdv8Q9NfLK7","executionInfo":{"status":"ok","timestamp":1697190619013,"user_tz":-60,"elapsed":400,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def reinforce(policy, optimizer, n_training_episodes, max_t, gamma, print_every):\n"," # Help us to calculate the score during the training\n"," scores_deque = deque(maxlen=100)\n"," scores = []\n"," # Line 3 of pseudocode\n"," for i_episode in range(1, n_training_episodes+1):\n"," saved_log_probs = []\n"," rewards = []\n"," state = env.reset()\n"," # Line 4 of pseudocode\n"," for t in range(max_t):\n"," action, log_prob = policy.act(state)\n"," saved_log_probs.append(log_prob)\n"," state, reward, done, _ = env.step(action)\n"," rewards.append(reward)\n"," if done:\n"," break\n"," scores_deque.append(sum(rewards))\n"," scores.append(sum(rewards))\n","\n"," # Line 6 of pseudocode: calculate the return\n"," returns = deque(maxlen=max_t)\n"," n_steps = len(rewards)\n"," # Compute the discounted returns at each timestep,\n"," # as the sum of the gamma-discounted return at time t (G_t) + the reward at time t\n","\n"," # In O(N) time, where N is the number of time steps\n"," # (this definition of the discounted return G_t follows the definition of this quantity\n"," # shown at page 44 of Sutton&Barto 2017 2nd draft)\n"," # G_t = r_(t+1) + r_(t+2) + ...\n","\n"," # Given this formulation, the returns at each timestep t can be computed\n"," # by 
re-using the computed future returns G_(t+1) to compute the current return G_t\n"," # G_t = r_(t+1) + gamma*G_(t+1)\n"," # G_(t-1) = r_t + gamma* G_t\n"," # (this follows a dynamic programming approach, with which we memorize solutions in order\n"," # to avoid computing them multiple times)\n","\n"," # This is correct since the above is equivalent to (see also page 46 of Sutton&Barto 2017 2nd draft)\n"," # G_(t-1) = r_t + gamma*r_(t+1) + gamma*gamma*r_(t+2) + ...\n","\n","\n"," ## Given the above, we calculate the returns at timestep t as:\n"," # gamma[t] * return[t] + reward[t]\n"," #\n"," ## We compute this starting from the last timestep to the first, in order\n"," ## to employ the formula presented above and avoid redundant computations that would be needed\n"," ## if we were to do it from first to last.\n","\n"," ## Hence, the queue \"returns\" will hold the returns in chronological order, from t=0 to t=n_steps\n"," ## thanks to the appendleft() function which allows to append to the position 0 in constant time O(1)\n"," ## a normal python list would instead require O(N) to do this.\n"," for t in range(n_steps)[::-1]:\n"," disc_return_t = (returns[0] if len(returns)>0 else 0)\n"," returns.appendleft(gamma * disc_return_t + rewards[t]) # TODO: complete here\n","\n"," ## standardization of the returns is employed to make training more stable\n"," eps = np.finfo(np.float32).eps.item()\n","\n"," ## eps is the smallest representable float, which is\n"," # added to the standard deviation of the returns to avoid numerical instabilities\n"," returns = torch.tensor(returns)\n"," returns = (returns - returns.mean()) / (returns.std() + eps)\n","\n"," # Line 7:\n"," policy_loss = []\n"," for log_prob, disc_return in zip(saved_log_probs, returns):\n"," policy_loss.append(-log_prob * disc_return)\n"," policy_loss = torch.cat(policy_loss).sum()\n","\n"," # Line 8: PyTorch prefers gradient descent\n"," optimizer.zero_grad()\n"," policy_loss.backward()\n"," 
optimizer.step()\n","\n"," if i_episode % print_every == 0:\n"," print('Episode {}\\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))\n","\n"," return scores"]},{"cell_type":"markdown","metadata":{"id":"YB0Cxrw1StrP"},"source":["#### Solution"]},{"cell_type":"code","execution_count":146,"metadata":{"id":"NCNvyElRStWG","executionInfo":{"status":"ok","timestamp":1697194232794,"user_tz":-60,"elapsed":230,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def reinforce(policy, optimizer, n_training_episodes, max_t, gamma, print_every):\n"," # Help us to calculate the score during the training\n"," scores_deque = deque(maxlen=100)\n"," scores = []\n"," # Line 3 of pseudocode\n"," for i_episode in range(1, n_training_episodes+1):\n"," saved_log_probs = []\n"," rewards = []\n"," state = env.reset()\n"," # Line 4 of pseudocode\n"," for t in range(max_t):\n"," action, log_prob = policy.act(state)\n"," saved_log_probs.append(log_prob)\n"," state, reward, done, _ = env.step(action)\n"," rewards.append(reward)\n"," if done:\n"," break\n"," scores_deque.append(sum(rewards))\n"," scores.append(sum(rewards))\n","\n"," # Line 6 of pseudocode: calculate the return\n"," returns = deque(maxlen=max_t)\n"," n_steps = len(rewards)\n"," # Compute the discounted returns at each timestep,\n"," # as\n"," # the sum of the gamma-discounted return at time t (G_t) + the reward at time t\n"," #\n"," # In O(N) time, where N is the number of time steps\n"," # (this definition of the discounted return G_t follows the definition of this quantity\n"," # shown at page 44 of Sutton&Barto 2017 2nd draft)\n"," # G_t = r_(t+1) + r_(t+2) + ...\n","\n"," # Given this formulation, the returns at each timestep t can be computed\n"," # by re-using the computed future returns G_(t+1) to compute the current return G_t\n"," # G_t = r_(t+1) + gamma*G_(t+1)\n"," # G_(t-1) = r_t + gamma* G_t\n"," # (this follows a dynamic programming approach, with which we 
memorize solutions in order\n"," # to avoid computing them multiple times)\n","\n"," # This is correct since the above is equivalent to (see also page 46 of Sutton&Barto 2017 2nd draft)\n"," # G_(t-1) = r_t + gamma*r_(t+1) + gamma*gamma*r_(t+2) + ...\n","\n","\n"," ## Given the above, we calculate the returns at timestep t as:\n"," # gamma[t] * return[t] + reward[t]\n"," #\n"," ## We compute this starting from the last timestep to the first, in order\n"," ## to employ the formula presented above and avoid redundant computations that would be needed\n"," ## if we were to do it from first to last.\n","\n"," ## Hence, the queue \"returns\" will hold the returns in chronological order, from t=0 to t=n_steps\n"," ## thanks to the appendleft() function which allows to append to the position 0 in constant time O(1)\n"," ## a normal python list would instead require O(N) to do this.\n"," for t in range(n_steps)[::-1]:\n"," disc_return_t = (returns[0] if len(returns)>0 else 0)\n"," returns.appendleft( gamma*disc_return_t + rewards[t] )\n","\n"," ## standardization of the returns is employed to make training more stable\n"," eps = np.finfo(np.float32).eps.item()\n"," ## eps is the smallest representable float, which is\n"," # added to the standard deviation of the returns to avoid numerical instabilities\n"," returns = torch.tensor(returns)\n"," returns = (returns - returns.mean()) / (returns.std() + eps)\n","\n"," # Line 7:\n"," policy_loss = []\n"," for log_prob, disc_return in zip(saved_log_probs, returns):\n"," policy_loss.append(-log_prob * disc_return)\n"," policy_loss = torch.cat(policy_loss).sum()\n","\n"," # Line 8: PyTorch prefers gradient descent\n"," optimizer.zero_grad()\n"," policy_loss.backward()\n"," optimizer.step()\n","\n"," if i_episode % print_every == 0:\n"," print('Episode {}\\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))\n","\n"," return scores"]},{"cell_type":"markdown","metadata":{"id":"RIWhQyJjfpEt"},"source":["## Train it\n","- 
We're now ready to train our agent.\n","- But first, we define a variable containing all the training hyperparameters.\n","- You can change the training parameters (and should ๐Ÿ˜‰)"]},{"cell_type":"code","execution_count":198,"metadata":{"id":"utRe1NgtVBYF","executionInfo":{"status":"ok","timestamp":1697195245035,"user_tz":-60,"elapsed":2,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["cartpole_hyperparameters = {\n"," \"h_size\": 8, #16,\n"," \"n_training_episodes\": 500,\n"," \"n_evaluation_episodes\": 10,\n"," \"max_t\": 1000, #1000,\n"," \"gamma\": 1.0, #1.0,\n"," \"lr\": 1e-2, #1e-2,\n"," \"env_id\": env_id,\n"," \"state_space\": s_size,\n"," \"action_space\": a_size,\n","}"]},{"cell_type":"code","execution_count":199,"metadata":{"id":"D3lWyVXBVfl6","executionInfo":{"status":"ok","timestamp":1697195245276,"user_tz":-60,"elapsed":3,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["# Create policy and place it to the device\n","cartpole_policy = Policy(cartpole_hyperparameters[\"state_space\"], cartpole_hyperparameters[\"action_space\"], cartpole_hyperparameters[\"h_size\"]).to(device)\n","cartpole_optimizer = optim.Adam(cartpole_policy.parameters(), lr=cartpole_hyperparameters[\"lr\"])"]},{"cell_type":"code","execution_count":200,"metadata":{"id":"uGf-hQCnfouB","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697195416949,"user_tz":-60,"elapsed":171675,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"66295dd9-ba27-4535-92ec-f4488b1af637"},"outputs":[{"output_type":"stream","name":"stdout","text":["Episode 100\tAverage Score: 37.43\n","Episode 200\tAverage Score: 202.48\n","Episode 300\tAverage Score: 427.34\n","Episode 400\tAverage Score: 489.71\n","Episode 500\tAverage Score: 499.94\n"]}],"source":["scores = reinforce(cartpole_policy,\n"," cartpole_optimizer,\n"," 
cartpole_hyperparameters[\"n_training_episodes\"],\n"," cartpole_hyperparameters[\"max_t\"],\n"," cartpole_hyperparameters[\"gamma\"],\n"," 100)"]},{"cell_type":"markdown","metadata":{"id":"Qajj2kXqhB3g"},"source":["## Define evaluation method ๐Ÿ“\n","- Here we define the evaluation method that we're going to use to test our Reinforce agent."]},{"cell_type":"code","execution_count":201,"metadata":{"id":"3FamHmxyhBEU","executionInfo":{"status":"ok","timestamp":1697195430373,"user_tz":-60,"elapsed":217,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def evaluate_agent(env, max_steps, n_eval_episodes, policy):\n"," \"\"\"\n"," Evaluate the agent for ``n_eval_episodes`` episodes and returns average reward and std of reward.\n"," :param env: The evaluation environment\n"," :param n_eval_episodes: Number of episode to evaluate the agent\n"," :param policy: The Reinforce agent\n"," \"\"\"\n"," episode_rewards = []\n"," for episode in range(n_eval_episodes):\n"," state = env.reset()\n"," step = 0\n"," done = False\n"," total_rewards_ep = 0\n","\n"," for step in range(max_steps):\n"," action, _ = policy.act(state)\n"," new_state, reward, done, info = env.step(action)\n"," total_rewards_ep += reward\n","\n"," if done:\n"," break\n"," state = new_state\n"," episode_rewards.append(total_rewards_ep)\n"," mean_reward = np.mean(episode_rewards)\n"," std_reward = np.std(episode_rewards)\n","\n"," return mean_reward, std_reward"]},{"cell_type":"markdown","metadata":{"id":"xdH2QCrLTrlT"},"source":["## Evaluate our agent ๐Ÿ“ˆ"]},{"cell_type":"code","execution_count":202,"metadata":{"id":"ohGSXDyHh0xx","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697195435253,"user_tz":-60,"elapsed":3743,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"ddc020d3-561b-417f-a60e-fbaedda7606c"},"outputs":[{"output_type":"execute_result","data":{"text/plain":["(500.0, 
0.0)"]},"metadata":{},"execution_count":202}],"source":["evaluate_agent(eval_env,\n"," cartpole_hyperparameters[\"max_t\"],\n"," cartpole_hyperparameters[\"n_evaluation_episodes\"],\n"," cartpole_policy)"]},{"cell_type":"markdown","metadata":{"id":"7CoeLkQ7TpO8"},"source":["### Publish our trained model on the Hub 🔥\n","Now that we have seen good results after training, we can publish our trained model on the Hub 🤗 with one line of code.\n","\n","Here's an example of a Model Card:\n","\n",""]},{"cell_type":"markdown","metadata":{"id":"Jmhs1k-cftIq"},"source":["### Push to the Hub\n","#### Do not modify this code"]},{"cell_type":"code","source":["from huggingface_hub import HfApi, snapshot_download\n","from huggingface_hub.repocard import metadata_eval_result, metadata_save\n","\n","from pathlib import Path\n","import datetime\n","import json\n","import imageio\n","\n","import tempfile\n","\n","import os"],"metadata":{"id":"LIVsvlW_8tcw","executionInfo":{"status":"ok","timestamp":1697195439370,"user_tz":-60,"elapsed":230,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"execution_count":203,"outputs":[]},{"cell_type":"code","execution_count":204,"metadata":{"id":"Lo4JH45if81z","executionInfo":{"status":"ok","timestamp":1697195440290,"user_tz":-60,"elapsed":3,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["def record_video(env, policy, out_directory, fps=30):\n"," \"\"\"\n"," Generate a replay video of the agent\n"," :param env\n"," :param Qtable: Qtable of our agent\n"," :param out_directory\n"," :param fps: how many frame per seconds (with taxi-v3 and frozenlake-v1 we use 1)\n"," \"\"\"\n"," images = []\n"," done = False\n"," state = env.reset()\n"," img = env.render(mode='rgb_array')\n"," images.append(img)\n"," while not done:\n"," # Take the action (index) that have the maximum expected future reward given that state\n"," action, _ = policy.act(state)\n"," state, reward, done, info = 
env.step(action) # We directly put next_state = state for recording logic\n"," img = env.render(mode='rgb_array')\n"," images.append(img)\n"," imageio.mimsave(out_directory, [np.array(img) for i, img in enumerate(images)], fps=fps)"]},{"cell_type":"code","source":["def push_to_hub(repo_id,\n"," model,\n"," hyperparameters,\n"," eval_env,\n"," video_fps=30\n"," ):\n"," \"\"\"\n"," Evaluate, Generate a video and Upload a model to Hugging Face Hub.\n"," This method does the complete pipeline:\n"," - It evaluates the model\n"," - It generates the model card\n"," - It generates a replay video of the agent\n"," - It pushes everything to the Hub\n","\n"," :param repo_id: repo_id: id of the model repository from the Hugging Face Hub\n"," :param model: the pytorch model we want to save\n"," :param hyperparameters: training hyperparameters\n"," :param eval_env: evaluation environment\n"," :param video_fps: how many frame per seconds to record our video replay\n"," \"\"\"\n","\n"," _, repo_name = repo_id.split(\"/\")\n"," api = HfApi()\n","\n"," # Step 1: Create the repo\n"," repo_url = api.create_repo(\n"," repo_id=repo_id,\n"," exist_ok=True,\n"," )\n","\n"," with tempfile.TemporaryDirectory() as tmpdirname:\n"," local_directory = Path(tmpdirname)\n","\n"," # Step 2: Save the model\n"," torch.save(model, local_directory / \"model.pt\")\n","\n"," # Step 3: Save the hyperparameters to JSON\n"," with open(local_directory / \"hyperparameters.json\", \"w\") as outfile:\n"," json.dump(hyperparameters, outfile)\n","\n"," # Step 4: Evaluate the model and build JSON\n"," mean_reward, std_reward = evaluate_agent(eval_env,\n"," hyperparameters[\"max_t\"],\n"," hyperparameters[\"n_evaluation_episodes\"],\n"," model)\n"," # Get datetime\n"," eval_datetime = datetime.datetime.now()\n"," eval_form_datetime = eval_datetime.isoformat()\n","\n"," evaluate_data = {\n"," \"env_id\": hyperparameters[\"env_id\"],\n"," \"mean_reward\": mean_reward,\n"," \"n_evaluation_episodes\": 
hyperparameters[\"n_evaluation_episodes\"],\n"," \"eval_datetime\": eval_form_datetime,\n"," }\n","\n"," # Write a JSON file\n"," with open(local_directory / \"results.json\", \"w\") as outfile:\n"," json.dump(evaluate_data, outfile)\n","\n"," # Step 5: Create the model card\n"," env_name = hyperparameters[\"env_id\"]\n","\n"," metadata = {}\n"," metadata[\"tags\"] = [\n"," env_name,\n"," \"reinforce\",\n"," \"reinforcement-learning\",\n"," \"custom-implementation\",\n"," \"deep-rl-class\"\n"," ]\n","\n"," # Add metrics\n"," eval = metadata_eval_result(\n"," model_pretty_name=repo_name,\n"," task_pretty_name=\"reinforcement-learning\",\n"," task_id=\"reinforcement-learning\",\n"," metrics_pretty_name=\"mean_reward\",\n"," metrics_id=\"mean_reward\",\n"," metrics_value=f\"{mean_reward:.2f} +/- {std_reward:.2f}\",\n"," dataset_pretty_name=env_name,\n"," dataset_id=env_name,\n"," )\n","\n"," # Merges both dictionaries\n"," metadata = {**metadata, **eval}\n","\n"," model_card = f\"\"\"\n"," # **Reinforce** Agent playing **{env_id}**\n"," This is a trained model of a **Reinforce** agent playing **{env_id}** .\n"," To learn to use this model and train yours check Unit 4 of the Deep Reinforcement Learning Course: https://huggingface.co/deep-rl-course/unit4/introduction\n"," \"\"\"\n","\n"," readme_path = local_directory / \"README.md\"\n"," readme = \"\"\n"," if readme_path.exists():\n"," with readme_path.open(\"r\", encoding=\"utf8\") as f:\n"," readme = f.read()\n"," else:\n"," readme = model_card\n","\n"," with readme_path.open(\"w\", encoding=\"utf-8\") as f:\n"," f.write(readme)\n","\n"," # Save our metrics to Readme metadata\n"," metadata_save(readme_path, metadata)\n","\n"," # Step 6: Record a video\n"," video_path = local_directory / \"replay.mp4\"\n"," record_video(env, model, video_path, video_fps)\n","\n"," # Step 7. 
Push everything to the Hub\n"," api.upload_folder(\n"," repo_id=repo_id,\n"," folder_path=local_directory,\n"," path_in_repo=\".\",\n"," )\n","\n"," print(f\"Your model is pushed to the Hub. You can view your model here: {repo_url}\")"],"metadata":{"id":"_TPdq47D7_f_","executionInfo":{"status":"ok","timestamp":1697195441625,"user_tz":-60,"elapsed":231,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"execution_count":205,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"w17w8CxzoURM"},"source":["### .\n","\n","By using `push_to_hub` **you evaluate, record a replay, generate a model card of your agent and push it to the Hub**.\n","\n","This way:\n","- You can **showcase your work** 🔥\n","- You can **visualize your agent playing** 👀\n","- You can **share with the community an agent that others can use** 💾\n","- You can **access a leaderboard 🏆 to see how well your agent is performing compared to your classmates** 👉 https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard\n"]},{"cell_type":"markdown","metadata":{"id":"cWnFC0iZooTw"},"source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1️⃣ (If it's not already done) create an account on HF ➡ https://huggingface.co/join\n","\n","2️⃣ Sign in, then store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write 
role**\n","\n","\n","\"Create\n"]},{"cell_type":"code","execution_count":122,"metadata":{"id":"QB5nIcxR8paT","colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["182e3dedcd2745bcb549ea005e88315b","56cb4e7164164b14a704c0fea955a679","4486295dab3f442487a8907ea0f61426","aba1a189ce5b4e0bbf755c37e0a44414","2cff9183e49f4fe6909e933b78ffe1b7","ed30d1ff45e34f3b83742a384e48b080","2524853f299e4439b12e3f08234dfca3","a89fac821524462a959a595a8b2ce819","a77ed4102c4642a5b1f76e7cb906b767","47b89d52b8ce41bbb5d1488a4a1f9562","5c6128936cc34a0aa0d825ebe9b4a4b5","7a781ca81c874ca59499a90ecd8d2d99","4412d394eaae418b85dab9a264bb9806","9d323712621d4016bc5cb4abf73f3e24","08cc80b5cb0449ce8c8b341fa291ffef","e282c76d39f74bc581cb5b899ec558fd","820fc86b834d423caf3910b0c07ff5b4","372be77f740246989f83b976ba092dd8","4a01f1d1bb16429c967e5d01e06fac10","91492660cb7c421285d033d14629750b","d511a25b17854121add2cb3760fe6f80","e83bda293b434dfcbd15b94963f45ce8","d14bc31dde9045a284836cb202709ec8","20832517f0d345f98c66bd6fd9816a5a","7d535b0a18af42b2ab5c10414ae0db6a","3234299078f34c018e6a591ebe477a14","470ee07fa0b341cc9ed596609f9e51d0","b85c1108819a45f78813dd3a148b7b4c","edc6fd51cc4e48b29b4ff4cc3beaca87","dcc7e9fad3854158adfcbdcc9e4663df","02f7ce4ff5c74819998f63175c6720e3","2458132638eb4b6dab7c41b9322ffbae"]},"executionInfo":{"status":"ok","timestamp":1697193027490,"user_tz":-60,"elapsed":224,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"91575382-19cd-476d-cb16-a16964798656"},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='
"}},"4486295dab3f442487a8907ea0f61426":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_47b89d52b8ce41bbb5d1488a4a1f9562","placeholder":"โ€‹","style":"IPY_MODEL_5c6128936cc34a0aa0d825ebe9b4a4b5","value":""}},"aba1a189ce5b4e0bbf755c37e0a44414":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_7a781ca81c874ca59499a90ecd8d2d99","style":"IPY_MODEL_4412d394eaae418b85dab9a264bb9806","value":true}},"2cff9183e49f4fe6909e933b78ffe1b7":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_9d323712621d4016bc5cb4abf73f3e24","style":"IPY_MODEL_08cc80b5cb0449ce8c8b341fa291ffef","tooltip":""}},"ed30d1ff45e34f3b83742a384e48b080":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e282c76d39f74bc581cb5b899ec558fd","placeholder":"โ€‹","style":"IPY_MODEL_820fc86b834d423caf3910b0c07ff5b4","value":"\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
"}},"2524853f299e4439b12e3f08234dfca3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"a89fac821524462a959a595a8b2ce819":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a77ed4102c4642a5b1f76e7cb906b767":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"47b89d52b8ce41bbb5d1488a4a1f9562":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5c6128936cc34a0aa0d825ebe9b4a4b5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7a781ca81c874ca59499a90ecd8d2d99":{"model_module":"@
jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4412d394eaae418b85dab9a264bb9806":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9d323712621d4016bc5cb4abf73f3e24":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":nul
l,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"08cc80b5cb0449ce8c8b341fa291ffef":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"e282c76d39f74bc581cb5b899ec558fd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"820fc86b834d423caf3910b0c07ff5b4":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","
model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"372be77f740246989f83b976ba092dd8":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4a01f1d1bb16429c967e5d01e06fac10","placeholder":"โ€‹","style":"IPY_MODEL_91492660cb7c421285d033d14629750b","value":"Connecting..."}},"4a01f1d1bb16429c967e5d01e06fac10":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"91492660cb7c421285d033d14629750b":{"model_module":"@jupyter-widgets/controls","model_nam
e":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d511a25b17854121add2cb3760fe6f80":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7d535b0a18af42b2ab5c10414ae0db6a","placeholder":"โ€‹","style":"IPY_MODEL_3234299078f34c018e6a591ebe477a14","value":"Token is valid (permission: write)."}},"e83bda293b434dfcbd15b94963f45ce8":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_470ee07fa0b341cc9ed596609f9e51d0","placeholder":"โ€‹","style":"IPY_MODEL_b85c1108819a45f78813dd3a148b7b4c","value":"Your token has been saved in your configured git credential helpers 
(store)."}},"d14bc31dde9045a284836cb202709ec8":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_edc6fd51cc4e48b29b4ff4cc3beaca87","placeholder":"โ€‹","style":"IPY_MODEL_dcc7e9fad3854158adfcbdcc9e4663df","value":"Your token has been saved to /root/.cache/huggingface/token"}},"20832517f0d345f98c66bd6fd9816a5a":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_02f7ce4ff5c74819998f63175c6720e3","placeholder":"โ€‹","style":"IPY_MODEL_2458132638eb4b6dab7c41b9322ffbae","value":"Login 
successful"}},"7d535b0a18af42b2ab5c10414ae0db6a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3234299078f34c018e6a591ebe477a14":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"470ee07fa0b341cc9ed596609f9e51d0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto
_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b85c1108819a45f78813dd3a148b7b4c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"edc6fd51cc4e48b29b4ff4cc3beaca87":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dcc7e9fad3854158adfcbdcc9e4663df":{"model_module":"@j
upyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"02f7ce4ff5c74819998f63175c6720e3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2458132638eb4b6dab7c41b9322ffbae":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"9a8bc46b3df64f11b759ce9386050685":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/co
ntrols","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_3ea63469a06e4710804b00dc5088f0bf","IPY_MODEL_72ac1f6248b049c886d40a373195c947","IPY_MODEL_14c0f88e629c4af3affe81114e5adb7d"],"layout":"IPY_MODEL_b1d73b696e2e4b55a8c77f7e9f899a10"}},"3ea63469a06e4710804b00dc5088f0bf":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c61b7e7783644ec1814a25ad47c3e505","placeholder":"โ€‹","style":"IPY_MODEL_565f9c1738854ff198ebdccd9c7530ce","value":"model.pt: 100%"}},"72ac1f6248b049c886d40a373195c947":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_4350e62412d84530b2d97e7ace53c5bc","max":2579,"min":0,"orientation":"horizontal","style":"IPY_MODEL_6d061a32800e4891ba896d9eb8ef0b5b","value":2579}},"14c0f88e629c4af3affe81114e5adb7d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","
description":"","description_tooltip":null,"layout":"IPY_MODEL_6862d17e5dcb4cbb8a76e01cd3b44ed0","placeholder":"โ€‹","style":"IPY_MODEL_d3985cded95449ffa71d3a8370fff913","value":" 2.58k/2.58k [00:00<00:00, 8.13kB/s]"}},"b1d73b696e2e4b55a8c77f7e9f899a10":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c61b7e7783644ec1814a25ad47c3e505":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,
"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"565f9c1738854ff198ebdccd9c7530ce":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4350e62412d84530b2d97e7ace53c5bc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6d061a32800e4891ba896d9eb8ef0b5b":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleM
odel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"6862d17e5dcb4cbb8a76e01cd3b44ed0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d3985cded95449ffa71d3a8370fff913":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2949c6cee3e54b1983894fb28acb6f9c":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style
":"","children":["IPY_MODEL_74d9fae5441d42c8a645fa183148e2e5","IPY_MODEL_84a93b3f5ff74ee99bc85beb5af76107","IPY_MODEL_a39be9f762244f1fa2f0a6bcedf72a36"],"layout":"IPY_MODEL_a6dcf9190aaf4d7d8ea521e2d0b5b0f9"}},"74d9fae5441d42c8a645fa183148e2e5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_96558faa30944d8eac42935704ea9486","placeholder":"โ€‹","style":"IPY_MODEL_a6f10d6908884133b9754963af735566","value":"model.pt: 100%"}},"84a93b3f5ff74ee99bc85beb5af76107":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_4434df81c6834371b5e6e8aac994d146","max":39239,"min":0,"orientation":"horizontal","style":"IPY_MODEL_20e08af188c54eda8e5fb8632c0e924e","value":39239}},"a39be9f762244f1fa2f0a6bcedf72a36":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_01990cbfe9fd4b58841147771799268e","placeholder":"โ€‹","style":"IPY_MODEL_ed60418f04294ca3b0f58c0891be7006","value":" 
39.2k/39.2k [00:00<00:00, 52.6kB/s]"}},"a6dcf9190aaf4d7d8ea521e2d0b5b0f9":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"96558faa30944d8eac42935704ea9486":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow
":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a6f10d6908884133b9754963af735566":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4434df81c6834371b5e6e8aac994d146":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"20e08af188c54eda8e5fb8632c0e924e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"01990cbfe9fd4b588
41147771799268e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ed60418f04294ca3b0f58c0891be7006":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} \ No newline at end of file diff --git a/HF DeepRL Course/Unit5 - ML-Agents.ipynb b/HF DeepRL Course/Unit5 - ML-Agents.ipynb new file mode 100644 index 0000000..0e1d1dc --- /dev/null +++ b/HF DeepRL Course/Unit5 - ML-Agents.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"2D3NL_e4crQv"},"source":["# Unit 5: An Introduction to ML-Agents\n","\n"]},{"cell_type":"markdown","source":["\"Thumbnail\"/\n","\n","In this notebook, you'll learn about ML-Agents and train two agents.\n","\n","- The first one will 
learn to **shoot snowballs onto spawning targets**.\n","- The second one needs to press a button to spawn a pyramid, then navigate to the pyramid, knock it over, **and move to the gold brick at the top**. To do that, it will need to explore its environment, and we will use a technique called curiosity.\n","\n","After that, you'll be able **to watch your agents playing directly on your browser**.\n","\n","For more information about the certification process, check this section 👉 https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"],"metadata":{"id":"97ZiytXEgqIz"}},{"cell_type":"markdown","source":["⬇️ Here is an example of what **you will achieve at the end of this unit.** ⬇️\n"],"metadata":{"id":"FMYrDriDujzX"}},{"cell_type":"markdown","source":["\"Pyramids\"/\n","\n","\"SnowballTarget\"/"],"metadata":{"id":"cBmFlh8suma-"}},{"cell_type":"markdown","source":["### 🎮 Environments:\n","\n","- [Pyramids](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Examples.md#pyramids)\n","- SnowballTarget\n","\n","### 📚 RL-Library:\n","\n","- [ML-Agents](https://github.com/Unity-Technologies/ml-agents)\n"],"metadata":{"id":"A-cYE0K5iL-w"}},{"cell_type":"markdown","source":["We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the GitHub Repo](https://github.com/huggingface/deep-rl-class/issues)."],"metadata":{"id":"qEhtaFh9i31S"}},{"cell_type":"markdown","source":["## Objectives of this notebook 🏆\n","\n","At the end of the notebook, you will:\n","\n","- Understand how **ML-Agents**, the environment library, works.\n","- Be able to **train agents in Unity Environments**.\n"],"metadata":{"id":"j7f63r3Yi5vE"}},{"cell_type":"markdown","source":["## This notebook is from the Deep Reinforcement Learning Course\n","\"Deep"],"metadata":{"id":"viNzVbVaYvY3"}},{"cell_type":"markdown","metadata":{"id":"6p5HnEefISCB"},"source":["In this free
course, you will:\n","\n","- 📖 Study Deep Reinforcement Learning in **theory and practice**.\n","- 🧑‍💻 Learn to **use famous Deep RL libraries** such as Stable Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- 🤖 Train **agents in unique environments**\n","\n","And more! Check 📚 the syllabus 👉 https://huggingface.co/deep-rl-course/communication/publishing-schedule\n","\n","Don’t forget to **sign up to the course** (we are collecting your email to be able to **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","\n","The best way to keep in touch is to join our discord server to exchange with the community and with us 👉🏻 https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"Y-mo_6rXIjRi"},"source":["## Prerequisites 🏗️\n","Before diving into the notebook, you need to:\n","\n","🔲 📚 **Study [what is ML-Agents and how it works by reading Unit 5](https://huggingface.co/deep-rl-course/unit5/introduction)** 🤗 "]},{"cell_type":"markdown","source":["# Let's train our agents 🚀\n","\n","**To validate this hands-on for the certification process, you just need to push your trained models to the Hub**. There are no results to attain to validate this one. But if you want to get nice results you can try to attain:\n","\n","- For `Pyramids` : Mean Reward = 1.75\n","- For `SnowballTarget` : Mean Reward = 15 or 30 targets hit in an episode.\n"],"metadata":{"id":"xYO1uD5Ujgdh"}},{"cell_type":"markdown","source":["## Set the GPU 💪\n","- To **accelerate the agent's training, we'll use a GPU**.
To do that, go to `Runtime > Change Runtime type`\n","\n","\"GPU"],"metadata":{"id":"DssdIjk_8vZE"}},{"cell_type":"markdown","source":["- `Hardware Accelerator > GPU`\n","\n","\"GPU"],"metadata":{"id":"sTfCXHy68xBv"}},{"cell_type":"markdown","metadata":{"id":"an3ByrXYQ4iK"},"source":["## Clone the repository and install the dependencies 🔽\n"]},{"cell_type":"code","execution_count":1,"metadata":{"id":"6WNoL04M7rTa","executionInfo":{"status":"ok","timestamp":1697361786876,"user_tz":-60,"elapsed":7400,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}}},"outputs":[],"source":["%%capture\n","# Clone the repository\n","!git clone --depth 1 https://github.com/Unity-Technologies/ml-agents"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"d8wmVcMk7xKo"},"outputs":[],"source":["%%capture\n","# Go inside the repository and install the package\n","%cd ml-agents\n","!pip3 install -e ./ml-agents-envs\n","!pip3 install -e ./ml-agents"]},{"cell_type":"markdown","source":["## SnowballTarget ⛄\n","\n","If you need a refresher on how this environment works, check this section 👉\n","https://huggingface.co/deep-rl-course/unit5/snowball-target"],"metadata":{"id":"R5_7Ptd_kEcG"}},{"cell_type":"markdown","metadata":{"id":"HRY5ufKUKfhI"},"source":["### Download and move the environment zip file in `./training-envs-executables/linux/`\n","- Our environment executable is in a zip file.\n","- We need to download it and place it in `./training-envs-executables/linux/`\n","- We use a Linux executable because we use Colab, and the OS of Colab machines is Ubuntu (Linux)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"C9Ls6_6eOKiA"},"outputs":[],"source":["# Here, we create training-envs-executables and linux\n","!mkdir ./training-envs-executables\n","!mkdir ./training-envs-executables/linux"]},{"cell_type":"markdown","metadata":{"id":"jsoZGxr1MIXY"},"source":["Download the file SnowballTarget.zip from
https://drive.google.com/file/d/1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5 using `wget`.\n","\n","Check out the full solution to download large files from GDrive [here](https://bcrf.biochem.wisc.edu/2021/02/05/download-google-drive-files-using-wget/)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"QU6gi8CmWhnA","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697206519710,"user_tz":-60,"elapsed":1990,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"8af5d19f-b177-4831-acf2-c8c18b94668a"},"outputs":[{"output_type":"stream","name":"stdout","text":["--2023-10-13 14:15:18-- https://docs.google.com/uc?export=download&confirm=t&id=1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5\n","Resolving docs.google.com (docs.google.com)... 74.125.142.100, 74.125.142.101, 74.125.142.138, ...\n","Connecting to docs.google.com (docs.google.com)|74.125.142.100|:443... connected.\n","HTTP request sent, awaiting response... 303 See Other\n","Location: https://doc-14-28-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9qrrn3l294p7iisdst09cdhp9igi3ghk/1697206500000/15803371278684422230/*/1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5?e=download&uuid=db620364-aaaf-4a37-9e6a-24defb0a225a [following]\n","Warning: wildcards not supported in HTTP.\n","--2023-10-13 14:15:18-- https://doc-14-28-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9qrrn3l294p7iisdst09cdhp9igi3ghk/1697206500000/15803371278684422230/*/1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5?e=download&uuid=db620364-aaaf-4a37-9e6a-24defb0a225a\n","Resolving doc-14-28-docs.googleusercontent.com (doc-14-28-docs.googleusercontent.com)... 142.250.99.132, 2607:f8b0:400e:c0c::84\n","Connecting to doc-14-28-docs.googleusercontent.com (doc-14-28-docs.googleusercontent.com)|142.250.99.132|:443... connected.\n","HTTP request sent, awaiting response... 
200 OK\n","Length: 35134213 (34M) [application/x-zip-compressed]\n","Saving to: โ€˜./training-envs-executables/linux/SnowballTarget.zipโ€™\n","\n","./training-envs-exe 100%[===================>] 33.51M 53.3MB/s in 0.6s \n","\n","2023-10-13 14:15:19 (53.3 MB/s) - โ€˜./training-envs-executables/linux/SnowballTarget.zipโ€™ saved [35134213/35134213]\n","\n"]}],"source":["!wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id=1YHHLjyj6gaZ3Gemx1hQgqrPgSS2ZhmB5\" -O ./training-envs-executables/linux/SnowballTarget.zip && rm -rf /tmp/cookies.txt"]},{"cell_type":"markdown","source":["We unzip the executable.zip file"],"metadata":{"id":"_LLVaEEK3ayi"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"8FPx0an9IAwO"},"outputs":[],"source":["%%capture\n","!unzip -d ./training-envs-executables/linux/ ./training-envs-executables/linux/SnowballTarget.zip"]},{"cell_type":"markdown","metadata":{"id":"nyumV5XfPKzu"},"source":["Make sure your file is accessible"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"EdFsLJ11JvQf"},"outputs":[],"source":["!chmod -R 755 ./training-envs-executables/linux/SnowballTarget"]},{"cell_type":"markdown","source":["### Define the SnowballTarget config file\n","- In ML-Agents, you define the **training hyperparameters into config.yaml files.**\n","\n","There are multiple hyperparameters. 
To know them better, you should check for each explanation with [the documentation](https://github.com/Unity-Technologies/ml-agents/blob/release_20_docs/docs/Training-Configuration-File.md)\n","\n","\n","So you need to create a `SnowballTarget.yaml` config file in ./content/ml-agents/config/ppo/\n","\n","We'll give you here a first version of this config (to copy and paste into your `SnowballTarget.yaml file`), **but you should modify it**.\n","\n","```\n","behaviors:\n"," SnowballTarget:\n"," trainer_type: ppo\n"," summary_freq: 10000\n"," keep_checkpoints: 10\n"," checkpoint_interval: 50000\n"," max_steps: 200000\n"," time_horizon: 64\n"," threaded: true\n"," hyperparameters:\n"," learning_rate: 0.0003\n"," learning_rate_schedule: linear\n"," batch_size: 128\n"," buffer_size: 2048\n"," beta: 0.005\n"," epsilon: 0.2\n"," lambd: 0.95\n"," num_epoch: 3\n"," network_settings:\n"," normalize: false\n"," hidden_units: 256\n"," num_layers: 2\n"," vis_encode_type: simple\n"," reward_signals:\n"," extrinsic:\n"," gamma: 0.99\n"," strength: 1.0\n","```"],"metadata":{"id":"NAuEq32Mwvtz"}},{"cell_type":"markdown","source":["\"Config\n","\"Config"],"metadata":{"id":"4U3sRH4N4h_l"}},{"cell_type":"markdown","source":["As an experimentation, you should also try to modify some other hyperparameters. Unity provides very [good documentation explaining each of them here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-Configuration-File.md).\n","\n","Now that you've created the config file and understand what most hyperparameters do, we're ready to train our agent ๐Ÿ”ฅ."],"metadata":{"id":"JJJdo_5AyoGo"}},{"cell_type":"markdown","metadata":{"id":"f9fI555bO12v"},"source":["### Train the agent\n","\n","To train our agent, we just need to **launch mlagents-learn and select the executable containing the environment.**\n","\n","We define four parameters:\n","\n","1. `mlagents-learn `: the path where the hyperparameter config file is.\n","2. 
`--env`: where the environment executable is.\n","3. `--run_id`: the name you want to give to your training run id.\n","4. `--no-graphics`: to not launch the visualization during the training.\n","\n","\"MlAgents\n","\n","Train the model and use the `--resume` flag to continue training in case of interruption.\n","\n","> It will fail first time if and when you use `--resume`, try running the block again to bypass the error.\n","\n"]},{"cell_type":"markdown","source":["The training will take 10 to 35min depending on your config, go take a โ˜•๏ธyou deserve it ๐Ÿค—."],"metadata":{"id":"lN32oWF8zPjs"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"bS-Yh1UdHfzy","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697207065541,"user_tz":-60,"elapsed":449500,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"ab21b023-f1d1-44f4-bfb9-12d26355902d"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-13 14:17:00.914435: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","\n"," โ” โ•–\n"," โ•“โ•–โ•ฌโ”‚โ•ก โ”‚โ”‚โ•ฌโ•–โ•–\n"," โ•“โ•–โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ”˜ โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ•ฌโ•–\n"," โ•–โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ•ฌโ•œ โ•™โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ•–โ•– โ•—โ•—โ•—\n"," โ•ฌโ•ฌโ•ฌโ•ฌโ•–โ”‚โ”‚โ•ฆโ•– โ•–โ•ฌโ”‚โ”‚โ•—โ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•œโ•œโ•œ โ•Ÿโ•ฃโ•ฃ\n"," โ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•–โ”‚โ•ฌโ•–โ•–โ•“โ•ฌโ•ชโ”‚โ•“โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•’โ•ฃโ•ฃโ•–โ•—โ•ฃโ•ฃโ•ฃโ•— โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•– โ•ฃโ•ฃโ•ฃ\n"," โ•ฌโ•ฌโ•ฌโ•ฌโ” โ•™โ•ฌโ•ฌโ•ฌโ•ฌโ”‚โ•“โ•ฃโ•ฃโ•ฃโ•โ•œ โ•ซโ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฃโ•™ โ•™โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃ โ•™โ•Ÿโ•ฃโ•ฃโ•œโ•™ โ•ซโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃ\n"," โ•ฌโ•ฌโ•ฌโ•ฌโ” 
โ•™โ•ฌโ•ฌโ•ฃโ•ฃ โ•ซโ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃโ”Œโ•ฃโ•ฃโ•œ\n"," โ•ฌโ•ฌโ•ฌโ•œ โ•ฌโ•ฌโ•ฃโ•ฃ โ•™โ•โ•ฃโ•ฃโ•ฌ โ•™โ•ฃโ•ฃโ•ฃโ•—โ•–โ•“โ•—โ•ฃโ•ฃโ•ฃโ•œ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฆโ•“ โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃ\n"," โ•™ โ•“โ•ฆโ•– โ•ฌโ•ฌโ•ฃโ•ฃ โ•“โ•—โ•—โ•– โ•™โ•โ•ฃโ•ฃโ•ฃโ•ฃโ•โ•œ โ•˜โ•โ•โ•œ โ•โ•โ• โ•โ•โ• โ•™โ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฃ\n"," โ•ฉโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฆโ•ฆโ•ฌโ•ฌโ•ฃโ•ฃโ•—โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ• โ•ซโ•ฃโ•ฃโ•ฃโ•ฃ\n"," โ•™โ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•โ•œ\n"," โ•™โ•ฌโ•ฌโ•ฌโ•ฃโ•ฃโ•ฃโ•œ\n"," โ•™\n"," \n"," Version information:\n"," ml-agents: 1.1.0.dev0,\n"," ml-agents-envs: 1.1.0.dev0,\n"," Communicator API: 1.5.0,\n"," PyTorch: 2.0.1+cu118\n","[INFO] Connected to Unity environment with package version 2.1.0-exp.1 and communication version 1.5.0\n","[INFO] Connected new brain: SnowballTarget?team=0\n","[INFO] Hyperparameters for behavior name SnowballTarget: \n","\ttrainer_type:\tppo\n","\thyperparameters:\t\n","\t batch_size:\t128\n","\t buffer_size:\t2048\n","\t learning_rate:\t0.0003\n","\t beta:\t0.005\n","\t epsilon:\t0.2\n","\t lambd:\t0.95\n","\t num_epoch:\t3\n","\t shared_critic:\tFalse\n","\t learning_rate_schedule:\tlinear\n","\t beta_schedule:\tlinear\n","\t epsilon_schedule:\tlinear\n","\tcheckpoint_interval:\t50000\n","\tnetwork_settings:\t\n","\t normalize:\tFalse\n","\t hidden_units:\t256\n","\t num_layers:\t2\n","\t vis_encode_type:\tsimple\n","\t memory:\tNone\n","\t goal_conditioning_type:\thyper\n","\t deterministic:\tFalse\n","\treward_signals:\t\n","\t extrinsic:\t\n","\t gamma:\t0.99\n","\t strength:\t1.0\n","\t network_settings:\t\n","\t normalize:\tFalse\n","\t hidden_units:\t128\n","\t num_layers:\t2\n","\t vis_encode_type:\tsimple\n","\t memory:\tNone\n","\t goal_conditioning_type:\thyper\n","\t 
deterministic:\tFalse\n","\tinit_path:\tNone\n","\tkeep_checkpoints:\t10\n","\teven_checkpoints:\tFalse\n","\tmax_steps:\t200000\n","\ttime_horizon:\t64\n","\tsummary_freq:\t10000\n","\tthreaded:\tTrue\n","\tself_play:\tNone\n","\tbehavioral_cloning:\tNone\n","[INFO] SnowballTarget. Step: 10000. Time Elapsed: 32.631 s. Mean Reward: 3.659. Std of Reward: 2.235. Training.\n","[INFO] SnowballTarget. Step: 20000. Time Elapsed: 54.320 s. Mean Reward: 5.909. Std of Reward: 2.856. Training.\n","[INFO] SnowballTarget. Step: 30000. Time Elapsed: 74.734 s. Mean Reward: 9.364. Std of Reward: 2.672. Training.\n","[INFO] SnowballTarget. Step: 40000. Time Elapsed: 95.906 s. Mean Reward: 10.964. Std of Reward: 2.635. Training.\n","[INFO] SnowballTarget. Step: 50000. Time Elapsed: 117.648 s. Mean Reward: 13.364. Std of Reward: 2.297. Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/SnowballTarget1/SnowballTarget/SnowballTarget-49936.onnx\n","[INFO] SnowballTarget. Step: 60000. Time Elapsed: 139.233 s. Mean Reward: 15.473. Std of Reward: 2.456. Training.\n","[INFO] SnowballTarget. Step: 70000. Time Elapsed: 160.278 s. Mean Reward: 16.568. Std of Reward: 2.199. Training.\n","[INFO] SnowballTarget. Step: 80000. Time Elapsed: 183.213 s. Mean Reward: 18.618. Std of Reward: 2.576. Training.\n","[INFO] SnowballTarget. Step: 90000. Time Elapsed: 203.479 s. Mean Reward: 19.977. Std of Reward: 2.127. Training.\n","[INFO] SnowballTarget. Step: 100000. Time Elapsed: 225.885 s. Mean Reward: 19.782. Std of Reward: 2.172. 
Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/SnowballTarget1/SnowballTarget/SnowballTarget-99960.onnx\n","[INFO] SnowballTarget. Step: 110000. Time Elapsed: 247.541 s. Mean Reward: 20.370. Std of Reward: 2.429. Training.\n","[INFO] SnowballTarget. Step: 120000. Time Elapsed: 268.255 s. Mean Reward: 21.444. Std of Reward: 2.809. Training.\n","[INFO] SnowballTarget. Step: 130000. Time Elapsed: 290.602 s. Mean Reward: 22.727. Std of Reward: 2.720. Training.\n","[INFO] SnowballTarget. Step: 140000. Time Elapsed: 312.492 s. Mean Reward: 23.318. Std of Reward: 2.419. Training.\n","[INFO] SnowballTarget. Step: 150000. Time Elapsed: 335.058 s. Mean Reward: 24.236. Std of Reward: 2.106. Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/SnowballTarget1/SnowballTarget/SnowballTarget-149984.onnx\n","[INFO] SnowballTarget. Step: 160000. Time Elapsed: 356.424 s. Mean Reward: 24.318. Std of Reward: 2.678. Training.\n","[INFO] SnowballTarget. Step: 170000. Time Elapsed: 378.086 s. Mean Reward: 24.818. Std of Reward: 2.241. Training.\n","[INFO] SnowballTarget. Step: 180000. Time Elapsed: 399.156 s. Mean Reward: 24.659. Std of Reward: 2.215. Training.\n","[INFO] SnowballTarget. Step: 190000. Time Elapsed: 421.498 s. Mean Reward: 25.255. Std of Reward: 2.290. Training.\n","[INFO] SnowballTarget. Step: 200000. Time Elapsed: 441.510 s. Mean Reward: 24.955. Std of Reward: 2.296. 
Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/SnowballTarget1/SnowballTarget/SnowballTarget-199984.onnx\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/SnowballTarget1/SnowballTarget/SnowballTarget-200112.onnx\n","[INFO] Copied results/SnowballTarget1/SnowballTarget/SnowballTarget-200112.onnx to results/SnowballTarget1/SnowballTarget.onnx.\n"]}],"source":["!mlagents-learn ./config/ppo/SnowballTarget.yaml --env=./training-envs-executables/linux/SnowballTarget/SnowballTarget --run-id=\"SnowballTarget1\" --no-graphics"]},{"cell_type":"markdown","metadata":{"id":"5Vue94AzPy1t"},"source":["### Push the agent to the ๐Ÿค— Hub\n","\n","- Now that we trained our agent, weโ€™re **ready to push it to the Hub to be able to visualize it playing on your browser๐Ÿ”ฅ.**"]},{"cell_type":"markdown","source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1๏ธโƒฃ (If it's not already done) create an account to HF โžก https://huggingface.co/join\n","\n","2๏ธโƒฃ Sign in and then, you need to store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n","\n","\"Create\n","\n","- Copy the token\n","- Run the cell below and paste the 
token"],"metadata":{"id":"izT6FpgNzZ6R"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"rKt2vsYoK56o","colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["4428338482f948e6be752e8e9827c841","a37b769ce02840ed9b83bbde4f4daac7","197f3a0f7b2b4618aaa2c0abcc121768","0de73d47456740c8a7aec3a2c23429e4","dbb882b2352b44cb8d0a7646b3ef0a96","48c1ef961a6942199d437ba1b8456f6f","357cfe11cb4f458bb70372a6d1209262","ae25137c62ab4c6aa00ea707937c68bd","2196e1264505454098d83d5a986c122f","ebda32c0c8d94222b1bd72cedab9a01c","b03c362004d24d65aa07a298d9cbf7d6","1d64929e8b8d4dc18a2175c82567e1c7","ac0b7664442f4c68bd72781ddb87ea9e","47893006c1f54e96adaffd43521d4c16","83fd942f1b9e4df09cf684609dec4ee7","80c35c2596fe4098b77e9ab7b4afba34","3a38f167af43410b9dc9f279cc7b5a01","d54984153efa4edc957d350eda7d8f93","2f90a2040a854b83a2ec49155655c495","22da873132224b989e009434a3369a68","249ca35c9a7946d2b4f17f6c3eddc0ab","c50f669c6c6d4033888d1e4cf549b7ca","398634082d6c4fd49b0b518b6c20e3e3","d3c41cdcff7d4056a45b1894f83bfcb4","f4979c13430b456ea912de1c06712416","95fda6ca627a40f6a0cea7450dff1ba4","c2b4330ea8b9461ca1b137d100988f13","44ac7a9c241c47ec97260fbed13aa199","243b25a12f2442049b65f25b6154e07c","bc379cef55ae4a70973b5ff21a867e5a","280358528f3b481f9dac3af422f88aff","abf0998b051f4b5f9d9098fe3a7867be"]},"executionInfo":{"status":"ok","timestamp":1697207093945,"user_tz":-60,"elapsed":335,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"54143fc9-86f6-4a10-e300-16ee9e514911"},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='
, so in my case results/First Training.\n","3. `--repo-id`: the name of the Hugging Face repo you want to create or update. Itโ€™s always /\n","If the repo does not exist **it will be created automatically**\n","4. `--commit-message`: since HF repos are git repository you need to define a commit message.\n","\n","\"Push\n","\n","For instance:\n","\n","`!mlagents-push-to-hf --run-id=\"SnowballTarget1\" --local-dir=\"./results/SnowballTarget1\" --repo-id=\"ThomasSimonini/ppo-SnowballTarget\" --commit-message=\"First Push\"`"],"metadata":{"id":"KK4fPfnczunT"}},{"cell_type":"code","source":["!mlagents-push-to-hf --run-id=\"SnowballTarget1\" --local-dir=\"./results/SnowballTarget1\" --repo-id=\"ThomasSimonini/ppo-SnowballTarget\" --commit-message=\"First Push\""],"metadata":{"id":"kAFzVB7OYj_H"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"dGEFAIboLVc6","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697207191747,"user_tz":-60,"elapsed":4408,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"412789af-62ba-4ba7-9ab5-dd4087df896d"},"outputs":[{"output_type":"stream","name":"stdout","text":["[INFO] This function will create a model card and upload your SnowballTarget1 into HuggingFace Hub. This is a work in progress: If you encounter a bug, please send open an issue\n","[INFO] Pushing repo SnowballTarget1 to the Hugging Face Hub\n","SnowballTarget-149984.onnx: 0% 0.00/651k [00:00"],"metadata":{"id":"VMc4oOsE0QiZ"}},{"cell_type":"markdown","source":["1. In step 1, choose your model repository which is the model id (in my case ThomasSimonini/ppo-SnowballTarget).\n","\n","2. 
In step 2, **choose what model you want to replay**:\n"," - I have multiple ones, since we saved a model every 50000 timesteps.\n"," - But if I want the most recent one, I choose `SnowballTarget.onnx`\n","\n","👉 What’s nice **is to try different model steps to see the improvement of the agent.**\n","\n","And don't hesitate to share the best score your agent gets on discord in #rl-i-made-this channel 🔥\n","\n","Let's now try a harder environment called Pyramids..."],"metadata":{"id":"Djs8c5rR0Z8a"}},{"cell_type":"markdown","source":["## Pyramids 🏆\n","\n","### Download and move the environment zip file in `./training-envs-executables/linux/`\n","- Our environment executable is in a zip file.\n","- We need to download it and place it in `./training-envs-executables/linux/`\n","- We use a linux executable because we use colab, and the OS of colab machines is Ubuntu (linux)"],"metadata":{"id":"rVMwRi4y_tmx"}},{"cell_type":"markdown","metadata":{"id":"NyqYYkLyAVMK"},"source":["Download the file Pyramids.zip from https://drive.google.com/uc?export=download&id=1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H using `wget`. Check out the full solution to download large files from GDrive [here](https://bcrf.biochem.wisc.edu/2021/02/05/download-google-drive-files-using-wget/)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"AxojCsSVAVMP","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697207442454,"user_tz":-60,"elapsed":2411,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"e5398bab-dbcf-482f-94ae-12809eae5da8"},"outputs":[{"output_type":"stream","name":"stdout","text":["--2023-10-13 14:30:40-- https://docs.google.com/uc?export=download&confirm=t&id=1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H\n","Resolving docs.google.com (docs.google.com)... 74.125.197.138, 74.125.197.102, 74.125.197.139, ...\n","Connecting to docs.google.com (docs.google.com)|74.125.197.138|:443... 
connected.\n","HTTP request sent, awaiting response... 303 See Other\n","Location: https://doc-04-8c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9basvv5fve5ps442dm9pekn83q111ukb/1697207400000/09764732090272539193/*/1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H?e=download&uuid=9a5b3206-d62a-4673-b836-ff7744a46256 [following]\n","Warning: wildcards not supported in HTTP.\n","--2023-10-13 14:30:40-- https://doc-04-8c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9basvv5fve5ps442dm9pekn83q111ukb/1697207400000/09764732090272539193/*/1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H?e=download&uuid=9a5b3206-d62a-4673-b836-ff7744a46256\n","Resolving doc-04-8c-docs.googleusercontent.com (doc-04-8c-docs.googleusercontent.com)... 172.253.117.132, 2607:f8b0:400e:c0a::84\n","Connecting to doc-04-8c-docs.googleusercontent.com (doc-04-8c-docs.googleusercontent.com)|172.253.117.132|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 42907187 (41M) [application/zip]\n","Saving to: โ€˜./training-envs-executables/linux/Pyramids.zipโ€™\n","\n","./training-envs-exe 100%[===================>] 40.92M 40.4MB/s in 1.0s \n","\n","2023-10-13 14:30:42 (40.4 MB/s) - โ€˜./training-envs-executables/linux/Pyramids.zipโ€™ saved [42907187/42907187]\n","\n"]}],"source":["!wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id=1UiFNdKlsH0NTu32xV-giYUEVKV4-vc7H\" -O ./training-envs-executables/linux/Pyramids.zip && rm -rf /tmp/cookies.txt"]},{"cell_type":"markdown","metadata":{"id":"bfs6CTJ1AVMP"},"source":["**OR** Download directly to local machine and then drag and drop the file from local machine to 
`./training-envs-executables/linux`"]},{"cell_type":"markdown","metadata":{"id":"H7JmgOwcSSmF"},"source":["Wait for the upload to finish and then run the command below.\n","\n","![image.png]()"]},{"cell_type":"markdown","source":["Unzip it"],"metadata":{"id":"iWUUcs0_794U"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"i2E3K4V2AVMP"},"outputs":[],"source":["%%capture\n","!unzip -d ./training-envs-executables/linux/ ./training-envs-executables/linux/Pyramids.zip"]},{"cell_type":"markdown","metadata":{"id":"KmKYBgHTAVMP"},"source":["Make sure your file is accessible"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Im-nwvLPAVMP"},"outputs":[],"source":["!chmod -R 755 ./training-envs-executables/linux/Pyramids/Pyramids"]},{"cell_type":"markdown","source":["### Modify the PyramidsRND config file\n","- Contrary to the first environment which was a custom one, **Pyramids was made by the Unity team**.\n","- So the PyramidsRND config file already exists and is in ./content/ml-agents/config/ppo/PyramidsRND.yaml\n","- You might ask why \"RND\" in PyramidsRND. RND stands for *random network distillation*; it's a way to generate curiosity rewards. 
If you want to know more on that we wrote an article explaning this technique: https://medium.com/data-from-the-trenches/curiosity-driven-learning-through-random-network-distillation-488ffd8e5938\n","\n","For this training, weโ€™ll modify one thing:\n","- The total training steps hyperparameter is too high since we can hit the benchmark (mean reward = 1.75) in only 1M training steps.\n","๐Ÿ‘‰ To do that, we go to config/ppo/PyramidsRND.yaml,**and modify these to max_steps to 1000000.**\n","\n","\"Pyramids"],"metadata":{"id":"fqceIATXAgih"}},{"cell_type":"markdown","source":["As an experimentation, you should also try to modify some other hyperparameters, Unity provides a very [good documentation explaining each of them here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-Configuration-File.md).\n","\n","Weโ€™re now ready to train our agent ๐Ÿ”ฅ."],"metadata":{"id":"RI-5aPL7BWVk"}},{"cell_type":"markdown","source":["### Train the agent\n","\n","The training will take 30 to 45min depending on your machine, go take a โ˜•๏ธyou deserve it ๐Ÿค—."],"metadata":{"id":"s5hr1rvIBdZH"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"fXi4-IaHBhqD","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697209575529,"user_tz":-60,"elapsed":2045244,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"882d5b7f-fbb0-4250-8a0c-f561eebb60f0"},"outputs":[{"output_type":"stream","name":"stdout","text":["2023-10-13 14:32:12.375036: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","\n"," โ” โ•–\n"," โ•“โ•–โ•ฌโ”‚โ•ก โ”‚โ”‚โ•ฌโ•–โ•–\n"," โ•“โ•–โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ”˜ โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ•ฌโ•–\n"," โ•–โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ•ฌโ•œ โ•™โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ•–โ•– 
โ•—โ•—โ•—\n"," โ•ฌโ•ฌโ•ฌโ•ฌโ•–โ”‚โ”‚โ•ฆโ•– โ•–โ•ฌโ”‚โ”‚โ•—โ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•œโ•œโ•œ โ•Ÿโ•ฃโ•ฃ\n"," โ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•–โ”‚โ•ฌโ•–โ•–โ•“โ•ฌโ•ชโ”‚โ•“โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•’โ•ฃโ•ฃโ•–โ•—โ•ฃโ•ฃโ•ฃโ•— โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•– โ•ฃโ•ฃโ•ฃ\n"," โ•ฌโ•ฌโ•ฌโ•ฌโ” โ•™โ•ฌโ•ฌโ•ฌโ•ฌโ”‚โ•“โ•ฃโ•ฃโ•ฃโ•โ•œ โ•ซโ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฃโ•™ โ•™โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃ โ•™โ•Ÿโ•ฃโ•ฃโ•œโ•™ โ•ซโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃ\n"," โ•ฌโ•ฌโ•ฌโ•ฌโ” โ•™โ•ฌโ•ฌโ•ฃโ•ฃ โ•ซโ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃโ”Œโ•ฃโ•ฃโ•œ\n"," โ•ฌโ•ฌโ•ฌโ•œ โ•ฌโ•ฌโ•ฃโ•ฃ โ•™โ•โ•ฃโ•ฃโ•ฌ โ•™โ•ฃโ•ฃโ•ฃโ•—โ•–โ•“โ•—โ•ฃโ•ฃโ•ฃโ•œ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฆโ•“ โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃ\n"," โ•™ โ•“โ•ฆโ•– โ•ฌโ•ฌโ•ฃโ•ฃ โ•“โ•—โ•—โ•– โ•™โ•โ•ฃโ•ฃโ•ฃโ•ฃโ•โ•œ โ•˜โ•โ•โ•œ โ•โ•โ• โ•โ•โ• โ•™โ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฃ\n"," โ•ฉโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฆโ•ฆโ•ฌโ•ฌโ•ฃโ•ฃโ•—โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ• โ•ซโ•ฃโ•ฃโ•ฃโ•ฃ\n"," โ•™โ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•โ•œ\n"," โ•™โ•ฌโ•ฌโ•ฌโ•ฃโ•ฃโ•ฃโ•œ\n"," โ•™\n"," \n"," Version information:\n"," ml-agents: 1.1.0.dev0,\n"," ml-agents-envs: 1.1.0.dev0,\n"," Communicator API: 1.5.0,\n"," PyTorch: 2.0.1+cu118\n","[INFO] Connected to Unity environment with package version 2.2.1-exp.1 and communication version 1.5.0\n","[INFO] Connected new brain: Pyramids?team=0\n","[INFO] Hyperparameters for behavior name Pyramids: \n","\ttrainer_type:\tppo\n","\thyperparameters:\t\n","\t batch_size:\t128\n","\t buffer_size:\t2048\n","\t learning_rate:\t0.0003\n","\t beta:\t0.01\n","\t epsilon:\t0.2\n","\t lambd:\t0.95\n","\t num_epoch:\t3\n","\t shared_critic:\tFalse\n","\t learning_rate_schedule:\tlinear\n","\t beta_schedule:\tlinear\n","\t epsilon_schedule:\tlinear\n","\tcheckpoint_interval:\t500000\n","\tnetwork_settings:\t\n","\t normalize:\tFalse\n","\t hidden_units:\t512\n","\t num_layers:\t2\n","\t vis_encode_type:\tsimple\n","\t memory:\tNone\n","\t 
goal_conditioning_type:\thyper\n","\t deterministic:\tFalse\n","\treward_signals:\t\n","\t extrinsic:\t\n","\t gamma:\t0.99\n","\t strength:\t1.0\n","\t network_settings:\t\n","\t normalize:\tFalse\n","\t hidden_units:\t128\n","\t num_layers:\t2\n","\t vis_encode_type:\tsimple\n","\t memory:\tNone\n","\t goal_conditioning_type:\thyper\n","\t deterministic:\tFalse\n","\t rnd:\t\n","\t gamma:\t0.99\n","\t strength:\t0.01\n","\t network_settings:\t\n","\t normalize:\tFalse\n","\t hidden_units:\t64\n","\t num_layers:\t3\n","\t vis_encode_type:\tsimple\n","\t memory:\tNone\n","\t goal_conditioning_type:\thyper\n","\t deterministic:\tFalse\n","\t learning_rate:\t0.0001\n","\t encoding_size:\tNone\n","\tinit_path:\tNone\n","\tkeep_checkpoints:\t5\n","\teven_checkpoints:\tFalse\n","\tmax_steps:\t1000000\n","\ttime_horizon:\t128\n","\tsummary_freq:\t30000\n","\tthreaded:\tFalse\n","\tself_play:\tNone\n","\tbehavioral_cloning:\tNone\n","[INFO] Pyramids. Step: 30000. Time Elapsed: 57.790 s. Mean Reward: -1.000. Std of Reward: 0.000. Training.\n","[INFO] Pyramids. Step: 60000. Time Elapsed: 112.239 s. Mean Reward: -1.000. Std of Reward: 0.000. Training.\n","[INFO] Pyramids. Step: 90000. Time Elapsed: 166.638 s. Mean Reward: -0.867. Std of Reward: 0.516. Training.\n","[INFO] Pyramids. Step: 120000. Time Elapsed: 224.838 s. Mean Reward: -1.000. Std of Reward: 0.000. Training.\n","[INFO] Pyramids. Step: 150000. Time Elapsed: 285.002 s. Mean Reward: -0.925. Std of Reward: 0.416. Training.\n","[INFO] Pyramids. Step: 180000. Time Elapsed: 341.805 s. Mean Reward: -1.000. Std of Reward: 0.000. Training.\n","[INFO] Pyramids. Step: 210000. Time Elapsed: 401.424 s. Mean Reward: -0.374. Std of Reward: 1.111. Training.\n","[INFO] Pyramids. Step: 240000. Time Elapsed: 459.935 s. Mean Reward: -0.163. Std of Reward: 1.142. Training.\n","[INFO] Pyramids. Step: 270000. Time Elapsed: 519.300 s. Mean Reward: -0.900. Std of Reward: 0.500. Training.\n","[INFO] Pyramids. Step: 300000. 
Time Elapsed: 581.630 s. Mean Reward: -0.639. Std of Reward: 0.874. Training.\n","[INFO] Pyramids. Step: 330000. Time Elapsed: 645.060 s. Mean Reward: 0.113. Std of Reward: 1.216. Training.\n","[INFO] Pyramids. Step: 360000. Time Elapsed: 706.423 s. Mean Reward: -0.089. Std of Reward: 1.167. Training.\n","[INFO] Pyramids. Step: 390000. Time Elapsed: 769.370 s. Mean Reward: 0.226. Std of Reward: 1.213. Training.\n","[INFO] Pyramids. Step: 420000. Time Elapsed: 827.123 s. Mean Reward: 0.019. Std of Reward: 1.220. Training.\n","[INFO] Pyramids. Step: 450000. Time Elapsed: 887.729 s. Mean Reward: 0.595. Std of Reward: 1.225. Training.\n","[INFO] Pyramids. Step: 480000. Time Elapsed: 951.806 s. Mean Reward: 0.828. Std of Reward: 1.137. Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/Pyramids Training/Pyramids/Pyramids-499967.onnx\n","[INFO] Pyramids. Step: 510000. Time Elapsed: 1016.865 s. Mean Reward: 1.057. Std of Reward: 1.036. Training.\n","[INFO] Pyramids. Step: 540000. Time Elapsed: 1078.715 s. Mean Reward: 0.713. Std of Reward: 1.172. Training.\n","[INFO] Pyramids. Step: 570000. Time Elapsed: 1139.891 s. Mean Reward: 0.968. Std of Reward: 1.098. Training.\n","[INFO] Pyramids. Step: 600000. Time Elapsed: 1202.891 s. Mean Reward: 0.862. Std of Reward: 1.172. Training.\n","[INFO] Pyramids. Step: 630000. Time Elapsed: 1263.803 s. Mean Reward: 0.896. Std of Reward: 1.127. Training.\n","[INFO] Pyramids. Step: 660000. Time Elapsed: 1324.749 s. Mean Reward: 0.915. Std of Reward: 1.079. Training.\n","[INFO] Pyramids. Step: 690000. Time Elapsed: 1387.112 s. Mean Reward: 1.132. Std of Reward: 0.993. Training.\n","[INFO] Pyramids. Step: 720000. Time Elapsed: 1449.411 s. Mean Reward: 0.963. Std of Reward: 1.082. Training.\n","[INFO] Pyramids. Step: 750000. 
Time Elapsed: 1512.370 s. Mean Reward: 1.410. Std of Reward: 0.718. Training.\n","[INFO] Pyramids. Step: 780000. Time Elapsed: 1576.158 s. Mean Reward: 1.171. Std of Reward: 0.886. Training.\n","[INFO] Pyramids. Step: 810000. Time Elapsed: 1636.217 s. Mean Reward: 1.238. Std of Reward: 0.865. Training.\n","[INFO] Pyramids. Step: 840000. Time Elapsed: 1700.688 s. Mean Reward: 1.406. Std of Reward: 0.758. Training.\n","[INFO] Pyramids. Step: 870000. Time Elapsed: 1762.539 s. Mean Reward: 1.073. Std of Reward: 1.067. Training.\n","[INFO] Pyramids. Step: 900000. Time Elapsed: 1825.113 s. Mean Reward: 1.366. Std of Reward: 0.745. Training.\n","[INFO] Pyramids. Step: 930000. Time Elapsed: 1892.548 s. Mean Reward: 1.382. Std of Reward: 0.795. Training.\n","[INFO] Pyramids. Step: 960000. Time Elapsed: 1954.001 s. Mean Reward: 1.404. Std of Reward: 0.773. Training.\n","[INFO] Pyramids. Step: 990000. Time Elapsed: 2020.626 s. Mean Reward: 1.405. Std of Reward: 0.700. Training.\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/Pyramids Training/Pyramids/Pyramids-999876.onnx\n","============= Diagnostic Run torch.onnx.export version 2.0.1+cu118 =============\n","verbose: False, log level: Level.ERROR\n","======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================\n","\n","[INFO] Exported results/Pyramids Training/Pyramids/Pyramids-1000004.onnx\n","[INFO] Copied results/Pyramids Training/Pyramids/Pyramids-1000004.onnx to results/Pyramids Training/Pyramids.onnx.\n"]}],"source":["!mlagents-learn ./config/ppo/PyramidsRND.yaml --env=./training-envs-executables/linux/Pyramids/Pyramids --run-id=\"Pyramids Training\" --no-graphics"]},{"cell_type":"markdown","metadata":{"id":"txonKxuSByut"},"source":["### Push the agent to the ๐Ÿค— Hub\n","\n","- Now that we trained our 
agent, weโ€™re **ready to push it to the Hub to be able to visualize it playing on your browser๐Ÿ”ฅ.**"]},{"cell_type":"code","source":["!mlagents-push-to-hf --run-id=\"Pyramids\" --local-dir=\"./results/Pyramids Training\" --repo-id=\"jake-walker/ppo-Pyramids\" --commit-message=\"Initial commit\""],"metadata":{"id":"yiEQbv7rB4mU","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697209891309,"user_tz":-60,"elapsed":4221,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"2d31ba07-54ee-4a6c-89bc-d6324e9f5bdb"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["[INFO] This function will create a model card and upload your Pyramids into HuggingFace Hub. This is a work in progress: If you encounter a bug, please send open an issue\n","[INFO] Pushing repo Pyramids to the Hugging Face Hub\n","Pyramids.onnx: 0% 0.00/1.42M [00:00
Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
"}},"197f3a0f7b2b4618aaa2c0abcc121768":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_ebda32c0c8d94222b1bd72cedab9a01c","placeholder":"โ€‹","style":"IPY_MODEL_b03c362004d24d65aa07a298d9cbf7d6","value":""}},"0de73d47456740c8a7aec3a2c23429e4":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_1d64929e8b8d4dc18a2175c82567e1c7","style":"IPY_MODEL_ac0b7664442f4c68bd72781ddb87ea9e","value":true}},"dbb882b2352b44cb8d0a7646b3ef0a96":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_47893006c1f54e96adaffd43521d4c16","style":"IPY_MODEL_83fd942f1b9e4df09cf684609dec4ee7","tooltip":""}},"48c1ef961a6942199d437ba1b8456f6f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_80c35c2596fe4098b77e9ab7b4afba34","placeholder":"โ€‹","style":"IPY_MODEL_3a38f167af43410b9dc9f279cc7b5a01","value":"\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
"}},"357cfe11cb4f458bb70372a6d1209262":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"ae25137c62ab4c6aa00ea707937c68bd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2196e1264505454098d83d5a986c122f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ebda32c0c8d94222b1bd72cedab9a01c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b03c362004d24d65aa07a298d9cbf7d6":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1d64929e8b8d4dc18a2175c82567e1c7":{"model_module":"@
jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ac0b7664442f4c68bd72781ddb87ea9e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"47893006c1f54e96adaffd43521d4c16":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":nul
l,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"83fd942f1b9e4df09cf684609dec4ee7":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"80c35c2596fe4098b77e9ab7b4afba34":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3a38f167af43410b9dc9f279cc7b5a01":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","
model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d54984153efa4edc957d350eda7d8f93":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2f90a2040a854b83a2ec49155655c495","placeholder":"โ€‹","style":"IPY_MODEL_22da873132224b989e009434a3369a68","value":"Connecting..."}},"2f90a2040a854b83a2ec49155655c495":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"22da873132224b989e009434a3369a68":{"model_module":"@jupyter-widgets/controls","model_nam
e":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"249ca35c9a7946d2b4f17f6c3eddc0ab":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f4979c13430b456ea912de1c06712416","placeholder":"โ€‹","style":"IPY_MODEL_95fda6ca627a40f6a0cea7450dff1ba4","value":"Token is valid (permission: write)."}},"c50f669c6c6d4033888d1e4cf549b7ca":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c2b4330ea8b9461ca1b137d100988f13","placeholder":"โ€‹","style":"IPY_MODEL_44ac7a9c241c47ec97260fbed13aa199","value":"Your token has been saved in your configured git credential helpers 
(store)."}},"398634082d6c4fd49b0b518b6c20e3e3":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_243b25a12f2442049b65f25b6154e07c","placeholder":"โ€‹","style":"IPY_MODEL_bc379cef55ae4a70973b5ff21a867e5a","value":"Your token has been saved to /root/.cache/huggingface/token"}},"d3c41cdcff7d4056a45b1894f83bfcb4":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_280358528f3b481f9dac3af422f88aff","placeholder":"โ€‹","style":"IPY_MODEL_abf0998b051f4b5f9d9098fe3a7867be","value":"Login 
successful"}},"f4979c13430b456ea912de1c06712416":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"95fda6ca627a40f6a0cea7450dff1ba4":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c2b4330ea8b9461ca1b137d100988f13":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto
_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"44ac7a9c241c47ec97260fbed13aa199":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"243b25a12f2442049b65f25b6154e07c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bc379cef55ae4a70973b5ff21a867e5a":{"model_module":"@j
upyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"280358528f3b481f9dac3af422f88aff":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"abf0998b051f4b5f9d9098fe3a7867be":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} \ No newline at end of file diff --git a/HF DeepRL Course/Unit6 - Advantage Actor Critic.ipynb b/HF DeepRL Course/Unit6 - Advantage Actor Critic.ipynb new file 
mode 100644 index 0000000..219d435 --- /dev/null +++ b/HF DeepRL Course/Unit6 - Advantage Actor Critic.ipynb @@ -0,0 +1 @@ +{"cells":[{"cell_type":"markdown","metadata":{"id":"-PTReiOw-RAN"},"source":["# Unit 6: Advantage Actor Critic (A2C) using Robotics Simulations with Panda-Gym ๐Ÿค–\n","\n","\"Thumbnail\"/\n","\n","In this notebook, you'll learn to use A2C with [Panda-Gym](https://github.com/qgallouedec/panda-gym). You're going **to train a robotic arm** (Franka Emika Panda robot) to perform a task:\n","\n","- `Reach`: the robot must place its end-effector at a target position.\n","\n","After that, you'll be able **to train in other robotics tasks**.\n"]},{"cell_type":"markdown","metadata":{"id":"QInFitfWno1Q"},"source":["### ๐ŸŽฎ Environments:\n","\n","- [Panda-Gym](https://github.com/qgallouedec/panda-gym)\n","\n","###๐Ÿ“š RL-Library:\n","\n","- [Stable-Baselines3](https://stable-baselines3.readthedocs.io/)"]},{"cell_type":"markdown","metadata":{"id":"2CcdX4g3oFlp"},"source":["We're constantly trying to improve our tutorials, so **if you find some issues in this notebook**, please [open an issue on the GitHub Repo](https://github.com/huggingface/deep-rl-class/issues)."]},{"cell_type":"markdown","metadata":{"id":"MoubJX20oKaQ"},"source":["## Objectives of this notebook ๐Ÿ†\n","\n","At the end of the notebook, you will:\n","\n","- Be able to use **Panda-Gym**, the environment library.\n","- Be able to **train robots using A2C**.\n","- Understand why **we need to normalize the input**.\n","- Be able to **push your trained agent and the code to the Hub** with a nice video replay and an evaluation score ๐Ÿ”ฅ.\n","\n","\n"]},{"cell_type":"markdown","metadata":{"id":"DoUNkTExoUED"},"source":["## This notebook is from the Deep Reinforcement Learning Course\n","\"Deep\n","\n","In this free course, you will:\n","\n","- ๐Ÿ“– Study Deep Reinforcement Learning in **theory and practice**.\n","- ๐Ÿง‘โ€๐Ÿ’ป Learn to **use famous Deep RL libraries** such as Stable 
Baselines3, RL Baselines3 Zoo, CleanRL and Sample Factory 2.0.\n","- ๐Ÿค– Train **agents in unique environments**\n","\n","And more check ๐Ÿ“š the syllabus ๐Ÿ‘‰ https://simoninithomas.github.io/deep-rl-course\n","\n","Donโ€™t forget to **sign up to the course** (we are collecting your email to be able toย **send you the links when each Unit is published and give you information about the challenges and updates).**\n","\n","\n","The best way to keep in touch is to join our discord server to exchange with the community and with us ๐Ÿ‘‰๐Ÿป https://discord.gg/ydHrjt3WP5"]},{"cell_type":"markdown","metadata":{"id":"BTuQAUAPoa5E"},"source":["## Prerequisites ๐Ÿ—๏ธ\n","Before diving into the notebook, you need to:\n","\n","๐Ÿ”ฒ ๐Ÿ“š Study [Actor-Critic methods by reading Unit 6](https://huggingface.co/deep-rl-course/unit6/introduction) ๐Ÿค— "]},{"cell_type":"markdown","metadata":{"id":"iajHvVDWoo01"},"source":["# Let's train our first robots ๐Ÿค–"]},{"cell_type":"markdown","metadata":{"id":"zbOENTE2os_D"},"source":["To validate this hands-on for the [certification process](https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process), you need to push your trained model to the Hub and get the following results:\n","\n","- `PandaReachDense-v3` get a result of >= -3.5.\n","\n","To find your result, go to the [leaderboard](https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard) and find your model, **the result = mean_reward - std of reward**\n","\n","For more information about the certification process, check this section ๐Ÿ‘‰ https://huggingface.co/deep-rl-course/en/unit0/introduction#certification-process"]},{"cell_type":"markdown","metadata":{"id":"PU4FVzaoM6fC"},"source":["## Set the GPU ๐Ÿ’ช\n","- To **accelerate the agent's training, we'll use a GPU**. 
To do that, go to `Runtime > Change Runtime type`\n","\n","\"GPU"]},{"cell_type":"markdown","metadata":{"id":"KV0NyFdQM9ZG"},"source":["- `Hardware Accelerator > GPU`\n","\n","\"GPU"]},{"cell_type":"markdown","metadata":{"id":"bTpYcVZVMzUI"},"source":["## Create a virtual display ๐Ÿ”ฝ\n","\n","During the notebook, we'll need to generate a replay video. To do so, with colab, **we need to have a virtual screen to be able to render the environment** (and thus record the frames).\n","\n","Hence the following cell will install the librairies and create and run a virtual screen ๐Ÿ–ฅ"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jV6wjQ7Be7p5"},"outputs":[],"source":["%%capture\n","!apt install python-opengl\n","!apt install ffmpeg\n","!apt install xvfb\n","!pip3 install pyvirtualdisplay"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":520,"status":"ok","timestamp":1697802125983,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"ww5PQH1gNLI4","outputId":"405df85f-0516-4518-8a0a-aecb1685ff3e"},"outputs":[{"data":{"text/plain":[""]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["# Virtual display\n","from pyvirtualdisplay import Display\n","\n","virtual_display = Display(visible=0, size=(1400, 900))\n","virtual_display.start()"]},{"cell_type":"markdown","metadata":{"id":"e1obkbdJ_KnG"},"source":["### Install dependencies ๐Ÿ”ฝ\n","\n","The first step is to install the dependencies, weโ€™ll install multiple ones:\n","- `gymnasium`\n","- `panda-gym`: Contains the robotics arm environments.\n","- `stable-baselines3`: The SB3 deep reinforcement learning library.\n","- `huggingface_sb3`: Additional code for Stable-baselines3 to load and upload models from the Hugging Face ๐Ÿค— Hub.\n","- `huggingface_hub`: Library allowing anyone to work with the Hub repositories.\n","\n","โฒ The installation can **take 10 
minutes**."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":22726,"status":"ok","timestamp":1697802148707,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"TgZUkjKYSgvn","outputId":"0eb69833-0b13-4bf0-dd21-1792727fe05b"},"outputs":[{"name":"stdout","output_type":"stream","text":["Collecting stable-baselines3[extra]\n"," Downloading stable_baselines3-2.1.0-py3-none-any.whl (178 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m178.7/178.7 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting gymnasium<0.30,>=0.28.1 (from stable-baselines3[extra])\n"," Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m953.9/953.9 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (1.23.5)\n","Requirement already satisfied: torch>=1.13 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (2.1.0+cu118)\n","Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (2.2.1)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (1.5.3)\n","Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (3.7.1)\n","Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (4.8.0.76)\n","Requirement already satisfied: pygame in 
/usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (2.5.2)\n","Requirement already satisfied: tensorboard>=2.9.1 in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (2.13.0)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (5.9.5)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (4.66.1)\n","Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (13.6.0)\n","Collecting shimmy[atari]~=1.1.0 (from stable-baselines3[extra])\n"," Downloading Shimmy-1.1.0-py3-none-any.whl (37 kB)\n","Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from stable-baselines3[extra]) (9.4.0)\n","Collecting autorom[accept-rom-license]~=0.6.1 (from stable-baselines3[extra])\n"," Downloading AutoROM-0.6.1-py3-none-any.whl (9.4 kB)\n","Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (8.1.7)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (2.31.0)\n","Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra])\n"," Downloading AutoROM.accept-rom-license-0.6.1.tar.gz (434 kB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m434.7/434.7 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n"," Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n"," Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium<0.30,>=0.28.1->stable-baselines3[extra]) (4.5.0)\n","Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable-baselines3[extra])\n"," Using cached Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)\n","Collecting ale-py~=0.8.1 (from shimmy[atari]~=1.1.0->stable-baselines3[extra])\n"," Downloading ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.4.0)\n","Requirement already satisfied: grpcio>=1.48.2 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.59.0)\n","Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (2.17.3)\n","Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (1.0.0)\n","Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (3.5)\n","Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (3.20.3)\n","Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (67.7.2)\n","Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in 
/usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (0.7.1)\n","Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (3.0.0)\n","Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.10/dist-packages (from tensorboard>=2.9.1->stable-baselines3[extra]) (0.41.2)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (3.12.4)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (3.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (3.1.2)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (2023.6.0)\n","Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13->stable-baselines3[extra]) (2.1.0)\n","Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (1.1.1)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (0.12.1)\n","Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (4.43.1)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (1.4.5)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (23.2)\n","Requirement 
already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (3.1.1)\n","Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->stable-baselines3[extra]) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->stable-baselines3[extra]) (2023.3.post1)\n","Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->stable-baselines3[extra]) (3.0.0)\n","Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->stable-baselines3[extra]) (2.16.1)\n","Requirement already satisfied: importlib-resources in /usr/local/lib/python3.10/dist-packages (from ale-py~=0.8.1->shimmy[atari]~=1.1.0->stable-baselines3[extra]) (6.1.0)\n","Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]) (5.3.1)\n","Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]) (0.3.0)\n","Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]) (1.16.0)\n","Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.10/dist-packages (from google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]) (4.9)\n","Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard>=2.9.1->stable-baselines3[extra]) (1.3.1)\n","Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->stable-baselines3[extra]) (0.1.2)\n","Requirement already satisfied: 
charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]) (2023.7.22)\n","Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard>=2.9.1->stable-baselines3[extra]) (2.1.3)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13->stable-baselines3[extra]) (1.3.0)\n","Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /usr/local/lib/python3.10/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.9.1->stable-baselines3[extra]) (0.5.0)\n","Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard>=2.9.1->stable-baselines3[extra]) (3.2.2)\n","Building wheels for collected packages: AutoROM.accept-rom-license\n"," Building wheel for AutoROM.accept-rom-license (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for AutoROM.accept-rom-license: filename=AutoROM.accept_rom_license-0.6.1-py3-none-any.whl size=446660 sha256=84d117179bdc642a07fdd784f2ce51b95d2badf0767dc5c12174cd11df782bac\n"," Stored in directory: /root/.cache/pip/wheels/6b/1b/ef/a43ff1a2f1736d5711faa1ba4c1f61be1131b8899e6a057811\n","Successfully built AutoROM.accept-rom-license\n","Installing collected packages: farama-notifications, gymnasium, ale-py, shimmy, AutoROM.accept-rom-license, autorom, stable-baselines3\n","Successfully installed AutoROM.accept-rom-license-0.6.1 ale-py-0.8.1 autorom-0.6.1 farama-notifications-0.0.4 gymnasium-0.29.1 shimmy-1.1.0 stable-baselines3-2.1.0\n","Requirement already satisfied: gymnasium in /usr/local/lib/python3.10/dist-packages (0.29.1)\n","Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.23.5)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (2.2.1)\n","Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.5.0)\n","Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (0.0.4)\n"]}],"source":["!pip install stable-baselines3[extra]\n","!pip install gymnasium"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":209251,"status":"ok","timestamp":1697802565927,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"ABneW6tOSpyU","outputId":"12ecb851-feb6-43f6-a946-97cdf70f4976"},"outputs":[{"name":"stdout","output_type":"stream","text":["Collecting huggingface_sb3\n"," Downloading huggingface_sb3-3.0-py3-none-any.whl (9.7 kB)\n","Collecting huggingface-hub~=0.8 (from huggingface_sb3)\n"," Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)\n","\u001b[2K 
\u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pyyaml~=6.0 in /usr/local/lib/python3.10/dist-packages (from huggingface_sb3) (6.0.1)\n","Requirement already satisfied: wasabi in /usr/local/lib/python3.10/dist-packages (from huggingface_sb3) (1.1.2)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from huggingface_sb3) (1.23.5)\n","Requirement already satisfied: cloudpickle>=1.6 in /usr/local/lib/python3.10/dist-packages (from huggingface_sb3) (2.2.1)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (3.12.4)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (2023.6.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (2.31.0)\n","Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (4.66.1)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (4.5.0)\n","Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub~=0.8->huggingface_sb3) (23.2)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in 
/usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub~=0.8->huggingface_sb3) (2023.7.22)\n","Installing collected packages: huggingface-hub, huggingface_sb3\n","Successfully installed huggingface-hub-0.18.0 huggingface_sb3-3.0\n","Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (0.18.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (3.12.4)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (2023.6.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (2.31.0)\n","Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.66.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (6.0.1)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.5.0)\n","Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (23.2)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (3.3.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub) (2023.7.22)\n","Collecting panda_gym\n"," Downloading 
panda_gym-3.0.7-py3-none-any.whl (23 kB)\n","Requirement already satisfied: gymnasium>=0.26 in /usr/local/lib/python3.10/dist-packages (from panda_gym) (0.29.1)\n","Collecting pybullet (from panda_gym)\n"," Downloading pybullet-3.2.5.tar.gz (80.5 MB)\n","\u001b[2K \u001b[90mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m80.5/80.5 MB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from panda_gym) (1.23.5)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from panda_gym) (1.11.3)\n","Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium>=0.26->panda_gym) (2.2.1)\n","Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium>=0.26->panda_gym) (4.5.0)\n","Requirement already satisfied: farama-notifications>=0.0.1 in /usr/local/lib/python3.10/dist-packages (from gymnasium>=0.26->panda_gym) (0.0.4)\n","Building wheels for collected packages: pybullet\n"," Building wheel for pybullet (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for pybullet: filename=pybullet-3.2.5-cp310-cp310-linux_x86_64.whl size=99850132 sha256=98a6b68785984d0f2c1b5cb18bc101d6d13f771e8825e0ca98af00c38d343a57\n"," Stored in directory: /root/.cache/pip/wheels/6b/fa/1a/c315a5133f0c9bf202a6daa5d70891120e7fe403e06e3407cc\n","Successfully built pybullet\n","Installing collected packages: pybullet, panda_gym\n","Successfully installed panda_gym-3.0.7 pybullet-3.2.5\n"]}],"source":["!pip install huggingface_sb3\n","!pip install huggingface_hub\n","!pip install panda_gym"]},{"cell_type":"markdown","metadata":{"id":"QTep3PQQABLr"},"source":["## Import the packages ๐Ÿ“ฆ"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"HpiB8VdnQ7Bk"},"outputs":[],"source":["import os\n","\n","import gymnasium as gym\n","import panda_gym\n","\n","from huggingface_sb3 import load_from_hub, package_to_hub\n","\n","from stable_baselines3 import A2C\n","from stable_baselines3.common.evaluation import evaluate_policy\n","from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize\n","from stable_baselines3.common.env_util import make_vec_env\n","\n","from huggingface_hub import notebook_login"]},{"cell_type":"markdown","metadata":{"id":"lfBwIS_oAVXI"},"source":["## PandaReachDense-v3 ๐Ÿฆพ\n","\n","The agent we're going to train is a robotic arm that needs to do controls (moving the arm and using the end-effector).\n","\n","In robotics, the *end-effector* is the device at the end of a robotic arm designed to interact with the environment.\n","\n","In `PandaReach`, the robot must place its end-effector at a target position (green ball).\n","\n","We're going to use the dense version of this environment. It means we'll get a *dense reward function* that **will provide a reward at each timestep** (the closer the agent is to completing the task, the higher the reward). 
This is in contrast to a *sparse reward function*, where the environment **returns a reward if and only if the task is completed**.\n","\n","Also, we're going to use *end-effector displacement control*, which means the **action corresponds to the displacement of the end-effector**. We don't control the individual motion of each joint (joint control).\n","\n","\"Robotics\"/\n","\n","\n","This way **the training will be easier**.\n","\n"]},{"cell_type":"markdown","metadata":{"id":"frVXOrnlBerQ"},"source":["### Create the environment\n","\n","#### The environment 🎮\n","\n","In `PandaReachDense-v3` the robotic arm must place its end-effector at a target position (green ball)."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zXzAu3HYF1WD"},"outputs":[],"source":["env_id = \"PandaReachDense-v3\"\n","\n","# Create the env\n","env = gym.make(env_id)\n","\n","# Get the state space and action space\n","s_size = env.observation_space.shape\n","a_size = env.action_space"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":242,"status":"ok","timestamp":1697794230186,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"E-U9dexcF-FB","outputId":"df255efb-1f0a-4e6c-be83-ffb580ea5c9a"},"outputs":[{"name":"stdout","output_type":"stream","text":["_____OBSERVATION SPACE_____ \n","\n","The State Space is: None\n","Sample observation OrderedDict([('achieved_goal', array([ 0.36307505, -0.69471014, -6.307691 ], dtype=float32)), ('desired_goal', array([ 1.7523677 , -0.59887403, 8.890521 ], dtype=float32)), ('observation', array([-4.9718113, -2.5216153, 0.5674409, 5.2952337, -3.1501577,\n"," -1.5565605], dtype=float32))])\n"]}],"source":["print(\"_____OBSERVATION SPACE_____ \\n\")\n","print(\"The State Space is: \", s_size)\n","print(\"Sample observation\", env.observation_space.sample()) # Get a random
observation"]},{"cell_type":"markdown","metadata":{"id":"g_JClfElGFnF"},"source":["The observation space **is a dictionary with 3 different elements**:\n","- `achieved_goal`: (x,y,z) current position of the end-effector.\n","- `desired_goal`: (x,y,z) position of the target the end-effector must reach.\n","- `observation`: position (x,y,z) and velocity (vx, vy, vz) of the end-effector.\n","\n","Given it's a dictionary as observation, **we will need to use a MultiInputPolicy policy instead of MlpPolicy**."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":246,"status":"ok","timestamp":1697794239423,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"ib1Kxy4AF-FC","outputId":"1f77638f-48ec-499b-8e29-f3e97b21fde1"},"outputs":[{"name":"stdout","output_type":"stream","text":["\n"," _____ACTION SPACE_____ \n","\n","The Action Space is: Box(-1.0, 1.0, (3,), float32)\n","Action Space Sample [-0.3927638 0.39380783 0.7919843 ]\n"]}],"source":["print(\"\\n _____ACTION SPACE_____ \\n\")\n","print(\"The Action Space is: \", a_size)\n","print(\"Action Space Sample\", env.action_space.sample()) # Take a random action"]},{"cell_type":"markdown","metadata":{"id":"5MHTHEHZS4yp"},"source":["The action space is a vector with 3 values:\n","- Control x, y, z movement"]},{"cell_type":"markdown","metadata":{"id":"S5sXcg469ysB"},"source":["### Normalize observation and rewards"]},{"cell_type":"markdown","metadata":{"id":"1ZyX6qf3Zva9"},"source":["A good practice in reinforcement learning is to [normalize input features](https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html).\n","\n","For that purpose, there is a wrapper that will compute a running average and standard deviation of input features.\n","\n","We also normalize rewards with this same wrapper by adding `norm_reward = True`\n","\n","[You should check the documentation to fill this
cell](https://stable-baselines3.readthedocs.io/en/master/guide/vec_envs.html#vecnormalize)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":633,"status":"ok","timestamp":1697794412785,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"1RsDtHHAQ9Ie","outputId":"483874a0-bb0e-492b-b0fb-daa1bb36f397"},"outputs":[{"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n"," and should_run_async(code)\n"]}],"source":["env = make_vec_env(env_id, n_envs=4)\n","\n","# Adding this wrapper to normalize the observation and the reward\n","env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)"]},{"cell_type":"markdown","metadata":{"id":"tF42HvI7-gs5"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"2O67mqgC-hol"},"outputs":[],"source":["env = make_vec_env(env_id, n_envs=4)\n","\n","env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)"]},{"cell_type":"markdown","metadata":{"id":"4JmEVU6z1ZA-"},"source":["### Create the A2C Model ๐Ÿค–\n","\n","For more information about A2C implementation with StableBaselines3 check: https://stable-baselines3.readthedocs.io/en/master/modules/a2c.html#notes\n","\n","To find the best parameters I checked the [official trained agents by Stable-Baselines3 team](https://huggingface.co/sb3)."]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":8196,"status":"ok","timestamp":1697794632312,"user":{"displayName":"Jake 
Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"vR3T4qFt164I","outputId":"220ade29-7ee6-4ac2-d809-e897ebea5f30"},"outputs":[{"name":"stdout","output_type":"stream","text":["Using cuda device\n"]}],"source":["model = A2C(policy=\"MultiInputPolicy\", env=env, verbose=1)"]},{"cell_type":"markdown","metadata":{"id":"nWAuOOLh-oQf"},"source":["#### Solution"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"FKFLY54T-pU1"},"outputs":[],"source":["model = A2C(policy = \"MultiInputPolicy\",\n"," env = env,\n"," verbose=1)"]},{"cell_type":"markdown","metadata":{"id":"opyK3mpJ1-m9"},"source":["### Train the A2C agent ๐Ÿƒ\n","- Let's train our agent for 1,000,000 timesteps, don't forget to use GPU on Colab. It will take approximately ~25-40min"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":293328,"status":"ok","timestamp":1697797343648,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"4TuGHZD7RF1G","outputId":"f9683cf1-cdb3-456b-9701-5393b764cafe"},"outputs":[{"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. 
Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n"," and should_run_async(code)\n"]},{"name":"stdout","output_type":"stream","text":["\u001b[1;30;43mStreaming output truncated to the last 5000 lines.\u001b[0m\n","| time/ | |\n","| fps | 370 |\n","| iterations | 22300 |\n","| time_elapsed | 1203 |\n","| total_timesteps | 446000 |\n","| train/ | |\n","| entropy_loss | -1.59 |\n","| explained_variance | 0.949 |\n","| learning_rate | 0.0007 |\n","| n_updates | 22299 |\n","| policy_loss | 0.00934 |\n","| std | 0.423 |\n","| value_loss | 0.000212 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.9 |\n","| ep_rew_mean | -0.24 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 22400 |\n","| time_elapsed | 1209 |\n","| total_timesteps | 448000 |\n","| train/ | |\n","| entropy_loss | -1.61 |\n","| explained_variance | 0.987 |\n","| learning_rate | 0.0007 |\n","| n_updates | 22399 |\n","| policy_loss | -0.0015 |\n","| std | 0.425 |\n","| value_loss | 0.000121 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.72 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 22500 |\n","| time_elapsed | 1214 |\n","| total_timesteps | 450000 |\n","| train/ | |\n","| entropy_loss | -1.61 |\n","| explained_variance | 0.956 |\n","| learning_rate | 0.0007 |\n","| n_updates | 22499 |\n","| policy_loss | -0.0133 |\n","| std | 0.425 |\n","| value_loss | 0.000201 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.203 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 22600 |\n","| time_elapsed | 1219 |\n","| total_timesteps | 452000 |\n","| train/ | |\n","| entropy_loss | -1.59 |\n","| 
explained_variance | 0.792 |\n","| learning_rate | 0.0007 |\n","| n_updates | 22599 |\n","| policy_loss | -0.02 |\n","| std | 0.422 |\n","| value_loss | 0.00106 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.72 |\n","| ep_rew_mean | -0.209 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 22700 |\n","| time_elapsed | 1225 |\n","| total_timesteps | 454000 |\n","| train/ | |\n","| entropy_loss | -1.6 |\n","| explained_variance | 0.928 |\n","| learning_rate | 0.0007 |\n","| n_updates | 22699 |\n","| policy_loss | 0.0197 |\n","| std | 0.423 |\n","| value_loss | 0.000372 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.73 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 22800 |\n","| time_elapsed | 1231 |\n","| total_timesteps | 456000 |\n","| train/ | |\n","| entropy_loss | -1.59 |\n","| explained_variance | 0.978 |\n","| learning_rate | 0.0007 |\n","| n_updates | 22799 |\n","| policy_loss | 0.00215 |\n","| std | 0.423 |\n","| value_loss | 0.000168 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 22900 |\n","| time_elapsed | 1237 |\n","| total_timesteps | 458000 |\n","| train/ | |\n","| entropy_loss | -1.57 |\n","| explained_variance | 0.983 |\n","| learning_rate | 0.0007 |\n","| n_updates | 22899 |\n","| policy_loss | 0.0323 |\n","| std | 0.419 |\n","| value_loss | 0.000441 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.84 |\n","| ep_rew_mean | -0.223 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 23000 |\n","| time_elapsed | 1241 |\n","| total_timesteps | 460000 |\n","| train/ | |\n","| entropy_loss | -1.55 |\n","| 
explained_variance | 0.986 |\n","| learning_rate | 0.0007 |\n","| n_updates | 22999 |\n","| policy_loss | -0.0127 |\n","| std | 0.417 |\n","| value_loss | 0.000145 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.72 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 23100 |\n","| time_elapsed | 1246 |\n","| total_timesteps | 462000 |\n","| train/ | |\n","| entropy_loss | -1.53 |\n","| explained_variance | 0.97 |\n","| learning_rate | 0.0007 |\n","| n_updates | 23099 |\n","| policy_loss | -0.00299 |\n","| std | 0.416 |\n","| value_loss | 0.000131 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.64 |\n","| ep_rew_mean | -0.199 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 23200 |\n","| time_elapsed | 1252 |\n","| total_timesteps | 464000 |\n","| train/ | |\n","| entropy_loss | -1.52 |\n","| explained_variance | 0.974 |\n","| learning_rate | 0.0007 |\n","| n_updates | 23199 |\n","| policy_loss | -0.00295 |\n","| std | 0.415 |\n","| value_loss | 0.000226 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.96 |\n","| ep_rew_mean | -0.231 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 23300 |\n","| time_elapsed | 1257 |\n","| total_timesteps | 466000 |\n","| train/ | |\n","| entropy_loss | -1.51 |\n","| explained_variance | 0.738 |\n","| learning_rate | 0.0007 |\n","| n_updates | 23299 |\n","| policy_loss | -0.0751 |\n","| std | 0.413 |\n","| value_loss | 0.00266 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 23400 |\n","| time_elapsed | 1263 |\n","| total_timesteps | 468000 |\n","| train/ | |\n","| entropy_loss | -1.51 |\n","| 
explained_variance | 0.96 |\n","| learning_rate | 0.0007 |\n","| n_updates | 23399 |\n","| policy_loss | -0.00202 |\n","| std | 0.414 |\n","| value_loss | 0.000141 |\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 23500 |\n","| time_elapsed | 1267 |\n","| total_timesteps | 470000 |\n","| train/ | |\n","| entropy_loss | -1.51 |\n","| explained_variance | 0.985 |\n","| learning_rate | 0.0007 |\n","| n_updates | 23499 |\n","| policy_loss | -0.000896 |\n","| std | 0.414 |\n","| value_loss | 0.0001 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.82 |\n","| ep_rew_mean | -0.23 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 23600 |\n","| time_elapsed | 1272 |\n","| total_timesteps | 472000 |\n","| train/ | |\n","| entropy_loss | -1.49 |\n","| explained_variance | 0.85 |\n","| learning_rate | 0.0007 |\n","| n_updates | 23599 |\n","| policy_loss | 0.0253 |\n","| std | 0.412 |\n","| value_loss | 0.000381 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.66 |\n","| ep_rew_mean | -0.199 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 23700 |\n","| time_elapsed | 1278 |\n","| total_timesteps | 474000 |\n","| train/ | |\n","| entropy_loss | -1.49 |\n","| explained_variance | 0.974 |\n","| learning_rate | 0.0007 |\n","| n_updates | 23699 |\n","| policy_loss | 0.00574 |\n","| std | 0.412 |\n","| value_loss | 0.000193 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.93 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 23800 |\n","| time_elapsed | 1283 |\n","| total_timesteps | 476000 |\n","| train/ | |\n","| entropy_loss | -1.49 |\n","| 
explained_variance | 0.946 |\n","| learning_rate | 0.0007 |\n","| n_updates | 23799 |\n","| policy_loss | 0.0576 |\n","| std | 0.414 |\n","| value_loss | 0.000613 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.85 |\n","| ep_rew_mean | -0.228 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 23900 |\n","| time_elapsed | 1289 |\n","| total_timesteps | 478000 |\n","| train/ | |\n","| entropy_loss | -1.5 |\n","| explained_variance | 0.979 |\n","| learning_rate | 0.0007 |\n","| n_updates | 23899 |\n","| policy_loss | 0.0135 |\n","| std | 0.415 |\n","| value_loss | 0.000219 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.81 |\n","| ep_rew_mean | -0.214 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 24000 |\n","| time_elapsed | 1294 |\n","| total_timesteps | 480000 |\n","| train/ | |\n","| entropy_loss | -1.49 |\n","| explained_variance | 0.903 |\n","| learning_rate | 0.0007 |\n","| n_updates | 23999 |\n","| policy_loss | 0.0169 |\n","| std | 0.414 |\n","| value_loss | 0.000477 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.212 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 24100 |\n","| time_elapsed | 1298 |\n","| total_timesteps | 482000 |\n","| train/ | |\n","| entropy_loss | -1.48 |\n","| explained_variance | 0.908 |\n","| learning_rate | 0.0007 |\n","| n_updates | 24099 |\n","| policy_loss | -0.00448 |\n","| std | 0.412 |\n","| value_loss | 0.000677 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.214 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 24200 |\n","| time_elapsed | 1304 |\n","| total_timesteps | 484000 |\n","| train/ | |\n","| entropy_loss | -1.47 |\n","| 
explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 24199 |\n","| policy_loss | 0.0186 |\n","| std | 0.411 |\n","| value_loss | 0.000364 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.77 |\n","| ep_rew_mean | -0.214 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 24300 |\n","| time_elapsed | 1309 |\n","| total_timesteps | 486000 |\n","| train/ | |\n","| entropy_loss | -1.46 |\n","| explained_variance | 0.932 |\n","| learning_rate | 0.0007 |\n","| n_updates | 24299 |\n","| policy_loss | -0.00132 |\n","| std | 0.41 |\n","| value_loss | 0.000166 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.215 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 24400 |\n","| time_elapsed | 1315 |\n","| total_timesteps | 488000 |\n","| train/ | |\n","| entropy_loss | -1.48 |\n","| explained_variance | 0.947 |\n","| learning_rate | 0.0007 |\n","| n_updates | 24399 |\n","| policy_loss | -0.00102 |\n","| std | 0.413 |\n","| value_loss | 0.000159 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.52 |\n","| ep_rew_mean | -0.195 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 24500 |\n","| time_elapsed | 1320 |\n","| total_timesteps | 490000 |\n","| train/ | |\n","| entropy_loss | -1.48 |\n","| explained_variance | 0.983 |\n","| learning_rate | 0.0007 |\n","| n_updates | 24499 |\n","| policy_loss | -0.0149 |\n","| std | 0.413 |\n","| value_loss | 0.000194 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.201 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 24600 |\n","| time_elapsed | 1325 |\n","| total_timesteps | 492000 |\n","| train/ | |\n","| entropy_loss | -1.48 |\n","| 
explained_variance | 0.938 |\n","| learning_rate | 0.0007 |\n","| n_updates | 24599 |\n","| policy_loss | -0.0183 |\n","| std | 0.413 |\n","| value_loss | 0.000225 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.82 |\n","| ep_rew_mean | -0.225 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 24700 |\n","| time_elapsed | 1330 |\n","| total_timesteps | 494000 |\n","| train/ | |\n","| entropy_loss | -1.46 |\n","| explained_variance | 0.962 |\n","| learning_rate | 0.0007 |\n","| n_updates | 24699 |\n","| policy_loss | 0.00995 |\n","| std | 0.411 |\n","| value_loss | 0.000238 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 24800 |\n","| time_elapsed | 1335 |\n","| total_timesteps | 496000 |\n","| train/ | |\n","| entropy_loss | -1.45 |\n","| explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 24799 |\n","| policy_loss | -0.00953 |\n","| std | 0.41 |\n","| value_loss | 0.000177 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.64 |\n","| ep_rew_mean | -0.214 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 24900 |\n","| time_elapsed | 1341 |\n","| total_timesteps | 498000 |\n","| train/ | |\n","| entropy_loss | -1.42 |\n","| explained_variance | 0.979 |\n","| learning_rate | 0.0007 |\n","| n_updates | 24899 |\n","| policy_loss | 0.000315 |\n","| std | 0.406 |\n","| value_loss | 5.8e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.63 |\n","| ep_rew_mean | -0.198 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 25000 |\n","| time_elapsed | 1346 |\n","| total_timesteps | 500000 |\n","| train/ | |\n","| entropy_loss | -1.38 |\n","| 
explained_variance | 0.919 |\n","| learning_rate | 0.0007 |\n","| n_updates | 24999 |\n","| policy_loss | -0.0244 |\n","| std | 0.401 |\n","| value_loss | 0.000709 |\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.204 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 25100 |\n","| time_elapsed | 1351 |\n","| total_timesteps | 502000 |\n","| train/ | |\n","| entropy_loss | -1.38 |\n","| explained_variance | 0.957 |\n","| learning_rate | 0.0007 |\n","| n_updates | 25099 |\n","| policy_loss | -0.000708 |\n","| std | 0.402 |\n","| value_loss | 0.000231 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.95 |\n","| ep_rew_mean | -0.227 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 25200 |\n","| time_elapsed | 1357 |\n","| total_timesteps | 504000 |\n","| train/ | |\n","| entropy_loss | -1.37 |\n","| explained_variance | 0.954 |\n","| learning_rate | 0.0007 |\n","| n_updates | 25199 |\n","| policy_loss | 0.0245 |\n","| std | 0.4 |\n","| value_loss | 0.00044 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.84 |\n","| ep_rew_mean | -0.221 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 25300 |\n","| time_elapsed | 1362 |\n","| total_timesteps | 506000 |\n","| train/ | |\n","| entropy_loss | -1.39 |\n","| explained_variance | 0.965 |\n","| learning_rate | 0.0007 |\n","| n_updates | 25299 |\n","| policy_loss | 0.00448 |\n","| std | 0.402 |\n","| value_loss | 0.000133 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.81 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 25400 |\n","| time_elapsed | 1367 |\n","| total_timesteps | 508000 |\n","| train/ | |\n","| entropy_loss | -1.4 |\n","| 
explained_variance | 0.364 |\n","| learning_rate | 0.0007 |\n","| n_updates | 25399 |\n","| policy_loss | -0.0354 |\n","| std | 0.405 |\n","| value_loss | 0.00417 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.8 |\n","| ep_rew_mean | -0.218 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 25500 |\n","| time_elapsed | 1373 |\n","| total_timesteps | 510000 |\n","| train/ | |\n","| entropy_loss | -1.39 |\n","| explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 25499 |\n","| policy_loss | -0.00203 |\n","| std | 0.405 |\n","| value_loss | 0.000139 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.85 |\n","| ep_rew_mean | -0.223 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 25600 |\n","| time_elapsed | 1377 |\n","| total_timesteps | 512000 |\n","| train/ | |\n","| entropy_loss | -1.39 |\n","| explained_variance | 0.712 |\n","| learning_rate | 0.0007 |\n","| n_updates | 25599 |\n","| policy_loss | 0.00121 |\n","| std | 0.403 |\n","| value_loss | 0.00179 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.219 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 25700 |\n","| time_elapsed | 1383 |\n","| total_timesteps | 514000 |\n","| train/ | |\n","| entropy_loss | -1.36 |\n","| explained_variance | 0.932 |\n","| learning_rate | 0.0007 |\n","| n_updates | 25699 |\n","| policy_loss | -0.00428 |\n","| std | 0.401 |\n","| value_loss | 0.000315 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.85 |\n","| ep_rew_mean | -0.224 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 25800 |\n","| time_elapsed | 1388 |\n","| total_timesteps | 516000 |\n","| train/ | |\n","| entropy_loss | -1.36 |\n","| 
explained_variance | 0.208 |\n","| learning_rate | 0.0007 |\n","| n_updates | 25799 |\n","| policy_loss | 0.0133 |\n","| std | 0.401 |\n","| value_loss | 0.00162 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.72 |\n","| ep_rew_mean | -0.212 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 25900 |\n","| time_elapsed | 1394 |\n","| total_timesteps | 518000 |\n","| train/ | |\n","| entropy_loss | -1.36 |\n","| explained_variance | 0.805 |\n","| learning_rate | 0.0007 |\n","| n_updates | 25899 |\n","| policy_loss | 0.00214 |\n","| std | 0.4 |\n","| value_loss | 0.00065 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.81 |\n","| ep_rew_mean | -0.225 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 26000 |\n","| time_elapsed | 1399 |\n","| total_timesteps | 520000 |\n","| train/ | |\n","| entropy_loss | -1.34 |\n","| explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 25999 |\n","| policy_loss | 0.0111 |\n","| std | 0.398 |\n","| value_loss | 0.000262 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.97 |\n","| ep_rew_mean | -0.25 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 26100 |\n","| time_elapsed | 1404 |\n","| total_timesteps | 522000 |\n","| train/ | |\n","| entropy_loss | -1.33 |\n","| explained_variance | 0.98 |\n","| learning_rate | 0.0007 |\n","| n_updates | 26099 |\n","| policy_loss | 0.00785 |\n","| std | 0.398 |\n","| value_loss | 0.000212 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.81 |\n","| ep_rew_mean | -0.219 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 26200 |\n","| time_elapsed | 1410 |\n","| total_timesteps | 524000 |\n","| train/ | |\n","| entropy_loss | -1.31 |\n","| explained_variance 
| 0.921 |\n","| learning_rate | 0.0007 |\n","| n_updates | 26199 |\n","| policy_loss | -0.00498 |\n","| std | 0.395 |\n","| value_loss | 0.000306 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.58 |\n","| ep_rew_mean | -0.199 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 26300 |\n","| time_elapsed | 1415 |\n","| total_timesteps | 526000 |\n","| train/ | |\n","| entropy_loss | -1.29 |\n","| explained_variance | 0.989 |\n","| learning_rate | 0.0007 |\n","| n_updates | 26299 |\n","| policy_loss | -0.00204 |\n","| std | 0.393 |\n","| value_loss | 6.63e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.56 |\n","| ep_rew_mean | -0.199 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 26400 |\n","| time_elapsed | 1421 |\n","| total_timesteps | 528000 |\n","| train/ | |\n","| entropy_loss | -1.29 |\n","| explained_variance | 0.971 |\n","| learning_rate | 0.0007 |\n","| n_updates | 26399 |\n","| policy_loss | 0.00476 |\n","| std | 0.393 |\n","| value_loss | 0.000169 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.202 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 26500 |\n","| time_elapsed | 1426 |\n","| total_timesteps | 530000 |\n","| train/ | |\n","| entropy_loss | -1.26 |\n","| explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 26499 |\n","| policy_loss | -0.00918 |\n","| std | 0.389 |\n","| value_loss | 0.000276 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.89 |\n","| ep_rew_mean | -0.233 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 26600 |\n","| time_elapsed | 1431 |\n","| total_timesteps | 532000 |\n","| train/ | |\n","| entropy_loss | -1.26 |\n","| explained_variance | 0.979 
|\n","| learning_rate | 0.0007 |\n","| n_updates | 26599 |\n","| policy_loss | -0.00635 |\n","| std | 0.389 |\n","| value_loss | 0.000139 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.203 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 26700 |\n","| time_elapsed | 1437 |\n","| total_timesteps | 534000 |\n","| train/ | |\n","| entropy_loss | -1.26 |\n","| explained_variance | 0.575 |\n","| learning_rate | 0.0007 |\n","| n_updates | 26699 |\n","| policy_loss | -0.0391 |\n","| std | 0.387 |\n","| value_loss | 0.00261 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.66 |\n","| ep_rew_mean | -0.202 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 26800 |\n","| time_elapsed | 1442 |\n","| total_timesteps | 536000 |\n","| train/ | |\n","| entropy_loss | -1.25 |\n","| explained_variance | 0.962 |\n","| learning_rate | 0.0007 |\n","| n_updates | 26799 |\n","| policy_loss | 0.0173 |\n","| std | 0.387 |\n","| value_loss | 0.000495 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.6 |\n","| ep_rew_mean | -0.193 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 26900 |\n","| time_elapsed | 1448 |\n","| total_timesteps | 538000 |\n","| train/ | |\n","| entropy_loss | -1.22 |\n","| explained_variance | 0.972 |\n","| learning_rate | 0.0007 |\n","| n_updates | 26899 |\n","| policy_loss | 0.00954 |\n","| std | 0.383 |\n","| value_loss | 0.000204 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.89 |\n","| ep_rew_mean | -0.224 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 27000 |\n","| time_elapsed | 1453 |\n","| total_timesteps | 540000 |\n","| train/ | |\n","| entropy_loss | -1.21 |\n","| explained_variance | 0.951 |\n","| 
learning_rate | 0.0007 |\n","| n_updates | 26999 |\n","| policy_loss | -0.00556 |\n","| std | 0.382 |\n","| value_loss | 0.000222 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.9 |\n","| ep_rew_mean | -0.242 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 27100 |\n","| time_elapsed | 1458 |\n","| total_timesteps | 542000 |\n","| train/ | |\n","| entropy_loss | -1.2 |\n","| explained_variance | 0.957 |\n","| learning_rate | 0.0007 |\n","| n_updates | 27099 |\n","| policy_loss | -0.00364 |\n","| std | 0.381 |\n","| value_loss | 0.000258 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.93 |\n","| ep_rew_mean | -0.239 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 27200 |\n","| time_elapsed | 1464 |\n","| total_timesteps | 544000 |\n","| train/ | |\n","| entropy_loss | -1.2 |\n","| explained_variance | 0.965 |\n","| learning_rate | 0.0007 |\n","| n_updates | 27199 |\n","| policy_loss | -0.00378 |\n","| std | 0.382 |\n","| value_loss | 0.000146 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.88 |\n","| ep_rew_mean | -0.229 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 27300 |\n","| time_elapsed | 1469 |\n","| total_timesteps | 546000 |\n","| train/ | |\n","| entropy_loss | -1.21 |\n","| explained_variance | 0.949 |\n","| learning_rate | 0.0007 |\n","| n_updates | 27299 |\n","| policy_loss | -0.0069 |\n","| std | 0.382 |\n","| value_loss | 0.000229 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.204 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 27400 |\n","| time_elapsed | 1475 |\n","| total_timesteps | 548000 |\n","| train/ | |\n","| entropy_loss | -1.2 |\n","| explained_variance | 0.973 |\n","| 
learning_rate | 0.0007 |\n","| n_updates | 27399 |\n","| policy_loss | -0.00456 |\n","| std | 0.382 |\n","| value_loss | 0.000196 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.92 |\n","| ep_rew_mean | -0.237 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 27500 |\n","| time_elapsed | 1480 |\n","| total_timesteps | 550000 |\n","| train/ | |\n","| entropy_loss | -1.19 |\n","| explained_variance | 0.966 |\n","| learning_rate | 0.0007 |\n","| n_updates | 27499 |\n","| policy_loss | 0.00336 |\n","| std | 0.381 |\n","| value_loss | 0.000271 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.66 |\n","| ep_rew_mean | -0.201 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 27600 |\n","| time_elapsed | 1485 |\n","| total_timesteps | 552000 |\n","| train/ | |\n","| entropy_loss | -1.2 |\n","| explained_variance | 0.981 |\n","| learning_rate | 0.0007 |\n","| n_updates | 27599 |\n","| policy_loss | 0.000766 |\n","| std | 0.382 |\n","| value_loss | 0.000147 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.57 |\n","| ep_rew_mean | -0.201 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 27700 |\n","| time_elapsed | 1491 |\n","| total_timesteps | 554000 |\n","| train/ | |\n","| entropy_loss | -1.18 |\n","| explained_variance | 0.959 |\n","| learning_rate | 0.0007 |\n","| n_updates | 27699 |\n","| policy_loss | -0.0173 |\n","| std | 0.379 |\n","| value_loss | 0.000264 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.77 |\n","| ep_rew_mean | -0.209 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 27800 |\n","| time_elapsed | 1496 |\n","| total_timesteps | 556000 |\n","| train/ | |\n","| entropy_loss | -1.17 |\n","| explained_variance | 0.949 |\n","| 
learning_rate | 0.0007 |\n","| n_updates | 27799 |\n","| policy_loss | 0.000551 |\n","| std | 0.378 |\n","| value_loss | 0.00016 |\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.62 |\n","| ep_rew_mean | -0.2 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 27900 |\n","| time_elapsed | 1502 |\n","| total_timesteps | 558000 |\n","| train/ | |\n","| entropy_loss | -1.14 |\n","| explained_variance | 0.941 |\n","| learning_rate | 0.0007 |\n","| n_updates | 27899 |\n","| policy_loss | -0.000696 |\n","| std | 0.375 |\n","| value_loss | 0.000233 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.92 |\n","| ep_rew_mean | -0.228 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 28000 |\n","| time_elapsed | 1507 |\n","| total_timesteps | 560000 |\n","| train/ | |\n","| entropy_loss | -1.17 |\n","| explained_variance | 0.612 |\n","| learning_rate | 0.0007 |\n","| n_updates | 27999 |\n","| policy_loss | -0.0643 |\n","| std | 0.378 |\n","| value_loss | 0.00586 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.202 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 28100 |\n","| time_elapsed | 1513 |\n","| total_timesteps | 562000 |\n","| train/ | |\n","| entropy_loss | -1.16 |\n","| explained_variance | 0.982 |\n","| learning_rate | 0.0007 |\n","| n_updates | 28099 |\n","| policy_loss | 0.0239 |\n","| std | 0.377 |\n","| value_loss | 0.000301 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.215 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 28200 |\n","| time_elapsed | 1519 |\n","| total_timesteps | 564000 |\n","| train/ | |\n","| entropy_loss | -1.15 |\n","| explained_variance | 0.976 |\n","| 
learning_rate | 0.0007 |\n","| n_updates | 28199 |\n","| policy_loss | -0.0206 |\n","| std | 0.375 |\n","| value_loss | 0.000195 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.202 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 28300 |\n","| time_elapsed | 1524 |\n","| total_timesteps | 566000 |\n","| train/ | |\n","| entropy_loss | -1.13 |\n","| explained_variance | 0.97 |\n","| learning_rate | 0.0007 |\n","| n_updates | 28299 |\n","| policy_loss | -0.00922 |\n","| std | 0.373 |\n","| value_loss | 0.000213 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.97 |\n","| ep_rew_mean | -0.237 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 28400 |\n","| time_elapsed | 1530 |\n","| total_timesteps | 568000 |\n","| train/ | |\n","| entropy_loss | -1.12 |\n","| explained_variance | 0.972 |\n","| learning_rate | 0.0007 |\n","| n_updates | 28399 |\n","| policy_loss | 0.0177 |\n","| std | 0.373 |\n","| value_loss | 0.000324 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.216 |\n","| time/ | |\n","| fps | 371 |\n","| iterations | 28500 |\n","| time_elapsed | 1535 |\n","| total_timesteps | 570000 |\n","| train/ | |\n","| entropy_loss | -1.12 |\n","| explained_variance | 0.944 |\n","| learning_rate | 0.0007 |\n","| n_updates | 28499 |\n","| policy_loss | -0.00116 |\n","| std | 0.373 |\n","| value_loss | 0.000274 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.85 |\n","| ep_rew_mean | -0.216 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 28600 |\n","| time_elapsed | 1542 |\n","| total_timesteps | 572000 |\n","| train/ | |\n","| entropy_loss | -1.12 |\n","| explained_variance | 0.574 |\n","| 
learning_rate | 0.0007 |\n","| n_updates | 28599 |\n","| policy_loss | -0.0423 |\n","| std | 0.374 |\n","| value_loss | 0.00667 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.88 |\n","| ep_rew_mean | -0.231 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 28700 |\n","| time_elapsed | 1547 |\n","| total_timesteps | 574000 |\n","| train/ | |\n","| entropy_loss | -1.12 |\n","| explained_variance | 0.947 |\n","| learning_rate | 0.0007 |\n","| n_updates | 28699 |\n","| policy_loss | -0.0191 |\n","| std | 0.374 |\n","| value_loss | 0.0006 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.7 |\n","| ep_rew_mean | -0.205 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 28800 |\n","| time_elapsed | 1553 |\n","| total_timesteps | 576000 |\n","| train/ | |\n","| entropy_loss | -1.1 |\n","| explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 28799 |\n","| policy_loss | -0.00246 |\n","| std | 0.372 |\n","| value_loss | 0.000154 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.83 |\n","| ep_rew_mean | -0.226 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 28900 |\n","| time_elapsed | 1558 |\n","| total_timesteps | 578000 |\n","| train/ | |\n","| entropy_loss | -1.11 |\n","| explained_variance | 0.943 |\n","| learning_rate | 0.0007 |\n","| n_updates | 28899 |\n","| policy_loss | -0.00343 |\n","| std | 0.373 |\n","| value_loss | 0.000237 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.82 |\n","| ep_rew_mean | -0.225 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 29000 |\n","| time_elapsed | 1563 |\n","| total_timesteps | 580000 |\n","| train/ | |\n","| entropy_loss | -1.12 |\n","| explained_variance | 0.966 |\n","| learning_rate 
| 0.0007 |\n","| n_updates | 28999 |\n","| policy_loss | 0.00808 |\n","| std | 0.374 |\n","| value_loss | 0.000176 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.64 |\n","| ep_rew_mean | -0.195 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 29100 |\n","| time_elapsed | 1569 |\n","| total_timesteps | 582000 |\n","| train/ | |\n","| entropy_loss | -1.09 |\n","| explained_variance | 0.798 |\n","| learning_rate | 0.0007 |\n","| n_updates | 29099 |\n","| policy_loss | -0.0217 |\n","| std | 0.372 |\n","| value_loss | 0.00186 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.2 |\n","| ep_rew_mean | -0.252 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 29200 |\n","| time_elapsed | 1574 |\n","| total_timesteps | 584000 |\n","| train/ | |\n","| entropy_loss | -1.11 |\n","| explained_variance | 0.434 |\n","| learning_rate | 0.0007 |\n","| n_updates | 29199 |\n","| policy_loss | 0.0171 |\n","| std | 0.374 |\n","| value_loss | 0.00356 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.81 |\n","| ep_rew_mean | -0.218 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 29300 |\n","| time_elapsed | 1581 |\n","| total_timesteps | 586000 |\n","| train/ | |\n","| entropy_loss | -1.1 |\n","| explained_variance | 0.969 |\n","| learning_rate | 0.0007 |\n","| n_updates | 29299 |\n","| policy_loss | 0.00606 |\n","| std | 0.373 |\n","| value_loss | 0.000466 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.84 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 29400 |\n","| time_elapsed | 1586 |\n","| total_timesteps | 588000 |\n","| train/ | |\n","| entropy_loss | -1.1 |\n","| explained_variance | 0.941 |\n","| learning_rate | 0.0007 |\n","| 
n_updates | 29399 |\n","| policy_loss | 0.0222 |\n","| std | 0.374 |\n","| value_loss | 0.000908 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.82 |\n","| ep_rew_mean | -0.22 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 29500 |\n","| time_elapsed | 1591 |\n","| total_timesteps | 590000 |\n","| train/ | |\n","| entropy_loss | -1.1 |\n","| explained_variance | 0.975 |\n","| learning_rate | 0.0007 |\n","| n_updates | 29499 |\n","| policy_loss | 0.000126 |\n","| std | 0.373 |\n","| value_loss | 6.21e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.93 |\n","| ep_rew_mean | -0.236 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 29600 |\n","| time_elapsed | 1597 |\n","| total_timesteps | 592000 |\n","| train/ | |\n","| entropy_loss | -1.09 |\n","| explained_variance | 0.986 |\n","| learning_rate | 0.0007 |\n","| n_updates | 29599 |\n","| policy_loss | -0.0107 |\n","| std | 0.372 |\n","| value_loss | 0.000203 |\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.26 |\n","| ep_rew_mean | -0.264 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 29700 |\n","| time_elapsed | 1602 |\n","| total_timesteps | 594000 |\n","| train/ | |\n","| entropy_loss | -1.09 |\n","| explained_variance | 0.894 |\n","| learning_rate | 0.0007 |\n","| n_updates | 29699 |\n","| policy_loss | -2.96e-05 |\n","| std | 0.372 |\n","| value_loss | 0.00053 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 29800 |\n","| time_elapsed | 1608 |\n","| total_timesteps | 596000 |\n","| train/ | |\n","| entropy_loss | -1.07 |\n","| explained_variance | 0.933 |\n","| learning_rate | 0.0007 |\n","| n_updates 
| 29799 |\n","| policy_loss | -0.00366 |\n","| std | 0.369 |\n","| value_loss | 0.000264 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.13 |\n","| ep_rew_mean | -0.242 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 29900 |\n","| time_elapsed | 1613 |\n","| total_timesteps | 598000 |\n","| train/ | |\n","| entropy_loss | -1.07 |\n","| explained_variance | 0.956 |\n","| learning_rate | 0.0007 |\n","| n_updates | 29899 |\n","| policy_loss | 0.0225 |\n","| std | 0.369 |\n","| value_loss | 0.000667 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.202 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 30000 |\n","| time_elapsed | 1619 |\n","| total_timesteps | 600000 |\n","| train/ | |\n","| entropy_loss | -1.05 |\n","| explained_variance | 0.941 |\n","| learning_rate | 0.0007 |\n","| n_updates | 29999 |\n","| policy_loss | 0.0245 |\n","| std | 0.367 |\n","| value_loss | 0.00122 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.7 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 30100 |\n","| time_elapsed | 1624 |\n","| total_timesteps | 602000 |\n","| train/ | |\n","| entropy_loss | -1.03 |\n","| explained_variance | 0.972 |\n","| learning_rate | 0.0007 |\n","| n_updates | 30099 |\n","| policy_loss | 0.0033 |\n","| std | 0.365 |\n","| value_loss | 0.000116 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.85 |\n","| ep_rew_mean | -0.218 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 30200 |\n","| time_elapsed | 1629 |\n","| total_timesteps | 604000 |\n","| train/ | |\n","| entropy_loss | -1.03 |\n","| explained_variance | 0.84 |\n","| learning_rate | 0.0007 |\n","| n_updates | 30199 |\n","| 
policy_loss | -0.0157 |\n","| std | 0.365 |\n","| value_loss | 0.000663 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.91 |\n","| ep_rew_mean | -0.23 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 30300 |\n","| time_elapsed | 1635 |\n","| total_timesteps | 606000 |\n","| train/ | |\n","| entropy_loss | -1.01 |\n","| explained_variance | 0.945 |\n","| learning_rate | 0.0007 |\n","| n_updates | 30299 |\n","| policy_loss | -0.00954 |\n","| std | 0.362 |\n","| value_loss | 0.000363 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.77 |\n","| ep_rew_mean | -0.22 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 30400 |\n","| time_elapsed | 1640 |\n","| total_timesteps | 608000 |\n","| train/ | |\n","| entropy_loss | -0.993 |\n","| explained_variance | 0.952 |\n","| learning_rate | 0.0007 |\n","| n_updates | 30399 |\n","| policy_loss | -0.00869 |\n","| std | 0.361 |\n","| value_loss | 0.000228 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.94 |\n","| ep_rew_mean | -0.236 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 30500 |\n","| time_elapsed | 1646 |\n","| total_timesteps | 610000 |\n","| train/ | |\n","| entropy_loss | -0.945 |\n","| explained_variance | 0.748 |\n","| learning_rate | 0.0007 |\n","| n_updates | 30499 |\n","| policy_loss | -0.0418 |\n","| std | 0.355 |\n","| value_loss | 0.00359 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.85 |\n","| ep_rew_mean | -0.228 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 30600 |\n","| time_elapsed | 1651 |\n","| total_timesteps | 612000 |\n","| train/ | |\n","| entropy_loss | -0.926 |\n","| explained_variance | 0.985 |\n","| learning_rate | 0.0007 |\n","| n_updates | 30599 |\n","| 
policy_loss | 0.00479 |\n","| std | 0.353 |\n","| value_loss | 0.000425 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.88 |\n","| ep_rew_mean | -0.226 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 30700 |\n","| time_elapsed | 1656 |\n","| total_timesteps | 614000 |\n","| train/ | |\n","| entropy_loss | -0.926 |\n","| explained_variance | 0.693 |\n","| learning_rate | 0.0007 |\n","| n_updates | 30699 |\n","| policy_loss | -0.014 |\n","| std | 0.353 |\n","| value_loss | 0.00214 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.218 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 30800 |\n","| time_elapsed | 1662 |\n","| total_timesteps | 616000 |\n","| train/ | |\n","| entropy_loss | -0.917 |\n","| explained_variance | 0.849 |\n","| learning_rate | 0.0007 |\n","| n_updates | 30799 |\n","| policy_loss | 0.00243 |\n","| std | 0.352 |\n","| value_loss | 0.00164 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.204 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 30900 |\n","| time_elapsed | 1667 |\n","| total_timesteps | 618000 |\n","| train/ | |\n","| entropy_loss | -0.898 |\n","| explained_variance | 0.962 |\n","| learning_rate | 0.0007 |\n","| n_updates | 30899 |\n","| policy_loss | -0.00587 |\n","| std | 0.349 |\n","| value_loss | 0.000291 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.218 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 31000 |\n","| time_elapsed | 1673 |\n","| total_timesteps | 620000 |\n","| train/ | |\n","| entropy_loss | -0.925 |\n","| explained_variance | 0.957 |\n","| learning_rate | 0.0007 |\n","| n_updates | 30999 |\n","| 
policy_loss | -0.0221 |\n","| std | 0.352 |\n","| value_loss | 0.000565 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.73 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 31100 |\n","| time_elapsed | 1677 |\n","| total_timesteps | 622000 |\n","| train/ | |\n","| entropy_loss | -0.925 |\n","| explained_variance | 0.956 |\n","| learning_rate | 0.0007 |\n","| n_updates | 31099 |\n","| policy_loss | -0.00369 |\n","| std | 0.352 |\n","| value_loss | 0.000136 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.59 |\n","| ep_rew_mean | -0.194 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 31200 |\n","| time_elapsed | 1682 |\n","| total_timesteps | 624000 |\n","| train/ | |\n","| entropy_loss | -0.915 |\n","| explained_variance | 0.972 |\n","| learning_rate | 0.0007 |\n","| n_updates | 31199 |\n","| policy_loss | 0.00117 |\n","| std | 0.351 |\n","| value_loss | 0.000228 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.8 |\n","| ep_rew_mean | -0.22 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 31300 |\n","| time_elapsed | 1688 |\n","| total_timesteps | 626000 |\n","| train/ | |\n","| entropy_loss | -0.902 |\n","| explained_variance | 0.967 |\n","| learning_rate | 0.0007 |\n","| n_updates | 31299 |\n","| policy_loss | 0.00434 |\n","| std | 0.35 |\n","| value_loss | 0.000194 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.53 |\n","| ep_rew_mean | -0.18 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 31400 |\n","| time_elapsed | 1693 |\n","| total_timesteps | 628000 |\n","| train/ | |\n","| entropy_loss | -0.889 |\n","| explained_variance | 0.965 |\n","| learning_rate | 0.0007 |\n","| n_updates | 31399 |\n","| policy_loss 
| -0.00948 |\n","| std | 0.349 |\n","| value_loss | 0.000186 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.73 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 31500 |\n","| time_elapsed | 1699 |\n","| total_timesteps | 630000 |\n","| train/ | |\n","| entropy_loss | -0.868 |\n","| explained_variance | 0.976 |\n","| learning_rate | 0.0007 |\n","| n_updates | 31499 |\n","| policy_loss | -0.00141 |\n","| std | 0.346 |\n","| value_loss | 0.000148 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.64 |\n","| ep_rew_mean | -0.194 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 31600 |\n","| time_elapsed | 1704 |\n","| total_timesteps | 632000 |\n","| train/ | |\n","| entropy_loss | -0.852 |\n","| explained_variance | 0.992 |\n","| learning_rate | 0.0007 |\n","| n_updates | 31599 |\n","| policy_loss | 0.00312 |\n","| std | 0.345 |\n","| value_loss | 7.1e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.49 |\n","| ep_rew_mean | -0.178 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 31700 |\n","| time_elapsed | 1709 |\n","| total_timesteps | 634000 |\n","| train/ | |\n","| entropy_loss | -0.85 |\n","| explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 31699 |\n","| policy_loss | 0.000295 |\n","| std | 0.345 |\n","| value_loss | 0.000322 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.8 |\n","| ep_rew_mean | -0.221 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 31800 |\n","| time_elapsed | 1715 |\n","| total_timesteps | 636000 |\n","| train/ | |\n","| entropy_loss | -0.843 |\n","| explained_variance | 0.965 |\n","| learning_rate | 0.0007 |\n","| n_updates | 31799 |\n","| policy_loss | 
-0.0121 |\n","| std | 0.345 |\n","| value_loss | 0.000503 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.216 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 31900 |\n","| time_elapsed | 1720 |\n","| total_timesteps | 638000 |\n","| train/ | |\n","| entropy_loss | -0.81 |\n","| explained_variance | 0.971 |\n","| learning_rate | 0.0007 |\n","| n_updates | 31899 |\n","| policy_loss | -0.00257 |\n","| std | 0.341 |\n","| value_loss | 0.000209 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.205 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 32000 |\n","| time_elapsed | 1727 |\n","| total_timesteps | 640000 |\n","| train/ | |\n","| entropy_loss | -0.773 |\n","| explained_variance | 0.967 |\n","| learning_rate | 0.0007 |\n","| n_updates | 31999 |\n","| policy_loss | 0.0135 |\n","| std | 0.336 |\n","| value_loss | 0.000448 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.72 |\n","| ep_rew_mean | -0.212 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 32100 |\n","| time_elapsed | 1733 |\n","| total_timesteps | 642000 |\n","| train/ | |\n","| entropy_loss | -0.781 |\n","| explained_variance | 0.922 |\n","| learning_rate | 0.0007 |\n","| n_updates | 32099 |\n","| policy_loss | -0.00758 |\n","| std | 0.337 |\n","| value_loss | 0.000887 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.62 |\n","| ep_rew_mean | -0.198 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 32200 |\n","| time_elapsed | 1738 |\n","| total_timesteps | 644000 |\n","| train/ | |\n","| entropy_loss | -0.759 |\n","| explained_variance | 0.951 |\n","| learning_rate | 0.0007 |\n","| n_updates | 32199 |\n","| policy_loss | 
-0.00883 |\n","| std | 0.335 |\n","| value_loss | 0.000575 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.205 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 32300 |\n","| time_elapsed | 1744 |\n","| total_timesteps | 646000 |\n","| train/ | |\n","| entropy_loss | -0.745 |\n","| explained_variance | 0.987 |\n","| learning_rate | 0.0007 |\n","| n_updates | 32299 |\n","| policy_loss | -0.0011 |\n","| std | 0.334 |\n","| value_loss | 8.33e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 32400 |\n","| time_elapsed | 1749 |\n","| total_timesteps | 648000 |\n","| train/ | |\n","| entropy_loss | -0.758 |\n","| explained_variance | 0.891 |\n","| learning_rate | 0.0007 |\n","| n_updates | 32399 |\n","| policy_loss | 0.00367 |\n","| std | 0.336 |\n","| value_loss | 0.00107 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 32500 |\n","| time_elapsed | 1755 |\n","| total_timesteps | 650000 |\n","| train/ | |\n","| entropy_loss | -0.767 |\n","| explained_variance | 0.976 |\n","| learning_rate | 0.0007 |\n","| n_updates | 32499 |\n","| policy_loss | -0.00398 |\n","| std | 0.337 |\n","| value_loss | 0.000223 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.66 |\n","| ep_rew_mean | -0.201 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 32600 |\n","| time_elapsed | 1760 |\n","| total_timesteps | 652000 |\n","| train/ | |\n","| entropy_loss | -0.778 |\n","| explained_variance | 0.962 |\n","| learning_rate | 0.0007 |\n","| n_updates | 32599 |\n","| policy_loss | 
0.00605 |\n","| std | 0.338 |\n","| value_loss | 0.000407 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.79 |\n","| ep_rew_mean | -0.213 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 32700 |\n","| time_elapsed | 1766 |\n","| total_timesteps | 654000 |\n","| train/ | |\n","| entropy_loss | -0.768 |\n","| explained_variance | 0.951 |\n","| learning_rate | 0.0007 |\n","| n_updates | 32699 |\n","| policy_loss | -0.00468 |\n","| std | 0.338 |\n","| value_loss | 0.00026 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.206 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 32800 |\n","| time_elapsed | 1771 |\n","| total_timesteps | 656000 |\n","| train/ | |\n","| entropy_loss | -0.786 |\n","| explained_variance | 0.956 |\n","| learning_rate | 0.0007 |\n","| n_updates | 32799 |\n","| policy_loss | 0.00403 |\n","| std | 0.339 |\n","| value_loss | 0.000409 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 32900 |\n","| time_elapsed | 1776 |\n","| total_timesteps | 658000 |\n","| train/ | |\n","| entropy_loss | -0.778 |\n","| explained_variance | 0.979 |\n","| learning_rate | 0.0007 |\n","| n_updates | 32899 |\n","| policy_loss | 0.00136 |\n","| std | 0.339 |\n","| value_loss | 9.21e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.77 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 33000 |\n","| time_elapsed | 1782 |\n","| total_timesteps | 660000 |\n","| train/ | |\n","| entropy_loss | -0.758 |\n","| explained_variance | 0.887 |\n","| learning_rate | 0.0007 |\n","| n_updates | 32999 |\n","| policy_loss | 0.0131 
|\n","| std | 0.337 |\n","| value_loss | 0.000436 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.85 |\n","| ep_rew_mean | -0.224 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 33100 |\n","| time_elapsed | 1787 |\n","| total_timesteps | 662000 |\n","| train/ | |\n","| entropy_loss | -0.758 |\n","| explained_variance | 0.801 |\n","| learning_rate | 0.0007 |\n","| n_updates | 33099 |\n","| policy_loss | -0.0263 |\n","| std | 0.337 |\n","| value_loss | 0.00151 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.209 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 33200 |\n","| time_elapsed | 1793 |\n","| total_timesteps | 664000 |\n","| train/ | |\n","| entropy_loss | -0.745 |\n","| explained_variance | 0.979 |\n","| learning_rate | 0.0007 |\n","| n_updates | 33199 |\n","| policy_loss | -0.00329 |\n","| std | 0.335 |\n","| value_loss | 0.000105 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.59 |\n","| ep_rew_mean | -0.206 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 33300 |\n","| time_elapsed | 1798 |\n","| total_timesteps | 666000 |\n","| train/ | |\n","| entropy_loss | -0.729 |\n","| explained_variance | 0.989 |\n","| learning_rate | 0.0007 |\n","| n_updates | 33299 |\n","| policy_loss | -0.0155 |\n","| std | 0.334 |\n","| value_loss | 0.000204 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 33400 |\n","| time_elapsed | 1803 |\n","| total_timesteps | 668000 |\n","| train/ | |\n","| entropy_loss | -0.695 |\n","| explained_variance | 0.99 |\n","| learning_rate | 0.0007 |\n","| n_updates | 33399 |\n","| policy_loss | -0.00419 |\n","| 
std | 0.331 |\n","| value_loss | 0.000118 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.82 |\n","| ep_rew_mean | -0.22 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 33500 |\n","| time_elapsed | 1809 |\n","| total_timesteps | 670000 |\n","| train/ | |\n","| entropy_loss | -0.689 |\n","| explained_variance | 0.981 |\n","| learning_rate | 0.0007 |\n","| n_updates | 33499 |\n","| policy_loss | -0.00792 |\n","| std | 0.331 |\n","| value_loss | 0.000152 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.64 |\n","| ep_rew_mean | -0.196 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 33600 |\n","| time_elapsed | 1814 |\n","| total_timesteps | 672000 |\n","| train/ | |\n","| entropy_loss | -0.672 |\n","| explained_variance | 0.983 |\n","| learning_rate | 0.0007 |\n","| n_updates | 33599 |\n","| policy_loss | 0.00425 |\n","| std | 0.33 |\n","| value_loss | 0.000116 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.86 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 33700 |\n","| time_elapsed | 1820 |\n","| total_timesteps | 674000 |\n","| train/ | |\n","| entropy_loss | -0.653 |\n","| explained_variance | 0.955 |\n","| learning_rate | 0.0007 |\n","| n_updates | 33699 |\n","| policy_loss | -0.00029 |\n","| std | 0.329 |\n","| value_loss | 0.000335 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.61 |\n","| ep_rew_mean | -0.203 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 33800 |\n","| time_elapsed | 1825 |\n","| total_timesteps | 676000 |\n","| train/ | |\n","| entropy_loss | -0.651 |\n","| explained_variance | 0.942 |\n","| learning_rate | 0.0007 |\n","| n_updates | 33799 |\n","| policy_loss | -0.0063 |\n","| std | 
0.329 |\n","| value_loss | 0.000432 |\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.8 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 33900 |\n","| time_elapsed | 1830 |\n","| total_timesteps | 678000 |\n","| train/ | |\n","| entropy_loss | -0.643 |\n","| explained_variance | 0.973 |\n","| learning_rate | 0.0007 |\n","| n_updates | 33899 |\n","| policy_loss | -0.000768 |\n","| std | 0.328 |\n","| value_loss | 0.000148 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 34000 |\n","| time_elapsed | 1836 |\n","| total_timesteps | 680000 |\n","| train/ | |\n","| entropy_loss | -0.632 |\n","| explained_variance | 0.971 |\n","| learning_rate | 0.0007 |\n","| n_updates | 33999 |\n","| policy_loss | 0.00146 |\n","| std | 0.327 |\n","| value_loss | 0.000182 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.213 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 34100 |\n","| time_elapsed | 1841 |\n","| total_timesteps | 682000 |\n","| train/ | |\n","| entropy_loss | -0.627 |\n","| explained_variance | 0.977 |\n","| learning_rate | 0.0007 |\n","| n_updates | 34099 |\n","| policy_loss | -0.0095 |\n","| std | 0.326 |\n","| value_loss | 0.000314 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.82 |\n","| ep_rew_mean | -0.224 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 34200 |\n","| time_elapsed | 1847 |\n","| total_timesteps | 684000 |\n","| train/ | |\n","| entropy_loss | -0.631 |\n","| explained_variance | 0.982 |\n","| learning_rate | 0.0007 |\n","| n_updates | 34199 |\n","| policy_loss | 0.0112 |\n","| std | 0.326 
|\n","| value_loss | 0.000291 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.63 |\n","| ep_rew_mean | -0.198 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 34300 |\n","| time_elapsed | 1852 |\n","| total_timesteps | 686000 |\n","| train/ | |\n","| entropy_loss | -0.618 |\n","| explained_variance | 0.964 |\n","| learning_rate | 0.0007 |\n","| n_updates | 34299 |\n","| policy_loss | 0.0161 |\n","| std | 0.326 |\n","| value_loss | 0.000308 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.73 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 34400 |\n","| time_elapsed | 1857 |\n","| total_timesteps | 688000 |\n","| train/ | |\n","| entropy_loss | -0.614 |\n","| explained_variance | 0.98 |\n","| learning_rate | 0.0007 |\n","| n_updates | 34399 |\n","| policy_loss | 0.00919 |\n","| std | 0.325 |\n","| value_loss | 0.000281 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.64 |\n","| ep_rew_mean | -0.194 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 34500 |\n","| time_elapsed | 1863 |\n","| total_timesteps | 690000 |\n","| train/ | |\n","| entropy_loss | -0.618 |\n","| explained_variance | 0.981 |\n","| learning_rate | 0.0007 |\n","| n_updates | 34499 |\n","| policy_loss | 0.00651 |\n","| std | 0.325 |\n","| value_loss | 7.66e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.84 |\n","| ep_rew_mean | -0.225 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 34600 |\n","| time_elapsed | 1868 |\n","| total_timesteps | 692000 |\n","| train/ | |\n","| entropy_loss | -0.624 |\n","| explained_variance | 0.974 |\n","| learning_rate | 0.0007 |\n","| n_updates | 34599 |\n","| policy_loss | 0.00316 |\n","| std | 0.326 |\n","| 
value_loss | 0.000231 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.9 |\n","| ep_rew_mean | -0.233 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 34700 |\n","| time_elapsed | 1874 |\n","| total_timesteps | 694000 |\n","| train/ | |\n","| entropy_loss | -0.626 |\n","| explained_variance | 0.919 |\n","| learning_rate | 0.0007 |\n","| n_updates | 34699 |\n","| policy_loss | 0.00283 |\n","| std | 0.326 |\n","| value_loss | 0.000538 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.213 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 34800 |\n","| time_elapsed | 1879 |\n","| total_timesteps | 696000 |\n","| train/ | |\n","| entropy_loss | -0.619 |\n","| explained_variance | 0.962 |\n","| learning_rate | 0.0007 |\n","| n_updates | 34799 |\n","| policy_loss | 0.02 |\n","| std | 0.326 |\n","| value_loss | 0.000824 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.97 |\n","| ep_rew_mean | -0.239 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 34900 |\n","| time_elapsed | 1885 |\n","| total_timesteps | 698000 |\n","| train/ | |\n","| entropy_loss | -0.621 |\n","| explained_variance | 0.971 |\n","| learning_rate | 0.0007 |\n","| n_updates | 34899 |\n","| policy_loss | 0.000574 |\n","| std | 0.326 |\n","| value_loss | 0.000166 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 35000 |\n","| time_elapsed | 1890 |\n","| total_timesteps | 700000 |\n","| train/ | |\n","| entropy_loss | -0.617 |\n","| explained_variance | 0.941 |\n","| learning_rate | 0.0007 |\n","| n_updates | 34999 |\n","| policy_loss | 0.00347 |\n","| std | 0.325 |\n","| value_loss 
| 0.000159 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.208 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 35100 |\n","| time_elapsed | 1895 |\n","| total_timesteps | 702000 |\n","| train/ | |\n","| entropy_loss | -0.606 |\n","| explained_variance | 0.913 |\n","| learning_rate | 0.0007 |\n","| n_updates | 35099 |\n","| policy_loss | 0.0074 |\n","| std | 0.323 |\n","| value_loss | 0.000453 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 35200 |\n","| time_elapsed | 1901 |\n","| total_timesteps | 704000 |\n","| train/ | |\n","| entropy_loss | -0.618 |\n","| explained_variance | 0.956 |\n","| learning_rate | 0.0007 |\n","| n_updates | 35199 |\n","| policy_loss | -0.0156 |\n","| std | 0.323 |\n","| value_loss | 0.000196 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.73 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 35300 |\n","| time_elapsed | 1906 |\n","| total_timesteps | 706000 |\n","| train/ | |\n","| entropy_loss | -0.599 |\n","| explained_variance | 0.977 |\n","| learning_rate | 0.0007 |\n","| n_updates | 35299 |\n","| policy_loss | -0.00947 |\n","| std | 0.321 |\n","| value_loss | 0.000158 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.81 |\n","| ep_rew_mean | -0.221 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 35400 |\n","| time_elapsed | 1912 |\n","| total_timesteps | 708000 |\n","| train/ | |\n","| entropy_loss | -0.588 |\n","| explained_variance | 0.995 |\n","| learning_rate | 0.0007 |\n","| n_updates | 35399 |\n","| policy_loss | -0.00901 |\n","| std | 0.32 |\n","| value_loss | 
8.99e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.212 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 35500 |\n","| time_elapsed | 1917 |\n","| total_timesteps | 710000 |\n","| train/ | |\n","| entropy_loss | -0.563 |\n","| explained_variance | 0.953 |\n","| learning_rate | 0.0007 |\n","| n_updates | 35499 |\n","| policy_loss | -0.00164 |\n","| std | 0.318 |\n","| value_loss | 0.000211 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.73 |\n","| ep_rew_mean | -0.214 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 35600 |\n","| time_elapsed | 1922 |\n","| total_timesteps | 712000 |\n","| train/ | |\n","| entropy_loss | -0.551 |\n","| explained_variance | 0.955 |\n","| learning_rate | 0.0007 |\n","| n_updates | 35599 |\n","| policy_loss | 0.00301 |\n","| std | 0.317 |\n","| value_loss | 0.000355 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.85 |\n","| ep_rew_mean | -0.224 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 35700 |\n","| time_elapsed | 1928 |\n","| total_timesteps | 714000 |\n","| train/ | |\n","| entropy_loss | -0.544 |\n","| explained_variance | 0.954 |\n","| learning_rate | 0.0007 |\n","| n_updates | 35699 |\n","| policy_loss | -0.006 |\n","| std | 0.316 |\n","| value_loss | 0.000688 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.03 |\n","| ep_rew_mean | -0.245 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 35800 |\n","| time_elapsed | 1933 |\n","| total_timesteps | 716000 |\n","| train/ | |\n","| entropy_loss | -0.552 |\n","| explained_variance | 0.577 |\n","| learning_rate | 0.0007 |\n","| n_updates | 35799 |\n","| policy_loss | 0.0161 |\n","| std | 0.317 |\n","| value_loss | 0.00323 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.223 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 35900 |\n","| time_elapsed | 1939 |\n","| total_timesteps | 718000 |\n","| train/ | |\n","| entropy_loss | -0.554 |\n","| explained_variance | 0.936 |\n","| learning_rate | 0.0007 |\n","| n_updates | 35899 |\n","| policy_loss | -0.00517 |\n","| std | 0.317 |\n","| value_loss | 0.000629 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.83 |\n","| ep_rew_mean | -0.221 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 36000 |\n","| time_elapsed | 1944 |\n","| total_timesteps | 720000 |\n","| train/ | |\n","| entropy_loss | -0.547 |\n","| explained_variance | 0.951 |\n","| learning_rate | 0.0007 |\n","| n_updates | 35999 |\n","| policy_loss | 0.00271 |\n","| std | 0.317 |\n","| value_loss | 0.000222 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.8 |\n","| ep_rew_mean | -0.226 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 36100 |\n","| time_elapsed | 1950 |\n","| total_timesteps | 722000 |\n","| train/ | |\n","| entropy_loss | -0.533 |\n","| explained_variance | 0.957 |\n","| learning_rate | 0.0007 |\n","| n_updates | 36099 |\n","| policy_loss | -0.00146 |\n","| std | 0.316 |\n","| value_loss | 0.000196 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 36200 |\n","| time_elapsed | 1956 |\n","| total_timesteps | 724000 |\n","| train/ | |\n","| entropy_loss | -0.51 |\n","| explained_variance | 0.969 |\n","| learning_rate | 0.0007 |\n","| n_updates | 36199 |\n","| policy_loss | 0.0105 |\n","| std | 0.314 |\n","| value_loss | 0.000754 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.214 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 36300 |\n","| time_elapsed | 1961 |\n","| total_timesteps | 726000 |\n","| train/ | |\n","| entropy_loss | -0.486 |\n","| explained_variance | 0.954 |\n","| learning_rate | 0.0007 |\n","| n_updates | 36299 |\n","| policy_loss | 0.00552 |\n","| std | 0.311 |\n","| value_loss | 0.000373 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.75 |\n","| ep_rew_mean | -0.216 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 36400 |\n","| time_elapsed | 1967 |\n","| total_timesteps | 728000 |\n","| train/ | |\n","| entropy_loss | -0.477 |\n","| explained_variance | 0.985 |\n","| learning_rate | 0.0007 |\n","| n_updates | 36399 |\n","| policy_loss | -0.0127 |\n","| std | 0.311 |\n","| value_loss | 0.000318 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.56 |\n","| ep_rew_mean | -0.2 |\n","| time/ | |\n","| fps | 370 |\n","| iterations | 36500 |\n","| time_elapsed | 1972 |\n","| total_timesteps | 730000 |\n","| train/ | |\n","| entropy_loss | -0.467 |\n","| explained_variance | 0.985 |\n","| learning_rate | 0.0007 |\n","| n_updates | 36499 |\n","| policy_loss | 0.00378 |\n","| std | 0.31 |\n","| value_loss | 0.00012 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.216 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 36600 |\n","| time_elapsed | 1979 |\n","| total_timesteps | 732000 |\n","| train/ | |\n","| entropy_loss | -0.473 |\n","| explained_variance | 0.979 |\n","| learning_rate | 0.0007 |\n","| n_updates | 36599 |\n","| policy_loss | 0.00482 |\n","| std | 0.31 |\n","| value_loss | 0.000204 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.7 |\n","| ep_rew_mean | -0.203 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 36700 |\n","| time_elapsed | 1984 |\n","| total_timesteps | 734000 |\n","| train/ | |\n","| entropy_loss | -0.455 |\n","| explained_variance | 0.872 |\n","| learning_rate | 0.0007 |\n","| n_updates | 36699 |\n","| policy_loss | 0.00152 |\n","| std | 0.308 |\n","| value_loss | 0.000986 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.216 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 36800 |\n","| time_elapsed | 1989 |\n","| total_timesteps | 736000 |\n","| train/ | |\n","| entropy_loss | -0.451 |\n","| explained_variance | 0.977 |\n","| learning_rate | 0.0007 |\n","| n_updates | 36799 |\n","| policy_loss | -0.00245 |\n","| std | 0.308 |\n","| value_loss | 0.000164 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.206 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 36900 |\n","| time_elapsed | 1995 |\n","| total_timesteps | 738000 |\n","| train/ | |\n","| entropy_loss | -0.44 |\n","| explained_variance | 0.973 |\n","| learning_rate | 0.0007 |\n","| n_updates | 36899 |\n","| policy_loss | -0.0021 |\n","| std | 0.308 |\n","| value_loss | 0.000133 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.64 |\n","| ep_rew_mean | -0.194 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 37000 |\n","| time_elapsed | 2000 |\n","| total_timesteps | 740000 |\n","| train/ | |\n","| entropy_loss | -0.422 |\n","| explained_variance | 0.818 |\n","| learning_rate | 0.0007 |\n","| n_updates | 36999 |\n","| policy_loss | 0.00654 |\n","| std | 0.306 |\n","| value_loss | 0.0013 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.7 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 37100 |\n","| time_elapsed | 2007 |\n","| total_timesteps | 742000 |\n","| train/ | |\n","| entropy_loss | -0.42 |\n","| explained_variance | 0.965 |\n","| learning_rate | 0.0007 |\n","| n_updates | 37099 |\n","| policy_loss | -0.0197 |\n","| std | 0.306 |\n","| value_loss | 0.000427 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.87 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 37200 |\n","| time_elapsed | 2012 |\n","| total_timesteps | 744000 |\n","| train/ | |\n","| entropy_loss | -0.418 |\n","| explained_variance | 0.738 |\n","| learning_rate | 0.0007 |\n","| n_updates | 37199 |\n","| policy_loss | 0.013 |\n","| std | 0.307 |\n","| value_loss | 0.00267 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.62 |\n","| ep_rew_mean | -0.19 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 37300 |\n","| time_elapsed | 2018 |\n","| total_timesteps | 746000 |\n","| train/ | |\n","| entropy_loss | -0.429 |\n","| explained_variance | 0.949 |\n","| learning_rate | 0.0007 |\n","| n_updates | 37299 |\n","| policy_loss | 0.00391 |\n","| std | 0.307 |\n","| value_loss | 0.000298 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.202 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 37400 |\n","| time_elapsed | 2023 |\n","| total_timesteps | 748000 |\n","| train/ | |\n","| entropy_loss | -0.417 |\n","| explained_variance | 0.96 |\n","| learning_rate | 0.0007 |\n","| n_updates | 37399 |\n","| policy_loss | 0.00301 |\n","| std | 0.306 |\n","| value_loss | 0.000298 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.79 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 37500 |\n","| time_elapsed | 2029 |\n","| total_timesteps | 750000 |\n","| train/ | |\n","| entropy_loss | -0.41 |\n","| explained_variance | 0.966 |\n","| learning_rate | 0.0007 |\n","| n_updates | 37499 |\n","| policy_loss | 0.00954 |\n","| std | 0.305 |\n","| value_loss | 0.000347 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 37600 |\n","| time_elapsed | 2035 |\n","| total_timesteps | 752000 |\n","| train/ | |\n","| entropy_loss | -0.401 |\n","| explained_variance | 0.984 |\n","| learning_rate | 0.0007 |\n","| n_updates | 37599 |\n","| policy_loss | -0.00145 |\n","| std | 0.304 |\n","| value_loss | 0.000125 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.98 |\n","| ep_rew_mean | -0.243 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 37700 |\n","| time_elapsed | 2040 |\n","| total_timesteps | 754000 |\n","| train/ | |\n","| entropy_loss | -0.391 |\n","| explained_variance | 0.986 |\n","| learning_rate | 0.0007 |\n","| n_updates | 37699 |\n","| policy_loss | 0.00078 |\n","| std | 0.303 |\n","| value_loss | 0.000243 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.66 |\n","| ep_rew_mean | -0.203 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 37800 |\n","| time_elapsed | 2046 |\n","| total_timesteps | 756000 |\n","| train/ | |\n","| entropy_loss | -0.378 |\n","| explained_variance | 0.98 |\n","| learning_rate | 0.0007 |\n","| n_updates | 37799 |\n","| policy_loss | 0.00376 |\n","| std | 0.302 |\n","| value_loss | 0.000143 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.72 |\n","| ep_rew_mean | -0.215 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 37900 |\n","| time_elapsed | 2051 |\n","| total_timesteps | 758000 |\n","| train/ | |\n","| entropy_loss | -0.367 |\n","| explained_variance | 0.992 |\n","| learning_rate | 0.0007 |\n","| n_updates | 37899 |\n","| policy_loss | 0.00106 |\n","| std | 0.301 |\n","| value_loss | 7.36e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.91 |\n","| ep_rew_mean | -0.239 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 38000 |\n","| time_elapsed | 2056 |\n","| total_timesteps | 760000 |\n","| train/ | |\n","| entropy_loss | -0.347 |\n","| explained_variance | 0.978 |\n","| learning_rate | 0.0007 |\n","| n_updates | 37999 |\n","| policy_loss | 0.00239 |\n","| std | 0.299 |\n","| value_loss | 0.000211 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.59 |\n","| ep_rew_mean | -0.198 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 38100 |\n","| time_elapsed | 2062 |\n","| total_timesteps | 762000 |\n","| train/ | |\n","| entropy_loss | -0.357 |\n","| explained_variance | 0.99 |\n","| learning_rate | 0.0007 |\n","| n_updates | 38099 |\n","| policy_loss | -0.00176 |\n","| std | 0.301 |\n","| value_loss | 0.000155 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.212 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 38200 |\n","| time_elapsed | 2066 |\n","| total_timesteps | 764000 |\n","| train/ | |\n","| entropy_loss | -0.34 |\n","| explained_variance | 0.988 |\n","| learning_rate | 0.0007 |\n","| n_updates | 38199 |\n","| policy_loss | -0.00693 |\n","| std | 0.299 |\n","| value_loss | 8.58e-05 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.79 |\n","| ep_rew_mean | -0.227 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 38300 |\n","| time_elapsed | 2073 |\n","| total_timesteps | 766000 |\n","| train/ | |\n","| entropy_loss | -0.335 |\n","| explained_variance | 0.962 |\n","| learning_rate | 0.0007 |\n","| n_updates | 38299 |\n","| policy_loss | 0.00789 |\n","| std | 0.299 |\n","| value_loss | 0.000405 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.212 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 38400 |\n","| time_elapsed | 2078 |\n","| total_timesteps | 768000 |\n","| train/ | |\n","| entropy_loss | -0.337 |\n","| explained_variance | 0.994 |\n","| learning_rate | 0.0007 |\n","| n_updates | 38399 |\n","| policy_loss | -0.00164 |\n","| std | 0.298 |\n","| value_loss | 7.49e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.64 |\n","| ep_rew_mean | -0.195 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 38500 |\n","| time_elapsed | 2084 |\n","| total_timesteps | 770000 |\n","| train/ | |\n","| entropy_loss | -0.323 |\n","| explained_variance | 0.989 |\n","| learning_rate | 0.0007 |\n","| n_updates | 38499 |\n","| policy_loss | 0.000334 |\n","| std | 0.297 |\n","| value_loss | 7.01e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.7 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 38600 |\n","| time_elapsed | 2089 |\n","| total_timesteps | 772000 |\n","| train/ | |\n","| entropy_loss | -0.314 |\n","| explained_variance | 0.96 |\n","| learning_rate | 0.0007 |\n","| n_updates | 38599 |\n","| policy_loss | 0.00254 |\n","| std | 0.296 |\n","| value_loss | 0.000353 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.83 |\n","| ep_rew_mean | -0.223 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 38700 |\n","| time_elapsed | 2095 |\n","| total_timesteps | 774000 |\n","| train/ | |\n","| entropy_loss | -0.324 |\n","| explained_variance | 0.993 |\n","| learning_rate | 0.0007 |\n","| n_updates | 38699 |\n","| policy_loss | 0.000697 |\n","| std | 0.299 |\n","| value_loss | 0.000159 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.81 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 38800 |\n","| time_elapsed | 2100 |\n","| total_timesteps | 776000 |\n","| train/ | |\n","| entropy_loss | -0.306 |\n","| explained_variance | 0.974 |\n","| learning_rate | 0.0007 |\n","| n_updates | 38799 |\n","| policy_loss | -0.00347 |\n","| std | 0.298 |\n","| value_loss | 0.000229 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.77 |\n","| ep_rew_mean | -0.214 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 38900 |\n","| time_elapsed | 2105 |\n","| total_timesteps | 778000 |\n","| train/ | |\n","| entropy_loss | -0.318 |\n","| explained_variance | 0.655 |\n","| learning_rate | 0.0007 |\n","| n_updates | 38899 |\n","| policy_loss | 0.000817 |\n","| std | 0.3 |\n","| value_loss | 0.00321 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.209 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 39000 |\n","| time_elapsed | 2111 |\n","| total_timesteps | 780000 |\n","| train/ | |\n","| entropy_loss | -0.297 |\n","| explained_variance | 0.987 |\n","| learning_rate | 0.0007 |\n","| n_updates | 38999 |\n","| policy_loss | -0.00423 |\n","| std | 0.298 |\n","| value_loss | 7.47e-05 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.203 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 39100 |\n","| time_elapsed | 2116 |\n","| total_timesteps | 782000 |\n","| train/ | |\n","| entropy_loss | -0.282 |\n","| explained_variance | 0.989 |\n","| learning_rate | 0.0007 |\n","| n_updates | 39099 |\n","| policy_loss | -0.00336 |\n","| std | 0.297 |\n","| value_loss | 8.87e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.75 |\n","| ep_rew_mean | -0.213 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 39200 |\n","| time_elapsed | 2122 |\n","| total_timesteps | 784000 |\n","| train/ | |\n","| entropy_loss | -0.271 |\n","| explained_variance | 0.977 |\n","| learning_rate | 0.0007 |\n","| n_updates | 39199 |\n","| policy_loss | 0.00463 |\n","| std | 0.295 |\n","| value_loss | 0.000164 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.216 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 39300 |\n","| time_elapsed | 2128 |\n","| total_timesteps | 786000 |\n","| train/ | |\n","| entropy_loss | -0.286 |\n","| explained_variance | 0.835 |\n","| learning_rate | 0.0007 |\n","| n_updates | 39299 |\n","| policy_loss | -0.0073 |\n","| std | 0.297 |\n","| value_loss | 0.00105 |\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.231 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 39400 |\n","| time_elapsed | 2133 |\n","| total_timesteps | 788000 |\n","| train/ | |\n","| entropy_loss | -0.26 |\n","| explained_variance | 0.982 |\n","| learning_rate | 0.0007 |\n","| n_updates | 39399 |\n","| policy_loss | -0.000717 |\n","| std | 0.294 |\n","| value_loss | 0.000136 
|\n","-------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.66 |\n","| ep_rew_mean | -0.205 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 39500 |\n","| time_elapsed | 2139 |\n","| total_timesteps | 790000 |\n","| train/ | |\n","| entropy_loss | -0.238 |\n","| explained_variance | 0.996 |\n","| learning_rate | 0.0007 |\n","| n_updates | 39499 |\n","| policy_loss | -2.25e-05 |\n","| std | 0.293 |\n","| value_loss | 3.68e-05 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.7 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 39600 |\n","| time_elapsed | 2144 |\n","| total_timesteps | 792000 |\n","| train/ | |\n","| entropy_loss | -0.23 |\n","| explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 39599 |\n","| policy_loss | 0.00124 |\n","| std | 0.292 |\n","| value_loss | 0.000345 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.8 |\n","| ep_rew_mean | -0.221 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 39700 |\n","| time_elapsed | 2151 |\n","| total_timesteps | 794000 |\n","| train/ | |\n","| entropy_loss | -0.223 |\n","| explained_variance | 0.955 |\n","| learning_rate | 0.0007 |\n","| n_updates | 39699 |\n","| policy_loss | -0.00696 |\n","| std | 0.292 |\n","| value_loss | 0.000445 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.77 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 39800 |\n","| time_elapsed | 2156 |\n","| total_timesteps | 796000 |\n","| train/ | |\n","| entropy_loss | -0.202 |\n","| explained_variance | 0.962 |\n","| learning_rate | 0.0007 |\n","| n_updates | 39799 |\n","| policy_loss | 0.00756 |\n","| std | 0.291 |\n","| value_loss | 0.000269 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.62 |\n","| ep_rew_mean | -0.209 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 39900 |\n","| time_elapsed | 2161 |\n","| total_timesteps | 798000 |\n","| train/ | |\n","| entropy_loss | -0.201 |\n","| explained_variance | 0.939 |\n","| learning_rate | 0.0007 |\n","| n_updates | 39899 |\n","| policy_loss | -0.0023 |\n","| std | 0.291 |\n","| value_loss | 0.0004 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.81 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 40000 |\n","| time_elapsed | 2167 |\n","| total_timesteps | 800000 |\n","| train/ | |\n","| entropy_loss | -0.21 |\n","| explained_variance | 0.982 |\n","| learning_rate | 0.0007 |\n","| n_updates | 39999 |\n","| policy_loss | 0.000684 |\n","| std | 0.292 |\n","| value_loss | 0.000195 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.199 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 40100 |\n","| time_elapsed | 2172 |\n","| total_timesteps | 802000 |\n","| train/ | |\n","| entropy_loss | -0.215 |\n","| explained_variance | 0.958 |\n","| learning_rate | 0.0007 |\n","| n_updates | 40099 |\n","| policy_loss | -0.0137 |\n","| std | 0.292 |\n","| value_loss | 0.000298 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.206 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 40200 |\n","| time_elapsed | 2178 |\n","| total_timesteps | 804000 |\n","| train/ | |\n","| entropy_loss | -0.215 |\n","| explained_variance | 0.984 |\n","| learning_rate | 0.0007 |\n","| n_updates | 40199 |\n","| policy_loss | 0.00292 |\n","| std | 0.292 |\n","| value_loss | 0.000183 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.94 |\n","| ep_rew_mean | -0.228 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 40300 |\n","| time_elapsed | 2183 |\n","| total_timesteps | 806000 |\n","| train/ | |\n","| entropy_loss | -0.208 |\n","| explained_variance | 0.986 |\n","| learning_rate | 0.0007 |\n","| n_updates | 40299 |\n","| policy_loss | 0.00246 |\n","| std | 0.291 |\n","| value_loss | 8.77e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 40400 |\n","| time_elapsed | 2189 |\n","| total_timesteps | 808000 |\n","| train/ | |\n","| entropy_loss | -0.224 |\n","| explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 40399 |\n","| policy_loss | -0.0059 |\n","| std | 0.293 |\n","| value_loss | 0.0002 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 40500 |\n","| time_elapsed | 2194 |\n","| total_timesteps | 810000 |\n","| train/ | |\n","| entropy_loss | -0.213 |\n","| explained_variance | 0.992 |\n","| learning_rate | 0.0007 |\n","| n_updates | 40499 |\n","| policy_loss | -0.00346 |\n","| std | 0.292 |\n","| value_loss | 5.02e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.56 |\n","| ep_rew_mean | -0.188 |\n","| time/ | |\n","| fps | 369 |\n","| iterations | 40600 |\n","| time_elapsed | 2199 |\n","| total_timesteps | 812000 |\n","| train/ | |\n","| entropy_loss | -0.2 |\n","| explained_variance | 0.966 |\n","| learning_rate | 0.0007 |\n","| n_updates | 40599 |\n","| policy_loss | -0.0023 |\n","| std | 0.291 |\n","| value_loss | 0.00013 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.7 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 40700 |\n","| time_elapsed | 2207 |\n","| total_timesteps | 814000 |\n","| train/ | |\n","| entropy_loss | -0.196 |\n","| explained_variance | 0.976 |\n","| learning_rate | 0.0007 |\n","| n_updates | 40699 |\n","| policy_loss | 0.00152 |\n","| std | 0.29 |\n","| value_loss | 0.000202 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.206 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 40800 |\n","| time_elapsed | 2212 |\n","| total_timesteps | 816000 |\n","| train/ | |\n","| entropy_loss | -0.183 |\n","| explained_variance | 0.988 |\n","| learning_rate | 0.0007 |\n","| n_updates | 40799 |\n","| policy_loss | -0.00116 |\n","| std | 0.289 |\n","| value_loss | 7.77e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.57 |\n","| ep_rew_mean | -0.192 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 40900 |\n","| time_elapsed | 2218 |\n","| total_timesteps | 818000 |\n","| train/ | |\n","| entropy_loss | -0.151 |\n","| explained_variance | 0.975 |\n","| learning_rate | 0.0007 |\n","| n_updates | 40899 |\n","| policy_loss | -0.00194 |\n","| std | 0.286 |\n","| value_loss | 0.000199 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.57 |\n","| ep_rew_mean | -0.198 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 41000 |\n","| time_elapsed | 2223 |\n","| total_timesteps | 820000 |\n","| train/ | |\n","| entropy_loss | -0.135 |\n","| explained_variance | 0.988 |\n","| learning_rate | 0.0007 |\n","| n_updates | 40999 |\n","| policy_loss | 0.000996 |\n","| std | 0.285 |\n","| value_loss | 0.000121 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.66 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 41100 |\n","| time_elapsed | 2229 |\n","| total_timesteps | 822000 |\n","| train/ | |\n","| entropy_loss | -0.126 |\n","| explained_variance | 0.985 |\n","| learning_rate | 0.0007 |\n","| n_updates | 41099 |\n","| policy_loss | -0.00212 |\n","| std | 0.284 |\n","| value_loss | 0.000156 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.57 |\n","| ep_rew_mean | -0.199 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 41200 |\n","| time_elapsed | 2234 |\n","| total_timesteps | 824000 |\n","| train/ | |\n","| entropy_loss | -0.126 |\n","| explained_variance | 0.968 |\n","| learning_rate | 0.0007 |\n","| n_updates | 41199 |\n","| policy_loss | -0.00282 |\n","| std | 0.284 |\n","| value_loss | 0.000188 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.96 |\n","| ep_rew_mean | -0.24 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 41300 |\n","| time_elapsed | 2240 |\n","| total_timesteps | 826000 |\n","| train/ | |\n","| entropy_loss | -0.123 |\n","| explained_variance | 0.965 |\n","| learning_rate | 0.0007 |\n","| n_updates | 41299 |\n","| policy_loss | 0.00622 |\n","| std | 0.285 |\n","| value_loss | 0.000636 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.212 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 41400 |\n","| time_elapsed | 2246 |\n","| total_timesteps | 828000 |\n","| train/ | |\n","| entropy_loss | -0.116 |\n","| explained_variance | 0.966 |\n","| learning_rate | 0.0007 |\n","| n_updates | 41399 |\n","| policy_loss | -0.00257 |\n","| std | 0.285 |\n","| value_loss | 0.000232 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.215 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 41500 |\n","| time_elapsed | 2251 |\n","| total_timesteps | 830000 |\n","| train/ | |\n","| entropy_loss | -0.113 |\n","| explained_variance | 0.976 |\n","| learning_rate | 0.0007 |\n","| n_updates | 41499 |\n","| policy_loss | 0.00336 |\n","| std | 0.285 |\n","| value_loss | 0.000249 |\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.7 |\n","| ep_rew_mean | -0.205 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 41600 |\n","| time_elapsed | 2257 |\n","| total_timesteps | 832000 |\n","| train/ | |\n","| entropy_loss | -0.115 |\n","| explained_variance | 0.991 |\n","| learning_rate | 0.0007 |\n","| n_updates | 41599 |\n","| policy_loss | -8.09e-06 |\n","| std | 0.286 |\n","| value_loss | 8.17e-05 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.63 |\n","| ep_rew_mean | -0.203 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 41700 |\n","| time_elapsed | 2262 |\n","| total_timesteps | 834000 |\n","| train/ | |\n","| entropy_loss | -0.0992 |\n","| explained_variance | 0.991 |\n","| learning_rate | 0.0007 |\n","| n_updates | 41699 |\n","| policy_loss | -0.00135 |\n","| std | 0.286 |\n","| value_loss | 0.000217 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.9 |\n","| ep_rew_mean | -0.239 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 41800 |\n","| time_elapsed | 2269 |\n","| total_timesteps | 836000 |\n","| train/ | |\n","| entropy_loss | -0.0974 |\n","| explained_variance | 0.988 |\n","| learning_rate | 0.0007 |\n","| n_updates | 41799 |\n","| policy_loss | 0.00715 |\n","| std | 0.286 |\n","| value_loss | 0.000153 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.59 |\n","| ep_rew_mean | -0.194 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 41900 |\n","| time_elapsed | 2274 |\n","| total_timesteps | 838000 |\n","| train/ | |\n","| entropy_loss | -0.0765 |\n","| explained_variance | 0.973 |\n","| learning_rate | 0.0007 |\n","| n_updates | 41899 |\n","| policy_loss | 0.00194 |\n","| std | 0.285 |\n","| value_loss | 0.000179 |\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.77 |\n","| ep_rew_mean | -0.22 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 42000 |\n","| time_elapsed | 2279 |\n","| total_timesteps | 840000 |\n","| train/ | |\n","| entropy_loss | -0.0793 |\n","| explained_variance | 0.979 |\n","| learning_rate | 0.0007 |\n","| n_updates | 41999 |\n","| policy_loss | -0.000296 |\n","| std | 0.286 |\n","| value_loss | 0.000206 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.72 |\n","| ep_rew_mean | -0.203 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 42100 |\n","| time_elapsed | 2285 |\n","| total_timesteps | 842000 |\n","| train/ | |\n","| entropy_loss | -0.0627 |\n","| explained_variance | 0.984 |\n","| learning_rate | 0.0007 |\n","| n_updates | 42099 |\n","| policy_loss | 0.000554 |\n","| std | 0.285 |\n","| value_loss | 0.000218 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.75 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 42200 |\n","| time_elapsed | 2290 |\n","| total_timesteps | 844000 |\n","| train/ | |\n","| entropy_loss | -0.0515 |\n","| explained_variance | 0.988 |\n","| learning_rate | 0.0007 |\n","| n_updates | 42199 |\n","| policy_loss | -0.00696 |\n","| std | 0.285 |\n","| value_loss | 
0.000283 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.7 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 42300 |\n","| time_elapsed | 2296 |\n","| total_timesteps | 846000 |\n","| train/ | |\n","| entropy_loss | -0.0408 |\n","| explained_variance | 0.979 |\n","| learning_rate | 0.0007 |\n","| n_updates | 42299 |\n","| policy_loss | -0.00423 |\n","| std | 0.283 |\n","| value_loss | 0.000156 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.75 |\n","| ep_rew_mean | -0.221 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 42400 |\n","| time_elapsed | 2302 |\n","| total_timesteps | 848000 |\n","| train/ | |\n","| entropy_loss | -0.0206 |\n","| explained_variance | 0.863 |\n","| learning_rate | 0.0007 |\n","| n_updates | 42399 |\n","| policy_loss | -0.012 |\n","| std | 0.281 |\n","| value_loss | 0.00221 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.86 |\n","| ep_rew_mean | -0.219 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 42500 |\n","| time_elapsed | 2307 |\n","| total_timesteps | 850000 |\n","| train/ | |\n","| entropy_loss | -0.0021 |\n","| explained_variance | 0.982 |\n","| learning_rate | 0.0007 |\n","| n_updates | 42499 |\n","| policy_loss | -0.00534 |\n","| std | 0.279 |\n","| value_loss | 0.000232 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.64 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 42600 |\n","| time_elapsed | 2313 |\n","| total_timesteps | 852000 |\n","| train/ | |\n","| entropy_loss | -0.0125 |\n","| explained_variance | 0.98 |\n","| learning_rate | 0.0007 |\n","| n_updates | 42599 |\n","| policy_loss | 0.00384 |\n","| std | 0.281 |\n","| value_loss | 
0.000232 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.72 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 42700 |\n","| time_elapsed | 2318 |\n","| total_timesteps | 854000 |\n","| train/ | |\n","| entropy_loss | 0.00768 |\n","| explained_variance | 0.948 |\n","| learning_rate | 0.0007 |\n","| n_updates | 42699 |\n","| policy_loss | -0.00122 |\n","| std | 0.279 |\n","| value_loss | 0.000569 |\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.62 |\n","| ep_rew_mean | -0.209 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 42800 |\n","| time_elapsed | 2324 |\n","| total_timesteps | 856000 |\n","| train/ | |\n","| entropy_loss | 0.0185 |\n","| explained_variance | 0.988 |\n","| learning_rate | 0.0007 |\n","| n_updates | 42799 |\n","| policy_loss | -0.000285 |\n","| std | 0.279 |\n","| value_loss | 8.38e-05 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 42900 |\n","| time_elapsed | 2330 |\n","| total_timesteps | 858000 |\n","| train/ | |\n","| entropy_loss | 0.0162 |\n","| explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 42899 |\n","| policy_loss | 0.00484 |\n","| std | 0.28 |\n","| value_loss | 0.000399 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.65 |\n","| ep_rew_mean | -0.202 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 43000 |\n","| time_elapsed | 2336 |\n","| total_timesteps | 860000 |\n","| train/ | |\n","| entropy_loss | 0.0316 |\n","| explained_variance | 0.978 |\n","| learning_rate | 0.0007 |\n","| n_updates | 42999 |\n","| policy_loss | 0.00017 |\n","| std | 0.28 |\n","| value_loss | 
0.000129 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.82 |\n","| ep_rew_mean | -0.225 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 43100 |\n","| time_elapsed | 2341 |\n","| total_timesteps | 862000 |\n","| train/ | |\n","| entropy_loss | 0.0497 |\n","| explained_variance | 0.916 |\n","| learning_rate | 0.0007 |\n","| n_updates | 43099 |\n","| policy_loss | 0.00899 |\n","| std | 0.278 |\n","| value_loss | 0.000573 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.58 |\n","| ep_rew_mean | -0.196 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 43200 |\n","| time_elapsed | 2346 |\n","| total_timesteps | 864000 |\n","| train/ | |\n","| entropy_loss | 0.0705 |\n","| explained_variance | 0.97 |\n","| learning_rate | 0.0007 |\n","| n_updates | 43199 |\n","| policy_loss | -0.0026 |\n","| std | 0.276 |\n","| value_loss | 0.000282 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.203 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 43300 |\n","| time_elapsed | 2351 |\n","| total_timesteps | 866000 |\n","| train/ | |\n","| entropy_loss | 0.0972 |\n","| explained_variance | 0.979 |\n","| learning_rate | 0.0007 |\n","| n_updates | 43299 |\n","| policy_loss | 0.00171 |\n","| std | 0.273 |\n","| value_loss | 0.000169 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.213 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 43400 |\n","| time_elapsed | 2356 |\n","| total_timesteps | 868000 |\n","| train/ | |\n","| entropy_loss | 0.103 |\n","| explained_variance | 0.983 |\n","| learning_rate | 0.0007 |\n","| n_updates | 43399 |\n","| policy_loss | -0.00518 |\n","| std | 0.272 |\n","| value_loss | 0.000144 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.75 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 43500 |\n","| time_elapsed | 2362 |\n","| total_timesteps | 870000 |\n","| train/ | |\n","| entropy_loss | 0.124 |\n","| explained_variance | 0.74 |\n","| learning_rate | 0.0007 |\n","| n_updates | 43499 |\n","| policy_loss | 0.0128 |\n","| std | 0.27 |\n","| value_loss | 0.00289 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.53 |\n","| ep_rew_mean | -0.185 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 43600 |\n","| time_elapsed | 2367 |\n","| total_timesteps | 872000 |\n","| train/ | |\n","| entropy_loss | 0.14 |\n","| explained_variance | 0.982 |\n","| learning_rate | 0.0007 |\n","| n_updates | 43599 |\n","| policy_loss | 4.74e-05 |\n","| std | 0.269 |\n","| value_loss | 0.000178 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 43700 |\n","| time_elapsed | 2373 |\n","| total_timesteps | 874000 |\n","| train/ | |\n","| entropy_loss | 0.15 |\n","| explained_variance | 0.87 |\n","| learning_rate | 0.0007 |\n","| n_updates | 43699 |\n","| policy_loss | -0.00248 |\n","| std | 0.269 |\n","| value_loss | 0.00118 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.208 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 43800 |\n","| time_elapsed | 2377 |\n","| total_timesteps | 876000 |\n","| train/ | |\n","| entropy_loss | 0.153 |\n","| explained_variance | 0.987 |\n","| learning_rate | 0.0007 |\n","| n_updates | 43799 |\n","| policy_loss | 0.000131 |\n","| std | 0.269 |\n","| value_loss | 0.000181 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.227 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 43900 |\n","| time_elapsed | 2383 |\n","| total_timesteps | 878000 |\n","| train/ | |\n","| entropy_loss | 0.166 |\n","| explained_variance | 0.984 |\n","| learning_rate | 0.0007 |\n","| n_updates | 43899 |\n","| policy_loss | -0.0044 |\n","| std | 0.268 |\n","| value_loss | 0.00018 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 44000 |\n","| time_elapsed | 2388 |\n","| total_timesteps | 880000 |\n","| train/ | |\n","| entropy_loss | 0.171 |\n","| explained_variance | 0.976 |\n","| learning_rate | 0.0007 |\n","| n_updates | 43999 |\n","| policy_loss | -0.00274 |\n","| std | 0.268 |\n","| value_loss | 0.000212 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.91 |\n","| ep_rew_mean | -0.226 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 44100 |\n","| time_elapsed | 2394 |\n","| total_timesteps | 882000 |\n","| train/ | |\n","| entropy_loss | 0.18 |\n","| explained_variance | 0.989 |\n","| learning_rate | 0.0007 |\n","| n_updates | 44099 |\n","| policy_loss | 0.00268 |\n","| std | 0.268 |\n","| value_loss | 0.000135 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.73 |\n","| ep_rew_mean | -0.206 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 44200 |\n","| time_elapsed | 2400 |\n","| total_timesteps | 884000 |\n","| train/ | |\n","| entropy_loss | 0.187 |\n","| explained_variance | 0.984 |\n","| learning_rate | 0.0007 |\n","| n_updates | 44199 |\n","| policy_loss | 0.00523 |\n","| std | 0.268 |\n","| value_loss | 0.000261 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.69 |\n","| ep_rew_mean | -0.201 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 44300 |\n","| time_elapsed | 2404 |\n","| total_timesteps | 886000 |\n","| train/ | |\n","| entropy_loss | 0.193 |\n","| explained_variance | 0.991 |\n","| learning_rate | 0.0007 |\n","| n_updates | 44299 |\n","| policy_loss | -0.00104 |\n","| std | 0.268 |\n","| value_loss | 5.67e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.99 |\n","| ep_rew_mean | -0.227 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 44400 |\n","| time_elapsed | 2410 |\n","| total_timesteps | 888000 |\n","| train/ | |\n","| entropy_loss | 0.184 |\n","| explained_variance | 0.795 |\n","| learning_rate | 0.0007 |\n","| n_updates | 44399 |\n","| policy_loss | 0.00176 |\n","| std | 0.269 |\n","| value_loss | 0.000957 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.73 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 44500 |\n","| time_elapsed | 2415 |\n","| total_timesteps | 890000 |\n","| train/ | |\n","| entropy_loss | 0.187 |\n","| explained_variance | 0.955 |\n","| learning_rate | 0.0007 |\n","| n_updates | 44499 |\n","| policy_loss | -0.0046 |\n","| std | 0.269 |\n","| value_loss | 0.000338 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 44600 |\n","| time_elapsed | 2421 |\n","| total_timesteps | 892000 |\n","| train/ | |\n","| entropy_loss | 0.184 |\n","| explained_variance | 0.949 |\n","| learning_rate | 0.0007 |\n","| n_updates | 44599 |\n","| policy_loss | 0.00029 |\n","| std | 0.269 |\n","| value_loss | 0.000338 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.87 |\n","| ep_rew_mean | -0.234 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 44700 |\n","| time_elapsed | 2426 |\n","| total_timesteps | 894000 |\n","| train/ | |\n","| entropy_loss | 0.182 |\n","| explained_variance | 0.801 |\n","| learning_rate | 0.0007 |\n","| n_updates | 44699 |\n","| policy_loss | -0.0241 |\n","| std | 0.27 |\n","| value_loss | 0.00115 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.84 |\n","| ep_rew_mean | -0.225 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 44800 |\n","| time_elapsed | 2432 |\n","| total_timesteps | 896000 |\n","| train/ | |\n","| entropy_loss | 0.196 |\n","| explained_variance | 0.983 |\n","| learning_rate | 0.0007 |\n","| n_updates | 44799 |\n","| policy_loss | -0.00151 |\n","| std | 0.269 |\n","| value_loss | 0.00019 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.61 |\n","| ep_rew_mean | -0.202 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 44900 |\n","| time_elapsed | 2437 |\n","| total_timesteps | 898000 |\n","| train/ | |\n","| entropy_loss | 0.185 |\n","| explained_variance | 0.985 |\n","| learning_rate | 0.0007 |\n","| n_updates | 44899 |\n","| policy_loss | -0.0031 |\n","| std | 0.271 |\n","| value_loss | 0.000298 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.213 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 45000 |\n","| time_elapsed | 2442 |\n","| total_timesteps | 900000 |\n","| train/ | |\n","| entropy_loss | 0.198 |\n","| explained_variance | 0.987 |\n","| learning_rate | 0.0007 |\n","| n_updates | 44999 |\n","| policy_loss | 0.00106 |\n","| std | 0.271 |\n","| value_loss | 0.000171 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.203 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 45100 |\n","| time_elapsed | 2448 |\n","| total_timesteps | 902000 |\n","| train/ | |\n","| entropy_loss | 0.225 |\n","| explained_variance | 0.977 |\n","| learning_rate | 0.0007 |\n","| n_updates | 45099 |\n","| policy_loss | 0.00321 |\n","| std | 0.269 |\n","| value_loss | 0.000174 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.72 |\n","| ep_rew_mean | -0.22 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 45200 |\n","| time_elapsed | 2453 |\n","| total_timesteps | 904000 |\n","| train/ | |\n","| entropy_loss | 0.24 |\n","| explained_variance | 0.952 |\n","| learning_rate | 0.0007 |\n","| n_updates | 45199 |\n","| policy_loss | -0.00208 |\n","| std | 0.267 |\n","| value_loss | 0.000242 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.66 |\n","| ep_rew_mean | -0.2 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 45300 |\n","| time_elapsed | 2459 |\n","| total_timesteps | 906000 |\n","| train/ | |\n","| entropy_loss | 0.271 |\n","| explained_variance | 0.983 |\n","| learning_rate | 0.0007 |\n","| n_updates | 45299 |\n","| policy_loss | -0.00553 |\n","| std | 0.265 |\n","| value_loss | 0.000167 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.67 |\n","| ep_rew_mean | -0.204 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 45400 |\n","| time_elapsed | 2463 |\n","| total_timesteps | 908000 |\n","| train/ | |\n","| entropy_loss | 0.269 |\n","| explained_variance | 0.992 |\n","| learning_rate | 0.0007 |\n","| n_updates | 45399 |\n","| policy_loss | 0.00133 |\n","| std | 0.266 |\n","| value_loss | 0.000363 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.8 |\n","| ep_rew_mean | -0.214 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 45500 |\n","| time_elapsed | 2469 |\n","| total_timesteps | 910000 |\n","| train/ | |\n","| entropy_loss | 0.276 |\n","| explained_variance | 0.964 |\n","| learning_rate | 0.0007 |\n","| n_updates | 45499 |\n","| policy_loss | -0.00835 |\n","| std | 0.265 |\n","| value_loss | 0.000288 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.76 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 45600 |\n","| time_elapsed | 2474 |\n","| total_timesteps | 912000 |\n","| train/ | |\n","| entropy_loss | 0.268 |\n","| explained_variance | 0.988 |\n","| learning_rate | 0.0007 |\n","| n_updates | 45599 |\n","| policy_loss | -0.00317 |\n","| std | 0.267 |\n","| value_loss | 0.000172 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.79 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 45700 |\n","| time_elapsed | 2479 |\n","| total_timesteps | 914000 |\n","| train/ | |\n","| entropy_loss | 0.268 |\n","| explained_variance | 0.964 |\n","| learning_rate | 0.0007 |\n","| n_updates | 45699 |\n","| policy_loss | 0.00148 |\n","| std | 0.267 |\n","| value_loss | 0.000399 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.83 |\n","| ep_rew_mean | -0.218 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 45800 |\n","| time_elapsed | 2485 |\n","| total_timesteps | 916000 |\n","| train/ | |\n","| entropy_loss | 0.279 |\n","| explained_variance | 0.91 |\n","| learning_rate | 0.0007 |\n","| n_updates | 45799 |\n","| policy_loss | 0.0106 |\n","| std | 0.267 |\n","| value_loss | 0.000879 
|\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.54 |\n","| ep_rew_mean | -0.194 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 45900 |\n","| time_elapsed | 2490 |\n","| total_timesteps | 918000 |\n","| train/ | |\n","| entropy_loss | 0.262 |\n","| explained_variance | 0.988 |\n","| learning_rate | 0.0007 |\n","| n_updates | 45899 |\n","| policy_loss | -0.000651 |\n","| std | 0.268 |\n","| value_loss | 0.000179 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.214 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 46000 |\n","| time_elapsed | 2496 |\n","| total_timesteps | 920000 |\n","| train/ | |\n","| entropy_loss | 0.262 |\n","| explained_variance | 0.986 |\n","| learning_rate | 0.0007 |\n","| n_updates | 45999 |\n","| policy_loss | -0.00303 |\n","| std | 0.27 |\n","| value_loss | 0.000166 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.79 |\n","| ep_rew_mean | -0.211 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 46100 |\n","| time_elapsed | 2500 |\n","| total_timesteps | 922000 |\n","| train/ | |\n","| entropy_loss | 0.282 |\n","| explained_variance | 0.978 |\n","| learning_rate | 0.0007 |\n","| n_updates | 46099 |\n","| policy_loss | -0.00638 |\n","| std | 0.268 |\n","| value_loss | 0.000137 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.61 |\n","| ep_rew_mean | -0.193 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 46200 |\n","| time_elapsed | 2506 |\n","| total_timesteps | 924000 |\n","| train/ | |\n","| entropy_loss | 0.304 |\n","| explained_variance | 0.983 |\n","| learning_rate | 0.0007 |\n","| n_updates | 46199 |\n","| policy_loss | 0.00416 |\n","| std | 0.266 |\n","| value_loss | 0.000145 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.86 |\n","| ep_rew_mean | -0.223 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 46300 |\n","| time_elapsed | 2511 |\n","| total_timesteps | 926000 |\n","| train/ | |\n","| entropy_loss | 0.289 |\n","| explained_variance | 0.969 |\n","| learning_rate | 0.0007 |\n","| n_updates | 46299 |\n","| policy_loss | 0.00131 |\n","| std | 0.267 |\n","| value_loss | 0.000178 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.83 |\n","| ep_rew_mean | -0.212 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 46400 |\n","| time_elapsed | 2516 |\n","| total_timesteps | 928000 |\n","| train/ | |\n","| entropy_loss | 0.302 |\n","| explained_variance | 0.982 |\n","| learning_rate | 0.0007 |\n","| n_updates | 46399 |\n","| policy_loss | 0.00128 |\n","| std | 0.266 |\n","| value_loss | 0.000431 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.59 |\n","| ep_rew_mean | -0.194 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 46500 |\n","| time_elapsed | 2522 |\n","| total_timesteps | 930000 |\n","| train/ | |\n","| entropy_loss | 0.304 |\n","| explained_variance | 0.961 |\n","| learning_rate | 0.0007 |\n","| n_updates | 46499 |\n","| policy_loss | -0.0111 |\n","| std | 0.266 |\n","| value_loss | 0.000262 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.212 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 46600 |\n","| time_elapsed | 2527 |\n","| total_timesteps | 932000 |\n","| train/ | |\n","| entropy_loss | 0.308 |\n","| explained_variance | 0.945 |\n","| learning_rate | 0.0007 |\n","| n_updates | 46599 |\n","| policy_loss | 0.00992 |\n","| std | 0.266 |\n","| value_loss | 0.000554 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.77 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 46700 |\n","| time_elapsed | 2533 |\n","| total_timesteps | 934000 |\n","| train/ | |\n","| entropy_loss | 0.304 |\n","| explained_variance | 0.97 |\n","| learning_rate | 0.0007 |\n","| n_updates | 46699 |\n","| policy_loss | -0.00132 |\n","| std | 0.266 |\n","| value_loss | 0.000187 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.22 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 46800 |\n","| time_elapsed | 2538 |\n","| total_timesteps | 936000 |\n","| train/ | |\n","| entropy_loss | 0.32 |\n","| explained_variance | 0.981 |\n","| learning_rate | 0.0007 |\n","| n_updates | 46799 |\n","| policy_loss | -0.00506 |\n","| std | 0.264 |\n","| value_loss | 0.000267 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.57 |\n","| ep_rew_mean | -0.208 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 46900 |\n","| time_elapsed | 2543 |\n","| total_timesteps | 938000 |\n","| train/ | |\n","| entropy_loss | 0.336 |\n","| explained_variance | 0.987 |\n","| learning_rate | 0.0007 |\n","| n_updates | 46899 |\n","| policy_loss | -0.00688 |\n","| std | 0.263 |\n","| value_loss | 0.00018 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.21 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 47000 |\n","| time_elapsed | 2549 |\n","| total_timesteps | 940000 |\n","| train/ | |\n","| entropy_loss | 0.341 |\n","| explained_variance | 0.976 |\n","| learning_rate | 0.0007 |\n","| n_updates | 46999 |\n","| policy_loss | -0.0197 |\n","| std | 0.262 |\n","| value_loss | 0.000537 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.68 |\n","| ep_rew_mean | -0.208 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 47100 |\n","| time_elapsed | 2554 |\n","| total_timesteps | 942000 |\n","| train/ | |\n","| entropy_loss | 0.343 |\n","| explained_variance | 0.983 |\n","| learning_rate | 0.0007 |\n","| n_updates | 47099 |\n","| policy_loss | -0.00182 |\n","| std | 0.263 |\n","| value_loss | 0.000144 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.6 |\n","| ep_rew_mean | -0.205 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 47200 |\n","| time_elapsed | 2559 |\n","| total_timesteps | 944000 |\n","| train/ | |\n","| entropy_loss | 0.351 |\n","| explained_variance | 0.991 |\n","| learning_rate | 0.0007 |\n","| n_updates | 47199 |\n","| policy_loss | 0.00507 |\n","| std | 0.262 |\n","| value_loss | 9.76e-05 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.74 |\n","| ep_rew_mean | -0.213 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 47300 |\n","| time_elapsed | 2564 |\n","| total_timesteps | 946000 |\n","| train/ | |\n","| entropy_loss | 0.367 |\n","| explained_variance | 0.972 |\n","| learning_rate | 0.0007 |\n","| n_updates | 47299 |\n","| policy_loss | -0.00172 |\n","| std | 0.261 |\n","| value_loss | 0.00016 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.75 |\n","| ep_rew_mean | -0.218 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 47400 |\n","| time_elapsed | 2570 |\n","| total_timesteps | 948000 |\n","| train/ | |\n","| entropy_loss | 0.37 |\n","| explained_variance | 0.888 |\n","| learning_rate | 0.0007 |\n","| n_updates | 47399 |\n","| policy_loss | 0.102 |\n","| std | 0.261 |\n","| value_loss | 0.0469 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 5.1 |\n","| ep_rew_mean | -0.472 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 47500 |\n","| time_elapsed | 2575 |\n","| total_timesteps | 950000 |\n","| train/ | |\n","| entropy_loss | 0.389 |\n","| explained_variance | 0.975 |\n","| learning_rate | 0.0007 |\n","| n_updates | 47499 |\n","| policy_loss | -0.072 |\n","| std | 0.259 |\n","| value_loss | 0.072 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 5.64 |\n","| ep_rew_mean | -0.586 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 47600 |\n","| time_elapsed | 2581 |\n","| total_timesteps | 952000 |\n","| train/ | |\n","| entropy_loss | 0.376 |\n","| explained_variance | 0.71 |\n","| learning_rate | 0.0007 |\n","| n_updates | 47599 |\n","| policy_loss | -0.294 |\n","| std | 0.259 |\n","| value_loss | 2.33 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 6.25 |\n","| ep_rew_mean | -0.629 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 47700 |\n","| time_elapsed | 2586 |\n","| total_timesteps | 954000 |\n","| train/ | |\n","| entropy_loss | 0.381 |\n","| explained_variance | 0.967 |\n","| learning_rate | 0.0007 |\n","| n_updates | 47699 |\n","| policy_loss | 0.257 |\n","| std | 0.259 |\n","| value_loss | 0.465 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 5.3 |\n","| ep_rew_mean | -0.563 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 47800 |\n","| time_elapsed | 2592 |\n","| total_timesteps | 956000 |\n","| train/ | |\n","| entropy_loss | 0.394 |\n","| explained_variance | 0.963 |\n","| learning_rate | 0.0007 |\n","| n_updates | 47799 |\n","| policy_loss | 0.666 |\n","| std | 0.257 |\n","| value_loss | 0.873 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 4.31 |\n","| ep_rew_mean | -0.342 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 47900 |\n","| time_elapsed | 2598 |\n","| total_timesteps | 958000 |\n","| train/ | |\n","| entropy_loss | 0.421 |\n","| explained_variance | -25.6 |\n","| learning_rate | 0.0007 |\n","| n_updates | 47899 |\n","| policy_loss | 0.537 |\n","| std | 0.254 |\n","| value_loss | 4.94 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.96 |\n","| ep_rew_mean | -0.317 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 48000 |\n","| time_elapsed | 2603 |\n","| total_timesteps | 960000 |\n","| train/ | |\n","| entropy_loss | 0.439 |\n","| explained_variance | 0.44 |\n","| learning_rate | 0.0007 |\n","| n_updates | 47999 |\n","| policy_loss | -0.0174 |\n","| std | 0.253 |\n","| value_loss | 0.0244 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.92 |\n","| ep_rew_mean | -0.233 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 48100 |\n","| time_elapsed | 2609 |\n","| total_timesteps | 962000 |\n","| train/ | |\n","| entropy_loss | 0.452 |\n","| explained_variance | 0.72 |\n","| learning_rate | 0.0007 |\n","| n_updates | 48099 |\n","| policy_loss | 0.0104 |\n","| std | 0.252 |\n","| value_loss | 0.0267 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.8 |\n","| ep_rew_mean | -0.224 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 48200 |\n","| time_elapsed | 2613 |\n","| total_timesteps | 964000 |\n","| train/ | |\n","| entropy_loss | 0.464 |\n","| explained_variance | -0.27 |\n","| learning_rate | 0.0007 |\n","| n_updates | 48199 |\n","| policy_loss | -0.0121 |\n","| std | 0.251 |\n","| value_loss | 0.0119 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.86 |\n","| ep_rew_mean | -0.228 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 48300 |\n","| time_elapsed | 2619 |\n","| total_timesteps | 966000 |\n","| train/ | |\n","| entropy_loss | 0.471 |\n","| explained_variance | 0.367 |\n","| learning_rate | 0.0007 |\n","| n_updates | 48299 |\n","| policy_loss | 0.0417 |\n","| std | 0.25 |\n","| value_loss | 0.0082 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3.01 |\n","| ep_rew_mean | -0.238 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 48400 |\n","| time_elapsed | 2624 |\n","| total_timesteps | 968000 |\n","| train/ | |\n","| entropy_loss | 0.476 |\n","| explained_variance | 0.0691 |\n","| learning_rate | 0.0007 |\n","| n_updates | 48399 |\n","| policy_loss | 0.0139 |\n","| std | 0.25 |\n","| value_loss | 0.0138 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 3 |\n","| ep_rew_mean | -0.234 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 48500 |\n","| time_elapsed | 2630 |\n","| total_timesteps | 970000 |\n","| train/ | |\n","| entropy_loss | 0.458 |\n","| explained_variance | 0.69 |\n","| learning_rate | 0.0007 |\n","| n_updates | 48499 |\n","| policy_loss | -0.00865 |\n","| std | 0.252 |\n","| value_loss | 0.00166 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.91 |\n","| ep_rew_mean | -0.227 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 48600 |\n","| time_elapsed | 2635 |\n","| total_timesteps | 972000 |\n","| train/ | |\n","| entropy_loss | 0.471 |\n","| explained_variance | 0.68 |\n","| learning_rate | 0.0007 |\n","| n_updates | 48599 |\n","| policy_loss | -0.00833 |\n","| std | 0.251 |\n","| value_loss | 0.00228 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.92 |\n","| ep_rew_mean | -0.233 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 48700 |\n","| time_elapsed | 2640 |\n","| total_timesteps | 974000 |\n","| train/ | |\n","| entropy_loss | 0.488 |\n","| explained_variance | 0.727 |\n","| learning_rate | 0.0007 |\n","| n_updates | 48699 |\n","| policy_loss | 0.00807 |\n","| std | 0.25 |\n","| value_loss | 0.00116 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.73 |\n","| ep_rew_mean | -0.217 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 48800 |\n","| time_elapsed | 2646 |\n","| total_timesteps | 976000 |\n","| train/ | |\n","| entropy_loss | 0.49 |\n","| explained_variance | 0.806 |\n","| learning_rate | 0.0007 |\n","| n_updates | 48799 |\n","| policy_loss | 0.0138 |\n","| std | 0.25 |\n","| value_loss | 0.00104 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.87 |\n","| ep_rew_mean | -0.228 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 48900 |\n","| time_elapsed | 2650 |\n","| total_timesteps | 978000 |\n","| train/ | |\n","| entropy_loss | 0.475 |\n","| explained_variance | 0.828 |\n","| learning_rate | 0.0007 |\n","| n_updates | 48899 |\n","| policy_loss | 0.0108 |\n","| std | 0.25 |\n","| value_loss | 0.00147 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.218 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 49000 |\n","| time_elapsed | 2656 |\n","| total_timesteps | 980000 |\n","| train/ | |\n","| entropy_loss | 0.477 |\n","| explained_variance | 0.85 |\n","| learning_rate | 0.0007 |\n","| n_updates | 48999 |\n","| policy_loss | 0.00286 |\n","| std | 0.25 |\n","| value_loss | 0.00109 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.8 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 49100 |\n","| time_elapsed | 2661 |\n","| total_timesteps | 982000 |\n","| train/ | |\n","| entropy_loss | 0.48 |\n","| explained_variance | 0.967 |\n","| learning_rate | 0.0007 |\n","| n_updates | 49099 |\n","| policy_loss | -0.00159 |\n","| std | 0.25 |\n","| value_loss | 0.000527 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.98 |\n","| ep_rew_mean | -0.233 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 49200 |\n","| time_elapsed | 2667 |\n","| total_timesteps | 984000 |\n","| train/ | |\n","| entropy_loss | 0.493 |\n","| explained_variance | 0.943 |\n","| learning_rate | 0.0007 |\n","| n_updates | 49199 |\n","| policy_loss | 0.00435 |\n","| std | 0.249 |\n","| value_loss | 0.000571 |\n","------------------------------------\n","-------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.84 |\n","| ep_rew_mean | -0.222 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 49300 |\n","| time_elapsed | 2672 |\n","| total_timesteps | 986000 |\n","| train/ | |\n","| entropy_loss | 0.486 |\n","| explained_variance | 0.918 |\n","| learning_rate | 0.0007 |\n","| n_updates | 49299 |\n","| policy_loss | -0.000171 |\n","| std | 0.25 |\n","| value_loss | 0.000799 |\n","-------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.65 |\n","| ep_rew_mean | -0.204 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 49400 |\n","| time_elapsed | 2677 |\n","| total_timesteps | 988000 |\n","| train/ | |\n","| entropy_loss | 0.486 |\n","| explained_variance | 0.969 |\n","| learning_rate | 0.0007 |\n","| n_updates | 49399 |\n","| policy_loss | -0.00756 |\n","| std | 0.251 |\n","| value_loss | 0.000684 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.71 |\n","| ep_rew_mean | -0.206 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 49500 |\n","| time_elapsed | 2683 |\n","| total_timesteps | 990000 |\n","| train/ | |\n","| entropy_loss | 0.488 |\n","| explained_variance | 0.949 |\n","| learning_rate | 0.0007 |\n","| n_updates | 49499 |\n","| policy_loss | -0.0265 |\n","| std | 0.251 |\n","| value_loss | 0.000959 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.75 |\n","| ep_rew_mean | -0.223 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 49600 |\n","| time_elapsed | 2689 |\n","| total_timesteps | 992000 |\n","| train/ | |\n","| entropy_loss | 0.494 |\n","| explained_variance | 0.971 |\n","| learning_rate | 0.0007 |\n","| n_updates | 49599 |\n","| policy_loss | -0.00128 |\n","| std | 0.25 |\n","| value_loss | 0.000325 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.73 |\n","| ep_rew_mean | -0.209 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 49700 |\n","| time_elapsed | 2695 |\n","| total_timesteps | 994000 |\n","| train/ | |\n","| entropy_loss | 0.497 |\n","| explained_variance | 0.926 |\n","| learning_rate | 0.0007 |\n","| n_updates | 49699 |\n","| policy_loss | -0.00763 |\n","| std | 0.25 |\n","| value_loss | 0.000821 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.7 |\n","| ep_rew_mean | -0.209 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 49800 |\n","| time_elapsed | 2700 |\n","| total_timesteps | 996000 |\n","| train/ | |\n","| entropy_loss | 0.501 |\n","| explained_variance | 0.953 |\n","| learning_rate | 0.0007 |\n","| n_updates | 49799 |\n","| policy_loss | 0.00254 |\n","| std | 0.249 |\n","| value_loss | 0.000549 
|\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.78 |\n","| ep_rew_mean | -0.207 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 49900 |\n","| time_elapsed | 2706 |\n","| total_timesteps | 998000 |\n","| train/ | |\n","| entropy_loss | 0.514 |\n","| explained_variance | 0.943 |\n","| learning_rate | 0.0007 |\n","| n_updates | 49899 |\n","| policy_loss | -0.0087 |\n","| std | 0.248 |\n","| value_loss | 0.000554 |\n","------------------------------------\n","------------------------------------\n","| rollout/ | |\n","| ep_len_mean | 2.62 |\n","| ep_rew_mean | -0.197 |\n","| time/ | |\n","| fps | 368 |\n","| iterations | 50000 |\n","| time_elapsed | 2711 |\n","| total_timesteps | 1000000 |\n","| train/ | |\n","| entropy_loss | 0.51 |\n","| explained_variance | 0.94 |\n","| learning_rate | 0.0007 |\n","| n_updates | 49999 |\n","| policy_loss | 0.0144 |\n","| std | 0.249 |\n","| value_loss | 0.000603 |\n","------------------------------------\n"]},{"data":{"text/plain":[""]},"execution_count":12,"metadata":{},"output_type":"execute_result"}],"source":["model.learn(1_000_000)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"MfYtjj19cKFr"},"outputs":[],"source":["# Save the model and VecNormalize statistics when saving the agent\n","model.save(\"a2c-PandaReachDense-v3\")\n","env.save(\"vec_normalize.pkl\")"]},{"cell_type":"markdown","metadata":{"id":"01M9GCd32Ig-"},"source":["### Evaluate the agent ๐Ÿ“ˆ\n","- Now that's our agent is trained, we need to **check its performance**.\n","- Stable-Baselines3 provides a method to do that: `evaluate_policy`"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":400,"status":"ok","timestamp":1697797344047,"user":{"displayName":"Jake 
Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"liirTVoDkHq3","outputId":"eb7de6dc-2b1b-426b-c0d8-f886e0305c2b"},"outputs":[{"name":"stdout","output_type":"stream","text":["Mean reward = -0.22 +/- 0.11\n"]},{"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/evaluation.py:67: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n"," warnings.warn(\n"]}],"source":["from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize\n","\n","# Load the saved statistics\n","eval_env = DummyVecEnv([lambda: gym.make(\"PandaReachDense-v3\")])\n","eval_env = VecNormalize.load(\"vec_normalize.pkl\", eval_env)\n","\n","# We need to override the render_mode\n","eval_env.render_mode = \"rgb_array\"\n","\n","# do not update them at test time\n","eval_env.training = False\n","# reward normalization is not needed at test time\n","eval_env.norm_reward = False\n","\n","# Load the agent\n","model = A2C.load(\"a2c-PandaReachDense-v3\")\n","\n","mean_reward, std_reward = evaluate_policy(model, eval_env)\n","\n","print(f\"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}\")"]},{"cell_type":"markdown","metadata":{"id":"44L9LVQaavR8"},"source":["### Publish your trained model on the Hub ๐Ÿ”ฅ\n","Now that we saw we got good results after the training, we can publish our trained model on the Hub with one line of code.\n","\n","๐Ÿ“š The libraries documentation ๐Ÿ‘‰ https://github.com/huggingface/huggingface_sb3/tree/main#hugging-face--x-stable-baselines3-v20\n"]},{"cell_type":"markdown","metadata":{"id":"MkMk99m8bgaQ"},"source":["By using `package_to_hub`, as we already mentionned in the former units, **you evaluate, record a replay, generate a model card of your agent and push it to the hub**.\n","\n","This 
way:\n","- You can **showcase our work** ๐Ÿ”ฅ\n","- You can **visualize your agent playing** ๐Ÿ‘€\n","- You can **share with the community an agent that others can use** ๐Ÿ’พ\n","- You can **access a leaderboard ๐Ÿ† to see how well your agent is performing compared to your classmates** ๐Ÿ‘‰ https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard\n"]},{"cell_type":"markdown","metadata":{"id":"JquRrWytA6eo"},"source":["To be able to share your model with the community there are three more steps to follow:\n","\n","1๏ธโƒฃ (If it's not already done) create an account to HF โžก https://huggingface.co/join\n","\n","2๏ธโƒฃ Sign in and then, you need to store your authentication token from the Hugging Face website.\n","- Create a new token (https://huggingface.co/settings/tokens) **with write role**\n","\n","\"Create\n","\n","- Copy the token\n","- Run the cell below and paste the token"]},{"cell_type":"code","execution_count":20,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":145,"referenced_widgets":["9cacf8dbc7a94fa481e1cc40676692c8","e1bcfc8d49874c0a87984a1519ce9a02","9c37b1a06401475892e32e64cfc7228b","7d05761914c6458abd83d3320d5e6774","4fe7ce97f96c4d3ea8d42decfd98c7fd","3e23a54fdb7044d38aaf172664698871","3158042f4a7747b083201b336c509833","9d9554707a1842c9a21b1d7df3a6e8ea","005095e28ba94fab9433cc3e46079300","8d515b922b3149ef997885c4a1b8f57d","76fca4ceb1b74730ad80914a5526a33a","0b24db20f40e460c91fa1090d0286e80","0cd333170d704dde92b6826238507f38","4f7f847624ad487ba3e8e0c136fd86ee","b8228ef61d2147368167e35c75a14b2b","9b9e93708a824114abdb56ffb3a6bbdd","9dd1a10e441c4a3da2c870f40c55d047","5f30f9cbb28f4826af8b9c9b87ec5b88","18bbbe029e1f42bea69347d5de28d2cb","ffa9936e6f3846b9b215c6e03396106e","d0f67ad35369477185dbcf6b0a7c07c3","04bbbd03a7c94340a458d0dcd7f1bca5","8c3c6264b6284db3957ed7f15a90a601","70f52659a1d84dc7ac1ed4648c4de55d","a0f8c961d3c64db08c3555bd566fe955","db52efeddaff4046aad1067fefe78656","d7a899e7eaa8452fb8caf7
f064ca96c3","66a44c2579634b54860458073bec84be","4e1d4dfdb773409d8f24f035608de8ab","d8bdb828b07b442eb51a0499c552dc5a","32c2bbf73b5549e28273810c28419711","fbdfe5c1594543138016fb9cd417930e"]},"executionInfo":{"elapsed":203,"status":"ok","timestamp":1697806081201,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"GZiFBBlzxzxY","outputId":"9bb2cd88-a0bd-46bd-d867-0cb36685e805"},"outputs":[{"output_type":"display_data","data":{"text/plain":["VBox(children=(HTML(value='
"]},"metadata":{},"execution_count":16}],"source":["model.learn(1000000)"]},{"cell_type":"code","execution_count":17,"metadata":{"executionInfo":{"elapsed":3,"status":"ok","timestamp":1697805826543,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"},"user_tz":-60},"id":"Q4TX9ABssFgF"},"outputs":[],"source":["model.save(\"a2c-PandaPickAndPlace-v3\")\n","env.save(\"vec_normalize.pkl\")"]},{"cell_type":"code","execution_count":18,"metadata":{"id":"LyerZBvys110","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1697805828368,"user_tz":-60,"elapsed":1827,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"9f71bc5b-e9e4-4b2f-910e-d7a55c254fad"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/evaluation.py:67: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. 
Consider wrapping environment first with ``Monitor`` wrapper.\n"," warnings.warn(\n"]},{"output_type":"stream","name":"stdout","text":["Mean reward = -45.00 +/- 15.00\n"]}],"source":["eval_env = DummyVecEnv([lambda: gym.make(\"PandaPickAndPlace-v3\")])\n","eval_env = VecNormalize.load(\"vec_normalize.pkl\", eval_env)\n","\n","eval_env.render_mode = \"rgb_array\"\n","eval_env.training = False\n","eval_env.norm_reward = False\n","\n","model = A2C.load(\"a2c-PandaPickAndPlace-v3\")\n","\n","mean_reward, std_reward = evaluate_policy(model, eval_env)\n","\n","print(f\"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}\")"]},{"cell_type":"code","source":["package_to_hub(\n"," model=model,\n"," model_name=f\"a2c-{env_id}\",\n"," model_architecture=\"A2C\",\n"," env_id=env_id,\n"," eval_env=eval_env,\n"," repo_id=f\"jake-walker/a2c-{env_id}\", # TODO: Change the username\n"," commit_message=\"Initial commit\",\n",")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":551,"referenced_widgets":["8bfe2085c7204088a1ef714838c2aadd","4658324a43be490caf922ead1f9d2d1d","4d717050a24841ee8069c6653117b3dc","ce714cf5cac0486fad1eb8cc49d53e0b","c7fe3d6d2f5541a1a0ec140a7741995d","4da35dc2fa704607b4e2504b6af5db88","9bfd6667c1fb449392d491dae77b59ee","7d07efce45fa4670ad96f9fac0dab684","5ef098a570de4d6e87bf8df85a33050e","f454eaeac4ac494c96fc9f7d438e9618","fc7ee0566b384ab9a135b89739531326","896b4650b155414aaba9036d6ebce7b2","0f93e35df61f42daa06dc7c910b1e7b4","9bd21c82a4a543f09ac4cb89657ca830","212f12b9d6ff45d8b5d8d297dfa53156","9f712dbc35054c13aef235348ce1a353","f8d84bf8235c45ec9b50d275f2cfc867","4d8a546735c74852947634ce252f14f7","2e302f20dc9e4937a1de9d9e0d0da7b3","b2d8210c3889402aaf1db961d4cd730f","4c24fc840b5840c7b2a516c419886882","252d9c5dfa0f46118640012285b286fa","eeed4ec9c9ce45a3bab7fd3e0d1d4ea8","5ba4f0e7d5fa419a9233aa2a4992b2ba","86caab393aa44dd5b435691285196c4d","5adfed80531b441690a0b5249c11b32c","289467336d58463db15339430e5c1f00","3a696b2695294d8995bda6871ed7df08","
544f6f375b8b49a6a66b65893d66003d","fea8d1537e2e442ebdaa06366204e0a9","13dad26515bf437d8c280e32b793f09d","eed2665a7bcd4fb282df5ad004eb05a8","4b571dd45888423aadcd2e70935f55b0","b0e0e8fb7edc47b19bff9a9faab31ef7","1c15e9adfc804dc5ba65761294fd275e","f21549e660bd46fbaf03df38e546ba08","ca2b3ed6a1c848ffbf3d53ebfca2dbdd","714f4ce0c25a4f738cde857cd1d6b30d","e9413c0eaf4a44f293605decc0b286a2","82fad2f7c01d463bab4a4685da4bb3b5","99f9e09f5a474f79acf765cbc28722cd","129e7a89f2be4012bf4490a3fa2e4c56","86138644c37841e1ba73c60d459dedfb","8fe0f5c471864bc48cf66de37638442f","ca10c36b5bd54c74886863dcccba4633","b0c371c0b3024ef9857f25708fdccf13","6a11ced9206048c889c7d6ccc57a6ba2","db3086118fad4361aa92a46357af3d91","d85d82c65da14ec6b67f829c5edeea64","bc915aec907f4821a163a866e7fb8bb6","e8085db08af9497886f160b014c1307f","bd19549616904684bfc20c89b6ff3b63","cd1a75e4e84b45bfa8ed4af0f62847d9","7ab8e6c616f54b88ac34e0ba9b377f20","403401a6fd8a4961ac78e66df6b9925f","ab5cfc2daaef4a4fb0ba2fbe17c60144","e5736ce72bcc46c28347051dbcd02c88","ada769094a464e5799d67dc522cd5637","bd181fd072f84a82a46d048dbae72836","e811d4e6b28844f484e569c03576443c","b21d8aa148154174b6cce2dedb60d281","bfe4ac26ee404f56ac7ae070e0034fab","ff0e9057df6946f9a590acb7431fbc71","29a8b81811d948dab9db8dbacc0a2d34","53dde5d8c4934b81b254ac78a09b5dba","157c384de31845f1a345a9b44d3ebee6"]},"id":"BueNqaiXIJCb","executionInfo":{"status":"ok","timestamp":1697806439616,"user_tz":-60,"elapsed":335108,"user":{"displayName":"Jake Walker","userId":"16416010557495618096"}},"outputId":"53a4c245-081d-4101-8ab9-0d20d1ca0260"},"execution_count":21,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. 
Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n"," and should_run_async(code)\n"]},{"output_type":"stream","name":"stdout","text":["\u001b[38;5;4mโ„น This function will save, evaluate, generate a video of your agent,\n","create a model card and push everything to the hub. It might take up to 1min.\n","This is a work in progress: if you encounter a bug, please open an issue.\u001b[0m\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/stable_baselines3/common/evaluation.py:67: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n"," warnings.warn(\n"]},{"output_type":"stream","name":"stdout","text":["Saving video to /tmp/tmp94l0t13f/-step-0-to-step-1000.mp4\n","Moviepy - Building video /tmp/tmp94l0t13f/-step-0-to-step-1000.mp4.\n","Moviepy - Writing video /tmp/tmp94l0t13f/-step-0-to-step-1000.mp4\n","\n"]},{"output_type":"stream","name":"stderr","text":[]},{"output_type":"stream","name":"stdout","text":["Moviepy - Done !\n","Moviepy - video ready /tmp/tmp94l0t13f/-step-0-to-step-1000.mp4\n","\u001b[38;5;4mโ„น Pushing repo jake-walker/a2c-PandaPickAndPlace-v3 to the Hugging Face\n","Hub\u001b[0m\n"]},{"output_type":"display_data","data":{"text/plain":["policy.optimizer.pth: 0%| | 0.00/52.1k [00:00
Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file.
"}},"9c37b1a06401475892e32e64cfc7228b":{"model_module":"@jupyter-widgets/controls","model_name":"PasswordModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"PasswordModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"PasswordView","continuous_update":true,"description":"Token:","description_tooltip":null,"disabled":false,"layout":"IPY_MODEL_8d515b922b3149ef997885c4a1b8f57d","placeholder":"โ€‹","style":"IPY_MODEL_76fca4ceb1b74730ad80914a5526a33a","value":""}},"7d05761914c6458abd83d3320d5e6774":{"model_module":"@jupyter-widgets/controls","model_name":"CheckboxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"CheckboxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"CheckboxView","description":"Add token as git 
credential?","description_tooltip":null,"disabled":false,"indent":true,"layout":"IPY_MODEL_0b24db20f40e460c91fa1090d0286e80","style":"IPY_MODEL_0cd333170d704dde92b6826238507f38","value":true}},"4fe7ce97f96c4d3ea8d42decfd98c7fd":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ButtonView","button_style":"","description":"Login","disabled":false,"icon":"","layout":"IPY_MODEL_4f7f847624ad487ba3e8e0c136fd86ee","style":"IPY_MODEL_b8228ef61d2147368167e35c75a14b2b","tooltip":""}},"3e23a54fdb7044d38aaf172664698871":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9b9e93708a824114abdb56ffb3a6bbdd","placeholder":"โ€‹","style":"IPY_MODEL_9dd1a10e441c4a3da2c870f40c55d047","value":"\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. 
"}},"3158042f4a7747b083201b336c509833":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":"center","align_self":null,"border":null,"bottom":null,"display":"flex","flex":null,"flex_flow":"column","grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":"50%"}},"9d9554707a1842c9a21b1d7df3a6e8ea":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"005095e28ba94fab9433cc3e46079300":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8d515b922b3149ef997885c4a1b8f57d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"76fca4ceb1b74730ad80914a5526a33a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"0b24db20f40e460c91fa1090d0286e80":{"model_module":"@
jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0cd333170d704dde92b6826238507f38":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4f7f847624ad487ba3e8e0c136fd86ee":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":nul
l,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b8228ef61d2147368167e35c75a14b2b":{"model_module":"@jupyter-widgets/controls","model_name":"ButtonStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ButtonStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","button_color":null,"font_weight":""}},"9b9e93708a824114abdb56ffb3a6bbdd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9dd1a10e441c4a3da2c870f40c55d047":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","
model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5f30f9cbb28f4826af8b9c9b87ec5b88":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_18bbbe029e1f42bea69347d5de28d2cb","placeholder":"โ€‹","style":"IPY_MODEL_ffa9936e6f3846b9b215c6e03396106e","value":"Connecting..."}},"18bbbe029e1f42bea69347d5de28d2cb":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ffa9936e6f3846b9b215c6e03396106e":{"model_module":"@jupyter-widgets/controls","model_nam
e":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d0f67ad35369477185dbcf6b0a7c07c3":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a0f8c961d3c64db08c3555bd566fe955","placeholder":"โ€‹","style":"IPY_MODEL_db52efeddaff4046aad1067fefe78656","value":"Token is valid (permission: write)."}},"04bbbd03a7c94340a458d0dcd7f1bca5":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_d7a899e7eaa8452fb8caf7f064ca96c3","placeholder":"โ€‹","style":"IPY_MODEL_66a44c2579634b54860458073bec84be","value":"Your token has been saved in your configured git credential helpers 
(store)."}},"8c3c6264b6284db3957ed7f15a90a601":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4e1d4dfdb773409d8f24f035608de8ab","placeholder":"โ€‹","style":"IPY_MODEL_d8bdb828b07b442eb51a0499c552dc5a","value":"Your token has been saved to /root/.cache/huggingface/token"}},"70f52659a1d84dc7ac1ed4648c4de55d":{"model_module":"@jupyter-widgets/controls","model_name":"LabelModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"LabelModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"LabelView","description":"","description_tooltip":null,"layout":"IPY_MODEL_32c2bbf73b5549e28273810c28419711","placeholder":"โ€‹","style":"IPY_MODEL_fbdfe5c1594543138016fb9cd417930e","value":"Login 
successful"}},"a0f8c961d3c64db08c3555bd566fe955":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"db52efeddaff4046aad1067fefe78656":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d7a899e7eaa8452fb8caf7f064ca96c3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto
_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"66a44c2579634b54860458073bec84be":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4e1d4dfdb773409d8f24f035608de8ab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d8bdb828b07b442eb51a0499c552dc5a":{"model_module":"@j
upyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"32c2bbf73b5549e28273810c28419711":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fbdfe5c1594543138016fb9cd417930e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8bfe2085c7204088a1ef714838c2aadd":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/co
ntrols","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_4658324a43be490caf922ead1f9d2d1d","IPY_MODEL_4d717050a24841ee8069c6653117b3dc","IPY_MODEL_ce714cf5cac0486fad1eb8cc49d53e0b"],"layout":"IPY_MODEL_c7fe3d6d2f5541a1a0ec140a7741995d"}},"4658324a43be490caf922ead1f9d2d1d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4da35dc2fa704607b4e2504b6af5db88","placeholder":"โ€‹","style":"IPY_MODEL_9bfd6667c1fb449392d491dae77b59ee","value":"policy.optimizer.pth: 100%"}},"4d717050a24841ee8069c6653117b3dc":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_7d07efce45fa4670ad96f9fac0dab684","max":52079,"min":0,"orientation":"horizontal","style":"IPY_MODEL_5ef098a570de4d6e87bf8df85a33050e","value":52079}},"ce714cf5cac0486fad1eb8cc49d53e0b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name
":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f454eaeac4ac494c96fc9f7d438e9618","placeholder":"โ€‹","style":"IPY_MODEL_fc7ee0566b384ab9a135b89739531326","value":" 52.1k/52.1k [00:00<00:00, 24.9kB/s]"}},"c7fe3d6d2f5541a1a0ec140a7741995d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4da35dc2fa704607b4e2504b6af5db88":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_
content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9bfd6667c1fb449392d491dae77b59ee":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"7d07efce45fa4670ad96f9fac0dab684":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"5ef098a570de4d6e87bf8df85a33050e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"
ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f454eaeac4ac494c96fc9f7d438e9618":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fc7ee0566b384ab9a135b89739531326":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"896b4650b155414aaba9036d6ebce7b2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxVi
ew","box_style":"","children":["IPY_MODEL_0f93e35df61f42daa06dc7c910b1e7b4","IPY_MODEL_9bd21c82a4a543f09ac4cb89657ca830","IPY_MODEL_212f12b9d6ff45d8b5d8d297dfa53156"],"layout":"IPY_MODEL_9f712dbc35054c13aef235348ce1a353"}},"0f93e35df61f42daa06dc7c910b1e7b4":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f8d84bf8235c45ec9b50d275f2cfc867","placeholder":"โ€‹","style":"IPY_MODEL_4d8a546735c74852947634ce252f14f7","value":"policy.pth: 100%"}},"9bd21c82a4a543f09ac4cb89657ca830":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_2e302f20dc9e4937a1de9d9e0d0da7b3","max":53359,"min":0,"orientation":"horizontal","style":"IPY_MODEL_b2d8210c3889402aaf1db961d4cd730f","value":53359}},"212f12b9d6ff45d8b5d8d297dfa53156":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4c24fc840b5840c7b2a516c419886882","placeholder":"โ€‹","style":"IPY_MODEL_252d9c5dfa0f46118640012285b286fa","valu
e":" 53.4k/53.4k [00:00<00:00, 25.4kB/s]"}},"9f712dbc35054c13aef235348ce1a353":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f8d84bf8235c45ec9b50d275f2cfc867":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"ove
rflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4d8a546735c74852947634ce252f14f7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2e302f20dc9e4937a1de9d9e0d0da7b3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b2d8210c3889402aaf1db961d4cd730f":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"4c24fc840b58
40c7b2a516c419886882":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"252d9c5dfa0f46118640012285b286fa":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"eeed4ec9c9ce45a3bab7fd3e0d1d4ea8":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_5ba4f0e7d5fa419a9233aa2a4992b2ba","IPY_MODEL_86caab393aa44dd5b435691285196c4d","IPY_MODEL_5adfed80531b441690a0b5249c11b32c"],"layout":"IPY_MODEL_28
9467336d58463db15339430e5c1f00"}},"5ba4f0e7d5fa419a9233aa2a4992b2ba":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3a696b2695294d8995bda6871ed7df08","placeholder":"โ€‹","style":"IPY_MODEL_544f6f375b8b49a6a66b65893d66003d","value":"Upload 5 LFS files: 100%"}},"86caab393aa44dd5b435691285196c4d":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_fea8d1537e2e442ebdaa06366204e0a9","max":5,"min":0,"orientation":"horizontal","style":"IPY_MODEL_13dad26515bf437d8c280e32b793f09d","value":5}},"5adfed80531b441690a0b5249c11b32c":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_eed2665a7bcd4fb282df5ad004eb05a8","placeholder":"โ€‹","style":"IPY_MODEL_4b571dd45888423aadcd2e70935f55b0","value":" 5/5 [00:01<00:00, 
3.15it/s]"}},"289467336d58463db15339430e5c1f00":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"3a696b2695294d8995bda6871ed7df08":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"544f6f375b8b49a6a66b65893d66003d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"fea8d1537e2e442ebdaa06366204e0a9":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"13dad26515bf437d8c280e32b793f09d":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"eed2665a7bcd4fb282df5ad004eb05a8":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4b571dd45888423aadcd2e70935f55b0":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b0e0e8fb7edc47b19bff9a9faab31ef7":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_1c15e9adfc804dc5ba65761294fd275e","IPY_MODEL_f21549e660bd46fbaf03df38e546ba08","IPY_MODEL_ca2b3ed6a1c848ffbf3d53ebfca2dbdd"],"layout":"IPY_MODEL_714f4ce0c25a4f738cde857cd1d6b30d"
}},"1c15e9adfc804dc5ba65761294fd275e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e9413c0eaf4a44f293605decc0b286a2","placeholder":"โ€‹","style":"IPY_MODEL_82fad2f7c01d463bab4a4685da4bb3b5","value":"pytorch_variables.pth: 100%"}},"f21549e660bd46fbaf03df38e546ba08":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_99f9e09f5a474f79acf765cbc28722cd","max":864,"min":0,"orientation":"horizontal","style":"IPY_MODEL_129e7a89f2be4012bf4490a3fa2e4c56","value":864}},"ca2b3ed6a1c848ffbf3d53ebfca2dbdd":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_86138644c37841e1ba73c60d459dedfb","placeholder":"โ€‹","style":"IPY_MODEL_8fe0f5c471864bc48cf66de37638442f","value":" 864/864 [00:00<00:00, 
1.32kB/s]"}},"714f4ce0c25a4f738cde857cd1d6b30d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e9413c0eaf4a44f293605decc0b286a2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"82fad2f7c01d463bab4a4685da4bb3b5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"99f9e09f5a474f79acf765cbc28722cd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"129e7a89f2be4012bf4490a3fa2e4c56":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"86138644c37841e1ba73c60d459dedfb":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8fe0f5c471864bc48cf66de37638442f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ca10c36b5bd54c74886863dcccba4633":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b0c371c0b3024ef9857f25708fdccf13","IPY_MODEL_6a11ced9206048c889c7d6ccc57a6ba2","IPY_MODEL_db3086118fad4361aa92a46357af3d91"],"layout":"IPY_MODEL_d85d82c65da14ec6b67f829c5edeea64"
}},"b0c371c0b3024ef9857f25708fdccf13":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_bc915aec907f4821a163a866e7fb8bb6","placeholder":"โ€‹","style":"IPY_MODEL_e8085db08af9497886f160b014c1307f","value":"a2c-PandaPickAndPlace-v3.zip: 100%"}},"6a11ced9206048c889c7d6ccc57a6ba2":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_bd19549616904684bfc20c89b6ff3b63","max":124467,"min":0,"orientation":"horizontal","style":"IPY_MODEL_cd1a75e4e84b45bfa8ed4af0f62847d9","value":124467}},"db3086118fad4361aa92a46357af3d91":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7ab8e6c616f54b88ac34e0ba9b377f20","placeholder":"โ€‹","style":"IPY_MODEL_403401a6fd8a4961ac78e66df6b9925f","value":" 124k/124k [00:00<00:00, 
28.0kB/s]"}},"d85d82c65da14ec6b67f829c5edeea64":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bc915aec907f4821a163a866e7fb8bb6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e8085db08af9497886f160b014c1307f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"bd19549616904684bfc20c89b6ff3b63":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cd1a75e4e84b45bfa8ed4af0f62847d9":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"7ab8e6c616f54b88ac34e0ba9b377f20":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"403401a6fd8a4961ac78e66df6b9925f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ab5cfc2daaef4a4fb0ba2fbe17c60144":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e5736ce72bcc46c28347051dbcd02c88","IPY_MODEL_ada769094a464e5799d67dc522cd5637","IPY_MODEL_bd181fd072f84a82a46d048dbae72836"],"layout":"IPY_MODEL_e811d4e6b28844f484e569c03576443c"
}},"e5736ce72bcc46c28347051dbcd02c88":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_b21d8aa148154174b6cce2dedb60d281","placeholder":"โ€‹","style":"IPY_MODEL_bfe4ac26ee404f56ac7ae070e0034fab","value":"vec_normalize.pkl: 100%"}},"ada769094a464e5799d67dc522cd5637":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_ff0e9057df6946f9a590acb7431fbc71","max":3023,"min":0,"orientation":"horizontal","style":"IPY_MODEL_29a8b81811d948dab9db8dbacc0a2d34","value":3023}},"bd181fd072f84a82a46d048dbae72836":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_53dde5d8c4934b81b254ac78a09b5dba","placeholder":"โ€‹","style":"IPY_MODEL_157c384de31845f1a345a9b44d3ebee6","value":" 3.02k/3.02k [00:00<00:00, 
5.75kB/s]"}},"e811d4e6b28844f484e569c03576443c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"b21d8aa148154174b6cce2dedb60d281":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"
overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bfe4ac26ee404f56ac7ae070e0034fab":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"ff0e9057df6946f9a590acb7431fbc71":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"29a8b81811d948dab9db8dbacc0a2d34":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"53dde5d8c4934b81b254ac78a09b5dba":{"model_m
odule":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"157c384de31845f1a345a9b44d3ebee6":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"nbformat":4,"nbformat_minor":0} \ No newline at end of file diff --git a/HF DeepRL Course/Unit7 - Multi-Agents.ipynb b/HF DeepRL Course/Unit7 - Multi-Agents.ipynb new file mode 100644 index 0000000..c645c14 --- /dev/null +++ b/HF DeepRL Course/Unit7 - Multi-Agents.ipynb @@ -0,0 +1,1355 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "128c7ef7-74f6-49b0-b29b-1b398d559ceb", + "metadata": {}, + "source": [ + "# AI vs AI Soccer\n", + "\n", + "Instructions: https://huggingface.co/learn/deep-rl-course/unit7/hands-on" + ] + }, + { + "cell_type": "markdown", + "id": 
"305935aa-e84d-4ddf-9067-47445db0fdaa", + "metadata": { + "tags": [] + }, + "source": [ + "## Step 0: Install MLAgents" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "92f35f0d-73e3-4556-9f51-076d6391438e", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cloning into 'ml-agents'...\n", + "remote: Enumerating objects: 91591, done.\u001b[K\n", + "remote: Counting objects: 100% (2481/2481), done.\u001b[K\n", + "remote: Compressing objects: 100% (999/999), done.\u001b[K\n", + "remote: Total 91591 (delta 1404), reused 2060 (delta 1157), pack-reused 89110\u001b[K\n", + "Receiving objects: 100% (91591/91591), 2.87 GiB | 1.92 MiB/s, done.\n", + "Resolving deltas: 100% (66541/66541), done.\n", + "Obtaining file:///home/jakewalker/Projects/deeprl-course/ml-agents/ml-agents-envs\n", + " Preparing metadata (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25hCollecting cloudpickle (from mlagents-envs==1.1.0.dev0)\n", + " Obtaining dependency information for cloudpickle from https://files.pythonhosted.org/packages/96/43/dae06432d0c4b1dc9e9149ad37b4ca8384cf6eb7700cd9215b177b914f0a/cloudpickle-3.0.0-py3-none-any.whl.metadata\n", + " Downloading cloudpickle-3.0.0-py3-none-any.whl.metadata (7.0 kB)\n", + "Collecting grpcio<=1.48.2,>=1.11.0 (from mlagents-envs==1.1.0.dev0)\n", + " Downloading grpcio-1.48.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.6 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m4.6/4.6 MB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting Pillow>=4.2.1 (from mlagents-envs==1.1.0.dev0)\n", + " Obtaining dependency information for Pillow>=4.2.1 from 
https://files.pythonhosted.org/packages/e5/b9/5c6ad3241f1ccca4b781dfeddbab2dac4480f95aedc351a0e60c9f4c8aa9/Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata\n", + " Downloading Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.5 kB)\n", + "Collecting protobuf<3.20,>=3.6 (from mlagents-envs==1.1.0.dev0)\n", + " Downloading protobuf-3.19.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m0:01\u001b[0m:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pyyaml>=3.1.0 in ./.venv/lib/python3.10/site-packages (from mlagents-envs==1.1.0.dev0) (6.0.1)\n", + "Collecting gym>=0.21.0 (from mlagents-envs==1.1.0.dev0)\n", + " Using cached gym-0.26.2-py3-none-any.whl\n", + "Collecting pettingzoo==1.15.0 (from mlagents-envs==1.1.0.dev0)\n", + " Downloading PettingZoo-1.15.0.tar.gz (756 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m756.7/756.7 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25hCollecting numpy<1.24.0,>=1.21.2 (from mlagents-envs==1.1.0.dev0)\n", + " Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting filelock>=3.4.0 (from mlagents-envs==1.1.0.dev0)\n", + " Obtaining dependency information for filelock>=3.4.0 from https://files.pythonhosted.org/packages/5e/5d/97afbafd9d584ff1b45fcb354a479a3609bd97f912f8f1f6c563cb1fae21/filelock-3.12.4-py3-none-any.whl.metadata\n", + " Downloading filelock-3.12.4-py3-none-any.whl.metadata (2.8 kB)\n", + "Requirement already satisfied: six>=1.5.2 in ./.venv/lib/python3.10/site-packages (from grpcio<=1.48.2,>=1.11.0->mlagents-envs==1.1.0.dev0) (1.16.0)\n", + "Collecting gym-notices>=0.0.4 (from gym>=0.21.0->mlagents-envs==1.1.0.dev0)\n", + " Using cached gym_notices-0.0.8-py3-none-any.whl (3.0 kB)\n", + "Downloading filelock-3.12.4-py3-none-any.whl (11 kB)\n", + "Downloading cloudpickle-3.0.0-py3-none-any.whl (20 kB)\n", + "Downloading Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (3.6 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m:01\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: pettingzoo\n", + " Building wheel for pettingzoo (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for pettingzoo: filename=PettingZoo-1.15.0-py3-none-any.whl size=875632 sha256=0224ab58fef6069996c1020bb8ae12ec22385c575ed351da5f7d5843fd96ade7\n", + " Stored in directory: /home/jakewalker/.cache/pip/wheels/e3/35/ac/76984cb1c12902d190c818d57c43d25c3f9281591a640ccd13\n", + "Successfully built pettingzoo\n", + "Installing collected packages: gym-notices, protobuf, Pillow, numpy, grpcio, filelock, cloudpickle, gym, pettingzoo, mlagents-envs\n", + " Running setup.py develop for mlagents-envs\n", + "Successfully installed Pillow-10.1.0 cloudpickle-3.0.0 filelock-3.12.4 grpcio-1.48.2 gym-0.26.2 gym-notices-0.0.8 mlagents-envs-1.1.0.dev0 numpy-1.23.5 pettingzoo-1.15.0 protobuf-3.19.6\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Obtaining file:///home/jakewalker/Projects/deeprl-course/ml-agents/ml-agents\n", + " Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: grpcio<=1.48.2,>=1.11.0 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (1.48.2)\n", + "Collecting h5py>=2.9.0 (from mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for h5py>=2.9.0 from https://files.pythonhosted.org/packages/3b/d3/ecb4b3d2ec2c84132987e5f12ab1408f455bec1d90cd5bc408ebf37800f5/h5py-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", + " Downloading h5py-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.5 kB)\n", + "Requirement already satisfied: mlagents_envs==1.1.0.dev0 in ./ml-agents/ml-agents-envs (from mlagents==1.1.0.dev0) (1.1.0.dev0)\n", + "Requirement already satisfied: numpy<1.24.0,>=1.21.2 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (1.23.5)\n", + "Requirement already satisfied: Pillow>=4.2.1 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (10.1.0)\n", + "Requirement already satisfied: protobuf<3.20,>=3.6 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (3.19.6)\n", + "Requirement already satisfied: pyyaml>=3.1.0 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (6.0.1)\n", + "Collecting torch>=1.13.1 (from mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for torch>=1.13.1 from https://files.pythonhosted.org/packages/6d/13/b5e8bacd980b2195f8a1741ce11cbb9146568607795d5e4ff510dcff1064/torch-2.1.0-cp310-cp310-manylinux1_x86_64.whl.metadata\n", + " Downloading torch-2.1.0-cp310-cp310-manylinux1_x86_64.whl.metadata (25 kB)\n", + "Collecting tensorboard>=2.14 (from mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for tensorboard>=2.14 from https://files.pythonhosted.org/packages/69/38/fb2ac9c4c8efbe020ae88f6772be87d51ef18526ac541fc3393786b7c45a/tensorboard-2.15.0-py3-none-any.whl.metadata\n", + " Downloading tensorboard-2.15.0-py3-none-any.whl.metadata (1.7 kB)\n", + "Requirement already 
satisfied: six>=1.16 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (1.16.0)\n", + "Requirement already satisfied: attrs>=19.3.0 in ./.venv/lib/python3.10/site-packages (from mlagents==1.1.0.dev0) (23.1.0)\n", + "Collecting huggingface_hub>=0.14 (from mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for huggingface_hub>=0.14 from https://files.pythonhosted.org/packages/ef/b5/b6107bd65fa4c96fdf00e4733e2fe5729bb9e5e09997f63074bb43d3ab28/huggingface_hub-0.18.0-py3-none-any.whl.metadata\n", + " Downloading huggingface_hub-0.18.0-py3-none-any.whl.metadata (13 kB)\n", + "Collecting onnx==1.12.0 (from mlagents==1.1.0.dev0)\n", + " Downloading onnx-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting cattrs<1.7,>=1.1.0 (from mlagents==1.1.0.dev0)\n", + " Downloading cattrs-1.5.0-py3-none-any.whl (19 kB)\n", + "Requirement already satisfied: cloudpickle in ./.venv/lib/python3.10/site-packages (from mlagents_envs==1.1.0.dev0->mlagents==1.1.0.dev0) (3.0.0)\n", + "Requirement already satisfied: gym>=0.21.0 in ./.venv/lib/python3.10/site-packages (from mlagents_envs==1.1.0.dev0->mlagents==1.1.0.dev0) (0.26.2)\n", + "Requirement already satisfied: pettingzoo==1.15.0 in ./.venv/lib/python3.10/site-packages (from mlagents_envs==1.1.0.dev0->mlagents==1.1.0.dev0) (1.15.0)\n", + "Requirement already satisfied: filelock>=3.4.0 in ./.venv/lib/python3.10/site-packages (from mlagents_envs==1.1.0.dev0->mlagents==1.1.0.dev0) (3.12.4)\n", + "Requirement already satisfied: typing-extensions>=3.6.2.1 in ./.venv/lib/python3.10/site-packages (from onnx==1.12.0->mlagents==1.1.0.dev0) (4.8.0)\n", + 
"Collecting fsspec>=2023.5.0 (from huggingface_hub>=0.14->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for fsspec>=2023.5.0 from https://files.pythonhosted.org/packages/e8/f6/3eccfb530aac90ad1301c582da228e4763f19e719ac8200752a4841b0b2d/fsspec-2023.10.0-py3-none-any.whl.metadata\n", + " Downloading fsspec-2023.10.0-py3-none-any.whl.metadata (6.8 kB)\n", + "Requirement already satisfied: requests in ./.venv/lib/python3.10/site-packages (from huggingface_hub>=0.14->mlagents==1.1.0.dev0) (2.31.0)\n", + "Collecting tqdm>=4.42.1 (from huggingface_hub>=0.14->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for tqdm>=4.42.1 from https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl.metadata\n", + " Downloading tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: packaging>=20.9 in ./.venv/lib/python3.10/site-packages (from huggingface_hub>=0.14->mlagents==1.1.0.dev0) (23.2)\n", + "Collecting absl-py>=0.4 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for absl-py>=0.4 from https://files.pythonhosted.org/packages/01/e4/dc0a1dcc4e74e08d7abedab278c795eef54a224363bb18f5692f416d834f/absl_py-2.0.0-py3-none-any.whl.metadata\n", + " Downloading absl_py-2.0.0-py3-none-any.whl.metadata (2.3 kB)\n", + "Collecting google-auth<3,>=1.6.3 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for google-auth<3,>=1.6.3 from 
https://files.pythonhosted.org/packages/39/7c/2e4fa55a99f83ef9ef229ac5d59c44ceb90e2d0145711590c0fa39669f32/google_auth-2.23.3-py2.py3-none-any.whl.metadata\n", + " Downloading google_auth-2.23.3-py2.py3-none-any.whl.metadata (4.2 kB)\n", + "Collecting google-auth-oauthlib<2,>=0.5 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for google-auth-oauthlib<2,>=0.5 from https://files.pythonhosted.org/packages/ce/33/a907b4b67245647746dde8d61e1643ef5d210c88e090d491efd89eff9f95/google_auth_oauthlib-1.1.0-py2.py3-none-any.whl.metadata\n", + " Downloading google_auth_oauthlib-1.1.0-py2.py3-none-any.whl.metadata (2.7 kB)\n", + "Collecting markdown>=2.6.8 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for markdown>=2.6.8 from https://files.pythonhosted.org/packages/bb/c1/50caaec6cadc1c6adc8fe351e03bd646d6e4dd17f55fca0f4c8d7ea8d3e9/Markdown-3.5-py3-none-any.whl.metadata\n", + " Downloading Markdown-3.5-py3-none-any.whl.metadata (7.1 kB)\n", + "Requirement already satisfied: setuptools>=41.0.0 in ./.venv/lib/python3.10/site-packages (from tensorboard>=2.14->mlagents==1.1.0.dev0) (68.2.0)\n", + "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for tensorboard-data-server<0.8.0,>=0.7.0 from https://files.pythonhosted.org/packages/73/c6/825dab04195756cf8ff2e12698f22513b3db2f64925bdd41671bfb33aaa5/tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata\n", + " Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n", + "Collecting werkzeug>=1.0.1 (from tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for werkzeug>=1.0.1 from https://files.pythonhosted.org/packages/c3/fc/254c3e9b5feb89ff5b9076a23218dafbc99c96ac5941e900b71206e6313b/werkzeug-3.0.1-py3-none-any.whl.metadata\n", + " Downloading 
werkzeug-3.0.1-py3-none-any.whl.metadata (4.1 kB)\n", + "Collecting sympy (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Using cached sympy-1.12-py3-none-any.whl (5.7 MB)\n", + "Collecting networkx (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for networkx from https://files.pythonhosted.org/packages/f6/eb/5585c96636bbb2755865c31d83a19dd220ef88e716df4659dacb86e009cc/networkx-3.2-py3-none-any.whl.metadata\n", + " Downloading networkx-3.2-py3-none-any.whl.metadata (5.2 kB)\n", + "Requirement already satisfied: jinja2 in ./.venv/lib/python3.10/site-packages (from torch>=1.13.1->mlagents==1.1.0.dev0) (3.1.2)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m23.7/23.7 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m823.6/823.6 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m:01\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", + "\u001b[2K 
\u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m14.1/14.1 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for nvidia-cudnn-cu12==8.9.2.26 from https://files.pythonhosted.org/packages/ff/74/a2e2be7fb83aaedec84f391f082cf765dfb635e7caa9b49065f73e4835d8/nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata\n", + " Downloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Downloading nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m410.6/410.6 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:03\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Downloading nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m121.6/121.6 MB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Downloading nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", + "\u001b[2K 
\u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m56.5/56.5 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Downloading nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m124.2/124.2 MB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:02\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Downloading nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m196.0/196.0 MB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:03\u001b[0m\n", + "\u001b[?25hCollecting nvidia-nccl-cu12==2.18.1 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Downloading nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl (209.8 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m209.8/209.8 MB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:02\u001b[0m\n", + "\u001b[?25hCollecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Downloading nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 
kB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting triton==2.1.0 (from torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for triton==2.1.0 from https://files.pythonhosted.org/packages/4d/22/91a8af421c8a8902dde76e6ef3db01b258af16c53d81e8c0d0dc13900a9e/triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata\n", + " Downloading triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)\n", + "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for nvidia-nvjitlink-cu12 from https://files.pythonhosted.org/packages/45/de/885b6d3e1fa07bf19124076b348d3cf30f68051f813cba99e103f53d2f75/nvidia_nvjitlink_cu12-12.3.52-py3-none-manylinux1_x86_64.whl.metadata\n", + " Downloading nvidia_nvjitlink_cu12-12.3.52-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", + "Collecting cachetools<6.0,>=2.0.0 (from google-auth<3,>=1.6.3->tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Obtaining dependency information for cachetools<6.0,>=2.0.0 from https://files.pythonhosted.org/packages/a2/91/2d843adb9fbd911e0da45fbf6f18ca89d07a087c3daa23e955584f90ebf4/cachetools-5.3.2-py3-none-any.whl.metadata\n", + " Downloading cachetools-5.3.2-py3-none-any.whl.metadata (5.2 kB)\n", + "Collecting pyasn1-modules>=0.2.1 (from google-auth<3,>=1.6.3->tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Using cached pyasn1_modules-0.3.0-py2.py3-none-any.whl (181 kB)\n", + "Collecting rsa<5,>=3.1.4 (from google-auth<3,>=1.6.3->tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Using cached rsa-4.9-py3-none-any.whl (34 kB)\n", + "Collecting 
requests-oauthlib>=0.7.0 (from google-auth-oauthlib<2,>=0.5->tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Using cached requests_oauthlib-1.3.1-py2.py3-none-any.whl (23 kB)\n", + "Requirement already satisfied: gym-notices>=0.0.4 in ./.venv/lib/python3.10/site-packages (from gym>=0.21.0->mlagents_envs==1.1.0.dev0->mlagents==1.1.0.dev0) (0.0.8)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in ./.venv/lib/python3.10/site-packages (from requests->huggingface_hub>=0.14->mlagents==1.1.0.dev0) (3.3.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in ./.venv/lib/python3.10/site-packages (from requests->huggingface_hub>=0.14->mlagents==1.1.0.dev0) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in ./.venv/lib/python3.10/site-packages (from requests->huggingface_hub>=0.14->mlagents==1.1.0.dev0) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in ./.venv/lib/python3.10/site-packages (from requests->huggingface_hub>=0.14->mlagents==1.1.0.dev0) (2023.7.22)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in ./.venv/lib/python3.10/site-packages (from werkzeug>=1.0.1->tensorboard>=2.14->mlagents==1.1.0.dev0) (2.1.3)\n", + "Collecting mpmath>=0.19 (from sympy->torch>=1.13.1->mlagents==1.1.0.dev0)\n", + " Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n", + "Collecting pyasn1<0.6.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Using cached pyasn1-0.5.0-py2.py3-none-any.whl (83 kB)\n", + "Collecting oauthlib>=3.0.0 (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard>=2.14->mlagents==1.1.0.dev0)\n", + " Using cached oauthlib-3.2.2-py3-none-any.whl (151 kB)\n", + "Downloading h5py-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m 
\u001b[32m4.8/4.8 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m0:01\u001b[0m:01\u001b[0m\n", + "\u001b[?25hDownloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m302.0/302.0 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading tensorboard-2.15.0-py3-none-any.whl (5.6 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading torch-2.1.0-cp310-cp310-manylinux1_x86_64.whl (670.2 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m670.2/670.2 MB\u001b[0m \u001b[31m844.4 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m eta \u001b[36m0:00:01\u001b[0m[36m0:00:07\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m731.7/731.7 MB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:06\u001b[0m\n", + "\u001b[?25hDownloading triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89.2 MB)\n", + "\u001b[2K 
\u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m89.2/89.2 MB\u001b[0m \u001b[31m3.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading absl_py-2.0.0-py3-none-any.whl (130 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2023.10.0-py3-none-any.whl (166 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m166.4/166.4 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading google_auth-2.23.3-py2.py3-none-any.whl (182 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m182.3/182.3 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading google_auth_oauthlib-1.1.0-py2.py3-none-any.whl (19 kB)\n", + "Downloading Markdown-3.5-py3-none-any.whl (101 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m101.7/101.7 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n", + "\u001b[2K 
\u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hUsing cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n", + "Downloading werkzeug-3.0.1-py3-none-any.whl (226 kB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m226.7/226.7 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading networkx-3.2-py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m:01\u001b[0m\n", + "\u001b[?25hDownloading cachetools-5.3.2-py3-none-any.whl (9.3 kB)\n", + "Downloading nvidia_nvjitlink_cu12-12.3.52-py3-none-manylinux1_x86_64.whl (20.5 MB)\n", + "\u001b[2K \u001b[38;2;114;156;31mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m \u001b[32m20.5/20.5 MB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m\n", + "\u001b[?25hInstalling collected packages: mpmath, werkzeug, triton, tqdm, tensorboard-data-server, sympy, pyasn1, onnx, oauthlib, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, networkx, markdown, h5py, fsspec, cattrs, cachetools, absl-py, rsa, 
requests-oauthlib, pyasn1-modules, nvidia-cusparse-cu12, nvidia-cudnn-cu12, huggingface_hub, nvidia-cusolver-cu12, google-auth, torch, google-auth-oauthlib, tensorboard, mlagents\n", + " Running setup.py develop for mlagents\n", + "Successfully installed absl-py-2.0.0 cachetools-5.3.2 cattrs-1.5.0 fsspec-2023.10.0 google-auth-2.23.3 google-auth-oauthlib-1.1.0 h5py-3.10.0 huggingface_hub-0.18.0 markdown-3.5 mlagents-1.1.0.dev0 mpmath-1.3.0 networkx-3.2 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.18.1 nvidia-nvjitlink-cu12-12.3.52 nvidia-nvtx-cu12-12.1.105 oauthlib-3.2.2 onnx-1.12.0 pyasn1-0.5.0 pyasn1-modules-0.3.0 requests-oauthlib-1.3.1 rsa-4.9 sympy-1.12 tensorboard-2.15.0 tensorboard-data-server-0.7.2 torch-2.1.0 tqdm-4.66.1 triton-2.1.0 werkzeug-3.0.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: torch in ./.venv/lib/python3.10/site-packages (2.1.0)\n", + "Requirement already satisfied: onnx==1.12.0 in ./.venv/lib/python3.10/site-packages (1.12.0)\n", + "Requirement already satisfied: numpy>=1.16.6 in ./.venv/lib/python3.10/site-packages (from onnx==1.12.0) (1.23.5)\n", + "Requirement already satisfied: protobuf<=3.20.1,>=3.12.2 in ./.venv/lib/python3.10/site-packages (from onnx==1.12.0) (3.19.6)\n", + "Requirement already satisfied: typing-extensions>=3.6.2.1 in ./.venv/lib/python3.10/site-packages (from onnx==1.12.0) (4.8.0)\n", + 
"Requirement already satisfied: filelock in ./.venv/lib/python3.10/site-packages (from torch) (3.12.4)\n", + "Requirement already satisfied: sympy in ./.venv/lib/python3.10/site-packages (from torch) (1.12)\n", + "Requirement already satisfied: networkx in ./.venv/lib/python3.10/site-packages (from torch) (3.2)\n", + "Requirement already satisfied: jinja2 in ./.venv/lib/python3.10/site-packages (from torch) (3.1.2)\n", + "Requirement already satisfied: fsspec in ./.venv/lib/python3.10/site-packages (from torch) (2023.10.0)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in ./.venv/lib/python3.10/site-packages (from torch) (8.9.2.26)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in ./.venv/lib/python3.10/site-packages (from torch) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in ./.venv/lib/python3.10/site-packages (from torch) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in ./.venv/lib/python3.10/site-packages (from torch) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in ./.venv/lib/python3.10/site-packages (from torch) (2.18.1)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in ./.venv/lib/python3.10/site-packages (from torch) (12.1.105)\n", + 
"Requirement already satisfied: triton==2.1.0 in ./.venv/lib/python3.10/site-packages (from torch) (2.1.0)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in ./.venv/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.3.52)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in ./.venv/lib/python3.10/site-packages (from jinja2->torch) (2.1.3)\n", + "Requirement already satisfied: mpmath>=0.19 in ./.venv/lib/python3.10/site-packages (from sympy->torch) (1.3.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!git clone https://github.com/Unity-Technologies/ml-agents\n", + "!pip install -e ./ml-agents/ml-agents-envs\n", + "!pip install -e ./ml-agents/ml-agents\n", + "!pip install torch onnx==1.12.0" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6a6054c6-3d59-4ffb-ade9-83740e7c18b5", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-10-25 10:44:43-- https://docs.google.com/uc?export=download&confirm=&id=1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL\n", + "Resolving docs.google.com (docs.google.com)... 142.250.200.46, 2a00:1450:4009:823::200e\n", + "Connecting to docs.google.com (docs.google.com)|142.250.200.46|:443... connected.\n", + "HTTP request sent, awaiting response... 
303 See Other\n", + "Location: https://doc-04-8c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9p0egu57r3s8rkm6p23iualfupmjc5ra/1698227025000/09764732090272539193/*/1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL?e=download&uuid=273505f9-1676-4126-8bda-65e629c6560e [following]\n", + "Warning: wildcards not supported in HTTP.\n", + "--2023-10-25 10:44:43-- https://doc-04-8c-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/9p0egu57r3s8rkm6p23iualfupmjc5ra/1698227025000/09764732090272539193/*/1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL?e=download&uuid=273505f9-1676-4126-8bda-65e629c6560e\n", + "Resolving doc-04-8c-docs.googleusercontent.com (doc-04-8c-docs.googleusercontent.com)... 142.250.200.1, 2a00:1450:4009:822::2001\n", + "Connecting to doc-04-8c-docs.googleusercontent.com (doc-04-8c-docs.googleusercontent.com)|142.250.200.1|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 36963480 (35M) [application/x-zip-compressed]\n", + "Saving to: ‘./SoccerTwos.zip’\n", + "\n", + "./SoccerTwos.zip 100%[===================>] 35.25M 13.6MB/s in 2.6s \n", + "\n", + "2023-10-25 10:44:46 (13.6 MB/s) - ‘./SoccerTwos.zip’ saved [36963480/36963480]\n", + "\n" + ] + } + ], + "source": [ + "!wget --load-cookies /tmp/cookies.txt \"https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=FILEID' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\\1\\n/p')&id=1KuqBKYiXiIcU4kNMqEzhgypuFP5_45CL\" -O ./SoccerTwos.zip && rm -rf /tmp/cookies.txt" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "11d60d74-33fb-4144-8353-600151ceccc4", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Archive: ./SoccerTwos.zip\n", + " creating: 
./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/app.info \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/boot.config \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/globalgamemanagers \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/globalgamemanagers.assets \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/level0 \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/level0.resS \n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Assembly-CSharp.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Google.Protobuf.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Grpc.Core.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Mono.Security.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/mscorlib.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/netstandard.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Newtonsoft.Json.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.ComponentModel.Composition.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Configuration.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Core.dll \n", + " inflating: 
./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Data.DataSetExtensions.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Data.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Drawing.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.EnterpriseServices.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Interactive.Async.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.IO.Abstractions.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.IO.Abstractions.TestingHelpers.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.IO.Compression.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.IO.Compression.FileSystem.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Net.Http.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Numerics.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Runtime.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Runtime.Serialization.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Security.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.ServiceModel.Internals.dll \n", + " inflating: 
./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Transactions.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Xml.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/System.Xml.Linq.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Barracuda.BurstBLAS.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Barracuda.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Barracuda.ONNX.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.Cecil.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.Cecil.Mdb.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.Cecil.Pdb.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.Cecil.Rocks.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Burst.Unsafe.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.InputSystem.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.Mathematics.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.ML-Agents.CommunicatorObjects.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.ML-Agents.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.ML-Agents.Extensions.dll \n", + " inflating: 
./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.ML-Agents.Extensions.Input.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/Unity.TextMeshPro.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AccessibilityModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AIModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AndroidJNIModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AnimationModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AssetBundleModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.AudioModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ClothModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ClusterInputModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ClusterRendererModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.CoreModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.CrashReportingModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.DirectorModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.DSPGraphModule.dll \n", + " inflating: 
./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.GameCenterModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.GIModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.GridModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.HotReloadModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ImageConversionModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.IMGUIModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.InputLegacyModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.InputModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.JSONSerializeModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.LocalizationModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ParticleSystemModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.PerformanceReportingModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.Physics2DModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.PhysicsModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ProfilerModule.dll \n", + " inflating: 
./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.RuntimeInitializeOnLoadManagerInitializerModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.ScreenCaptureModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.SharedInternalsModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.SpriteMaskModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.SpriteShapeModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.StreamingModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.SubstanceModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.SubsystemsModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TerrainModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TerrainPhysicsModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TextCoreFontEngineModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TextCoreTextEngineModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TextRenderingModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TilemapModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.TLSModule.dll \n", + " inflating: 
./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UI.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UIElementsModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UIElementsNativeModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UIModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UmbraModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UNETModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityAnalyticsCommonModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityAnalyticsModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityConnectModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityCurlModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityTestProtocolModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityWebRequestAssetBundleModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityWebRequestAudioModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityWebRequestModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityWebRequestTextureModule.dll \n", + " inflating: 
./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.UnityWebRequestWWWModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.VehiclesModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.VFXModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.VideoModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.VirtualTexturingModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.VRModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.WindModule.dll \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Managed/UnityEngine.XRModule.dll \n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/\n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/config \n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/\n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/\n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/Browsers/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/Browsers/Compat.browser \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/DefaultWsdlHelpGenerator.aspx \n", + " inflating: 
./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/machine.config \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/settings.map \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/2.0/web.config \n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/\n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/Browsers/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/Browsers/Compat.browser \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/DefaultWsdlHelpGenerator.aspx \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/machine.config \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/settings.map \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.0/web.config \n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/\n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/Browsers/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/Browsers/Compat.browser \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/DefaultWsdlHelpGenerator.aspx \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/machine.config \n", + " inflating: 
./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/settings.map \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/4.5/web.config \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/browscap.ini \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/config \n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/mconfig/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/etc/mono/mconfig/config.xml \n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/x86_64/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/x86_64/libmono-native.so \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/x86_64/libmonobdwgc-2.0.so \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/MonoBleedingEdge/x86_64/libMonoPosixHelper.so \n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Plugins/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Plugins/libgrpc_csharp_ext.x64.so \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Plugins/lib_burst_generated.so \n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Resources/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Resources/unity default resources \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Resources/UnityPlayer.png \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/Resources/unity_builtin_extra \n", + " 
inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/resources.assets \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/resources.assets.resS \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/RuntimeInitializeOnLoads.json \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/ScriptingAssemblies.json \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/sharedassets0.assets \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_Data/sharedassets0.assets.resS \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos.x86_64 \n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/UnityPlayer.so \n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_BurstDebugInformation_DoNotShip/\n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_BurstDebugInformation_DoNotShip/Data/\n", + " creating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_BurstDebugInformation_DoNotShip/Data/Plugins/\n", + " inflating: ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos_BurstDebugInformation_DoNotShip/Data/Plugins/lib_burst_generated.txt \n" + ] + } + ], + "source": [ + "!mkdir -p ./ml-agents/training-envs-executables/SoccerTwos\n", + "!unzip ./SoccerTwos.zip -d ./ml-agents/training-envs-executables/SoccerTwos" + ] + }, + { + "cell_type": "markdown", + "id": "aa1b69a5-c232-411e-a395-65f7ee31e1ce", + "metadata": {}, + "source": [ + "## Step 1: Understand the environment\n", + "\n", + "- **Reward Function:** $1 - \\text{accumulated time penalty}$ when the ball enters the opponent's goal. The accumulated time penalty is incremented by $1 \\div \\text{max step}$ every fixed update and is reset to $0$ at the beginning of the episode. 
$-1$ when the ball enters the team's own goal.\n", + "- **Observation Space:** composed of vectors of size 336:\n", + " - 11 ray-casts forward distributed over 120 degrees (264 state dimensions)\n", + " - 3 ray-casts backward distributed over 90 degrees (72 state dimensions)\n", + " - Both of these ray-casts can detect 6 objects:\n", + " - Ball\n", + " - Blue Goal\n", + " - Purple Goal\n", + " - Wall\n", + " - Blue Agent\n", + " - Purple Agent\n", + "- **Action Space:**\n", + " - Forward motion: Up/Down\n", + " - Sideways motion: Left/Right\n", + " - Rotation: Left/Right" + ] + }, + { + "cell_type": "markdown", + "id": "4bc3cfd3-c34d-45f7-932a-5f3bb65d070e", + "metadata": {}, + "source": [ + "## Step 2: Understand MA-POCA\n", + "\n", + "[See here](https://huggingface.co/learn/deep-rl-course/unit7/hands-on#step-2-understand-ma-poca)\n", + "\n", + "![](https://huggingface.co/datasets/huggingface-deep-rl-course/course-images/resolve/main/en/unit10/mapoca.png)\n", + "\n", + "This uses an MA-POCA trainer (called `poca` in ML-Agents), which acts as a 'coach' that helps train cooperative behaviour." 
+ ] + }, + { + "cell_type": "markdown", + "id": "af92b8ec-2fac-4118-8fc5-ef1e8b68bf6b", + "metadata": {}, + "source": [ + "## Step 3: Define the config file\n", + "\n", + "**`./ml-agents/config/poca/SoccerTwos.yaml`:**\n", + "\n", + "```yaml\n", + "behaviors:\n", + " SoccerTwos:\n", + " trainer_type: poca\n", + " hyperparameters:\n", + " batch_size: 2048\n", + " buffer_size: 20480\n", + " learning_rate: 0.0003\n", + " beta: 0.005\n", + " epsilon: 0.2\n", + " lambd: 0.95\n", + " num_epoch: 3\n", + " learning_rate_schedule: constant\n", + " network_settings:\n", + " normalize: false\n", + " hidden_units: 512\n", + " num_layers: 2\n", + " vis_encode_type: simple\n", + " reward_signals:\n", + " extrinsic:\n", + " gamma: 0.99\n", + " strength: 1.0\n", + " keep_checkpoints: 5\n", + " max_steps: 5000000\n", + " time_horizon: 1000\n", + " summary_freq: 10000\n", + " self_play:\n", + " save_steps: 50000\n", + " team_change: 200000\n", + " swap_steps: 2000\n", + " window: 10\n", + " play_against_latest_model_ratio: 0.5\n", + " initial_elo: 1200.0\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "6e7ff86a-1f86-47a2-941e-3e80e21c9e57", + "metadata": {}, + "source": [ + "## Step 4: Start the training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "795032b7-978e-4822-b680-a42fc8bc216f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/jakewalker/Projects/deeprl-course/.venv/lib/python3.10/site-packages/torch/__init__.py:614: UserWarning: torch.set_default_tensor_type() is deprecated as of PyTorch 2.1, please use torch.set_default_dtype() and torch.set_default_device() as alternatives. 
(Triggered internally at ../torch/csrc/tensor/python_tensor.cpp:451.)\n", + " _C._set_default_tensor_type(t)\n", + "\n", + " โ” โ•–\n", + " โ•“โ•–โ•ฌโ”‚โ•ก โ”‚โ”‚โ•ฌโ•–โ•–\n", + " โ•“โ•–โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ”˜ โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ•ฌโ•–\n", + " โ•–โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ•ฌโ•œ โ•™โ•ฌโ”‚โ”‚โ”‚โ”‚โ”‚โ•–โ•– โ•—โ•—โ•—\n", + " โ•ฌโ•ฌโ•ฌโ•ฌโ•–โ”‚โ”‚โ•ฆโ•– โ•–โ•ฌโ”‚โ”‚โ•—โ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•œโ•œโ•œ โ•Ÿโ•ฃโ•ฃ\n", + " โ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•–โ”‚โ•ฌโ•–โ•–โ•“โ•ฌโ•ชโ”‚โ•“โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•’โ•ฃโ•ฃโ•–โ•—โ•ฃโ•ฃโ•ฃโ•— โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•– โ•ฃโ•ฃโ•ฃ\n", + " โ•ฌโ•ฌโ•ฌโ•ฌโ” โ•™โ•ฌโ•ฌโ•ฌโ•ฌโ”‚โ•“โ•ฃโ•ฃโ•ฃโ•โ•œ โ•ซโ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฃโ•™ โ•™โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃ โ•™โ•Ÿโ•ฃโ•ฃโ•œโ•™ โ•ซโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃ\n", + " โ•ฌโ•ฌโ•ฌโ•ฌโ” โ•™โ•ฌโ•ฌโ•ฃโ•ฃ โ•ซโ•ฃโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•Ÿโ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃโ”Œโ•ฃโ•ฃโ•œ\n", + " โ•ฌโ•ฌโ•ฌโ•œ โ•ฌโ•ฌโ•ฃโ•ฃ โ•™โ•โ•ฃโ•ฃโ•ฌ โ•™โ•ฃโ•ฃโ•ฃโ•—โ•–โ•“โ•—โ•ฃโ•ฃโ•ฃโ•œ โ•Ÿโ•ฃโ•ฃโ•ฌ โ•ฃโ•ฃโ•ฃ โ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฆโ•“ โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃ\n", + " โ•™ โ•“โ•ฆโ•– โ•ฌโ•ฌโ•ฃโ•ฃ โ•“โ•—โ•—โ•– โ•™โ•โ•ฃโ•ฃโ•ฃโ•ฃโ•โ•œ โ•˜โ•โ•โ•œ โ•โ•โ• โ•โ•โ• โ•™โ•ฃโ•ฃโ•ฃ โ•Ÿโ•ฃโ•ฃโ•ฃ\n", + " โ•ฉโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฆโ•ฆโ•ฌโ•ฌโ•ฃโ•ฃโ•—โ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ• โ•ซโ•ฃโ•ฃโ•ฃโ•ฃ\n", + " โ•™โ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฌโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•ฃโ•โ•œ\n", + " โ•™โ•ฌโ•ฌโ•ฌโ•ฃโ•ฃโ•ฃโ•œ\n", + " โ•™\n", + " \n", + " Version information:\n", + " ml-agents: 1.1.0.dev0,\n", + " ml-agents-envs: 1.1.0.dev0,\n", + " Communicator API: 1.5.0,\n", + " PyTorch: 2.1.0+cu121\n", + "[INFO] Connected to Unity environment with package version 2.3.0-exp.3 and communication version 1.5.0\n", + "[INFO] Connected new brain: SoccerTwos?team=1\n", + "[INFO] Connected new brain: SoccerTwos?team=0\n", + "[WARNING] Deleting TensorBoard data events.out.tfevents.1698236060.adminuser-System-Product-Name.10733.0 that was left over from a previous run.\n", + "[INFO] Hyperparameters for behavior name 
SoccerTwos: \n", + "\ttrainer_type:\tpoca\n", + "\thyperparameters:\t\n", + "\t batch_size:\t2048\n", + "\t buffer_size:\t204800\n", + "\t learning_rate:\t0.0003\n", + "\t beta:\t0.005\n", + "\t epsilon:\t0.2\n", + "\t lambd:\t0.95\n", + "\t num_epoch:\t3\n", + "\t learning_rate_schedule:\tconstant\n", + "\t beta_schedule:\tconstant\n", + "\t epsilon_schedule:\tconstant\n", + "\tcheckpoint_interval:\t500000\n", + "\tnetwork_settings:\t\n", + "\t normalize:\tFalse\n", + "\t hidden_units:\t512\n", + "\t num_layers:\t3\n", + "\t vis_encode_type:\tsimple\n", + "\t memory:\tNone\n", + "\t goal_conditioning_type:\thyper\n", + "\t deterministic:\tFalse\n", + "\treward_signals:\t\n", + "\t extrinsic:\t\n", + "\t gamma:\t0.99\n", + "\t strength:\t1.0\n", + "\t network_settings:\t\n", + "\t normalize:\tFalse\n", + "\t hidden_units:\t128\n", + "\t num_layers:\t2\n", + "\t vis_encode_type:\tsimple\n", + "\t memory:\tNone\n", + "\t goal_conditioning_type:\thyper\n", + "\t deterministic:\tFalse\n", + "\tinit_path:\tNone\n", + "\tkeep_checkpoints:\t5\n", + "\teven_checkpoints:\tFalse\n", + "\tmax_steps:\t5000000\n", + "\ttime_horizon:\t1000\n", + "\tsummary_freq:\t10000\n", + "\tthreaded:\tFalse\n", + "\tself_play:\t\n", + "\t save_steps:\t50000\n", + "\t team_change:\t200000\n", + "\t swap_steps:\t2000\n", + "\t window:\t10\n", + "\t play_against_latest_model_ratio:\t0.5\n", + "\t initial_elo:\t1200.0\n", + "\tbehavioral_cloning:\tNone\n", + "/home/jakewalker/Projects/deeprl-course/ml-agents/ml-agents/mlagents/trainers/torch_entities/utils.py:289: UserWarning: The use of `x.T` on tensors of dimension other than 2 to reverse their shape is deprecated and it will throw an error in a future release. Consider `x.mT` to transpose batches of matrices or `x.permute(*torch.arange(x.ndim - 1, -1, -1))` to reverse the dimensions of a tensor. 
(Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:3614.)\n", + " torch.nn.functional.one_hot(_act.T, action_size[i]).float()\n", + "[INFO] SoccerTwos. Step: 10000. Time Elapsed: 20.758 s. Mean Reward: 0.000. Mean Group Reward: 0.026. Training. ELO: 1201.067.\n" + ] + } + ], + "source": [ + "!chmod +x ./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos.x86_64\n", + "!mlagents-learn ./ml-agents/config/poca/SoccerTwos.yaml --env=./ml-agents/training-envs-executables/SoccerTwos/SoccerTwos.x86_64 --run-id=\"SoccerTwos\" --no-graphics" + ] + }, + { + "cell_type": "markdown", + "id": "ee052048-4fe5-4c54-a01b-7c9e1c5baac4", + "metadata": {}, + "source": [ + "## Step 5. Push the agent to the Hugging Face Hub" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e037b3bf-3160-448e-a4ca-2a6d95c463ec", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] This function will create a model card and upload your SoccerTwos into HuggingFace Hub. This is a work in progress: If you encounter a bug, please send open an issue\n", + "[INFO] Pushing repo SoccerTwos to the Hugging Face Hub\n", + "SoccerTwos-3499096.onnx: 0%| | 0.00/1.77M [00:00