From cb1f94adc14e7cefc5f81305aca482a6fd9c9d05 Mon Sep 17 00:00:00 2001 From: Jake Walker Date: Sun, 19 May 2024 13:22:15 +0100 Subject: [PATCH] Add RL notebooks and example good agent --- .gitignore | 2 + 2_reinforcement_learning.ipynb | 433 +++++++++++++++++++++ 2a_reinforcement_learning_solutions.ipynb | 446 ++++++++++++++++++++++ poetry.lock | 284 +++++++++++++- ppo-LunarLander-v2-good.zip | 3 + pyproject.toml | 4 +- 6 files changed, 1167 insertions(+), 5 deletions(-) create mode 100644 2_reinforcement_learning.ipynb create mode 100644 2a_reinforcement_learning_solutions.ipynb create mode 100644 ppo-LunarLander-v2-good.zip diff --git a/.gitignore b/.gitignore index 59efbd8..f3a7405 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ cifar-10-batches-py +Unit1 - Intro.ipynb +lunar_lander.mp4 diff --git a/2_reinforcement_learning.ipynb b/2_reinforcement_learning.ipynb new file mode 100644 index 0000000..e0878f3 --- /dev/null +++ b/2_reinforcement_learning.ipynb @@ -0,0 +1,433 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "njb_ProuHiOe" + }, + "source": [ + "# Intro to Reinforcement Learning\n", + "\n", + "This notebook is modified from [Unit 1 of Hugging Face's Deep RL course](https://github.com/huggingface/deep-rl-class/blob/main/notebooks/unit1/unit1.ipynb), it has been simplified." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wrgpVFqyENVf" + }, + "source": [ + "## Import the packages 📦" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cygWLPGsEQ0m" + }, + "outputs": [], + "source": [ + "from stable_baselines3 import PPO\n", + "from stable_baselines3.common.env_util import make_vec_env\n", + "from stable_baselines3.common.evaluation import evaluate_policy\n", + "from stable_baselines3.common.monitor import Monitor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-TzNN0bQ_j-3" + }, + "source": [ + "At each step:\n", + "- Our Agent receives a **state (S0)** from the **Environment** — we receive the first frame of our game (Environment).\n", + "- Based on that **state (S0),** the Agent takes an **action (A0)** — our Agent will move to the right.\n", + "- The environment transitions to a **new** **state (S1)** — new frame.\n", + "- The environment gives some **reward (R1)** to the Agent — we’re not dead *(Positive Reward +1)*.\n", + "\n", + "\n", + "With Gymnasium:\n", + "\n", + "1️⃣ We create our environment using `gymnasium.make()`\n", + "\n", + "2️⃣ We reset the environment to its initial state with `observation = env.reset()`\n", + "\n", + "At each step:\n", + "\n", + "3️⃣ Get an action using our model (in our example we take a random action)\n", + "\n", + "4️⃣ Using `env.step(action)`, we perform this action in the environment and get\n", + "- `observation`: The new state (st+1)\n", + "- `reward`: The reward we get after executing the action\n", + "- `terminated`: Indicates if the episode terminated (agent reach the terminal state)\n", + "- `truncated`: Introduced with this new version, it indicates a timelimit or if an agent go out of bounds of the environment for instance.\n", + "- `info`: A dictionary that provides additional information (depends on the environment).\n", + "\n", + "For more explanations check this 👉 https://gymnasium.farama.org/api/env/#gymnasium.Env.step\n", + "\n", + "If the episode is terminated:\n", + "- We reset the environment to its initial state with `observation = env.reset()`\n", + "\n", + "**Let's look at an example!** Make sure to read the code\n" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w7vOFlpA_ONz" + }, + "outputs": [], + "source": [ + "import gymnasium as gym\n", + "\n", + "# First, we create our environment called LunarLander-v2\n", + "env = gym.make(\"LunarLander-v2\")\n", + "\n", + "# Then we reset this environment\n", + "observation, info = env.reset()\n", + "\n", + "for _ in range(20):\n", + " # Take a random action\n", + " action = env.action_space.sample()\n", + " print(\"Action taken:\", action)\n", + "\n", + " # Do this action in the environment and get\n", + " # next_state, reward, terminated, truncated and info\n", + " observation, reward, terminated, truncated, info = env.step(action)\n", + "\n", + " # If the game is terminated (in our case we land, crashed) or truncated (timeout)\n", + " if terminated or truncated:\n", + " # Reset the environment\n", + " print(\"Environment is reset\")\n", + " observation, info = env.reset()\n", + "\n", + "env.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XIrKGGSlENZB" + }, + "source": [ + "## Create the LunarLander environment 🌛 and understand how it works\n", + "\n", + "### [The environment 🎮](https://gymnasium.farama.org/environments/box2d/lunar_lander/)\n", + "\n", + "In this first tutorial, we’re going to train our agent, a [Lunar Lander](https://gymnasium.farama.org/environments/box2d/lunar_lander/), **to land correctly on the moon**. To do that, the agent needs to learn **to adapt its speed and position (horizontal, vertical, and angular) to land correctly.**\n", + "\n", + "---\n", + "\n", + "\n", + "💡 A good habit when you start to use an environment is to check its documentation\n", + "\n", + "👉 https://gymnasium.farama.org/environments/box2d/lunar_lander/\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "poLBgRocF9aT" + }, + "source": [ + "Let's see what the Environment looks like:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZNPG0g_UGCfh" + }, + "outputs": [], + "source": [ + "# We create our environment with gym.make(\"\")\n", + "env = gym.make(\"LunarLander-v2\")\n", + "env.reset()\n", + "print(\"_____OBSERVATION SPACE_____ \\n\")\n", + "print(\"Observation Space Shape\", env.observation_space.shape)\n", + "print(\"Sample observation\", env.observation_space.sample()) # Get a random observation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2MXc15qFE0M9" + }, + "source": [ + "We see with `Observation Space Shape (8,)` that the observation is a vector of size 8, where each value contains different information about the lander:\n", + "- Horizontal pad coordinate (x)\n", + "- Vertical pad coordinate (y)\n", + "- Horizontal speed (x)\n", + "- Vertical speed (y)\n", + "- Angle\n", + "- Angular speed\n", + "- If the left leg contact point has touched the land (boolean)\n", + "- If the right leg contact point has touched the land (boolean)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "We5WqOBGLoSm" + }, + "outputs": [], + "source": [ + "print(\"\\n _____ACTION SPACE_____ \\n\")\n", + "print(\"Action Space Shape\", env.action_space.n)\n", + "print(\"Action Space Sample\", env.action_space.sample()) # Take a random action" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MyxXwkI2Magx" + }, + "source": [ + "The action space (the set of possible actions the agent can take) is discrete with 4 actions available 🎮:\n", + "\n", + "- Action 0: Do 
nothing,\n", + "- Action 1: Fire left orientation engine,\n", + "- Action 2: Fire the main engine,\n", + "- Action 3: Fire right orientation engine.\n", + "\n", + "Reward function (the function that gives a reward at each timestep) 💰:\n", + "\n", + "After every step a reward is granted. The total reward of an episode is the **sum of the rewards for all the steps within that episode**.\n", + "\n", + "For each step, the reward:\n", + "\n", + "- Is increased/decreased the closer/further the lander is to the landing pad.\n", + "- Is increased/decreased the slower/faster the lander is moving.\n", + "- Is decreased the more the lander is tilted (angle not horizontal).\n", + "- Is increased by 10 points for each leg that is in contact with the ground.\n", + "- Is decreased by 0.03 points each frame a side engine is firing.\n", + "- Is decreased by 0.3 points each frame the main engine is firing.\n", + "\n", + "The episode receives an **additional reward of -100 or +100 points for crashing or landing safely, respectively.**\n", + "\n", + "An episode is **considered solved if it scores at least 200 points.**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dFD9RAFjG8aq" + }, + "source": [ + "#### Vectorized Environment\n", + "\n", + "- We create a vectorized environment (a method for stacking multiple independent environments into a single environment) of 16 environments; this way, **we'll have more diverse experiences during training.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "99hqQ_etEy1N" + }, + "outputs": [], + "source": [ + "# Create the environment\n", + "env = make_vec_env(\"LunarLander-v2\", n_envs=16)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VgrE86r5E5IK" + }, + "source": [ + "## Create the Model 🤖\n", + "- We have studied our environment and we understand the problem: **being able to land the Lunar Lander on the Landing Pad correctly by controlling the left and right orientation engines and the main engine**. Now let's build the algorithm we're going to use to solve this problem 🚀.\n", + "\n", + "- To do so, we're going to use our first Deep RL library, [Stable Baselines3 (SB3)](https://stable-baselines3.readthedocs.io/en/master/).\n", + "\n", + "- SB3 is a set of **reliable implementations of reinforcement learning algorithms in PyTorch**.\n", + "\n", + "---\n", + "\n", + "💡 A good habit when using a new library is to dive into the documentation first: https://stable-baselines3.readthedocs.io/en/master/ and then try some tutorials.\n", + "\n", + "----" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HLlClRW37Q7e" + }, + "source": [ + "\"Stable" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HV4yiUM_9_Ka" + }, + "source": [ + "To solve this problem, we're going to use SB3 **PPO**. [PPO (aka Proximal Policy Optimization) is one of the SOTA (state of the art) Deep Reinforcement Learning algorithms that you'll study during this course](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#example).\n", + "\n", + "PPO is a combination of:\n", + "- *Value-based reinforcement learning method*: learning an action-value function that will tell us the **most valuable action to take given a state**.\n", + "- *Policy-based reinforcement learning method*: learning a policy that will **give us a probability distribution over actions**."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5qL_4HeIOrEJ" + }, + "source": [ + "Stable-Baselines3 is easy to set up:\n", + "\n", + "1️⃣ You **create your environment** (in our case it was done above)\n", + "\n", + "2️⃣ You define the **model you want to use and instantiate this model** `model = PPO(\"MlpPolicy\")`\n", + "\n", + "3️⃣ You **train the agent** with `model.learn` and define the number of training timesteps\n", + "\n", + "```\n", + "# Create environment\n", + "env = gym.make('LunarLander-v2')\n", + "\n", + "# Instantiate the agent\n", + "model = PPO('MlpPolicy', env, verbose=1)\n", + "# Train the agent\n", + "model.learn(total_timesteps=int(2e5))\n", + "```\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nxI6hT1GE4-A" + }, + "outputs": [], + "source": [ + "# TODO: Define a PPO MlpPolicy architecture\n", + "# We use MultiLayerPerceptron (MLPPolicy) because the input is a vector,\n", + "# if we had frames as input we would use CnnPolicy\n", + "model =" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ClJJk88yoBUi" + }, + "source": [ + "## Train the PPO agent 🏃\n", + "- Let's train our agent for 10,000 timesteps. You may want to increase it later on.\n", + "- During the training, take a ☕ break, you deserve it 🤗" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qKnYkNiVp89p" + }, + "outputs": [], + "source": [ + "# TODO: Train it for 10,000 timesteps\n", + "\n", + "# TODO: Specify file name for model and save the model to file\n", + "model_name = \"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BY_HuedOoISR" + }, + "source": [ + "## Evaluate the agent 📈\n", + "- Remember to wrap the environment in a [Monitor](https://stable-baselines3.readthedocs.io/en/master/common/monitor.html).\n", + "- Now that our Lunar Lander agent is trained 🚀, we need to **check its performance**.\n", + "- Stable-Baselines3 provides a method to do that: `evaluate_policy`.\n", + "- To fill in that part you need to [check the documentation](https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#basic-usage-training-saving-loading)\n", + "- In the next step, we'll see **how to automatically evaluate and share your agent to compete in a leaderboard, but for now let's do it ourselves**\n", + "\n", + "\n", + "💡 When you evaluate your agent, you should not use your training environment but create an evaluation environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yRpno0glsADy" + }, + "outputs": [], + "source": [ + "# TODO: Evaluate the agent\n", + "# Create a new environment for evaluation\n", + "eval_env =\n", + "\n", + "# Evaluate the model with 10 evaluation episodes and deterministic=True\n", + "mean_reward, std_reward =\n", + "\n", + "# Print the results\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "reBhoODwcXfr" + }, + "source": [ + "- In my case, I got a mean reward of `200.20 +/- 20.80` after training for 1 million steps, which means that our lunar lander agent is ready to land on the moon 🌛🥳." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BQAwLnYFPk-s" + }, + "source": [ + "## Some additional challenges 🏆\n", + "The best way to learn **is to try things on your own**! As you saw, the current agent is not doing great. As a first suggestion, you can train for more steps. 
With 1,000,000 steps, we saw some great results!\n", + "\n", + "Can you beat your neighbour's mean reward?\n", + "\n", + "Here are some ideas to achieve so:\n", + "* Train more steps\n", + "* Try different hyperparameters for `PPO`. You can see them at https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#parameters.\n", + "* Check the [Stable-Baselines3 documentation](https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html) and try another model such as DQN.\n", + "\n", + "Is moon landing too boring for you? Try to **change the environment**, why not use MountainCar-v0, CartPole-v1 or CarRacing-v0? Check how they work [using the gym documentation](https://www.gymlibrary.dev/) and have fun 🎉." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/2a_reinforcement_learning_solutions.ipynb b/2a_reinforcement_learning_solutions.ipynb new file mode 100644 index 0000000..c0ccf51 --- /dev/null +++ b/2a_reinforcement_learning_solutions.ipynb @@ -0,0 +1,446 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "njb_ProuHiOe" + }, + "source": [ + "# Intro to Reinforcement Learning\n", + "\n", + "This notebook is modified from [Unit 1 of Hugging Face's Deep RL course](https://github.com/huggingface/deep-rl-class/blob/main/notebooks/unit1/unit1.ipynb), it has been simplified." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wrgpVFqyENVf" + }, + "source": [ + "## Import the packages 📦" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cygWLPGsEQ0m" + }, + "outputs": [], + "source": [ + "from stable_baselines3 import PPO\n", + "from stable_baselines3.common.env_util import make_vec_env\n", + "from stable_baselines3.common.evaluation import evaluate_policy\n", + "from stable_baselines3.common.monitor import Monitor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-TzNN0bQ_j-3" + }, + "source": [ + "At each step:\n", + "- Our Agent receives a **state (S0)** from the **Environment** — we receive the first frame of our game (Environment).\n", + "- Based on that **state (S0),** the Agent takes an **action (A0)** — our Agent will move to the right.\n", + "- The environment transitions to a **new** **state (S1)** — new frame.\n", + "- The environment gives some **reward (R1)** to the Agent — we’re not dead *(Positive Reward +1)*.\n", + "\n", + "\n", + "With Gymnasium:\n", + "\n", + "1️⃣ We create our environment using `gymnasium.make()`\n", + "\n", + "2️⃣ We reset the environment to its initial state with `observation = env.reset()`\n", + "\n", + "At each step:\n", + "\n", + "3️⃣ Get an action using our model (in our example we take a random action)\n", + "\n", + "4️⃣ Using `env.step(action)`, we perform this action in the environment and get\n", + "- `observation`: The new state (st+1)\n", + "- `reward`: The reward we get after executing the action\n", + "- `terminated`: Indicates if the episode terminated (agent reach the terminal state)\n", + "- `truncated`: Introduced with this new version, it indicates a timelimit or if an agent go out of bounds of the environment for instance.\n", + "- `info`: A 
dictionary that provides additional information (depends on the environment).\n", + "\n", + "For more explanations check this 👉 https://gymnasium.farama.org/api/env/#gymnasium.Env.step\n", + "\n", + "If the episode is terminated:\n", + "- We reset the environment to its initial state with `observation = env.reset()`\n", + "\n", + "**Let's look at an example!** Make sure to read the code\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w7vOFlpA_ONz" + }, + "outputs": [], + "source": [ + "import gymnasium as gym\n", + "\n", + "# First, we create our environment called LunarLander-v2\n", + "env = gym.make(\"LunarLander-v2\")\n", + "\n", + "# Then we reset this environment\n", + "observation, info = env.reset()\n", + "\n", + "for _ in range(20):\n", + " # Take a random action\n", + " action = env.action_space.sample()\n", + " print(\"Action taken:\", action)\n", + "\n", + " # Do this action in the environment and get\n", + " # next_state, reward, terminated, truncated and info\n", + " observation, reward, terminated, truncated, info = env.step(action)\n", + "\n", + " # If the game is terminated (in our case we land, crashed) or truncated (timeout)\n", + " if terminated or truncated:\n", + " # Reset the environment\n", + " print(\"Environment is reset\")\n", + " observation, info = env.reset()\n", + "\n", + "env.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XIrKGGSlENZB" + }, + "source": [ + "## Create the LunarLander environment 🌛 and understand how it works\n", + "\n", + "### [The environment 🎮](https://gymnasium.farama.org/environments/box2d/lunar_lander/)\n", + "\n", + "In this first tutorial, we’re going to train our agent, a [Lunar Lander](https://gymnasium.farama.org/environments/box2d/lunar_lander/), **to land correctly on the moon**. 
To do that, the agent needs to learn **to adapt its speed and position (horizontal, vertical, and angular) to land correctly.**\n", + "\n", + "---\n", + "\n", + "\n", + "💡 A good habit when you start to use an environment is to check its documentation\n", + "\n", + "👉 https://gymnasium.farama.org/environments/box2d/lunar_lander/\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "poLBgRocF9aT" + }, + "source": [ + "Let's see what the Environment looks like:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZNPG0g_UGCfh" + }, + "outputs": [], + "source": [ + "# We create our environment with gym.make(\"\")\n", + "env = gym.make(\"LunarLander-v2\")\n", + "env.reset()\n", + "print(\"_____OBSERVATION SPACE_____ \\n\")\n", + "print(\"Observation Space Shape\", env.observation_space.shape)\n", + "print(\"Sample observation\", env.observation_space.sample()) # Get a random observation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2MXc15qFE0M9" + }, + "source": [ + "We see with `Observation Space Shape (8,)` that the observation is a vector of size 8, where each value contains different information about the lander:\n", + "- Horizontal pad coordinate (x)\n", + "- Vertical pad coordinate (y)\n", + "- Horizontal speed (x)\n", + "- Vertical speed (y)\n", + "- Angle\n", + "- Angular speed\n", + "- If the left leg contact point has touched the land (boolean)\n", + "- If the right leg contact point has touched the land (boolean)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "We5WqOBGLoSm" + }, + "outputs": [], + "source": [ + "print(\"\\n _____ACTION SPACE_____ \\n\")\n", + "print(\"Action Space Shape\", env.action_space.n)\n", + "print(\"Action Space Sample\", env.action_space.sample()) # Take a random action" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MyxXwkI2Magx" + }, + "source": [ + "The action space (the set of possible actions the agent can take) is discrete with 4 actions available 🎮:\n", + "\n", + "- Action 0: Do nothing,\n", + "- Action 1: Fire left orientation engine,\n", + "- Action 2: Fire the main engine,\n", + "- Action 3: Fire right orientation engine.\n", + "\n", + "Reward function (the function that will gives a reward at each timestep) 💰:\n", + "\n", + "After every step a reward is granted. 
The total reward of an episode is the **sum of the rewards for all the steps within that episode**.\n", + "\n", + "For each step, the reward:\n", + "\n", + "- Is increased/decreased the closer/further the lander is to the landing pad.\n", + "- Is increased/decreased the slower/faster the lander is moving.\n", + "- Is decreased the more the lander is tilted (angle not horizontal).\n", + "- Is increased by 10 points for each leg that is in contact with the ground.\n", + "- Is decreased by 0.03 points each frame a side engine is firing.\n", + "- Is decreased by 0.3 points each frame the main engine is firing.\n", + "\n", + "The episode receives an **additional reward of -100 or +100 points for crashing or landing safely, respectively.**\n", + "\n", + "An episode is **considered solved if it scores at least 200 points.**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dFD9RAFjG8aq" + }, + "source": [ + "#### Vectorized Environment\n", + "\n", + "- We create a vectorized environment (a method for stacking multiple independent environments into a single environment) of 16 environments; this way, **we'll have more diverse experiences during training.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "99hqQ_etEy1N" + }, + "outputs": [], + "source": [ + "# Create the environment\n", + "env = make_vec_env(\"LunarLander-v2\", n_envs=16)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VgrE86r5E5IK" + }, + "source": [ + "## Create the Model 🤖\n", + "- We have studied our environment and we understand the problem: **being able to land the Lunar Lander on the Landing Pad correctly by controlling the left and right orientation engines and the main engine**. Now let's build the algorithm we're going to use to solve this problem 🚀.\n", + "\n", + "- To do so, we're going to use our first Deep RL library, [Stable Baselines3 (SB3)](https://stable-baselines3.readthedocs.io/en/master/).\n", + "\n", + "- SB3 is a set of **reliable implementations of reinforcement learning algorithms in PyTorch**.\n", + "\n", + "---\n", + "\n", + "💡 A good habit when using a new library is to dive into the documentation first: https://stable-baselines3.readthedocs.io/en/master/ and then try some tutorials.\n", + "\n", + "----" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HLlClRW37Q7e" + }, + "source": [ + "\"Stable" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HV4yiUM_9_Ka" + }, + "source": [ + "To solve this problem, we're going to use SB3 **PPO**. [PPO (aka Proximal Policy Optimization) is one of the SOTA (state of the art) Deep Reinforcement Learning algorithms that you'll study during this course](https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#example).\n", + "\n", + "PPO is a combination of:\n", + "- *Value-based reinforcement learning method*: learning an action-value function that will tell us the **most valuable action to take given a state**.\n", + "- *Policy-based reinforcement learning method*: learning a policy that will **give us a probability distribution over actions**."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5qL_4HeIOrEJ" + }, + "source": [ + "Stable-Baselines3 is easy to set up:\n", + "\n", + "1️⃣ You **create your environment** (in our case it was done above)\n", + "\n", + "2️⃣ You define the **model you want to use and instantiate this model** `model = PPO(\"MlpPolicy\")`\n", + "\n", + "3️⃣ You **train the agent** with `model.learn` and define the number of training timesteps\n", + "\n", + "```\n", + "# Create environment\n", + "env = gym.make('LunarLander-v2')\n", + "\n", + "# Instantiate the agent\n", + "model = PPO('MlpPolicy', env, verbose=1)\n", + "# Train the agent\n", + "model.learn(total_timesteps=int(2e5))\n", + "```\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nxI6hT1GE4-A" + }, + "outputs": [], + "source": [ + "# We use MultiLayerPerceptron (MLPPolicy) because the input is a vector,\n", + "# if we had frames as input we would use CnnPolicy\n", + "model = PPO(\n", + " policy=\"MlpPolicy\",\n", + " env=env,\n", + " n_steps=1024,\n", + " batch_size=64,\n", + " n_epochs=4,\n", + " gamma=0.999,\n", + " gae_lambda=0.98,\n", + " ent_coef=0.01,\n", + " verbose=1,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ClJJk88yoBUi" + }, + "source": [ + "## Train the PPO agent 🏃\n", + "- Let's train our agent for 1,000,000 timesteps; don't forget to use a GPU on Colab. It will take approximately 20 minutes, but you can use fewer timesteps if you just want to try it out.\n", + "- During the training, take a ☕ break, you deserve it 🤗" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qKnYkNiVp89p" + }, + "outputs": [], + "source": [ + "# TODO: Train it for 1,000,000 timesteps\n", + "model.learn(total_timesteps=1_000_000)\n", + "\n", + "# TODO: Specify file name for model and save the model to file\n", + "model_name = \"ppo-LunarLander-v2\"\n", + "model.save(model_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BY_HuedOoISR" + }, + "source": [ + "## Evaluate the agent 📈\n", + "- Remember to wrap the environment in a [Monitor](https://stable-baselines3.readthedocs.io/en/master/common/monitor.html).\n", + "- Now that our Lunar Lander agent is trained 🚀, we need to **check its performance**.\n", + "- Stable-Baselines3 provides a method to do that: `evaluate_policy`.\n", + "- To fill in that part you need to [check the documentation](https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#basic-usage-training-saving-loading)\n", + "- In the next step, we'll see **how to automatically evaluate and share your agent to compete in a leaderboard, but for now let's do it ourselves**\n", + "\n", + "\n", + "💡 When you evaluate your agent, you should not use your training environment but create an evaluation environment."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yRpno0glsADy" + }, + "outputs": [], + "source": [ + "# TODO: Evaluate the agent\n", + "# Create a new environment for evaluation\n", + "eval_env = Monitor(gym.make(\"LunarLander-v2\"))\n", + "\n", + "# Evaluate the model with 10 evaluation episodes and deterministic=True\n", + "mean_reward, std_reward = evaluate_policy(\n", + " model, eval_env, n_eval_episodes=10, deterministic=True\n", + ")\n", + "\n", + "# Print the results\n", + "print(f\"mean_reward={mean_reward:.2f} +/- {std_reward}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "reBhoODwcXfr" + }, + "source": [ + "- In my case, I got a mean reward is `200.20 +/- 20.80` after training for 1 million steps, which means that our lunar lander agent is ready to land on the moon 🌛🥳." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BQAwLnYFPk-s" + }, + "source": [ + "## Some additional challenges 🏆\n", + "The best way to learn **is to try things by your own**! As you saw, the current agent is not doing great. As a first suggestion, you can train for more steps. With 1,000,000 steps, we saw some great results!\n", + "\n", + "Can you beat your neighbour's mean reward?\n", + "\n", + "Here are some ideas to achieve so:\n", + "* Train more steps\n", + "* Try different hyperparameters for `PPO`. You can see them at https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html#parameters.\n", + "* Check the [Stable-Baselines3 documentation](https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html) and try another model such as DQN.\n", + "\n", + "Is moon landing too boring for you? Try to **change the environment**, why not use MountainCar-v0, CartPole-v1 or CarRacing-v0? Check how they work [using the gym documentation](https://www.gymlibrary.dev/) and have fun 🎉." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/poetry.lock b/poetry.lock index 96ede6b..11151de 100644 --- a/poetry.lock +++ b/poetry.lock @@ -11,6 +11,43 @@ files = [ {file = "absl_py-2.1.0-py3-none-any.whl", hash = "sha256:526a04eadab8b4ee719ce68f204172ead1027549089702d99b9059f129ff1308"}, ] +[[package]] +name = "ale-py" +version = "0.8.1" +description = "The Arcade Learning Environment (ALE) - a platform for AI research." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ale_py-0.8.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:b2aa2f69a4169742800615970efe6914fa856e33eaf7fa9133c0e06a617a80e2"}, + {file = "ale_py-0.8.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f2f6b92c8fd6189654979bbf0b305dbe0ecf82176c47f244d8c1cbc36286b89"}, + {file = "ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9b168eb88c87d0f3e2a778e6c5cdde4ad951d1ca8a6dc3d3679fd45398df7d1"}, + {file = "ale_py-0.8.1-cp310-cp310-win_amd64.whl", hash = "sha256:5fcc31f495de79ee1d6bfc0f4b7c4619948851e679bbf010035e25f23146a687"}, + {file = "ale_py-0.8.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:0856ca777473ec4ae8a59f3af9580259adb0fd4a47d586a125a440c62e82fc10"}, + {file = "ale_py-0.8.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f10b1df8774bbe3b00365748b5e0e07cf35f6a703bbaff991bc7b3b2247dccc9"}, + {file = "ale_py-0.8.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0006d80dfe7745eb5a93444492337203c8bc7eb594a2c24c6a651c5c5b0eaf09"}, + {file = "ale_py-0.8.1-cp311-cp311-win_amd64.whl", hash = "sha256:9773eea7505484e024beb2fff0f3bfd363db151bdb9799d70995448e196b1ded"}, + {file = "ale_py-0.8.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:87557db05be0e04130e2ec1bf909d3bb0b0bc034645d4f664e6baa573fe32191"}, + {file = "ale_py-0.8.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae2ba24557e0ce541ea3be13b148db2a9cfa730d83537b4cbed5e10449826e51"}, + {file = "ale_py-0.8.1-cp37-cp37m-win_amd64.whl", hash = "sha256:ade5c32af567629164a6b49378978c728a15dc4db07ad6b679e8832d4fd3ea1f"}, + {file = "ale_py-0.8.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:0ffecb5c956749596030e464827642945162170a132d093c3d4fa2d7e5725c18"}, + {file = "ale_py-0.8.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7cd74b7ee0248ef11a086c9764e142e71defd40ec8989a99232bfd2d9e8023be"}, + {file = "ale_py-0.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eadf9f3990b4ff2f9e5ca35889f5e2e95cddd6a353d9d857d9b4601a6e1c4e7c"}, + {file = "ale_py-0.8.1-cp38-cp38-win_amd64.whl", hash = "sha256:817adf9a3a82c4923c731e634520a5ecf296aca0367f5c69959a96b32119d831"}, + {file = "ale_py-0.8.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:2d9fcfa06c74a613c5419e942ef4d3e0959533f52e94d2d4bda61d07fbfffeee"}, + {file = "ale_py-0.8.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f278036f9b6066062abcdf0987a0ec5a8e0f22a2c7cfac925e39378d4343d490"}, + {file = "ale_py-0.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b00f74e27815131c1a2791f3d48114363fa2708e19f09ce6b7b614cb14c9d469"}, + {file = "ale_py-0.8.1-cp39-cp39-win_amd64.whl", hash = "sha256:d49b550a2d9c25b63c343aa680fd81f253a3714cdc0e1835640933ebff1798ff"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.10.0", markers = "python_version < \"3.10\""} +importlib-resources = "*" +numpy = "*" +typing-extensions = {version = "*", markers = "python_version < \"3.11\""} + +[package.extras] +test = ["gym (>=0.23,<1.0)", "pytest (>=7.0)"] + [[package]] name = "anyio" version = "4.3.0" @@ -186,6 +223,42 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +[[package]] +name = "autorom" +version = "0.6.1" +description = "Automated installation of Atari ROMs for 
Gym/ALE-Py" +optional = false +python-versions = ">=3.7" +files = [ + {file = "AutoROM-0.6.1-py3-none-any.whl", hash = "sha256:e734fdad23dc8e48897de013803eba3c9e109e028d5463a4817346f7f669604f"}, + {file = "AutoROM-0.6.1.tar.gz", hash = "sha256:6eff1f1b96a9d519577437f71d96a8d3b896238eca3433a8e69c5c92f6de3231"}, +] + +[package.dependencies] +"AutoROM.accept-rom-license" = {version = "*", optional = true, markers = "extra == \"accept-rom-license\""} +click = "*" +requests = "*" + +[package.extras] +accept-rom-license = ["AutoROM.accept-rom-license"] + +[[package]] +name = "autorom-accept-rom-license" +version = "0.6.1" +description = "Automated installation of Atari ROMs for Gym/ALE-Py" +optional = false +python-versions = ">=3.7" +files = [ + {file = "AutoROM.accept-rom-license-0.6.1.tar.gz", hash = "sha256:0c905a708d634a076f686802f672817d3585259ce3be0bde8713a4fb59e3159e"}, +] + +[package.dependencies] +click = "*" +requests = "*" + +[package.extras] +tests = ["ale_py", "multi_agent_ale_py"] + [[package]] name = "babel" version = "2.15.0" @@ -239,6 +312,20 @@ webencodings = "*" [package.extras] css = ["tinycss2 (>=1.1.0,<1.3)"] +[[package]] +name = "box2d-py" +version = "2.3.5" +description = "Python Box2D" +optional = false +python-versions = "*" +files = [ + {file = "box2d-py-2.3.5.tar.gz", hash = "sha256:b37dc38844bcd7def48a97111d2b082e4f81cca3cece7460feb3eacda0da2207"}, + {file = "box2d_py-2.3.5-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:287aa54005c0644b47bf7ad72966e4068d66e56bcf8458f5b4a653ffe42a2618"}, + {file = "box2d_py-2.3.5-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:483b3f9acd5d156b72bf2013f93cf7f8ca0ee1562e43d2353ab4c0cbec4ee49a"}, + {file = "box2d_py-2.3.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:a294c2d7cc73cc05dd491287079e15419eb98caa3158df94f40faf85eeb4b6e9"}, + {file = "box2d_py-2.3.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:0d46068eb8d29e366ed698ab2a4833d4d2d34ed035ebd6a685888007dda05f64"}, +] + [[package]] name = "certifi" version = "2024.2.2" @@ -413,6 +500,20 @@ files = [ {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "cloudpickle" version = "3.0.0" @@ -873,10 +974,13 @@ files = [ ] [package.dependencies] +box2d-py = {version = "2.3.5", optional = true, markers = "extra == \"box2d\""} cloudpickle = ">=1.2.0" farama-notifications = ">=0.0.1" importlib-metadata = {version = ">=4.8.0", markers = "python_version < \"3.10\""} numpy = ">=1.21.0" +pygame = {version = ">=2.1.3", optional = true, markers = "extra == \"box2d\""} +swig = {version = "==4.*", optional = true, markers = "extra == \"box2d\""} typing-extensions = ">=4.3.0" [package.extras] @@ -1885,9 +1989,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.23.3", markers = "python_version >= \"3.11\""}, {version = ">=1.21.2", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">1.20", markers = "python_version < \"3.10\""}, + {version = 
">=1.23.3", markers = "python_version >= \"3.11\""}, ] [package.extras] @@ -2235,6 +2339,31 @@ files = [ {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"}, ] +[[package]] +name = "opencv-python" +version = "4.9.0.80" +description = "Wrapper package for OpenCV python bindings." +optional = false +python-versions = ">=3.6" +files = [ + {file = "opencv-python-4.9.0.80.tar.gz", hash = "sha256:1a9f0e6267de3a1a1db0c54213d022c7c8b5b9ca4b580e80bdc58516c922c9e1"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-macosx_10_16_x86_64.whl", hash = "sha256:7e5f7aa4486651a6ebfa8ed4b594b65bd2d2f41beeb4241a3e4b1b85acbbbadb"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71dfb9555ccccdd77305fc3dcca5897fbf0cf28b297c51ee55e079c065d812a3"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b34a52e9da36dda8c151c6394aed602e4b17fa041df0b9f5b93ae10b0fcca2a"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4088cab82b66a3b37ffc452976b14a3c599269c247895ae9ceb4066d8188a57"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-win32.whl", hash = "sha256:dcf000c36dd1651118a2462257e3a9e76db789a78432e1f303c7bac54f63ef6c"}, + {file = "opencv_python-4.9.0.80-cp37-abi3-win_amd64.whl", hash = "sha256:3f16f08e02b2a2da44259c7cc712e779eff1dd8b55fdb0323e8cab09548086c0"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, + {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""}, + {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, +] + [[package]] name = "opt-einsum" version = "3.3.0" @@ -2373,8 +2502,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2659,6 +2788,72 @@ files = [ {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] +[[package]] +name = "pygame" +version = "2.5.2" +description = "Python Game Development" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pygame-2.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a0769eb628c818761755eb0a0ca8216b95270ea8cbcbc82227e39ac9644643da"}, + {file = "pygame-2.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed9a3d98adafa0805ccbaaff5d2996a2b5795381285d8437a4a5d248dbd12b4a"}, + {file = "pygame-2.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f30d1618672a55e8c6669281ba264464b3ab563158e40d89e8c8b3faa0febebd"}, + {file = 
"pygame-2.5.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39690e9be9baf58b7359d1f3b2336e1fd6f92fedbbce42987be5df27f8d30718"}, + {file = "pygame-2.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03879ec299c9f4ba23901b2649a96b2143f0a5d787f0b6c39469989e2320caf1"}, + {file = "pygame-2.5.2-cp310-cp310-win32.whl", hash = "sha256:74e1d6284100e294f445832e6f6343be4fe4748decc4f8a51131ae197dae8584"}, + {file = "pygame-2.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:485239c7d32265fd35b76ae8f64f34b0637ae11e69d76de15710c4b9edcc7c8d"}, + {file = "pygame-2.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:34646ca20e163dc6f6cf8170f1e12a2e41726780112594ac061fa448cf7ccd75"}, + {file = "pygame-2.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3b8a6e351665ed26ea791f0e1fd649d3f483e8681892caef9d471f488f9ea5ee"}, + {file = "pygame-2.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc346965847aef00013fa2364f41a64f068cd096dcc7778fc306ca3735f0eedf"}, + {file = "pygame-2.5.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35632035fd81261f2d797fa810ea8c46111bd78ceb6089d52b61ed7dc3c5d05f"}, + {file = "pygame-2.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e24d05184e4195fe5ebcdce8b18ecb086f00182b9ae460a86682d312ce8d31f"}, + {file = "pygame-2.5.2-cp311-cp311-win32.whl", hash = "sha256:f02c1c7505af18d426d355ac9872bd5c916b27f7b0fe224749930662bea47a50"}, + {file = "pygame-2.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:6d58c8cf937815d3b7cdc0fa9590c5129cb2c9658b72d00e8a4568dea2ff1d42"}, + {file = "pygame-2.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1a2a43802bb5e89ce2b3b775744e78db4f9a201bf8d059b946c61722840ceea8"}, + {file = "pygame-2.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1c289f2613c44fe70a1e40769de4a49c5ab5a29b9376f1692bb1a15c9c1c9bfa"}, + {file = "pygame-2.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:074aa6c6e110c925f7f27f00c7733c6303407edc61d738882985091d1eb2ef17"}, + {file = "pygame-2.5.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe0228501ec616779a0b9c4299e837877783e18df294dd690b9ab0eed3d8aaab"}, + {file = "pygame-2.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31648d38ecdc2335ffc0e38fb18a84b3339730521505dac68514f83a1092e3f4"}, + {file = "pygame-2.5.2-cp312-cp312-win32.whl", hash = "sha256:224c308856334bc792f696e9278e50d099a87c116f7fc314cd6aa3ff99d21592"}, + {file = "pygame-2.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:dd2d2650faf54f9a0f5bd0db8409f79609319725f8f08af6507a0609deadcad4"}, + {file = "pygame-2.5.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9b30bc1220c457169571aac998e54b013aaeb732d2fd8744966cb1cfab1f61d1"}, + {file = "pygame-2.5.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78fcd7643358b886a44127ff7dec9041c056c212b3a98977674f83f99e9b12d3"}, + {file = "pygame-2.5.2-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35cf093a51cb294ede56c29d4acf41538c00f297fcf78a9b186fb7d23c0577b6"}, + {file = "pygame-2.5.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fe323acbf53a0195c8c98b1b941eba7ac24e3e2b28ae48e8cda566f15fc4945"}, + {file = "pygame-2.5.2-cp36-cp36m-win32.whl", hash = "sha256:5697528266b4716d9cdd44a5a1d210f4d86ef801d0f64ca5da5d0816704009d9"}, + {file = "pygame-2.5.2-cp36-cp36m-win_amd64.whl", hash = 
"sha256:edda1f7cff4806a4fa39e0e8ccd75f38d1d340fa5fc52d8582ade87aca247d92"}, + {file = "pygame-2.5.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9bd738fd4ecc224769d0b4a719f96900a86578e26e0105193658a32966df2aae"}, + {file = "pygame-2.5.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30a8d7cf12363b4140bf2f93b5eec4028376ca1d0fe4b550588f836279485308"}, + {file = "pygame-2.5.2-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bc12e4dea3e88ea8a553de6d56a37b704dbe2aed95105889f6afeb4b96e62097"}, + {file = "pygame-2.5.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b34c73cb328024f8db3cb6487a37e54000148988275d8d6e5adf99d9323c937"}, + {file = "pygame-2.5.2-cp37-cp37m-win32.whl", hash = "sha256:7d0a2794649defa57ef50b096a99f7113d3d0c2e32d1426cafa7d618eadce4c7"}, + {file = "pygame-2.5.2-cp37-cp37m-win_amd64.whl", hash = "sha256:41f8779f52e0f6e6e6ccb8f0b5536e432bf386ee29c721a1c22cada7767b0cef"}, + {file = "pygame-2.5.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:677e37bc0ea7afd89dde5a88ced4458aa8656159c70a576eea68b5622ee1997b"}, + {file = "pygame-2.5.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:47a8415d2bd60e6909823b5643a1d4ef5cc29417d817f2a214b255f6fa3a1e4c"}, + {file = "pygame-2.5.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ff21201df6278b8ca2e948fb148ffe88f5481fd03760f381dd61e45954c7dff"}, + {file = "pygame-2.5.2-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d29a84b2e02814b9ba925357fd2e1df78efe5e1aa64dc3051eaed95d2b96eafd"}, + {file = "pygame-2.5.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d78485c4d21133d6b2fbb504cd544ca655e50b6eb551d2995b3aa6035928adda"}, + {file = "pygame-2.5.2-cp38-cp38-win32.whl", hash = "sha256:d851247239548aa357c4a6840fb67adc2d570ce7cb56988d036a723d26b48bff"}, + {file = "pygame-2.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:88d1cdacc2d3471eceab98bf0c93c14d3a8461f93e58e3d926f20d4de3a75554"}, + {file = "pygame-2.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4f1559e7efe4efb9dc19d2d811d702f325d9605f9f6f9ececa39ee6890c798f5"}, + {file = "pygame-2.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cf2191b756ceb0e8458a761d0c665b0c70b538570449e0d39b75a5ba94ac5cf0"}, + {file = "pygame-2.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cf2257447ce7f2d6de37e5fb019d2bbe32ed05a5721ace8bc78c2d9beaf3aee"}, + {file = "pygame-2.5.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d75cbbfaba2b81434d62631d0b08b85fab16cf4a36e40b80298d3868927e1299"}, + {file = "pygame-2.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:daca456d5b9f52e088e06a127dec182b3638a775684fb2260f25d664351cf1ae"}, + {file = "pygame-2.5.2-cp39-cp39-win32.whl", hash = "sha256:3b3e619e33d11c297d7a57a82db40681f9c2c3ae1d5bf06003520b4fe30c435d"}, + {file = "pygame-2.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:1822d534bb7fe756804647b6da2c9ea5d7a62d8796b2e15d172d3be085de28c6"}, + {file = "pygame-2.5.2-pp36-pypy36_pp73-win32.whl", hash = "sha256:e708fc8f709a0fe1d1876489345f2e443d47f3976d33455e2e1e937f972f8677"}, + {file = "pygame-2.5.2-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c13edebc43c240fb0532969e914f0ccefff5ae7e50b0b788d08ad2c15ef793e4"}, + {file = "pygame-2.5.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:263b4a7cbfc9fe2055abc21b0251cc17dea6dff750f0e1c598919ff350cdbffe"}, + {file = 
"pygame-2.5.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e58e2b0c791041e4bccafa5bd7650623ba1592b8fe62ae0a276b7d0ecb314b6c"}, + {file = "pygame-2.5.2-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0bd67426c02ffe6c9827fc4bcbda9442fbc451d29b17c83a3c088c56fef2c90"}, + {file = "pygame-2.5.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dcff6cbba1584cf7732ce1dbdd044406cd4f6e296d13bcb7fba963fb4aeefc9"}, + {file = "pygame-2.5.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ce4b6c0bfe44d00bb0998a6517bd0cf9455f642f30f91bc671ad41c05bf6f6ae"}, + {file = "pygame-2.5.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68c4e8e60b725ffc7a6c6ecd9bb5fcc5ed2d6e0e2a2c4a29a8454856ef16ad63"}, + {file = "pygame-2.5.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f3849f97372a3381c66955f99a0d58485ccd513c3d00c030b869094ce6997a6"}, + {file = "pygame-2.5.2.tar.gz", hash = "sha256:c1b89eb5d539e7ac5cf75513125fb5f2f0a2d918b1fd6e981f23bf0ac1b1c24a"}, +] + [[package]] name = "pygments" version = "2.18.0" @@ -3229,6 +3424,35 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] +[[package]] +name = "shimmy" +version = "1.3.0" +description = "An API conversion tool providing Gymnasium and PettingZoo bindings for popular external reinforcement learning environments." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "Shimmy-1.3.0-py3-none-any.whl", hash = "sha256:de608fb53fab0130ad5dc8a50ae0e6b0122aa3b808cc2f3e7bde618053dcf30e"}, + {file = "Shimmy-1.3.0.tar.gz", hash = "sha256:f45fbeaa81a0e755abc8251d5741cd4b7d5dddd003aaccda7960e62bee82b493"}, +] + +[package.dependencies] +ale-py = {version = ">=0.8.1,<0.9.0", optional = true, markers = "extra == \"atari\""} +gymnasium = ">=0.27.0" +numpy = ">=1.18.0" + +[package.extras] +all = ["ale-py (>=0.8.1,<0.9.0)", "bsuite (>=0.3.5)", "dm-control (>=1.0.10)", "dm-env (>=1.6)", "dm-meltingpot (>=2.2.0)", "gym (>=0.26.2)", "h5py (>=3.7.0)", "imageio", "open-spiel (>=1.2)", "pettingzoo (>=1.23)"] +atari = ["ale-py (>=0.8.1,<0.9.0)"] +bsuite = ["bsuite (>=0.3.5)"] +dm-control = ["dm-control (>=1.0.10)", "h5py (>=3.7.0)", "imageio"] +dm-control-multi-agent = ["dm-control (>=1.0.10)", "h5py (>=3.7.0)", "imageio", "pettingzoo (>=1.23)"] +dm-lab = ["dm-env (>=1.6)"] +gym-v21 = ["gym (>=0.21.0,<0.26)", "pyglet (==1.5.11)"] +gym-v26 = ["gym (>=0.26.2)"] +meltingpot = ["dm-meltingpot (>=2.2.0)", "pettingzoo (>=1.23)"] +openspiel = ["open-spiel (>=1.2)", "pettingzoo (>=1.23)"] +testing = ["autorom[accept-rom-license] (>=0.6.0,<0.7.0)", "pillow (>=9.3.0)", "pytest (==7.1.3)"] + [[package]] name = "six" version = "1.16.0" @@ -3274,12 +3498,21 @@ files = [ ] [package.dependencies] +autorom = {version = ">=0.6.1,<0.7.0", extras = ["accept-rom-license"], optional = true, markers = "extra == \"extra\""} cloudpickle = "*" gymnasium = ">=0.28.1,<0.30" matplotlib = "*" numpy = ">=1.20" +opencv-python = {version = "*", optional = true, markers = "extra == \"extra\""} pandas = "*" +pillow = {version = "*", optional = true, markers = "extra == \"extra\""} +psutil = {version = "*", optional = true, markers = "extra == \"extra\""} +pygame = {version = "*", optional = true, markers = "extra == \"extra\""} +rich = {version = "*", optional = true, markers = "extra == \"extra\""} +shimmy = {version = ">=1.3.0,<1.4.0", extras = ["atari"], optional = true, markers = "extra == \"extra\""} +tensorboard = {version = ">=2.9.1", optional = true, markers = "extra == \"extra\""} torch = ">=1.13" +tqdm = {version = "*", optional = true, markers = "extra == \"extra\""} [package.extras] docs = ["sphinx (>=5,<8)", "sphinx-autobuild", "sphinx-copybutton", "sphinx-rtd-theme (>=1.3.0)", "sphinxcontrib.spelling"] @@ -3306,6 +3539,31 @@ pure-eval = "*" [package.extras] tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] +[[package]] +name = "swig" +version = "4.2.1" +description = "SWIG is a software development tool that connects programs written in C and C++ with a variety of high-level programming languages." 
+optional = false +python-versions = "*" +files = [ + {file = "swig-4.2.1-py2.py3-none-macosx_10_9_universal2.whl", hash = "sha256:3b7389a1c86d39637b7364f11a1dd8cfb52b1ebba40a2356d2ded74498f4cff7"}, + {file = "swig-4.2.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:5c8826b415d6feaca2bbf1f4bd0ff900d6dbb9ddd3b1850f95129b8aac1eb44b"}, + {file = "swig-4.2.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:6f30b409d20b1e96f5432bf94e0106cf68596884b6da70323d5a21b8f43e3f3b"}, + {file = "swig-4.2.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c4d8e3c928d5efba698b4610a9292b51a0597958ced79506f76f102ac621184"}, + {file = "swig-4.2.1-py2.py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f8931d0f33a91a611f084a26ecf1364845b5ff8b417db2b99cc8b8a4d8215e3"}, + {file = "swig-4.2.1-py2.py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9f3482aadf1d6ce3122a84fcd854c32dd5031dd65dba5d5dd9f73deb7333c505"}, + {file = "swig-4.2.1-py2.py3-none-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7ce1fd4518ca0a63b809177ed8e0ec11ab485f8ebb66752605c978b964e75b5b"}, + {file = "swig-4.2.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c7b2db2ccff556dbfc88ef98773b3524b18bc4077ea1d7a5d7339c3a1f3f70dc"}, + {file = "swig-4.2.1-py2.py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:0cd1812ef95a806eec82ee74b601aa0afd616beffb4227afe090bdc12f690286"}, + {file = "swig-4.2.1-py2.py3-none-musllinux_1_1_i686.whl", hash = "sha256:bfe68d1c8294632bc6e9a1be7abfb3daafce0794d8dfa94fbccd6326073124b8"}, + {file = "swig-4.2.1-py2.py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:23d2f90145b32e74f7386b5a99168ecd3000fad232530866c2b390325a39da19"}, + {file = "swig-4.2.1-py2.py3-none-musllinux_1_1_s390x.whl", hash = "sha256:3b19337ab24aa5483bea5a36282e454c18ed03af1ada0efd92d02c1bea96dbd1"}, + {file = "swig-4.2.1-py2.py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:30bfb7d8f4bc6ae843c9f36825abfe1fbdadf0982cbb119af64da3f750c01620"}, + {file = "swig-4.2.1-py2.py3-none-win32.whl", hash = "sha256:41cbe797f6ba49fc600c629928e1afd2632be23bc7d1603543360234e1773dae"}, + {file = "swig-4.2.1-py2.py3-none-win_amd64.whl", hash = "sha256:2482156f6fba8a8a720246a95d203530834e3d8da981f904ef71c1a7e9d68efe"}, + {file = "swig-4.2.1.tar.gz", hash = "sha256:cdbb1748132ce99defaf4ebf1736c457fb13788183264af28f17d0d477e109ba"}, +] + [[package]] name = "sympy" version = "1.12" @@ -3609,6 +3867,26 @@ files = [ {file = "tornado-6.4.tar.gz", hash = "sha256:72291fa6e6bc84e626589f1c29d90a5a6d593ef5ae68052ee2ef000dfd273dee"}, ] +[[package]] +name = "tqdm" +version = "4.66.4" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.4-py3-none-any.whl", hash = "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644"}, + {file = "tqdm-4.66.4.tar.gz", hash = "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "traitlets" version = "5.14.3" @@ -3892,4 +4170,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = 
"667d056b23dfc57df4ecc541631a84b34bae26d3026632b119df148779041f57" +content-hash = "ecb3fb36aa0c91cb21e909dab268c7d7ee29580cb6c5467d5cdd4a033d25cad2" diff --git a/ppo-LunarLander-v2-good.zip b/ppo-LunarLander-v2-good.zip new file mode 100644 index 0000000..e5a98cc --- /dev/null +++ b/ppo-LunarLander-v2-good.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e3271a74929765f40943d0c23d571ba7ca825b3bf711d79215d8a836ab52ec +size 150082 diff --git a/pyproject.toml b/pyproject.toml index 7b25f45..67b0370 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,8 +12,8 @@ jupyterlab = "^4.2.0" keras = "^3.3.3" matplotlib = "^3.9.0" numpy = "^1.26.4" -stable-baselines3 = "^2.3.2" -gymnasium = "^0.29.1" +stable-baselines3 = {extras = ["extra"], version = "^2.3.2"} +gymnasium = {extras = ["box2d"], version = "^0.29.1"} tensorflow = "^2.16.1" scikit-image = "0.22.0"