diff --git a/1a_image_classification_solutions.ipynb b/1a_image_classification_solutions.ipynb new file mode 100644 index 0000000..873ccf0 --- /dev/null +++ b/1a_image_classification_solutions.ipynb @@ -0,0 +1,359 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image Classification\n", + "\n", + "Simple image classification using the [CIFAR-10 dataset](https://www.cs.toronto.edu/~kriz/cifar.html).\n", + "\n", + "The CIFAR-10 dataset has 60,000 32x32 colour images in 10 classes (6,000 per class). These are split into 50,000 training images and 10,000 testing images.\n", + "\n", + "Here are the classes:\n", + "1. Airplane\n", + "2. Car\n", + "3. Bird\n", + "4. Cat\n", + "5. Deer\n", + "6. Dog\n", + "7. Frog\n", + "8. Horse\n", + "9. Ship\n", + "10. Truck" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import keras\n", + "import numpy as np\n", + "import os\n", + "from keras.src.datasets.cifar import load_batch\n", + "from keras import backend\n", + "from skimage.transform import resize\n", + "\n", + "classes = [\n", + " \"airplane\",\n", + " \"car\",\n", + " \"bird\",\n", + " \"cat\",\n", + " \"deer\",\n", + " \"dog\",\n", + " \"frog\",\n", + " \"horse\",\n", + " \"ship\",\n", + " \"truck\",\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the dataset 💿" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_train_samples = 50000\n", + "\n", + "x_train = np.empty((num_train_samples, 3, 32, 32), dtype=\"uint8\")\n", + "y_train = np.empty((num_train_samples,), dtype=\"uint8\")\n", + "\n", + "for i in range(1, 6):\n", + " file_path = os.path.join(\"cifar-10-batches-py\", f\"data_batch_{i}\")\n", + " (\n", + " x_train[(i - 1) * 10000 : i * 10000, :, :, :],\n", + " y_train[(i - 1) * 10000 : i * 10000],\n", + " ) = 
load_batch(file_path)\n", + "\n", + "file_path = os.path.join(\"cifar-10-batches-py\", \"test_batch\")\n", + "x_test, y_test = load_batch(file_path)\n", + "\n", + "y_train = np.reshape(y_train, (len(y_train), 1))\n", + "y_test = np.reshape(y_test, (len(y_test), 1))\n", + "\n", + "if backend.image_data_format() == \"channels_last\":\n", + " x_train = x_train.transpose(0, 2, 3, 1)\n", + " x_test = x_test.transpose(0, 2, 3, 1)\n", + "\n", + "x_test = x_test.astype(x_train.dtype)\n", + "y_test = y_test.astype(y_train.dtype)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring 🔎" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(x_train.shape)\n", + "print(y_train.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`x_train` holds the actual images in the dataset. You can see they are 32x32 pixels, and the 3 is for the red, green and blue values.\n", + "`y_train` holds the category for each image; this is just a single number between 0 and 9." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_train[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(x_train[1])\n", + "print(y_train[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Processing 🫧\n", + "\n", + "Our neural network works with decimal numbers between 0 and 1, so we need to convert the categories into 0s and 1s. We take an array of 0s and set a 1 for the category.\n", + "\n", + "For example, the number 2 would get encoded to `[0, 0, 1, ...]`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_train_one_hot = keras.utils.to_categorical(y_train, 10)\n", + "y_test_one_hot = keras.utils.to_categorical(y_test, 10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# task: can you print out the one hot encoded label for the truck above?\n", + "print(y_train_one_hot[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At the moment each pixel is represented by a number from 0 to 255. We also need to convert these to be between 0 and 1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scale pixels from 0-255 down to 0-1. The images start out as uint8, so the\n", + "if x_train.dtype == np.uint8:  # dtype check stops a re-run from dividing by 255 twice\n", + "    x_train = x_train.astype(\"float32\") / 255\n", + "    x_test = x_test.astype(\"float32\") / 255" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_train[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build and Train CNN 🔨" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from keras.models import Sequential\n", + "from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D\n", + "\n", + "model = Sequential()\n", + "# Declare the input shape (32x32 RGB images) with an explicit Input layer.\n", + "model.add(Input(shape=(32, 32, 3)))\n", + "model.add(Conv2D(32, (3, 3), activation=\"relu\", padding=\"same\"))\n", + "model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "model.add(Dropout(0.25))\n", + "model.add(Conv2D(64, (3, 3), activation=\"relu\", padding=\"same\"))\n", + "model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "model.add(Dropout(0.25))\n", + "model.add(Flatten())\n", + "model.add(Dense(512, activation=\"relu\"))\n", + "model.add(Dropout(0.5))\n", + "model.add(Dense(10, activation=\"softmax\"))\n", + "\n", + "model.summary()" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hist = model.fit(\n", + " x_train, y_train_one_hot, batch_size=32, epochs=1, validation_split=0.2\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate 🧪" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.evaluate(x_test, y_test_one_hot)[1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "~50% accuracy... not great" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What about a picture it hasn't seen before?\n", + "\n", + "Let's feed a new picture of a cat to the model and see what it thinks... As a reminder, cat is one of the ten classes the model was trained on, but it has never seen this particular picture." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cat = plt.imread(\"cat.jpg\")\n", + "cat_resized = resize(cat, (32, 32, 3))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(cat_resized)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "probabilities = model.predict(\n", + " np.array(\n", + " [\n", + " cat_resized,\n", + " ]\n", + " )\n", + ")\n", + "probabilities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "index = np.argsort(probabilities[0, :])  # ascending order, so the last entry is the most likely class\n", + "print(f\"Most likely: {classes[index[-1]]}, probability={probabilities[0, index[-1]]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Additional Challenges 🏆\n", + "\n", + "- Try adding in some more layers to the neural network, adding a second `Conv2D` layer under both of the existing ones.\n", + "- Try increasing the number of `epochs` when training.\n", + "- Save/load your model with `model.save('mymodel.h5')` and `keras.models.load_model('mymodel.h5')`." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}