diff --git a/notebooks/TP2_masterLiTL_2223_CORRECT.ipynb b/notebooks/TP2_masterLiTL_2223_CORRECT.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e5a7b54428546e2caffb91173a3b2bfa9c8ba2ee
--- /dev/null
+++ b/notebooks/TP2_masterLiTL_2223_CORRECT.ipynb
@@ -0,0 +1,1721 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU",
+ "gpuClass": "standard"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "XCHhtzOXQ2po"
+ },
+ "source": [
+ "# TP 2: Linear Algebra and Feedforward neural network\n",
+ "Master LiTL - 2022-2023\n",
+ "\n",
+ "## Requirements\n",
+ "In this section, we will go through some code to learn how to manipulate matrices and tensors, and we will take a look at some PyTorch code that allows us to define, train and evaluate a simple neural network.\n",
+ "The modules used are the same as in the previous session, *Numpy* and *Scikit*, with the addition of *PyTorch*. They are all already available within Colab.\n",
+ "\n",
+ "## Part 1: Linear Algebra\n",
+ "\n",
+ "In this section, we will go through some Python code to deal with matrices and also tensors, the data structures used in PyTorch.\n",
+ "\n",
+ "Sources:\n",
+ "* Linear Algebra explained in the context of deep learning: https://towardsdatascience.com/linear-algebra-explained-in-the-context-of-deep-learning-8fcb8fca1494\n",
+ "* PyTorch tutorial: https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py\n",
+ "* PyTorch doc on tensors: https://pytorch.org/docs/stable/torch.html\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Useful imports\n",
+ "import numpy as np\n",
+ "import torch"
+ ],
+ "metadata": {
+ "id": "2t2sdvtdsrjO"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "G3Hk9fJuBVxk"
+ },
+ "source": [
+ "## 1.1 Numpy arrays\n",
+ "\n",
+ "NumPy’s main object is the homogeneous multidimensional array. It is a table of elements (usually numbers), all of the same type.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 1.1.1 Numpy arrays\n",
+ "\n",
+ "▶▶ **Look at the code below and check that you understand each line:**\n",
+ "* We define a numpy array (i.e. a vector) **x** from a list\n",
+ "* We define a numpy array of shape 3x2 (i.e. a matrix) initialized with random numbers, called **W**\n",
+ "* We define a scalar, **b**\n",
+ "* Finally, with all these elements, we can compute **h = W.x + b**"
+ ],
+ "metadata": {
+ "id": "5hfuybaGeOX_"
+ }
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "W2IvCK4gPUAv",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "27aaddfd-d3ed-471a-ad74-4211df3019fa"
+ },
+ "source": [
+ "x = np.array([1,2])\n",
+ "print(\"Our input vector with 2 elements:\\n\", x)\n",
+ "print( \"x shape:\", x.shape)\n",
+ "\n",
+ "print( \"x data type\", x.dtype)\n",
+ "# Give a list of elements\n",
+ "# a = np.array(1,2,3,4) # WRONG\n",
+ "# a = np.array([1,2,3,4]) # RIGHT\n",
+ "\n",
+ "# Generate a random matrix (with a generator and a seed, for reproducible results)\n",
+ "rng = np.random.default_rng(seed=42)\n",
+ "W = rng.random((3, 2))\n",
+ "print(\"\\n Our weight matrix, of shape 3x2:\\n\", W)\n",
+ "print( \"W shape:\", W.shape)\n",
+ "print( \"W data type\", W.dtype)\n",
+ "\n",
+ "# Bias, a scalar\n",
+ "b = 1\n",
+ "\n",
+ "# Now, try to multiply\n",
+ "h = W.dot(x) + b\n",
+ "print(\"\\n Our h layer:\\n\", h)\n",
+ "print( \"h shape:\", h.shape)\n",
+ "print( \"h data type\", h.dtype)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Our input vector with 2 elements:\n",
+ " [1 2]\n",
+ "x shape: (2,)\n",
+ "x data type int64\n",
+ "\n",
+ " Our weight matrix, of shape 3x2:\n",
+ " [[0.77395605 0.43887844]\n",
+ " [0.85859792 0.69736803]\n",
+ " [0.09417735 0.97562235]]\n",
+ "W shape: (3, 2)\n",
+ "W data type float64\n",
+ "\n",
+ " Our h layer:\n",
+ " [2.65171293 3.25333398 3.04542205]\n",
+ "h shape: (3,)\n",
+ "h data type float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 1.1.2 Operations on arrays\n",
+ "\n",
+ "▶▶ **Look at the code below and check that you understand each line:**\n",
+ "* How to reshape a matrix, i.e. change its dimensions\n",
+ "* How to compute the transpose of a vector / matrix"
+ ],
+ "metadata": {
+ "id": "L18_HL5qfvFO"
+ }
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "hKzJk0aaPUv4",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "1c39300a-d2e0-4eaf-a3bc-4d299806c3ad"
+ },
+ "source": [
+ "# Useful transformations\n",
+ "h = h.reshape((3,1))\n",
+ "print(\"\\n h reshape:\\n\", h)\n",
+ "print( \"h shape:\", h.shape)\n",
+ "\n",
+ "h1 = np.transpose(h)\n",
+ "print(\"\\n h transpose:\\n\", h1)\n",
+ "print( \"h shape:\", h1.shape)\n",
+ "\n",
+ "h2 = h.T\n",
+ "print(\"\\n h transpose:\\n\", h2)\n",
+ "print( \"h shape:\", h2.shape)\n",
+ "\n",
+ "Wt = W.T\n",
+ "print(\"\\nW:\\n\", W)\n",
+ "print(\"\\nW.T:\\n\", Wt)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ " h reshape:\n",
+ " [[2.65171293]\n",
+ " [3.25333398]\n",
+ " [3.04542205]]\n",
+ "h shape: (3, 1)\n",
+ "\n",
+ " h transpose:\n",
+ " [[2.65171293 3.25333398 3.04542205]]\n",
+ "h shape: (1, 3)\n",
+ "\n",
+ " h transpose:\n",
+ " [[2.65171293 3.25333398 3.04542205]]\n",
+ "h shape: (1, 3)\n",
+ "\n",
+ "W:\n",
+ " [[0.77395605 0.43887844]\n",
+ " [0.85859792 0.69736803]\n",
+ " [0.09417735 0.97562235]]\n",
+ "\n",
+ "W.T:\n",
+ " [[0.77395605 0.85859792 0.09417735]\n",
+ " [0.43887844 0.69736803 0.97562235]]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "▶▶ **A last note: creating an identity matrix**"
+ ],
+ "metadata": {
+ "id": "O_p_oGvRhnkF"
+ }
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "KpIkzqN6PaJR",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "de4222fd-1c9d-4563-f9b9-1783e5321f45"
+ },
+ "source": [
+ "## numpy code to create identity matrix\n",
+ "a = np.eye(4)\n",
+ "print(a)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[[1. 0. 0. 0.]\n",
+ " [0. 1. 0. 0.]\n",
+ " [0. 0. 1. 0.]\n",
+ " [0. 0. 0. 1.]]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Il-lX6VCA7gk"
+ },
+ "source": [
+ "## 1.2 Tensors\n",
+ "\n",
+ "To implement neural networks in PyTorch, we use tensors:\n",
+ "* specialized data structures that are very similar to arrays and matrices\n",
+ "* used to encode the inputs and outputs of a model, as well as the model’s parameters\n",
+ "* similar to NumPy’s ndarrays, except that tensors can run on GPUs or other specialized hardware to accelerate computing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hPqpGGZPCRT-"
+ },
+ "source": [
+ "### 1.2.1 Tensor initialization\n",
+ "\n",
+ "▶▶ **Look at the code below and check that you understand each line:**\n",
+ "* We define a PyTorch tensor (i.e. a matrix) **x_data** from a list of lists\n",
+ "* We define a PyTorch tensor (i.e. a matrix) **x_np** from a numpy array\n",
+ "* How to initialize a random tensor, a tensor of ones and a tensor of zeros\n",
+ "* Finally, we define a PyTorch tensor (i.e. a matrix) from another tensor:\n",
+ " * **x_ones**: a tensor of ones that keeps the shape and data type of **x_data**\n",
+ " * **x_rand**: a tensor of random values with the same shape as **x_data** (here we override the data type)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "HaEdsMG6BAh0",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "09e6047b-40d1-4900-9ec8-bef108f66969"
+ },
+ "source": [
+ "# Tensor initialization\n",
+ "\n",
+ "## from data. The data type is automatically inferred.\n",
+ "data = [[1, 2], [3, 4]]\n",
+ "x_data = torch.tensor(data)\n",
+ "print( \"x_data\", x_data)\n",
+ "print( \"data type x_data=\", x_data.dtype)\n",
+ "\n",
+ "## from a numpy array\n",
+ "np_array = np.array(data)\n",
+ "x_np = torch.from_numpy(np_array)\n",
+ "print(\"\\nx_np\", x_np)\n",
+ "print( \"data type, np_array=\", np_array.dtype, \"x_data=\", x_np.dtype)\n",
+ "\n",
+ "## with random values / ones / zeros\n",
+ "shape = (2, 3,) # shape is a tuple of tensor dimensions\n",
+ "rand_tensor = torch.rand(shape)\n",
+ "ones_tensor = torch.ones(shape)\n",
+ "zeros_tensor = torch.zeros(shape)\n",
+ "\n",
+ "print(f\"Random Tensor: \\n {rand_tensor} \\n\")\n",
+ "print(f\"Ones Tensor: \\n {ones_tensor} \\n\")\n",
+ "print(f\"Zeros Tensor: \\n {zeros_tensor}\")\n",
+ "\n",
+ "## from another tensor\n",
+ "x_ones = torch.ones_like(x_data) # retains the properties of x_data\n",
+ "print(f\"\\nFrom Ones Tensor: \\n {x_ones} \\n\")\n",
+ "\n",
+ "x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data\n",
+ "print(f\"From Random Tensor: \\n {x_rand} \\n\")"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "x_data tensor([[1, 2],\n",
+ " [3, 4]])\n",
+ "data type x_data= torch.int64\n",
+ "\n",
+ "x_np tensor([[1, 2],\n",
+ " [3, 4]])\n",
+ "data type, np_array= int64 x_data= torch.int64\n",
+ "Random Tensor: \n",
+ " tensor([[0.7422, 0.2063, 0.4437],\n",
+ " [0.4821, 0.8494, 0.7161]]) \n",
+ "\n",
+ "Ones Tensor: \n",
+ " tensor([[1., 1., 1.],\n",
+ " [1., 1., 1.]]) \n",
+ "\n",
+ "Zeros Tensor: \n",
+ " tensor([[0., 0., 0.],\n",
+ " [0., 0., 0.]])\n",
+ "\n",
+ "From Ones Tensor: \n",
+ " tensor([[1, 1],\n",
+ " [1, 1]]) \n",
+ "\n",
+ "From Random Tensor: \n",
+ " tensor([[0.9174, 0.8538],\n",
+ " [0.2395, 0.4414]]) \n",
+ "\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oFDVEZcBCWF_"
+ },
+ "source": [
+ "### 1.2.2 Tensor attributes\n",
+ "\n",
+ "▶▶ **A tensor has different attributes, print the values for:**\n",
+ "* the shape of the tensor\n",
+ "* the type of the data stored\n",
+ "* the device on which the data are stored\n",
+ "\n",
+ "Look at the doc here: https://pytorch.org/docs/stable/tensor_attributes.html"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "kS4TtR9DCJcq",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "acf50a6a-8327-44f3-a20c-6c014304dfb9"
+ },
+ "source": [
+ "# Tensor attributes\n",
+ "tensor = torch.rand(3, 4)\n",
+ "\n",
+ "print(f\"Shape of tensor: {tensor.shape}\")\n",
+ "print(f\"Datatype of tensor: {tensor.dtype}\")\n",
+ "print(f\"Device tensor is stored on: {tensor.device}\")"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Shape of tensor: torch.Size([3, 4])\n",
+ "Datatype of tensor: torch.float32\n",
+ "Device tensor is stored on: cpu\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tu8RM6O7CaKO"
+ },
+ "source": [
+ "### 1.2.3 Move to GPU\n",
+ "\n",
+ "The code below is used to:\n",
+ "* check on which device the code is running, 'cuda' stands for GPU. If no GPU is found, we use the CPU.\n",
+ "\n",
+ "▶▶ **Check and move to GPU:**\n",
+ "* Run the code: it should say 'no gpu'\n",
+ "* Move to GPU: in Colab, allocate a GPU by going to Edit > Notebook Settings (Modifier > Paramètres du notebook)\n",
+ " * you'll see a connection indicator in the upper right part of the screen\n",
+ "* Run the code from 1.2 again, along with the cell below (you can use the function Run / Run before or Exécution / Exécuter avant); you'll need to do all the imports again. Do you see the difference?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "nT7n30VpCOzF",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "bd6cfc6f-cdc3-47af-c937-60d1b7559269"
+ },
+ "source": [
+ "# We move our tensor to the GPU if available\n",
+ "if torch.cuda.is_available():\n",
+ "    tensor = tensor.to('cuda')\n",
+ "    print(f\"Device tensor is stored on: {tensor.device}\")\n",
+ "else:\n",
+ "    print(\"no gpu\")\n",
+ "\n",
+ "print(tensor)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "no gpu\n",
+ "tensor([[0.8571, 0.8930, 0.3581, 0.1879],\n",
+ " [0.6470, 0.9328, 0.8852, 0.7041],\n",
+ " [0.2505, 0.0432, 0.3965, 0.7014]])\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "VdqHVRkHCcgq"
+ },
+ "source": [
+ "Below, run after moving to GPU."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "nyZPKBvOGsyf",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "82cf8bb9-8cbb-4839-af07-2cf70b5bb1a4"
+ },
+ "source": [
+ "# We move our tensor to the GPU if available\n",
+ "if torch.cuda.is_available():\n",
+ "    tensor = tensor.to('cuda')\n",
+ "    print(f\"Device tensor is stored on: {tensor.device}\")\n",
+ "else:\n",
+ "    print(\"no gpu\")\n",
+ "\n",
+ "print(tensor)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Device tensor is stored on: cuda:0\n",
+ "tensor([[0.5677, 0.5670, 0.4132, 0.0301],\n",
+ " [0.8031, 0.6536, 0.4139, 0.6157],\n",
+ " [0.4833, 0.1557, 0.5355, 0.4551]], device='cuda:0')\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "8um7SDWGCp8o"
+ },
+ "source": [
+ "### 1.2.4 Tensor operations\n",
+ "\n",
+ "Doc: https://pytorch.org/docs/stable/torch.html\n",
+ "\n",
+ "▶▶ **Slicing operations:**\n",
+ "* Below we use slicing operations to modify tensors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Tensor operations: similar to numpy arrays\n",
+ "tensor = torch.ones(4, 4)\n",
+ "print(tensor)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "BgF-ypEurJCk",
+ "outputId": "18e6eda8-d5d3-49b9-eabb-9fc05ce0fce5"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "tensor([[1., 1., 1., 1.],\n",
+ " [1., 1., 1., 1.],\n",
+ " [1., 1., 1., 1.],\n",
+ " [1., 1., 1., 1.]])\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "7yLviqmYC3sZ",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "553c83d0-a6a0-40be-eac3-516e30acce66"
+ },
+ "source": [
+ "# ---------------------------------------------------------\n",
+ "# TODO: What do you expect?\n",
+ "# ---------------------------------------------------------\n",
+ "## Slicing\n",
+ "print(\"\\nSlicing\")\n",
+ "tensor[:,1] = 0\n",
+ "print(tensor)\n",
+ "\n",
+ "# ---------------------------------------------------------\n",
+ "# TODO: Change the first column using the values in l\n",
+ "# ---------------------------------------------------------\n",
+ "l = [1., 2., 3., 4.]\n",
+ "l = torch.tensor( l )\n",
+ "tensor[:, 0] = l\n",
+ "print(tensor)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "Slicing\n",
+ "tensor([[1., 0., 1., 1.],\n",
+ " [1., 0., 1., 1.],\n",
+ " [1., 0., 1., 1.],\n",
+ " [1., 0., 1., 1.]])\n",
+ "tensor([[1., 0., 1., 1.],\n",
+ " [2., 0., 1., 1.],\n",
+ " [3., 0., 1., 1.],\n",
+ " [4., 0., 1., 1.]])\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "▶▶ **Other operations:**\n",
+ "* Check the code below that performs:\n",
+ " * tensor concatenation\n",
+ " * tensor multiplication"
+ ],
+ "metadata": {
+ "id": "uCZ2AWPmrW6q"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "## Concatenation\n",
+ "print(\"\\nConcatenate\")\n",
+ "t1 = torch.cat([tensor, tensor, tensor], dim=1)\n",
+ "print(t1)\n",
+ "\n",
+ "## Multiplication: element-wise\n",
+ "print(\"\\nMultiply\")\n",
+ "# This computes the element-wise product\n",
+ "t2 = tensor.mul(tensor)\n",
+ "print(f\"tensor.mul(tensor) \\n {t2} \\n\")\n",
+ "# Alternative syntax:\n",
+ "t3 = tensor * tensor\n",
+ "print(f\"tensor * tensor \\n {t3}\")\n",
+ "\n",
+ "## Matrix multiplication\n",
+ "t4 = tensor.matmul(tensor.T)\n",
+ "print(f\"tensor.matmul(tensor.T) \\n {t4} \\n\")\n",
+ "# Alternative syntax:\n",
+ "t5 = tensor @ tensor.T\n",
+ "print(f\"tensor @ tensor.T \\n {t5}\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "t_AkQSnarNX8",
+ "outputId": "28ceafc8-b42e-4291-efd1-e17d0c7cf02f"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "Concatenate\n",
+ "tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],\n",
+ " [2., 0., 1., 1., 2., 0., 1., 1., 2., 0., 1., 1.],\n",
+ " [3., 0., 1., 1., 3., 0., 1., 1., 3., 0., 1., 1.],\n",
+ " [4., 0., 1., 1., 4., 0., 1., 1., 4., 0., 1., 1.]])\n",
+ "\n",
+ "Multiply\n",
+ "tensor.mul(tensor) \n",
+ " tensor([[ 1., 0., 1., 1.],\n",
+ " [ 4., 0., 1., 1.],\n",
+ " [ 9., 0., 1., 1.],\n",
+ " [16., 0., 1., 1.]]) \n",
+ "\n",
+ "tensor * tensor \n",
+ " tensor([[ 1., 0., 1., 1.],\n",
+ " [ 4., 0., 1., 1.],\n",
+ " [ 9., 0., 1., 1.],\n",
+ " [16., 0., 1., 1.]])\n",
+ "tensor.matmul(tensor.T) \n",
+ " tensor([[ 3., 4., 5., 6.],\n",
+ " [ 4., 6., 8., 10.],\n",
+ " [ 5., 8., 11., 14.],\n",
+ " [ 6., 10., 14., 18.]]) \n",
+ "\n",
+ "tensor @ tensor.T \n",
+ " tensor([[ 3., 4., 5., 6.],\n",
+ " [ 4., 6., 8., 10.],\n",
+ " [ 5., 8., 11., 14.],\n",
+ " [ 6., 10., 14., 18.]])\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5ulTT2k_Hs97"
+ },
+ "source": [
+ "### 1.2.5 Tensor operations on GPU\n",
+ "\n",
+ "The tensor is stored on the CPU by default.\n",
+ "\n",
+ "▶▶ **Initialize the tensor using *device='cuda'*: where are t1, ..., t5 stored?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "atwxGd1_IdxI",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "0fda45ea-e1fe-4bb3-c500-c4fda76379ce"
+ },
+ "source": [
+ "# Tensor operations: similar to numpy arrays\n",
+ "\n",
+ "tensor = torch.ones(4, 4, device='cuda')\n",
+ "print(tensor)\n",
+ "\n",
+ "# ---------------------------------------------------------\n",
+ "# TODO: What do you expect?\n",
+ "# ---------------------------------------------------------\n",
+ "## Slicing\n",
+ "print(\"\\nSlicing\")\n",
"tensor[:,1] = 0 \n", + "print(tensor)\n", + "\n", + "# ---------------------------------------------------------\n", + "# TODO: Change the first column with the value in l\n", + "# ---------------------------------------------------------\n", + "l =[1.,2.,3.,4.] \n", + "l = torch.tensor( l )\n", + "tensor[:, 0] = l\n", + "print(tensor)\n", + "\n", + "\n", + "## Concatenation\n", + "print(\"\\nConcatenate\")\n", + "t1 = torch.cat([tensor, tensor, tensor], dim=1)\n", + "print(t1)\n", + "\n", + "## Multiplication: element_wise\n", + "print(\"\\nMultiply\")\n", + "# This computes the element-wise product\n", + "t2 = tensor.mul(tensor)\n", + "print(f\"tensor.mul(tensor) \\n {t2} \\n\")\n", + "# Alternative syntax:\n", + "t3 = tensor * tensor\n", + "print(f\"tensor * tensor \\n {t3}\")\n", + "\n", + "## Matrix multiplication\n", + "t4 = tensor.matmul(tensor.T)\n", + "print(f\"tensor.matmul(tensor.T) \\n {t4} \\n\")\n", + "# Alternative syntax:\n", + "t5 = tensor @ tensor.T\n", + "print(f\"tensor @ tensor.T \\n {t5}\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "tensor([[1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.],\n", + " [1., 1., 1., 1.]], device='cuda:0')\n", + "\n", + "Slicing\n", + "tensor([[1., 0., 1., 1.],\n", + " [1., 0., 1., 1.],\n", + " [1., 0., 1., 1.],\n", + " [1., 0., 1., 1.]], device='cuda:0')\n", + "tensor([[1., 0., 1., 1.],\n", + " [2., 0., 1., 1.],\n", + " [3., 0., 1., 1.],\n", + " [4., 0., 1., 1.]], device='cuda:0')\n", + "\n", + "Concatenate\n", + "tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],\n", + " [2., 0., 1., 1., 2., 0., 1., 1., 2., 0., 1., 1.],\n", + " [3., 0., 1., 1., 3., 0., 1., 1., 3., 0., 1., 1.],\n", + " [4., 0., 1., 1., 4., 0., 1., 1., 4., 0., 1., 1.]], device='cuda:0')\n", + "\n", + "Multiply\n", + "tensor.mul(tensor) \n", + " tensor([[ 1., 0., 1., 1.],\n", + " [ 4., 0., 1., 1.],\n", + " [ 9., 0., 1., 1.],\n", + " [16., 0., 1., 1.]], device='cuda:0') \n", + "\n", + "tensor * tensor \n", + " tensor([[ 1., 0., 1., 1.],\n", + " [ 4., 0., 1., 1.],\n", + " [ 9., 0., 1., 1.],\n", + " [16., 0., 1., 1.]], device='cuda:0')\n", + "tensor.matmul(tensor.T) \n", + " tensor([[ 3., 4., 5., 6.],\n", + " [ 4., 6., 8., 10.],\n", + " [ 5., 8., 11., 14.],\n", + " [ 6., 10., 14., 18.]], device='cuda:0') \n", + "\n", + "tensor @ tensor.T \n", + " tensor([[ 3., 4., 5., 6.],\n", + " [ 4., 6., 8., 10.],\n", + " [ 5., 8., 11., 14.],\n", + " [ 6., 10., 14., 18.]], device='cuda:0')\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UxW1jtX-GOfd" + }, + "source": [ + "### 1.2.5 Final exercise: compute *h*\n", + "\n", + "▶▶ **Compute the tensor h, using the same data for x and W as at the beginning of this TP.**\n", + "\n", + "```\n", + "x = np.array([1,2])\n", + "rng = np.random.default_rng(seed=42)\n", + "W = rng.random((3, 2))\n", + "```\n", + "\n", + "Important note: when multiplying matrices, we need to have the same data type, e.g. not **x** with *int* and **W** with *float*.\n", + "So you have to say that the vector **x** has the data type *float*. 
+ "* from the initialization: **x = torch.tensor([1,2], dtype=float)**\n",
+ "* from any tensor: **x = x.to(torch.float64)** (using **torch.float** here would give *float32*, which is not what we want)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "lwIanFgWD_YJ"
+ },
+ "source": [
+ "# --------------------------------------------------------\n",
+ "# TODO: Write the code to compute h = W.x+b\n",
+ "# --------------------------------------------------------\n",
+ "\n",
+ "# h = W.x + b\n",
+ "x = torch.tensor([1,2])\n",
+ "x = x.to(torch.float64) # be careful: using torch.float here would give float32\n",
+ "## OR\n",
+ "#x = torch.tensor([1,2], dtype=float)\n",
+ "print(\"Our input vector with 2 elements:\\n\", x)\n",
+ "print( \"x shape:\", x.shape)\n",
+ "print( \"x type:\", x.dtype )\n",
+ "\n",
+ "# Generate a random matrix (with a generator and a seed, for reproducible results)\n",
+ "rng = np.random.default_rng(seed=42)\n",
+ "W = rng.random((3, 2))\n",
+ "W_t = torch.from_numpy(W)\n",
+ "print(\"\\n Our weight matrix, of shape 3x2:\\n\", W)\n",
+ "print( \"W shape:\", W_t.shape)\n",
+ "print( \"W type:\", W.dtype)\n",
+ "\n",
+ "# Bias, a scalar\n",
+ "b = 1.0\n",
+ "\n",
+ "# Now, try to multiply\n",
+ "h_t = W_t.matmul(x) + b\n",
+ "print(\"\\n Our h layer:\\n\", h_t)\n",
+ "print( \"h shape:\", h_t.shape)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "na_tJOnfGDIz"
+ },
+ "source": [
+ "### Last minor note"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "lql9bH39G4Mw"
+ },
+ "source": [
+ "## Operations that have a _ suffix are in-place. For example, x.copy_(y) or x.t_() will change x.\n",
+ "print(tensor, \"\\n\")\n",
+ "tensor.add_(5)\n",
+ "print(tensor)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DGmy-dtuOtiw"
+ },
+ "source": [
+ "# Part 2: Feedforward Neural Network\n",
+ "\n",
+ "In this practical session, we will explore a simple neural network architecture for NLP applications; specifically, we will train a feedforward neural network for sentiment analysis, using the same dataset of reviews as in the previous session. We will also keep the bag of words representation.\n",
\n", + "\n", + "\n", + "Sources:\n", + "* This TP is inspired by a TP by Tim van de Cruys\n", + "* https://www.deeplearningwizard.com/deep_learning/practical_pytorch/pytorch_feedforward_neuralnetwork/\n", + "* https://pytorch.org/tutorials/beginner/text_sentiment_ngrams_tutorial.html\n", + "* https://medium.com/swlh/sentiment-classification-using-feed-forward-neural-network-in-pytorch-655811a0913f \n", + "* https://www.deeplearningwizard.com/deep_learning/practical_pytorch/pytorch_feedforward_neuralnetwork/" + ] + }, + { + "cell_type": "code", + "source": [ + "# Useful imports\n", + "import pandas as pd\n", + "import numpy as np\n", + "import re\n", + "import sklearn\n", + "\n", + "from sklearn.feature_extraction.text import CountVectorizer" + ], + "metadata": { + "id": "TKukE_hAAn_2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Path to data\n", + "train_path = \"allocine_train.tsv\"\n", + "dev_path = \"allocine_dev.tsv\"" + ], + "metadata": { + "id": "iUxRwO37Ap8h" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wdSyhJqpVczO" + }, + "source": [ + "## 2.1 Read and load the data\n", + "\n", + "Here we will keep the bag of word representation, as in the previous session. \n", + "\n", + "You can find different ways of dealing with the input data in PyTorch. The simplest solution is to use the DataLoader from PyTorch: \n", + "* the doc here https://pytorch.org/docs/stable/data.html and here https://pytorch.org/tutorials/beginner/basics/data_tutorial.html\n", + "* an example of use, with numpy array: https://www.kaggle.com/arunmohan003/sentiment-analysis-using-lstm-pytorch\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "You can also find many datasets for text ready to load in pytorch on: https://pytorch.org/text/stable/datasets.html" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CxRbwziSV_BY" + }, + "source": [ + "#### 2.1.1 Build BoW vectors (code given)\n", + "\n", + "The code below allows to use scikit methods you already know to generate the bag of word representation." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "SoVJ18s_oxkn" + }, + "source": [ + "# This will be the size of the vectors reprensenting the input\n", + "MAX_FEATURES = 5000 \n", + "\n", + "def vectorize_data( data_path, vectorizer=None ):\n", + " data_df = pd.read_csv( data_path, header=0,\n", + " delimiter=\"\\t\", quoting=3)\n", + " # If an existing vectorizer is not given, initialize the \"CountVectorizer\" \n", + " # object, which is scikit-learn's bag of words tool. 
\n", + " if not vectorizer:\n", + " vectorizer = CountVectorizer(\n", + " analyzer = \"word\",\n", + " max_features = MAX_FEATURES\n", + " ) \n", + " vectorizer.fit(data_df[\"review\"])\n", + " # Then transform the data\n", + " x_data = vectorizer.transform(data_df[\"review\"])\n", + " # Vectorize also the labels\n", + " y_data = np.asarray(data_df[\"sentiment\"])\n", + " return x_data, y_data, vectorizer \n", + "\n", + "x_train, y_train, vectorizer = vectorize_data( train_path )\n", + "x_dev, y_dev, _ = vectorize_data( dev_path, vectorizer )\n", + "\n", + "# Count_Vectorizer returns sparse arrays (for computational reasons)\n", + "# but PyTorch will expect dense input:\n", + "x_train = x_train.toarray()\n", + "x_dev = x_dev.toarray()\n", + "\n", + "print(\"Train:\", x_train.shape)\n", + "print(\"Dev:\", x_dev.shape)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mt00MaMmW1_P" + }, + "source": [ + "#### 2.1.2 Transform to tensors\n", + "\n", + "▶▶ **Create a dataset object within the PyTorch library:**\n", + "\n", + "Now we need to transform our data to tensors, in order to provide them as input to PyTorch. Follow the following steps:\n", + "\n", + "* 1- **torch.from_numpy( A_NUMPY_ARRAY )**: transform your array into a tensor\n", + " * Note: you need to transform tensor type to float, with **MY_TENSOR.to(torch.float)** (or cryptic error saying it was expecting long...).\n", + " * Print the shape of the tensor for your training data.\n", + "* 2- **torch.utils.data.TensorDataset(INPUT_TENSOR, TARGET_TENSOR)**: Dataset wrapping tensors. In particular: giv\n", + " * Take tensors as inputs, \n", + " \n", + "* 3- **torch.utils.data.DataLoader**: many arguments in the constructor:\n", + " * In particular, *dataset* of the type TensorDataset can be used\n", + " * We'd rather shuffling our data in general, can be done here by changing the value of one argument\n", + " * Note also the possibility to change the batch_size, we'll talk about it later\n", + "\n", + "```\n", + "DataLoader(\n", + " dataset,\n", + " batch_size=1,\n", + " shuffle=False,\n", + " num_workers=0,\n", + " collate_fn=None,\n", + " pin_memory=False,\n", + " )\n", + " ```\n" + ] + }, + { + "cell_type": "code", + "source": [ + "# Useful imports\n", + "import torch\n", + "from torch.utils.data import TensorDataset, DataLoader" + ], + "metadata": { + "id": "x4gzAYdyFUoR" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "JMLPp3vnoxnG" + }, + "source": [ + "# create Tensor dataset\n", + "tensor_x_train = torch.from_numpy(x_train).to(torch.float)\n", + "print( tensor_x_train.shape )\n", + "train_data = TensorDataset( torch.from_numpy(x_train).to(torch.float), \n", + " torch.from_numpy(y_train))\n", + "\n", + "# dataloaders\n", + "batch_size = 1 #no batch, or batch = 1\n", + "\n", + "# make sure to SHUFFLE your data\n", + "train_loader = DataLoader( train_data, shuffle=True, batch_size=batch_size )" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zOeZCY09o6CV" + }, + "source": [ + "## 2.2 Neural Network\n", + "\n", + "Now we can build our learning model.\n", + "\n", + "For this TP, we're going to walk through the code of a **simple feedforward neural network, with one hidden layer**.\n", + "\n", + "This network takes as input bag of words vectors, exactly as our 'classic' models: each review is represented by a vector of the size the number of tokens in the 
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zOeZCY09o6CV"
+ },
+ "source": [
+ "## 2.2 Neural Network\n",
+ "\n",
+ "Now we can build our learning model.\n",
+ "\n",
+ "For this TP, we're going to walk through the code of a **simple feedforward neural network, with one hidden layer**.\n",
+ "\n",
+ "This network takes as input bag of words vectors, exactly as our 'classic' models: each review is represented by a vector whose size is the number of tokens in the vocabulary, with '1' when a word is present and '0' for the other words."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 2.2.1 Questions\n",
+ "\n",
+ "▶▶ **What is the input dimension?**\n",
+ "\n",
+ "▶▶ **What is the output dimension?**"
+ ],
+ "metadata": {
+ "id": "5KOM7ofrKUte"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "BSK0j8YASriA"
+ },
+ "source": [
+ "▶▶ **What is the input dimension?** --> MAX_FEATURES = 5000\n",
+ "\n",
+ "▶▶ **What is the output dimension?** --> number of classes = 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Useful imports\n",
+ "import torch\n",
+ "import torch.nn as nn"
+ ],
+ "metadata": {
+ "id": "DiNm2XwlG2_0"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 2.2.2 Write the skeleton of the class\n",
+ "\n",
+ "▶▶ We're going to **define our own neural network type**, by defining a new class:\n",
+ "* The class is called **FeedforwardNeuralNetModel**\n",
+ "* it inherits from the class **nn.Module**\n",
+ "* the constructor takes the following arguments:\n",
+ " * size of the input (i.e. **input_dim**)\n",
+ " * size of the hidden layer (i.e. **hidden_dim**)\n",
+ " * size of the output layer (i.e. **output_dim**)\n",
+ "* in the constructor, we will call the constructor of the parent class\n"
+ ],
+ "metadata": {
+ "id": "bE4RgHUkGnGl"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Start to define the class corresponding to our type of neural network\n"
+ ],
+ "metadata": {
+ "id": "uKcge-oBG1HV"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "class FeedforwardNeuralNetModel(nn.Module):\n",
+ "    def __init__(self, input_dim, hidden_dim, output_dim):\n",
+ "        super(FeedforwardNeuralNetModel, self).__init__()"
+ ],
+ "metadata": {
+ "id": "IyQinowpJ2ic"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 2.2.3 Constructor\n",
+ "\n",
+ "▶▶ To continue the definition of our class, we need to explain how each layer of our network is built.\n",
+ "\n",
+ "More precisely, we're going to define:\n",
+ "* a function corresponding to the action of our hidden layer:\n",
+ " * what kind of function is it?\n",
+ " * you need to indicate the size of the input and output for this function, what are they?\n",
+ "* a non-linear function, that will be used on the output of our hidden layer\n",
+ "* a final output function:\n",
+ " * what kind of function is it?\n",
+ " * you need to indicate the size of the input and output for this function, what are they?\n",
+ "\n",
+ "All the functions that can be used in PyTorch are defined here: https://pytorch.org/docs/stable/nn.functional.html\n",
+ "\n",
+ "Do you see things that you know?\n",
+ "\n",
+ "Hint: here you define fields of your class, and these fields correspond to specific kinds of functions.\n",
+ "E.g. you're going to initialize a field such as **self.fc1=SPECIFIC_TYPE_OF_FCT(expected arguments)**. A minimal illustration of one such function is given below."
+ ],
+ "metadata": {
+ "id": "0BHUuGKCHoU9"
+ }
+ },
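+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A minimal illustration of one such function, **nn.Linear** (the sizes here are toy values, not the dimensions of our model):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {},
+ "source": [
+ "# nn.Linear(in_features, out_features) implements a linear function y = x.W^T + b\n",
+ "layer = nn.Linear(4, 3)        # toy sizes, for illustration only\n",
+ "example_x = torch.rand(1, 4)   # a batch containing one vector of size 4\n",
+ "print(layer(example_x).shape)  # -> torch.Size([1, 3])"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },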
+ {
+ "cell_type": "code",
+ "source": [
+ "# Continue the definition of the class by defining three functions in your constructor\n"
+ ],
+ "metadata": {
+ "id": "LN3aSTSaJNkp"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "class FeedforwardNeuralNetModel(nn.Module):\n",
+ "    def __init__(self, input_dim, hidden_dim, output_dim):\n",
+ "        super(FeedforwardNeuralNetModel, self).__init__()\n",
+ "\n",
+ "        # Linear function ==> W1\n",
+ "        self.fc1 = nn.Linear(input_dim, hidden_dim)\n",
+ "\n",
+ "        # Non-linearity ==> g\n",
+ "        self.sigmoid = nn.Sigmoid()\n",
+ "\n",
+ "        # Linear function (readout) ==> W2\n",
+ "        self.fc2 = nn.Linear(hidden_dim, output_dim)"
+ ],
+ "metadata": {
+ "id": "pIkm7Wc-J6ce"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "And that's it ;)"
+ ],
+ "metadata": {
+ "id": "uuN7LtHSKGDL"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 2.2.4 The **forward** method\n",
+ "\n",
+ "The main function we have to write when defining a neural network is called the **forward** function.\n",
+ "This function computes the outputs of the network (the logits); it is thus used to train the network.\n",
+ "It details how we apply the functions defined in the constructor.\n",
+ "\n",
+ "Let's define this function, with the following signature, where x is the input to the network:\n",
+ "```\n",
+ "def forward(self, x):\n",
+ "```\n",
+ "\n",
+ "▶▶ Follow the steps:\n",
+ "* 1- Apply the first linear function defined in the constructor to **x**, i.e. go through the hidden layer.\n",
+ "* 2- Apply the non-linear function to the output of step 1, i.e. use the activation function.\n",
+ "* 3- Apply the second linear function defined in the constructor to the output of step 2, i.e. go through the output layer.\n",
+ "* 4- Return the output of step 3.\n",
+ "\n",
+ "You're done! (A quick shape check follows the solution below.)"
+ ],
+ "metadata": {
+ "id": "e2IMSprgKJ7K"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Define the forward function, used to make all the calculations\n",
+ "# through the network\n",
+ "def forward(self, x):\n",
+ "    ''' y = g(x.W1+b).W2 '''\n",
+ "    # ..."
+ ],
+ "metadata": {
+ "id": "8z-QpBt2NOlu"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Kvmc-_zqoxvF"
+ },
+ "source": [
+ "class FeedforwardNeuralNetModel(nn.Module):\n",
+ "    def __init__(self, input_dim, hidden_dim, output_dim):\n",
+ "        super(FeedforwardNeuralNetModel, self).__init__()\n",
+ "        # Linear function ==> W1\n",
+ "        self.fc1 = nn.Linear(input_dim, hidden_dim)\n",
+ "\n",
+ "        # Non-linearity ==> g\n",
+ "        self.sigmoid = nn.Sigmoid()\n",
+ "\n",
+ "        # Linear function (readout) ==> W2\n",
+ "        self.fc2 = nn.Linear(hidden_dim, output_dim)\n",
+ "\n",
+ "    def forward(self, x):\n",
+ "        '''\n",
+ "        y = g(x.W1+b).W2\n",
+ "        '''\n",
+ "        # Linear function # LINEAR ==> x.W1+b\n",
+ "        out = self.fc1(x)\n",
+ "\n",
+ "        # Non-linearity # NON-LINEAR ==> h1 = g(x.W1+b)\n",
+ "        out = self.sigmoid(out)\n",
+ "\n",
+ "        # Linear function (readout) # LINEAR ==> y = h1.W2\n",
+ "        out = self.fc2(out)\n",
+ "        return out"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
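+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A quick shape check on the class we just defined (a minimal sketch: *tmp_model*, *tmp_x* and the dimensions are illustrative): we instantiate a small model and push a random input through it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {},
+ "source": [
+ "# Instantiate a small model (toy dimensions) and apply it to one random input\n",
+ "tmp_model = FeedforwardNeuralNetModel(input_dim=10, hidden_dim=4, output_dim=2)\n",
+ "tmp_x = torch.rand(1, 10)   # a batch containing one input vector of size 10\n",
+ "tmp_out = tmp_model(tmp_x)  # raw scores (logits)\n",
+ "print(tmp_out)\n",
+ "print(tmp_out.shape)        # -> torch.Size([1, 2])"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },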
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## 2.3 Training the network\n",
+ "\n",
+ "Now we can use our beautiful class to define and then train our own neural network."
+ ],
+ "metadata": {
+ "id": "sBrDXfQbO5yq"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oWLDfLGxpBvn"
+ },
+ "source": [
+ "### 2.3.1 Hyper-parameters\n",
+ "\n",
+ "We need to set up the values for the hyper-parameters, and define the form of the loss and the optimization method.\n",
+ "\n",
+ "▶▶ **Check that you understand each of the variables below**\n",
+ "* one that you probably don't know is the learning rate; we'll explain it in the next course. Broadly speaking, it corresponds to the amount of update used during training."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "fcGyjXbUoxx9"
+ },
+ "source": [
+ "# Many choices here!\n",
+ "VOCAB_SIZE = MAX_FEATURES\n",
+ "input_dim = VOCAB_SIZE\n",
+ "hidden_dim = 4\n",
+ "output_dim = 2\n",
+ "num_epochs = 5\n",
+ "learning_rate = 0.1"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 2.3.2 Loss function\n",
+ "\n",
+ "Another thing that has to be decided is the kind of loss function we want to use.\n",
+ "Here we use a common one, called CrossEntropy.\n",
+ "We will come back to this loss in more detail.\n",
+ "One important note is that in PyTorch this function includes the SoftMax that should be applied after the output layer: the network is thus expected to return raw scores (logits), not probabilities."
+ ],
+ "metadata": {
+ "id": "yyJINiVHPoWq"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "criterion = nn.CrossEntropyLoss()"
+ ],
+ "metadata": {
+ "id": "TVVy7hhrPl-K"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 2.3.3 Initialization of the model\n",
+ "\n",
+ "Now you can instantiate your class: define a model of the type FeedforwardNeuralNetModel, using the values defined before as hyper-parameters."
+ ],
+ "metadata": {
+ "id": "kyY91BtPQIeo"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Initialization of the model\n",
+ "# ..."
+ ],
+ "metadata": {
+ "id": "hk_nev2-Q0m-"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "15WR_Jdtoxze"
+ },
+ "source": [
+ "# Initialization of the model\n",
+ "model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### 2.3.4 Optimizer\n",
+ "\n",
+ "Finally, we need to indicate the method we want to use to optimize our network.\n",
+ "Here, we use a common one called Stochastic Gradient Descent.\n",
+ "We will come back to this later on.\n",
+ "\n",
+ "Note that its arguments are:\n",
+ "* the parameters of our model (the Ws)\n",
+ "* the learning rate\n",
+ "\n",
+ "Based on this information, it can make the necessary updates. The toy example below sketches one such update by hand.\n"
+ ],
+ "metadata": {
+ "id": "wBjNtZ-bQfSQ"
+ }
+ },
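+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Before defining the optimizer, here is a toy sketch (the values are illustrative) of what one update does: compute a loss, call **backward()** to get the gradients, then apply the rule w <- w - lr * gradient. This is, roughly, what **optimizer.step()** will do for us at each step."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {},
+ "source": [
+ "# A toy gradient step, written by hand (illustrative values)\n",
+ "toy_w = torch.tensor([1.0, -2.0], requires_grad=True)\n",
+ "toy_loss = (toy_w ** 2).sum()   # a dummy 'loss': w1^2 + w2^2\n",
+ "toy_loss.backward()             # gradients: d(loss)/dw = 2 * w\n",
+ "with torch.no_grad():\n",
+ "    toy_w -= 0.1 * toy_w.grad   # one SGD-style update with lr = 0.1\n",
+ "print(toy_w)                    # tensor([ 0.8000, -1.6000], requires_grad=True)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },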
\n" + ], + "metadata": { + "id": "wBjNtZ-bQfSQ" + } + }, + { + "cell_type": "code", + "source": [ + "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)" + ], + "metadata": { + "id": "A8AY0bU8Qhyf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OPt_VbCMqoD2" + }, + "source": [ + "### Training the network\n", + "\n", + "A simple code to train the neural network is given below.\n", + "\n", + "▶▶ **Run the code and look at the loss after each training step.** " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OnNx8hZJox3v" + }, + "source": [ + "# Start training\n", + "for epoch in range(num_epochs):\n", + " train_loss, total_acc, total_count = 0, 0, 0\n", + "\n", + " # for each instance + its associated label\n", + " for input, label in train_loader:\n", + "\n", + " # Clearing the accumulated gradients\n", + " # torch *accumulates* gradients. Before passing in a\n", + " # new instance, you need to zero out the gradients from the old\n", + " # instance\n", + " # Clear gradients w.r.t. parameters\n", + " optimizer.zero_grad()\n", + "\n", + " # ==> Forward pass to get output/logits \n", + " # = apply all our functions: y = g(x.W1+b).W2\n", + " outputs = model( input )\n", + "\n", + " # ==> Calculate Loss: softmax --> cross entropy loss\n", + " loss = criterion(outputs, label)\n", + "\n", + " # Getting gradients w.r.t. parameters\n", + " # Here is the way to find how to modify the parameters in\n", + " # order to lower the loss\n", + " loss.backward()\n", + "\n", + " # ==> Updating parameters: you don t need to provide the loss here,\n", + " # when computing the loss, the information is saved in the parameters\n", + " # (more precisely, doing backward computes the gradients for all tensors,\n", + " # and these gradients are saved by each tensor)\n", + " optimizer.step()\n", + "\n", + " # -- a useful print\n", + " # Accumulating the loss over time\n", + " train_loss += loss.item()\n", + " total_acc += (outputs.argmax(1) == label).sum().item()\n", + " total_count += label.size(0)\n", + "\n", + " # Compute accuracy on train set at each epoch\n", + " print('Epoch: {}. Loss: {}. ACC {} '.format(epoch, \n", + " train_loss/x_train.shape[0], \n", + " total_acc/x_train.shape[0]))\n", + " \n", + " total_acc, total_count = 0, 0\n", + " train_loss = 0" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tzMl5wdnqtCW" + }, + "source": [ + "### Evaluate the model " + ] + }, + { + "cell_type": "code", + "source": [ + "# Useful imports\n", + "from sklearn.metrics import classification_report" + ], + "metadata": { + "id": "N8wxX85sSyPM" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ldDubAPDox5K" + }, + "source": [ + "# create Tensor dataset\n", + "valid_data = TensorDataset( torch.from_numpy(x_dev).to(torch.float), \n", + " torch.from_numpy(y_dev))\n", + "valid_loader = DataLoader( valid_data )\n", + "\n", + "\n", + "# Disabling gradient calculation is useful for inference, \n", + "# when you are sure that you will not call Tensor.backward(). 
\n", + "predictions, gold = [], []\n", + "with torch.no_grad():\n", + " for input, label in valid_loader:\n", + " probs = model(input)\n", + " predictions.append( torch.argmax(probs, dim=1).cpu().numpy()[0] )\n", + " gold.append(int(label))\n", + "\n", + "print(classification_report(gold, predictions))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hq-jspmLL387" + }, + "source": [ + "## 3. Move to GPU\n", + "\n", + "Below we indicate the modifications needed to make all the computations on GPU instead of CPU." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pydK_h3QLZfO" + }, + "source": [ + "## 1- Define the device to be used\n", + "\n", + "# CUDA for PyTorch\n", + "use_cuda = torch.cuda.is_available()\n", + "device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n", + "print(device)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "PuV1OjAdMHOX" + }, + "source": [ + "## 2- No change here\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "\n", + "class FeedforwardNeuralNetModel(nn.Module):\n", + " def __init__(self, input_dim, hidden_dim, output_dim):\n", + " super(FeedforwardNeuralNetModel, self).__init__()\n", + " # Linear function ==> W1\n", + " self.fc1 = nn.Linear(input_dim, hidden_dim)\n", + "\n", + " # Non-linearity ==> g\n", + " self.sigmoid = nn.Sigmoid()\n", + "\n", + " # Linear function (readout) ==> W2\n", + " self.fc2 = nn.Linear(hidden_dim, output_dim) \n", + "\n", + " def forward(self, x):\n", + " '''\n", + " y = g(x.W1+b).W2\n", + " '''\n", + " # Linear function # LINEAR ==> x.W1+b\n", + " out = self.fc1(x)\n", + "\n", + " # Non-linearity # NON-LINEAR ==> h1 = g(x.W1+b)\n", + " out = self.sigmoid(out) \n", + "\n", + " # Linear function (readout) # LINEAR ==> y = h1.W2\n", + " out = self.fc2(out)\n", + " return out" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "K7mmAyoPMziY" + }, + "source": [ + "## 3- Move your model to the GPU\n", + "\n", + "# Initialization of the model\n", + "model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)\n", + "\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n", + "\n", + "## ------------ CHANGE HERE -----------------\n", + "model = model.to(device)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ANibLgnhL9jU" + }, + "source": [ + "## 4- Move your data to GPU\n", + "\n", + "# Start training\n", + "for epoch in range(num_epochs):\n", + " train_loss, total_acc, total_count = 0, 0, 0\n", + " for input, label in train_loader:\n", + " ## ------------ CHANGE HERE -----------------\n", + " input = input.to(device)\n", + " label = label.to(device)\n", + "\n", + " # Clear gradients w.r.t. parameters\n", + " optimizer.zero_grad()\n", + "\n", + " # Forward pass to get output/logits\n", + " outputs = model( input )\n", + "\n", + " # Calculate Loss: softmax --> cross entropy loss\n", + " loss = criterion(outputs, label)\n", + "\n", + " # Getting gradients w.r.t. parameters\n", + " loss.backward()\n", + "\n", + " # Updating parameters\n", + " optimizer.step()\n", + "\n", + " # Accumulating the loss over time\n", + " train_loss += loss.item()\n", + " total_acc += (outputs.argmax(1) == label).sum().item()\n", + " total_count += label.size(0)\n", + "\n", + " # Compute accuracy on train set at each epoch\n", + " print('Epoch: {}. Loss: {}. 
\n",
+ "                                                train_loss/x_train.shape[0], \n",
+ "                                                total_acc/x_train.shape[0]))\n",
+ "\n",
+ "    total_acc, total_count = 0, 0\n",
+ "    train_loss = 0"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "dSXQF-ViNUH4"
+ },
+ "source": [
+ "# -- 5- Again, move your data to GPU\n",
+ "\n",
+ "predictions = []\n",
+ "gold = []\n",
+ "\n",
+ "with torch.no_grad():\n",
+ "    for input, label in valid_loader:\n",
+ "        ## ------------ CHANGE HERE -----------------\n",
+ "        input = input.to(device)\n",
+ "        probs = model(input)\n",
+ "        # Here we need the CPU: otherwise it will generate the following error\n",
+ "        # (if we need a numpy array):\n",
+ "        # can't convert cuda:0 device type tensor to numpy.\n",
+ "        # Use Tensor.cpu() to copy the tensor to host memory first.\n",
+ "        predictions.append( torch.argmax(probs, dim=1).cpu().numpy()[0] )\n",
+ "        #print( probs )\n",
+ "        #print( torch.argmax(probs, dim=1) ) # Returns the index of the max value\n",
+ "        #print( torch.argmax(probs, dim=1).cpu().numpy()[0] )\n",
+ "        gold.append(int(label))\n",
+ "\n",
+ "print(classification_report(gold, predictions))"
+ ],
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file