Added backpropagation

2023-06-04 15:02:17 +02:00
parent 4094cea359
commit 07cbaf93e3


@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
@@ -13,7 +13,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 253,
"metadata": {},
"outputs": [],
"source": [
@@ -23,7 +23,7 @@
"\n",
"sigmoid = np.vectorize(lambda x : 1 / (1 + np.exp(-x)))\n",
"\n",
"inputs = [rng.integers(0, 1, size=(2,1), endpoint=True) for _ in range(100)]\n",
"inputs = [rng.integers(0, 1, size=(2,1), endpoint=True) for _ in range(10_000)]\n",
"outputs = [a[0] ^ b[0] for a, b in inputs]\n",
"data = list(zip(inputs, outputs))\n",
"\n",
@@ -32,74 +32,180 @@
"# Binary Cross Entropy Loss\n",
"bcel = np.vectorize(lambda y, ŷ : -(y * np.log(ŷ) + (1 - y) * np.log(1 - ŷ)))\n",
"\n",
"# Derivations\n",
"bcel_derivation = np.vectorize(lambda y, ŷ: (1 / (1 - ŷ)) if y == 0 else -(1 / ŷ))\n",
"sigmoid_derivation = np.vectorize(lambda x: (1 / (1 + np.exp(-x))) * (1 - (1 / (1 + np.exp(-x)))))\n",
"relu_derivation = np.vectorize(lambda x: 0 if x < 0 else 1)\n"
"# Derivatives\n",
"derivatives = {\n",
" bcel : np.vectorize(lambda y, ŷ: (1 / (1 - ŷ)) if y == 0 else -(1 / ŷ)),\n",
" sigmoid : np.vectorize(lambda x: (1 / (1 + np.exp(-x))) * (1 - (1 / (1 + np.exp(-x))))),\n",
" relu : np.vectorize(lambda x: 0 if x < 0 else 1)\n",
"}\n"
]
},
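A quick aside, not part of the commit: the analytic derivatives above can be sanity-checked against a central finite difference. A minimal sketch, assuming `sigmoid` and the `derivatives` dict from the cells above are in scope:

import numpy as np

eps = 1e-6
xs = np.linspace(-3.0, 3.0, 7)
# Central-difference approximation of d(sigmoid)/dx
numeric = (sigmoid(xs + eps) - sigmoid(xs - eps)) / (2 * eps)
assert np.allclose(numeric, derivatives[sigmoid](xs), atol=1e-6)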
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 287,
"metadata": {},
"outputs": [],
"source": [
"# Aufgabe 2\n",
"from typing import Callable\n",
"\n",
"\n",
"class NeuralNet:\n",
" def __init__(self, inputs: int = 2, hidden_layers: list[tuple[int, Callable]] = None):\n",
" \"\"\"\n",
" Initializes the neural network.\n",
" Hidden layers can be specified with the 'hidden_layers' parameter,\n",
" which takes list of the format [(layer1_size, layer1_activation_function), (layer2...), ...].\n",
" The output layer will always consist of a single neuron and use the sigmoid\n",
" activation function.\n",
" \"\"\"\n",
"\n",
" self.input_shape = (inputs, 1)\n",
" self.layers = [] if hidden_layers is None else hidden_layers\n",
" self.layers.append((1, sigmoid)) # Add output layer\n",
"\n",
" # Construct weights for hidden layer\n",
" self.activation_functions = []\n",
" self.weights = []\n",
" for index, (num_neurons, activation_function) in enumerate(self.layers):\n",
" self.activation_functions.append(activation_function)\n",
"\n",
" num_layer_inputs = inputs if index == 0 else self.layers[index - 1][0]\n",
" self.weights.append(rng.uniform(low=-1.0, high=1.0, size=(num_neurons, num_layer_inputs)))\n",
"\n",
" def forward_pass(self, x: np.array) -> tuple:\n",
" \"\"\"\n",
" Do a forward pass through the neural net.\n",
" Returns the linear and activation function results for each layer.\n",
" For the final output, see the last input in the F list.\n",
" \"\"\"\n",
"\n",
" x = np.array(x)\n",
" if x.shape != self.input_shape:\n",
" raise ValueError(f\"Input must be of shape {self.input_shape}.\")\n",
"\n",
" Z = [] # linear values for each layer\n",
" F = [x] # activation function values for each layer\n",
" for weights, activation_function in zip(self.weights, self.activation_functions):\n",
" Z.append(np.matmul(weights, F[-1])) # linear\n",
" F.append(activation_function(Z[-1]))\n",
"\n",
" return (Z, F)\n",
"\n",
" def classify(self, x) -> bool:\n",
" \"\"\"\n",
" Executes a forward pass, and returns True if the resulting\n",
" value is greater than 0.5.\n",
" \"\"\"\n",
"\n",
" _, F = self.forward_pass(x)\n",
" ŷ = F[-1][0]\n",
" return ŷ > 0.5\n",
"\n",
" # Aufgabe 4\n",
" def backward_pass(self, x, y: float):\n",
" \"\"\"\n",
" Do a backward pass through the neural net.\n",
" Returns the calculated weight difference.\n",
" \"\"\"\n",
"\n",
" Z, F = self.forward_pass(x)\n",
"\n",
" layer_errors = [None for _ in range(len(self.layers))]\n",
" layer_errors[-1] = F[-1] - y # ŷ - y\n",
"\n",
" # Backpropagation\n",
" for i in reversed(range(len(self.layers) - 1)):\n",
" layer_errors[i] = np.multiply(\n",
" self.weights[i + 1], layer_errors[i + 1] * derivatives[self.activation_functions[i]](Z[i].T)\n",
" )\n",
"\n",
" Δweights = [np.multiply(error, F[i]).T for i, error in enumerate(layer_errors)]\n",
" return Δweights\n",
"\n",
" # Aufgabe 5\n",
" def train(self, data, learning_rate: float = 0.5):\n",
" \"\"\"\n",
" Train the neural network with the given input data.\n",
" \"\"\"\n",
" for test_input, expected in data:\n",
" Δweights = self.backward_pass(test_input, expected)\n",
" self.weights = [w - learning_rate * Δw for w, Δw in zip(self.weights, Δweights, strict=True)]"
]
},
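Why `layer_errors[-1] = F[-1] - y` needs no lookup in the `derivatives` dict: for a sigmoid output trained with BCE, the chain rule collapses. A short derivation (not in the commit), using the same bcel and sigmoid derivatives defined above:

$$\frac{\partial L}{\partial z} = \frac{\partial L}{\partial \hat{y}} \cdot \frac{\partial \hat{y}}{\partial z} = \left(-\frac{y}{\hat{y}} + \frac{1-y}{1-\hat{y}}\right)\hat{y}(1-\hat{y}) = -y(1-\hat{y}) + (1-y)\hat{y} = \hat{y} - y$$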
{
"cell_type": "code",
"execution_count": 286,
"metadata": {},
"outputs": [],
"source": [
"# Unit test to make sure neural net behaves as expected\n",
"nn = NeuralNet(inputs=3, hidden_layers=[(4, sigmoid)])\n",
"\n",
"nn.weights = [\n",
" np.array([\n",
" [0.179, 0.044, 0.01], \n",
" [-0.186, -0.028, -0.035], \n",
" [-0.008, -0.063, -0.004], \n",
" [-0.048, -0.131, 0.088]\n",
" ]),\n",
" np.array([\n",
" [0.088, 0.171, 0.005, -0.04]\n",
" ]),\n",
"]\n",
"\n",
"# Make sure forward pass is correct\n",
"Z, F = nn.forward_pass(np.array([[7, 8, 10]]).T)\n",
"assert np.allclose(F[1], np.array([[0.845, 0.132, 0.354, 0.377]]).T, atol=0.01)\n",
"assert np.allclose(F[2], np.array([[0.521]]), atol=0.01)\n",
"\n",
"# Make sure backward pass is correct\n",
"Δweights = nn.backward_pass(np.array([[7, 8, 10]]).T, 1)\n",
"assert np.allclose(Δweights[0], np.array([\n",
" [-0.039, -0.044, -0.055], \n",
" [-0.066, -0.075, -0.094], \n",
" [-0.004, -0.004, -0.005], \n",
" [0.032, 0.036, 0.046]\n",
" ]), atol=0.01)\n",
"assert np.allclose(Δweights[1], np.array([[-0.405, -0.063, -0.169, -0.181]]), atol=0.01)\n"
]
},
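A further hedged check, not in the commit: a single gradient step with the Δweights from backward_pass should reduce the BCE loss on the fixed example above. A sketch reusing the `nn`, `bcel`, and test input already defined (note that it perturbs `nn.weights`):

x, y = np.array([[7, 8, 10]]).T, 1
_, F = nn.forward_pass(x)
loss_before = bcel(y, F[-1][0][0])
Δweights = nn.backward_pass(x, y)
nn.weights = [w - 0.5 * Δw for w, Δw in zip(nn.weights, Δweights)]  # one SGD step
_, F = nn.forward_pass(x)
assert bcel(y, F[-1][0][0]) < loss_before  # loss decreased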
{
"cell_type": "code",
"execution_count": 257,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.7722866010132297\n",
"0.7722866010132297\n",
"0.9354266393448504\n",
"0.7722866010132297\n",
"0.7722866010132297\n",
"0.7722866010132297\n",
"0.8102957251424703\n",
"0.7722866010132297\n",
"0.5\n",
"0.7722866010132297\n"
"Before training: correct=761\n",
"After training: correct=1000\n"
]
}
],
"source": [
"# Aufgabe 2\n",
"nn = NeuralNet(hidden_layers=[(3, relu)])\n",
"\n",
"correct = 0\n",
"for i in range(1000):\n",
" x = rng.integers(0, 1, size=(2,1), endpoint=True)\n",
" y = (x[0] ^ x[1])[0]\n",
" c = nn.classify(x)\n",
" if (c and y == 1) or (not c and y == 0):\n",
" correct += 1\n",
"print(f\"Before training: {correct=}\")\n",
"\n",
"class NeuralNet:\n",
" def __init__(self, inputs: int = 2, hidden_layers: int = 1, hidden_layer_neurons: int = 4):\n",
" self.input_shape = (inputs, 1)\n",
"nn.train(data)\n",
"\n",
" # Construct weights for hidden layer\n",
" self.weights = []\n",
" for i in range(hidden_layers):\n",
" num_inputs = inputs if i == 0 else hidden_layer_neurons # First hidden layer only needs 2x4 weight matrix\n",
" self.weights.append(rng.uniform(low=-1.0, high=1.0, size=(hidden_layer_neurons, num_inputs)))\n",
"\n",
" # Construct weights for output layer\n",
" self.outweights = rng.uniform(low=-1.0, high=1.0, size=(1, hidden_layer_neurons))\n",
"\n",
" def forward_pass(self, x) -> float:\n",
" x = np.array(x)\n",
" if x.shape != self.input_shape:\n",
" raise ValueError(f\"Input must be of shape {self.input_shape}.\")\n",
"\n",
" # Hidden layers\n",
" for layer in self.weights:\n",
" x = relu(np.matmul(layer, x))\n",
"\n",
" # Output layer\n",
" return sigmoid(np.matmul(self.outweights, x))[0][0]\n",
" \n",
" # Aufgabe 4\n",
" def backward_pass(self, learning_rate: float):\n",
" ...\n",
"\n",
"\n",
"nn = NeuralNet(hidden_layers=1)\n",
"\n",
"for input_values, expected in data[:10]:\n",
" actual = nn.forward_pass(input_values)\n",
"\n",
" print(actual)\n"
"correct = 0\n",
"for i in range(1000):\n",
" x = rng.integers(0, 1, size=(2,1), endpoint=True)\n",
" y = (x[0] ^ x[1])[0]\n",
" c = nn.classify(x)\n",
" if (c and y == 1) or (not c and y == 0):\n",
" correct += 1\n",
"print(f\"After training: {correct=}\")"
]
}
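Since train() makes a single pass over the data, a natural extension (an assumption on my part, not something this commit adds) is to loop it for several epochs with reshuffling:

for epoch in range(5):   # hypothetical epoch count
    rng.shuffle(data)    # Generator.shuffle permutes the list in place
    nn.train(data, learning_rate=0.5)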
],