diff --git a/Aufgabe 3/aufgabe03.ipynb b/Aufgabe 3/aufgabe03.ipynb
index 6411552..12eb3c8 100644
--- a/Aufgabe 3/aufgabe03.ipynb
+++ b/Aufgabe 3/aufgabe03.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"rng = np.random.default_rng()\n"
]
},
@@ -13,7 +13,7 @@
{
"cell_type": "code",
- "execution_count": 46,
+ "execution_count": 253,
"metadata": {},
"outputs": [],
"source": [
"# Aufgabe 1\n",
"\n",
"relu = np.vectorize(lambda x : max(0.0, x))\n",
"\n",
@@ -23,7 +23,7 @@
"sigmoid = np.vectorize(lambda x : 1 / (1 + np.exp(-x)))\n",
"\n",
- "inputs = [rng.integers(0, 1, size=(2,1), endpoint=True) for _ in range(100)]\n",
+ "inputs = [rng.integers(0, 1, size=(2,1), endpoint=True) for _ in range(10_000)]\n",
"outputs = [a[0] ^ b[0] for a, b in inputs]\n",
"data = list(zip(inputs, outputs))\n",
"\n",
@@ -32,74 +32,180 @@
"# Binary Cross Entropy Loss\n",
"bcel = np.vectorize(lambda y, ŷ : -(y * np.log(ŷ) + (1 - y) * np.log(1 - ŷ)))\n",
"\n",
- "# Derivations\n",
- "bcel_derivation = np.vectorize(lambda y, ŷ: (1 / (1 - ŷ)) if y == 0 else -(1 / ŷ))\n",
- "sigmoid_derivation = np.vectorize(lambda x: (1 / (1 + np.exp(-x))) * (1 - (1 / (1 + np.exp(-x)))))\n",
- "relu_derivation = np.vectorize(lambda x: 0 if x < 0 else 1)\n"
+ "# Derivatives\n",
+ "derivatives = {\n",
+ "    bcel : np.vectorize(lambda y, ŷ: (1 / (1 - ŷ)) if y == 0 else -(1 / ŷ)),\n",
+ "    sigmoid : np.vectorize(lambda x: (1 / (1 + np.exp(-x))) * (1 - (1 / (1 + np.exp(-x))))),\n",
+ "    relu : np.vectorize(lambda x: 0 if x < 0 else 1)\n",
+ "}\n"
]
},
{
"cell_type": "code",
- "execution_count": 51,
+ "execution_count": 287,
"metadata": {},
"outputs": [],
"source": [
"# Aufgabe 2\n",
"from typing import Callable\n",
"\n",
"\n",
"class NeuralNet:\n",
"    def __init__(self, inputs: int = 2, hidden_layers: list[tuple[int, Callable]] | None = None):\n",
"        \"\"\"\n",
"        Initializes the neural network.\n",
"        Hidden layers can be specified with the 'hidden_layers' parameter,\n",
"        which takes a list of the format [(layer1_size, layer1_activation_function), (layer2...), ...].\n",
"        The output layer will always consist of a single neuron and use the sigmoid\n",
"        activation function.\n",
"        \"\"\"\n",
"\n",
"        self.input_shape = (inputs, 1)\n",
"        self.layers = [] if hidden_layers is None else list(hidden_layers) # Copy so the caller's list is not mutated\n",
"        self.layers.append((1, sigmoid)) # Add output layer\n",
"\n",
"        # Construct weights for each layer\n",
"        self.activation_functions = []\n",
"        self.weights = []\n",
"        for index, (num_neurons, activation_function) in enumerate(self.layers):\n",
"            self.activation_functions.append(activation_function)\n",
"\n",
"            num_layer_inputs = inputs if index == 0 else self.layers[index - 1][0]\n",
"            self.weights.append(rng.uniform(low=-1.0, high=1.0, size=(num_neurons, num_layer_inputs)))\n",
"\n",
"    def forward_pass(self, x: np.ndarray) -> tuple:\n",
"        \"\"\"\n",
"        Do a forward pass through the neural net.\n",
"        Returns the linear and activation function results for each layer.\n",
"        For the final output, see the last entry in the F list.\n",
"        \"\"\"\n",
"\n",
"        x = np.array(x)\n",
"        if x.shape != self.input_shape:\n",
"            raise ValueError(f\"Input must be of shape {self.input_shape}.\")\n",
"\n",
"        Z = [] # linear values for each layer\n",
"        F = [x] # activation function values for each layer\n",
"        for weights, activation_function in zip(self.weights, self.activation_functions):\n",
"            Z.append(np.matmul(weights, F[-1])) # linear\n",
"            F.append(activation_function(Z[-1]))\n",
"\n",
"        return (Z, F)\n",
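"\n",
"    # Shape example (derived from the code above): NeuralNet(inputs=2, hidden_layers=[(3, relu)])\n",
"    # creates weights[0] with shape (3, 2) and weights[1] with shape (1, 3), so a\n",
"    # forward pass maps a (2, 1) input to a (1, 1) output ŷ, stored in F[-1].\n",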
return (Z, F)\n", + "\n", + " def classify(self, x) -> bool:\n", + " \"\"\"\n", + " Executes a forward pass, and returns True if the resulting\n", + " value is greater than 0.5.\n", + " \"\"\"\n", + "\n", + " _, F = self.forward_pass(x)\n", + " ŷ = F[-1][0]\n", + " return ŷ > 0.5\n", + "\n", + " # Aufgabe 4\n", + " def backward_pass(self, x, y: float):\n", + " \"\"\"\n", + " Do a backward pass through the neural net.\n", + " Returns the calculated weight difference.\n", + " \"\"\"\n", + "\n", + " Z, F = self.forward_pass(x)\n", + "\n", + " layer_errors = [None for _ in range(len(self.layers))]\n", + " layer_errors[-1] = F[-1] - y # ŷ - y\n", + "\n", + " # Backpropagation\n", + " for i in reversed(range(len(self.layers) - 1)):\n", + " layer_errors[i] = np.multiply(\n", + " self.weights[i + 1], layer_errors[i + 1] * derivatives[self.activation_functions[i]](Z[i].T)\n", + " )\n", + "\n", + " Δweights = [np.multiply(error, F[i]).T for i, error in enumerate(layer_errors)]\n", + " return Δweights\n", + "\n", + " # Aufgabe 5\n", + " def train(self, data, learning_rate: float = 0.5):\n", + " \"\"\"\n", + " Train the neural network with the given input data.\n", + " \"\"\"\n", + " for test_input, expected in data:\n", + " Δweights = self.backward_pass(test_input, expected)\n", + " self.weights = [w - learning_rate * Δw for w, Δw in zip(self.weights, Δweights, strict=True)]" + ] + }, + { + "cell_type": "code", + "execution_count": 286, + "metadata": {}, + "outputs": [], + "source": [ + "# Unit test to make sure neural net behaves as expected\n", + "nn = NeuralNet(inputs=3, hidden_layers=[(4, sigmoid)])\n", + "\n", + "nn.weights = [\n", + " np.array([\n", + " [0.179, 0.044, 0.01], \n", + " [-0.186, -0.028, -0.035], \n", + " [-0.008, -0.063, -0.004], \n", + " [-0.048, -0.131, 0.088]\n", + " ]),\n", + " np.array([\n", + " [0.088, 0.171, 0.005, -0.04]\n", + " ]),\n", + "]\n", + "\n", + "# Make sure forward pass is correct\n", + "Z, F = nn.forward_pass(np.array([[7, 8, 10]]).T)\n", + "assert np.allclose(F[1], np.array([[0.845, 0.132, 0.354, 0.377]]).T, atol=0.01)\n", + "assert np.allclose(F[2], np.array([[0.521]]), atol=0.01)\n", + "\n", + "# Make sure backward pass is correct\n", + "Δweights = nn.backward_pass(np.array([[7, 8, 10]]).T, 1)\n", + "assert np.allclose(Δweights[0], np.array([\n", + " [-0.039, -0.044, -0.055], \n", + " [-0.066, -0.075, -0.094], \n", + " [-0.004, -0.004, -0.005], \n", + " [0.032, 0.036, 0.046]\n", + " ]), atol=0.01)\n", + "assert np.allclose(Δweights[1], np.array([[-0.405, -0.063, -0.169, -0.181]]), atol=0.01)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 257, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.7722866010132297\n", - "0.7722866010132297\n", - "0.9354266393448504\n", - "0.7722866010132297\n", - "0.7722866010132297\n", - "0.7722866010132297\n", - "0.8102957251424703\n", - "0.7722866010132297\n", - "0.5\n", - "0.7722866010132297\n" + "Before training: correct=761\n", + "After training: correct=1000\n" ] } ], "source": [ - "# Aufgabe 2\n", + "nn = NeuralNet(hidden_layers=[(3, relu)])\n", "\n", + "correct = 0\n", + "for i in range(1000):\n", + " x = rng.integers(0, 1, size=(2,1), endpoint=True)\n", + " y = (x[0] ^ x[1])[0]\n", + " c = nn.classify(x)\n", + " if (c and y == 1) or (not c and y == 0):\n", + " correct += 1\n", + "print(f\"Before training: {correct=}\")\n", "\n", - "class NeuralNet:\n", - " def __init__(self, inputs: int = 2, hidden_layers: int = 1, hidden_layer_neurons: int = 4):\n", - " 
self.input_shape = (inputs, 1)\n", + "nn.train(data)\n", "\n", - " # Construct weights for hidden layer\n", - " self.weights = []\n", - " for i in range(hidden_layers):\n", - " num_inputs = inputs if i == 0 else hidden_layer_neurons # First hidden layer only needs 2x4 weight matrix\n", - " self.weights.append(rng.uniform(low=-1.0, high=1.0, size=(hidden_layer_neurons, num_inputs)))\n", - "\n", - " # Construct weights for output layer\n", - " self.outweights = rng.uniform(low=-1.0, high=1.0, size=(1, hidden_layer_neurons))\n", - "\n", - " def forward_pass(self, x) -> float:\n", - " x = np.array(x)\n", - " if x.shape != self.input_shape:\n", - " raise ValueError(f\"Input must be of shape {self.input_shape}.\")\n", - "\n", - " # Hidden layers\n", - " for layer in self.weights:\n", - " x = relu(np.matmul(layer, x))\n", - "\n", - " # Output layer\n", - " return sigmoid(np.matmul(self.outweights, x))[0][0]\n", - " \n", - " # Aufgabe 4\n", - " def backward_pass(self, learning_rate: float):\n", - " ...\n", - "\n", - "\n", - "nn = NeuralNet(hidden_layers=1)\n", - "\n", - "for input_values, expected in data[:10]:\n", - " actual = nn.forward_pass(input_values)\n", - "\n", - " print(actual)\n" + "correct = 0\n", + "for i in range(1000):\n", + " x = rng.integers(0, 1, size=(2,1), endpoint=True)\n", + " y = (x[0] ^ x[1])[0]\n", + " c = nn.classify(x)\n", + " if (c and y == 1) or (not c and y == 0):\n", + " correct += 1\n", + "print(f\"After training: {correct=}\")" ] } ],