diff --git a/Aufgabe 6/aufgabe06.ipynb b/Aufgabe 6/aufgabe06.ipynb new file mode 100644 index 0000000..5afd674 --- /dev/null +++ b/Aufgabe 6/aufgabe06.ipynb @@ -0,0 +1,317 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 205, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "import numpy as np\n", + "from tqdm import tqdm, trange\n", + "from random import sample\n", + "from typing import Literal\n", + "import statistics\n", + "import gymnasium\n", + "\n", + "rng = np.random.default_rng()" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "metadata": {}, + "outputs": [], + "source": [ + "def build_dqn(n_actions, input_dims, fc1_dims, fc2_dims):\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(fc1_dims, activation=tf.keras.activations.relu, input_shape=(input_dims,)),\n", + " tf.keras.layers.Dense(fc2_dims, activation=tf.keras.activations.relu),\n", + " tf.keras.layers.Dense(n_actions)\n", + " ])\n", + " model.compile() # no loss/optimizer needed, the weights are evolved rather than trained\n", + "\n", + " return model" + ] + }, + { + "cell_type": "code", + "execution_count": 207, + "metadata": {}, + "outputs": [], + "source": [ + "# Perturb each weight with probability 0.7 by adding Gaussian noise (scale 0.3)\n", + "mutate_weights = np.vectorize(lambda w : w if rng.uniform() > 0.7 else w + rng.normal(scale=0.3))\n", + "\n", + "def mutate(agent: tf.keras.Sequential) -> None:\n", + " for layer in agent.layers:\n", + " w, b = layer.get_weights()\n", + " layer.set_weights([mutate_weights(w), b]) # don't touch biases\n", + "\n", + "def recombine(mother: tf.keras.Sequential, father: tf.keras.Sequential) -> tf.keras.Sequential:\n", + " parent = rng.choice((mother, father))\n", + " child = tf.keras.models.clone_model(parent)\n", + " child.build(input_shape=(None, 4)) # CartPole observations have 4 dimensions\n", + " child.compile()\n", + " child.set_weights(parent.get_weights())\n", + " return child" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Why might it not be the best solution to test each individual only once?\n", + "To determine the fitness, each individual should be tested several times, because the agent does not always perform equally well. It makes sense to test each individual at least 3 times and to take the median score, so that outliers in both directions are kept out of the rating. Alternatively, the average score could also be reasonable.\n", + "\n", + "### Can you think of a good method for a crossover?\n", + "Yes! Tests showed that picking one random parent works well and is fast to compute. Thanks to Melissa for the tip. A per-weight mix of both parents would be another option (see the sketch at the end of this cell).\n", + "\n", + "### What do you observe with the different selection types?\n", + "Here are the avg scores of the best individual from 5 runs each, with 20 generations, 30 individuals and a selection size of 15: \\\n", + "Avg score with elitist: 384, 500, 500, 500, 500 \\\n", + "Avg score with proportional: 500, 291, 500, 432, 188 \\\n", + "500 is the maximum score.\n", + "For more reliable statements, more runs would be needed; since a single run takes >4 min, this was omitted here.\n", + "Both selection types often reached the optimum. In these runs, elitist selection achieved the better results and reached the optimum in 4 out of 5 runs.\n"
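, + "\n", + "For illustration, here is a minimal sketch of such a per-weight crossover. The helper name `uniform_crossover` and the 50/50 mixing ratio are assumptions for this sketch and are not used by the notebook code; it only reuses `rng` and the parent networks defined above.\n", + "\n", + "```python\n", + "def uniform_crossover(mother: tf.keras.Sequential, father: tf.keras.Sequential) -> tf.keras.Sequential:\n", + "    # Clone one parent just to get a network with the same architecture\n", + "    child = tf.keras.models.clone_model(mother)\n", + "    child.build(input_shape=(None, 4))  # CartPole observations have 4 dimensions\n", + "    mixed = []\n", + "    for wm, wf in zip(mother.get_weights(), father.get_weights()):\n", + "        # For every single weight, take the value from mother or father with equal probability\n", + "        mask = rng.uniform(size=wm.shape) < 0.5\n", + "        mixed.append(np.where(mask, wm, wf))\n", + "    child.set_weights(mixed)\n", + "    return child\n", + "```"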
+ ] + }, + { + "cell_type": "code", + "execution_count": 212, + "metadata": {}, + "outputs": [], + "source": [ + "def fitness(agent: tf.keras.Sequential, n: int = 3) -> int:\n", + " env = gymnasium.make('CartPole-v1')\n", + "\n", + " scores = []\n", + "\n", + " for _ in range(n):\n", + " done = False\n", + " score = 0\n", + " observation, _ = env.reset()\n", + " while not done:\n", + " actions = agent(observation[np.newaxis, :])\n", + " action = np.argmax(actions)\n", + " observation, reward, terminated, truncated, _ = env.step(action)\n", + " done = terminated or truncated\n", + " score += reward\n", + "\n", + " scores.append(score)\n", + "\n", + " return statistics.median(scores)\n", + " \n", + "\n", + "def evolution(\n", + " generations: int = 50,\n", + " population_size: int = 20,\n", + " selection_size: int = 5,\n", + " selection_type: Literal[\"elitist\"] | Literal[\"proportional\"] = \"elitist\",\n", + "):\n", + " assert selection_type in (\"elitist\", \"proportional\")\n", + " population = [build_dqn(2, 4, 5, 5) for _ in range(population_size)]\n", + "\n", + " for _ in trange(generations):\n", + " # Select individuals with highest fitness for reproduction\n", + " population = sorted(population, key=lambda agent: fitness(agent), reverse=True)\n", + "\n", + " if selection_type == \"elitist\":\n", + " selection = population[:selection_size]\n", + " elif selection_type == \"proportional\":\n", + " population_fitness = [fitness(nn) for nn in population]\n", + " selection = np.random.choice(\n", + " population,\n", + " selection_size,\n", + " p=[f / sum(population_fitness) for f in population_fitness],\n", + " replace=False\n", + " ).tolist()\n", + "\n", + " # Reproduce\n", + " offsprings = []\n", + " for _ in range(population_size - selection_size):\n", + " mother, father = sample(selection, 2)\n", + "\n", + " offspring = recombine(mother, father)\n", + " mutate(offspring)\n", + " \n", + " offsprings.append(offspring)\n", + "\n", + " # Create new population\n", + " population = selection + offsprings\n", + " \n", + " # Return best individual of final population\n", + " return max(population, key=lambda agent: fitness(agent))" + ] + }, + { + "cell_type": "code", + "execution_count": 222, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 20/20 [02:08<00:00, 6.41s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "episode 0 score 500.00 average score 500.00\n", + "episode 1 score 500.00 average score 500.00\n", + "episode 2 score 500.00 average score 500.00\n", + "episode 3 score 500.00 average score 500.00\n", + "episode 4 score 500.00 average score 500.00\n", + "episode 5 score 500.00 average score 500.00\n", + "episode 6 score 500.00 average score 500.00\n", + "episode 7 score 500.00 average score 500.00\n", + "episode 8 score 500.00 average score 500.00\n", + "episode 9 score 500.00 average score 500.00\n", + "episode 10 score 133.00 average score 466.64\n", + "episode 11 score 500.00 average score 469.42\n", + "episode 12 score 106.00 average score 441.46\n", + "episode 13 score 500.00 average score 445.64\n", + "episode 14 score 128.00 average score 424.47\n", + "episode 15 score 500.00 average score 429.19\n", + "episode 16 score 500.00 average score 433.35\n", + "episode 17 score 500.00 average score 437.06\n", + "episode 18 score 500.00 average score 440.37\n", + "episode 19 score 500.00 average score 443.35\n", + "episode 20 score 500.00 average score 446.05\n", + "episode 21 score 
500.00 average score 448.50\n", + "episode 22 score 500.00 average score 450.74\n", + "episode 23 score 500.00 average score 452.79\n", + "episode 24 score 118.00 average score 439.40\n", + "episode 25 score 500.00 average score 441.73\n", + "episode 26 score 500.00 average score 443.89\n", + "episode 27 score 500.00 average score 445.89\n", + "episode 28 score 117.00 average score 434.55\n", + "episode 29 score 500.00 average score 436.73\n", + "episode 30 score 500.00 average score 438.77\n", + "episode 31 score 500.00 average score 440.69\n", + "episode 32 score 123.00 average score 431.06\n", + "episode 33 score 189.00 average score 423.94\n", + "episode 34 score 500.00 average score 426.11\n", + "episode 35 score 500.00 average score 428.17\n", + "episode 36 score 106.00 average score 419.46\n", + "episode 37 score 500.00 average score 421.58\n", + "episode 38 score 500.00 average score 423.59\n", + "episode 39 score 500.00 average score 425.50\n", + "episode 40 score 500.00 average score 427.32\n", + "episode 41 score 500.00 average score 429.05\n", + "episode 42 score 500.00 average score 430.70\n", + "episode 43 score 500.00 average score 432.27\n", + "episode 44 score 138.00 average score 425.73\n", + "episode 45 score 500.00 average score 427.35\n", + "episode 46 score 500.00 average score 428.89\n", + "episode 47 score 500.00 average score 430.38\n", + "episode 48 score 500.00 average score 431.80\n", + "episode 49 score 500.00 average score 433.16\n", + "episode 50 score 459.00 average score 433.67\n", + "episode 51 score 500.00 average score 434.94\n", + "episode 52 score 500.00 average score 436.17\n", + "episode 53 score 500.00 average score 437.35\n", + "episode 54 score 500.00 average score 438.49\n", + "episode 55 score 157.00 average score 433.46\n", + "episode 56 score 500.00 average score 434.63\n", + "episode 57 score 500.00 average score 435.76\n", + "episode 58 score 164.00 average score 431.15\n", + "episode 59 score 112.00 average score 425.83\n", + "episode 60 score 346.00 average score 424.52\n", + "episode 61 score 500.00 average score 425.74\n", + "episode 62 score 500.00 average score 426.92\n", + "episode 63 score 500.00 average score 428.06\n", + "episode 64 score 500.00 average score 429.17\n", + "episode 65 score 500.00 average score 430.24\n", + "episode 66 score 500.00 average score 431.28\n", + "episode 67 score 140.00 average score 427.00\n", + "episode 68 score 500.00 average score 428.06\n", + "episode 69 score 500.00 average score 429.09\n", + "episode 70 score 500.00 average score 430.08\n", + "episode 71 score 500.00 average score 431.06\n", + "episode 72 score 500.00 average score 432.00\n", + "episode 73 score 500.00 average score 432.92\n", + "episode 74 score 500.00 average score 433.81\n", + "episode 75 score 500.00 average score 434.68\n", + "episode 76 score 500.00 average score 435.53\n", + "episode 77 score 500.00 average score 436.36\n", + "episode 78 score 477.00 average score 436.87\n", + "episode 79 score 500.00 average score 437.66\n", + "episode 80 score 130.00 average score 433.86\n", + "episode 81 score 500.00 average score 434.67\n", + "episode 82 score 394.00 average score 434.18\n", + "episode 83 score 500.00 average score 434.96\n", + "episode 84 score 500.00 average score 435.73\n", + "episode 85 score 500.00 average score 436.48\n", + "episode 86 score 500.00 average score 437.21\n", + "episode 87 score 500.00 average score 437.92\n", + "episode 88 score 500.00 average score 438.62\n", + "episode 89 score 500.00 average 
score 439.30\n", + "episode 90 score 500.00 average score 439.97\n", + "episode 91 score 500.00 average score 440.62\n", + "episode 92 score 140.00 average score 437.39\n", + "episode 93 score 500.00 average score 438.05\n", + "episode 94 score 500.00 average score 438.71\n", + "episode 95 score 500.00 average score 439.34\n", + "episode 96 score 179.00 average score 436.66\n", + "episode 97 score 282.00 average score 435.08\n", + "episode 98 score 500.00 average score 435.74\n", + "episode 99 score 108.00 average score 432.46\n" + ] + } + ], + "source": [ + "env = gymnasium.make('CartPole-v1')\n", + "agent = evolution(generations=20, population_size=30, selection_size=15, selection_type='elitist')\n", + "scores = []\n", + "\n", + "for i in range(100):\n", + " done = False\n", + " score = 0\n", + " observation, info = env.reset()\n", + " while not done:\n", + " action = np.argmax(agent(observation[np.newaxis, :]))\n", + " observation, reward, terminated, truncated, info = env.step(action)\n", + " done = terminated or truncated\n", + " score += reward\n", + "\n", + " scores.append(score)\n", + " \n", + "\n", + " avg_score = np.mean(scores[max(0, i-100):(i+1)])\n", + " print('episode ', i, 'score %.2f' % score, 'average score %.2f' % avg_score)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "bd385fe162c5ca0c84973b7dd5c518456272446b2b64e67c2a69f949ca7a1754" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Aufgabe 6/dqn_model.keras b/Aufgabe 6/dqn_model.keras new file mode 100644 index 0000000..7715da1 Binary files /dev/null and b/Aufgabe 6/dqn_model.keras differ