Aufgabe 6

This commit is contained in:
Kai Koellemann
2023-07-03 20:53:35 +02:00
parent e22a273609
commit fa6a721cc8
2 changed files with 317 additions and 0 deletions

317
Aufgabe 6/aufgabe06.ipynb Normal file
View File

@ -0,0 +1,317 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 205,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"import numpy as np\n",
"from tqdm import tqdm, trange\n",
"from random import sample\n",
"from typing import Literal\n",
"import statistics\n",
"import gymnasium \n",
"\n",
"rng = np.random.default_rng()"
]
},
{
"cell_type": "code",
"execution_count": 206,
"metadata": {},
"outputs": [],
"source": [
"def build_dqn(n_actions, input_dims, fc1_dims, fc2_dims):\n",
" model = tf.keras.Sequential([\n",
" tf.keras.layers.Dense(fc1_dims, activation=tf.keras.activations.relu, input_shape=(input_dims,)),\n",
" tf.keras.layers.Dense(fc2_dims, activation=tf.keras.activations.relu),\n",
" tf.keras.layers.Dense(n_actions)\n",
" ])\n",
" model.compile()\n",
"\n",
" return model"
]
},
{
"cell_type": "code",
"execution_count": 207,
"metadata": {},
"outputs": [],
"source": [
"mutate_weights = np.vectorize(lambda w : w if rng.uniform() > 0.7 else w + rng.normal(scale=0.3))\n",
"\n",
"def mutate(agent: tf.keras.Sequential) -> None:\n",
" for layer in agent.layers:\n",
" w, b = layer.get_weights()\n",
" layer.set_weights([mutate_weights(w), b]) # don't touch biases\n",
"\n",
"def recombine(mother: tf.keras.Sequential, father: tf.keras.Sequential) -> tf.keras.Sequential:\n",
" parent = rng.choice((mother, father))\n",
" child = tf.keras.models.clone_model(parent)\n",
" child.build(4)\n",
" child.compile()\n",
" child.set_weights(parent.get_weights())\n",
" return child"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Warum ist es eventuell nicht die beste Lösung, jedes Individuum nur einmal zu testen?\n",
"Um die Fitness zu ermitteln, sollte jedes Individuum mehrmals getestet werden, da der Agent nicht immer gleich gut abschneidet. Hier gestaltet es sich sinnvoll, jedes Individuum mindestens 3 Mal zu testen und den Median-Score zu wählen um Ausreißer nach unten und oben aus der Wertung zu nehmen. Alternativ könnte auch der durchschnittliche Score sinnvoll sein.\n",
"\n",
"### Fällt Ihnen eine gute Methode für ein Crossover ein?\n",
"Ja! Tests haben ergeben, dass das Wählen eines zufälligen Elternteils sinnvoll ist und schnell berechnet werden kann. Danke an Melissa für den Tipp.\n",
"\n",
"### Welche Beobachtung machen Sie mit den unterschiedlichen Selektionsarten?\n",
"Hier die Avg-Scores des besten Individuums von jeweils 5 Durchgängen mit 20 Generationen, 30 Individuen und einer Selection von 15: \\\n",
"Avg-Score mit Elitist: 384, 500, 500, 500, 500 \\\n",
"Avg-Score mit Proportional: 500, 291, 500, 432, 188 \\\n",
"500 ist der maximale Score.\n",
"Für genauere Aussagen müsste man mehr Durchgänge durchführen. Da ein Durchgang >4min dauert, wurde darauf allerdings verzichtet.\n",
"Beide Selektionsarten haben oft das Optimum erreicht. In diesen Durchgängen hat Elitist bessere Ergebnisse erzielen können. Mit Elitist konnte 4 von 5 Mal der Optimum erreicht werden."
]
},
{
"cell_type": "code",
"execution_count": 212,
"metadata": {},
"outputs": [],
"source": [
"def fitness(agent: tf.keras.Sequential, n: int = 3) -> int:\n",
" env = gymnasium.make('CartPole-v1')\n",
"\n",
" scores = []\n",
"\n",
" for _ in range(n):\n",
" done = False\n",
" score = 0\n",
" observation, _ = env.reset()\n",
" while not done:\n",
" actions = agent(observation[np.newaxis, :])\n",
" action = np.argmax(actions)\n",
" observation, reward, terminated, truncated, _ = env.step(action)\n",
" done = terminated or truncated\n",
" score += reward\n",
"\n",
" scores.append(score)\n",
"\n",
" return statistics.median(scores)\n",
" \n",
"\n",
"def evolution(\n",
" generations: int = 50,\n",
" population_size: int = 20,\n",
" selection_size: int = 5,\n",
" selection_type: Literal[\"elitist\"] | Literal[\"proportional\"] = \"elitist\",\n",
"):\n",
" assert selection_type in (\"elitist\", \"proportional\")\n",
" population = [build_dqn(2, 4, 5, 5) for _ in range(population_size)]\n",
"\n",
" for _ in trange(generations):\n",
" # Select individuals with highest fitness for reproduction\n",
" population = sorted(population, key=lambda agent: fitness(agent), reverse=True)\n",
"\n",
" if selection_type == \"elitist\":\n",
" selection = population[:selection_size]\n",
" elif selection_type == \"proportional\":\n",
" population_fitness = [fitness(nn) for nn in population]\n",
" selection = np.random.choice(\n",
" population,\n",
" selection_size,\n",
" p=[f / sum(population_fitness) for f in population_fitness],\n",
" replace=False\n",
" ).tolist()\n",
"\n",
" # Reproduce\n",
" offsprings = []\n",
" for _ in range(population_size - selection_size):\n",
" mother, father = sample(selection, 2)\n",
"\n",
" offspring = recombine(mother, father)\n",
" mutate(offspring)\n",
" \n",
" offsprings.append(offspring)\n",
"\n",
" # Create new population\n",
" population = selection + offsprings\n",
" \n",
" # Return best individual of final population\n",
" return max(population, key=lambda agent: fitness(agent))"
]
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 20/20 [02:08<00:00, 6.41s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"episode 0 score 500.00 average score 500.00\n",
"episode 1 score 500.00 average score 500.00\n",
"episode 2 score 500.00 average score 500.00\n",
"episode 3 score 500.00 average score 500.00\n",
"episode 4 score 500.00 average score 500.00\n",
"episode 5 score 500.00 average score 500.00\n",
"episode 6 score 500.00 average score 500.00\n",
"episode 7 score 500.00 average score 500.00\n",
"episode 8 score 500.00 average score 500.00\n",
"episode 9 score 500.00 average score 500.00\n",
"episode 10 score 133.00 average score 466.64\n",
"episode 11 score 500.00 average score 469.42\n",
"episode 12 score 106.00 average score 441.46\n",
"episode 13 score 500.00 average score 445.64\n",
"episode 14 score 128.00 average score 424.47\n",
"episode 15 score 500.00 average score 429.19\n",
"episode 16 score 500.00 average score 433.35\n",
"episode 17 score 500.00 average score 437.06\n",
"episode 18 score 500.00 average score 440.37\n",
"episode 19 score 500.00 average score 443.35\n",
"episode 20 score 500.00 average score 446.05\n",
"episode 21 score 500.00 average score 448.50\n",
"episode 22 score 500.00 average score 450.74\n",
"episode 23 score 500.00 average score 452.79\n",
"episode 24 score 118.00 average score 439.40\n",
"episode 25 score 500.00 average score 441.73\n",
"episode 26 score 500.00 average score 443.89\n",
"episode 27 score 500.00 average score 445.89\n",
"episode 28 score 117.00 average score 434.55\n",
"episode 29 score 500.00 average score 436.73\n",
"episode 30 score 500.00 average score 438.77\n",
"episode 31 score 500.00 average score 440.69\n",
"episode 32 score 123.00 average score 431.06\n",
"episode 33 score 189.00 average score 423.94\n",
"episode 34 score 500.00 average score 426.11\n",
"episode 35 score 500.00 average score 428.17\n",
"episode 36 score 106.00 average score 419.46\n",
"episode 37 score 500.00 average score 421.58\n",
"episode 38 score 500.00 average score 423.59\n",
"episode 39 score 500.00 average score 425.50\n",
"episode 40 score 500.00 average score 427.32\n",
"episode 41 score 500.00 average score 429.05\n",
"episode 42 score 500.00 average score 430.70\n",
"episode 43 score 500.00 average score 432.27\n",
"episode 44 score 138.00 average score 425.73\n",
"episode 45 score 500.00 average score 427.35\n",
"episode 46 score 500.00 average score 428.89\n",
"episode 47 score 500.00 average score 430.38\n",
"episode 48 score 500.00 average score 431.80\n",
"episode 49 score 500.00 average score 433.16\n",
"episode 50 score 459.00 average score 433.67\n",
"episode 51 score 500.00 average score 434.94\n",
"episode 52 score 500.00 average score 436.17\n",
"episode 53 score 500.00 average score 437.35\n",
"episode 54 score 500.00 average score 438.49\n",
"episode 55 score 157.00 average score 433.46\n",
"episode 56 score 500.00 average score 434.63\n",
"episode 57 score 500.00 average score 435.76\n",
"episode 58 score 164.00 average score 431.15\n",
"episode 59 score 112.00 average score 425.83\n",
"episode 60 score 346.00 average score 424.52\n",
"episode 61 score 500.00 average score 425.74\n",
"episode 62 score 500.00 average score 426.92\n",
"episode 63 score 500.00 average score 428.06\n",
"episode 64 score 500.00 average score 429.17\n",
"episode 65 score 500.00 average score 430.24\n",
"episode 66 score 500.00 average score 431.28\n",
"episode 67 score 140.00 average score 427.00\n",
"episode 68 score 500.00 average score 428.06\n",
"episode 69 score 500.00 average score 429.09\n",
"episode 70 score 500.00 average score 430.08\n",
"episode 71 score 500.00 average score 431.06\n",
"episode 72 score 500.00 average score 432.00\n",
"episode 73 score 500.00 average score 432.92\n",
"episode 74 score 500.00 average score 433.81\n",
"episode 75 score 500.00 average score 434.68\n",
"episode 76 score 500.00 average score 435.53\n",
"episode 77 score 500.00 average score 436.36\n",
"episode 78 score 477.00 average score 436.87\n",
"episode 79 score 500.00 average score 437.66\n",
"episode 80 score 130.00 average score 433.86\n",
"episode 81 score 500.00 average score 434.67\n",
"episode 82 score 394.00 average score 434.18\n",
"episode 83 score 500.00 average score 434.96\n",
"episode 84 score 500.00 average score 435.73\n",
"episode 85 score 500.00 average score 436.48\n",
"episode 86 score 500.00 average score 437.21\n",
"episode 87 score 500.00 average score 437.92\n",
"episode 88 score 500.00 average score 438.62\n",
"episode 89 score 500.00 average score 439.30\n",
"episode 90 score 500.00 average score 439.97\n",
"episode 91 score 500.00 average score 440.62\n",
"episode 92 score 140.00 average score 437.39\n",
"episode 93 score 500.00 average score 438.05\n",
"episode 94 score 500.00 average score 438.71\n",
"episode 95 score 500.00 average score 439.34\n",
"episode 96 score 179.00 average score 436.66\n",
"episode 97 score 282.00 average score 435.08\n",
"episode 98 score 500.00 average score 435.74\n",
"episode 99 score 108.00 average score 432.46\n"
]
}
],
"source": [
"env = gymnasium.make('CartPole-v1')\n",
"agent = evolution(generations=20, population_size=30, selection_size=15, selection_type='elitist')\n",
"scores = []\n",
"\n",
"for i in range(100):\n",
" done = False\n",
" score = 0\n",
" observation, info = env.reset()\n",
" while not done:\n",
" action = np.argmax(agent(observation[np.newaxis, :]))\n",
" observation, reward, terminated, truncated, info = env.step(action)\n",
" done = terminated or truncated\n",
" score += reward\n",
"\n",
" scores.append(score)\n",
" \n",
"\n",
" avg_score = np.mean(scores[max(0, i-100):(i+1)])\n",
" print('episode ', i, 'score %.2f' % score, 'average score %.2f' % avg_score)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "bd385fe162c5ca0c84973b7dd5c518456272446b2b64e67c2a69f949ca7a1754"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

BIN
Aufgabe 6/dqn_model.keras Normal file

Binary file not shown.