Aufgabe 6
This commit is contained in:
317
Aufgabe 6/aufgabe06.ipynb
Normal file
317
Aufgabe 6/aufgabe06.ipynb
Normal file
@ -0,0 +1,317 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 205,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tensorflow as tf\n",
|
||||
"from tensorflow import keras\n",
|
||||
"import numpy as np\n",
|
||||
"from tqdm import tqdm, trange\n",
|
||||
"from random import sample\n",
|
||||
"from typing import Literal\n",
|
||||
"import statistics\n",
|
||||
"import gymnasium \n",
|
||||
"\n",
|
||||
"rng = np.random.default_rng()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 206,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def build_dqn(n_actions, input_dims, fc1_dims, fc2_dims):\n",
|
||||
" model = tf.keras.Sequential([\n",
|
||||
" tf.keras.layers.Dense(fc1_dims, activation=tf.keras.activations.relu, input_shape=(input_dims,)),\n",
|
||||
" tf.keras.layers.Dense(fc2_dims, activation=tf.keras.activations.relu),\n",
|
||||
" tf.keras.layers.Dense(n_actions)\n",
|
||||
" ])\n",
|
||||
" model.compile()\n",
|
||||
"\n",
|
||||
" return model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 207,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mutate_weights = np.vectorize(lambda w : w if rng.uniform() > 0.7 else w + rng.normal(scale=0.3))\n",
|
||||
"\n",
|
||||
"def mutate(agent: tf.keras.Sequential) -> None:\n",
|
||||
" for layer in agent.layers:\n",
|
||||
" w, b = layer.get_weights()\n",
|
||||
" layer.set_weights([mutate_weights(w), b]) # don't touch biases\n",
|
||||
"\n",
|
||||
"def recombine(mother: tf.keras.Sequential, father: tf.keras.Sequential) -> tf.keras.Sequential:\n",
|
||||
" parent = rng.choice((mother, father))\n",
|
||||
" child = tf.keras.models.clone_model(parent)\n",
|
||||
" child.build(4)\n",
|
||||
" child.compile()\n",
|
||||
" child.set_weights(parent.get_weights())\n",
|
||||
" return child"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Warum ist es eventuell nicht die beste Lösung, jedes Individuum nur einmal zu testen?\n",
|
||||
"Um die Fitness zu ermitteln, sollte jedes Individuum mehrmals getestet werden, da der Agent nicht immer gleich gut abschneidet. Hier gestaltet es sich sinnvoll, jedes Individuum mindestens 3 Mal zu testen und den Median-Score zu wählen um Ausreißer nach unten und oben aus der Wertung zu nehmen. Alternativ könnte auch der durchschnittliche Score sinnvoll sein.\n",
|
||||
"\n",
|
||||
"### Fällt Ihnen eine gute Methode für ein Crossover ein?\n",
|
||||
"Ja! Tests haben ergeben, dass das Wählen eines zufälligen Elternteils sinnvoll ist und schnell berechnet werden kann. Danke an Melissa für den Tipp.\n",
|
||||
"\n",
|
||||
"### Welche Beobachtung machen Sie mit den unterschiedlichen Selektionsarten?\n",
|
||||
"Hier die Avg-Scores des besten Individuums von jeweils 5 Durchgängen mit 20 Generationen, 30 Individuen und einer Selection von 15: \\\n",
|
||||
"Avg-Score mit Elitist: 384, 500, 500, 500, 500 \\\n",
|
||||
"Avg-Score mit Proportional: 500, 291, 500, 432, 188 \\\n",
|
||||
"500 ist der maximale Score.\n",
|
||||
"Für genauere Aussagen müsste man mehr Durchgänge durchführen. Da ein Durchgang >4min dauert, wurde darauf allerdings verzichtet.\n",
|
||||
"Beide Selektionsarten haben oft das Optimum erreicht. In diesen Durchgängen hat Elitist bessere Ergebnisse erzielen können. Mit Elitist konnte 4 von 5 Mal der Optimum erreicht werden."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 212,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def fitness(agent: tf.keras.Sequential, n: int = 3) -> int:\n",
|
||||
" env = gymnasium.make('CartPole-v1')\n",
|
||||
"\n",
|
||||
" scores = []\n",
|
||||
"\n",
|
||||
" for _ in range(n):\n",
|
||||
" done = False\n",
|
||||
" score = 0\n",
|
||||
" observation, _ = env.reset()\n",
|
||||
" while not done:\n",
|
||||
" actions = agent(observation[np.newaxis, :])\n",
|
||||
" action = np.argmax(actions)\n",
|
||||
" observation, reward, terminated, truncated, _ = env.step(action)\n",
|
||||
" done = terminated or truncated\n",
|
||||
" score += reward\n",
|
||||
"\n",
|
||||
" scores.append(score)\n",
|
||||
"\n",
|
||||
" return statistics.median(scores)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"def evolution(\n",
|
||||
" generations: int = 50,\n",
|
||||
" population_size: int = 20,\n",
|
||||
" selection_size: int = 5,\n",
|
||||
" selection_type: Literal[\"elitist\"] | Literal[\"proportional\"] = \"elitist\",\n",
|
||||
"):\n",
|
||||
" assert selection_type in (\"elitist\", \"proportional\")\n",
|
||||
" population = [build_dqn(2, 4, 5, 5) for _ in range(population_size)]\n",
|
||||
"\n",
|
||||
" for _ in trange(generations):\n",
|
||||
" # Select individuals with highest fitness for reproduction\n",
|
||||
" population = sorted(population, key=lambda agent: fitness(agent), reverse=True)\n",
|
||||
"\n",
|
||||
" if selection_type == \"elitist\":\n",
|
||||
" selection = population[:selection_size]\n",
|
||||
" elif selection_type == \"proportional\":\n",
|
||||
" population_fitness = [fitness(nn) for nn in population]\n",
|
||||
" selection = np.random.choice(\n",
|
||||
" population,\n",
|
||||
" selection_size,\n",
|
||||
" p=[f / sum(population_fitness) for f in population_fitness],\n",
|
||||
" replace=False\n",
|
||||
" ).tolist()\n",
|
||||
"\n",
|
||||
" # Reproduce\n",
|
||||
" offsprings = []\n",
|
||||
" for _ in range(population_size - selection_size):\n",
|
||||
" mother, father = sample(selection, 2)\n",
|
||||
"\n",
|
||||
" offspring = recombine(mother, father)\n",
|
||||
" mutate(offspring)\n",
|
||||
" \n",
|
||||
" offsprings.append(offspring)\n",
|
||||
"\n",
|
||||
" # Create new population\n",
|
||||
" population = selection + offsprings\n",
|
||||
" \n",
|
||||
" # Return best individual of final population\n",
|
||||
" return max(population, key=lambda agent: fitness(agent))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 222,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 20/20 [02:08<00:00, 6.41s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"episode 0 score 500.00 average score 500.00\n",
|
||||
"episode 1 score 500.00 average score 500.00\n",
|
||||
"episode 2 score 500.00 average score 500.00\n",
|
||||
"episode 3 score 500.00 average score 500.00\n",
|
||||
"episode 4 score 500.00 average score 500.00\n",
|
||||
"episode 5 score 500.00 average score 500.00\n",
|
||||
"episode 6 score 500.00 average score 500.00\n",
|
||||
"episode 7 score 500.00 average score 500.00\n",
|
||||
"episode 8 score 500.00 average score 500.00\n",
|
||||
"episode 9 score 500.00 average score 500.00\n",
|
||||
"episode 10 score 133.00 average score 466.64\n",
|
||||
"episode 11 score 500.00 average score 469.42\n",
|
||||
"episode 12 score 106.00 average score 441.46\n",
|
||||
"episode 13 score 500.00 average score 445.64\n",
|
||||
"episode 14 score 128.00 average score 424.47\n",
|
||||
"episode 15 score 500.00 average score 429.19\n",
|
||||
"episode 16 score 500.00 average score 433.35\n",
|
||||
"episode 17 score 500.00 average score 437.06\n",
|
||||
"episode 18 score 500.00 average score 440.37\n",
|
||||
"episode 19 score 500.00 average score 443.35\n",
|
||||
"episode 20 score 500.00 average score 446.05\n",
|
||||
"episode 21 score 500.00 average score 448.50\n",
|
||||
"episode 22 score 500.00 average score 450.74\n",
|
||||
"episode 23 score 500.00 average score 452.79\n",
|
||||
"episode 24 score 118.00 average score 439.40\n",
|
||||
"episode 25 score 500.00 average score 441.73\n",
|
||||
"episode 26 score 500.00 average score 443.89\n",
|
||||
"episode 27 score 500.00 average score 445.89\n",
|
||||
"episode 28 score 117.00 average score 434.55\n",
|
||||
"episode 29 score 500.00 average score 436.73\n",
|
||||
"episode 30 score 500.00 average score 438.77\n",
|
||||
"episode 31 score 500.00 average score 440.69\n",
|
||||
"episode 32 score 123.00 average score 431.06\n",
|
||||
"episode 33 score 189.00 average score 423.94\n",
|
||||
"episode 34 score 500.00 average score 426.11\n",
|
||||
"episode 35 score 500.00 average score 428.17\n",
|
||||
"episode 36 score 106.00 average score 419.46\n",
|
||||
"episode 37 score 500.00 average score 421.58\n",
|
||||
"episode 38 score 500.00 average score 423.59\n",
|
||||
"episode 39 score 500.00 average score 425.50\n",
|
||||
"episode 40 score 500.00 average score 427.32\n",
|
||||
"episode 41 score 500.00 average score 429.05\n",
|
||||
"episode 42 score 500.00 average score 430.70\n",
|
||||
"episode 43 score 500.00 average score 432.27\n",
|
||||
"episode 44 score 138.00 average score 425.73\n",
|
||||
"episode 45 score 500.00 average score 427.35\n",
|
||||
"episode 46 score 500.00 average score 428.89\n",
|
||||
"episode 47 score 500.00 average score 430.38\n",
|
||||
"episode 48 score 500.00 average score 431.80\n",
|
||||
"episode 49 score 500.00 average score 433.16\n",
|
||||
"episode 50 score 459.00 average score 433.67\n",
|
||||
"episode 51 score 500.00 average score 434.94\n",
|
||||
"episode 52 score 500.00 average score 436.17\n",
|
||||
"episode 53 score 500.00 average score 437.35\n",
|
||||
"episode 54 score 500.00 average score 438.49\n",
|
||||
"episode 55 score 157.00 average score 433.46\n",
|
||||
"episode 56 score 500.00 average score 434.63\n",
|
||||
"episode 57 score 500.00 average score 435.76\n",
|
||||
"episode 58 score 164.00 average score 431.15\n",
|
||||
"episode 59 score 112.00 average score 425.83\n",
|
||||
"episode 60 score 346.00 average score 424.52\n",
|
||||
"episode 61 score 500.00 average score 425.74\n",
|
||||
"episode 62 score 500.00 average score 426.92\n",
|
||||
"episode 63 score 500.00 average score 428.06\n",
|
||||
"episode 64 score 500.00 average score 429.17\n",
|
||||
"episode 65 score 500.00 average score 430.24\n",
|
||||
"episode 66 score 500.00 average score 431.28\n",
|
||||
"episode 67 score 140.00 average score 427.00\n",
|
||||
"episode 68 score 500.00 average score 428.06\n",
|
||||
"episode 69 score 500.00 average score 429.09\n",
|
||||
"episode 70 score 500.00 average score 430.08\n",
|
||||
"episode 71 score 500.00 average score 431.06\n",
|
||||
"episode 72 score 500.00 average score 432.00\n",
|
||||
"episode 73 score 500.00 average score 432.92\n",
|
||||
"episode 74 score 500.00 average score 433.81\n",
|
||||
"episode 75 score 500.00 average score 434.68\n",
|
||||
"episode 76 score 500.00 average score 435.53\n",
|
||||
"episode 77 score 500.00 average score 436.36\n",
|
||||
"episode 78 score 477.00 average score 436.87\n",
|
||||
"episode 79 score 500.00 average score 437.66\n",
|
||||
"episode 80 score 130.00 average score 433.86\n",
|
||||
"episode 81 score 500.00 average score 434.67\n",
|
||||
"episode 82 score 394.00 average score 434.18\n",
|
||||
"episode 83 score 500.00 average score 434.96\n",
|
||||
"episode 84 score 500.00 average score 435.73\n",
|
||||
"episode 85 score 500.00 average score 436.48\n",
|
||||
"episode 86 score 500.00 average score 437.21\n",
|
||||
"episode 87 score 500.00 average score 437.92\n",
|
||||
"episode 88 score 500.00 average score 438.62\n",
|
||||
"episode 89 score 500.00 average score 439.30\n",
|
||||
"episode 90 score 500.00 average score 439.97\n",
|
||||
"episode 91 score 500.00 average score 440.62\n",
|
||||
"episode 92 score 140.00 average score 437.39\n",
|
||||
"episode 93 score 500.00 average score 438.05\n",
|
||||
"episode 94 score 500.00 average score 438.71\n",
|
||||
"episode 95 score 500.00 average score 439.34\n",
|
||||
"episode 96 score 179.00 average score 436.66\n",
|
||||
"episode 97 score 282.00 average score 435.08\n",
|
||||
"episode 98 score 500.00 average score 435.74\n",
|
||||
"episode 99 score 108.00 average score 432.46\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"env = gymnasium.make('CartPole-v1')\n",
|
||||
"agent = evolution(generations=20, population_size=30, selection_size=15, selection_type='elitist')\n",
|
||||
"scores = []\n",
|
||||
"\n",
|
||||
"for i in range(100):\n",
|
||||
" done = False\n",
|
||||
" score = 0\n",
|
||||
" observation, info = env.reset()\n",
|
||||
" while not done:\n",
|
||||
" action = np.argmax(agent(observation[np.newaxis, :]))\n",
|
||||
" observation, reward, terminated, truncated, info = env.step(action)\n",
|
||||
" done = terminated or truncated\n",
|
||||
" score += reward\n",
|
||||
"\n",
|
||||
" scores.append(score)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" avg_score = np.mean(scores[max(0, i-100):(i+1)])\n",
|
||||
" print('episode ', i, 'score %.2f' % score, 'average score %.2f' % avg_score)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "bd385fe162c5ca0c84973b7dd5c518456272446b2b64e67c2a69f949ca7a1754"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
BIN
Aufgabe 6/dqn_model.keras
Normal file
BIN
Aufgabe 6/dqn_model.keras
Normal file
Binary file not shown.
Reference in New Issue
Block a user