Compare commits

...

7 Commits

Author SHA1 Message Date
ef7dce9bee Ready to present 2023-07-17 00:33:12 +02:00
8fd7baaf93 Final files 2023-07-17 00:02:10 +02:00
0fd332a6e5 Makrus hat nen kleinen Pimmel 2023-07-16 20:00:15 +02:00
eaf2e7e111 Am Konzept weitergearbeitet 2023-07-16 19:16:43 +02:00
fa6a721cc8 Aufgabe 6 2023-07-16 14:15:26 +02:00
e22a273609 viz 2023-07-16 00:40:47 +02:00
439f995eae Initial commit 2023-07-15 14:25:10 +02:00
9 changed files with 1257 additions and 0 deletions

View File

@ -0,0 +1,16 @@
import dataclasses
@dataclasses.dataclass
class ConnectionGene:
    """
    A single connection gene: a weighted, directed link between two nodes.
    """

    # (from_node, to_node) keys of the two nodes this gene links
    nodes: tuple[int, int]
    # Weight carried by the connection
    weight: float
    # Innovation number assigned to this connection
    innovation_no: int
    # Flag marking the gene as disabled; new genes start enabled
    disabled: bool = False
# Module-wide registry of known connection genes, keyed by (from_node, to_node).
_CONNECTION_GENES: dict[tuple[int, int], ConnectionGene] = {}


def reset_innovation_numbers():
    """
    Empty the module-wide connection registry in place.
    """
    _CONNECTION_GENES.clear()

280
Abschlussprojekt/genome.py Normal file
View File

@ -0,0 +1,280 @@
from __future__ import annotations
import dataclasses
import itertools
from enum import Enum
from random import choice
import matplotlib.pyplot as plt
import numpy as np
from graphs import creates_cycle
rng = np.random.default_rng()
from connection import _CONNECTION_GENES, ConnectionGene
from node import NodeGene, NodeType
class MutationType(Enum):
    """
    The structural mutations that can be applied to a genome.
    """

    # Insert a new connection gene
    ADD_CONNECTION = 1
    # Insert a new node gene
    ADD_NODE = 2
class Genome:
    """
    A genome made of node genes and the connection genes that link them.

    ``nodes`` maps a node key to its NodeGene, ``connections`` maps a
    ``(from_node, to_node)`` pair to its ConnectionGene, and ``fitness`` starts at 0
    and is left for the caller to assign.
    """

    def __init__(self):
        # Node genes, keyed by node key
        self.nodes: dict[int, NodeGene] = dict()

        # Connection genes, keyed by (from_node, to_node)
        self.connections: dict[tuple[int, int], ConnectionGene] = dict()

        self.fitness = 0

    def set_node(self, key: int, node: NodeGene) -> None:
        """
        Stores the given node gene under key, replacing any existing entry.
        """
        self.nodes[key] = node

    def set_connection(self, key: tuple[int, int], connection: ConnectionGene) -> None:
        """
        Stores the given connection gene under key, replacing any existing entry.
        No innovation number bookkeeping is done here.
        """
        self.connections[key] = connection

    def add_node(self, node_type: NodeType = NodeType.HIDDEN) -> int:
        """
        Adds a node of the given type to the genome and returns the identification key.
        """
        # Keys are not necessarily contiguous (set_node accepts arbitrary keys, e.g.
        # during crossover), so len(self.nodes) could collide with an existing key and
        # silently overwrite that node. Use the next key after the largest one instead;
        # for contiguous keys 0..n-1 this still yields n.
        key = max(self.nodes, default=-1) + 1
        self.nodes[key] = NodeGene(key, node_type)
        return key

    def add_connection(self, from_node: int, to_node: int, weight: float) -> tuple[int, int]:
        """
        Adds a connection of weight between two given nodes to the genome and returns
        the identification key.

        The innovation number is reused when the same (from_node, to_node) pair is
        already present in the module-wide registry; otherwise the next free number is
        assigned and the new gene is registered.

        Raises ValueError if a node key is not an int or does not exist in this genome.
        """
        if not isinstance(from_node, int) or not isinstance(to_node, int):
            raise ValueError("Nodes must be integer keys.")

        if from_node not in self.nodes or to_node not in self.nodes:
            raise ValueError("Nodes do not exist.")

        key = (from_node, to_node)
        connection = ConnectionGene(key, weight, -1)

        if key in _CONNECTION_GENES:
            connection.innovation_no = _CONNECTION_GENES[key].innovation_no
        else:
            connection.innovation_no = len(_CONNECTION_GENES)
            _CONNECTION_GENES[key] = connection

        # An existing connection with the same key is replaced.
        self.connections[key] = connection
        return key

    @staticmethod
    def new(inputs: int, outputs: int) -> Genome:
        """
        Creates a genome with the given number of input and output nodes in which
        every input is connected to every output with weight 1.

        Input nodes receive the keys 0..inputs-1, output nodes the keys that follow.
        """
        genome = Genome()

        # Add input nodes
        for _ in range(inputs):
            genome.add_node(node_type=NodeType.INPUT)

        # Add output nodes
        for _ in range(outputs):
            genome.add_node(node_type=NodeType.OUTPUT)

        # Fully connect
        for i in range(inputs):
            for o in range(inputs, inputs + outputs):
                genome.add_connection(i, o, weight=1)

        return genome

    @staticmethod
    def copy(genome: Genome) -> Genome:
        """
        Returns a clone of the given genome. Node and connection genes are duplicated
        with dataclasses.replace, so changing the clone's genes does not affect the
        original.
        """
        clone = Genome()

        # Copy nodes
        for key, node in genome.nodes.items():
            clone.set_node(key, dataclasses.replace(node))

        # Copy connections
        for key, connection in genome.connections.items():
            clone.set_connection(key, dataclasses.replace(connection))

        # Set fitness
        clone.fitness = genome.fitness

        return clone
def mutate(genome: Genome) -> None:
    """
    Applies one randomly selected structural mutation (add node or add connection)
    to the given genome in place.
    """
    selected = choice([MutationType.ADD_NODE, MutationType.ADD_CONNECTION])

    if selected is MutationType.ADD_NODE:
        _mutate_add_node(genome)
        return

    if selected is MutationType.ADD_CONNECTION:
        _mutate_add_connection(genome)
def crossover(mother: Genome, father: Genome) -> Genome:
    """
    Creates a child genome from two parents by aligning their connection genes on
    the innovation number.

    Matching genes (present in both parents) are taken at random from either parent.
    Disjoint and excess genes (present in one parent only) are taken from the fitter
    parent; with equal fitness each such gene is kept with probability 1/2.

    The number of input and output nodes is read from the mother; both parents are
    assumed to share these dimensions.
    """
    mother_connections = {conn.innovation_no: conn for conn in mother.connections.values()}
    father_connections = {conn.innovation_no: conn for conn in father.connections.values()}

    # Genes of the strictly fitter parent, or None when the fitness is equal
    if mother.fitness > father.fitness:
        fitter_connections = mother_connections
    elif father.fitness > mother.fitness:
        fitter_connections = father_connections
    else:
        fitter_connections = None

    child_connections: dict[int, ConnectionGene] = {}

    for i in sorted(mother_connections.keys() | father_connections.keys()):
        from_mother = mother_connections.get(i)
        from_father = father_connections.get(i)

        # Matching genes
        if from_mother is not None and from_father is not None:
            child_connections[i] = choice((from_mother, from_father))

        # Disjoint or excess, parents differ in fitness: only the fitter parent
        # contributes. A gene held solely by the weaker parent is dropped.
        elif fitter_connections is not None:
            if i in fitter_connections:
                child_connections[i] = fitter_connections[i]

        # Disjoint or excess, equal fitness: exactly one of the two is None, so the
        # gene is inherited with probability 1/2.
        else:
            connection = choice((from_mother, from_father))
            if connection is not None:
                child_connections[i] = connection

    # Determine input/output dimensions
    inputs = sum(node.type == NodeType.INPUT for node in mother.nodes.values())
    outputs = sum(node.type == NodeType.OUTPUT for node in mother.nodes.values())

    # Create child and set nodes & connections
    child = Genome.new(inputs, outputs)

    for connection in child_connections.values():
        # Set connections (as copies, so the parents' genes are never shared)
        child.set_connection(connection.nodes, dataclasses.replace(connection))

        from_node, to_node = connection.nodes

        # Add nodes if required; anything not created by Genome.new is a hidden node
        if from_node not in child.nodes:
            child.set_node(from_node, NodeGene(from_node, NodeType.HIDDEN))

        if to_node not in child.nodes:
            child.set_node(to_node, NodeGene(to_node, NodeType.HIDDEN))

    return child
def _mutate_add_connection(genome: Genome) -> None:
    """
    Add-connection mutation: picks a random source node (any non-output node) and a
    random target node (any non-input node that has no connection from the source
    yet) and links them with a weight drawn uniformly from [0, 1).

    The genome is left unchanged when no target is available or when the new
    connection would create a cycle.
    """
    sources = [key for key, node in genome.nodes.items() if node.type != NodeType.OUTPUT]
    from_node = choice(sources)

    targets = [
        key
        for key, node in genome.nodes.items()
        if not (node.type == NodeType.INPUT or (from_node, key) in genome.connections)
    ]
    if not targets:
        return
    to_node = choice(targets)

    # Checking for cycles
    if creates_cycle(genome.connections.keys(), (from_node, to_node)):
        return

    genome.add_connection(from_node, to_node, weight=rng.uniform(0, 1))
def _mutate_add_node(genome: Genome) -> None:
    """
    Add-node mutation: a randomly chosen enabled connection is split by a new node.
    The old connection is disabled and two new connections are added to the genome.
    The connection leading into the new node receives a weight of 1, the connection
    leading out of it receives the weight of the old connection.

    The genome is left unchanged when it has no enabled connection.
    """
    enabled = [gene for gene in genome.connections.values() if not gene.disabled]
    if not enabled:
        return

    # Pick the connection to split and switch it off
    split = choice(enabled)
    split.disabled = True
    from_node, to_node = split.nodes

    # Create new node
    new_node = genome.add_node()

    # Old source -> new node, then new node -> old target
    genome.add_connection(from_node, new_node, weight=1)
    genome.add_connection(new_node, to_node, weight=split.weight)
def _excess(g1: Genome, g2: Genome) -> list[int]:
g1_connections = {conn.innovation_no: conn for conn in g1.connections.values()}
g2_connections = {conn.innovation_no: conn for conn in g2.connections.values()}
less_connections, more_connections = sorted((g1_connections, g2_connections), key=lambda c: max(c.keys()))
return [k for k in more_connections.keys() if k > max(less_connections.keys())]
def _disjoint(g1: Genome, g2: Genome) -> list[int]:
g1_connections = {conn.innovation_no: conn for conn in g1.connections.values()}
g2_connections = {conn.innovation_no: conn for conn in g2.connections.values()}
less_connections, more_connections = sorted((g1_connections, g2_connections), key=lambda c: max(c.keys()))
return list(
{i for i in less_connections.keys() if i not in more_connections}
| {i for i in more_connections.keys() if i not in less_connections and i <= max(less_connections.keys())}
)
def _get_delta(g1: Genome, g2: Genome, c1: float, c2: float, c3: float) -> float:
n = max([len(g1.nodes), len(g2.nodes)])
g1_connections = {conn.innovation_no: conn for conn in g1.connections.values()}
g2_connections = {conn.innovation_no: conn for conn in g2.connections.values()}
innovation_numbers = set(g1_connections.keys()) | set(g2_connections.keys())
# Calculate number of excess genes
less_connections, more_connections = sorted((g1_connections, g2_connections), key=lambda c: max(c.keys()))
e = len([k for k in more_connections.keys() if k > max(less_connections.keys())])
# Calculate number of disjoint genes
d = len(
{i for i in less_connections.keys() if i not in more_connections}
| {i for i in more_connections.keys() if i not in less_connections and i <= max(less_connections.keys())}
)
# Average weight difference of matching genes
w = 0
for i in innovation_numbers:
if i in g1_connections and i in g2_connections:
w += abs(g1_connections[i].weight - g2_connections[i].weight)
delta = ((c1 * e) / n) + ((c2 * d) / n) + (c3 * w)
return delta
def specify(genomes: list, c1: float, c2: float, c3: float, *, threshold: float = 1) -> list[list]:
    """
    Groups the given genomes into species.

    Each genome is compared with the first member (the representative) of every
    existing species, in order, and joins the first species whose compatibility
    distance is below threshold. A genome that fits no species founds a new one.

    c1, c2 and c3 are passed on to the compatibility distance; threshold defaults
    to 1. Returns the list of species, each a list of genomes.
    """
    species: list[list] = []

    for genome in genomes:
        for members in species:
            # Distance is computed once per representative; stop at the first match
            if _get_delta(genome, members[0], c1, c2, c3) < threshold:
                members.append(genome)
                break
        else:
            # No compatible species found (or none exists yet)
            species.append([genome])

    return species

View File

@ -0,0 +1,28 @@
def creates_cycle(connections: list[tuple[int, int]], test: tuple[int, int]) -> bool:
    """
    Tells whether adding the 'test' connection would close a cycle, under the
    assumption that the graph described by 'connections' has no cycle yet.

    'connections' is iterated repeatedly, so it must be re-iterable.

    https://github.com/CodeReclaimers/neat-python/blob/4928381317213ee3285204ae1f2a086286aa3a10/neat/graphs.py#L4
    """
    source, target = test
    if source == target:
        # A self-loop is a cycle by itself
        return True

    # Grow the set of nodes reachable from the target until nothing new is found;
    # reaching the source means the new connection would close a loop.
    reachable = {target}
    expanded = True
    while expanded:
        expanded = False
        for start, end in connections:
            if start not in reachable or end in reachable:
                continue
            if end == source:
                return True
            reachable.add(end)
            expanded = True

    return False

518
Abschlussprojekt/neat.ipynb Normal file

File diff suppressed because one or more lines are too long

14
Abschlussprojekt/node.py Normal file
View File

@ -0,0 +1,14 @@
import dataclasses
from enum import Enum
class NodeType(Enum):
    """
    The role a node plays inside a genome.
    """

    INPUT = 1
    HIDDEN = 2
    OUTPUT = 3
@dataclasses.dataclass(frozen=True)
class NodeGene:
    """
    An immutable node gene, described by its identifier and its node type.
    """

    # Identifier of the node
    id: int
    # Role of the node (input, hidden or output)
    type: NodeType

View File

@ -0,0 +1 @@
pygraphviz

View File

@ -0,0 +1,83 @@
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import tabulate
from genome import Genome, NodeType, mutate
from node import NodeType
def _find_layer(g: nx.DiGraph, hidden_node: int, inputs: list[int]) -> int:
    """
    Returns the layer index for a hidden node: the node count of the longest simple
    path from any input node to it, or 2 when no input reaches the node.
    """
    path_lengths = [
        len(path)
        for input_node in inputs
        for path in nx.all_simple_paths(g, input_node, hidden_node)
    ]

    if not path_lengths:
        return 2
    return max(path_lengths)
def genome_graph(genome: Genome):
    """
    Draws the genome as a layered directed graph on the current matplotlib figure.

    Disabled connections are left out. Input nodes are drawn in red on layer 0,
    hidden nodes in green on the layer computed by _find_layer, and output nodes in
    blue one layer behind the deepest hidden node. Nothing is shown or returned; the
    caller is expected to call plt.show().
    """
    graph = nx.DiGraph()

    # Nodes first, then every enabled connection as a weighted edge
    for key in genome.nodes:
        graph.add_node(key)

    for gene in genome.connections.values():
        if not gene.disabled:
            source, target = gene.nodes
            graph.add_edge(source, target, weight=gene.weight)

    def ids_of(kind):
        return [node.id for node in genome.nodes.values() if node.type == kind]

    inputs = ids_of(NodeType.INPUT)
    hidden = ids_of(NodeType.HIDDEN)
    outputs = ids_of(NodeType.OUTPUT)

    # Assign the layer attribute used by the multipartite layout
    for node_id in inputs:
        graph.nodes[node_id]["layer"] = 0

    deepest = 1
    for node_id in hidden:
        depth = _find_layer(graph, node_id, inputs)
        graph.nodes[node_id]["layer"] = depth
        deepest = max(depth, deepest)

    for node_id in outputs:
        graph.nodes[node_id]["layer"] = deepest + 1

    plt.subplot()
    pos = nx.multipartite_layout(graph, subset_key="layer")

    # One colour per node type
    for nodelist, colour in ((inputs, "#ff0000"), (hidden, "#00ff00"), (outputs, "#0000ff")):
        nx.draw_networkx_nodes(graph, pos, nodelist=nodelist, node_color=colour)

    nx.draw_networkx_labels(graph, pos)
    nx.draw_networkx_edges(graph, pos)
def genome_table(genome: Genome):
    """
    Prints the connection genes of the genome as a table with one column per gene,
    ordered by innovation number. Each column holds the innovation number, the
    connection as "from->to" and "DIS" for disabled genes.
    """
    rows = []
    for conn in genome.connections.values():
        label = "->".join(str(n) for n in conn.nodes)
        rows.append((conn.innovation_no, label, "DIS" if conn.disabled else ""))

    rows.sort(key=lambda row: row[0])

    # Transpose so that every gene becomes a column
    columns = zip(*rows)
    print(tabulate.tabulate(columns, tablefmt="psql"))
if __name__ == "__main__":
    # Small demo: build a 3-input/2-output genome, mutate it six times and print
    # the resulting connection genes.
    g1 = Genome.new(3, 2)
    g1.add_connection(0, 4, 0.5)

    for _ in range(6):
        mutate(g1)

    # To draw the network as well, call genome_graph(g1) followed by plt.show().
    genome_table(g1)

317
Aufgabe 6/aufgabe06.ipynb Normal file
View File

@ -0,0 +1,317 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 205,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"import numpy as np\n",
"from tqdm import tqdm, trange\n",
"from random import sample\n",
"from typing import Literal\n",
"import statistics\n",
"import gymnasium \n",
"\n",
"rng = np.random.default_rng()"
]
},
{
"cell_type": "code",
"execution_count": 206,
"metadata": {},
"outputs": [],
"source": [
"def build_dqn(n_actions, input_dims, fc1_dims, fc2_dims):\n",
" model = tf.keras.Sequential([\n",
" tf.keras.layers.Dense(fc1_dims, activation=tf.keras.activations.relu, input_shape=(input_dims,)),\n",
" tf.keras.layers.Dense(fc2_dims, activation=tf.keras.activations.relu),\n",
" tf.keras.layers.Dense(n_actions)\n",
" ])\n",
" model.compile()\n",
"\n",
" return model"
]
},
{
"cell_type": "code",
"execution_count": 207,
"metadata": {},
"outputs": [],
"source": [
"mutate_weights = np.vectorize(lambda w : w if rng.uniform() > 0.7 else w + rng.normal(scale=0.3))\n",
"\n",
"def mutate(agent: tf.keras.Sequential) -> None:\n",
" for layer in agent.layers:\n",
" w, b = layer.get_weights()\n",
" layer.set_weights([mutate_weights(w), b]) # don't touch biases\n",
"\n",
"def recombine(mother: tf.keras.Sequential, father: tf.keras.Sequential) -> tf.keras.Sequential:\n",
" parent = rng.choice((mother, father))\n",
" child = tf.keras.models.clone_model(parent)\n",
" child.build(4)\n",
" child.compile()\n",
" child.set_weights(parent.get_weights())\n",
" return child"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Warum ist es eventuell nicht die beste Lösung, jedes Individuum nur einmal zu testen?\n",
"Um die Fitness zu ermitteln, sollte jedes Individuum mehrmals getestet werden, da der Agent nicht immer gleich gut abschneidet. Hier gestaltet es sich sinnvoll, jedes Individuum mindestens 3 Mal zu testen und den Median-Score zu wählen um Ausreißer nach unten und oben aus der Wertung zu nehmen. Alternativ könnte auch der durchschnittliche Score sinnvoll sein.\n",
"\n",
"### Fällt Ihnen eine gute Methode für ein Crossover ein?\n",
"Ja! Tests haben ergeben, dass das Wählen eines zufälligen Elternteils sinnvoll ist und schnell berechnet werden kann. Danke an Melissa für den Tipp.\n",
"\n",
"### Welche Beobachtung machen Sie mit den unterschiedlichen Selektionsarten?\n",
"Hier die Avg-Scores des besten Individuums von jeweils 5 Durchgängen mit 20 Generationen, 30 Individuen und einer Selection von 15: \\\n",
"Avg-Score mit Elitist: 384, 500, 500, 500, 500 \\\n",
"Avg-Score mit Proportional: 500, 291, 500, 432, 188 \\\n",
"500 ist der maximale Score.\n",
"Für genauere Aussagen müsste man mehr Durchgänge durchführen. Da ein Durchgang >4min dauert, wurde darauf allerdings verzichtet.\n",
"Beide Selektionsarten haben oft das Optimum erreicht. In diesen Durchgängen hat Elitist bessere Ergebnisse erzielen können. Mit Elitist konnte 4 von 5 Mal das Optimum erreicht werden."
]
},
{
"cell_type": "code",
"execution_count": 212,
"metadata": {},
"outputs": [],
"source": [
"def fitness(agent: tf.keras.Sequential, n: int = 3) -> int:\n",
" env = gymnasium.make('CartPole-v1')\n",
"\n",
" scores = []\n",
"\n",
" for _ in range(n):\n",
" done = False\n",
" score = 0\n",
" observation, _ = env.reset()\n",
" while not done:\n",
" actions = agent(observation[np.newaxis, :])\n",
" action = np.argmax(actions)\n",
" observation, reward, terminated, truncated, _ = env.step(action)\n",
" done = terminated or truncated\n",
" score += reward\n",
"\n",
" scores.append(score)\n",
"\n",
" return statistics.median(scores)\n",
" \n",
"\n",
"def evolution(\n",
" generations: int = 50,\n",
" population_size: int = 20,\n",
" selection_size: int = 5,\n",
" selection_type: Literal[\"elitist\"] | Literal[\"proportional\"] = \"elitist\",\n",
"):\n",
" assert selection_type in (\"elitist\", \"proportional\")\n",
" population = [build_dqn(2, 4, 5, 5) for _ in range(population_size)]\n",
"\n",
" for _ in trange(generations):\n",
" # Select individuals with highest fitness for reproduction\n",
" population = sorted(population, key=lambda agent: fitness(agent), reverse=True)\n",
"\n",
" if selection_type == \"elitist\":\n",
" selection = population[:selection_size]\n",
" elif selection_type == \"proportional\":\n",
" population_fitness = [fitness(nn) for nn in population]\n",
" selection = np.random.choice(\n",
" population,\n",
" selection_size,\n",
" p=[f / sum(population_fitness) for f in population_fitness],\n",
" replace=False\n",
" ).tolist()\n",
"\n",
" # Reproduce\n",
" offsprings = []\n",
" for _ in range(population_size - selection_size):\n",
" mother, father = sample(selection, 2)\n",
"\n",
" offspring = recombine(mother, father)\n",
" mutate(offspring)\n",
" \n",
" offsprings.append(offspring)\n",
"\n",
" # Create new population\n",
" population = selection + offsprings\n",
" \n",
" # Return best individual of final population\n",
" return max(population, key=lambda agent: fitness(agent))"
]
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 20/20 [02:08<00:00, 6.41s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"episode 0 score 500.00 average score 500.00\n",
"episode 1 score 500.00 average score 500.00\n",
"episode 2 score 500.00 average score 500.00\n",
"episode 3 score 500.00 average score 500.00\n",
"episode 4 score 500.00 average score 500.00\n",
"episode 5 score 500.00 average score 500.00\n",
"episode 6 score 500.00 average score 500.00\n",
"episode 7 score 500.00 average score 500.00\n",
"episode 8 score 500.00 average score 500.00\n",
"episode 9 score 500.00 average score 500.00\n",
"episode 10 score 133.00 average score 466.64\n",
"episode 11 score 500.00 average score 469.42\n",
"episode 12 score 106.00 average score 441.46\n",
"episode 13 score 500.00 average score 445.64\n",
"episode 14 score 128.00 average score 424.47\n",
"episode 15 score 500.00 average score 429.19\n",
"episode 16 score 500.00 average score 433.35\n",
"episode 17 score 500.00 average score 437.06\n",
"episode 18 score 500.00 average score 440.37\n",
"episode 19 score 500.00 average score 443.35\n",
"episode 20 score 500.00 average score 446.05\n",
"episode 21 score 500.00 average score 448.50\n",
"episode 22 score 500.00 average score 450.74\n",
"episode 23 score 500.00 average score 452.79\n",
"episode 24 score 118.00 average score 439.40\n",
"episode 25 score 500.00 average score 441.73\n",
"episode 26 score 500.00 average score 443.89\n",
"episode 27 score 500.00 average score 445.89\n",
"episode 28 score 117.00 average score 434.55\n",
"episode 29 score 500.00 average score 436.73\n",
"episode 30 score 500.00 average score 438.77\n",
"episode 31 score 500.00 average score 440.69\n",
"episode 32 score 123.00 average score 431.06\n",
"episode 33 score 189.00 average score 423.94\n",
"episode 34 score 500.00 average score 426.11\n",
"episode 35 score 500.00 average score 428.17\n",
"episode 36 score 106.00 average score 419.46\n",
"episode 37 score 500.00 average score 421.58\n",
"episode 38 score 500.00 average score 423.59\n",
"episode 39 score 500.00 average score 425.50\n",
"episode 40 score 500.00 average score 427.32\n",
"episode 41 score 500.00 average score 429.05\n",
"episode 42 score 500.00 average score 430.70\n",
"episode 43 score 500.00 average score 432.27\n",
"episode 44 score 138.00 average score 425.73\n",
"episode 45 score 500.00 average score 427.35\n",
"episode 46 score 500.00 average score 428.89\n",
"episode 47 score 500.00 average score 430.38\n",
"episode 48 score 500.00 average score 431.80\n",
"episode 49 score 500.00 average score 433.16\n",
"episode 50 score 459.00 average score 433.67\n",
"episode 51 score 500.00 average score 434.94\n",
"episode 52 score 500.00 average score 436.17\n",
"episode 53 score 500.00 average score 437.35\n",
"episode 54 score 500.00 average score 438.49\n",
"episode 55 score 157.00 average score 433.46\n",
"episode 56 score 500.00 average score 434.63\n",
"episode 57 score 500.00 average score 435.76\n",
"episode 58 score 164.00 average score 431.15\n",
"episode 59 score 112.00 average score 425.83\n",
"episode 60 score 346.00 average score 424.52\n",
"episode 61 score 500.00 average score 425.74\n",
"episode 62 score 500.00 average score 426.92\n",
"episode 63 score 500.00 average score 428.06\n",
"episode 64 score 500.00 average score 429.17\n",
"episode 65 score 500.00 average score 430.24\n",
"episode 66 score 500.00 average score 431.28\n",
"episode 67 score 140.00 average score 427.00\n",
"episode 68 score 500.00 average score 428.06\n",
"episode 69 score 500.00 average score 429.09\n",
"episode 70 score 500.00 average score 430.08\n",
"episode 71 score 500.00 average score 431.06\n",
"episode 72 score 500.00 average score 432.00\n",
"episode 73 score 500.00 average score 432.92\n",
"episode 74 score 500.00 average score 433.81\n",
"episode 75 score 500.00 average score 434.68\n",
"episode 76 score 500.00 average score 435.53\n",
"episode 77 score 500.00 average score 436.36\n",
"episode 78 score 477.00 average score 436.87\n",
"episode 79 score 500.00 average score 437.66\n",
"episode 80 score 130.00 average score 433.86\n",
"episode 81 score 500.00 average score 434.67\n",
"episode 82 score 394.00 average score 434.18\n",
"episode 83 score 500.00 average score 434.96\n",
"episode 84 score 500.00 average score 435.73\n",
"episode 85 score 500.00 average score 436.48\n",
"episode 86 score 500.00 average score 437.21\n",
"episode 87 score 500.00 average score 437.92\n",
"episode 88 score 500.00 average score 438.62\n",
"episode 89 score 500.00 average score 439.30\n",
"episode 90 score 500.00 average score 439.97\n",
"episode 91 score 500.00 average score 440.62\n",
"episode 92 score 140.00 average score 437.39\n",
"episode 93 score 500.00 average score 438.05\n",
"episode 94 score 500.00 average score 438.71\n",
"episode 95 score 500.00 average score 439.34\n",
"episode 96 score 179.00 average score 436.66\n",
"episode 97 score 282.00 average score 435.08\n",
"episode 98 score 500.00 average score 435.74\n",
"episode 99 score 108.00 average score 432.46\n"
]
}
],
"source": [
"env = gymnasium.make('CartPole-v1')\n",
"agent = evolution(generations=20, population_size=30, selection_size=15, selection_type='elitist')\n",
"scores = []\n",
"\n",
"for i in range(100):\n",
" done = False\n",
" score = 0\n",
" observation, info = env.reset()\n",
" while not done:\n",
" action = np.argmax(agent(observation[np.newaxis, :]))\n",
" observation, reward, terminated, truncated, info = env.step(action)\n",
" done = terminated or truncated\n",
" score += reward\n",
"\n",
" scores.append(score)\n",
" \n",
"\n",
" avg_score = np.mean(scores[max(0, i-100):(i+1)])\n",
" print('episode ', i, 'score %.2f' % score, 'average score %.2f' % avg_score)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "bd385fe162c5ca0c84973b7dd5c518456272446b2b64e67c2a69f949ca7a1754"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

BIN
Aufgabe 6/dqn_model.keras Normal file

Binary file not shown.