This commit is contained in:
mia 2026-04-23 09:14:31 +02:00
parent 0d2340b7e8
commit a74be2fc31
11 changed files with 2367 additions and 184 deletions

33
nim.py
View file

@ -56,11 +56,17 @@ class NimAI():
float: The Q-value associated with the (state, action) pair.
Returns 0 if the pair is not yet in the Q-table.
"""
print(self.q)
print(self.q, state, action)
try:
return self.q[(tuple(state), action)]
except:
return 0
def update_q_value(self, state, action, old_q, reward, future_q):
"""
Update the Q-value for a state-action pair using the Q-learning formula.
Q(s, a) ← Q(s, a) + α * (reward + γ * max_a' Q(s', a') - Q(s, a))
Parameters:
state (list): The current game state.
@ -69,10 +75,11 @@ class NimAI():
reward (float): The reward received after taking the action.
future_q (float): The maximum Q-value for the next state.
"""
raise NotImplementedError
self.q[tuple(state), action] = old_q + self.alpha * (reward + self.epsilon * future_q - old_q)
return 0
def best_future_reward(self, state):
"""
"""
Determine the highest Q-value among all possible actions in a given state.
Parameters:
@ -82,7 +89,15 @@ class NimAI():
float: The highest Q-value among available actions.
Returns 0 if no actions are available.
"""
raise NotImplementedError
# actions = []
# for q in self.q.key:
# if q[0] == state:
# actions.append(q[1])
actions = tuple([key[1] for key in self.q.keys() if key[0] == state])
try:
return max([q for q in self.q[tuple(state), actions]])
except:
return 0
def choose_action(self, state, epsilon=True):
"""
@ -95,7 +110,15 @@ class NimAI():
Returns:
tuple: The chosen action from the available actions.
"""
raise NotImplementedError
if epsilon:
return random.choice(tuple(Nim.available_actions(state)))
# keys = [key[1] for key in self.q.key if key[0] == state]
# for key in keys:
else:
try:
return max([key[1] for key in self.q.keys() if key[0] == state])
except:
return (0,0)
def train(n):
player = NimAI()