uhhhh

2026-04-23 09:14:31 +02:00 · 2026-04-23 09:14:31 +02:00 · a74be2fc31
commit a74be2fc31
parent 0d2340b7e8
11 changed files with 2367 additions and 184 deletions
--- a/nim.py
+++ b/nim.py
@@ -56,11 +56,17 @@ class NimAI():
        float: The Q-value associated with the (state, action) pair. 
               Returns 0 if the pair is not yet in the Q-table.
    """
-        print(self.q)
+        print(self.q, state, action)
+        try:
+            return self.q[(tuple(state), action)]
+        except:
+            return 0

    def update_q_value(self, state, action, old_q, reward, future_q):
        """
    Update the Q-value for a state-action pair using the Q-learning formula.
+
+    Q(s, a) ← Q(s, a) + α * (reward + γ * max_a' Q(s', a') - Q(s, a))
    
    Parameters:
        state (list): The current game state.
@@ -69,10 +75,11 @@ class NimAI():
        reward (float): The reward received after taking the action.
        future_q (float): The maximum Q-value for the next state.
    """
-        raise NotImplementedError
+        self.q[tuple(state), action] = old_q + self.alpha * (reward + self.epsilon * future_q - old_q)
+        return 0
    
    def best_future_reward(self, state):
-            """
+        """
    Determine the highest Q-value among all possible actions in a given state.
    
    Parameters:
@@ -82,7 +89,15 @@ class NimAI():
        float: The highest Q-value among available actions. 
               Returns 0 if no actions are available.
    """
-            raise NotImplementedError
+        # actions = []
+        # for q in self.q.key:
+        #     if q[0] == state:
+        #         actions.append(q[1])
+        actions = tuple([key[1] for key in self.q.keys() if key[0] == state])
+        try:
+            return max([q for q in self.q[tuple(state), actions]])
+        except:
+            return 0

    def choose_action(self, state, epsilon=True):
        """
@@ -95,7 +110,15 @@ class NimAI():
    Returns:
        tuple: The chosen action from the available actions.
    """
-        raise NotImplementedError
+        if epsilon:
+            return random.choice(tuple(Nim.available_actions(state)))
+            # keys = [key[1] for key in self.q.key if key[0] == state]
+            # for key in keys:
+        else:
+            try:
+                return max([key[1] for key in self.q.keys() if key[0] == state])
+            except:
+                return (0,0)

 def train(n):
    player = NimAI()