Add nix dev shell (python3 + pygame) with direnv cache; work on Q-learning methods in nim.py
This commit is contained in:
parent
0d2340b7e8
commit
a74be2fc31
11 changed files with 2367 additions and 184 deletions
19
.direnv/bin/nix-direnv-reload
Executable file
19
.direnv/bin/nix-direnv-reload
Executable file
|
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/env bash
|
||||
set -e
|
||||
if [[ ! -d "/home/mia/git/Nim-AI-template" ]]; then
|
||||
echo "Cannot find source directory; Did you move it?"
|
||||
echo "(Looking for "/home/mia/git/Nim-AI-template")"
|
||||
echo 'Cannot force reload with this script - use "direnv reload" manually and then try again'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# rebuild the cache forcefully
|
||||
_nix_direnv_force_reload=1 direnv exec "/home/mia/git/Nim-AI-template" true
|
||||
|
||||
# Update the mtime for .envrc.
|
||||
# This will cause direnv to reload again - but without re-building.
|
||||
touch "/home/mia/git/Nim-AI-template/.envrc"
|
||||
|
||||
# Also update the timestamp of whatever profile_rc we have.
|
||||
# This makes sure that we know we are up to date.
|
||||
touch -r "/home/mia/git/Nim-AI-template/.envrc" "/home/mia/git/Nim-AI-template/.direnv"/*.rc
|
||||
1
.direnv/nix-profile-25.11-wl5m60vn27dl0dnq
Symbolic link
1
.direnv/nix-profile-25.11-wl5m60vn27dl0dnq
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
/nix/store/swz054hgfjc292xpp0axf1p089mlcbcd-nix-shell-env
|
||||
2124
.direnv/nix-profile-25.11-wl5m60vn27dl0dnq.rc
Normal file
2124
.direnv/nix-profile-25.11-wl5m60vn27dl0dnq.rc
Normal file
File diff suppressed because it is too large
Load diff
1
.envrc
Normal file
1
.envrc
Normal file
|
|
@ -0,0 +1 @@
|
|||
use nix
|
||||
BIN
__pycache__/game.cpython-313.pyc
Normal file
BIN
__pycache__/game.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/game.cpython-315.pyc
Normal file
BIN
__pycache__/game.cpython-315.pyc
Normal file
Binary file not shown.
BIN
__pycache__/nim.cpython-313.pyc
Normal file
BIN
__pycache__/nim.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/nim.cpython-315.pyc
Normal file
BIN
__pycache__/nim.cpython-315.pyc
Normal file
Binary file not shown.
7
game.py
7
game.py
|
|
@ -91,6 +91,8 @@ def draw_game_state():
|
|||
def remove_stones():
|
||||
"""Removes the selected stones from the selected pile."""
|
||||
global player_turn, selected_pile
|
||||
if not selected_stones:
|
||||
return # Do nothing if nothing is selected
|
||||
for pile_index, stone_index in selected_stones:
|
||||
piles[pile_index] -= 1
|
||||
selected_stones.clear()
|
||||
|
|
@ -102,11 +104,14 @@ def handle_selection(pile_index, stone_index):
|
|||
"""Handles selecting or deselecting stones, ensuring only one pile can be selected."""
|
||||
global selected_pile
|
||||
if selected_pile is None or selected_pile == pile_index:
|
||||
selected_pile = pile_index # Lock the selection to the current pile
|
||||
if (pile_index, stone_index) in selected_stones:
|
||||
selected_stones.remove((pile_index, stone_index)) # Deselect
|
||||
# If no stones are selected anymore, allow switching piles
|
||||
if not selected_stones:
|
||||
selected_pile = None
|
||||
else:
|
||||
selected_stones.append((pile_index, stone_index)) # Select
|
||||
selected_pile = pile_index # Lock the selection to the current pile
|
||||
|
||||
def restart_game():
|
||||
"""Restarts the game."""
|
||||
|
|
|
|||
33
nim.py
33
nim.py
|
|
@ -56,12 +56,18 @@ class NimAI():
|
|||
float: The Q-value associated with the (state, action) pair.
|
||||
Returns 0 if the pair is not yet in the Q-table.
|
||||
"""
|
||||
print(self.q)
|
||||
print(self.q, state, action)
|
||||
try:
|
||||
return self.q[(tuple(state), action)]
|
||||
except:
|
||||
return 0
|
||||
|
||||
def update_q_value(self, state, action, old_q, reward, future_q):
|
||||
"""
|
||||
Update the Q-value for a state-action pair using the Q-learning formula.
|
||||
|
||||
Q(s, a) ← Q(s, a) + α * (Belohnung + γ * max_a' Q(s', a') - Q(s, a))
|
||||
|
||||
Parameters:
|
||||
state (list): The current game state.
|
||||
action (tuple): The action taken.
|
||||
|
|
@ -69,10 +75,11 @@ class NimAI():
|
|||
reward (float): The reward received after taking the action.
|
||||
future_q (float): The maximum Q-value for the next state.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
self.q[tuple(state), action] = old_q + self.alpha * (reward + self.epsilon * future_q - old_q)
|
||||
return 0
|
||||
|
||||
def best_future_reward(self, state):
|
||||
"""
|
||||
"""
|
||||
Determine the highest Q-value among all possible actions in a given state.
|
||||
|
||||
Parameters:
|
||||
|
|
@ -82,7 +89,15 @@ class NimAI():
|
|||
float: The highest Q-value among available actions.
|
||||
Returns 0 if no actions are available.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
# actions = []
|
||||
# for q in self.q.key:
|
||||
# if q[0] == state:
|
||||
# actions.append(q[1])
|
||||
actions = tuple([key[1] for key in self.q.keys() if key[0] == state])
|
||||
try:
|
||||
return max([q for q in self.q[tuple(state), actions]])
|
||||
except:
|
||||
return 0
|
||||
|
||||
def choose_action(self, state, epsilon=True):
|
||||
"""
|
||||
|
|
@ -95,7 +110,15 @@ class NimAI():
|
|||
Returns:
|
||||
tuple: The chosen action from the available actions.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
if epsilon:
|
||||
return random.choice(tuple(Nim.available_actions(state)))
|
||||
# keys = [key[1] for key in self.q.key if key[0] == state]
|
||||
# for key in keys:
|
||||
else:
|
||||
try:
|
||||
return max([key[1] for key in self.q.keys() if key[0] == state])
|
||||
except:
|
||||
return (0,0)
|
||||
|
||||
def train(n):
|
||||
player = NimAI()
|
||||
|
|
|
|||
10
shell.nix
Normal file
10
shell.nix
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
let
|
||||
pkgs = import <nixpkgs> {};
|
||||
in pkgs.mkShell {
|
||||
packages = [
|
||||
(pkgs.python3.withPackages (python-pkgs: [
|
||||
python-pkgs.pygame
|
||||
]))
|
||||
];
|
||||
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue