commit 0d2340b7e81b6d442e774b63c537a68433dfdf77
Author: patkordum <patkordum@gmail.com>
Date:   Wed Apr 23 09:46:50 2025 +0200

    First commit

diff --git a/game.py b/game.py
new file mode 100644
index 0000000..f47adec
--- /dev/null
+++ b/game.py
@@ -0,0 +1,179 @@
+import pygame
+import sys
+import time
+
+# Initialize all pygame modules; must run before pygame.font.Font below
+pygame.init()
+
+# Window size, frame rate, RGB colours and the UI font
+WIDTH, HEIGHT = 700, 500
+FPS = 30
+WHITE = (255, 255, 255)
+BLACK = (0, 0, 0)
+RED = (255, 0, 0)
+GREEN = (0, 255, 0)
+LIGHT_GREY = (200, 200, 200)
+DARK_GREY = (50, 50, 50)
+FONT = pygame.font.Font(None, 36)  # None selects pygame's default font, size 36
+
+# Create the game window
+screen = pygame.display.set_mode((WIDTH, HEIGHT))
+pygame.display.set_caption('Nim Game')
+
+# Mutable game state shared by the functions below
+piles = [4, 4, 4, 4]  # coins remaining in each of the 4 piles
+selected_stones = []  # (pile_index, stone_index) pairs picked for removal
+selected_pile = None  # index of the pile the current selection is locked to
+
+# Whose turn it is: 1 = human (moves first), 2 = AI
+player_turn = 1
+
+# Set by check_game_over() once every pile is empty
+game_over = False
+winner = None
+
+def draw_piles():
+    """Render each pile as a column of coins stacked above its label.
+
+    Coins whose ``(pile, position)`` pair is in ``selected_stones`` are
+    drawn green; all other coins are red.
+    """
+    coin_radius, column_gap, base_y = 20, 150, 250
+    row_step = 2 * coin_radius + 10  # vertical distance between coin centres
+    for idx, coin_count in enumerate(piles):
+        centre_x = 130 + idx * column_gap  # first column sits 130px from the left
+        for level in range(coin_count):
+            fill = GREEN if (idx, level) in selected_stones else RED
+            centre = (centre_x, base_y - level * row_step)
+            pygame.draw.circle(screen, fill, centre, coin_radius)
+        # Caption goes at a fixed height below the lowest coin
+        caption = FONT.render(f'Pile {idx + 1}', True, BLACK)
+        screen.blit(caption, (centre_x - 30, 320))
+
+def check_game_over():
+    """End the game once all piles are empty; whoever took the last coin loses."""
+    global winner, game_over
+    if all(pile == 0 for pile in piles):
+        # Callers switch player_turn before calling, so it names the winner.
+        winner, game_over = player_turn, True
+
+def draw_game_state():
+    """Repaint the whole frame: background, piles, status banner and buttons.
+
+    The banner shows the result once ``game_over`` is set and otherwise whose
+    turn it is. The "Restart" button is only drawn after the game has ended,
+    while the "Remove" button is drawn on every frame.
+    """
+    screen.fill(LIGHT_GREY)
+    draw_piles()
+
+    humans_turn = player_turn == 1
+    if game_over:
+        # player_turn has already moved on from whoever took the last coin
+        banner, banner_colour = ('You win, hooray!' if humans_turn else 'AI wins!'), GREEN
+    else:
+        banner, banner_colour = ('Your turn!' if humans_turn else 'Computer thinking... '), BLACK
+    banner_surface = FONT.render(banner, True, banner_colour)
+    screen.blit(banner_surface, (WIDTH // 2 - banner_surface.get_width() // 2, 30))
+
+    centre_x = WIDTH // 2
+    if game_over:
+        restart_top = HEIGHT - 60
+        pygame.draw.rect(screen, DARK_GREY, (centre_x - 60, restart_top, 120, 40))
+        restart_label = FONT.render("Restart", True, WHITE)
+        screen.blit(restart_label, (centre_x - restart_label.get_width() // 2, restart_top + 10))
+
+    remove_top = HEIGHT - 120
+    pygame.draw.rect(screen, BLACK, (centre_x - 60, remove_top, 120, 40))
+    remove_label = FONT.render("Remove", True, WHITE)
+    screen.blit(remove_label, (centre_x - remove_label.get_width() // 2, remove_top + 10))
+
+def remove_stones():
+    """Remove the selected coins and pass the turn; no-op if nothing is selected."""
+    global player_turn, selected_pile
+    if not selected_stones:
+        return  # a Nim move must take at least one coin, so keep the turn
+    piles[selected_stones[0][0]] -= len(selected_stones)  # selection is one pile
+    selected_stones.clear()
+    selected_pile, player_turn = None, 3 - player_turn  # unlock pile, swap 1<->2
+    check_game_over()
+
+def handle_selection(pile_index, stone_index):
+    """Toggle one coin; the selection is locked to a single pile while non-empty."""
+    global selected_pile
+    stone = (pile_index, stone_index)
+    if selected_pile in (None, pile_index):
+        toggle = selected_stones.remove if stone in selected_stones else selected_stones.append
+        toggle(stone)
+        # Drop the pile lock when the last coin is deselected (was left stuck)
+        selected_pile = pile_index if selected_stones else None
+
+def restart_game():
+    """Put every piece of module-level game state back to its opening value."""
+    global piles, player_turn, selected_stones, selected_pile, game_over, winner
+    # Clear the pending selection first, then rebuild the board
+    selected_stones.clear()
+    selected_pile = winner = None
+    piles = [4, 4, 4, 4]  # four fresh piles of four coins
+    player_turn = 1  # player 1 opens again
+    game_over = False
+
+def start_game(ai):
+    """Run the pygame main loop with the human as player 1 and ``ai`` as player 2.
+
+    ``ai`` must offer ``choose_action(piles, epsilon=False)`` returning a
+    ``(pile, count)`` pair. The loop only ends when the window is closed,
+    which quits pygame and exits the interpreter.
+    """
+    global player_turn, game_over
+
+    def in_button(pos, top):
+        """Tell whether ``pos`` is inside the 120x40 button whose top edge is ``top``."""
+        return WIDTH // 2 - 60 <= pos[0] <= WIDTH // 2 + 60 and top <= pos[1] <= top + 40
+
+    coin_radius, column_gap, row_step = 20, 150, 50  # same geometry as draw_piles
+    ticker = pygame.time.Clock()
+    while True:
+        ticker.tick(FPS)
+
+        for event in pygame.event.get():
+            if event.type == pygame.QUIT:
+                pygame.quit()
+                sys.exit()
+            if event.type != pygame.MOUSEBUTTONDOWN:
+                continue
+            if game_over:
+                # After the game has ended only the "Restart" button reacts
+                if in_button(event.pos, HEIGHT - 60):
+                    restart_game()
+            elif player_turn == 1:
+                click_x, click_y = event.pos
+                # Toggle every coin whose circle contains the click
+                for pile_index, pile in enumerate(piles):
+                    centre_x = 130 + pile_index * column_gap
+                    for stone_index in range(pile):
+                        centre_y = 250 - stone_index * row_step
+                        gap = ((click_x - centre_x) ** 2 + (click_y - centre_y) ** 2) ** 0.5
+                        if gap <= coin_radius:
+                            handle_selection(pile_index, stone_index)
+                if in_button(event.pos, HEIGHT - 120):
+                    remove_stones()
+
+        if player_turn == 2 and not game_over:
+            # Paint the "thinking" banner before the blocking two-second pause
+            draw_game_state()
+            pygame.display.flip()
+            time.sleep(2)
+            remove_stones_from_ai(ai.choose_action(piles, epsilon=False))
+
+        draw_game_state()
+        pygame.display.flip()
+
+def remove_stones_from_ai(action):
+    """Apply the AI's ``(pile, count)`` move and hand the turn back to the human."""
+    global player_turn
+    pile, count = action
+    if not (0 <= pile < len(piles) and 1 <= count <= piles[pile]):
+        raise ValueError(f"illegal AI move {action!r} for piles {piles}")
+    piles[pile], player_turn = piles[pile] - count, 1  # remove coins, human's turn
+    check_game_over()
diff --git a/nim.py b/nim.py
new file mode 100644
index 0000000..42afde8
--- /dev/null
+++ b/nim.py
@@ -0,0 +1,123 @@
+import random
+
+class Nim():
+    """One game of Nim in which the player who takes the last object loses."""
+    def __init__(self, initial=(4, 4, 4, 4)):
+        """Start a game with pile sizes copied from ``initial``; player 0 moves first."""
+        self.piles = list(initial)  # private copy; any iterable of ints works
+        self.player = 0
+        self.winner = None
+
+    @classmethod
+    def available_actions(cls, piles):
+        """Return the set of legal ``(pile_index, count)`` moves for ``piles``."""
+        return {(i, j) for i, pile in enumerate(piles) for j in range(1, pile + 1)}
+
+    @classmethod
+    def other_player(cls, player):
+        """Return 0 when ``player`` is 1, and 1 otherwise."""
+        return 0 if player == 1 else 1
+
+    def switch_player(self):  # hand the turn to the opponent
+        self.player = Nim.other_player(self.player)
+
+    def move(self, action):
+        """Remove ``count`` objects from pile ``pile`` and pass the turn."""
+        pile, count = action
+        self.piles[pile] -= count
+        self.switch_player()
+        if not any(self.piles):  # turn already passed, so the last taker loses
+            self.winner = self.player
+
+
+class NimAI():
+    """Tabular Q-learning agent for Nim.
+
+    ``self.q`` maps ``(tuple(state), action)`` to a Q-value; pairs that were
+    never updated count as 0. The constructor pre-loads two sample entries.
+    """
+
+    def __init__(self, alpha=0.5, epsilon=0.1):
+        self.q = dict()  # Q-value table
+        self.q[(0, 0, 0, 2), (3, 2)] = -1  # Test Q-Value
+        self.q[(0, 0, 0, 2), (3, 1)] = 10  # Test Q-Value
+        self.alpha = alpha  # Learning rate
+        self.epsilon = epsilon  # Exploration rate
+
+    def update(self, old_state, action, new_state, reward):
+        """Learn from one transition: ``action`` led from ``old_state`` to ``new_state``."""
+        old_q = self.get_q_value(old_state, action)
+        best_future_q = self.best_future_reward(new_state)
+        self.update_q_value(old_state, action, old_q, reward, best_future_q)
+
+    def get_q_value(self, state, action):
+        """Return the Q-value for a given state-action pair.
+
+        Parameters:
+            state (list): The current game state.
+            action (tuple): The action being evaluated.
+
+        Returns:
+            float: The stored Q-value, or 0 if the pair is not yet in the Q-table.
+        """
+        # Lists are unhashable, so the table is keyed by the state as a tuple
+        return self.q.get((tuple(state), action), 0)
+
+    def update_q_value(self, state, action, old_q, reward, future_q):
+        """Store ``old_q + alpha * (reward + future_q - old_q)`` for the pair.
+
+        Parameters:
+            state (list): The current game state.
+            action (tuple): The action taken.
+            old_q (float): The previous Q-value for the (state, action) pair.
+            reward (float): The reward received after taking the action.
+            future_q (float): The maximum Q-value for the next state.
+        """
+        self.q[tuple(state), action] = old_q + self.alpha * (reward + future_q - old_q)
+
+    def best_future_reward(self, state):
+        """Return the highest Q-value among all possible actions in ``state``.
+
+        Returns 0 if no actions are available.
+        """
+        actions = Nim.available_actions(state)
+        return max((self.get_q_value(state, action) for action in actions), default=0)
+
+    def choose_action(self, state, epsilon=True):
+        """Choose an action for ``state`` using an epsilon-greedy strategy.
+
+        With ``epsilon`` true, a random action is taken with probability
+        ``self.epsilon``; otherwise the action with the highest Q-value is
+        returned. Raises ValueError if ``state`` has no available actions.
+        """
+        actions = list(Nim.available_actions(state))
+        if not actions:
+            raise ValueError("no available actions")
+        if epsilon and random.random() < self.epsilon:
+            return random.choice(actions)
+        return max(actions, key=lambda action: self.get_q_value(state, action))
+
+def train(n):
+    """Train a fresh NimAI through ``n`` games of self-play and return it.
+
+    The board-emptying move is rewarded -1, the opponent's prior move +1, others 0.
+    """
+    player = NimAI()
+    for _ in range(n):
+        game = Nim([4, 4, 4, 4])
+        # Most recent (state, action) of each player, keyed by player number
+        last_move = {p: {"state": None, "action": None} for p in (0, 1)}
+        while game.winner is None:
+            state = game.piles.copy()
+            action = player.choose_action(state)
+            last_move[game.player].update(state=state, action=action)
+            game.move(action)  # also switches game.player to the opponent
+            new_state = game.piles.copy()
+            previous = last_move[game.player]
+            if game.winner is not None:
+                player.update(state, action, new_state, -1)
+                player.update(previous["state"], previous["action"], new_state, 1)
+            elif previous["state"] is not None:
+                player.update(previous["state"], previous["action"], new_state, 0)
+
+    return player
diff --git a/play.py b/play.py
new file mode 100644
index 0000000..6206d25
--- /dev/null
+++ b/play.py
@@ -0,0 +1,11 @@
+from nim import train
+from game import start_game
+
+if __name__ == "__main__":
+    print("START TRAINING \n")
+    # Self-play over 1000 games produces the opponent used by the GUI
+    trained_ai = train(1000)
+
+    print("STARTING THE GAME \n")
+    # Hand control to the pygame loop with the trained agent as opponent
+    start_game(trained_ai)
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..0731673
--- /dev/null
+++ b/test.py
@@ -0,0 +1,31 @@
+from nim import NimAI
+
+def test_get_q_value(ai):
+    """Print the value ``ai.get_q_value`` returns for one fixed state-action pair."""
+    print("\n--- Testing get_q_value ---")
+    state, action = (0, 0, 0, 2), (3, 2)
+    q_value = ai.get_q_value(state, action)
+    print(f"Q-value for state {state}, action {action}: {q_value}")
+
+
+def test_update_q_value(ai):
+    """Print the section banner for update_q_value; ``ai`` is not exercised yet."""
+    print("\n--- Testing update_q_value ---")
+
+def test_best_future_reward(ai):
+    """Print the section banner for best_future_reward; ``ai`` is not exercised yet."""
+    print("\n--- Testing best_future_reward ---")
+
+def test_choose_action(ai):
+    """Print the section banner for choose_action; ``ai`` is not exercised yet."""
+    print("\n--- Testing choose_action ---")
+
+if __name__ == "__main__":
+    agent = NimAI()
+
+    # Run every check in order against the same agent instance
+    for check in (test_get_q_value, test_update_q_value,
+                  test_best_future_reward, test_choose_action):
+        check(agent)
+
+    print("\nAll tests completed.")