First commit

2025-04-23 09:46:50 +02:00 · 2025-04-23 09:46:50 +02:00 · 0d2340b7e8
commit 0d2340b7e8
4 changed files with 344 additions and 0 deletions
--- a/game.py
+++ b/game.py
@ -0,0 +1,179 @@
 import pygame
 import sys
 import time
 # Initialize all imported pygame modules; this has to run before the font
 # and display calls below.
 pygame.init()
 # Window size in pixels and the frame-rate cap handed to clock.tick()
 WIDTH, HEIGHT = 700, 500
 FPS = 30
 # RGB colour tuples used by the drawing code
 WHITE = (255, 255, 255)
 BLACK = (0, 0, 0)
 RED = (255, 0, 0)
 GREEN = (0, 255, 0)
 LIGHT_GREY = (200, 200, 200)
 DARK_GREY = (50, 50, 50)
 # Shared UI font: pygame's default typeface (None) at size 36
 FONT = pygame.font.Font(None, 36)
 # Create the game window and set its title
 screen = pygame.display.set_mode((WIDTH, HEIGHT))
 pygame.display.set_caption('Nim Game')
 # Mutable game state shared (as module globals) by the functions below
 piles = [4, 4, 4, 4]  # Stones remaining in each of the 4 piles
 selected_stones = []  # (pile index, stone index) pairs the human has marked for removal
 selected_pile = None  # Index of the pile the current selection is locked to, or None
 # Whose turn it is: 1 = human, 2 = AI (see start_game); player 1 moves first
 player_turn = 1  # Alternates between 1 and 2
 # End-of-game flags, both written by check_game_over()
 game_over = False
 winner = None
 def draw_piles():
    """Render each pile as a vertical column of circles with a caption below.

    Stones are stacked upward from a fixed baseline. A stone whose
    ``(pile index, stone index)`` pair is present in ``selected_stones`` is
    drawn green; every other stone is drawn red.
    """
    first_column_x = 130  # x of the left-most column
    baseline_y = 250      # y of the bottom stone in every column
    column_gap = 150      # horizontal distance between neighbouring columns
    radius = 20
    row_step = 2 * radius + 10  # vertical distance between stacked stones

    for idx, pile in enumerate(piles):
        column_x = first_column_x + idx * column_gap
        for stone in range(pile):
            color = GREEN if (idx, stone) in selected_stones else RED
            center = (column_x, baseline_y - stone * row_step)
            pygame.draw.circle(screen, color, center, radius)
        label = FONT.render(f'Pile {idx + 1}', True, BLACK)
        screen.blit(label, (column_x - 30, 320))
 def check_game_over():
    """Mark the game as finished once every pile is empty and record the winner.

    Both callers (``remove_stones`` and ``remove_stones_from_ai``) switch
    ``player_turn`` *before* calling this function. When all piles are empty,
    ``player_turn`` therefore names the player who did NOT take the last
    stone. That is the player the end-of-game banner in ``draw_game_state``
    announces as the winner, so ``winner`` is set to the same value to keep
    the two in agreement.
    """
    global winner, game_over
    if all(pile == 0 for pile in piles):
        # The turn has already passed on: the player now "to move" is the one
        # who avoided taking the last stone, i.e. the winner.
        winner = player_turn
        game_over = True
 def draw_game_state():
    """Repaint the whole window: background, piles, status banner and buttons.

    The banner shows the result once ``game_over`` is set and otherwise
    whose turn it is. The "Restart" button is drawn only after the game has
    ended; the "Remove" button is drawn on every call.
    """
    screen.fill(LIGHT_GREY)
    draw_piles()

    # Pick the banner text and colour first, then render and blit it once.
    if game_over:
        message = 'You win, hooray!' if player_turn == 1 else 'AI wins!'
        message_color = GREEN
    else:
        message = 'Your turn!' if player_turn == 1 else 'Computer thinking... '
        message_color = BLACK
    banner = FONT.render(message, True, message_color)
    screen.blit(banner, (WIDTH // 2 - banner.get_width() // 2, 30))

    if game_over:
        # Restart button, shown only after the game has ended
        pygame.draw.rect(screen, DARK_GREY, (WIDTH // 2 - 60, HEIGHT - 60, 120, 40))
        restart_label = FONT.render("Restart", True, WHITE)
        screen.blit(restart_label, (WIDTH // 2 - restart_label.get_width() // 2, HEIGHT - 50))

    # "Remove" button, drawn above the restart button's position
    pygame.draw.rect(screen, BLACK, (WIDTH // 2 - 60, HEIGHT - 120, 120, 40))
    remove_label = FONT.render("Remove", True, WHITE)
    screen.blit(remove_label, (WIDTH // 2 - remove_label.get_width() // 2, HEIGHT - 110))
 def remove_stones():
    """Remove the currently selected stones and hand the turn to the other player.

    A move must take at least one stone. If nothing is selected the call is
    ignored, so the turn does not change and the player cannot pass by
    clicking "Remove" on an empty selection.
    """
    global player_turn, selected_pile
    if not selected_stones:
        # Nothing selected: not a legal move. Also drop any stale pile lock so
        # the player can start a fresh selection on any pile.
        selected_pile = None
        return
    # Each selected entry stands for one stone; only its pile index matters.
    for pile_index, _stone_index in selected_stones:
        piles[pile_index] -= 1
    selected_stones.clear()
    selected_pile = None  # Reset selected pile after removal
    player_turn = 2 if player_turn == 1 else 1  # Switch turns
    check_game_over()
 def handle_selection(pile_index, stone_index):
    """Toggle the selection of one stone, keeping the selection within one pile.

    Parameters:
        pile_index (int): Index of the pile the clicked stone belongs to.
        stone_index (int): Position of the clicked stone inside that pile.

    Clicks on a pile other than the one currently locked are ignored. The
    lock is released as soon as the selection becomes empty, so a player who
    deselects every stone can pick a different pile.
    """
    global selected_pile
    if selected_pile is not None and selected_pile != pile_index:
        return  # Selection is locked to another pile

    stone = (pile_index, stone_index)
    if stone in selected_stones:
        selected_stones.remove(stone)  # Deselect
    else:
        selected_stones.append(stone)  # Select

    # Keep the lock only while something is actually selected.
    selected_pile = pile_index if selected_stones else None
 def restart_game():
    """Reset all shared game state to its starting values for a new round."""
    global piles, player_turn, selected_stones, selected_pile, game_over, winner
    # Empty the existing selection list in place (the list object is reused).
    selected_stones.clear()
    piles = [4] * 4  # Four fresh piles of four stones
    player_turn = 1  # The human opens the new round
    selected_pile = winner = None
    game_over = False
 def start_game(ai):
    """Run the main loop: the human (player 1) plays against ``ai`` (player 2).

    Parameters:
        ai: Object providing ``choose_action(piles, epsilon=False)``; the
            returned value is passed to ``remove_stones_from_ai`` and is
            unpacked there as a ``(pile, count)`` pair.

    The loop never returns normally; closing the window shuts pygame down
    and exits the process.
    """
    global player_turn, game_over
    # Stone layout; these numbers mirror the ones used in draw_piles().
    first_column_x, baseline_y = 130, 250
    column_gap, radius = 150, 20
    row_step = 2 * radius + 10
    # Both buttons share the same horizontal extent.
    button_left, button_right = WIDTH // 2 - 60, WIDTH // 2 + 60

    clock = pygame.time.Clock()
    while True:
        clock.tick(FPS)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
            elif event.type == pygame.MOUSEBUTTONDOWN:
                click_x, click_y = event.pos
                in_button_column = button_left <= click_x <= button_right
                if game_over:
                    # Only the "Restart" button reacts once the game has ended
                    if in_button_column and HEIGHT - 60 <= click_y <= HEIGHT - 20:
                        restart_game()
                elif player_turn == 1:
                    # Toggle every stone whose circle contains the click
                    for pile_index, pile in enumerate(piles):
                        column_x = first_column_x + pile_index * column_gap
                        for stone_index in range(pile):
                            stone_y = baseline_y - stone_index * row_step
                            dist = ((click_x - column_x)**2 + (click_y - stone_y)**2)**0.5
                            if dist <= radius:
                                handle_selection(pile_index, stone_index)
                    # Then check the "Remove" button
                    if in_button_column and HEIGHT - 120 <= click_y <= HEIGHT - 80:
                        remove_stones()

        # AI turn: repaint first so the "thinking" banner is visible
        if player_turn == 2 and not game_over:
            draw_game_state()
            pygame.display.flip()
            time.sleep(2)  # Simulated thinking time
            action = ai.choose_action(piles, epsilon=False)
            remove_stones_from_ai(action)

        draw_game_state()
        pygame.display.flip()
 def remove_stones_from_ai(action):
    """Apply the AI's move and give the turn back to the human.

    Parameters:
        action (tuple): ``(pile, count)`` - index of the pile to take from and
            how many stones to take.
    """
    global player_turn
    target, amount = action
    for _ in range(amount):
        piles[target] -= 1
    player_turn = 1  # Human moves next
    check_game_over()
--- a/nim.py
+++ b/nim.py
@ -0,0 +1,123 @@
 import random
 class Nim():
    """State of a Nim game in which the player who takes the last object loses.

    Attributes:
        piles (list): Number of objects left in each pile.
        player (int): 0 or 1 - the player whose turn it is.
        winner (int | None): Winning player once all piles are empty, else None.
    """

    def __init__(self, initial=(4, 4, 4, 4)):
        """Start a new game.

        Parameters:
            initial (iterable of int): Starting size of each pile. It is
                copied, so the caller's sequence is never modified.
        """
        self.piles = list(initial)
        self.player = 0  # Player 0 starts
        self.winner = None

    @classmethod
    def available_actions(cls, piles):
        """Return the set of all legal ``(pile index, count)`` moves for ``piles``."""
        return {
            (i, count)
            for i, pile in enumerate(piles)
            for count in range(1, pile + 1)
        }

    @classmethod
    def other_player(cls, player):
        """Return the opponent of ``player`` (0 for player 1, otherwise 1)."""
        return 0 if player == 1 else 1

    def switch_player(self):
        """Pass the turn to the other player."""
        self.player = Nim.other_player(self.player)

    def move(self, action):
        """Apply ``action`` for the current player and pass the turn.

        Parameters:
            action (tuple): ``(pile, count)`` - take ``count`` objects from
                pile number ``pile``.

        Raises:
            ValueError: If the game is already won, or ``count`` is not
                between 1 and the current size of the pile.
            IndexError: If ``pile`` is not a valid pile index.
        """
        pile, count = action
        if self.winner is not None:
            raise ValueError("Game already won")
        # Reject negative indices too; Python would otherwise wrap them around.
        if not 0 <= pile < len(self.piles):
            raise IndexError("Invalid pile")
        if not 1 <= count <= self.piles[pile]:
            raise ValueError("Invalid number of objects")

        self.piles[pile] -= count
        self.switch_player()
        # The turn has already passed on, so self.player is the player who did
        # NOT take the last object - that player wins.
        if all(pile == 0 for pile in self.piles):
            self.winner = self.player
 class NimAI():
    """Q-learning agent for Nim.

    Attributes:
        q (dict): Q-table mapping ``(state tuple, action tuple)`` to a value.
        alpha (float): Learning rate.
        epsilon (float): Probability of exploring with a random action.
    """

    def __init__(self, alpha=0.5, epsilon=0.1):
        """Create an agent with the given learning and exploration rates."""
        self.q = dict()  # Q-value table
        # NOTE(review): these two seeded entries are read by test.py. They
        # also stay in the table during training - confirm that is wanted.
        self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value 
        self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value 
        self.alpha = alpha  # Learning rate
        self.epsilon = epsilon  # Exploration rate

    def update(self, old_state, action, new_state, reward):
        """Learn from one transition.

        Parameters:
            old_state (list): State in which ``action`` was taken.
            action (tuple): The ``(pile, count)`` action taken.
            new_state (list): State that resulted from the action.
            reward (float): Reward received for the transition.
        """
        old_q = self.get_q_value(old_state, action)
        best_future_q = self.best_future_reward(new_state)
        self.update_q_value(old_state, action, old_q, reward, best_future_q)

    def get_q_value(self, state, action):
        """Return the Q-value for a given state-action pair.

        Parameters:
            state (list): The current game state.
            action (tuple): The action being evaluated.

        Returns:
            float: The Q-value associated with the (state, action) pair.
                   Returns 0 if the pair is not yet in the Q-table.
        """
        # States arrive as lists; the table is keyed by hashable tuples.
        return self.q.get((tuple(state), tuple(action)), 0)

    def update_q_value(self, state, action, old_q, reward, future_q):
        """Update the Q-value for a state-action pair using the Q-learning formula.

        The new value is
        ``old_q + alpha * ((reward + future_q) - old_q)``.

        Parameters:
            state (list): The current game state.
            action (tuple): The action taken.
            old_q (float): The previous Q-value for the (state, action) pair.
            reward (float): The reward received after taking the action.
            future_q (float): The maximum Q-value for the next state.
        """
        new_estimate = reward + future_q
        self.q[tuple(state), tuple(action)] = old_q + self.alpha * (new_estimate - old_q)

    def best_future_reward(self, state):
        """Determine the highest Q-value among all possible actions in a given state.

        Parameters:
            state (list): The state for which to compute the best future reward.

        Returns:
            float: The highest Q-value among available actions.
                   Returns 0 if no actions are available.
        """
        return max(
            (self.get_q_value(state, action)
             for action in Nim.available_actions(state)),
            default=0,
        )

    def choose_action(self, state, epsilon=True):
        """Choose an action for the given state using an epsilon-greedy strategy.

        Parameters:
            state (list): The current game state.
            epsilon (bool): If True, use epsilon-greedy exploration;
                otherwise, choose the best action.

        Returns:
            tuple: The chosen action from the available actions. Ties between
                   equally valued actions are broken arbitrarily.

        Raises:
            ValueError: If ``state`` has no available actions.
        """
        actions = list(Nim.available_actions(state))
        if not actions:
            raise ValueError("No available actions in this state")
        # Explore: with probability self.epsilon pick a uniformly random move.
        if epsilon and random.random() < self.epsilon:
            return random.choice(actions)
        # Exploit: pick a move with the highest known Q-value.
        return max(actions, key=lambda action: self.get_q_value(state, action))
 def train(n):
    """Train a fresh ``NimAI`` by letting it play ``n`` games against itself.

    Parameters:
        n (int): Number of games to play.

    Returns:
        NimAI: The agent after training.
    """
    player = NimAI()
    for _ in range(n):
        game = Nim([4, 4, 4, 4])
        # Most recent (state, action) of each side, keyed by player number
        last_move = {side: {"state": None, "action": None} for side in (0, 1)}
        while True:
            state = game.piles.copy()
            action = player.choose_action(state)
            last_move[game.player].update(state=state, action=action)

            game.move(action)
            new_state = game.piles.copy()
            # game.move() switched the turn, so this is the previous move of
            # the side that did not just play.
            previous = last_move[game.player]

            if game.winner is not None:
                # The move just played ended the game: -1 for it, +1 for the
                # other side's previous move.
                player.update(state, action, new_state, -1)
                player.update(previous["state"], previous["action"], new_state, 1)
                break
            if previous["state"] is not None:
                # Game continues: no reward yet for the other side's last move
                player.update(previous["state"], previous["action"], new_state, 0)
    return player
--- a/play.py
+++ b/play.py
@ -0,0 +1,11 @@
 from nim import train
 from game import start_game
 if __name__ == "__main__":
    # Phase 1: build the AI by running nim.train() for 1000 games
    print("START TRAINING \n")
    ai = train(1000)
    # Phase 2: hand the trained AI to the pygame loop; start_game() runs
    # until the window is closed
    print("STARTING THE GAME \n")
    start_game(ai)
--- a/test.py
+++ b/test.py
@ -0,0 +1,31 @@
 from nim import NimAI
 def test_get_q_value(ai):
    """Print the Q-value ``ai`` reports for one fixed state/action pair.

    Parameters:
        ai: Object providing ``get_q_value(state, action)``.
    """
    print("\n--- Testing get_q_value ---")
    probe_state, probe_action = (0, 0, 0, 2), (3, 2)
    q = ai.get_q_value(probe_state, probe_action)
    print(f"Q-value for state {probe_state}, action {probe_action}: {q}")
 def test_update_q_value(ai):
    """Placeholder for the update_q_value check; only prints a section header.

    ``ai`` is accepted but not used yet - no assertions are made.
    """
    print("\n--- Testing update_q_value ---")
 def test_best_future_reward(ai):
    """Placeholder for the best_future_reward check; only prints a section header.

    ``ai`` is accepted but not used yet - no assertions are made.
    """
    print("\n--- Testing best_future_reward ---")
 def test_choose_action(ai):
    """Placeholder for the choose_action check; only prints a section header.

    ``ai`` is accepted but not used yet - no assertions are made.
    """
    print("\n--- Testing choose_action ---")
 if __name__ == "__main__":
    # Run every check, in order, against one shared NimAI instance.
    ai = NimAI()
    for check in (
        test_get_q_value,
        test_update_q_value,
        test_best_future_reward,
        test_choose_action,
    ):
        check(ai)
    print("\nAll tests completed.")