commit 0d2340b7e81b6d442e774b63c537a68433dfdf77 Author: patkordum Date: Wed Apr 23 09:46:50 2025 +0200 First commit diff --git a/game.py b/game.py new file mode 100644 index 0000000..f47adec --- /dev/null +++ b/game.py @@ -0,0 +1,179 @@ +import pygame +import sys +import time + +# Initialize Pygame +pygame.init() + +# Constants +WIDTH, HEIGHT = 700, 500 +FPS = 30 +WHITE = (255, 255, 255) +BLACK = (0, 0, 0) +RED = (255, 0, 0) +GREEN = (0, 255, 0) +LIGHT_GREY = (200, 200, 200) +DARK_GREY = (50, 50, 50) +FONT = pygame.font.Font(None, 36) + +# Create screen +screen = pygame.display.set_mode((WIDTH, HEIGHT)) +pygame.display.set_caption('Nim Game') + +# Define piles (number of coins in each pile) +piles = [4, 4, 4, 4] # 4 piles with 4 coins each +selected_stones = [] # To store selected stones for removal +selected_pile = None # Tracks the pile from which coins are selected + +# Players +player_turn = 1 # Player 1 starts (alternates between 1 and 2) + +# Game State +game_over = False +winner = None + +def draw_piles(): + """Draws the piles of coins as circles with padding.""" + x_pos = 130 # Start from left with padding + y_start = 250 + padding = 150 # Increased space between piles for better symmetry + radius = 20 + for idx, pile in enumerate(piles): + y_pos = y_start + for stone in range(pile): + color = RED + if (idx, stone) in selected_stones: + color = GREEN # Show selected stones as green + pygame.draw.circle(screen, color, (x_pos, y_pos), radius) + y_pos -= 2 * radius + 10 # Space between circles + text = FONT.render(f'Pile {idx + 1}', True, BLACK) + screen.blit(text, (x_pos - 30, 320)) + x_pos += padding # Increase x position and add padding + +def check_game_over(): + """Check if the game is over (all piles empty).""" + global winner, game_over + if all(pile == 0 for pile in piles): + winner = 2 if player_turn == 1 else 1 # The other player wins + game_over = True + +def draw_game_state(): + """Draws the current game state including piles and turn.""" + screen.fill(LIGHT_GREY) # Background color + draw_piles() + + if game_over: + if player_turn == 1: + text = FONT.render(f'You win, hooray!', True, GREEN) + screen.blit(text, (WIDTH // 2 - text.get_width() // 2, 30)) # Adjusted y-position for spacing + else: + text = FONT.render(f'AI wins!', True, GREEN) + screen.blit(text, (WIDTH // 2 - text.get_width() // 2, 30)) # Adjusted y-position for spacing + + # Draw the Restart button + pygame.draw.rect(screen, DARK_GREY, (WIDTH // 2 - 60, HEIGHT - 60, 120, 40)) + restart_text = FONT.render("Restart", True, WHITE) + screen.blit(restart_text, (WIDTH // 2 - restart_text.get_width() // 2, HEIGHT - 50)) + else: + if player_turn == 1: + text = FONT.render(f'Your turn!', True, BLACK) + screen.blit(text, (WIDTH // 2 - text.get_width() // 2, 30)) # Centered player label + else: + text = FONT.render(f'Computer thinking... ', True, BLACK) + screen.blit(text, (WIDTH // 2 - text.get_width() // 2, 30)) # Centered player labe + + + # Draw the "Remove" button at the bottom + pygame.draw.rect(screen, BLACK, (WIDTH // 2 - 60, HEIGHT - 120, 120, 40)) + remove_text = FONT.render("Remove", True, WHITE) + screen.blit(remove_text, (WIDTH // 2 - remove_text.get_width() // 2, HEIGHT - 110)) + +def remove_stones(): + """Removes the selected stones from the selected pile.""" + global player_turn, selected_pile + for pile_index, stone_index in selected_stones: + piles[pile_index] -= 1 + selected_stones.clear() + selected_pile = None # Reset selected pile after removal + player_turn = 2 if player_turn == 1 else 1 # Switch turns + check_game_over() + +def handle_selection(pile_index, stone_index): + """Handles selecting or deselecting stones, ensuring only one pile can be selected.""" + global selected_pile + if selected_pile is None or selected_pile == pile_index: + selected_pile = pile_index # Lock the selection to the current pile + if (pile_index, stone_index) in selected_stones: + selected_stones.remove((pile_index, stone_index)) # Deselect + else: + selected_stones.append((pile_index, stone_index)) # Select + +def restart_game(): + """Restarts the game.""" + global piles, player_turn, selected_stones, selected_pile, game_over, winner + piles = [4, 4, 4, 4] # Reset piles + selected_stones.clear() + selected_pile = None + player_turn = 1 + game_over = False + winner = None + +def start_game(ai): + """Starts the game and integrates AI for playing against the computer.""" + global player_turn, game_over + + # Main game loop + clock = pygame.time.Clock() + while True: + clock.tick(FPS) + + for event in pygame.event.get(): + if event.type == pygame.QUIT: + pygame.quit() + sys.exit() + elif event.type == pygame.MOUSEBUTTONDOWN and not game_over and player_turn == 1: + mouse_x, mouse_y = event.pos + # Check for pile selection (clicking on a coin) + x_pos = 130 + y_start = 250 + padding = 150 + radius = 20 + for pile_index, pile in enumerate(piles): + y_pos = y_start + for stone_index in range(pile): + dist = ((mouse_x - x_pos)**2 + (mouse_y - y_pos)**2)**0.5 + if dist <= radius: + handle_selection(pile_index, stone_index) + y_pos -= 2 * radius + 10 + x_pos += padding + # Check for "Remove" button click + if WIDTH // 2 - 60 <= mouse_x <= WIDTH // 2 + 60 and HEIGHT - 120 <= mouse_y <= HEIGHT - 80: + remove_stones() + + # If game over, check for "Restart" button click + elif event.type == pygame.MOUSEBUTTONDOWN and game_over: + mouse_x, mouse_y = event.pos + if WIDTH // 2 - 60 <= mouse_x <= WIDTH // 2 + 60 and HEIGHT - 60 <= mouse_y <= HEIGHT - 20: + restart_game() + + # If it's the AI's turn and the game is not over + if player_turn == 2 and not game_over: + draw_game_state() + pygame.display.flip() + # Simulate thinking + time.sleep(2) + # AI makes its move + action = ai.choose_action(piles, epsilon=False) + remove_stones_from_ai(action) + + draw_game_state() + pygame.display.flip() + +def remove_stones_from_ai(action): + """Handles AI stone removal.""" + pile, count = action + for i in range(count): + piles[pile] -= 1 + global player_turn + player_turn = 1 # Switch back to human player + check_game_over() diff --git a/nim.py b/nim.py new file mode 100644 index 0000000..42afde8 --- /dev/null +++ b/nim.py @@ -0,0 +1,123 @@ +import random + +class Nim(): + def __init__(self, initial=[4, 4, 4, 4]): + self.piles = initial.copy() + self.player = 0 # Player 0 starts + self.winner = None + + @classmethod + def available_actions(cls, piles): + actions = set() + for i, pile in enumerate(piles): + for j in range(1, pile + 1): + actions.add((i, j)) + return actions + + @classmethod + def other_player(cls, player): + return 0 if player == 1 else 1 + + def switch_player(self): + self.player = Nim.other_player(self.player) + + def move(self, action): + pile, count = action + self.piles[pile] -= count + self.switch_player() + if all(pile == 0 for pile in self.piles): + self.winner = self.player + + + +class NimAI(): + def __init__(self, alpha=0.5, epsilon=0.1): + self.q = dict() # Q-value table + self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value + self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value + + self.alpha = alpha # Learning rate + self.epsilon = epsilon # Exploration rate + + def update(self, old_state, action, new_state, reward): + old_q = self.get_q_value(old_state, action) + best_future_q = self.best_future_reward(new_state) + self.update_q_value(old_state, action, old_q, reward, best_future_q) + + def get_q_value(self, state, action): + """ + Return the Q-value for a given state-action pair. + + Parameters: + state (list): The current game state. + action (tuple): The action being evaluated. + + Returns: + float: The Q-value associated with the (state, action) pair. + Returns 0 if the pair is not yet in the Q-table. + """ + print(self.q) + + def update_q_value(self, state, action, old_q, reward, future_q): + """ + Update the Q-value for a state-action pair using the Q-learning formula. + + Parameters: + state (list): The current game state. + action (tuple): The action taken. + old_q (float): The previous Q-value for the (state, action) pair. + reward (float): The reward received after taking the action. + future_q (float): The maximum Q-value for the next state. + """ + raise NotImplementedError + + def best_future_reward(self, state): + """ + Determine the highest Q-value among all possible actions in a given state. + + Parameters: + state (list): The state for which to compute the best future reward. + + Returns: + float: The highest Q-value among available actions. + Returns 0 if no actions are available. + """ + raise NotImplementedError + + def choose_action(self, state, epsilon=True): + """ + Choose an action for the given state using an epsilon-greedy strategy. + + Parameters: + state (list): The current game state. + epsilon (bool): If True, use epsilon-greedy exploration; otherwise, choose the best action. + + Returns: + tuple: The chosen action from the available actions. + """ + raise NotImplementedError + +def train(n): + player = NimAI() + + for i in range(n): + game = Nim([4, 4, 4, 4]) + last_move = {0: {"state": None, "action": None}, 1: {"state": None, "action": None}} + + while True: + state = game.piles.copy() + action = player.choose_action(state) + last_move[game.player]["state"] = state + last_move[game.player]["action"] = action + + game.move(action) + new_state = game.piles.copy() + + if game.winner is not None: + player.update(state, action, new_state, -1) + player.update(last_move[game.player]["state"], last_move[game.player]["action"], new_state, 1) + break + elif last_move[game.player]["state"] is not None: + player.update(last_move[game.player]["state"], last_move[game.player]["action"], new_state, 0) + + return player diff --git a/play.py b/play.py new file mode 100644 index 0000000..6206d25 --- /dev/null +++ b/play.py @@ -0,0 +1,11 @@ +from nim import train +from game import start_game + +if __name__ == "__main__": + # Train the AI with 1000 games + print("START TRAINING \n") + ai = train(1000) + + # Start the game and play against the trained AI + print("STARTING THE GAME \n") + start_game(ai) diff --git a/test.py b/test.py new file mode 100644 index 0000000..0731673 --- /dev/null +++ b/test.py @@ -0,0 +1,31 @@ +from nim import NimAI + +def test_get_q_value(ai): + print("\n--- Testing get_q_value ---") + state = (0, 0, 0, 2) + action = (3, 2) + value = ai.get_q_value(state, action) + print(f"Q-value for state {state}, action {action}: {value}") + + +def test_update_q_value(ai): + print("\n--- Testing update_q_value ---") + + +def test_best_future_reward(ai): + print("\n--- Testing best_future_reward ---") + + +def test_choose_action(ai): + print("\n--- Testing choose_action ---") + + +if __name__ == "__main__": + ai = NimAI() + + test_get_q_value(ai) + test_update_q_value(ai) + test_best_future_reward(ai) + test_choose_action(ai) + + print("\nAll tests completed.")