First commit

This commit is contained in:
patkordum 2025-04-23 09:46:50 +02:00
commit 0d2340b7e8
4 changed files with 344 additions and 0 deletions

179
game.py Normal file
View file

@@ -0,0 +1,179 @@
import pygame
import sys
import time
# Initialize the pygame modules up front; the font and display objects below are created right after
pygame.init()
# Constants
WIDTH, HEIGHT = 700, 500  # Window size in pixels
FPS = 30  # Frame-rate cap handed to clock.tick() in the main loop
WHITE = (255, 255, 255)  # Label colour on the buttons
BLACK = (0, 0, 0)  # Pile captions, turn message and the "Remove" button
RED = (255, 0, 0)  # Unselected stones
GREEN = (0, 255, 0)  # Selected stones and the end-of-game message
LIGHT_GREY = (200, 200, 200)  # Window background
DARK_GREY = (50, 50, 50)  # "Restart" button
FONT = pygame.font.Font(None, 36)  # None selects pygame's built-in default font, size 36
# Create screen
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption('Nim Game')
# Define piles (number of coins in each pile)
piles = [4, 4, 4, 4] # 4 piles with 4 coins each
selected_stones = [] # (pile_index, stone_index) tuples currently marked for removal
selected_pile = None # Index of the pile the current selection is locked to, or None
# Players
player_turn = 1 # 1 = human (moves first), 2 = AI
# Game State
game_over = False  # Set to True by check_game_over() once every pile is empty
winner = None  # Set by check_game_over() together with game_over
def draw_piles():
    """Render every pile as a vertical stack of circles with a caption below.

    Stones whose (pile, stone) pair is listed in ``selected_stones`` are
    painted green, all others red. Piles are laid out left to right and each
    stack grows upwards from a common base row.
    """
    stone_radius = 20
    base_y = 250  # y coordinate of the lowest stone in every pile
    row_step = 2 * stone_radius + 10  # vertical distance between stacked stones
    column_gap = 150  # horizontal distance between neighbouring piles
    column_x = 130  # x coordinate of the first pile (left padding)
    for pile_no, stone_count in enumerate(piles):
        for stone_no in range(stone_count):
            fill = GREEN if (pile_no, stone_no) in selected_stones else RED
            centre = (column_x, base_y - stone_no * row_step)
            pygame.draw.circle(screen, fill, centre, stone_radius)
        caption = FONT.render(f'Pile {pile_no + 1}', True, BLACK)
        screen.blit(caption, (column_x - 30, 320))
        column_x += column_gap
def check_game_over():
    """Flag the game as finished once every pile is empty and record the winner.

    Both callers (``remove_stones`` and ``remove_stones_from_ai``) switch
    ``player_turn`` *before* calling this function, so when the piles are
    empty ``player_turn`` already names the player who did NOT take the last
    stone. Taking the last stone loses (the same rule ``Nim.move`` applies
    during training, and the one the end-of-game message in
    ``draw_game_state`` is based on), hence the winner is ``player_turn``.

    Side effects:
        Sets the module-level ``game_over`` and ``winner`` when the game ended;
        leaves both untouched otherwise.
    """
    global winner, game_over
    if all(pile == 0 for pile in piles):
        # The turn has already passed to the opponent of whoever emptied the
        # board, and that opponent is the winner.
        winner = player_turn
        game_over = True
def draw_game_state():
    """Repaint the whole window: background, piles, status line and button.

    When the game is over, the result message is shown in green together with
    a "Restart" button. While the game is running, the status line says whose
    turn it is and the "Remove" button is drawn instead.
    """
    screen.fill(LIGHT_GREY)
    draw_piles()

    centre_x = WIDTH // 2
    humans_turn = player_turn == 1

    if game_over:
        # After the final move the turn has already passed on, so the player
        # whose turn it is now is announced as the winner.
        headline = 'You win, hooray!' if humans_turn else 'AI wins!'
        banner = FONT.render(headline, True, GREEN)
        screen.blit(banner, (centre_x - banner.get_width() // 2, 30))

        pygame.draw.rect(screen, DARK_GREY, (centre_x - 60, HEIGHT - 60, 120, 40))
        restart_label = FONT.render("Restart", True, WHITE)
        screen.blit(restart_label, (centre_x - restart_label.get_width() // 2, HEIGHT - 50))
        return

    headline = 'Your turn!' if humans_turn else 'Computer thinking... '
    banner = FONT.render(headline, True, BLACK)
    screen.blit(banner, (centre_x - banner.get_width() // 2, 30))

    pygame.draw.rect(screen, BLACK, (centre_x - 60, HEIGHT - 120, 120, 40))
    remove_label = FONT.render("Remove", True, WHITE)
    screen.blit(remove_label, (centre_x - remove_label.get_width() // 2, HEIGHT - 110))
def remove_stones():
    """Apply the human player's move: take every selected stone off its pile.

    One stone is removed from the owning pile for each entry in
    ``selected_stones``. Afterwards the selection is cleared, the turn passes
    to the other player and the end-of-game check runs.

    If nothing is selected the call is ignored and the turn is kept: a Nim
    move has to remove at least one stone, so an empty "Remove" click must
    not hand the turn to the opponent.
    """
    global player_turn, selected_pile
    if not selected_stones:
        # No stones chosen: not a legal move. Release any stale pile lock so
        # the player can start a fresh selection, and keep the turn.
        selected_pile = None
        return
    for pile_index, _ in selected_stones:
        piles[pile_index] -= 1
    selected_stones.clear()
    selected_pile = None  # The next selection may start on any pile
    player_turn = 2 if player_turn == 1 else 1  # Switch turns
    check_game_over()
def handle_selection(pile_index, stone_index):
    """Toggle the selection of one stone, keeping the selection within one pile.

    Parameters:
        pile_index (int): Index of the pile the clicked stone belongs to.
        stone_index (int): Position of the clicked stone inside that pile.

    A click on a stone of a different pile than the one already being
    selected from is ignored. Clicking a selected stone deselects it. Once
    the last stone is deselected the pile lock is released, so the player
    can switch to another pile (previously the lock stayed in place and every
    other pile became unselectable until "Remove" was pressed).
    """
    global selected_pile
    if selected_pile is not None and selected_pile != pile_index:
        return  # Selection is locked to another pile
    stone = (pile_index, stone_index)
    if stone in selected_stones:
        selected_stones.remove(stone)  # Deselect
    else:
        selected_stones.append(stone)  # Select
    # Keep the lock only while something is actually selected.
    selected_pile = pile_index if selected_stones else None
def restart_game():
    """Put all module-level game state back to its starting values.

    The piles are rebuilt with four stones each, the current selection is
    emptied in place, the human player (1) moves first again and the
    game-over / winner flags are cleared.
    """
    global piles, player_turn, selected_stones, selected_pile, game_over, winner
    piles = [4] * 4
    selected_stones.clear()
    selected_pile, player_turn = None, 1
    game_over, winner = False, None
def start_game(ai):
    """Run the pygame main loop for a game of the human against the AI.

    Parameters:
        ai: Object with a ``choose_action(state, epsilon=False)`` method that
            returns a ``(pile, count)`` move for the current piles.

    The loop never returns; closing the window shuts pygame down and exits
    the process via ``sys.exit()``.
    """
    # Stone layout used for hit-testing; same numbers as used for drawing.
    first_column_x, base_y = 130, 250
    column_gap, stone_radius = 150, 20
    row_step = 2 * stone_radius + 10
    # Both buttons share the same horizontal extent.
    button_left, button_right = WIDTH // 2 - 60, WIDTH // 2 + 60

    ticker = pygame.time.Clock()
    while True:
        ticker.tick(FPS)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
            if event.type != pygame.MOUSEBUTTONDOWN:
                continue

            click_x, click_y = event.pos
            in_button_column = button_left <= click_x <= button_right

            if game_over:
                # Only the "Restart" button reacts once the game has ended.
                if in_button_column and HEIGHT - 60 <= click_y <= HEIGHT - 20:
                    restart_game()
                continue
            if player_turn != 1:
                continue  # Clicks are ignored while it is the AI's turn

            # Toggle every stone whose circle contains the click position.
            for pile_index, stone_count in enumerate(piles):
                stone_x = first_column_x + pile_index * column_gap
                for stone_index in range(stone_count):
                    stone_y = base_y - stone_index * row_step
                    distance = ((click_x - stone_x) ** 2 + (click_y - stone_y) ** 2) ** 0.5
                    if distance <= stone_radius:
                        handle_selection(pile_index, stone_index)

            # "Remove" button
            if in_button_column and HEIGHT - 120 <= click_y <= HEIGHT - 80:
                remove_stones()

        if not game_over and player_turn == 2:
            # Show the "thinking" message before the artificial pause.
            draw_game_state()
            pygame.display.flip()
            time.sleep(2)
            remove_stones_from_ai(ai.choose_action(piles, epsilon=False))

        draw_game_state()
        pygame.display.flip()
def remove_stones_from_ai(action):
    """Apply the AI's move to the piles and hand the turn back to the human.

    Parameters:
        action (tuple): ``(pile, count)`` - index of the pile to take from and
            the number of stones to take.
    """
    global player_turn
    pile_index, amount = action
    for _ in range(amount):
        piles[pile_index] -= 1
    player_turn = 1  # The human moves next
    check_game_over()

123
nim.py Normal file
View file

@@ -0,0 +1,123 @@
import random
class Nim():
    """State of a single game of Nim in which taking the last object loses.

    Attributes:
        piles (list): Number of objects left in each pile.
        player (int): 0 or 1, the player whose turn it is.
        winner (int or None): The winning player once all piles are empty,
            otherwise None.
    """

    def __init__(self, initial=None):
        """
        Set up a new game.
        Parameters:
        initial (iterable of int): Starting size of each pile. Defaults to
        four piles of four. The argument is copied, never modified.
        """
        # None instead of a list literal avoids a shared mutable default.
        self.piles = [4, 4, 4, 4] if initial is None else list(initial)
        self.player = 0  # Player 0 starts
        self.winner = None

    @classmethod
    def available_actions(cls, piles):
        """
        Return the set of all legal ``(pile_index, count)`` moves for `piles`.
        An empty set is returned when every pile is empty.
        """
        return {
            (index, count)
            for index, size in enumerate(piles)
            for count in range(1, size + 1)
        }

    @classmethod
    def other_player(cls, player):
        """Return the opponent of `player` (0 for 1, otherwise 1)."""
        return 0 if player == 1 else 1

    def switch_player(self):
        """Pass the turn to the other player."""
        self.player = Nim.other_player(self.player)

    def move(self, action):
        """
        Take `count` objects from pile `pile` and pass the turn on.
        Parameters:
        action (tuple): ``(pile, count)``.
        Raises:
        IndexError: If `pile` is not a valid pile index.
        ValueError: If `count` is less than 1 or larger than the pile
        (this also rejects any move after the game has ended).
        """
        pile, count = action
        # Reject illegal moves up front: a negative pile size would make the
        # "all piles are zero" end condition unreachable.
        if not 0 <= pile < len(self.piles):
            raise IndexError(f"invalid pile index: {pile}")
        if not 1 <= count <= self.piles[pile]:
            raise ValueError(f"invalid number of objects for pile {pile}: {count}")
        self.piles[pile] -= count
        self.switch_player()
        # The turn has already passed on, so self.player is the opponent of
        # whoever just emptied the board - and that opponent wins.
        if all(size == 0 for size in self.piles):
            self.winner = self.player
class NimAI():
    """Q-learning agent for Nim.

    Attributes:
        q (dict): Q-value table mapping ``(state, action)`` to a number, where
            `state` is a tuple of pile sizes and `action` a
            ``(pile, count)`` tuple.
        alpha (float): Learning rate.
        epsilon (float): Probability of picking a random action while exploring.
    """

    def __init__(self, alpha=0.5, epsilon=0.1):
        """
        Create an agent with the given learning and exploration rates.
        Parameters:
        alpha (float): Learning rate used by update_q_value.
        epsilon (float): Exploration rate used by choose_action.
        """
        self.q = dict()  # Q-value table
        # Two seed entries that the checks in test.py look up.
        self.q[(0, 0, 0, 2), (3, 2)] = -1  # Test Q-Value
        self.q[(0, 0, 0, 2), (3, 1)] = 10  # Test Q-Value
        self.alpha = alpha  # Learning rate
        self.epsilon = epsilon  # Exploration rate

    def update(self, old_state, action, new_state, reward):
        """
        Learn from one transition.
        Parameters:
        old_state (list): State in which the action was taken.
        action (tuple): The action taken.
        new_state (list): State that resulted from the action.
        reward (float): Reward received for the transition.
        """
        old_q = self.get_q_value(old_state, action)
        best_future_q = self.best_future_reward(new_state)
        self.update_q_value(old_state, action, old_q, reward, best_future_q)

    def get_q_value(self, state, action):
        """
        Return the Q-value for a given state-action pair.
        Parameters:
        state (list): The current game state.
        action (tuple): The action being evaluated.
        Returns:
        float: The Q-value associated with the (state, action) pair.
        Returns 0 if the pair is not yet in the Q-table.
        """
        # States arrive as lists; the table is keyed by hashable tuples.
        return self.q.get((tuple(state), tuple(action)), 0)

    def update_q_value(self, state, action, old_q, reward, future_q):
        """
        Update the Q-value for a state-action pair using the Q-learning formula.
        Parameters:
        state (list): The current game state.
        action (tuple): The action taken.
        old_q (float): The previous Q-value for the (state, action) pair.
        reward (float): The reward received after taking the action.
        future_q (float): The maximum Q-value for the next state.
        """
        # Q(s, a) <- old estimate + alpha * (new estimate - old estimate),
        # where the new estimate is the reward plus the best future reward.
        new_estimate = reward + future_q
        self.q[tuple(state), tuple(action)] = old_q + self.alpha * (new_estimate - old_q)

    def best_future_reward(self, state):
        """
        Determine the highest Q-value among all possible actions in a given state.
        Parameters:
        state (list): The state for which to compute the best future reward.
        Returns:
        float: The highest Q-value among available actions.
        Returns 0 if no actions are available.
        """
        actions = Nim.available_actions(state)
        if not actions:
            return 0
        return max(self.get_q_value(state, action) for action in actions)

    def choose_action(self, state, epsilon=True):
        """
        Choose an action for the given state using an epsilon-greedy strategy.
        Parameters:
        state (list): The current game state.
        epsilon (bool): If True, use epsilon-greedy exploration; otherwise, choose the best action.
        Returns:
        tuple: The chosen action from the available actions.
        Raises:
        ValueError: If the state has no available actions (all piles empty).
        """
        # Sorted so that the candidate order does not depend on set iteration.
        actions = sorted(Nim.available_actions(state))
        if not actions:
            raise ValueError("no available actions: every pile is empty")
        if epsilon and random.random() < self.epsilon:
            return random.choice(actions)  # Explore
        # Exploit: pick among the best-valued actions, breaking ties at random
        # so that untrained (all-zero) states do not always yield the same move.
        scored = [(self.get_q_value(state, action), action) for action in actions]
        best_value = max(value for value, _ in scored)
        return random.choice([action for value, action in scored if value == best_value])
def train(n):
    """
    Train a fresh NimAI by letting it play `n` games against itself.
    Parameters:
    n (int): Number of self-play games.
    Returns:
    NimAI: The agent after training.
    """
    player = NimAI()
    for _ in range(n):
        game = Nim([4, 4, 4, 4])
        # Most recent (state, action) of each side; a move can only be rated
        # once the opponent has replied to it.
        last_move = {side: {"state": None, "action": None} for side in (0, 1)}
        while True:
            state = game.piles.copy()
            action = player.choose_action(state)
            mover = last_move[game.player]
            mover["state"], mover["action"] = state, action
            game.move(action)
            new_state = game.piles.copy()
            # game.move switched sides, so this is the record of the player
            # who did not make the move above.
            previous = last_move[game.player]
            if game.winner is not None:
                # The move that ended the game is penalised, the other side's
                # preceding move is rewarded.
                player.update(state, action, new_state, -1)
                player.update(previous["state"], previous["action"], new_state, 1)
                break
            if previous["state"] is not None:
                # Game goes on: neutral reward for the other side's last move.
                player.update(previous["state"], previous["action"], new_state, 0)
    return player

11
play.py Normal file
View file

@@ -0,0 +1,11 @@
from nim import train
from game import start_game
if __name__ == "__main__":
    # Step 1: let the AI learn from 1000 games of self-play.
    print("START TRAINING \n")
    ai = train(1000)

    # Step 2: open the graphical game with the trained AI as the opponent.
    print("STARTING THE GAME \n")
    start_game(ai)

31
test.py Normal file
View file

@@ -0,0 +1,31 @@
from nim import NimAI
def test_get_q_value(ai):
    """Look up one fixed (state, action) pair on `ai` and print the result."""
    print("\n--- Testing get_q_value ---")
    state, action = (0, 0, 0, 2), (3, 2)
    value = ai.get_q_value(state, action)
    print(f"Q-value for state {state}, action {action}: {value}")
def test_update_q_value(ai):
    """Placeholder check for update_q_value: only prints its section header."""
    print("\n--- Testing update_q_value ---")
def test_best_future_reward(ai):
    """Placeholder check for best_future_reward: only prints its section header."""
    print("\n--- Testing best_future_reward ---")
def test_choose_action(ai):
    """Placeholder check for choose_action: only prints its section header."""
    print("\n--- Testing choose_action ---")
if __name__ == "__main__":
    # Run every check, in order, against one shared untrained agent.
    ai = NimAI()
    for check in (
        test_get_q_value,
        test_update_q_value,
        test_best_future_reward,
        test_choose_action,
    ):
        check(ai)
    print("\nAll tests completed.")