First commit
This commit is contained in:
commit
0d2340b7e8
4 changed files with 344 additions and 0 deletions
179
game.py
Normal file
179
game.py
Normal file
|
|
@ -0,0 +1,179 @@
|
||||||
|
import pygame
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Bring up pygame before any font or display call below.
pygame.init()

# Window geometry and frame rate
WIDTH = 700
HEIGHT = 500
FPS = 30

# RGB colours used by the drawing code
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
LIGHT_GREY = (200, 200, 200)
DARK_GREY = (50, 50, 50)

# Single font (size 36) shared by every label and button caption
FONT = pygame.font.Font(None, 36)

# Main window
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption('Nim Game')

# Board state: number of coins left in each of the four piles
piles = [4] * 4
selected_stones = []  # (pile_index, stone_index) pairs currently highlighted
selected_pile = None  # index of the pile the current selection is locked to

# Whose move it is: 1 is the human, 2 is the AI; player 1 opens the game
player_turn = 1

# End-of-game bookkeeping, written by check_game_over()
game_over = False
winner = None
|
||||||
|
|
||||||
|
def draw_piles():
    """Render each pile as a vertical column of circles with a caption below.

    Coins whose (pile, stone) pair is in ``selected_stones`` are painted
    green; every other coin is red.
    """
    first_x = 130   # centre x of the left-most pile
    base_y = 250    # centre y of the bottom coin in every pile
    pile_gap = 150  # horizontal distance between neighbouring piles
    radius = 20
    stone_step = 2 * radius + 10  # vertical distance between coin centres

    for idx, pile in enumerate(piles):
        centre_x = first_x + idx * pile_gap
        for stone in range(pile):
            colour = GREEN if (idx, stone) in selected_stones else RED
            centre = (centre_x, base_y - stone * stone_step)
            pygame.draw.circle(screen, colour, centre, radius)

        # The caption is drawn even when the pile is empty.
        label = FONT.render(f'Pile {idx + 1}', True, BLACK)
        screen.blit(label, (centre_x - 30, 320))
|
||||||
|
|
||||||
|
def check_game_over():
    """Mark the game as finished once every pile is empty.

    Both callers (``remove_stones`` and ``remove_stones_from_ai``) switch
    ``player_turn`` *before* calling this function, so at this point
    ``player_turn`` names the player who did NOT take the last coin.
    ``draw_game_state`` announces that player as the winner, and ``Nim.move``
    in nim.py scores the game the same way (taking the last coin loses), so
    ``winner`` is set to ``player_turn`` to agree with both.
    """
    global winner, game_over
    if all(pile == 0 for pile in piles):
        # The player who emptied the board loses; the player now "to move" wins.
        winner = player_turn
        game_over = True
|
||||||
|
|
||||||
|
def draw_game_state():
    """Repaint the whole frame: background, piles, status line and button.

    While the game is running the status line shows whose turn it is and the
    "Remove" button is drawn; once the game is over the status line announces
    the result and the "Restart" button is drawn instead.
    """
    screen.fill(LIGHT_GREY)
    draw_piles()

    humans_turn = player_turn == 1
    if game_over:
        # After the final move the turn has already passed to the winner.
        message = 'You win, hooray!' if humans_turn else 'AI wins!'
        message_colour = GREEN
        button_colour, caption, button_top = DARK_GREY, "Restart", HEIGHT - 60
    else:
        message = 'Your turn!' if humans_turn else 'Computer thinking... '
        message_colour = BLACK
        # NOTE(review): indentation was lost in the source view; the Remove
        # button is assumed to belong to the not-game-over branch, matching
        # the click handler which ignores it once the game has ended.
        button_colour, caption, button_top = BLACK, "Remove", HEIGHT - 120

    # Status line, horizontally centred near the top of the window
    banner = FONT.render(message, True, message_colour)
    screen.blit(banner, (WIDTH // 2 - banner.get_width() // 2, 30))

    # 120x40 button centred horizontally, caption 10 px below its top edge
    pygame.draw.rect(screen, button_colour, (WIDTH // 2 - 60, button_top, 120, 40))
    caption_surface = FONT.render(caption, True, WHITE)
    screen.blit(
        caption_surface,
        (WIDTH // 2 - caption_surface.get_width() // 2, button_top + 10),
    )
|
||||||
|
|
||||||
|
def remove_stones():
    """Take the currently selected coins off the board and pass the turn.

    Does nothing when no coin is selected: a Nim move must remove at least
    one coin, so pressing "Remove" with an empty selection must not hand the
    turn to the other player.
    """
    global player_turn, selected_pile
    if not selected_stones:
        return

    # Each selected coin removes exactly one coin from its pile.
    for pile_index, _stone_index in selected_stones:
        piles[pile_index] -= 1
    selected_stones.clear()
    selected_pile = None  # unlock pile selection for the next move
    player_turn = 2 if player_turn == 1 else 1
    # Must run after the turn switch; check_game_over() relies on that order.
    check_game_over()
|
||||||
|
|
||||||
|
def handle_selection(pile_index, stone_index):
    """Toggle the selection of one coin, keeping the selection in one pile.

    Parameters:
        pile_index: index of the pile the clicked coin belongs to.
        stone_index: position of the clicked coin inside that pile.

    A click on a coin of a different pile than the one already locked is
    ignored. When the last selected coin is deselected the lock is released,
    so the player can start a new selection in any pile.
    """
    global selected_pile
    if selected_pile is not None and selected_pile != pile_index:
        return  # selection is locked to another pile

    stone = (pile_index, stone_index)
    if stone in selected_stones:
        selected_stones.remove(stone)  # deselect
    else:
        selected_stones.append(stone)  # select

    # Keep the lock only while something is actually selected.
    selected_pile = pile_index if selected_stones else None
|
||||||
|
|
||||||
|
def restart_game():
    """Reset every piece of module-level game state to its starting value."""
    global piles, player_turn, selected_stones, selected_pile, game_over, winner
    selected_stones.clear()
    piles = [4] * 4  # fresh board: four piles of four coins
    player_turn, selected_pile = 1, None
    game_over, winner = False, None
|
||||||
|
|
||||||
|
def start_game(ai):
    """Run the pygame main loop for a human-versus-AI game of Nim.

    Parameters:
        ai: object providing ``choose_action(piles, epsilon=False)`` that
            returns a ``(pile, count)`` action for the computer's move.

    The loop never returns; closing the window quits pygame and exits the
    process.
    """
    global player_turn, game_over

    # Coin layout; these numbers must mirror the ones used by draw_piles().
    first_x, base_y, pile_gap, radius = 130, 250, 150, 20
    stone_step = 2 * radius + 10
    # Both buttons share the same horizontal extent.
    button_left, button_right = WIDTH // 2 - 60, WIDTH // 2 + 60

    clock = pygame.time.Clock()
    while True:
        clock.tick(FPS)

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
            if event.type != pygame.MOUSEBUTTONDOWN:
                continue

            mouse_x, mouse_y = event.pos
            in_button_column = button_left <= mouse_x <= button_right

            if game_over:
                # Only the "Restart" button reacts once the game has ended.
                if in_button_column and HEIGHT - 60 <= mouse_y <= HEIGHT - 20:
                    restart_game()
            elif player_turn == 1:
                # Hit-test the click against every coin still on the board.
                for pile_index, pile in enumerate(piles):
                    coin_x = first_x + pile_index * pile_gap
                    for stone_index in range(pile):
                        coin_y = base_y - stone_index * stone_step
                        dx, dy = mouse_x - coin_x, mouse_y - coin_y
                        if dx * dx + dy * dy <= radius * radius:
                            handle_selection(pile_index, stone_index)

                # "Remove" button
                if in_button_column and HEIGHT - 120 <= mouse_y <= HEIGHT - 80:
                    remove_stones()

        # Computer's move
        if player_turn == 2 and not game_over:
            # Show the "thinking" banner before blocking on the pause below.
            draw_game_state()
            pygame.display.flip()
            time.sleep(2)  # simulated thinking time
            action = ai.choose_action(piles, epsilon=False)
            remove_stones_from_ai(action)

        draw_game_state()
        pygame.display.flip()
|
||||||
|
|
||||||
|
def remove_stones_from_ai(action):
    """Apply the computer's move to the board and hand the turn to the human.

    Parameters:
        action: ``(pile, count)`` pair; ``count`` coins are taken from
            ``piles[pile]``.

    The amount removed is clamped to what the pile actually holds. A pile
    that dropped below zero would never compare equal to 0 again, and
    ``check_game_over`` could then never detect the end of the game.
    """
    global player_turn
    pile, count = action
    piles[pile] -= max(0, min(count, piles[pile]))
    player_turn = 1  # back to the human player
    # Must run after the turn switch; check_game_over() relies on that order.
    check_game_over()
|
||||||
123
nim.py
Normal file
123
nim.py
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
import random
|
||||||
|
|
||||||
|
class Nim():
    """State of one game of Nim in which taking the last object loses.

    Attributes are plain instance variables:
      piles  -- list with the number of objects left in each pile
      player -- 0 or 1, the player whose move it is
      winner -- None while the game is running, otherwise 0 or 1
    """

    def __init__(self, initial=(4, 4, 4, 4)):
        """Start a new game.

        Parameters:
            initial: iterable with the starting size of every pile. It is
                copied, so the caller's object is never modified.
        """
        self.piles = list(initial)
        self.player = 0  # Player 0 starts
        self.winner = None

    @classmethod
    def available_actions(cls, piles):
        """Return the set of all legal ``(pile_index, count)`` moves.

        For a pile holding ``k`` objects the legal counts are 1..k, so empty
        piles contribute no actions.
        """
        return {
            (i, j)
            for i, pile in enumerate(piles)
            for j in range(1, pile + 1)
        }

    @classmethod
    def other_player(cls, player):
        """Return the opponent of ``player`` (0 for 1, otherwise 1)."""
        return 0 if player == 1 else 1

    def switch_player(self):
        """Hand the move to the other player."""
        self.player = Nim.other_player(self.player)

    def move(self, action):
        """Apply ``action`` for the current player.

        Parameters:
            action: ``(pile, count)`` pair; ``count`` objects are removed
                from ``self.piles[pile]``.

        Raises:
            ValueError: if ``pile`` is not a valid index or ``count`` is not
                between 1 and the current size of that pile.

        After the move the turn passes to the opponent. If the board is then
        empty, that opponent is recorded as the winner, i.e. the player who
        removed the last object loses.
        """
        pile, count = action
        if not 0 <= pile < len(self.piles):
            raise ValueError("Invalid pile")
        if not 1 <= count <= self.piles[pile]:
            raise ValueError("Invalid number of objects")

        self.piles[pile] -= count
        self.switch_player()
        if all(pile == 0 for pile in self.piles):
            self.winner = self.player
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class NimAI():
    """Tabular Q-learning agent for Nim.

    ``self.q`` maps ``(state, action)`` to a learned value, where ``state``
    is a tuple of pile sizes and ``action`` is a ``(pile, count)`` tuple.
    Pairs that were never updated are treated as having value 0.
    """

    def __init__(self, alpha=0.5, epsilon=0.1):
        """Create an agent with an empty Q-table.

        Parameters:
            alpha (float): learning rate used by ``update_q_value``.
            epsilon (float): probability of a random move in ``choose_action``.
        """
        # The table starts empty: pre-seeded sample entries would be treated
        # as learned values and bias both training and play.
        self.q = dict()
        self.alpha = alpha
        self.epsilon = epsilon

    def update(self, old_state, action, new_state, reward):
        """Learn from one transition.

        Parameters:
            old_state (list): state in which ``action`` was taken.
            action (tuple): the action taken.
            new_state (list): state reached afterwards.
            reward (float): reward received for the transition.
        """
        old_q = self.get_q_value(old_state, action)
        best_future_q = self.best_future_reward(new_state)
        self.update_q_value(old_state, action, old_q, reward, best_future_q)

    def get_q_value(self, state, action):
        """Return the Q-value for a given state-action pair.

        Parameters:
            state (list): The current game state.
            action (tuple): The action being evaluated.

        Returns:
            float: The Q-value associated with the (state, action) pair,
            or 0 if the pair is not yet in the Q-table.
        """
        # Lists are unhashable, so states are stored as tuples.
        return self.q.get((tuple(state), tuple(action)), 0)

    def update_q_value(self, state, action, old_q, reward, future_q):
        """Update the Q-value for a state-action pair with the Q-learning rule.

        new = old_q + alpha * ((reward + future_q) - old_q)

        Parameters:
            state (list): The current game state.
            action (tuple): The action taken.
            old_q (float): The previous Q-value for the (state, action) pair.
            reward (float): The reward received after taking the action.
            future_q (float): The maximum Q-value for the next state.
        """
        new_estimate = reward + future_q
        self.q[(tuple(state), tuple(action))] = (
            old_q + self.alpha * (new_estimate - old_q)
        )

    def best_future_reward(self, state):
        """Return the highest Q-value among all actions possible in ``state``.

        Parameters:
            state (list): The state for which to compute the best future reward.

        Returns:
            float: The highest Q-value among available actions,
            or 0 if no actions are available.
        """
        actions = Nim.available_actions(state)
        if not actions:
            return 0
        return max(self.get_q_value(state, action) for action in actions)

    def choose_action(self, state, epsilon=True):
        """Choose an action for ``state`` using an epsilon-greedy strategy.

        Parameters:
            state (list): The current game state.
            epsilon (bool): If True, pick a uniformly random action with
                probability ``self.epsilon``; otherwise always pick a best
                action.

        Returns:
            tuple: The chosen action from the available actions. Ties between
            equally valued best actions are broken at random.

        Raises:
            ValueError: if ``state`` has no available actions.
        """
        # Sorted so that results are reproducible under a fixed random seed.
        actions = sorted(Nim.available_actions(state))
        if not actions:
            raise ValueError("No available actions for this state")

        if epsilon and random.random() < self.epsilon:
            return random.choice(actions)

        best_q = max(self.get_q_value(state, action) for action in actions)
        best_actions = [
            action for action in actions
            if self.get_q_value(state, action) == best_q
        ]
        return random.choice(best_actions)
|
||||||
|
|
||||||
|
def train(n):
    """Train a NimAI by letting it play ``n`` games against itself.

    Parameters:
        n: number of self-play games.

    Returns:
        The trained ``NimAI`` instance.

    The move that ends a game is updated with reward -1, the opponent's
    preceding move with reward 1, and every other move with reward 0 once
    the opponent has replied.
    """
    player = NimAI()

    for _ in range(n):
        game = Nim([4, 4, 4, 4])
        # Most recent (state, action) of each side, keyed by player number.
        last_move = {who: {"state": None, "action": None} for who in (0, 1)}

        while True:
            mover = game.player
            state = game.piles.copy()
            action = player.choose_action(state)
            last_move[mover].update(state=state, action=action)

            game.move(action)
            new_state = game.piles.copy()
            # game.player is now the opponent of the side that just moved.
            previous = last_move[game.player]

            if game.winner is not None:
                player.update(state, action, new_state, -1)
                player.update(previous["state"], previous["action"], new_state, 1)
                break
            if previous["state"] is not None:
                player.update(previous["state"], previous["action"], new_state, 0)

    return player
|
||||||
11
play.py
Normal file
11
play.py
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
from nim import train
|
||||||
|
from game import start_game
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Phase 1: let the AI learn from 1000 self-play games.
    print("START TRAINING \n")
    ai = train(1000)

    # Phase 2: open the pygame window and play against the trained AI.
    print("STARTING THE GAME \n")
    start_game(ai)
|
||||||
31
test.py
Normal file
31
test.py
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
from nim import NimAI
|
||||||
|
|
||||||
|
def test_get_q_value(ai):
    """Print the Q-value ``ai`` reports for one fixed state/action pair."""
    print("\n--- Testing get_q_value ---")
    state, action = (0, 0, 0, 2), (3, 2)
    print(f"Q-value for state {state}, action {action}: {ai.get_q_value(state, action)}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_q_value(ai):
    """Placeholder check for ``update_q_value``; only prints its header."""
    header = "\n--- Testing update_q_value ---"
    print(header)
|
||||||
|
|
||||||
|
|
||||||
|
def test_best_future_reward(ai):
    """Placeholder check for ``best_future_reward``; only prints its header."""
    header = "\n--- Testing best_future_reward ---"
    print(header)
|
||||||
|
|
||||||
|
|
||||||
|
def test_choose_action(ai):
    """Placeholder check for ``choose_action``; only prints its header."""
    header = "\n--- Testing choose_action ---"
    print(header)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    ai = NimAI()

    # Run every manual check in order against the same untrained agent.
    for check in (
        test_get_q_value,
        test_update_q_value,
        test_best_future_reward,
        test_choose_action,
    ):
        check(ai)

    print("\nAll tests completed.")
|
||||||
Loading…
Add table
Add a link
Reference in a new issue