First commit
This commit is contained in:
commit
0d2340b7e8
4 changed files with 344 additions and 0 deletions
179
game.py
Normal file
179
game.py
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
import pygame
|
||||
import sys
|
||||
import time
|
||||
|
||||
# Initialize Pygame (must happen before the font and display calls below)
pygame.init()

# Constants
WIDTH, HEIGHT = 700, 500  # Window size passed to pygame.display.set_mode
FPS = 30  # Upper bound handed to clock.tick() in the main loop
# Colours as (R, G, B) tuples
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
LIGHT_GREY = (200, 200, 200)
DARK_GREY = (50, 50, 50)
FONT = pygame.font.Font(None, 36)  # None selects pygame's default font, size 36

# Create screen
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption('Nim Game')

# Define piles (number of coins in each pile)
piles = [4, 4, 4, 4]  # 4 piles with 4 coins each
selected_stones = []  # (pile_index, stone_index) tuples currently selected for removal
selected_pile = None  # Index of the pile the current selection is locked to, or None

# Players
player_turn = 1  # Player 1 starts (alternates between 1 and 2); start_game treats 1 as the human and 2 as the AI

# Game State
game_over = False  # Set to True by check_game_over() once every pile is empty
winner = None  # Set by check_game_over(); None while the game is running
|
||||
|
||||
def draw_piles():
    """Render every pile as a vertical stack of circles with a label underneath.

    Reads the module-level ``piles`` and ``selected_stones``: a coin whose
    ``(pile_index, stone_index)`` pair is in ``selected_stones`` is drawn in
    green, every other coin in red. Draws directly onto ``screen``.
    """
    first_column_x = 130  # Horizontal centre of the left-most pile
    bottom_row_y = 250    # Vertical centre of the lowest coin in each pile
    column_gap = 150      # Horizontal distance between neighbouring piles
    coin_radius = 20
    row_gap = 2 * coin_radius + 10  # One coin diameter plus 10px of spacing

    for pile_number, coin_count in enumerate(piles):
        column_x = first_column_x + pile_number * column_gap
        for level in range(coin_count):
            fill = GREEN if (pile_number, level) in selected_stones else RED
            centre = (column_x, bottom_row_y - level * row_gap)  # Stacks grow upwards
            pygame.draw.circle(screen, fill, centre, coin_radius)
        label = FONT.render(f'Pile {pile_number + 1}', True, BLACK)
        screen.blit(label, (column_x - 30, 320))
|
||||
|
||||
def check_game_over():
    """Flag the game as finished once every pile is empty and record the winner.

    Both callers (``remove_stones`` and ``remove_stones_from_ai``) switch
    ``player_turn`` *before* calling this function, so at this point
    ``player_turn`` is the player who did NOT take the last stone. That player
    is the winner: ``draw_game_state`` announces the win for ``player_turn``,
    and ``Nim.move`` in nim.py likewise awards the win to the player whose turn
    it is after the final move.

    Writes the module-level ``winner`` and ``game_over``; leaves both untouched
    while any pile still holds coins.
    """
    global winner, game_over
    if all(pile == 0 for pile in piles):
        # The turn has already been handed over, so the player to move now is
        # the one who avoided taking the last stone.
        winner = player_turn
        game_over = True
|
||||
|
||||
def draw_game_state():
    """Redraw the whole frame: background, piles, status banner and one button.

    Reads the module-level ``game_over`` and ``player_turn``.

    * Game over: shows the result in green ("You win" when ``player_turn`` is 1,
      otherwise "AI wins") and the "Restart" button.
    * Game running: shows whose turn it is in black and the "Remove" button.

    The "Remove" button is only drawn while the game is running, because
    ``start_game`` ignores clicks on it once the game is over.
    """
    screen.fill(LIGHT_GREY)  # Background color
    draw_piles()

    # Pick the banner text and colour first, then render and centre it once.
    if game_over:
        message = 'You win, hooray!' if player_turn == 1 else 'AI wins!'
        message_color = GREEN
    else:
        message = 'Your turn!' if player_turn == 1 else 'Computer thinking... '
        message_color = BLACK
    banner = FONT.render(message, True, message_color)
    screen.blit(banner, (WIDTH // 2 - banner.get_width() // 2, 30))

    if game_over:
        # Restart button along the bottom edge
        pygame.draw.rect(screen, DARK_GREY, (WIDTH // 2 - 60, HEIGHT - 60, 120, 40))
        restart_text = FONT.render("Restart", True, WHITE)
        screen.blit(restart_text, (WIDTH // 2 - restart_text.get_width() // 2, HEIGHT - 50))
    else:
        # Remove button, one row above where the Restart button would appear
        pygame.draw.rect(screen, BLACK, (WIDTH // 2 - 60, HEIGHT - 120, 120, 40))
        remove_text = FONT.render("Remove", True, WHITE)
        screen.blit(remove_text, (WIDTH // 2 - remove_text.get_width() // 2, HEIGHT - 110))
|
||||
|
||||
def remove_stones():
    """Apply the human player's selection and hand the turn to the other player.

    Takes one coin from the corresponding pile for every entry in
    ``selected_stones``, clears the selection, releases the pile lock, switches
    ``player_turn`` and then calls ``check_game_over()``.

    If nothing is selected the call is not a move: a Nim turn must remove at
    least one coin, so the turn is NOT switched. The pile lock is still
    released so the player can start a fresh selection on any pile.
    """
    global player_turn, selected_pile
    if not selected_stones:
        selected_pile = None
        return

    for pile_index, _stone_index in selected_stones:
        piles[pile_index] -= 1
    selected_stones.clear()
    selected_pile = None  # Reset selected pile after removal
    player_turn = 2 if player_turn == 1 else 1  # Switch turns
    check_game_over()
|
||||
|
||||
def handle_selection(pile_index, stone_index):
    """Toggle the selection of one coin, allowing coins from a single pile only.

    Parameters:
        pile_index (int): Index of the pile the clicked coin belongs to.
        stone_index (int): Position of the clicked coin within that pile.

    While at least one coin is selected, ``selected_pile`` locks the selection
    to that pile and clicks on other piles are ignored. Once the last selected
    coin is deselected the lock is released, so the player can switch piles.
    """
    global selected_pile
    if selected_pile is not None and selected_pile != pile_index:
        return  # Selection is locked to a different pile

    stone = (pile_index, stone_index)
    if stone in selected_stones:
        selected_stones.remove(stone)  # Deselect
    else:
        selected_stones.append(stone)  # Select

    # Keep the lock only while something is actually selected.
    selected_pile = pile_index if selected_stones else None
|
||||
|
||||
def restart_game():
    """Put every piece of module-level game state back to its starting value.

    The existing ``selected_stones`` list is emptied in place; ``piles`` is
    rebound to a fresh list of four piles with four coins each.
    """
    global piles, player_turn, selected_stones, selected_pile, game_over, winner
    selected_stones.clear()
    piles = [4] * 4  # Four piles of four coins
    player_turn, selected_pile = 1, None
    game_over, winner = False, None
|
||||
|
||||
def start_game(ai):
    """Run the pygame main loop, letting the human (player 1) play against ``ai``.

    Parameters:
        ai: Object whose ``choose_action(piles, epsilon=False)`` returns a
            ``(pile, count)`` pair; it is asked for a move whenever
            ``player_turn`` is 2 and the game is not over.

    The loop never returns; closing the window quits pygame and exits the
    process via ``sys.exit()``.
    """
    global player_turn, game_over

    # Coin layout; these values mirror the ones used when drawing the piles.
    first_column_x = 130
    bottom_row_y = 250
    column_gap = 150
    coin_radius = 20
    row_gap = 2 * coin_radius + 10

    button_centre_x = WIDTH // 2
    frame_clock = pygame.time.Clock()

    while True:
        frame_clock.tick(FPS)

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
            if event.type != pygame.MOUSEBUTTONDOWN:
                continue

            click_x, click_y = event.pos
            on_button_column = abs(click_x - button_centre_x) <= 60

            if game_over:
                # Only the "Restart" button reacts once the game has ended.
                if on_button_column and HEIGHT - 60 <= click_y <= HEIGHT - 20:
                    restart_game()
            elif player_turn == 1:
                # Toggle any coin whose circle contains the click position.
                for pile_index, coin_count in enumerate(piles):
                    dx = click_x - (first_column_x + pile_index * column_gap)
                    for stone_index in range(coin_count):
                        dy = click_y - (bottom_row_y - stone_index * row_gap)
                        if dx * dx + dy * dy <= coin_radius * coin_radius:
                            handle_selection(pile_index, stone_index)
                # "Remove" button click
                if on_button_column and HEIGHT - 120 <= click_y <= HEIGHT - 80:
                    remove_stones()

        # AI's turn: show the "thinking" frame, pause, then apply its move.
        if player_turn == 2 and not game_over:
            draw_game_state()
            pygame.display.flip()
            time.sleep(2)  # Simulate thinking
            ai_move = ai.choose_action(piles, epsilon=False)
            remove_stones_from_ai(ai_move)

        draw_game_state()
        pygame.display.flip()
|
||||
|
||||
def remove_stones_from_ai(action):
    """Apply the AI's move and give the turn back to the human player.

    Parameters:
        action (tuple): ``(pile, count)`` - index of the pile to take from and
            how many coins to take.

    Decrements ``piles[pile]`` ``count`` times, sets ``player_turn`` to 1 and
    then calls ``check_game_over()``.
    """
    global player_turn
    target_pile, amount = action
    for _ in range(amount):
        piles[target_pile] -= 1
    player_turn = 1  # Switch back to human player
    check_game_over()
|
||||
123
nim.py
Normal file
123
nim.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
import random
|
||||
|
||||
class Nim:
    """State of a Nim game in which the player who takes the last object loses.

    Attributes:
        piles (list): Number of objects remaining in each pile.
        player (int): 0 or 1, the player whose turn it is.
        winner (int | None): None while the game is running, otherwise the
            winning player.
    """

    def __init__(self, initial=None):
        """Start a new game.

        Parameters:
            initial (sequence of int, optional): Starting pile sizes. Defaults
                to four piles of four. The sequence is copied, so the caller's
                object is never modified.
        """
        # A None sentinel avoids a shared mutable default argument.
        self.piles = [4, 4, 4, 4] if initial is None else list(initial)
        self.player = 0  # Player 0 starts
        self.winner = None

    @classmethod
    def available_actions(cls, piles):
        """Return the set of all legal ``(pile_index, count)`` moves for ``piles``."""
        return {
            (index, count)
            for index, size in enumerate(piles)
            for count in range(1, size + 1)
        }

    @classmethod
    def other_player(cls, player):
        """Return the opponent of ``player`` (0 for player 1, otherwise 1)."""
        return 0 if player == 1 else 1

    def switch_player(self):
        """Hand the turn to the other player."""
        self.player = Nim.other_player(self.player)

    def move(self, action):
        """Remove objects from a pile and pass the turn.

        Parameters:
            action (tuple): ``(pile, count)`` - pile index and the number of
                objects to remove from it.

        Raises:
            ValueError: If ``pile`` is not a valid index or ``count`` is not
                between 1 and the current size of that pile. The game state is
                left unchanged in that case.

        When the move empties the last pile, ``winner`` is set to the player
        whose turn it would be next, i.e. the one who did not take the last
        object.
        """
        pile, count = action
        if not 0 <= pile < len(self.piles):
            raise ValueError(f"Invalid pile: {pile}")
        if not 1 <= count <= self.piles[pile]:
            raise ValueError(f"Invalid number of objects: {count}")

        self.piles[pile] -= count
        self.switch_player()
        if all(remaining == 0 for remaining in self.piles):
            self.winner = self.player
|
||||
|
||||
|
||||
|
||||
class NimAI:
    """Q-learning agent for Nim.

    Attributes:
        q (dict): Q-table mapping ``(state_tuple, action)`` to a Q-value.
        alpha (float): Learning rate.
        epsilon (float): Probability of exploring with a random action.
    """

    def __init__(self, alpha=0.5, epsilon=0.1):
        """Create an agent with the given learning and exploration rates."""
        self.q = dict()  # Q-value table
        # Seed entries kept from the original code; test.py looks one of them up.
        self.q[(0, 0, 0, 2), (3, 2)] = -1  # Test Q-Value
        self.q[(0, 0, 0, 2), (3, 1)] = 10  # Test Q-Value

        self.alpha = alpha  # Learning rate
        self.epsilon = epsilon  # Exploration rate

    def update(self, old_state, action, new_state, reward):
        """Learn from one transition.

        Looks up the current Q-value of ``(old_state, action)``, estimates the
        best reward reachable from ``new_state`` and stores the updated value.
        """
        old_q = self.get_q_value(old_state, action)
        best_future_q = self.best_future_reward(new_state)
        self.update_q_value(old_state, action, old_q, reward, best_future_q)

    def get_q_value(self, state, action):
        """
        Return the Q-value for a given state-action pair.

        Parameters:
            state (list): The current game state.
            action (tuple): The action being evaluated.

        Returns:
            float: The Q-value associated with the (state, action) pair.
                   Returns 0 if the pair is not yet in the Q-table.
        """
        # States arrive as lists; tuples are needed to use them as dict keys.
        return self.q.get((tuple(state), tuple(action)), 0)

    def update_q_value(self, state, action, old_q, reward, future_q):
        """
        Update the Q-value for a state-action pair using the Q-learning formula.

            Q(s, a) <- old_q + alpha * ((reward + future_q) - old_q)

        Parameters:
            state (list): The current game state.
            action (tuple): The action taken.
            old_q (float): The previous Q-value for the (state, action) pair.
            reward (float): The reward received after taking the action.
            future_q (float): The maximum Q-value for the next state.
        """
        new_estimate = reward + future_q
        self.q[tuple(state), tuple(action)] = old_q + self.alpha * (new_estimate - old_q)

    def best_future_reward(self, state):
        """
        Determine the highest Q-value among all possible actions in a given state.

        Parameters:
            state (list): The state for which to compute the best future reward.

        Returns:
            float: The highest Q-value among available actions.
                   Returns 0 if no actions are available.
        """
        return max(
            (self.get_q_value(state, action) for action in Nim.available_actions(state)),
            default=0,
        )

    def choose_action(self, state, epsilon=True):
        """
        Choose an action for the given state using an epsilon-greedy strategy.

        Parameters:
            state (list): The current game state.
            epsilon (bool): If True, use epsilon-greedy exploration; otherwise, choose the best action.

        Returns:
            tuple: The chosen action from the available actions.

        Raises:
            ValueError: If ``state`` has no available actions.
        """
        # Sorting gives random.choice a sequence with a stable order.
        actions = sorted(Nim.available_actions(state))
        if not actions:
            raise ValueError(f"No available actions for state {state}")

        # Explore: with probability self.epsilon pick any legal action.
        if epsilon and random.random() < self.epsilon:
            return random.choice(actions)

        # Exploit: pick a highest-valued action, breaking ties at random so
        # that untrained states do not always yield the same move.
        best_q = max(self.get_q_value(state, action) for action in actions)
        best_actions = [a for a in actions if self.get_q_value(state, a) == best_q]
        return random.choice(best_actions)
|
||||
|
||||
def train(n):
    """Train a fresh ``NimAI`` by letting it play ``n`` games against itself.

    After every move the agent is updated for the player who is about to move,
    using that player's previous state/action and a reward of 0. When a game
    ends, the move that finished it is updated with reward -1 and the other
    player's last move with reward 1.

    Returns:
        NimAI: The trained agent.
    """
    player = NimAI()

    for _ in range(n):
        game = Nim([4, 4, 4, 4])
        # Most recent state/action of each player, keyed by player number.
        last_move = {side: {"state": None, "action": None} for side in (0, 1)}

        while True:
            mover = game.player
            state = game.piles.copy()
            action = player.choose_action(state)
            last_move[mover]["state"] = state
            last_move[mover]["action"] = action

            game.move(action)
            new_state = game.piles.copy()

            # game.player has switched, so this is the opponent's last move.
            previous = last_move[game.player]
            if game.winner is not None:
                player.update(state, action, new_state, -1)
                player.update(previous["state"], previous["action"], new_state, 1)
                break
            if previous["state"] is not None:
                player.update(previous["state"], previous["action"], new_state, 0)

    return player
|
||||
11
play.py
Normal file
11
play.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
from nim import train
|
||||
from game import start_game
|
||||
|
||||
if __name__ == "__main__":
    # Phase 1: self-play training (1000 games)
    print("START TRAINING \n")
    ai = train(1000)

    # Phase 2: open the game window and play against the trained agent
    print("STARTING THE GAME \n")
    start_game(ai)
|
||||
31
test.py
Normal file
31
test.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
from nim import NimAI
|
||||
|
||||
def test_get_q_value(ai):
    """Look up one fixed state/action pair on ``ai`` and print the result.

    This is a manual check: the value is printed, not asserted.
    """
    print("\n--- Testing get_q_value ---")
    state, action = (0, 0, 0, 2), (3, 2)
    value = ai.get_q_value(state, action)
    print(f"Q-value for state {state}, action {action}: {value}")
|
||||
|
||||
|
||||
def test_update_q_value(ai):
    """Placeholder for the update_q_value check; only prints a section header."""
    print("\n--- Testing update_q_value ---")
|
||||
|
||||
|
||||
def test_best_future_reward(ai):
    """Placeholder for the best_future_reward check; only prints a section header."""
    print("\n--- Testing best_future_reward ---")
|
||||
|
||||
|
||||
def test_choose_action(ai):
    """Placeholder for the choose_action check; only prints a section header."""
    print("\n--- Testing choose_action ---")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    ai = NimAI()

    # Run every check in order against the same agent instance.
    for run_check in (
        test_get_q_value,
        test_update_q_value,
        test_best_future_reward,
        test_choose_action,
    ):
        run_check(ai)

    print("\nAll tests completed.")
|
||||
Loading…
Add table
Add a link
Reference in a new issue