Add nix dev shell (python3 + pygame) with direnv cache; work on Q-learning methods in nim.py
This commit is contained in:
parent
0d2340b7e8
commit
a74be2fc31
11 changed files with 2367 additions and 184 deletions
19
.direnv/bin/nix-direnv-reload
Executable file
19
.direnv/bin/nix-direnv-reload
Executable file
|
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/env bash
|
||||
set -e
|
||||
if [[ ! -d "/home/mia/git/Nim-AI-template" ]]; then
|
||||
echo "Cannot find source directory; Did you move it?"
|
||||
echo "(Looking for "/home/mia/git/Nim-AI-template")"
|
||||
echo 'Cannot force reload with this script - use "direnv reload" manually and then try again'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# rebuild the cache forcefully
|
||||
_nix_direnv_force_reload=1 direnv exec "/home/mia/git/Nim-AI-template" true
|
||||
|
||||
# Update the mtime for .envrc.
|
||||
# This will cause direnv to reload again - but without re-building.
|
||||
touch "/home/mia/git/Nim-AI-template/.envrc"
|
||||
|
||||
# Also update the timestamp of whatever profile_rc we have.
|
||||
# This makes sure that we know we are up to date.
|
||||
touch -r "/home/mia/git/Nim-AI-template/.envrc" "/home/mia/git/Nim-AI-template/.direnv"/*.rc
|
||||
1
.direnv/nix-profile-25.11-wl5m60vn27dl0dnq
Symbolic link
1
.direnv/nix-profile-25.11-wl5m60vn27dl0dnq
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
/nix/store/swz054hgfjc292xpp0axf1p089mlcbcd-nix-shell-env
|
||||
2124
.direnv/nix-profile-25.11-wl5m60vn27dl0dnq.rc
Normal file
2124
.direnv/nix-profile-25.11-wl5m60vn27dl0dnq.rc
Normal file
File diff suppressed because it is too large
Load diff
1
.envrc
Normal file
1
.envrc
Normal file
|
|
@ -0,0 +1 @@
|
|||
use nix
|
||||
BIN
__pycache__/game.cpython-313.pyc
Normal file
BIN
__pycache__/game.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/game.cpython-315.pyc
Normal file
BIN
__pycache__/game.cpython-315.pyc
Normal file
Binary file not shown.
BIN
__pycache__/nim.cpython-313.pyc
Normal file
BIN
__pycache__/nim.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/nim.cpython-315.pyc
Normal file
BIN
__pycache__/nim.cpython-315.pyc
Normal file
Binary file not shown.
7
game.py
7
game.py
|
|
@ -91,6 +91,8 @@ def draw_game_state():
|
|||
def remove_stones():
|
||||
"""Removes the selected stones from the selected pile."""
|
||||
global player_turn, selected_pile
|
||||
if not selected_stones:
|
||||
return # Do nothing if nothing is selected
|
||||
for pile_index, stone_index in selected_stones:
|
||||
piles[pile_index] -= 1
|
||||
selected_stones.clear()
|
||||
|
|
@ -102,11 +104,14 @@ def handle_selection(pile_index, stone_index):
|
|||
"""Handles selecting or deselecting stones, ensuring only one pile can be selected."""
|
||||
global selected_pile
|
||||
if selected_pile is None or selected_pile == pile_index:
|
||||
selected_pile = pile_index # Lock the selection to the current pile
|
||||
if (pile_index, stone_index) in selected_stones:
|
||||
selected_stones.remove((pile_index, stone_index)) # Deselect
|
||||
# If no stones are selected anymore, allow switching piles
|
||||
if not selected_stones:
|
||||
selected_pile = None
|
||||
else:
|
||||
selected_stones.append((pile_index, stone_index)) # Select
|
||||
selected_pile = pile_index # Lock the selection to the current pile
|
||||
|
||||
def restart_game():
|
||||
"""Restarts the game."""
|
||||
|
|
|
|||
33
nim.py
33
nim.py
|
|
@ -56,12 +56,18 @@ class NimAI():
|
|||
float: The Q-value associated with the (state, action) pair.
|
||||
Returns 0 if the pair is not yet in the Q-table.
|
||||
"""
|
||||
print(self.q)
|
||||
print(self.q, state, action)
|
||||
try:
|
||||
return self.q[(tuple(state), action)]
|
||||
except:
|
||||
return 0
|
||||
|
||||
def update_q_value(self, state, action, old_q, reward, future_q):
|
||||
"""
|
||||
Update the Q-value for a state-action pair using the Q-learning formula.
|
||||
|
||||
Q(s, a) ← Q(s, a) + α * (Belohnung + γ * max_a' Q(s', a') - Q(s, a))
|
||||
|
||||
Parameters:
|
||||
state (list): The current game state.
|
||||
action (tuple): The action taken.
|
||||
|
|
@ -69,10 +75,11 @@ class NimAI():
|
|||
reward (float): The reward received after taking the action.
|
||||
future_q (float): The maximum Q-value for the next state.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
self.q[tuple(state), action] = old_q + self.alpha * (reward + self.epsilon * future_q - old_q)
|
||||
return 0
|
||||
|
||||
def best_future_reward(self, state):
|
||||
"""
|
||||
"""
|
||||
Determine the highest Q-value among all possible actions in a given state.
|
||||
|
||||
Parameters:
|
||||
|
|
@ -82,7 +89,15 @@ class NimAI():
|
|||
float: The highest Q-value among available actions.
|
||||
Returns 0 if no actions are available.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
# actions = []
|
||||
# for q in self.q.key:
|
||||
# if q[0] == state:
|
||||
# actions.append(q[1])
|
||||
actions = tuple([key[1] for key in self.q.keys() if key[0] == state])
|
||||
try:
|
||||
return max([q for q in self.q[tuple(state), actions]])
|
||||
except:
|
||||
return 0
|
||||
|
||||
def choose_action(self, state, epsilon=True):
|
||||
"""
|
||||
|
|
@ -95,7 +110,15 @@ class NimAI():
|
|||
Returns:
|
||||
tuple: The chosen action from the available actions.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
if epsilon:
|
||||
return random.choice(tuple(Nim.available_actions(state)))
|
||||
# keys = [key[1] for key in self.q.key if key[0] == state]
|
||||
# for key in keys:
|
||||
else:
|
||||
try:
|
||||
return max([key[1] for key in self.q.keys() if key[0] == state])
|
||||
except:
|
||||
return (0,0)
|
||||
|
||||
def train(n):
|
||||
player = NimAI()
|
||||
|
|
|
|||
10
shell.nix
Normal file
10
shell.nix
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
let
|
||||
pkgs = import <nixpkgs> {};
|
||||
in pkgs.mkShell {
|
||||
packages = [
|
||||
(pkgs.python3.withPackages (python-pkgs: [
|
||||
python-pkgs.pygame
|
||||
]))
|
||||
];
|
||||
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue