diff --git a/__pycache__/nim.cpython-313.pyc b/__pycache__/nim.cpython-313.pyc index af19071..9065861 100644 Binary files a/__pycache__/nim.cpython-313.pyc and b/__pycache__/nim.cpython-313.pyc differ diff --git a/nim.py b/nim.py index ad1a42c..8056c8b 100644 --- a/nim.py +++ b/nim.py @@ -31,16 +31,10 @@ class Nim(): class NimAI(): - def __init__(self, alpha=0.5, epsilon=1): + def __init__(self, alpha=0.5, epsilon=0.1): self.q = dict() # Q-value table - # self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value - # self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value - - self.q[((1,1,1,0), (0,1))] = 0.4 - self.q[((1,1,1,0), (1,1))] = 0.9 - self.q[((1,1,1,0), (2,1))] = 0.7 - self.q[((2,1,1,0), (0,1))] = 0.2 - + self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value + self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value self.alpha = alpha # Learning rate self.epsilon = epsilon # Exploration rate @@ -95,17 +89,15 @@ class NimAI(): float: The highest Q-value among available actions. Returns 0 if no actions are available. """ - state = tuple(state) # actions = [] - # for q in self.q: - # print(q) - # if q[0] == tuple(state): + # for q in self.q.key: + # if q[0] == state: # actions.append(q[1]) - actions = [key[1] for key in self.q if key[0] == state] - # print(actions) - # print(self.q[state, action] for action in actions) - # print(max(self.q[state, action] for action in actions)) - return 0 if actions == [] else max(self.q[state, action] for action in actions) + actions = tuple([key[1] for key in self.q.keys() if key[0] == state]) + try: + return max([q for q in self.q[tuple(state), actions]]) + except: + return 0 def choose_action(self, state, epsilon=True): """ diff --git a/test.py b/test.py index 3a974b3..0731673 100644 --- a/test.py +++ b/test.py @@ -10,17 +10,10 @@ def test_get_q_value(ai): def test_update_q_value(ai): print("\n--- Testing update_q_value ---") - state = (2, 1, 1, 0) - action = (0, 1) - print(ai.q) - print(ai.update_q_value([2, 1, 1, 0], (0, 1), 0.2, 1, 0.8)) - print(ai.q) def test_best_future_reward(ai): print("\n--- Testing best_future_reward ---") - print(ai.best_future_reward([1,1,1,0])) - print(ai.best_future_reward([1,1,1,1])) def test_choose_action(ai):