diff --git a/__pycache__/nim.cpython-313.pyc b/__pycache__/nim.cpython-313.pyc index 9065861..af19071 100644 Binary files a/__pycache__/nim.cpython-313.pyc and b/__pycache__/nim.cpython-313.pyc differ diff --git a/nim.py b/nim.py index 8056c8b..ad1a42c 100644 --- a/nim.py +++ b/nim.py @@ -31,10 +31,16 @@ class Nim(): class NimAI(): - def __init__(self, alpha=0.5, epsilon=0.1): + def __init__(self, alpha=0.5, epsilon=1): self.q = dict() # Q-value table - self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value - self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value + # self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value + # self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value + + self.q[((1,1,1,0), (0,1))] = 0.4 + self.q[((1,1,1,0), (1,1))] = 0.9 + self.q[((1,1,1,0), (2,1))] = 0.7 + self.q[((2,1,1,0), (0,1))] = 0.2 + self.alpha = alpha # Learning rate self.epsilon = epsilon # Exploration rate @@ -89,15 +95,17 @@ class NimAI(): float: The highest Q-value among available actions. Returns 0 if no actions are available. """ + state = tuple(state) # actions = [] - # for q in self.q.key: - # if q[0] == state: + # for q in self.q: + # print(q) + # if q[0] == tuple(state): # actions.append(q[1]) - actions = tuple([key[1] for key in self.q.keys() if key[0] == state]) - try: - return max([q for q in self.q[tuple(state), actions]]) - except: - return 0 + actions = [key[1] for key in self.q if key[0] == state] + # print(actions) + # print(self.q[state, action] for action in actions) + # print(max(self.q[state, action] for action in actions)) + return 0 if actions == [] else max(self.q[state, action] for action in actions) def choose_action(self, state, epsilon=True): """ diff --git a/test.py b/test.py index 0731673..3a974b3 100644 --- a/test.py +++ b/test.py @@ -10,10 +10,17 @@ def test_get_q_value(ai): def test_update_q_value(ai): print("\n--- Testing update_q_value ---") + state = (2, 1, 1, 0) + action = (0, 1) + print(ai.q) + print(ai.update_q_value([2, 1, 1, 0], (0, 1), 0.2, 1, 0.8)) + print(ai.q) def test_best_future_reward(ai): print("\n--- Testing best_future_reward ---") + print(ai.best_future_reward([1,1,1,0])) + print(ai.best_future_reward([1,1,1,1])) def test_choose_action(ai):