diff --git a/__pycache__/nim.cpython-313.pyc b/__pycache__/nim.cpython-313.pyc index af19071..80edc6e 100644 Binary files a/__pycache__/nim.cpython-313.pyc and b/__pycache__/nim.cpython-313.pyc differ diff --git a/nim.py b/nim.py index ad1a42c..6d77749 100644 --- a/nim.py +++ b/nim.py @@ -62,7 +62,7 @@ class NimAI(): float: The Q-value associated with the (state, action) pair. Returns 0 if the pair is not yet in the Q-table. """ - print(self.q, state, action) + # print(self.q, state, action) try: return self.q[(tuple(state), action)] except: @@ -123,10 +123,13 @@ class NimAI(): # keys = [key[1] for key in self.q.key if key[0] == state] # for key in keys: else: - try: - return max([key[1] for key in self.q.keys() if key[0] == state]) - except: - return (0,0) + # state = tuple(state) + # max(self.q[state, action]) for action in [key[1] for key in self.q if key[0] == state] + # for q in self.q: + # if q[0] == state: + + return list(self.q.keys())[list(self.q.values()).index(self.best_future_reward(state))][1] + def train(n): player = NimAI() diff --git a/play.py b/play.py index 6206d25..40fe060 100644 --- a/play.py +++ b/play.py @@ -5,6 +5,7 @@ if __name__ == "__main__": # Train the AI with 1000 games print("START TRAINING \n") ai = train(1000) + print(ai.q) # Start the game and play against the trained AI print("STARTING THE GAME \n") diff --git a/test.py b/test.py index 3a974b3..675edd2 100644 --- a/test.py +++ b/test.py @@ -25,6 +25,8 @@ def test_best_future_reward(ai): def test_choose_action(ai): print("\n--- Testing choose_action ---") + print(ai.choose_action([1,1,1,0])) + print(ai.choose_action([1,1,1,0], epsilon=False)) if __name__ == "__main__":