From ed2bfe4f88cbe7f794fe7246b90c2517f2965397 Mon Sep 17 00:00:00 2001 From: mia Date: Thu, 23 Apr 2026 10:39:31 +0200 Subject: [PATCH] final working i think --- __pycache__/nim.cpython-313.pyc | Bin 7731 -> 7686 bytes nim.py | 13 ++++++++----- play.py | 1 + test.py | 2 ++ 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/__pycache__/nim.cpython-313.pyc b/__pycache__/nim.cpython-313.pyc index af19071c92a4d1c4d5317fe1cc425c6d507b4ae2..80edc6e1bf74004e7f2269c2388515d0ba40d374 100644 GIT binary patch delta 535 zcmdmN(`Li>nU|M~0SH!Hdzq;!x{>cE_vAk;oa}Bup{WdVlk-@`nWC9D$M8flGBQox z%j?OZ36yDI_#ib|j!%odNm7Pk4wJ%Wf4)p6-TrJr zkVKIKkhsO1n^>XA0#!0OR^$+)%VY)7^2swrZP-m2fhrgnCeIL6VicUbTU3ezD#9?i zKw5+)lp#oTa;B&XA1hE%Fe8XmVbEmSY$~R~m>>e=X>#9UD@x2u$9*c?rSEG0%j!1Q z)txS?I~_2&?i_W|IqE`m!e!^g%j!uNg_AF&q+S$GyTFrHBm^?IO(K*rdGZ|zHAdyh z%#unhqO9qRn>8d4GYJHO9H0jxY(YfeDKbGP16>((3X$dplghZq(n?fpca3`0 zEzp?Q_?esYt~ht=niN`cdMXCt8H^)M)giVVjm|N5L?&spK?_KUjVso%`U~jbCFPot zo4PI@s%L#YIj#a*i;Ygn{PS}Z&|lh@60jv965Enq0(d| ztT|UL$Aoy~nbBmz=p&SPA$d!;?O8;#cO6~ghxaD7i02I@fOxu7#Z-UN@YAm5UN_HbTm6|S;?33B)++^CQR&JB#7Kw`zeT0%NpT2GW??lQm ziE)A{{cYln)`?femZr~^9b655*P36YmNSc)<@{oPsZjLCUl$h4_noUV9~)L@UkuEj zTELeBi-iq;yr>O-b`9@-@yCCc8LGi=THuelpEiBh9z$3Q)b~dnYlmwjBX+4o72-Ow zn8I~#^vgo3)$~y`q>G#{hDovH>%fpG`a;~J1g%})2j)sh=MjlsiG=v%@51EMzzMpy zp1=VmOX^)K9f+dqk~CBgLjDg%R;&br-8l;7k(*&cnIqyRs5`c1?AQjvPILk>xDAq( M(_BNhK^F>t0qzQ%mjD0& diff --git a/nim.py b/nim.py index ad1a42c..6d77749 100644 --- a/nim.py +++ b/nim.py @@ -62,7 +62,7 @@ class NimAI(): float: The Q-value associated with the (state, action) pair. Returns 0 if the pair is not yet in the Q-table. """ - print(self.q, state, action) + # print(self.q, state, action) try: return self.q[(tuple(state), action)] except: @@ -123,10 +123,13 @@ class NimAI(): # keys = [key[1] for key in self.q.key if key[0] == state] # for key in keys: else: - try: - return max([key[1] for key in self.q.keys() if key[0] == state]) - except: - return (0,0) + # state = tuple(state) + # max(self.q[state, action]) for action in [key[1] for key in self.q if key[0] == state] + # for q in self.q: + # if q[0] == state: + + return list(self.q.keys())[list(self.q.values()).index(self.best_future_reward(state))][1] + def train(n): player = NimAI() diff --git a/play.py b/play.py index 6206d25..40fe060 100644 --- a/play.py +++ b/play.py @@ -5,6 +5,7 @@ if __name__ == "__main__": # Train the AI with 1000 games print("START TRAINING \n") ai = train(1000) + print(ai.q) # Start the game and play against the trained AI print("STARTING THE GAME \n") diff --git a/test.py b/test.py index 3a974b3..675edd2 100644 --- a/test.py +++ b/test.py @@ -25,6 +25,8 @@ def test_best_future_reward(ai): def test_choose_action(ai): print("\n--- Testing choose_action ---") + print(ai.choose_action([1,1,1,0])) + print(ai.choose_action([1,1,1,0], epsilon=False)) if __name__ == "__main__":