From e4fca60692fe3b7671e33c43eb0e14f6e7409693 Mon Sep 17 00:00:00 2001 From: mia Date: Thu, 23 Apr 2026 10:00:43 +0200 Subject: [PATCH] Implementiere best_future_reward --- __pycache__/nim.cpython-313.pyc | Bin 7652 -> 7731 bytes nim.py | 16 +++++++++------- test.py | 2 ++ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/__pycache__/nim.cpython-313.pyc b/__pycache__/nim.cpython-313.pyc index 181141d67368065866b608205017271ef43af02c..af19071c92a4d1c4d5317fe1cc425c6d507b4ae2 100644 GIT binary patch delta 732 zcmYjPUr19?82`>aySUjj_vg-GPH+AT?`RR~q8d7*q9B8Ef26_OT6b#c%5&A}lPe*7 z3gagD5J3`@Ah=i$KK58X#A4)wPX+bhQ??1^Ti;nKeenIx_nq&@_xr&)-_PfK)upPc zO2CMH-X3{me_I{0C?d8Lv-A!j3Q@8M5rSno0;Frd9wPz$n^Dr&EKaOGSQ@XTXi5mu zQ*dqoAT9v(lNN~ZZRH3;kaPrm<%Lv^bb3I6NAagiAkmoGBq7%cRaq)`-%9d z*6r~PsVQ~(0UPWdq_(5Ac&Er$x$ zp5@R|B71seYF%2H)x8TH3**ncFGKScd27#(5v2MhruWT{=Z)c4+7~7KFT;10ea$6HF+yCwfMe+*ttD1wMaEx!jHjjm delta 596 zcmdmN^Te9(GcPX}0}uqCd6{`ncq3n!pd>qx%M8Sy`4|}(rZPkWNeGzyfJ2Nah;{OA zQQ67M1VtG`C+`uImnvq;V={%R$zzsdhz98g1CV+fg!WKIn6gl26DC%m5@CigmSQG- zhGHgR1`ienhJ2R*CSNuN25trgpjaNuWI-Vjp!pC^8iOKGSe_xB(Uw`BA&pU!d9%8Z z7NeCWOHm-utRe>>af>B8wX*mYb8cdVCTkHdNH_>2%nTGRk^*rZfJ8CKZUqH}$%P`? z_0>Rj1H%_N24RU6ChN^snyt59X}!kwvV_kn+Xc=C7`16stZ{`&3W>Sm<>HvGBC<4d=`XBDmBA`l!qNvH+#7!A#OuPh0}5qMuawSOq^R2(yZQ*V?Qq$;k{Nv6B0oDYi!%xp2?13A E01@bm`v3p{ diff --git a/nim.py b/nim.py index bce6111..ad1a42c 100644 --- a/nim.py +++ b/nim.py @@ -95,15 +95,17 @@ class NimAI(): float: The highest Q-value among available actions. Returns 0 if no actions are available. """ + state = tuple(state) # actions = [] - # for q in self.q.key: - # if q[0] == state: + # for q in self.q: + # print(q) + # if q[0] == tuple(state): # actions.append(q[1]) - actions = tuple([key[1] for key in self.q.keys() if key[0] == state]) - try: - return max([q for q in self.q[tuple(state), actions]]) - except: - return 0 + actions = [key[1] for key in self.q if key[0] == state] + # print(actions) + # print(self.q[state, action] for action in actions) + # print(max(self.q[state, action] for action in actions)) + return 0 if actions == [] else max(self.q[state, action] for action in actions) def choose_action(self, state, epsilon=True): """ diff --git a/test.py b/test.py index dfa2c8b..3a974b3 100644 --- a/test.py +++ b/test.py @@ -19,6 +19,8 @@ def test_update_q_value(ai): def test_best_future_reward(ai): print("\n--- Testing best_future_reward ---") + print(ai.best_future_reward([1,1,1,0])) + print(ai.best_future_reward([1,1,1,1])) def test_choose_action(ai):