From 933d0c5d277f70dbf3ec6926351bc2511dab5e3f Mon Sep 17 00:00:00 2001 From: mia Date: Thu, 23 Apr 2026 09:39:16 +0200 Subject: [PATCH] Implementiere update_q_value --- __pycache__/nim.cpython-313.pyc | Bin 7495 -> 7652 bytes nim.py | 12 +++++++++--- test.py | 5 +++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/__pycache__/nim.cpython-313.pyc b/__pycache__/nim.cpython-313.pyc index 90658617e3ea63406224a7e1b64888db1983f157..181141d67368065866b608205017271ef43af02c 100644 GIT binary patch delta 743 zcmX?Z^~9R@GcPX}0}uqCd6{`{BX2(k5NBso2g$L6*o+*Le{jgx zbNZ#vnh63o?KPQJvJ{B`6(A8zK<-MWA`k@@O+R}E1l|A@7uf&>ffQUvdKwr!L&&{E z=z~~t5-6w1Qe+1+u813~8?4NBayX|Nnv2CR!}IA*}!l|P;|0b2TKpz2Mz`` z%?_4+wobMS{E8Pjly0bM5YPR<#vmrW`7Wn1Bcs7)L2gziMu*AbyxJ13Kuc~3`eo)i zdg`U8mc$pvmnG(urkZY!;$6<94RH;~Ne&<)5J+gU74d@DK_Eg8M0kS;ugN+Ba*Xbi zy#*v0-8ZKSEM$rUsRrx2#gd&`S$vB*H?cyK6`}hUGf=ol3Z|-<706Nmfg*m8m>ZC| zB>{0+QfhHYd|GKqX;Er?QEGW&QA+sc6k#1^1F#(|Amwr(A`(P6fQSen0Ssw`Q=LGf zQIijdnKI@~{w6k89@TGtnoQ|H6+rO79vr2Uw~5P(`U3eypdc!82N57A6~#|}C9cQH z!z%wtX0xosZ6;YnNZ5kp;oO)1fnc(Xw3=uK5aByHTv~-2uDHl^a+|aw kW8>x((l(5Yv6G+5_=?6cGRl5r0Fj>^n8g`|iiCg~0f8;A`~Uy| delta 668 zcmaE2ecX!oGcPX}0}#AQy`HJJk++|Nv25~Oj`-*ZfgC1~I0!^262#SHDq;nR+5w4|AhUr2wn*Y4lkak>u{nX1I8Wx}(lRgxG8-7~ z2#QV?>tN|&yP>Yx!P3vx$##KX`67o38ux*y)aF($V@5`;%{#eSnHU`>@8{K)a041~ zOVBSf*U?iiJ+&mhFup7?r!-Z6GZWu(rWA->APXHqL=ceBWGmtYv4cT`9*FP(5#B)J z7DrlXNoi4Pd|@#gkgEU&Mf@OX50JD7#G2BAl*AICB8W|nn|TEnGC6>Z23vBAB|EjU z_!e_+VudCv!m3-$K;a@On5tq>Kp~m$KKY%nEMwqi9uXa81F%^vAf<93A_}IdC=$rh zWI-5Om4R1f=UGNl7m0Ko(M^jR}OV5j}$C<%E{KcH90Eu&h2m=to z10wt;f00(DA0P2L1 A%m4rY diff --git a/nim.py b/nim.py index 8056c8b..bce6111 100644 --- a/nim.py +++ b/nim.py @@ -31,10 +31,16 @@ class Nim(): class NimAI(): - def __init__(self, alpha=0.5, epsilon=0.1): + def __init__(self, alpha=0.5, epsilon=1): self.q = dict() # Q-value table - self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value - self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value + # self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value + # self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value + + self.q[((1,1,1,0), (0,1))] = 0.4 + self.q[((1,1,1,0), (1,1))] = 0.9 + self.q[((1,1,1,0), (2,1))] = 0.7 + self.q[((2,1,1,0), (0,1))] = 0.2 + self.alpha = alpha # Learning rate self.epsilon = epsilon # Exploration rate diff --git a/test.py b/test.py index 0731673..dfa2c8b 100644 --- a/test.py +++ b/test.py @@ -10,6 +10,11 @@ def test_get_q_value(ai): def test_update_q_value(ai): print("\n--- Testing update_q_value ---") + state = (2, 1, 1, 0) + action = (0, 1) + print(ai.q) + print(ai.update_q_value([2, 1, 1, 0], (0, 1), 0.2, 1, 0.8)) + print(ai.q) def test_best_future_reward(ai):