Implementiere best_future_reward

Implementiere update_q_value
2026-04-23 10:00:43 +02:00 · 2026-04-23 09:39:16 +02:00
3 changed files with 25 additions and 10 deletions
--- a/__pycache__/nim.cpython-313.pyc
+++ b/__pycache__/nim.cpython-313.pyc
--- a/nim.py
+++ b/nim.py
@@ -31,10 +31,16 @@ class Nim():
 class NimAI():
-    def __init__(self, alpha=0.5, epsilon=0.1):
+    def __init__(self, alpha=0.5, epsilon=1):
        self.q = dict()  # Q-value table
-        self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value 
+        # self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value 
-        self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value 
+        # self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value 
        self.q[((1,1,1,0), (0,1))] = 0.4
        self.q[((1,1,1,0), (1,1))] = 0.9
        self.q[((1,1,1,0), (2,1))] = 0.7
        self.q[((2,1,1,0), (0,1))] = 0.2
        self.alpha = alpha  # Learning rate
        self.epsilon = epsilon  # Exploration rate
@@ -89,15 +95,17 @@ class NimAI():
        float: The highest Q-value among available actions. 
               Returns 0 if no actions are available.
    """
        state = tuple(state)
        # actions = []
-        # for q in self.q.key:
+        # for q in self.q:
-        #     if q[0] == state:
+        #     print(q)
        #     if q[0] == tuple(state):
        #         actions.append(q[1])
-        actions = tuple([key[1] for key in self.q.keys() if key[0] == state])
+        actions = [key[1] for key in self.q if key[0] == state]
-        try:
+        # print(actions)
-            return max([q for q in self.q[tuple(state), actions]])
+        # print(self.q[state, action] for action in actions)
-        except:
+        # print(max(self.q[state, action] for action in actions))
-            return 0
+        return 0 if actions == [] else max(self.q[state, action] for action in actions)
    def choose_action(self, state, epsilon=True):
        """
--- a/test.py
+++ b/test.py
@@ -10,10 +10,17 @@ def test_get_q_value(ai):
 def test_update_q_value(ai):
    print("\n--- Testing update_q_value ---")
    state = (2, 1, 1, 0)
    action = (0, 1)
    print(ai.q)
    print(ai.update_q_value([2, 1, 1, 0], (0, 1), 0.2, 1, 0.8))
    print(ai.q)
 def test_best_future_reward(ai):
    print("\n--- Testing best_future_reward ---")
    print(ai.best_future_reward([1,1,1,0]))
    print(ai.best_future_reward([1,1,1,1]))
 def test_choose_action(ai):
Author	SHA1	Message	Date
mia	e4fca60692	Implementiere best_future_reward	2026-04-23 10:00:43 +02:00
mia	933d0c5d27	Implementiere update_q_value	2026-04-23 09:39:16 +02:00