Implementiere best_future_reward

2026-04-23 10:00:43 +02:00 · 2026-04-23 10:00:43 +02:00 · e4fca60692
commit e4fca60692
parent 933d0c5d27
3 changed files with 11 additions and 7 deletions
--- a/pycache/nim.cpython-313.pyc
+++ b/pycache/nim.cpython-313.pyc
--- a/nim.py
+++ b/nim.py
@@ -95,15 +95,17 @@ class NimAI():
        float: The highest Q-value among available actions. 
               Returns 0 if no actions are available.
    """
+        state = tuple(state)
        # actions = []
-        # for q in self.q.key:
-        #     if q[0] == state:
+        # for q in self.q:
+        #     print(q)
+        #     if q[0] == tuple(state):
        #         actions.append(q[1])
-        actions = tuple([key[1] for key in self.q.keys() if key[0] == state])
-        try:
-            return max([q for q in self.q[tuple(state), actions]])
-        except:
-            return 0
+        actions = [key[1] for key in self.q if key[0] == state]
+        # print(actions)
+        # print(self.q[state, action] for action in actions)
+        # print(max(self.q[state, action] for action in actions))
+        return 0 if actions == [] else max(self.q[state, action] for action in actions)

    def choose_action(self, state, epsilon=True):
        """
--- a/test.py
+++ b/test.py
@@ -19,6 +19,8 @@ def test_update_q_value(ai):

 def test_best_future_reward(ai):
    print("\n--- Testing best_future_reward ---")
+    print(ai.best_future_reward([1,1,1,0]))
+    print(ai.best_future_reward([1,1,1,1]))


 def test_choose_action(ai):