Implementiere best_future_reward

This commit is contained in:
mia 2026-04-23 10:00:43 +02:00
parent 933d0c5d27
commit e4fca60692
3 changed files with 11 additions and 7 deletions

Binary file not shown.

16
nim.py
View file

@@ -95,15 +95,17 @@ class NimAI():
float: The highest Q-value among available actions. float: The highest Q-value among available actions.
Returns 0 if no actions are available. Returns 0 if no actions are available.
""" """
state = tuple(state)
# actions = [] # actions = []
# for q in self.q.key: # for q in self.q:
# if q[0] == state: # print(q)
# if q[0] == tuple(state):
# actions.append(q[1]) # actions.append(q[1])
actions = tuple([key[1] for key in self.q.keys() if key[0] == state]) actions = [key[1] for key in self.q if key[0] == state]
try: # print(actions)
return max([q for q in self.q[tuple(state), actions]]) # print(self.q[state, action] for action in actions)
except: # print(max(self.q[state, action] for action in actions))
return 0 return 0 if actions == [] else max(self.q[state, action] for action in actions)
def choose_action(self, state, epsilon=True): def choose_action(self, state, epsilon=True):
""" """

View file

@@ -19,6 +19,8 @@ def test_update_q_value(ai):
def test_best_future_reward(ai): def test_best_future_reward(ai):
print("\n--- Testing best_future_reward ---") print("\n--- Testing best_future_reward ---")
print(ai.best_future_reward([1,1,1,0]))
print(ai.best_future_reward([1,1,1,1]))
def test_choose_action(ai): def test_choose_action(ai):