Implementiere best_future_reward
This commit is contained in:
parent
933d0c5d27
commit
e4fca60692
3 changed files with 11 additions and 7 deletions
Binary file not shown.
16
nim.py
16
nim.py
|
|
@ -95,15 +95,17 @@ class NimAI():
|
|||
float: The highest Q-value among available actions.
|
||||
Returns 0 if no actions are available.
|
||||
"""
|
||||
state = tuple(state)
|
||||
# actions = []
|
||||
# for q in self.q.key:
|
||||
# if q[0] == state:
|
||||
# for q in self.q:
|
||||
# print(q)
|
||||
# if q[0] == tuple(state):
|
||||
# actions.append(q[1])
|
||||
actions = tuple([key[1] for key in self.q.keys() if key[0] == state])
|
||||
try:
|
||||
return max([q for q in self.q[tuple(state), actions]])
|
||||
except:
|
||||
return 0
|
||||
actions = [key[1] for key in self.q if key[0] == state]
|
||||
# print(actions)
|
||||
# print(self.q[state, action] for action in actions)
|
||||
# print(max(self.q[state, action] for action in actions))
|
||||
return 0 if actions == [] else max(self.q[state, action] for action in actions)
|
||||
|
||||
def choose_action(self, state, epsilon=True):
|
||||
"""
|
||||
|
|
|
|||
2
test.py
2
test.py
|
|
@ -19,6 +19,8 @@ def test_update_q_value(ai):
|
|||
|
||||
def test_best_future_reward(ai):
    """Print the results of ``ai.best_future_reward`` for two sample states.

    This is a manual smoke check: it writes a header line and then the value
    returned for the states ``[1, 1, 1, 0]`` and ``[1, 1, 1, 1]``. Nothing is
    asserted; the printed output is meant to be inspected by eye.

    Args:
        ai: Object exposing a ``best_future_reward(state)`` method.
    """
    print("\n--- Testing best_future_reward ---")
    # Same two states, queried in the same order as before.
    for state in ([1, 1, 1, 0], [1, 1, 1, 1]):
        print(ai.best_future_reward(state))
|
||||
|
||||
|
||||
def test_choose_action(ai):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue