Compare commits

..

No commits in common. "e4fca60692fe3b7671e33c43eb0e14f6e7409693" and "a74be2fc31f18450846c9a27968860d4f48b6c9e" have entirely different histories.

3 changed files with 10 additions and 25 deletions

Binary file not shown.

28
nim.py
View file

@@ -31,16 +31,10 @@ class Nim():
class NimAI(): class NimAI():
def __init__(self, alpha=0.5, epsilon=1): def __init__(self, alpha=0.5, epsilon=0.1):
self.q = dict() # Q-value table self.q = dict() # Q-value table
# self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value
# self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value
self.q[((1,1,1,0), (0,1))] = 0.4
self.q[((1,1,1,0), (1,1))] = 0.9
self.q[((1,1,1,0), (2,1))] = 0.7
self.q[((2,1,1,0), (0,1))] = 0.2
self.alpha = alpha # Learning rate self.alpha = alpha # Learning rate
self.epsilon = epsilon # Exploration rate self.epsilon = epsilon # Exploration rate
@@ -95,17 +89,15 @@ class NimAI():
float: The highest Q-value among available actions. float: The highest Q-value among available actions.
Returns 0 if no actions are available. Returns 0 if no actions are available.
""" """
state = tuple(state)
# actions = [] # actions = []
# for q in self.q: # for q in self.q.key:
# print(q) # if q[0] == state:
# if q[0] == tuple(state):
# actions.append(q[1]) # actions.append(q[1])
actions = [key[1] for key in self.q if key[0] == state] actions = tuple([key[1] for key in self.q.keys() if key[0] == state])
# print(actions) try:
# print(self.q[state, action] for action in actions) return max([q for q in self.q[tuple(state), actions]])
# print(max(self.q[state, action] for action in actions)) except:
return 0 if actions == [] else max(self.q[state, action] for action in actions) return 0
def choose_action(self, state, epsilon=True): def choose_action(self, state, epsilon=True):
""" """

View file

@@ -10,17 +10,10 @@ def test_get_q_value(ai):
def test_update_q_value(ai): def test_update_q_value(ai):
print("\n--- Testing update_q_value ---") print("\n--- Testing update_q_value ---")
state = (2, 1, 1, 0)
action = (0, 1)
print(ai.q)
print(ai.update_q_value([2, 1, 1, 0], (0, 1), 0.2, 1, 0.8))
print(ai.q)
def test_best_future_reward(ai): def test_best_future_reward(ai):
print("\n--- Testing best_future_reward ---") print("\n--- Testing best_future_reward ---")
print(ai.best_future_reward([1,1,1,0]))
print(ai.best_future_reward([1,1,1,1]))
def test_choose_action(ai): def test_choose_action(ai):