Compare commits
2 commits
a74be2fc31
...
e4fca60692
| Author | SHA1 | Date | |
|---|---|---|---|
| e4fca60692 | |||
| 933d0c5d27 |
3 changed files with 25 additions and 10 deletions
Binary file not shown.
28
nim.py
28
nim.py
|
|
@ -31,10 +31,16 @@ class Nim():
|
||||||
|
|
||||||
|
|
||||||
class NimAI():
|
class NimAI():
|
||||||
def __init__(self, alpha=0.5, epsilon=0.1):
|
def __init__(self, alpha=0.5, epsilon=1):
|
||||||
self.q = dict() # Q-value table
|
self.q = dict() # Q-value table
|
||||||
self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value
|
# self.q[(0, 0, 0, 2), (3, 2)] = -1 # Test Q-Value
|
||||||
self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value
|
# self.q[(0, 0, 0, 2), (3, 1)] = 10 # Test Q-Value
|
||||||
|
|
||||||
|
self.q[((1,1,1,0), (0,1))] = 0.4
|
||||||
|
self.q[((1,1,1,0), (1,1))] = 0.9
|
||||||
|
self.q[((1,1,1,0), (2,1))] = 0.7
|
||||||
|
self.q[((2,1,1,0), (0,1))] = 0.2
|
||||||
|
|
||||||
|
|
||||||
self.alpha = alpha # Learning rate
|
self.alpha = alpha # Learning rate
|
||||||
self.epsilon = epsilon # Exploration rate
|
self.epsilon = epsilon # Exploration rate
|
||||||
|
|
@ -89,15 +95,17 @@ class NimAI():
|
||||||
float: The highest Q-value among available actions.
|
float: The highest Q-value among available actions.
|
||||||
Returns 0 if no actions are available.
|
Returns 0 if no actions are available.
|
||||||
"""
|
"""
|
||||||
|
state = tuple(state)
|
||||||
# actions = []
|
# actions = []
|
||||||
# for q in self.q.key:
|
# for q in self.q:
|
||||||
# if q[0] == state:
|
# print(q)
|
||||||
|
# if q[0] == tuple(state):
|
||||||
# actions.append(q[1])
|
# actions.append(q[1])
|
||||||
actions = tuple([key[1] for key in self.q.keys() if key[0] == state])
|
actions = [key[1] for key in self.q if key[0] == state]
|
||||||
try:
|
# print(actions)
|
||||||
return max([q for q in self.q[tuple(state), actions]])
|
# print(self.q[state, action] for action in actions)
|
||||||
except:
|
# print(max(self.q[state, action] for action in actions))
|
||||||
return 0
|
return 0 if actions == [] else max(self.q[state, action] for action in actions)
|
||||||
|
|
||||||
def choose_action(self, state, epsilon=True):
|
def choose_action(self, state, epsilon=True):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
7
test.py
7
test.py
|
|
@ -10,10 +10,17 @@ def test_get_q_value(ai):
|
||||||
|
|
||||||
def test_update_q_value(ai):
|
def test_update_q_value(ai):
|
||||||
print("\n--- Testing update_q_value ---")
|
print("\n--- Testing update_q_value ---")
|
||||||
|
state = (2, 1, 1, 0)
|
||||||
|
action = (0, 1)
|
||||||
|
print(ai.q)
|
||||||
|
print(ai.update_q_value([2, 1, 1, 0], (0, 1), 0.2, 1, 0.8))
|
||||||
|
print(ai.q)
|
||||||
|
|
||||||
|
|
||||||
def test_best_future_reward(ai):
|
def test_best_future_reward(ai):
|
||||||
print("\n--- Testing best_future_reward ---")
|
print("\n--- Testing best_future_reward ---")
|
||||||
|
print(ai.best_future_reward([1,1,1,0]))
|
||||||
|
print(ai.best_future_reward([1,1,1,1]))
|
||||||
|
|
||||||
|
|
||||||
def test_choose_action(ai):
|
def test_choose_action(ai):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue