uhhhh
This commit is contained in:
parent
0d2340b7e8
commit
a74be2fc31
11 changed files with 2367 additions and 184 deletions
33
nim.py
33
nim.py
|
|
@ -56,11 +56,17 @@ class NimAI():
|
|||
float: The Q-value associated with the (state, action) pair.
|
||||
Returns 0 if the pair is not yet in the Q-table.
|
||||
"""
|
||||
print(self.q)
|
||||
print(self.q, state, action)
|
||||
try:
|
||||
return self.q[(tuple(state), action)]
|
||||
except:
|
||||
return 0
|
||||
|
||||
def update_q_value(self, state, action, old_q, reward, future_q):
|
||||
"""
|
||||
Update the Q-value for a state-action pair using the Q-learning formula.
|
||||
|
||||
Q(s, a) ← Q(s, a) + α * (reward + γ * max_a' Q(s', a') - Q(s, a))
|
||||
|
||||
Parameters:
|
||||
state (list): The current game state.
|
||||
|
|
@ -69,10 +75,11 @@ class NimAI():
|
|||
reward (float): The reward received after taking the action.
|
||||
future_q (float): The maximum Q-value for the next state.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
self.q[tuple(state), action] = old_q + self.alpha * (reward + self.epsilon * future_q - old_q)
|
||||
return 0
|
||||
|
||||
def best_future_reward(self, state):
|
||||
"""
|
||||
"""
|
||||
Determine the highest Q-value among all possible actions in a given state.
|
||||
|
||||
Parameters:
|
||||
|
|
@ -82,7 +89,15 @@ class NimAI():
|
|||
float: The highest Q-value among available actions.
|
||||
Returns 0 if no actions are available.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
# actions = []
|
||||
# for q in self.q.key:
|
||||
# if q[0] == state:
|
||||
# actions.append(q[1])
|
||||
actions = tuple([key[1] for key in self.q.keys() if key[0] == state])
|
||||
try:
|
||||
return max([q for q in self.q[tuple(state), actions]])
|
||||
except:
|
||||
return 0
|
||||
|
||||
def choose_action(self, state, epsilon=True):
|
||||
"""
|
||||
|
|
@ -95,7 +110,15 @@ class NimAI():
|
|||
Returns:
|
||||
tuple: The chosen action from the available actions.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
if epsilon:
|
||||
return random.choice(tuple(Nim.available_actions(state)))
|
||||
# keys = [key[1] for key in self.q.key if key[0] == state]
|
||||
# for key in keys:
|
||||
else:
|
||||
try:
|
||||
return max([key[1] for key in self.q.keys() if key[0] == state])
|
||||
except:
|
||||
return (0,0)
|
||||
|
||||
def train(n):
|
||||
player = NimAI()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue