# File-viewer header captured with the source: 40 lines, 1,000 B, Python.
from nim import NimAI
|
|
|
|
def test_get_q_value(ai):
    """Print the Q-value ``ai`` reports for one fixed state/action pair.

    Args:
        ai: Object exposing ``get_q_value(state, action)``; the
            ``__main__`` block passes a ``NimAI``.
    """
    print("\n--- Testing get_q_value ---")

    # Fixed probe: a 4-tuple state and a 2-tuple action.
    state, action = (0, 0, 0, 2), (3, 2)

    value = ai.get_q_value(state, action)
    print(f"Q-value for state {state}, action {action}: {value}")
|
|
|
|
|
|
def test_update_q_value(ai):
    """Print ``ai.q`` before and after a single ``update_q_value`` call.

    The state and action are defined once and passed to the call, so the
    values shown in this function are the values actually used.

    Args:
        ai: Object exposing a ``q`` attribute and an ``update_q_value``
            method accepting five positional arguments; the ``__main__``
            block passes a ``NimAI``.
    """
    print("\n--- Testing update_q_value ---")

    # The state is passed as a list, which is what this call has always sent.
    state = [2, 1, 1, 0]
    action = (0, 1)

    print(ai.q)
    # NOTE(review): 0.2, 1 and 0.8 are presumably the old Q-value, the reward
    # and the future-reward estimate -- confirm against NimAI.update_q_value.
    print(ai.update_q_value(state, action, 0.2, 1, 0.8))
    print(ai.q)
|
|
|
|
|
|
def test_best_future_reward(ai):
    """Print what ``ai.best_future_reward`` returns for two fixed states.

    Args:
        ai: Object exposing ``best_future_reward(state)``; the
            ``__main__`` block passes a ``NimAI``.
    """
    print("\n--- Testing best_future_reward ---")

    # Each state is a separate list, queried and printed in this order.
    for piles in ([1, 1, 1, 0], [1, 1, 1, 1]):
        print(ai.best_future_reward(piles))
|
|
|
|
|
|
def test_choose_action(ai):
    """Print the action ``ai.choose_action`` picks, with and without ``epsilon``.

    The same pile configuration is queried twice: once relying on the
    method's default ``epsilon`` and once with ``epsilon=False``.

    Args:
        ai: Object exposing ``choose_action(state, epsilon=...)``; the
            ``__main__`` block passes a ``NimAI``.
    """
    print("\n--- Testing choose_action ---")

    piles = (1, 1, 1, 0)

    # A fresh list is handed to each call so the two queries stay independent.
    default_choice = ai.choose_action(list(piles))
    print(default_choice)

    no_epsilon_choice = ai.choose_action(list(piles), epsilon=False)
    print(no_epsilon_choice)
|
|
|
|
|
|
if __name__ == "__main__":
    # One NimAI instance is shared by every check below.
    ai = NimAI()

    # Run the checks in a fixed order, all against that same instance.
    for check in (
        test_get_q_value,
        test_update_q_value,
        test_best_future_reward,
        test_choose_action,
    ):
        check(ai)

    print("\nAll tests completed.")
|