From c5f4f9526af8f8aa057ead5ed623b04e189ca44e Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Sun, 1 May 2022 10:48:41 +1000 Subject: [PATCH 01/18] Explicitly specify gym version 0.20.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d33850a..5c0f3d4 100644 --- a/setup.py +++ b/setup.py @@ -3,5 +3,5 @@ setup( name='gym_go', version='0.0.1', - install_requires=['gym'] # and other dependencies + install_requires=['gym==0.20.0'] # and other dependencies ) From 53ffc843eb08ff25224bf27f824ab293266e25b6 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Sun, 1 May 2022 12:02:56 +1000 Subject: [PATCH 02/18] Bump gym version to 0.21.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5c0f3d4..85f9319 100644 --- a/setup.py +++ b/setup.py @@ -3,5 +3,5 @@ setup( name='gym_go', version='0.0.1', - install_requires=['gym==0.20.0'] # and other dependencies + install_requires=['gym==0.21.0'] # and other dependencies ) From 6eca9a8ca2a0cc9271b0a6206e7c20b7b79cfd6a Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Sun, 1 May 2022 12:04:09 +1000 Subject: [PATCH 03/18] Always reset environment before use --- README.md | 1 + demo.py | 1 + gym_go/tests/test_basics.py | 3 +++ gym_go/tests/test_invalid_moves.py | 2 ++ 4 files changed, 7 insertions(+) diff --git a/README.md b/README.md index 943e08d..4059f3a 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ pip install -e . 
import gym go_env = gym.make('gym_go:go-v0', size=7, komi=0, reward_method='real') +go_env.reset() first_action = (2,5) second_action = (5,2) diff --git a/demo.py b/demo.py index 9c9d2d5..39615f3 100644 --- a/demo.py +++ b/demo.py @@ -10,6 +10,7 @@ # Initialize environment go_env = gym.make('gym_go:go-v0', size=args.boardsize, komi=args.komi) +go_env.reset() # Game loop done = False diff --git a/gym_go/tests/test_basics.py b/gym_go/tests/test_basics.py index 307ed1f..abc4945 100644 --- a/gym_go/tests/test_basics.py +++ b/gym_go/tests/test_basics.py @@ -174,6 +174,7 @@ def test_num_liberties(self): def test_komi(self): env = gym.make('gym_go:go-v0', size=7, komi=2.5, reward_method='real') + env.reset() # White win _ = env.step(None) @@ -224,6 +225,7 @@ def test_children(self): def test_real_reward(self): env = gym.make('gym_go:go-v0', size=7, reward_method='real') + env.reset() # In game state, reward, done, info = env.step((0, 0)) @@ -259,6 +261,7 @@ def test_real_reward(self): def test_heuristic_reward(self): env = gym.make('gym_go:go-v0', size=7, reward_method='heuristic') + env.reset() # In game state, reward, done, info = env.step((0, 0)) diff --git a/gym_go/tests/test_invalid_moves.py b/gym_go/tests/test_invalid_moves.py index 3693bbe..809d94f 100644 --- a/gym_go/tests/test_invalid_moves.py +++ b/gym_go/tests/test_invalid_moves.py @@ -186,6 +186,7 @@ def test_small_suicide(self): """ self.env = gym.make('gym_go:go-v0', size=3, reward_method='real') + self.env.reset() for move in [6, 7, 8, 5, 4, 8, 0, 1]: state, reward, done, info = self.env.step(move) @@ -203,6 +204,7 @@ def test_invalid_after_capture(self): """ self.env = gym.make('gym_go:go-v0', size=3, reward_method='real') + self.env.reset() for move in [0, 8, 6, 4, 1, 2, 3, 7]: state, reward, done, info = self.env.step(move) From b4fa9964cb9931af028401f1ed3f5643e833e6c2 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Sun, 1 May 2022 12:12:01 +1000 Subject: [PATCH 04/18] Bump gym version to 0.22.0 --- 
setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 85f9319..7771a43 100644 --- a/setup.py +++ b/setup.py @@ -3,5 +3,5 @@ setup( name='gym_go', version='0.0.1', - install_requires=['gym==0.21.0'] # and other dependencies + install_requires=['gym==0.22.0'] # and other dependencies ) From 1636be69f6613d88adfc075720548cafbf6a8308 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Sun, 1 May 2022 12:13:33 +1000 Subject: [PATCH 05/18] Explicitly require gym_go gym.make no longer does this for us --- README.md | 3 ++- demo.py | 3 ++- gym_go/tests/efficiency.py | 3 ++- gym_go/tests/test_basics.py | 14 +++++++------- gym_go/tests/test_invalid_moves.py | 6 +++--- gym_go/tests/test_valid_moves.py | 2 +- 6 files changed, 17 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 4059f3a..d0bd5e7 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,9 @@ pip install -e . ### Coding example ```python import gym +import gym_go -go_env = gym.make('gym_go:go-v0', size=7, komi=0, reward_method='real') +go_env = gym.make('go-v0', size=7, komi=0, reward_method='real') go_env.reset() first_action = (2,5) diff --git a/demo.py b/demo.py index 39615f3..7d6a05f 100644 --- a/demo.py +++ b/demo.py @@ -1,6 +1,7 @@ import argparse import gym +import gym_go # Arguments parser = argparse.ArgumentParser(description='Demo Go Environment') @@ -9,7 +10,7 @@ args = parser.parse_args() # Initialize environment -go_env = gym.make('gym_go:go-v0', size=args.boardsize, komi=args.komi) +go_env = gym.make('go-v0', size=args.boardsize, komi=args.komi) go_env.reset() # Game loop diff --git a/gym_go/tests/efficiency.py b/gym_go/tests/efficiency.py index 7b179b7..392ca11 100644 --- a/gym_go/tests/efficiency.py +++ b/gym_go/tests/efficiency.py @@ -2,6 +2,7 @@ import unittest import gym +import gym_go import numpy as np from tqdm import tqdm @@ -11,7 +12,7 @@ class Efficiency(unittest.TestCase): iterations = 64 def setUp(self) -> None: - self.env = 
gym.make('gym_go:go-v0', size=self.boardsize, reward_method='real') + self.env = gym.make('go-v0', size=self.boardsize, reward_method='real') def testOrderedTrajs(self): durs = [] diff --git a/gym_go/tests/test_basics.py b/gym_go/tests/test_basics.py index abc4945..e015460 100644 --- a/gym_go/tests/test_basics.py +++ b/gym_go/tests/test_basics.py @@ -10,13 +10,13 @@ class TestGoEnvBasics(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.env = gym.make('gym_go:go-v0', size=7, reward_method='real') + self.env = gym.make('go-v0', size=7, reward_method='real') def setUp(self): self.env.reset() def test_state(self): - env = gym.make('gym_go:go-v0', size=7) + env = gym.make('go-v0', size=7) state = env.reset() self.assertIsInstance(state, np.ndarray) self.assertEqual(state.shape[0], govars.NUM_CHNLS) @@ -27,7 +27,7 @@ def test_board_sizes(self): expected_sizes = [7, 13, 19] for expec_size in expected_sizes: - env = gym.make('gym_go:go-v0', size=expec_size) + env = gym.make('go-v0', size=expec_size) state = env.reset() self.assertEqual(state.shape[1], expec_size) self.assertEqual(state.shape[2], expec_size) @@ -150,7 +150,7 @@ def test_game_does_not_end_with_disjoint_passes(self): self.assertFalse(done) def test_num_liberties(self): - env = gym.make('gym_go:go-v0', size=7) + env = gym.make('go-v0', size=7) steps = [(0, 0), (0, 1)] libs = [(2, 0), (1, 2)] @@ -173,7 +173,7 @@ def test_num_liberties(self): self.assertEqual(whitelibs, libs[1], state) def test_komi(self): - env = gym.make('gym_go:go-v0', size=7, komi=2.5, reward_method='real') + env = gym.make('go-v0', size=7, komi=2.5, reward_method='real') env.reset() # White win @@ -224,7 +224,7 @@ def test_children(self): self.assertTrue((children[a] == 0).all()) def test_real_reward(self): - env = gym.make('gym_go:go-v0', size=7, reward_method='real') + env = gym.make('go-v0', size=7, reward_method='real') env.reset() # In game @@ -260,7 +260,7 @@ def test_real_reward(self): 
env.close() def test_heuristic_reward(self): - env = gym.make('gym_go:go-v0', size=7, reward_method='heuristic') + env = gym.make('go-v0', size=7, reward_method='heuristic') env.reset() # In game diff --git a/gym_go/tests/test_invalid_moves.py b/gym_go/tests/test_invalid_moves.py index 809d94f..2bb85ca 100644 --- a/gym_go/tests/test_invalid_moves.py +++ b/gym_go/tests/test_invalid_moves.py @@ -11,7 +11,7 @@ class TestGoEnvInvalidMoves(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.env = gym.make('gym_go:go-v0', size=7, reward_method='real') + self.env = gym.make('go-v0', size=7, reward_method='real') def setUp(self): self.env.reset() @@ -185,7 +185,7 @@ def test_small_suicide(self): :return: """ - self.env = gym.make('gym_go:go-v0', size=3, reward_method='real') + self.env = gym.make('go-v0', size=3, reward_method='real') self.env.reset() for move in [6, 7, 8, 5, 4, 8, 0, 1]: state, reward, done, info = self.env.step(move) @@ -203,7 +203,7 @@ def test_invalid_after_capture(self): :return: """ - self.env = gym.make('gym_go:go-v0', size=3, reward_method='real') + self.env = gym.make('go-v0', size=3, reward_method='real') self.env.reset() for move in [0, 8, 6, 4, 1, 2, 3, 7]: state, reward, done, info = self.env.step(move) diff --git a/gym_go/tests/test_valid_moves.py b/gym_go/tests/test_valid_moves.py index 0e54064..a3806b5 100644 --- a/gym_go/tests/test_valid_moves.py +++ b/gym_go/tests/test_valid_moves.py @@ -10,7 +10,7 @@ class TestGoEnvValidMoves(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.env = gym.make('gym_go:go-v0', size=7, reward_method='real') + self.env = gym.make('go-v0', size=7, reward_method='real') def setUp(self): self.env.reset() From 83648efe9a6524c7fb2a13e4b6bad9f18720f88c Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Sun, 1 May 2022 12:21:44 +1000 Subject: [PATCH 06/18] Bump gym version to 0.23.1 --- setup.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7771a43..c86abb7 100644 --- a/setup.py +++ b/setup.py @@ -3,5 +3,5 @@ setup( name='gym_go', version='0.0.1', - install_requires=['gym==0.22.0'] # and other dependencies + install_requires=['gym==0.23.1'] # and other dependencies ) From 9c7841f58cf708f7d5e78c6e5a08439b6448a80e Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Sun, 1 May 2022 12:32:40 +1000 Subject: [PATCH 07/18] Stop using deprecated scipy.ndimage.measurements namespace --- gym_go/gogame.py | 2 +- gym_go/state_utils.py | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/gym_go/gogame.py b/gym_go/gogame.py index 4d68339..4d36d7e 100644 --- a/gym_go/gogame.py +++ b/gym_go/gogame.py @@ -280,7 +280,7 @@ def areas(state): all_pieces = np.sum(state[[govars.BLACK, govars.WHITE]], axis=0) empties = 1 - all_pieces - empty_labels, num_empty_areas = ndimage.measurements.label(empties) + empty_labels, num_empty_areas = ndimage.label(empties) black_area, white_area = np.sum(state[govars.BLACK]), np.sum(state[govars.WHITE]) for label in range(1, num_empty_areas + 1): diff --git a/gym_go/state_utils.py b/gym_go/state_utils.py index 913ed55..1ded092 100644 --- a/gym_go/state_utils.py +++ b/gym_go/state_utils.py @@ -1,6 +1,5 @@ import numpy as np from scipy import ndimage -from scipy.ndimage import measurements from gym_go import govars @@ -45,8 +44,8 @@ def compute_invalid_moves(state, player, ko_protect=None): definite_valids_array = np.zeros(state.shape[1:]) # Get all groups - all_own_groups, num_own_groups = measurements.label(state[player]) - all_opp_groups, num_opp_groups = measurements.label(state[1 - player]) + all_own_groups, num_own_groups = ndimage.label(state[player]) + all_opp_groups, num_opp_groups = ndimage.label(state[1 - player]) expanded_own_groups = np.zeros((num_own_groups, *state.shape[1:])) expanded_opp_groups = np.zeros((num_opp_groups, *state.shape[1:])) @@ -108,8 +107,8 @@ def 
batch_compute_invalid_moves(batch_state, batch_player, batch_ko_protect): batch_definite_valids_array = np.zeros(batch_state.shape[:1] + batch_state.shape[2:]) # Get all groups - batch_all_own_groups, _ = measurements.label(batch_state[batch_idcs, batch_player], group_struct) - batch_all_opp_groups, _ = measurements.label(batch_state[batch_idcs, 1 - batch_player], group_struct) + batch_all_own_groups, _ = ndimage.label(batch_state[batch_idcs, batch_player], group_struct) + batch_all_opp_groups, _ = ndimage.label(batch_state[batch_idcs, 1 - batch_player], group_struct) batch_data = enumerate(zip(batch_all_own_groups, batch_all_opp_groups, batch_empties)) for i, (all_own_groups, all_opp_groups, empties) in batch_data: @@ -163,7 +162,7 @@ def update_pieces(state, adj_locs, player): all_pieces = np.sum(state[[govars.BLACK, govars.WHITE]], axis=0) empties = 1 - all_pieces - all_opp_groups, _ = ndimage.measurements.label(state[opponent]) + all_opp_groups, _ = ndimage.label(state[opponent]) # Go through opponent groups all_adj_labels = all_opp_groups[adj_locs[:, 0], adj_locs[:, 1]] @@ -187,7 +186,7 @@ def batch_update_pieces(batch_non_pass, batch_state, batch_adj_locs, batch_playe batch_all_pieces = np.sum(batch_state[:, [govars.BLACK, govars.WHITE]], axis=1) batch_empties = 1 - batch_all_pieces - batch_all_opp_groups, _ = ndimage.measurements.label(batch_state[batch_non_pass, batch_opponent], + batch_all_opp_groups, _ = ndimage.label(batch_state[batch_non_pass, batch_opponent], group_struct) batch_data = enumerate(zip(batch_all_opp_groups, batch_all_pieces, batch_empties, batch_adj_locs, batch_opponent)) From be8f0c96cb6430e514676949795bfd3e22ab66ec Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Sun, 1 May 2022 12:35:07 +1000 Subject: [PATCH 08/18] Use built-in types (np.int and np.bool are deprecated) --- gym_go/gogame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gym_go/gogame.py b/gym_go/gogame.py index 4d36d7e..de3d9af 100644 --- 
a/gym_go/gogame.py +++ b/gym_go/gogame.py @@ -247,7 +247,7 @@ def turn(state): def batch_turn(batch_state): - return np.max(batch_state[:, govars.TURN_CHNL], axis=(1, 2)).astype(np.int) + return np.max(batch_state[:, govars.TURN_CHNL], axis=(1, 2)).astype(int) def liberties(state: np.ndarray): @@ -258,7 +258,7 @@ def liberties(state: np.ndarray): liberty_list = [] for player_pieces in [blacks, whites]: liberties = ndimage.binary_dilation(player_pieces, state_utils.surround_struct) - liberties *= (1 - all_pieces).astype(np.bool) + liberties *= (1 - all_pieces).astype(bool) liberty_list.append(liberties) return liberty_list[0], liberty_list[1] From 5f019556c53269104915b00cd1c775c545c3906a Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Sun, 1 May 2022 14:44:29 +1000 Subject: [PATCH 09/18] Keep track of game state history in env --- gym_go/envs/go_env.py | 4 ++++ gym_go/tests/test_super_ko.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 gym_go/tests/test_super_ko.py diff --git a/gym_go/envs/go_env.py b/gym_go/envs/go_env.py index b2d4d66..f66a09d 100644 --- a/gym_go/envs/go_env.py +++ b/gym_go/envs/go_env.py @@ -31,6 +31,7 @@ def __init__(self, size, komi=0, reward_method='real'): self.size = size self.komi = komi self.state_ = gogame.init_state(size) + self.history = [] self.reward_method = RewardMethod(reward_method) self.observation_space = gym.spaces.Box(np.float32(0), np.float32(govars.NUM_CHNLS), shape=(govars.NUM_CHNLS, size, size)) @@ -43,6 +44,7 @@ def reset(self): done, return state ''' self.state_ = gogame.init_state(self.size) + self.history = [] self.done = False return np.copy(self.state_) @@ -59,7 +61,9 @@ def step(self, action): elif action is None: action = self.size ** 2 + self.old_state = self.state() self.state_ = gogame.next_state(self.state_, action, canonical=False) + self.history.append(self.old_state) self.done = gogame.game_ended(self.state_) return np.copy(self.state_), self.reward(), self.done, 
self.info() diff --git a/gym_go/tests/test_super_ko.py b/gym_go/tests/test_super_ko.py new file mode 100644 index 0000000..b7471df --- /dev/null +++ b/gym_go/tests/test_super_ko.py @@ -0,0 +1,29 @@ +import unittest + +import gym +import gym_go + +class TestGoEnvSuperKo(unittest.TestCase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.env = gym.make('go-v0', size=2) + + def setUp(self): + self.env.reset() + + def test_initial_history(self): + self.assertEqual(self.env.history, []) + + def test_step_builds_history(self): + self.env.step((0, 0)) + self.assertEqual(len(self.env.history), 1) + + def test_reset_clears_history(self): + self.env.step((0, 0)) + self.assertNotEqual(self.env.history, []) + self.env.reset() + self.assertEqual(self.env.history, []) + + +if __name__ == '__main__': + unittest.main() From ec53fd00d038e847922ea8f9c25a45f721f96161 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Sun, 1 May 2022 14:58:05 +1000 Subject: [PATCH 10/18] Pass state history to (batch)_compute_invalid_moves --- gym_go/envs/go_env.py | 2 +- gym_go/gogame.py | 8 ++++---- gym_go/state_utils.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/gym_go/envs/go_env.py b/gym_go/envs/go_env.py index f66a09d..b0bfdd4 100644 --- a/gym_go/envs/go_env.py +++ b/gym_go/envs/go_env.py @@ -62,7 +62,7 @@ def step(self, action): action = self.size ** 2 self.old_state = self.state() - self.state_ = gogame.next_state(self.state_, action, canonical=False) + self.state_ = gogame.next_state(self.state_, action, canonical=False, history=self.history) self.history.append(self.old_state) self.done = gogame.game_ended(self.state_) return np.copy(self.state_), self.reward(), self.done, self.info() diff --git a/gym_go/gogame.py b/gym_go/gogame.py index de3d9af..e62ff3e 100644 --- a/gym_go/gogame.py +++ b/gym_go/gogame.py @@ -31,7 +31,7 @@ def batch_init_state(batch_size, board_size): return batch_state -def next_state(state, action1d, 
canonical=False): +def next_state(state, action1d, canonical=False, history=None): # Deep copy the state to modify state = np.copy(state) @@ -75,7 +75,7 @@ def next_state(state, action1d, canonical=False): ko_protect = killed_group[0] # Update invalid moves - state[govars.INVD_CHNL] = state_utils.compute_invalid_moves(state, player, ko_protect) + state[govars.INVD_CHNL] = state_utils.compute_invalid_moves(state, player, ko_protect, history) # Switch turn state_utils.set_turn(state) @@ -87,7 +87,7 @@ def next_state(state, action1d, canonical=False): return state -def batch_next_states(batch_states, batch_action1d, canonical=False): +def batch_next_states(batch_states, batch_action1d, canonical=False, batch_histories=None): # Deep copy the state to modify batch_states = np.copy(batch_states) @@ -138,7 +138,7 @@ def batch_next_states(batch_states, batch_action1d, canonical=False): # Update invalid moves batch_states[:, govars.INVD_CHNL] = state_utils.batch_compute_invalid_moves(batch_states, batch_players, - batch_ko_protect) + batch_ko_protect, batch_histories) # Switch turn state_utils.batch_set_turn(batch_states) diff --git a/gym_go/state_utils.py b/gym_go/state_utils.py index 1ded092..d6dc9a4 100644 --- a/gym_go/state_utils.py +++ b/gym_go/state_utils.py @@ -20,7 +20,7 @@ neighbor_deltas = np.array([[-1, 0], [1, 0], [0, -1], [0, 1]]) -def compute_invalid_moves(state, player, ko_protect=None): +def compute_invalid_moves(state, player, ko_protect=None, history=None): """ Updates invalid moves in the OPPONENT's perspective 1.) Opponent cannot move at a location @@ -82,7 +82,7 @@ def compute_invalid_moves(state, player, ko_protect=None): return invalid_moves > 0 -def batch_compute_invalid_moves(batch_state, batch_player, batch_ko_protect): +def batch_compute_invalid_moves(batch_state, batch_player, batch_ko_protect, batch_history=None): """ Updates invalid moves in the OPPONENT's perspective 1.) 
Opponent cannot move at a location From 760a17e5f3c8e7e6aade8928fa52d096d136a65e Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Tue, 3 May 2022 11:48:41 +1000 Subject: [PATCH 11/18] Switch turn before calculating invalid moves --- gym_go/gogame.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gym_go/gogame.py b/gym_go/gogame.py index e62ff3e..40adf9b 100644 --- a/gym_go/gogame.py +++ b/gym_go/gogame.py @@ -74,12 +74,12 @@ def next_state(state, action1d, canonical=False, history=None): if len(killed_group) == 1: ko_protect = killed_group[0] - # Update invalid moves - state[govars.INVD_CHNL] = state_utils.compute_invalid_moves(state, player, ko_protect, history) - # Switch turn state_utils.set_turn(state) + # Update invalid moves + state[govars.INVD_CHNL] = state_utils.compute_invalid_moves(state, player, ko_protect, history) + if canonical: # Set canonical form state = canonical_form(state) @@ -136,13 +136,13 @@ def batch_next_states(batch_states, batch_action1d, canonical=False, batch_histo if len(killed_group) == 1: batch_ko_protect[batch_non_pass[i]] = killed_group[0] + # Switch turn + state_utils.batch_set_turn(batch_states) + # Update invalid moves batch_states[:, govars.INVD_CHNL] = state_utils.batch_compute_invalid_moves(batch_states, batch_players, batch_ko_protect, batch_histories) - # Switch turn - state_utils.batch_set_turn(batch_states) - if canonical: # Set canonical form batch_states = batch_canonical_form(batch_states) From 5167076c766990d72ab9d87d171d5da710bd8d11 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Tue, 3 May 2022 12:16:08 +1000 Subject: [PATCH 12/18] When game is over, all moves should be invalid --- gym_go/gogame.py | 2 +- gym_go/tests/test_invalid_moves.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/gym_go/gogame.py b/gym_go/gogame.py index 40adf9b..3fdbcd9 100644 --- a/gym_go/gogame.py +++ b/gym_go/gogame.py @@ -153,7 +153,7 @@ def batch_next_states(batch_states, 
batch_action1d, canonical=False, batch_histo def invalid_moves(state): # return a fixed size binary vector if game_ended(state): - return np.zeros(action_size(state)) + return np.ones(action_size(state)) return np.append(state[govars.INVD_CHNL].flatten(), 0) diff --git a/gym_go/tests/test_invalid_moves.py b/gym_go/tests/test_invalid_moves.py index 2bb85ca..2464459 100644 --- a/gym_go/tests/test_invalid_moves.py +++ b/gym_go/tests/test_invalid_moves.py @@ -4,7 +4,7 @@ import gym import numpy as np -from gym_go import govars +from gym_go import govars, gogame class TestGoEnvInvalidMoves(unittest.TestCase): @@ -175,6 +175,8 @@ def test_invalid_game_already_over_move(self): with self.assertRaises(Exception): self.env.step((0, 0)) + self.assertTrue((gogame.invalid_moves(self.env.state()) == 1).all()) + def test_small_suicide(self): """ 7, 8, 0, From 6a5c98c54f541c3959a504f12b60ff4d25269724 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Mon, 2 May 2022 09:09:36 +1000 Subject: [PATCH 13/18] Super-ko for compute_invalid_moves --- gym_go/state_utils.py | 25 +++++++++++++++++++++- gym_go/tests/test_invalid_moves.py | 33 ++++++++++++++++++++++++++++++ gym_go/tests/test_super_ko.py | 10 +++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/gym_go/state_utils.py b/gym_go/state_utils.py index d6dc9a4..e1e2f52 100644 --- a/gym_go/state_utils.py +++ b/gym_go/state_utils.py @@ -1,7 +1,7 @@ import numpy as np from scipy import ndimage -from gym_go import govars +from gym_go import govars, gogame group_struct = np.array([[[0, 0, 0], [0, 0, 0], @@ -41,6 +41,7 @@ def compute_invalid_moves(state, player, ko_protect=None, history=None): # Setup invalid and valid arrays possible_invalid_array = np.zeros(state.shape[1:]) + super_ko_invalid_array = np.zeros(state.shape[1:]) definite_valids_array = np.zeros(state.shape[1:]) # Get all groups @@ -79,6 +80,28 @@ def compute_invalid_moves(state, player, ko_protect=None, history=None): # Ko-protection if ko_protect is not 
None: invalid_moves[ko_protect[0], ko_protect[1]] = 1 + + # Super ko-protection + if history is not None and len(history) > 0: + # Create a new state with updated invalid moves so we can calculate child moves + updated_state = np.copy(state) + updated_state[govars.INVD_CHNL] = (invalid_moves > 0) + + children = gogame.children(updated_state) + board_size = np.prod(state.shape[1:]) + children = children[:board_size] + + trunc_history = np.array(history)[:, :2] + for action1d, child_state in enumerate(children): + # Skip children that don't represent a valid move + if (child_state[:2] == 0).all(): + continue + if (trunc_history == child_state[:2]).all(axis=1).all(axis=1).all(axis=1).any(): + action2d = action1d // state.shape[1:][0], action1d % state.shape[1:][1] + super_ko_invalid_array[action2d[0], action2d[1]] = 1 + + invalid_moves = invalid_moves + super_ko_invalid_array + return invalid_moves > 0 diff --git a/gym_go/tests/test_invalid_moves.py b/gym_go/tests/test_invalid_moves.py index 2464459..b86d1af 100644 --- a/gym_go/tests/test_invalid_moves.py +++ b/gym_go/tests/test_invalid_moves.py @@ -160,6 +160,39 @@ def test_invalid_no_liberty_move(self): with self.assertRaises(Exception): self.env.step(final_move) + + def test_invalid_super_ko_move(self): + """ + 1/5/7, 3/6, + + 4, 2, + + :return: + """ + + self.env = gym.make('go-v0', size=2, reward_method='real') + self.env.reset() + + for move in [(0, 0), (1, 1), (1, 0), (0, 1), (0, 0), (1, 0)]: + state, reward, done, info = self.env.step(move) + + # Test invalid channel + self.assertEqual( + np.count_nonzero(state[govars.INVD_CHNL]), + 4, + state[govars.INVD_CHNL] + ) + self.assertEqual(np.count_nonzero(state[govars.INVD_CHNL] == 1), 4) + self.assertEqual(state[govars.INVD_CHNL, 0, 0], 1) + + # Assert pieces channel is empty at ko-protection coordinate + self.assertEqual(state[govars.BLACK, 0, 0], 0) + self.assertEqual(state[govars.WHITE, 0, 0], 0) + + final_move = (0, 0) + with self.assertRaises(Exception): + 
self.env.step(final_move) + def test_invalid_game_already_over_move(self): self.env.step(None) self.env.step(None) diff --git a/gym_go/tests/test_super_ko.py b/gym_go/tests/test_super_ko.py index b7471df..263b152 100644 --- a/gym_go/tests/test_super_ko.py +++ b/gym_go/tests/test_super_ko.py @@ -2,6 +2,8 @@ import gym import gym_go +from gym_go import gogame +from gym_go import state_utils class TestGoEnvSuperKo(unittest.TestCase): def __init__(self, *args, **kwargs): @@ -24,6 +26,14 @@ def test_reset_clears_history(self): self.env.reset() self.assertEqual(self.env.history, []) + def test_invalid_moves(self): + """Given an empty board and a history with a move, that same move should be invalid""" + state = gogame.init_state(2) + history = [gogame.next_state(state, 0)] + + invalid_moves = state_utils.compute_invalid_moves(state, 0, ko_protect=None, history=history) + + self.assertTrue((invalid_moves == [[1, 0], [0, 0]]).all()) if __name__ == '__main__': unittest.main() From 0e5fe0293918435fd55fcac8222b5bca4864903c Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Tue, 3 May 2022 16:22:19 +1000 Subject: [PATCH 14/18] Super-ko for batch_compute_invalid_moves --- gym_go/state_utils.py | 25 +++++++++++++++++++++++++ gym_go/tests/test_super_ko.py | 16 ++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/gym_go/state_utils.py b/gym_go/state_utils.py index e1e2f52..3d81c8b 100644 --- a/gym_go/state_utils.py +++ b/gym_go/state_utils.py @@ -127,6 +127,7 @@ def batch_compute_invalid_moves(batch_state, batch_player, batch_ko_protect, bat # Setup invalid and valid arrays batch_possible_invalid_array = np.zeros(batch_state.shape[:1] + batch_state.shape[2:]) + batch_super_ko_invalid_array = np.zeros(batch_state.shape[:1] + batch_state.shape[2:]) batch_definite_valids_array = np.zeros(batch_state.shape[:1] + batch_state.shape[2:]) # Get all groups @@ -175,6 +176,30 @@ def batch_compute_invalid_moves(batch_state, batch_player, batch_ko_protect, bat for i, ko_protect in 
enumerate(batch_ko_protect): if ko_protect is not None: invalid_moves[i, ko_protect[0], ko_protect[1]] = 1 + + # Super ko-protection + if batch_history is not None: + # Create a new state with updated invalid moves so we can calculate child moves + updated_states = np.copy(batch_state) + updated_states[:, govars.INVD_CHNL] = (invalid_moves > 0) + + board_size = np.prod(batch_state.shape[2:]) + batch_children = np.array( + [gogame.children(s)[:board_size] for s in updated_states] + ) + + trunc_history = batch_history[:, :, :2] + for i, state in enumerate(batch_state): + for action1d, child_state in enumerate(batch_children[i]): + # Skip children that don't represent a valid move + if (child_state[:2] == 0).all(): + continue + if (trunc_history[i] == child_state[:2]).all(axis=1).all(axis=1).all(axis=1).any(): + action2d = action1d // state.shape[1:][0], action1d % state.shape[1:][1] + batch_super_ko_invalid_array[i, action2d[0], action2d[1]] = 1 + + invalid_moves = invalid_moves + batch_super_ko_invalid_array + return invalid_moves > 0 diff --git a/gym_go/tests/test_super_ko.py b/gym_go/tests/test_super_ko.py index 263b152..4af2f8b 100644 --- a/gym_go/tests/test_super_ko.py +++ b/gym_go/tests/test_super_ko.py @@ -1,5 +1,7 @@ import unittest +import numpy as np + import gym import gym_go from gym_go import gogame @@ -35,5 +37,19 @@ def test_invalid_moves(self): self.assertTrue((invalid_moves == [[1, 0], [0, 0]]).all()) + def test_batch_invalid_moves(self): + """Given an empty board and a history with a move, that same move should be invalid""" + state = gogame.init_state(2) + history = [gogame.next_state(state, 0)] + + invalid_moves = state_utils.batch_compute_invalid_moves( + np.expand_dims(state, 0), + np.array([0]), + batch_ko_protect=np.array([None]), + batch_history=np.expand_dims(history, 0) + ) + + self.assertTrue((invalid_moves == [[[1, 0], [0, 0]]]).all()) + if __name__ == '__main__': unittest.main() From bbff5dd8a22b0adf940c629379484c52b3310eff Mon Sep 17 
00:00:00 2001 From: Rohan Mitchell Date: Tue, 3 May 2022 16:51:07 +1000 Subject: [PATCH 15/18] Allow super ko to be enabled/disabled via GoEnv --- gym_go/envs/go_env.py | 11 +++++++---- gym_go/tests/test_invalid_moves.py | 20 +++++++++++++++++++- gym_go/tests/test_super_ko.py | 20 +++++++++++++++++++- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/gym_go/envs/go_env.py b/gym_go/envs/go_env.py index b0bfdd4..226f2d8 100644 --- a/gym_go/envs/go_env.py +++ b/gym_go/envs/go_env.py @@ -21,8 +21,9 @@ class GoEnv(gym.Env): govars = govars gogame = gogame - def __init__(self, size, komi=0, reward_method='real'): + def __init__(self, size, komi=0, super_ko=False, reward_method='real'): ''' + @param super_ko: whether to enable super-ko rule (history tracking) @param reward_method: either 'heuristic' or 'real' heuristic: gives # black pieces - # white pieces. real: gives 0 for in-game move, 1 for winning, -1 for losing, @@ -31,7 +32,7 @@ def __init__(self, size, komi=0, reward_method='real'): self.size = size self.komi = komi self.state_ = gogame.init_state(size) - self.history = [] + self.history = [] if super_ko else None self.reward_method = RewardMethod(reward_method) self.observation_space = gym.spaces.Box(np.float32(0), np.float32(govars.NUM_CHNLS), shape=(govars.NUM_CHNLS, size, size)) @@ -44,7 +45,8 @@ def reset(self): done, return state ''' self.state_ = gogame.init_state(self.size) - self.history = [] + if self.history is not None: + self.history = [] self.done = False return np.copy(self.state_) @@ -63,7 +65,8 @@ def step(self, action): self.old_state = self.state() self.state_ = gogame.next_state(self.state_, action, canonical=False, history=self.history) - self.history.append(self.old_state) + if self.history is not None: + self.history.append(self.old_state) self.done = gogame.game_ended(self.state_) return np.copy(self.state_), self.reward(), self.done, self.info() diff --git a/gym_go/tests/test_invalid_moves.py 
b/gym_go/tests/test_invalid_moves.py index b86d1af..0896e34 100644 --- a/gym_go/tests/test_invalid_moves.py +++ b/gym_go/tests/test_invalid_moves.py @@ -170,7 +170,7 @@ def test_invalid_super_ko_move(self): :return: """ - self.env = gym.make('go-v0', size=2, reward_method='real') + self.env = gym.make('go-v0', size=2, super_ko=True, reward_method='real') self.env.reset() for move in [(0, 0), (1, 1), (1, 0), (0, 1), (0, 0), (1, 0)]: @@ -193,6 +193,24 @@ def test_invalid_super_ko_move(self): with self.assertRaises(Exception): self.env.step(final_move) + def test_valid_when_super_ko_disabled(self): + self.env = gym.make('go-v0', size=2, super_ko=False, reward_method='real') + self.env.reset() + + for move in [(0, 0), (1, 1), (1, 0), (0, 1), (0, 0), (1, 0)]: + state, reward, done, info = self.env.step(move) + + # Test invalid channel + self.assertEqual( + np.count_nonzero(state[govars.INVD_CHNL]), + 3, + state[govars.INVD_CHNL] + ) + self.assertEqual(np.count_nonzero(state[govars.INVD_CHNL] == 1), 3) + self.assertEqual(state[govars.INVD_CHNL, 0, 0], 0) + + self.env.step((0, 0)) + def test_invalid_game_already_over_move(self): self.env.step(None) self.env.step(None) diff --git a/gym_go/tests/test_super_ko.py b/gym_go/tests/test_super_ko.py index 4af2f8b..9f24747 100644 --- a/gym_go/tests/test_super_ko.py +++ b/gym_go/tests/test_super_ko.py @@ -10,7 +10,7 @@ class TestGoEnvSuperKo(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.env = gym.make('go-v0', size=2) + self.env = gym.make('go-v0', size=2, super_ko=True) def setUp(self): self.env.reset() @@ -18,16 +18,34 @@ def setUp(self): def test_initial_history(self): self.assertEqual(self.env.history, []) + def test_initial_history_no_super_ko(self): + self.env = gym.make('go-v0', size=2, super_ko=False) + self.assertEqual(self.env.history, None) + def test_step_builds_history(self): self.env.step((0, 0)) self.assertEqual(len(self.env.history), 1) + def 
test_step_ignores_history_no_super_ko(self): + self.env = gym.make('go-v0', size=2, super_ko=False) + self.env.reset() + self.env.step((0, 0)) + self.assertEqual(self.env.history, None) + def test_reset_clears_history(self): self.env.step((0, 0)) self.assertNotEqual(self.env.history, []) self.env.reset() self.assertEqual(self.env.history, []) + def test_reset_clears_history_no_super_ko(self): + self.env = gym.make('go-v0', size=2, super_ko=False) + self.env.reset() + self.env.step((0, 0)) + self.assertEqual(self.env.history, None) + self.env.reset() + self.assertEqual(self.env.history, None) + def test_invalid_moves(self): """Given an empty board and a history with a move, that same move should be invalid""" state = gogame.init_state(2) From 363d51b5f640532d35bb007fd1b3e3c9e995d72e Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Tue, 3 May 2022 16:51:32 +1000 Subject: [PATCH 16/18] Run efficiency tests both with and without super ko --- gym_go/tests/efficiency.py | 43 +++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/gym_go/tests/efficiency.py b/gym_go/tests/efficiency.py index 392ca11..16a4ea1 100644 --- a/gym_go/tests/efficiency.py +++ b/gym_go/tests/efficiency.py @@ -11,10 +11,32 @@ class Efficiency(unittest.TestCase): boardsize = 9 iterations = 64 - def setUp(self) -> None: + def testOrderedTrajs(self): self.env = gym.make('go-v0', size=self.boardsize, reward_method='real') + self.doOrderedTrajs() - def testOrderedTrajs(self): + def testOrderedTrajsSuperKo(self): + self.env = gym.make('go-v0', size=self.boardsize, reward_method='real', super_ko=True) + self.doOrderedTrajs('super ko') + + def testLowerBound(self): + self.env = gym.make('go-v0', size=self.boardsize, reward_method='real') + self.doLowerBound() + + def testLowerBoundSuperKo(self): + self.env = gym.make('go-v0', size=self.boardsize, reward_method='real', super_ko=True) + self.doLowerBound('super ko') + + def testRandTrajsWithChildren(self): + 
self.env = gym.make('go-v0', size=self.boardsize, reward_method='real') + self.doRandTrajsWithChildren() + + def testRandTrajsWithChildrenSuperKo(self): + self.env = gym.make('go-v0', size=self.boardsize, reward_method='real', super_ko=True) + self.doRandTrajsWithChildren('super ko') + + + def doOrderedTrajs(self, msg=''): durs = [] for _ in tqdm(range(self.iterations)): start = time.time() @@ -28,9 +50,12 @@ def testOrderedTrajs(self): avg_time = np.mean(durs) std_time = np.std(durs) - print(f"Ordered Trajs: {avg_time:.3f} AVG, {std_time:.3f} STD", flush=True) + if msg != '': + msg = f' ({msg})' + print(f"Ordered Trajs{msg}: {avg_time:.3f} AVG, {std_time:.3f} STD", flush=True) - def testLowerBound(self): + + def doLowerBound(self, msg=''): durs = [] for _ in tqdm(range(self.iterations)): start = time.time() @@ -52,9 +77,11 @@ def testLowerBound(self): avg_time = np.mean(durs) std_time = np.std(durs) - print(f"Lower bound: {avg_time:.3f} AVG, {std_time:.3f} STD", flush=True) + if msg != '': + msg = f' ({msg})' + print(f"Lower bound{msg}: {avg_time:.3f} AVG, {std_time:.3f} STD", flush=True) - def testRandTrajsWithChildren(self): + def doRandTrajsWithChildren(self, msg=''): durs = [] num_steps = [] for _ in tqdm(range(self.iterations)): @@ -84,7 +111,9 @@ def testRandTrajsWithChildren(self): avg_time = np.mean(durs) std_time = np.std(durs) avg_steps = np.mean(num_steps) - print(f"Rand Trajs w/ Children: {avg_time:.3f} AVG SEC, {std_time:.3f} STD SEC, {avg_steps:.1f} AVG STEPS", + if msg != '': + msg = f' ({msg})' + print(f"Rand Trajs w/ Children{msg}: {avg_time:.3f} AVG SEC, {std_time:.3f} STD SEC, {avg_steps:.1f} AVG STEPS", flush=True) From b81c3e7c7f84569ec002e9040d34abad56dea45f Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Tue, 3 May 2022 16:51:51 +1000 Subject: [PATCH 17/18] Remove trailing whitespace in README --- README.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 
d0bd5e7..a6e1307 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # About -An environment for the board game Go. It is implemented using OpenAI's Gym API. +An environment for the board game Go. It is implemented using OpenAI's Gym API. It is also optimized to be as efficient as possible in order to efficiently train ML models. # Installation @@ -25,7 +25,7 @@ go_env.render('terminal') ``` ``` - 0 1 2 3 4 5 6 + 0 1 2 3 4 5 6 0 ╔═╤═╤═╤═╤═╤═╗ 1 ╟─┼─┼─┼─┼─┼─╢ 2 ╟─┼─┼─┼─┼─○─╢ @@ -43,7 +43,7 @@ go_env.render('terminal') ``` ``` - 0 1 2 3 4 5 6 + 0 1 2 3 4 5 6 0 ╔═╤═╤═╤═╤═╤═╗ 1 ╟─┼─┼─┼─┼─┼─╢ 2 ╟─┼─┼─┼─┼─○─╢ @@ -64,21 +64,21 @@ python3 demo.py ![alt text](screenshots/human_ui.png) ### High level API -[GoEnv](gym_go/envs/go_env.py) defines the Gym environment for Go. -It contains the highest level API for basic Go usage. +[GoEnv](gym_go/envs/go_env.py) defines the Gym environment for Go. +It contains the highest level API for basic Go usage. ### Low level API [GoGame](gym_go/gogame.py) is the set of low-level functions that defines all the game logic of Go. `GoEnv`'s high level API is built on `GoGame`. -These sets of functions are intended for a more detailed and finetuned +These sets of functions are intended for a more detailed and finetuned usage of Go. # Scoring -We use Trump Taylor scoring, a simple area scoring, to determine the winner. A player's _area_ is defined as the number of empty points a -player's pieces surround plus the number of player's pieces on the board. The _winner_ is the player with the larger +We use Tromp-Taylor scoring, a simple area scoring, to determine the winner. A player's _area_ is defined as the number of empty points a +player's pieces surround plus the number of player's pieces on the board. The _winner_ is the player with the larger area (a game is tied if both players have an equal amount of area on the board). -There is also support for `komi`, a bias score constant to balance the advantage of black going first. 
+There is also support for `komi`, a bias score constant to balance the advantage of black going first. By default `komi` is set to 0. # Game ending @@ -92,16 +92,16 @@ Reward methods are in _black_'s perspective * `0` - Game is tied * `1` - Black won * `0` - Otherwise -* **Heuristic**: If the game is ongoing, the reward is `black area - white area`. -If black won, the reward is `BOARD_SIZE**2`. +* **Heuristic**: If the game is ongoing, the reward is `black area - white area`. +If black won, the reward is `BOARD_SIZE**2`. If white won, the reward is `-BOARD_SIZE**2`. If tied, the reward is `0`. # State -The `state` object that is returned by the `reset` and `step` functions of the environment is a -`6 x BOARD_SIZE x BOARD_SIZE` numpy array. All values in the array are either `0` or `1` +The `state` object that is returned by the `reset` and `step` functions of the environment is a +`6 x BOARD_SIZE x BOARD_SIZE` numpy array. All values in the array are either `0` or `1` * **First and second channel:** represent the black and white pieces respectively. -* **Third channel:** Indicator layer for whose turn it is +* **Third channel:** Indicator layer for whose turn it is * **Fourth channel:** Invalid moves (including ko-protection) for the next action * **Fifth channel:** Indicator layer for whether the previous move was a pass * **Sixth channel:** Indicator layer for whether the game is over From 6fc7d2c0124deda92f7ef7bd531ad7d629d06721 Mon Sep 17 00:00:00 2001 From: Rohan Mitchell Date: Tue, 3 May 2022 17:01:53 +1000 Subject: [PATCH 18/18] Document super ko in README --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index a6e1307..cc61c12 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,17 @@ area (a game is tied if both players have an equal amount of area on the board). There is also support for `komi`, a bias score constant to balance the advantage of black going first. By default `komi` is set to 0. 
+# Ko and super ko +The game supports a simple implementation of the ko rule by default, which prevents single-move take-back scenarios. In addition, an optional +super ko rule can be enabled when creating the environment: + +```python +go_env = gym.make('go-v0', size=7, super_ko=True) +``` + +This rule implements positional super ko by tracking play history, which catches repeating positions not detected by the regular ko rule +at the price of a performance overhead. + # Game ending A game ends when both players pass consecutively