From c5f4f9526af8f8aa057ead5ed623b04e189ca44e Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Sun, 1 May 2022 10:48:41 +1000
Subject: [PATCH 01/18] Explicitly specify gym version 0.20.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index d33850a..5c0f3d4 100644
--- a/setup.py
+++ b/setup.py
@@ -3,5 +3,5 @@
 setup(
     name='gym_go',
     version='0.0.1',
-    install_requires=['gym']  # and other dependencies
+    install_requires=['gym==0.20.0']  # and other dependencies
 )

From 53ffc843eb08ff25224bf27f824ab293266e25b6 Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Sun, 1 May 2022 12:02:56 +1000
Subject: [PATCH 02/18] Bump gym version to 0.21.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 5c0f3d4..85f9319 100644
--- a/setup.py
+++ b/setup.py
@@ -3,5 +3,5 @@
 setup(
     name='gym_go',
     version='0.0.1',
-    install_requires=['gym==0.20.0']  # and other dependencies
+    install_requires=['gym==0.21.0']  # and other dependencies
 )

From 6eca9a8ca2a0cc9271b0a6206e7c20b7b79cfd6a Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Sun, 1 May 2022 12:04:09 +1000
Subject: [PATCH 03/18] Always reset environment before use

---
 README.md                          | 1 +
 demo.py                            | 1 +
 gym_go/tests/test_basics.py        | 3 +++
 gym_go/tests/test_invalid_moves.py | 2 ++
 4 files changed, 7 insertions(+)

diff --git a/README.md b/README.md
index 943e08d..4059f3a 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ pip install -e .
 import gym
 
 go_env = gym.make('gym_go:go-v0', size=7, komi=0, reward_method='real')
+go_env.reset()
 
 first_action = (2,5)
 second_action = (5,2)
diff --git a/demo.py b/demo.py
index 9c9d2d5..39615f3 100644
--- a/demo.py
+++ b/demo.py
@@ -10,6 +10,7 @@
 
 # Initialize environment
 go_env = gym.make('gym_go:go-v0', size=args.boardsize, komi=args.komi)
+go_env.reset()
 
 # Game loop
 done = False
diff --git a/gym_go/tests/test_basics.py b/gym_go/tests/test_basics.py
index 307ed1f..abc4945 100644
--- a/gym_go/tests/test_basics.py
+++ b/gym_go/tests/test_basics.py
@@ -174,6 +174,7 @@ def test_num_liberties(self):
 
     def test_komi(self):
         env = gym.make('gym_go:go-v0', size=7, komi=2.5, reward_method='real')
+        env.reset()
 
         # White win
         _ = env.step(None)
@@ -224,6 +225,7 @@ def test_children(self):
 
     def test_real_reward(self):
         env = gym.make('gym_go:go-v0', size=7, reward_method='real')
+        env.reset()
 
         # In game
         state, reward, done, info = env.step((0, 0))
@@ -259,6 +261,7 @@ def test_real_reward(self):
 
     def test_heuristic_reward(self):
         env = gym.make('gym_go:go-v0', size=7, reward_method='heuristic')
+        env.reset()
 
         # In game
         state, reward, done, info = env.step((0, 0))
diff --git a/gym_go/tests/test_invalid_moves.py b/gym_go/tests/test_invalid_moves.py
index 3693bbe..809d94f 100644
--- a/gym_go/tests/test_invalid_moves.py
+++ b/gym_go/tests/test_invalid_moves.py
@@ -186,6 +186,7 @@ def test_small_suicide(self):
         """
 
         self.env = gym.make('gym_go:go-v0', size=3, reward_method='real')
+        self.env.reset()
         for move in [6, 7, 8, 5, 4, 8, 0, 1]:
             state, reward, done, info = self.env.step(move)
 
@@ -203,6 +204,7 @@ def test_invalid_after_capture(self):
         """
 
         self.env = gym.make('gym_go:go-v0', size=3, reward_method='real')
+        self.env.reset()
         for move in [0, 8, 6, 4, 1, 2, 3, 7]:
             state, reward, done, info = self.env.step(move)
 

From b4fa9964cb9931af028401f1ed3f5643e833e6c2 Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Sun, 1 May 2022 12:12:01 +1000
Subject: [PATCH 04/18] Bump gym version to 0.22.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 85f9319..7771a43 100644
--- a/setup.py
+++ b/setup.py
@@ -3,5 +3,5 @@
 setup(
     name='gym_go',
     version='0.0.1',
-    install_requires=['gym==0.21.0']  # and other dependencies
+    install_requires=['gym==0.22.0']  # and other dependencies
 )

From 1636be69f6613d88adfc075720548cafbf6a8308 Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Sun, 1 May 2022 12:13:33 +1000
Subject: [PATCH 05/18] Explicitly require gym_go

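gym.make('gym_go:go-v0') no longer imports gym_go for us, so import the
package explicitly and refer to the environment by its plain 'go-v0' id.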
---
 README.md                          |  3 ++-
 demo.py                            |  3 ++-
 gym_go/tests/efficiency.py         |  3 ++-
 gym_go/tests/test_basics.py        | 14 +++++++-------
 gym_go/tests/test_invalid_moves.py |  6 +++---
 gym_go/tests/test_valid_moves.py   |  2 +-
 6 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 4059f3a..d0bd5e7 100644
--- a/README.md
+++ b/README.md
@@ -13,8 +13,9 @@ pip install -e .
 ### Coding example
 ```python
 import gym
+import gym_go
 
-go_env = gym.make('gym_go:go-v0', size=7, komi=0, reward_method='real')
+go_env = gym.make('go-v0', size=7, komi=0, reward_method='real')
 go_env.reset()
 
 first_action = (2,5)
diff --git a/demo.py b/demo.py
index 39615f3..7d6a05f 100644
--- a/demo.py
+++ b/demo.py
@@ -1,6 +1,7 @@
 import argparse
 
 import gym
+import gym_go
 
 # Arguments
 parser = argparse.ArgumentParser(description='Demo Go Environment')
@@ -9,7 +10,7 @@
 args = parser.parse_args()
 
 # Initialize environment
-go_env = gym.make('gym_go:go-v0', size=args.boardsize, komi=args.komi)
+go_env = gym.make('go-v0', size=args.boardsize, komi=args.komi)
 go_env.reset()
 
 # Game loop
diff --git a/gym_go/tests/efficiency.py b/gym_go/tests/efficiency.py
index 7b179b7..392ca11 100644
--- a/gym_go/tests/efficiency.py
+++ b/gym_go/tests/efficiency.py
@@ -2,6 +2,7 @@
 import unittest
 
 import gym
+import gym_go
 import numpy as np
 from tqdm import tqdm
 
@@ -11,7 +12,7 @@ class Efficiency(unittest.TestCase):
     iterations = 64
 
     def setUp(self) -> None:
-        self.env = gym.make('gym_go:go-v0', size=self.boardsize, reward_method='real')
+        self.env = gym.make('go-v0', size=self.boardsize, reward_method='real')
 
     def testOrderedTrajs(self):
         durs = []
diff --git a/gym_go/tests/test_basics.py b/gym_go/tests/test_basics.py
index abc4945..e015460 100644
--- a/gym_go/tests/test_basics.py
+++ b/gym_go/tests/test_basics.py
@@ -10,13 +10,13 @@ class TestGoEnvBasics(unittest.TestCase):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.env = gym.make('gym_go:go-v0', size=7, reward_method='real')
+        self.env = gym.make('go-v0', size=7, reward_method='real')
 
     def setUp(self):
         self.env.reset()
 
     def test_state(self):
-        env = gym.make('gym_go:go-v0', size=7)
+        env = gym.make('go-v0', size=7)
         state = env.reset()
         self.assertIsInstance(state, np.ndarray)
         self.assertEqual(state.shape[0], govars.NUM_CHNLS)
@@ -27,7 +27,7 @@ def test_board_sizes(self):
         expected_sizes = [7, 13, 19]
 
         for expec_size in expected_sizes:
-            env = gym.make('gym_go:go-v0', size=expec_size)
+            env = gym.make('go-v0', size=expec_size)
             state = env.reset()
             self.assertEqual(state.shape[1], expec_size)
             self.assertEqual(state.shape[2], expec_size)
@@ -150,7 +150,7 @@ def test_game_does_not_end_with_disjoint_passes(self):
         self.assertFalse(done)
 
     def test_num_liberties(self):
-        env = gym.make('gym_go:go-v0', size=7)
+        env = gym.make('go-v0', size=7)
 
         steps = [(0, 0), (0, 1)]
         libs = [(2, 0), (1, 2)]
@@ -173,7 +173,7 @@ def test_num_liberties(self):
             self.assertEqual(whitelibs, libs[1], state)
 
     def test_komi(self):
-        env = gym.make('gym_go:go-v0', size=7, komi=2.5, reward_method='real')
+        env = gym.make('go-v0', size=7, komi=2.5, reward_method='real')
         env.reset()
 
         # White win
@@ -224,7 +224,7 @@ def test_children(self):
                     self.assertTrue((children[a] == 0).all())
 
     def test_real_reward(self):
-        env = gym.make('gym_go:go-v0', size=7, reward_method='real')
+        env = gym.make('go-v0', size=7, reward_method='real')
         env.reset()
 
         # In game
@@ -260,7 +260,7 @@ def test_real_reward(self):
         env.close()
 
     def test_heuristic_reward(self):
-        env = gym.make('gym_go:go-v0', size=7, reward_method='heuristic')
+        env = gym.make('go-v0', size=7, reward_method='heuristic')
         env.reset()
 
         # In game
diff --git a/gym_go/tests/test_invalid_moves.py b/gym_go/tests/test_invalid_moves.py
index 809d94f..2bb85ca 100644
--- a/gym_go/tests/test_invalid_moves.py
+++ b/gym_go/tests/test_invalid_moves.py
@@ -11,7 +11,7 @@ class TestGoEnvInvalidMoves(unittest.TestCase):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.env = gym.make('gym_go:go-v0', size=7, reward_method='real')
+        self.env = gym.make('go-v0', size=7, reward_method='real')
 
     def setUp(self):
         self.env.reset()
@@ -185,7 +185,7 @@ def test_small_suicide(self):
         :return:
         """
 
-        self.env = gym.make('gym_go:go-v0', size=3, reward_method='real')
+        self.env = gym.make('go-v0', size=3, reward_method='real')
         self.env.reset()
         for move in [6, 7, 8, 5, 4, 8, 0, 1]:
             state, reward, done, info = self.env.step(move)
@@ -203,7 +203,7 @@ def test_invalid_after_capture(self):
         :return:
         """
 
-        self.env = gym.make('gym_go:go-v0', size=3, reward_method='real')
+        self.env = gym.make('go-v0', size=3, reward_method='real')
         self.env.reset()
         for move in [0, 8, 6, 4, 1, 2, 3, 7]:
             state, reward, done, info = self.env.step(move)
diff --git a/gym_go/tests/test_valid_moves.py b/gym_go/tests/test_valid_moves.py
index 0e54064..a3806b5 100644
--- a/gym_go/tests/test_valid_moves.py
+++ b/gym_go/tests/test_valid_moves.py
@@ -10,7 +10,7 @@ class TestGoEnvValidMoves(unittest.TestCase):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.env = gym.make('gym_go:go-v0', size=7, reward_method='real')
+        self.env = gym.make('go-v0', size=7, reward_method='real')
 
     def setUp(self):
         self.env.reset()

From 83648efe9a6524c7fb2a13e4b6bad9f18720f88c Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Sun, 1 May 2022 12:21:44 +1000
Subject: [PATCH 06/18] Bump gym version to 0.23.1

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7771a43..c86abb7 100644
--- a/setup.py
+++ b/setup.py
@@ -3,5 +3,5 @@
 setup(
     name='gym_go',
     version='0.0.1',
-    install_requires=['gym==0.22.0']  # and other dependencies
+    install_requires=['gym==0.23.1']  # and other dependencies
 )

From 9c7841f58cf708f7d5e78c6e5a08439b6448a80e Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Sun, 1 May 2022 12:32:40 +1000
Subject: [PATCH 07/18] Stop using deprecated scipy.ndimage.measurements
 namespace

---
 gym_go/gogame.py      |  2 +-
 gym_go/state_utils.py | 13 ++++++-------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/gym_go/gogame.py b/gym_go/gogame.py
index 4d68339..4d36d7e 100644
--- a/gym_go/gogame.py
+++ b/gym_go/gogame.py
@@ -280,7 +280,7 @@ def areas(state):
     all_pieces = np.sum(state[[govars.BLACK, govars.WHITE]], axis=0)
     empties = 1 - all_pieces
 
-    empty_labels, num_empty_areas = ndimage.measurements.label(empties)
+    empty_labels, num_empty_areas = ndimage.label(empties)
 
     black_area, white_area = np.sum(state[govars.BLACK]), np.sum(state[govars.WHITE])
     for label in range(1, num_empty_areas + 1):
diff --git a/gym_go/state_utils.py b/gym_go/state_utils.py
index 913ed55..1ded092 100644
--- a/gym_go/state_utils.py
+++ b/gym_go/state_utils.py
@@ -1,6 +1,5 @@
 import numpy as np
 from scipy import ndimage
-from scipy.ndimage import measurements
 
 from gym_go import govars
 
@@ -45,8 +44,8 @@ def compute_invalid_moves(state, player, ko_protect=None):
     definite_valids_array = np.zeros(state.shape[1:])
 
     # Get all groups
-    all_own_groups, num_own_groups = measurements.label(state[player])
-    all_opp_groups, num_opp_groups = measurements.label(state[1 - player])
+    all_own_groups, num_own_groups = ndimage.label(state[player])
+    all_opp_groups, num_opp_groups = ndimage.label(state[1 - player])
     expanded_own_groups = np.zeros((num_own_groups, *state.shape[1:]))
     expanded_opp_groups = np.zeros((num_opp_groups, *state.shape[1:]))
 
@@ -108,8 +107,8 @@ def batch_compute_invalid_moves(batch_state, batch_player, batch_ko_protect):
     batch_definite_valids_array = np.zeros(batch_state.shape[:1] + batch_state.shape[2:])
 
     # Get all groups
-    batch_all_own_groups, _ = measurements.label(batch_state[batch_idcs, batch_player], group_struct)
-    batch_all_opp_groups, _ = measurements.label(batch_state[batch_idcs, 1 - batch_player], group_struct)
+    batch_all_own_groups, _ = ndimage.label(batch_state[batch_idcs, batch_player], group_struct)
+    batch_all_opp_groups, _ = ndimage.label(batch_state[batch_idcs, 1 - batch_player], group_struct)
 
     batch_data = enumerate(zip(batch_all_own_groups, batch_all_opp_groups, batch_empties))
     for i, (all_own_groups, all_opp_groups, empties) in batch_data:
@@ -163,7 +162,7 @@ def update_pieces(state, adj_locs, player):
     all_pieces = np.sum(state[[govars.BLACK, govars.WHITE]], axis=0)
     empties = 1 - all_pieces
 
-    all_opp_groups, _ = ndimage.measurements.label(state[opponent])
+    all_opp_groups, _ = ndimage.label(state[opponent])
 
     # Go through opponent groups
     all_adj_labels = all_opp_groups[adj_locs[:, 0], adj_locs[:, 1]]
@@ -187,7 +186,7 @@ def batch_update_pieces(batch_non_pass, batch_state, batch_adj_locs, batch_playe
     batch_all_pieces = np.sum(batch_state[:, [govars.BLACK, govars.WHITE]], axis=1)
     batch_empties = 1 - batch_all_pieces
 
-    batch_all_opp_groups, _ = ndimage.measurements.label(batch_state[batch_non_pass, batch_opponent],
+    batch_all_opp_groups, _ = ndimage.label(batch_state[batch_non_pass, batch_opponent],
                                                          group_struct)
 
     batch_data = enumerate(zip(batch_all_opp_groups, batch_all_pieces, batch_empties, batch_adj_locs, batch_opponent))

From be8f0c96cb6430e514676949795bfd3e22ab66ec Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Sun, 1 May 2022 12:35:07 +1000
Subject: [PATCH 08/18] Use built-in types (np.int and np.bool are deprecated)

---
 gym_go/gogame.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gym_go/gogame.py b/gym_go/gogame.py
index 4d36d7e..de3d9af 100644
--- a/gym_go/gogame.py
+++ b/gym_go/gogame.py
@@ -247,7 +247,7 @@ def turn(state):
 
 
 def batch_turn(batch_state):
-    return np.max(batch_state[:, govars.TURN_CHNL], axis=(1, 2)).astype(np.int)
+    return np.max(batch_state[:, govars.TURN_CHNL], axis=(1, 2)).astype(int)
 
 
 def liberties(state: np.ndarray):
@@ -258,7 +258,7 @@ def liberties(state: np.ndarray):
     liberty_list = []
     for player_pieces in [blacks, whites]:
         liberties = ndimage.binary_dilation(player_pieces, state_utils.surround_struct)
-        liberties *= (1 - all_pieces).astype(np.bool)
+        liberties *= (1 - all_pieces).astype(bool)
         liberty_list.append(liberties)
 
     return liberty_list[0], liberty_list[1]

From 5f019556c53269104915b00cd1c775c545c3906a Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Sun, 1 May 2022 14:44:29 +1000
Subject: [PATCH 09/18] Keep track of game state history in env

---
 gym_go/envs/go_env.py         |  4 ++++
 gym_go/tests/test_super_ko.py | 29 +++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 gym_go/tests/test_super_ko.py

diff --git a/gym_go/envs/go_env.py b/gym_go/envs/go_env.py
index b2d4d66..f66a09d 100644
--- a/gym_go/envs/go_env.py
+++ b/gym_go/envs/go_env.py
@@ -31,6 +31,7 @@ def __init__(self, size, komi=0, reward_method='real'):
         self.size = size
         self.komi = komi
         self.state_ = gogame.init_state(size)
+        self.history = []
         self.reward_method = RewardMethod(reward_method)
         self.observation_space = gym.spaces.Box(np.float32(0), np.float32(govars.NUM_CHNLS),
                                                 shape=(govars.NUM_CHNLS, size, size))
@@ -43,6 +44,7 @@ def reset(self):
         done, return state
         '''
         self.state_ = gogame.init_state(self.size)
+        self.history = []
         self.done = False
         return np.copy(self.state_)
 
@@ -59,7 +61,9 @@ def step(self, action):
         elif action is None:
             action = self.size ** 2
 
+        self.old_state = self.state()
         self.state_ = gogame.next_state(self.state_, action, canonical=False)
+        self.history.append(self.old_state)
         self.done = gogame.game_ended(self.state_)
         return np.copy(self.state_), self.reward(), self.done, self.info()
 
diff --git a/gym_go/tests/test_super_ko.py b/gym_go/tests/test_super_ko.py
new file mode 100644
index 0000000..b7471df
--- /dev/null
+++ b/gym_go/tests/test_super_ko.py
@@ -0,0 +1,29 @@
+import unittest
+
+import gym
+import gym_go
+
+class TestGoEnvSuperKo(unittest.TestCase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.env = gym.make('go-v0', size=2)
+
+    def setUp(self):
+        self.env.reset()
+
+    def test_initial_history(self):
+        self.assertEqual(self.env.history, [])
+
+    def test_step_builds_history(self):
+        self.env.step((0, 0))
+        self.assertEqual(len(self.env.history), 1)
+
+    def test_reset_clears_history(self):
+        self.env.step((0, 0))
+        self.assertNotEqual(self.env.history, [])
+        self.env.reset()
+        self.assertEqual(self.env.history, [])
+
+
+if __name__ == '__main__':
+    unittest.main()

From ec53fd00d038e847922ea8f9c25a45f721f96161 Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Sun, 1 May 2022 14:58:05 +1000
Subject: [PATCH 10/18] Pass state history to (batch)_compute_invalid_moves

---
 gym_go/envs/go_env.py | 2 +-
 gym_go/gogame.py      | 8 ++++----
 gym_go/state_utils.py | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gym_go/envs/go_env.py b/gym_go/envs/go_env.py
index f66a09d..b0bfdd4 100644
--- a/gym_go/envs/go_env.py
+++ b/gym_go/envs/go_env.py
@@ -62,7 +62,7 @@ def step(self, action):
             action = self.size ** 2
 
         self.old_state = self.state()
-        self.state_ = gogame.next_state(self.state_, action, canonical=False)
+        self.state_ = gogame.next_state(self.state_, action, canonical=False, history=self.history)
         self.history.append(self.old_state)
         self.done = gogame.game_ended(self.state_)
         return np.copy(self.state_), self.reward(), self.done, self.info()
diff --git a/gym_go/gogame.py b/gym_go/gogame.py
index de3d9af..e62ff3e 100644
--- a/gym_go/gogame.py
+++ b/gym_go/gogame.py
@@ -31,7 +31,7 @@ def batch_init_state(batch_size, board_size):
     return batch_state
 
 
-def next_state(state, action1d, canonical=False):
+def next_state(state, action1d, canonical=False, history=None):
     # Deep copy the state to modify
     state = np.copy(state)
 
@@ -75,7 +75,7 @@ def next_state(state, action1d, canonical=False):
                 ko_protect = killed_group[0]
 
     # Update invalid moves
-    state[govars.INVD_CHNL] = state_utils.compute_invalid_moves(state, player, ko_protect)
+    state[govars.INVD_CHNL] = state_utils.compute_invalid_moves(state, player, ko_protect, history)
 
     # Switch turn
     state_utils.set_turn(state)
@@ -87,7 +87,7 @@ def next_state(state, action1d, canonical=False):
     return state
 
 
-def batch_next_states(batch_states, batch_action1d, canonical=False):
+def batch_next_states(batch_states, batch_action1d, canonical=False, batch_histories=None):
     # Deep copy the state to modify
     batch_states = np.copy(batch_states)
 
@@ -138,7 +138,7 @@ def batch_next_states(batch_states, batch_action1d, canonical=False):
 
     # Update invalid moves
     batch_states[:, govars.INVD_CHNL] = state_utils.batch_compute_invalid_moves(batch_states, batch_players,
-                                                                                batch_ko_protect)
+                                                                                batch_ko_protect, batch_histories)
 
     # Switch turn
     state_utils.batch_set_turn(batch_states)
diff --git a/gym_go/state_utils.py b/gym_go/state_utils.py
index 1ded092..d6dc9a4 100644
--- a/gym_go/state_utils.py
+++ b/gym_go/state_utils.py
@@ -20,7 +20,7 @@
 neighbor_deltas = np.array([[-1, 0], [1, 0], [0, -1], [0, 1]])
 
 
-def compute_invalid_moves(state, player, ko_protect=None):
+def compute_invalid_moves(state, player, ko_protect=None, history=None):
     """
     Updates invalid moves in the OPPONENT's perspective
     1.) Opponent cannot move at a location
@@ -82,7 +82,7 @@ def compute_invalid_moves(state, player, ko_protect=None):
     return invalid_moves > 0
 
 
-def batch_compute_invalid_moves(batch_state, batch_player, batch_ko_protect):
+def batch_compute_invalid_moves(batch_state, batch_player, batch_ko_protect, batch_history=None):
     """
     Updates invalid moves in the OPPONENT's perspective
     1.) Opponent cannot move at a location

From 760a17e5f3c8e7e6aade8928fa52d096d136a65e Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Tue, 3 May 2022 11:48:41 +1000
Subject: [PATCH 11/18] Switch turn before calculating invalid moves

---
 gym_go/gogame.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gym_go/gogame.py b/gym_go/gogame.py
index e62ff3e..40adf9b 100644
--- a/gym_go/gogame.py
+++ b/gym_go/gogame.py
@@ -74,12 +74,12 @@ def next_state(state, action1d, canonical=False, history=None):
             if len(killed_group) == 1:
                 ko_protect = killed_group[0]
 
-    # Update invalid moves
-    state[govars.INVD_CHNL] = state_utils.compute_invalid_moves(state, player, ko_protect, history)
-
     # Switch turn
     state_utils.set_turn(state)
 
+    # Update invalid moves
+    state[govars.INVD_CHNL] = state_utils.compute_invalid_moves(state, player, ko_protect, history)
+
     if canonical:
         # Set canonical form
         state = canonical_form(state)
@@ -136,13 +136,13 @@ def batch_next_states(batch_states, batch_action1d, canonical=False, batch_histo
             if len(killed_group) == 1:
                 batch_ko_protect[batch_non_pass[i]] = killed_group[0]
 
+    # Switch turn
+    state_utils.batch_set_turn(batch_states)
+
     # Update invalid moves
     batch_states[:, govars.INVD_CHNL] = state_utils.batch_compute_invalid_moves(batch_states, batch_players,
                                                                                 batch_ko_protect, batch_histories)
 
-    # Switch turn
-    state_utils.batch_set_turn(batch_states)
-
     if canonical:
         # Set canonical form
         batch_states = batch_canonical_form(batch_states)

From 5167076c766990d72ab9d87d171d5da710bd8d11 Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Tue, 3 May 2022 12:16:08 +1000
Subject: [PATCH 12/18] When game is over, all moves should be invalid

---
 gym_go/gogame.py                   | 2 +-
 gym_go/tests/test_invalid_moves.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gym_go/gogame.py b/gym_go/gogame.py
index 40adf9b..3fdbcd9 100644
--- a/gym_go/gogame.py
+++ b/gym_go/gogame.py
@@ -153,7 +153,7 @@ def batch_next_states(batch_states, batch_action1d, canonical=False, batch_histo
 def invalid_moves(state):
     # return a fixed size binary vector
     if game_ended(state):
-        return np.zeros(action_size(state))
+        return np.ones(action_size(state))
     return np.append(state[govars.INVD_CHNL].flatten(), 0)
 
 
diff --git a/gym_go/tests/test_invalid_moves.py b/gym_go/tests/test_invalid_moves.py
index 2bb85ca..2464459 100644
--- a/gym_go/tests/test_invalid_moves.py
+++ b/gym_go/tests/test_invalid_moves.py
@@ -4,7 +4,7 @@
 import gym
 import numpy as np
 
-from gym_go import govars
+from gym_go import govars, gogame
 
 
 class TestGoEnvInvalidMoves(unittest.TestCase):
@@ -175,6 +175,8 @@ def test_invalid_game_already_over_move(self):
         with self.assertRaises(Exception):
             self.env.step((0, 0))
 
+        self.assertTrue((gogame.invalid_moves(self.env.state()) == 1).all())
+
     def test_small_suicide(self):
         """
         7,   8,   0,

From 6a5c98c54f541c3959a504f12b60ff4d25269724 Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Mon, 2 May 2022 09:09:36 +1000
Subject: [PATCH 13/18] Super-ko for compute_invalid_moves

---
 gym_go/state_utils.py              | 25 +++++++++++++++++++++-
 gym_go/tests/test_invalid_moves.py | 33 ++++++++++++++++++++++++++++++
 gym_go/tests/test_super_ko.py      | 10 +++++++++
 3 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/gym_go/state_utils.py b/gym_go/state_utils.py
index d6dc9a4..e1e2f52 100644
--- a/gym_go/state_utils.py
+++ b/gym_go/state_utils.py
@@ -1,7 +1,7 @@
 import numpy as np
 from scipy import ndimage
 
-from gym_go import govars
+from gym_go import govars, gogame
 
 group_struct = np.array([[[0, 0, 0],
                           [0, 0, 0],
@@ -41,6 +41,7 @@ def compute_invalid_moves(state, player, ko_protect=None, history=None):
 
     # Setup invalid and valid arrays
     possible_invalid_array = np.zeros(state.shape[1:])
+    super_ko_invalid_array = np.zeros(state.shape[1:])
     definite_valids_array = np.zeros(state.shape[1:])
 
     # Get all groups
@@ -79,6 +80,28 @@ def compute_invalid_moves(state, player, ko_protect=None, history=None):
     # Ko-protection
     if ko_protect is not None:
         invalid_moves[ko_protect[0], ko_protect[1]] = 1
+
+    # Super ko-protection
+    if history is not None and len(history) > 0:
+        # Create a new state with updated invalid moves so we can calculate child moves
+        updated_state = np.copy(state)
+        updated_state[govars.INVD_CHNL] = (invalid_moves > 0)
+
+        children = gogame.children(updated_state)
+        board_size = np.prod(state.shape[1:])
+        children = children[:board_size]
+
+        trunc_history = np.array(history)[:, :2]
+        for action1d, child_state in enumerate(children):
+            # Skip children that don't represent a valid move
+            if (child_state[:2] == 0).all():
+                continue
+            if (trunc_history == child_state[:2]).all(axis=1).all(axis=1).all(axis=1).any():
+                action2d = action1d // state.shape[1:][0], action1d % state.shape[1:][1]
+                super_ko_invalid_array[action2d[0], action2d[1]] = 1
+
+        invalid_moves = invalid_moves + super_ko_invalid_array
+
     return invalid_moves > 0
 
 
diff --git a/gym_go/tests/test_invalid_moves.py b/gym_go/tests/test_invalid_moves.py
index 2464459..b86d1af 100644
--- a/gym_go/tests/test_invalid_moves.py
+++ b/gym_go/tests/test_invalid_moves.py
@@ -160,6 +160,39 @@ def test_invalid_no_liberty_move(self):
         with self.assertRaises(Exception):
             self.env.step(final_move)
 
+
+    def test_invalid_super_ko_move(self):
+        """
+        1/5/7, 3/6,
+
+        4,       2,
+
+        :return:
+        """
+
+        self.env = gym.make('go-v0', size=2, reward_method='real')
+        self.env.reset()
+
+        for move in [(0, 0), (1, 1), (1, 0), (0, 1), (0, 0), (1, 0)]:
+            state, reward, done, info = self.env.step(move)
+
+        # Test invalid channel
+        self.assertEqual(
+            np.count_nonzero(state[govars.INVD_CHNL]),
+            4,
+            state[govars.INVD_CHNL]
+        )
+        self.assertEqual(np.count_nonzero(state[govars.INVD_CHNL] == 1), 4)
+        self.assertEqual(state[govars.INVD_CHNL, 0, 0], 1)
+
+        # Assert pieces channel is empty at ko-protection coordinate
+        self.assertEqual(state[govars.BLACK, 0, 0], 0)
+        self.assertEqual(state[govars.WHITE, 0, 0], 0)
+
+        final_move = (0, 0)
+        with self.assertRaises(Exception):
+            self.env.step(final_move)
+
     def test_invalid_game_already_over_move(self):
         self.env.step(None)
         self.env.step(None)
diff --git a/gym_go/tests/test_super_ko.py b/gym_go/tests/test_super_ko.py
index b7471df..263b152 100644
--- a/gym_go/tests/test_super_ko.py
+++ b/gym_go/tests/test_super_ko.py
@@ -2,6 +2,8 @@
 
 import gym
 import gym_go
+from gym_go import gogame
+from gym_go import state_utils
 
 class TestGoEnvSuperKo(unittest.TestCase):
     def __init__(self, *args, **kwargs):
@@ -24,6 +26,14 @@ def test_reset_clears_history(self):
         self.env.reset()
         self.assertEqual(self.env.history, [])
 
+    def test_invalid_moves(self):
+        """Given an empty board and a history with a move, that same move should be invalid"""
+        state = gogame.init_state(2)
+        history = [gogame.next_state(state, 0)]
+
+        invalid_moves = state_utils.compute_invalid_moves(state, 0, ko_protect=None, history=history)
+
+        self.assertTrue((invalid_moves == [[1, 0], [0, 0]]).all())
 
 if __name__ == '__main__':
     unittest.main()

From 0e5fe0293918435fd55fcac8222b5bca4864903c Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Tue, 3 May 2022 16:22:19 +1000
Subject: [PATCH 14/18] Super-ko for batch_compute_invalid_moves

---
 gym_go/state_utils.py         | 25 +++++++++++++++++++++++++
 gym_go/tests/test_super_ko.py | 16 ++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/gym_go/state_utils.py b/gym_go/state_utils.py
index e1e2f52..3d81c8b 100644
--- a/gym_go/state_utils.py
+++ b/gym_go/state_utils.py
@@ -127,6 +127,7 @@ def batch_compute_invalid_moves(batch_state, batch_player, batch_ko_protect, bat
 
     # Setup invalid and valid arrays
     batch_possible_invalid_array = np.zeros(batch_state.shape[:1] + batch_state.shape[2:])
+    batch_super_ko_invalid_array = np.zeros(batch_state.shape[:1] + batch_state.shape[2:])
     batch_definite_valids_array = np.zeros(batch_state.shape[:1] + batch_state.shape[2:])
 
     # Get all groups
@@ -175,6 +176,30 @@ def batch_compute_invalid_moves(batch_state, batch_player, batch_ko_protect, bat
     for i, ko_protect in enumerate(batch_ko_protect):
         if ko_protect is not None:
             invalid_moves[i, ko_protect[0], ko_protect[1]] = 1
+
+    # Super ko-protection
+    if batch_history is not None:
+        # Create a new state with updated invalid moves so we can calculate child moves
+        updated_states = np.copy(batch_state)
+        updated_states[:, govars.INVD_CHNL] = (invalid_moves > 0)
+
+        board_size = np.prod(batch_state.shape[2:])
+        batch_children = np.array(
+            [gogame.children(s)[:board_size] for s in updated_states]
+        )
+
+        trunc_history = batch_history[:, :, :2]
+        for i, state in enumerate(batch_state):
+            for action1d, child_state in enumerate(batch_children[i]):
+                # Skip children that don't represent a valid move
+                if (child_state[:2] == 0).all():
+                    continue
+                if (trunc_history[i] == child_state[:2]).all(axis=1).all(axis=1).all(axis=1).any():
+                    action2d = action1d // state.shape[1:][0], action1d % state.shape[1:][1]
+                    batch_super_ko_invalid_array[i, action2d[0], action2d[1]] = 1
+
+        invalid_moves = invalid_moves + batch_super_ko_invalid_array
+
     return invalid_moves > 0
 
 
diff --git a/gym_go/tests/test_super_ko.py b/gym_go/tests/test_super_ko.py
index 263b152..4af2f8b 100644
--- a/gym_go/tests/test_super_ko.py
+++ b/gym_go/tests/test_super_ko.py
@@ -1,5 +1,7 @@
 import unittest
 
+import numpy as np
+
 import gym
 import gym_go
 from gym_go import gogame
@@ -35,5 +37,19 @@ def test_invalid_moves(self):
 
         self.assertTrue((invalid_moves == [[1, 0], [0, 0]]).all())
 
+    def test_batch_invalid_moves(self):
+        """Given an empty board and a history with a move, that same move should be invalid"""
+        state = gogame.init_state(2)
+        history = [gogame.next_state(state, 0)]
+
+        invalid_moves = state_utils.batch_compute_invalid_moves(
+            np.expand_dims(state, 0),
+            np.array([0]),
+            batch_ko_protect=np.array([None]),
+            batch_history=np.expand_dims(history, 0)
+        )
+
+        self.assertTrue((invalid_moves == [[[1, 0], [0, 0]]]).all())
+
 if __name__ == '__main__':
     unittest.main()

From bbff5dd8a22b0adf940c629379484c52b3310eff Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Tue, 3 May 2022 16:51:07 +1000
Subject: [PATCH 15/18] Allow super ko to be enabled/disabled via GoEnv

---
 gym_go/envs/go_env.py              | 11 +++++++----
 gym_go/tests/test_invalid_moves.py | 20 +++++++++++++++++++-
 gym_go/tests/test_super_ko.py      | 20 +++++++++++++++++++-
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/gym_go/envs/go_env.py b/gym_go/envs/go_env.py
index b0bfdd4..226f2d8 100644
--- a/gym_go/envs/go_env.py
+++ b/gym_go/envs/go_env.py
@@ -21,8 +21,9 @@ class GoEnv(gym.Env):
     govars = govars
     gogame = gogame
 
-    def __init__(self, size, komi=0, reward_method='real'):
+    def __init__(self, size, komi=0, super_ko=False, reward_method='real'):
         '''
+        @param super_ko: whether to enable super-ko rule (history tracking)
         @param reward_method: either 'heuristic' or 'real'
         heuristic: gives # black pieces - # white pieces.
         real: gives 0 for in-game move, 1 for winning, -1 for losing,
@@ -31,7 +32,7 @@ def __init__(self, size, komi=0, reward_method='real'):
         self.size = size
         self.komi = komi
         self.state_ = gogame.init_state(size)
-        self.history = []
+        self.history = [] if super_ko else None
         self.reward_method = RewardMethod(reward_method)
         self.observation_space = gym.spaces.Box(np.float32(0), np.float32(govars.NUM_CHNLS),
                                                 shape=(govars.NUM_CHNLS, size, size))
@@ -44,7 +45,8 @@ def reset(self):
         done, return state
         '''
         self.state_ = gogame.init_state(self.size)
-        self.history = []
+        if self.history is not None:
+            self.history = []
         self.done = False
         return np.copy(self.state_)
 
@@ -63,7 +65,8 @@ def step(self, action):
 
         self.old_state = self.state()
         self.state_ = gogame.next_state(self.state_, action, canonical=False, history=self.history)
-        self.history.append(self.old_state)
+        if self.history is not None:
+            self.history.append(self.old_state)
         self.done = gogame.game_ended(self.state_)
         return np.copy(self.state_), self.reward(), self.done, self.info()
 
diff --git a/gym_go/tests/test_invalid_moves.py b/gym_go/tests/test_invalid_moves.py
index b86d1af..0896e34 100644
--- a/gym_go/tests/test_invalid_moves.py
+++ b/gym_go/tests/test_invalid_moves.py
@@ -170,7 +170,7 @@ def test_invalid_super_ko_move(self):
         :return:
         """
 
-        self.env = gym.make('go-v0', size=2, reward_method='real')
+        self.env = gym.make('go-v0', size=2, super_ko=True, reward_method='real')
         self.env.reset()
 
         for move in [(0, 0), (1, 1), (1, 0), (0, 1), (0, 0), (1, 0)]:
@@ -193,6 +193,24 @@ def test_invalid_super_ko_move(self):
         with self.assertRaises(Exception):
             self.env.step(final_move)
 
+    def test_valid_when_super_ko_disabled(self):
+        self.env = gym.make('go-v0', size=2, super_ko=False, reward_method='real')
+        self.env.reset()
+
+        for move in [(0, 0), (1, 1), (1, 0), (0, 1), (0, 0), (1, 0)]:
+            state, reward, done, info = self.env.step(move)
+
+        # Test invalid channel
+        self.assertEqual(
+            np.count_nonzero(state[govars.INVD_CHNL]),
+            3,
+            state[govars.INVD_CHNL]
+        )
+        self.assertEqual(np.count_nonzero(state[govars.INVD_CHNL] == 1), 3)
+        self.assertEqual(state[govars.INVD_CHNL, 0, 0], 0)
+
+        self.env.step((0, 0))
+
     def test_invalid_game_already_over_move(self):
         self.env.step(None)
         self.env.step(None)
diff --git a/gym_go/tests/test_super_ko.py b/gym_go/tests/test_super_ko.py
index 4af2f8b..9f24747 100644
--- a/gym_go/tests/test_super_ko.py
+++ b/gym_go/tests/test_super_ko.py
@@ -10,7 +10,7 @@
 class TestGoEnvSuperKo(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.env = gym.make('go-v0', size=2)
+        self.env = gym.make('go-v0', size=2, super_ko=True)
 
     def setUp(self):
         self.env.reset()
@@ -18,16 +18,34 @@ def setUp(self):
     def test_initial_history(self):
         self.assertEqual(self.env.history, [])
 
+    def test_initial_history_no_super_ko(self):
+        self.env = gym.make('go-v0', size=2, super_ko=False)
+        self.assertEqual(self.env.history, None)
+
     def test_step_builds_history(self):
         self.env.step((0, 0))
         self.assertEqual(len(self.env.history), 1)
 
+    def test_step_ignores_history_no_super_ko(self):
+        self.env = gym.make('go-v0', size=2, super_ko=False)
+        self.env.reset()
+        self.env.step((0, 0))
+        self.assertEqual(self.env.history, None)
+
     def test_reset_clears_history(self):
         self.env.step((0, 0))
         self.assertNotEqual(self.env.history, [])
         self.env.reset()
         self.assertEqual(self.env.history, [])
 
+    def test_reset_clears_history_no_super_ko(self):
+        self.env = gym.make('go-v0', size=2, super_ko=False)
+        self.env.reset()
+        self.env.step((0, 0))
+        self.assertEqual(self.env.history, None)
+        self.env.reset()
+        self.assertEqual(self.env.history, None)
+
     def test_invalid_moves(self):
         """Given an empty board and a history with a move, that same move should be invalid"""
         state = gogame.init_state(2)

From 363d51b5f640532d35bb007fd1b3e3c9e995d72e Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Tue, 3 May 2022 16:51:32 +1000
Subject: [PATCH 16/18] Run efficiency tests both with and without super ko

---
 gym_go/tests/efficiency.py | 43 +++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/gym_go/tests/efficiency.py b/gym_go/tests/efficiency.py
index 392ca11..16a4ea1 100644
--- a/gym_go/tests/efficiency.py
+++ b/gym_go/tests/efficiency.py
@@ -11,10 +11,32 @@ class Efficiency(unittest.TestCase):
     boardsize = 9
     iterations = 64
 
-    def setUp(self) -> None:
+    def testOrderedTrajs(self):
         self.env = gym.make('go-v0', size=self.boardsize, reward_method='real')
+        self.doOrderedTrajs()
 
-    def testOrderedTrajs(self):
+    def testOrderedTrajsSuperKo(self):
+        self.env = gym.make('go-v0', size=self.boardsize, reward_method='real', super_ko=True)
+        self.doOrderedTrajs('super ko')
+
+    def testLowerBound(self):
+        self.env = gym.make('go-v0', size=self.boardsize, reward_method='real')
+        self.doLowerBound()
+
+    def testLowerBoundSuperKo(self):
+        self.env = gym.make('go-v0', size=self.boardsize, reward_method='real', super_ko=True)
+        self.doLowerBound('super ko')
+
+    def testRandTrajsWithChildren(self):
+        self.env = gym.make('go-v0', size=self.boardsize, reward_method='real')
+        self.doRandTrajsWithChildren()
+
+    def testRandTrajsWithChildrenSuperKo(self):
+        self.env = gym.make('go-v0', size=self.boardsize, reward_method='real', super_ko=True)
+        self.doRandTrajsWithChildren('super ko')
+
+
+    def doOrderedTrajs(self, msg=''):
         durs = []
         for _ in tqdm(range(self.iterations)):
             start = time.time()
@@ -28,9 +50,12 @@ def testOrderedTrajs(self):
 
         avg_time = np.mean(durs)
         std_time = np.std(durs)
-        print(f"Ordered Trajs: {avg_time:.3f} AVG, {std_time:.3f} STD", flush=True)
+        if msg != '':
+            msg = f' ({msg})'
+        print(f"Ordered Trajs{msg}: {avg_time:.3f} AVG, {std_time:.3f} STD", flush=True)
 
-    def testLowerBound(self):
+
+    def doLowerBound(self, msg=''):
         durs = []
         for _ in tqdm(range(self.iterations)):
             start = time.time()
@@ -52,9 +77,11 @@ def testLowerBound(self):
 
         avg_time = np.mean(durs)
         std_time = np.std(durs)
-        print(f"Lower bound: {avg_time:.3f} AVG, {std_time:.3f} STD", flush=True)
+        if msg != '':
+            msg = f' ({msg})'
+        print(f"Lower bound{msg}: {avg_time:.3f} AVG, {std_time:.3f} STD", flush=True)
 
-    def testRandTrajsWithChildren(self):
+    def doRandTrajsWithChildren(self, msg=''):
         durs = []
         num_steps = []
         for _ in tqdm(range(self.iterations)):
@@ -84,7 +111,9 @@ def testRandTrajsWithChildren(self):
         avg_time = np.mean(durs)
         std_time = np.std(durs)
         avg_steps = np.mean(num_steps)
-        print(f"Rand Trajs w/ Children: {avg_time:.3f} AVG SEC, {std_time:.3f} STD SEC, {avg_steps:.1f} AVG STEPS",
+        if msg != '':
+            msg = f' ({msg})'
+        print(f"Rand Trajs w/ Children{msg}: {avg_time:.3f} AVG SEC, {std_time:.3f} STD SEC, {avg_steps:.1f} AVG STEPS",
               flush=True)
 
 

From b81c3e7c7f84569ec002e9040d34abad56dea45f Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Tue, 3 May 2022 16:51:51 +1000
Subject: [PATCH 17/18] Remove trailing whitespace in README

---
 README.md | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index d0bd5e7..a6e1307 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # About
-An environment for the board game Go. It is implemented using OpenAI's Gym API. 
+An environment for the board game Go. It is implemented using OpenAI's Gym API.
 It is also optimized to be as efficient as possible in order to efficiently train ML models.
 
 # Installation
@@ -25,7 +25,7 @@ go_env.render('terminal')
 ```
 
 ```
-     0 1 2 3 4 5 6 
+     0 1 2 3 4 5 6
 0    ╔═╤═╤═╤═╤═╤═╗
 1    ╟─┼─┼─┼─┼─┼─╢
 2    ╟─┼─┼─┼─┼─○─╢
@@ -43,7 +43,7 @@ go_env.render('terminal')
 ```
 
 ```
-	0 1 2 3 4 5 6 
+	0 1 2 3 4 5 6
 0	╔═╤═╤═╤═╤═╤═╗
 1	╟─┼─┼─┼─┼─┼─╢
 2	╟─┼─┼─┼─┼─○─╢
@@ -64,21 +64,21 @@ python3 demo.py
 ![alt text](screenshots/human_ui.png)
 
 ### High level API
-[GoEnv](gym_go/envs/go_env.py) defines the Gym environment for Go. 
-It contains the highest level API for basic Go usage.  
+[GoEnv](gym_go/envs/go_env.py) defines the Gym environment for Go.
+It contains the highest level API for basic Go usage.
 
 ### Low level API
 [GoGame](gym_go/gogame.py) is the set of low-level functions that defines all the game logic of Go.
 `GoEnv`'s high level API is built on `GoGame`.
-These sets of functions are intended for a more detailed and finetuned 
+These sets of functions are intended for a more detailed and finetuned
 usage of Go.
 
 # Scoring
-We use Trump Taylor scoring, a simple area scoring, to determine the winner. A player's _area_ is defined as the number of empty points a 
-player's pieces surround plus the number of player's pieces on the board. The _winner_ is the player with the larger 
+We use Tromp-Taylor scoring, a simple area scoring, to determine the winner. A player's _area_ is defined as the number of empty points a
+player's pieces surround plus the number of player's pieces on the board. The _winner_ is the player with the larger
 area (a game is tied if both players have an equal amount of area on the board).
 
-There is also support for `komi`, a bias score constant to balance the advantage of black going first. 
+There is also support for `komi`, a bias score constant to balance the advantage of black going first.
 By default `komi` is set to 0.
 
 # Game ending
@@ -92,16 +92,16 @@ Reward methods are in _black_'s perspective
     * `0` - Game is tied
     * `1` - Black won
   * `0` - Otherwise
-* **Heuristic**: If the game is ongoing, the reward is `black area - white area`. 
-If black won, the reward is `BOARD_SIZE**2`. 
+* **Heuristic**: If the game is ongoing, the reward is `black area - white area`.
+If black won, the reward is `BOARD_SIZE**2`.
 If white won, the reward is `-BOARD_SIZE**2`.
 If tied, the reward is `0`.
 
 # State
-The `state` object that is returned by the `reset` and `step` functions of the environment is a 
-`6 x BOARD_SIZE x BOARD_SIZE` numpy array. All values in the array are either `0` or `1` 
+The `state` object that is returned by the `reset` and `step` functions of the environment is a
+`6 x BOARD_SIZE x BOARD_SIZE` numpy array. All values in the array are either `0` or `1`
 * **First and second channel:** represent the black and white pieces respectively.
-* **Third channel:** Indicator layer for whose turn it is 
+* **Third channel:** Indicator layer for whose turn it is
 * **Fourth channel:** Invalid moves (including ko-protection) for the next action
 * **Fifth channel:** Indicator layer for whether the previous move was a pass
 * **Sixth channel:** Indicator layer for whether the game is over

From 6fc7d2c0124deda92f7ef7bd531ad7d629d06721 Mon Sep 17 00:00:00 2001
From: Rohan Mitchell <rohan@rohanmitchell.com>
Date: Tue, 3 May 2022 17:01:53 +1000
Subject: [PATCH 18/18] Document super ko in README

---
 README.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/README.md b/README.md
index a6e1307..cc61c12 100644
--- a/README.md
+++ b/README.md
@@ -81,6 +81,17 @@ area (a game is tied if both players have an equal amount of area on the board).
 There is also support for `komi`, a bias score constant to balance the advantage of black going first.
 By default `komi` is set to 0.
 
+# Ko and super ko
+The game supports a simple implementation of the ko rule by default, which prevents immediate single-move take-back scenarios. In addition, an optional
+super ko rule can be enabled when creating the environment:
+
+```python
+go_env = gym.make('go-v0', size=7, super_ko=True)
+```
+
+This rule implements positional super ko by tracking play history, which catches repeated board positions that the regular ko rule does not detect,
+at the cost of some performance overhead.
+
 # Game ending
 A game ends when both players pass consecutively