-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenvironment.py
71 lines (55 loc) · 2.39 KB
/
environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gym
from gym import spaces
from rubik.cube import Cube
from evaluation import score_cube, PATTERNS
import numpy as np
from lib import *
# Move strings the agent can pick from; the position in this list is the
# action id. Each face letter appears as-is and then with an "i" suffix,
# face by face: R, Ri, L, Li, U, Ui, D, Di, F, Fi, B, Bi.
#
# Disabled candidates that could be appended to the action set:
#   "Y", "Yi",
#   "Li Ui L U F U Fi",  # swap out front left corner of second layer
#   "R U Ri Ui Fi Ui F"  # swap out front right corner of second layer
ACTIONS = [face + suffix for face in "RLUDFB" for suffix in ("", "i")]
class RubiksCubeEnv(gym.Env):
    """Gym environment in which an agent turns the faces of a Rubik's cube.

    An action is an index into the module-level ``ACTIONS`` list; the selected
    move string is applied to the cube through ``Cube.sequence``. An
    observation is a vector of 54 sticker colours, each encoded as a colour
    index taken from ``_COLOR_INDEX``.

    Attributes:
        cube: the cube currently being manipulated.
        step_count: number of ``step`` calls since construction / last reset.
        action_history: space-prefixed moves applied since the last reset.
        scrable: space-prefixed scramble applied by the last ``reset``
            (empty until ``reset`` has been called).
    """

    # Flat penalty subtracted from the cube score on every step.
    _STEP_COST = 40

    # Sticker letter -> numeric colour index used in observations.
    _COLOR_INDEX = {'R': 0, 'G': 1, 'B': 2, 'Y': 3, 'W': 4, 'O': 5}

    def __init__(self, cube):
        """Create the environment around an existing ``cube``."""
        super().__init__()
        self.cube = cube
        self.step_count = 0
        self.action_history = ""
        # Defined up front so step() can report it even when the cube gets
        # solved before reset() was ever called.
        self.scrable = ""
        # 6 faces * 2 directions (clockwise, counter-clockwise)
        self.action_space = spaces.Discrete(len(ACTIONS))
        # 9 stickers per face * 6 faces = 54 stickers, each holding one of
        # the colour indices 0..5, so the upper bound must be 5 (not 1).
        self.observation_space = spaces.Box(
            low=0,
            high=len(self._COLOR_INDEX) - 1,
            shape=(54,),
            dtype=np.float32,
        )

    def step(self, action):
        """Apply the move ``ACTIONS[action]`` and score the resulting cube.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is
            ``score_cube(cube)`` minus the per-step cost, ``done`` tells
            whether the cube is solved, and ``info`` is an empty dict.
        """
        move = ACTIONS[action]
        self.cube.sequence(move)
        self.action_history = self.action_history + " " + move
        self.step_count += 1

        observation = self.get_observation()
        reward = score_cube(self.cube) - self._STEP_COST
        done = self.is_solved()
        if done:
            print(f"{COLORS['green']} !! SOLVED in {self.step_count} turns !! Reward: {reward}{COLORS['reset']} (Scramble: {self.scrable}, sol: {self.action_history})")
        return observation, reward, done, {}

    def reset(self, permutations=2):
        """Restore the ``PATTERNS["all"]`` cube and scramble it.

        Args:
            permutations: number of random moves from ``ACTIONS`` used for
                the scramble.

        Returns:
            The observation of the scrambled cube.
        """
        self.step_count = 0
        self.action_history = ""
        self.cube = Cube(PATTERNS["all"])

        # Draw the whole scramble first, then apply it exactly once so the
        # recorded scramble always matches what was done to the cube.
        moves = [np.random.choice(ACTIONS) for _ in range(permutations)]
        self.scrable = "".join(" " + move for move in moves)
        self.cube.sequence(self.scrable)
        return self.get_observation()

    def get_observation(self):
        """Return the cube's stickers as a float32 array of colour indices."""
        # flat_str() yields one colour letter per sticker.
        state = self.cube.flat_str()
        # float32 matches the dtype declared by observation_space.
        return np.array(
            [self._COLOR_INDEX[color] for color in state],
            dtype=np.float32,
        )

    def is_solved(self):
        """Tell whether the cube's string form equals that of ``PATTERNS["all"]``."""
        return str(PATTERNS["all"]) == str(self.cube)