
Minor change to save compute when rendering RLToyEnv with image_representations; added close() to release pygame resources when called; fixed 6 tests in TestGymEnvWrapper, 8 in TestRLToyEnv.
RaghuSpaceRajan committed Jan 10, 2025
1 parent 445da74 commit ea0fc1f
Showing 4 changed files with 108 additions and 119 deletions.
4 changes: 1 addition & 3 deletions mdp_playground/envs/gym_env_wrapper.py
@@ -360,9 +360,7 @@ def step(self, action):
)
probs[action] = 1 - self.transition_noise
old_action = action
action = int(
self._np_random.choice(self.env.action_space.n, size=1, p=probs)
) # random
action = self._np_random.choice(self.env.action_space.n, size=1, p=probs).item() # random
if old_action != action:
# print("NOISE inserted", old_action, action)
self.total_noisy_transitions_episode += 1
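The change in this hunk replaces int(...) around the size-1 array returned by _np_random.choice(...) with .item(). This is presumably to avoid NumPy's deprecation warning for converting arrays with ndim > 0 to Python scalars; .item() extracts the scalar explicitly. A minimal sketch of the difference, assuming a NumPy Generator in place of self._np_random and a hypothetical 4-action space with transition noise 0.1:

import numpy as np

rng = np.random.default_rng(0)            # stand-in for self._np_random
probs = np.full(4, 0.1 / 3)               # noise probability spread over the other actions
probs[2] = 1 - 0.1                        # the chosen action keeps probability 0.9

sampled = rng.choice(4, size=1, p=probs)  # returns an ndarray of shape (1,)
# int(sampled) emits a DeprecationWarning on recent NumPy versions
action = sampled.item()                   # extracts the Python int without a warning
print(type(action), action)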
28 changes: 21 additions & 7 deletions mdp_playground/envs/rl_toy_env.py
@@ -2348,9 +2348,7 @@ def render(self,):
# to only instantiate the render_space here and not in __init__ because it's only needed
# if render() is called.
if self.window is None:
if self.image_representations:
self.render_space = self.observation_space
else:
if not self.image_representations:
if self.config["state_space_type"] == "discrete":
self.render_space = ImageMultiDiscrete(
self.state_space_size,
@@ -2396,10 +2394,11 @@ def render(self,):
if self.clock is None and self.render_mode == "human":
self.clock = pygame.time.Clock()

# ##TODO There are repeated calculations here in calling get_concatenated_image
# that can be taken from storing variables in step() or reset().
if self.render_mode == "human":
rgb_array = self.render_space.get_concatenated_image(self.curr_state)
if not self.image_representations:
rgb_array = self.render_space.get_concatenated_image(self.curr_state)
elif self.image_representations:
rgb_array = self.curr_obs
pygame_surface = pygame.surfarray.make_surface(rgb_array)
self.window.blit(pygame_surface, pygame_surface.get_rect())
pygame.event.pump()
@@ -2409,7 +2408,22 @@
# The following line will automatically add a delay to keep the framerate stable.
self.clock.tick(self.metadata["render_fps"])
elif self.render_mode == "rgb_array":
return self.render_space.get_concatenated_image(self.curr_state)
if not self.image_representations:
return self.render_space.get_concatenated_image(self.curr_state)
elif self.image_representations:
return self.curr_obs

def close(self):
'''
Closes the environment and the pygame window if it was opened.
'''
if self.window is not None and self.render_mode == "human":
import pygame
pygame.display.quit()
pygame.quit()
self.window = None
self.clock = None


def dist_of_pt_from_line(pt, ptA, ptB):
"""Returns shortest distance of a point from a line defined by 2 points - ptA and ptB.
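With these changes, render() reuses self.curr_obs directly when image_representations is set instead of recomputing the image from the current state, and the new close() method releases the pygame window and clock. A minimal usage sketch, assuming the package's usual config-dict construction, that the remaining config keys take their defaults, and that the render mode is selected via a render_mode key (hypothetical here):

from mdp_playground.envs import RLToyEnv

config = {
    "state_space_type": "discrete",
    "action_space_type": "discrete",
    "image_representations": True,   # render() then reuses self.curr_obs directly
    "render_mode": "human",          # assumed key for selecting the pygame window
    "seed": 0,
}

env = RLToyEnv(**config)
obs, info = env.reset()
for _ in range(10):
    obs, reward, done, trunc, info = env.step(env.action_space.sample())
    env.render()   # blits the current observation to the pygame window
env.close()        # new in this commit: quits pygame and clears window/clock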
40 changes: 20 additions & 20 deletions tests/test_gym_env_wrapper.py
@@ -35,7 +35,7 @@ def test_r_delay(self):
"grayscale_obs": False,
"state_space_type": "discrete",
"action_space_type": "discrete",
"seed": 0,
"seed": 1,
# },
# 'seed': 0, #seed
}
@@ -52,7 +52,7 @@ def test_r_delay(self):
act = aew.action_space.sample()
next_state, reward, done, trunc, info = aew.step(act)
print("step, reward, done, act:", i, reward, done, act)
if i == 154 or i == 159:
if i == 124 or i == 152 or i == 171:
assert reward == 44.0, (
"1-step delayed reward in step: "
+ str(i)
@@ -73,7 +73,7 @@ def test_r_shift(self):
"grayscale_obs": False,
"state_space_type": "discrete",
"action_space_type": "discrete",
"seed": 0,
"seed": 1,
# },
# 'seed': 0, #seed
}
@@ -91,7 +91,7 @@ def test_r_shift(self):
act = aew.action_space.sample()
next_state, reward, done, trunc, info = aew.step(act)
print("step, reward, done, act:", i, reward, done, act)
if i == 153 or i == 158:
if i == 123 or i == 151 or i == 170:
assert reward == 45.0, (
"Shifted reward in step: " + str(i) + " should have been 45.0."
)
@@ -114,7 +114,7 @@ def test_r_scale(self):
"grayscale_obs": False,
"state_space_type": "discrete",
"action_space_type": "discrete",
"seed": 0,
"seed": 1,
# },
# 'seed': 0, #seed
}
@@ -131,7 +131,7 @@ def test_r_scale(self):
act = aew.action_space.sample()
next_state, reward, done, trunc, info = aew.step(act)
print("step, reward, done, act:", i, reward, done, act)
if i == 153 or i == 158:
if i == 123 or i == 151 or i == 170:
assert reward == 88.0, (
"Scaled reward in step: " + str(i) + " should have been 88.0."
)
@@ -236,15 +236,15 @@ def test_r_delay_p_noise_r_noise(self):
print("\033[32;1;4mTEST_MULTIPLE\033[0m")
config = {
"delay": 1,
"reward_noise": lambda a: a.normal(0, 0.1),
"transition_noise": 0.1,
"reward_noise": lambda s, a, rng: rng.normal(0, 0.1),
"transition_noise": 0.2,
# "GymEnvWrapper": {
"atari_preprocessing": True,
"frame_skip": 4,
"grayscale_obs": False,
"state_space_type": "discrete",
"action_space_type": "discrete",
"seed": 0,
"seed": 1,
# },
# 'seed': 0, #seed
}
@@ -262,24 +262,24 @@ def test_r_delay_p_noise_r_noise(self):
next_state, reward, done, trunc, info = aew.step(act)
print("step, reward, done, act:", i, reward, done, act)
# Testing hardcoded values at these timesteps implicitly tests that there
# were 21 noisy transitions in total and noise inserted in rewards.
if i == 154:
# were noisy transitions and noise inserted in rewards.
if i == 147:
np.testing.assert_allclose(
reward,
44.12183457980473,
44.0668047426572,
rtol=1e-05,
err_msg="1-step delayed reward in step: "
+ str(i)
+ " should have been 44.0.",
+ " should have been 44.066...",
)
if i == 199:
if i == 173:
np.testing.assert_allclose(
reward,
0.07467690634910334,
44.088450289124935,
rtol=1e-05,
err_msg="1-step delayed reward in step: "
+ str(i)
+ " should have been 44.0.",
+ " should have been 44.088...",
)
total_reward += reward
print("total_reward:", total_reward)
@@ -296,7 +296,7 @@ def test_discrete_irr_features(self):
"grayscale_obs": False,
"state_space_type": "discrete",
"action_space_type": "discrete",
"seed": 0,
"seed": 1,
"irrelevant_features": {
"state_space_type": "discrete",
"action_space_type": "discrete",
@@ -331,7 +331,7 @@ def test_discrete_irr_features(self):
act,
next_state[1],
)
if i == 154 or i == 159:
if i == 128 or i == 151:
assert reward == 44.0, (
"1-step delayed reward in step: "
+ str(i)
@@ -355,7 +355,7 @@ def test_image_transforms(self):
"grayscale_obs": False,
"state_space_type": "discrete",
"action_space_type": "discrete",
"seed": 0,
"seed": 1,
# },
# 'seed': 0, #seed
}
@@ -373,7 +373,7 @@ def test_image_transforms(self):
act = aew.action_space.sample()
next_state, reward, done, trunc, info = aew.step(act)
print("step, reward, done, act:", i, reward, done, act)
if i == 153 or i == 158:
if i == 123 or i == 151:
assert reward == 44.0, (
"Reward in step: " + str(i) + " should have been 44.0."
)
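Besides the new seeds and timestep indices, test_r_delay_p_noise_r_noise now passes reward_noise as a three-argument callable rather than a function of the RNG alone, and raises transition_noise from 0.1 to 0.2. A small sketch of the two callable shapes as they appear in the diff, where s and a are presumably the state and action:

import numpy as np

old_reward_noise = lambda a: a.normal(0, 0.1)            # removed form: receives only the RNG
new_reward_noise = lambda s, a, rng: rng.normal(0, 0.1)  # updated form: state, action, RNG

rng = np.random.default_rng(1)
print(old_reward_noise(rng), new_reward_noise(None, None, rng))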

