diff --git a/README.md b/README.md index df3559eb..1acc5eb6 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ python gym_test.py Alternatively, you can run the following snippet: ```python -import gym +import gymnasium as gym import evogym.envs from evogym import sample_robot @@ -78,15 +78,14 @@ from evogym import sample_robot if __name__ == '__main__': body, connections = sample_robot((5,5)) - env = gym.make('Walker-v0', body=body) + env = gym.make('Walker-v0', body=body, render_mode='human') env.reset() while True: action = env.action_space.sample()-1 - ob, reward, done, info = env.step(action) - env.render() + ob, reward, terminated, truncated, info = env.step(action) - if done: + if terminated or truncated: env.reset() env.close() diff --git a/evogym/envs/__init__.py b/evogym/envs/__init__.py index 6484d8c6..49e7efd3 100644 --- a/evogym/envs/__init__.py +++ b/evogym/envs/__init__.py @@ -10,7 +10,7 @@ from evogym.envs.traverse import * from evogym.envs.walk import * -from gym.envs.registration import register +from gymnasium.envs.registration import register ## SIMPLE ## register( diff --git a/evogym/envs/balance.py b/evogym/envs/balance.py index 61a8b6b7..8589fe35 100644 --- a/evogym/envs/balance.py +++ b/evogym/envs/balance.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,24 +10,31 @@ import math import numpy as np import os +from typing import Dict, Any, Optional, Tuple class Balance(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 
'Balancer-v0.json')) self.world.add_from_array('robot', body, 15, 3, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=float) def get_obs(self, pos_final): com_final = np.mean(pos_final, 1) @@ -71,12 +78,12 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -84,26 +91,32 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class BalanceJump(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Balancer-v1.json')) self.world.add_from_array('robot', body, 10, 1, 
connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=float) def get_obs(self, pos_final): com_final = np.mean(pos_final, 1) @@ -148,12 +161,12 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -161,4 +174,4 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs \ No newline at end of file + return obs, {} \ No newline at end of file diff --git a/evogym/envs/base.py b/evogym/envs/base.py index 7d007b18..7d35752e 100644 --- a/evogym/envs/base.py +++ b/evogym/envs/base.py @@ -1,10 +1,10 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding -from typing import Dict, Optional, List 
+from typing import Dict, Optional, List, Any from evogym import * import random @@ -19,20 +19,34 @@ class EvoGymBase(gym.Env): Args: world (EvoWorld): object specifying the voxel layout of the environment. + render_mode (Optional[str]): values of `screen` and `human` will automatically render to a debug window every `step()`. If set to `img` or `rgb_array`, `render()` will return an image array. No rendering by default (default = None) + render_options (Optional[Dict[str, Any]]): dictionary of rendering options. See EvoGymBase.render() for details (default = None) """ - def __init__(self, world: EvoWorld) -> None: + + metadata = {'render_modes': ['screen', 'human', 'img', 'rgb_array']} + + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ) -> None: # sim - self._sim = EvoSim(self.world) + self._sim = EvoSim(world) self._default_viewer = EvoViewer(self._sim) + + # render + self._render_mode = render_mode + self._render_options = render_options def step(self, action: Dict[str, np.ndarray]) -> bool: """ - Step the environment by running physcis computations. + Step the environment by running physics computations. Args: action (Dict[str, np.ndarray]): dictionary mapping robot names to actions. Actions are `(n,)` arrays, where `n` is the number of actuators in the target robot. - + Returns: bool: whether or not the simulation has reached an unstable state and cannot be recovered (`True` = unstable). """ @@ -42,10 +56,13 @@ def step(self, action: Dict[str, np.ndarray]) -> bool: a[abs(a) < 1e-8] = 0 self._sim.set_action(robot_name, a) done = self._sim.step() + + if self._render_mode == 'human' or self._render_mode == 'screen': + self.render() return done - def reset(self,) -> None: + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> None: """ Reset the simulation to the initial state. 
""" @@ -71,27 +88,31 @@ def default_viewer(self,) -> EvoViewer: """ return self._default_viewer - def render(self, - mode: str ='screen', - verbose: bool = False, - hide_background: bool = False, - hide_grid: bool = False, - hide_edges: bool = False, - hide_voxels: bool = False) -> Optional[np.ndarray]: - """ - Render the simulation. - - Args: - mode (str): values of 'screen' and 'human' will render to a debug window. If set to 'img' will return an image array. - verbose (bool): whether or not to print the rendering speed (rps) every second. - hide_background (bool): whether or not to render the cream-colored background. If shut off background will be white. - hide_grid (bool): whether or not to render the grid. - hide_edges (bool): whether or not to render edges around all objects. - hide_voxels (bool): whether or not to render voxels. - + def render( + self, + ) -> Optional[np.ndarray]: + """ + Render the simulation according to the `render_mode` and `render_options` specified at initialization. + The following rendering options are available as key-value pairs in the `render_options` dictionary: + - `verbose` (bool): whether or not to print the rendering speed (rps) every second. (default = False) + - `hide_background` (bool): whether or not to render the cream-colored background. If shut off background will be white. (default = False) + - `hide_grid` (bool): whether or not to render the grid. (default = False) + - `hide_edges` (bool): whether or not to render edges around all objects. (default = False) + - `hide_voxels` (bool): whether or not to render voxels. (default = False) + Returns: - Optional[np.ndarray]: if `mode` is set to `img`, will return an image array. + Optional[np.ndarray]: if `mode` is set to `img` or `rgb_array`, will return an image array. Otherwise, will return `None`. 
""" + mode, render_options = self._render_mode, {} if self._render_options is None else self._render_options + if mode is None: + return None + + verbose = render_options.get('verbose', False) + hide_background = render_options.get('hide_background', False) + hide_grid = render_options.get('hide_grid', False) + hide_edges = render_options.get('hide_edges', False) + hide_voxels = render_options.get('hide_voxels', False) + return self.default_viewer.render(mode, verbose, hide_background, hide_grid, hide_edges, hide_voxels) def close(self) -> None: @@ -360,9 +381,14 @@ class BenchmarkBase(EvoGymBase): DATA_PATH = pkg_resources.resource_filename('evogym.envs', os.path.join('sim_files')) VOXEL_SIZE = 0.1 - def __init__(self, world): + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): - EvoGymBase.__init__(self, world) + EvoGymBase.__init__(self, world=world, render_mode=render_mode, render_options=render_options) self.default_viewer.track_objects('robot') def step(self, action): diff --git a/evogym/envs/change_shape.py b/evogym/envs/change_shape.py index c782edc6..14b98872 100644 --- a/evogym/envs/change_shape.py +++ b/evogym/envs/change_shape.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,22 +10,29 @@ from math import * import numpy as np import os +from typing import Dict, Any, Optional class ShapeBase(BenchmarkBase): - def __init__(self, world): - super().__init__(world) + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): - def reset(self): + super().__init__(world=world, render_mode=render_mode, render_options=render_options) + + def 
reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} ### ---------------------------------------------------------------------- @@ -77,21 +84,27 @@ def convex_poly_area(self, pts_cw): class MaximizeShape(ShapeBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ShapeChange.json')) self.world.add_from_array('robot', body, 7, 1, connections=connections) # init sim - ShapeBase.__init__(self, self.world) + ShapeBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=float) def step(self, action): @@ -117,8 +130,8 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} def get_reward(self, robot_pos_init, robot_pos_final): @@ -136,21 +149,27 @@ def get_reward(self, robot_pos_init, robot_pos_final): class MinimizeShape(ShapeBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ShapeChange.json')) self.world.add_from_array('robot', body, 7, 1, connections=connections) # init sim - ShapeBase.__init__(self, self.world) + ShapeBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=float) def step(self, action): @@ -176,8 +195,8 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} def get_reward(self, robot_pos_init, robot_pos_final): @@ -195,21 +214,27 @@ def get_reward(self, robot_pos_init, robot_pos_final): class MaximizeXShape(ShapeBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ShapeChange.json')) self.world.add_from_array('robot', body, 7, 1, connections=connections) # init sim - ShapeBase.__init__(self, self.world) + ShapeBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=float) def step(self, action): @@ -235,8 +260,8 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} def get_reward(self, robot_pos_init, robot_pos_final): @@ -255,21 +280,27 @@ def get_reward(self, robot_pos_init, robot_pos_final): class MaximizeYShape(ShapeBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ShapeChange.json')) self.world.add_from_array('robot', body, 7, 1, connections=connections) # init sim - ShapeBase.__init__(self, self.world) + ShapeBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(num_robot_points,), dtype=float) def step(self, action): @@ -295,8 +326,8 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} def get_reward(self, robot_pos_init, robot_pos_final): diff --git a/evogym/envs/climb.py b/evogym/envs/climb.py index 72377f9c..a71eb7d3 100644 --- a/evogym/envs/climb.py +++ b/evogym/envs/climb.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,16 +10,22 @@ import math import numpy as np import os +from typing import Dict, Any, Optional class ClimbBase(BenchmarkBase): - def __init__(self, world): - - super().__init__(world) + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): + + super().__init__(world=world, render_mode=render_mode, render_options=render_options) - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -27,26 +33,32 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class Climb0(ClimbBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Climber-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - ClimbBase.__init__(self, 
self.world) + ClimbBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=float) def step(self, action): @@ -79,26 +91,32 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class Climb1(ClimbBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Climber-v1.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - ClimbBase.__init__(self, self.world) + ClimbBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=np.float) + self.action_space = 
spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=float) def step(self, action): @@ -131,27 +149,33 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class Climb2(ClimbBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Climber-v2.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - ClimbBase.__init__(self, self.world) + ClimbBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 3 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -182,12 +206,12 @@ def step(self, action): print("SIMULATION UNSTABLE... 
TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -197,4 +221,4 @@ def reset(self): self.get_ceil_obs("robot", ["pipe"], self.sight_dist), )) - return obs \ No newline at end of file + return obs, {} \ No newline at end of file diff --git a/evogym/envs/flip.py b/evogym/envs/flip.py index 994e35db..d529d1e6 100644 --- a/evogym/envs/flip.py +++ b/evogym/envs/flip.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,25 +10,32 @@ import math import numpy as np import os +from typing import Dict, Any, Optional class Flipping(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Flipper-v0.json')) self.world.add_from_array('robot', body, 60, 1, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - 
self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(1 + num_robot_points,), dtype=float) # reward self.num_flips = 0 @@ -78,12 +85,12 @@ def step(self, action): done = True - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) self.num_flips = 0 @@ -93,4 +100,4 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs \ No newline at end of file + return obs, {} \ No newline at end of file diff --git a/evogym/envs/jump.py b/evogym/envs/jump.py index 0fab281e..ac528cc4 100644 --- a/evogym/envs/jump.py +++ b/evogym/envs/jump.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,25 +10,32 @@ import math import numpy as np import os +from typing import Dict, Any, Optional, Tuple class StationaryJump(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 
'Jumper-v0.json')) self.world.add_from_array('robot', body, 32, 1, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 2 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points + (self.sight_dist*2 +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points + (self.sight_dist*2 +1),), dtype=float) def step(self, action): @@ -58,12 +65,12 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -72,4 +79,4 @@ def reset(self): self.get_floor_obs("robot", ["ground"], self.sight_dist), )) - return obs + return obs, {} diff --git a/evogym/envs/manipulate.py b/evogym/envs/manipulate.py index 083d3c5d..1c7b257c 100644 --- a/evogym/envs/manipulate.py +++ b/evogym/envs/manipulate.py @@ -1,8 +1,8 @@ from evogym.envs.base import EvoGymBase -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import 
error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -11,15 +11,22 @@ import math import numpy as np import os +from typing import Dict, Any, Optional, Tuple class PackageBase(BenchmarkBase): - def __init__(self, world): - super().__init__(world) + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): + + super().__init__(world=world, render_mode=render_mode, render_options=render_options) self.default_viewer.track_objects('robot', 'package') - def get_obs(self, robot_pos_final, robot_vel_final, package_pos_final, package_vel_final): + def get_obs(self, robot_pos_final, robot_vel_final, package_pos_final, package_vel_final) -> np.ndarray: robot_com_pos = np.mean(robot_pos_final, axis=1) robot_com_vel = np.mean(robot_vel_final, axis=1) @@ -55,9 +62,9 @@ def get_reward(self, package_pos_init, package_pos_final, robot_pos_init, robot_ return reward - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation robot_pos_final = self.object_pos_at_time(self.get_time(), "robot") @@ -71,26 +78,32 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class CarrySmallRect(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Carrier-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # 
set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 3.0*self.VOXEL_SIZE @@ -149,26 +162,32 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class CarrySmallRectToTable(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Carrier-v1.json')) self.world.add_from_array('robot', body, 1, 4, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) 
+ self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 6.0*self.VOXEL_SIZE @@ -225,26 +244,32 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class PushSmallRect(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Pusher-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 0.0*self.VOXEL_SIZE @@ -285,26 +310,32 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, 
debugging info + return obs, reward, done, False, {} class PushSmallRectOnOppositeSide(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Pusher-v1.json')) self.world.add_from_array('robot', body, 13, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 0.0*self.VOXEL_SIZE @@ -345,26 +376,32 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class ThrowSmallRect(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Thrower-v0.json')) self.world.add_from_array('robot', body, 1, 1, 
connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(6 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 0.0*self.VOXEL_SIZE @@ -415,15 +452,24 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class CatchSmallRect(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): self.robot_body = body self.robot_connections = connections + + self.render_mode = render_mode + self.render_options = render_options self.random_init() @@ -452,7 +498,7 @@ def random_init(self): self.world.add_object(peg2) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=self.render_mode, render_options=self.render_options) self.default_viewer.track_objects('robot', 'package') @@ -460,13 +506,13 @@ def random_init(self): num_actuators = self.get_actuator_indices('robot').size num_robot_points = 
self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 5.0*self.VOXEL_SIZE - def get_obs_catch(self, robot_pos_final, package_pos_final): + def get_obs_catch(self, robot_pos_final, package_pos_final) -> np.ndarray: robot_com_pos = np.mean(robot_pos_final, axis=1) package_com_pos = np.mean(package_pos_final, axis=1) @@ -525,12 +571,12 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) self.default_viewer.hide_debug_window() self.random_init() @@ -558,25 +604,31 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class ToppleBeam(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'BeamToppler-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + 
PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 0.0*self.VOXEL_SIZE @@ -651,12 +703,12 @@ def step(self, action): reward += 1.0 done = True - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) # observation robot_pos_final = self.object_pos_at_time(self.get_time(), "robot") @@ -671,25 +723,31 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class SlideBeam(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'BeamSlider-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, 
render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=float) # threshhold height self.thresh_height = 0.0*self.VOXEL_SIZE @@ -760,12 +818,12 @@ def step(self, action): reward += 1.0 done = True - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) # observation robot_pos_final = self.object_pos_at_time(self.get_time(), "robot") @@ -780,25 +838,31 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class LiftSmallRect(PackageBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Lifter-v0.json')) self.world.add_from_array('robot', body, 2, 3, connections=connections) # init sim - PackageBase.__init__(self, self.world) + PackageBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # 
set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(7 + num_robot_points,), dtype=float) def get_reward_lift(self, robot_pos_init, robot_pos_final, package_pos_init, package_pos_final): @@ -852,12 +916,12 @@ def step(self, action): print("SIMULATION UNSTABLE... TERMINATING") reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) # observation robot_pos_final = self.object_pos_at_time(self.get_time(), "robot") @@ -872,5 +936,5 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} \ No newline at end of file diff --git a/evogym/envs/multi_goal.py b/evogym/envs/multi_goal.py index 9be0e38b..2243d68b 100644 --- a/evogym/envs/multi_goal.py +++ b/evogym/envs/multi_goal.py @@ -1,7 +1,7 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -10,6 +10,7 @@ import math import numpy as np import os +from typing import 
Dict, Any, Optional class Goal(): def __init__(self, name, requirements = None): @@ -20,8 +21,14 @@ def evaluate_reward(self, args): class GoalBase(BenchmarkBase): - def __init__(self, world): - super().__init__(world) + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): + + super().__init__(world=world, render_mode=render_mode, render_options=render_options) def init_reward_goals(self, goals): @@ -90,21 +97,27 @@ def get_obs(self, args): class BiWalk(GoalBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'BidirectionalWalker-v0.json')) self.world.add_from_array('robot', body, 33, 1, connections=connections) # init sim - GoalBase.__init__(self, self.world) + GoalBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(5 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(5 + num_robot_points,), dtype=float) self.set_random_goals(20, 50, 100) @@ -171,12 +184,12 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return 
obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) self.current_goal = 0 self.set_random_goals(20, 50, 100) @@ -196,4 +209,4 @@ def reset(self): }) )) - return obs + return obs, {} diff --git a/evogym/envs/traverse.py b/evogym/envs/traverse.py index faeb8bec..1cfcbdb9 100644 --- a/evogym/envs/traverse.py +++ b/evogym/envs/traverse.py @@ -1,8 +1,8 @@ from evogym.envs.base import EvoGymBase -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding from evogym import * from evogym.envs import BenchmarkBase @@ -11,11 +11,18 @@ import math import numpy as np import os +from typing import Dict, Any, Optional class StairsBase(BenchmarkBase): - def __init__(self, world): - super().__init__(world) + def __init__( + self, + world: EvoWorld, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): + + super().__init__(world=world, render_mode=render_mode, render_options=render_options) def get_reward(self, robot_pos_init, robot_pos_final): @@ -25,9 +32,9 @@ def get_reward(self, robot_pos_init, robot_pos_final): reward = (robot_com_pos_final[0] - robot_com_pos_init[0]) return reward - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation robot_ort = self.object_orientation_at_time(self.get_time(), "robot") @@ -38,27 +45,33 @@ def reset(self): self.get_floor_obs("robot", ["ground"], self.sight_dist), )) - return obs + return obs, {} class StepsUp(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: 
np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'UpStepper-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -97,27 +110,33 @@ def step(self, action): done = True reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class StepsDown(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'DownStepper-v0.json')) self.world.add_from_array('robot', body, 1, 11, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, 
render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -156,27 +175,33 @@ def step(self, action): done = True reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class WalkingBumpy(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ObstacleTraverser-v0.json')) self.world.add_from_array('robot', body, 2, 1, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist 
+1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -216,27 +241,33 @@ def step(self, action): done = True reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class WalkingBumpy2(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'ObstacleTraverser-v1.json')) self.world.add_from_array('robot', body, 2, 4, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -273,27 +304,33 @@ def step(self, action): done = True reward += 2.0 - # observation, reward, has simulation met termination conditions, debugging info - 
return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class VerticalBarrier(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Hurdler-v0.json')) self.world.add_from_array('robot', body, 2, 4, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -328,27 +365,33 @@ def step(self, action): done = True reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} class FloatingPlatform(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world 
self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'PlatformJumper-v0.json')) self.world.add_from_array('robot', body, 1, 6, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) # terrain self.terrain_list = ["platform_1", "platform_2", "platform_3", "platform_4", "platform_5", "platform_6", "platform_7"] @@ -392,12 +435,12 @@ def step(self, action): reward -= 3.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) # observation robot_ort = self.object_orientation_at_time(self.get_time(), "robot") @@ -408,26 +451,32 @@ def reset(self): self.get_floor_obs("robot", self.terrain_list, self.sight_dist), )) - return obs + return obs, {} class Gaps(StairsBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + 
render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'GapJumper-v0.json')) self.world.add_from_array('robot', body, 1, 6, connections=connections) # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) # terrain self.terrain_list = ["platform_1", "platform_2", "platform_3", "platform_4", "platform_5"] @@ -466,12 +515,12 @@ def step(self, action): reward -= 3.0 done = True - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - EvoGymBase.reset(self) + EvoGymBase.reset(self, seed=seed, options=options) # observation robot_ort = self.object_orientation_at_time(self.get_time(), "robot") @@ -482,11 +531,17 @@ def reset(self): self.get_floor_obs("robot", self.terrain_list, self.sight_dist), )) - return obs + return obs, {} class BlockSoup(StairsBase): - def __init__(self, body, connections=None): + def 
__init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Traverser-v0.json')) @@ -526,15 +581,15 @@ def __init__(self, body, connections=None): count += 1 # init sim - StairsBase.__init__(self, self.world) + StairsBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(3 + num_robot_points + (2*self.sight_dist +1),), dtype=float) def step(self, action): @@ -570,12 +625,12 @@ def step(self, action): done = True reward += 2.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation robot_ort = self.object_orientation_at_time(self.get_time(), "robot") @@ -586,4 +641,4 @@ def reset(self): self.get_floor_obs("robot", self.terrain_list, self.sight_dist), )) - return obs \ No newline at end of file + return obs, {} \ No newline at end of file diff --git a/evogym/envs/walk.py 
b/evogym/envs/walk.py index d02a4f68..c753529b 100644 --- a/evogym/envs/walk.py +++ b/evogym/envs/walk.py @@ -1,7 +1,8 @@ -import gym -from gym import error, spaces -from gym import utils -from gym.utils import seeding +import gymnasium as gym +from gymnasium import error, spaces +from gymnasium import utils +from gymnasium.utils import seeding +from typing import Optional, Dict, Any from evogym import * from evogym.envs import BenchmarkBase @@ -13,21 +14,27 @@ class WalkingFlat(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'Walker-v0.json')) self.world.add_from_array('robot', body, 1, 1, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points,), dtype=float) def step(self, action): @@ -61,12 +68,12 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, 
seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -74,25 +81,31 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class SoftBridge(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'BridgeWalker-v0.json')) self.world.add_from_array('robot', body, 2, 5, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + 1 + num_robot_points,), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + 1 + num_robot_points,), dtype=float) def step(self, action): @@ -127,12 +140,12 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + 
super().reset(seed=seed, options=options) # observation obs = np.concatenate(( @@ -141,26 +154,32 @@ def reset(self): self.get_relative_pos_obs("robot"), )) - return obs + return obs, {} class Duck(BenchmarkBase): - def __init__(self, body, connections=None): + def __init__( + self, + body: np.ndarray, + connections: Optional[np.ndarray] = None, + render_mode: Optional[str] = None, + render_options: Optional[Dict[str, Any]] = None, + ): # make world self.world = EvoWorld.from_json(os.path.join(self.DATA_PATH, 'CaveCrawler-v0.json')) self.world.add_from_array('robot', body, 1, 2, connections=connections) # init sim - BenchmarkBase.__init__(self, self.world) + BenchmarkBase.__init__(self, world=self.world, render_mode=render_mode, render_options=render_options) # set action space and observation space num_actuators = self.get_actuator_indices('robot').size num_robot_points = self.object_pos_at_time(self.get_time(), "robot").size self.sight_dist = 5 - self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=np.float) - self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points + 2*(self.sight_dist*2 +1),), dtype=np.float) + self.action_space = spaces.Box(low= 0.6, high=1.6, shape=(num_actuators,), dtype=float) + self.observation_space = spaces.Box(low=-100.0, high=100.0, shape=(2 + num_robot_points + 2*(self.sight_dist*2 +1),), dtype=float) def step(self, action): @@ -196,12 +215,12 @@ def step(self, action): done = True reward += 1.0 - # observation, reward, has simulation met termination conditions, debugging info - return obs, reward, done, {} + # observation, reward, has simulation met termination conditions, truncated, debugging info + return obs, reward, done, False, {} - def reset(self): + def reset(self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) -> Tuple[np.ndarray, Dict[str, Any]]: - super().reset() + super().reset(seed=seed, options=options) # observation obs = np.concatenate(( 
@@ -211,4 +230,4 @@ def reset(self): self.get_ceil_obs("robot", ["terrain"], self.sight_dist), )) - return obs + return obs, {} diff --git a/evogym/viewer.py b/evogym/viewer.py index 20cb787a..14f4c539 100644 --- a/evogym/viewer.py +++ b/evogym/viewer.py @@ -187,18 +187,20 @@ def set_tracking_settings(self, **settings) -> None: parsed_arg = arg.split('_')[1] self._tracking_lock[arg] = parsed_arg - def render(self, - mode: str ='screen', - verbose: bool = False, - hide_background: bool = False, - hide_grid: bool = False, - hide_edges: bool = False, - hide_voxels: bool = False) -> Optional[np.ndarray]: + def render( + self, + mode: str = 'screen', + verbose: bool = False, + hide_background: bool = False, + hide_grid: bool = False, + hide_edges: bool = False, + hide_voxels: bool = False + ) -> Optional[np.ndarray]: """ Render the simulation. Args: - mode (str): values of 'screen' and 'human' will render to a debug window. If set to 'img' will return an image array. + mode (str): values of `screen` and `human` will render to a debug window. If set to `img` or `rgb_array` will return an image array. verbose (bool): whether or not to print the rendering speed (rps) every second. hide_background (bool): whether or not to render the cream-colored background. If shut off background will be white. hide_grid (bool): whether or not to render the grid. @@ -206,10 +208,10 @@ def render(self, hide_voxels (bool): whether or not to render voxels. Returns: - Optional[np.ndarray]: if `mode` is set to `img`, will return an image array. + Optional[np.ndarray]: if `mode` is set to `img` or `rgb_array`, will return an image array. """ - accepted_modes = ['screen', 'human', 'img'] + accepted_modes = ['screen', 'human', 'img', 'rgb_array'] if not mode in accepted_modes: raise ValueError( f'mode {mode} is not a valid mode. 
The valid modes are {accepted_modes}' @@ -236,7 +238,7 @@ def render(self, self._has_init_screen_camera = True self._viewer.render(self.screen_camera, *render_settings) - if mode == 'img': + if mode == 'img' or mode == 'rgb_array': if not self._has_init_img_camera: self._init_img_camera() self._has_init_img_camera = True diff --git a/pyproject.toml b/pyproject.toml index 14117b6c..b453189b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "gym==0.22.0", + "gymnasium", "numpy", ] diff --git a/tests/test_render.py b/tests/test_render.py index f0d26cab..7ec26227 100644 --- a/tests/test_render.py +++ b/tests/test_render.py @@ -1,22 +1,21 @@ from unittest import TestCase import evogym.envs -import gym +import gymnasium as gym from evogym import sample_robot class RenderTest(TestCase): def test_it(self): body, connections = sample_robot((5, 5)) - env = gym.make("Walker-v0", body=body) + env = gym.make("Walker-v0", body=body, render_mode="human") env.reset() for _ in range(100): action = env.action_space.sample() - 1 - ob, reward, done, info = env.step(action) - env.render() + ob, reward, terminated, truncated, info = env.step(action) - if done: + if terminated or truncated: env.reset() env.close()