From 6523e1d6e517f4a92b3930eabca79cdf2f6d92c4 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Thu, 22 Jun 2023 15:47:49 +0200 Subject: [PATCH 01/50] Implementing record interface - first step towards video recorder interface - preliminary support for some environments, pygame environment support still missing - core support functionality - dummy class for video recording implemented --- mushroom_rl/core/core.py | 114 ++++++++++-------- mushroom_rl/core/environment.py | 38 +++--- mushroom_rl/environments/atari.py | 9 +- mushroom_rl/environments/car_on_hill.py | 4 +- mushroom_rl/environments/cart_pole.py | 4 +- mushroom_rl/environments/dm_control_env.py | 7 +- mushroom_rl/environments/grid_world.py | 7 +- mushroom_rl/environments/gym_env.py | 12 +- mushroom_rl/environments/habitat_env.py | 16 +-- mushroom_rl/environments/igibson_env.py | 7 +- mushroom_rl/environments/inverted_pendulum.py | 4 +- mushroom_rl/environments/minigrid_env.py | 1 + mushroom_rl/environments/mujoco.py | 4 +- mushroom_rl/environments/puddle_world.py | 4 +- mushroom_rl/environments/pybullet.py | 9 +- mushroom_rl/environments/segway.py | 7 +- mushroom_rl/environments/ship_steering.py | 7 +- mushroom_rl/utils/pybullet/viewer.py | 2 + mushroom_rl/utils/record.py | 9 ++ 19 files changed, 167 insertions(+), 98 deletions(-) create mode 100644 mushroom_rl/utils/record.py diff --git a/mushroom_rl/core/core.py b/mushroom_rl/core/core.py index 5e480fa16..44c50ddc8 100644 --- a/mushroom_rl/core/core.py +++ b/mushroom_rl/core/core.py @@ -1,6 +1,7 @@ from tqdm import tqdm from collections import defaultdict +from mushroom_rl.utils.record import VideoRecorder class Core(object): @@ -8,15 +9,14 @@ class Core(object): Implements the functions to run a generic algorithm. """ - def __init__(self, agent, mdp, callbacks_fit=None, callback_step=None): + def __init__(self, agent, mdp, callbacks_fit=None, callback_step=None, record_dictionary=None): """ Constructor. 
Args: agent (Agent): the agent moving according to a policy; mdp (Environment): the environment in which the agent moves; - callbacks_fit (list): list of callbacks to execute at the end of - each fit; + callbacks_fit (list): list of callbacks to execute at the end of each fit; callback_step (Callback): callback to execute after each step; """ @@ -36,14 +36,17 @@ def __init__(self, agent, mdp, callbacks_fit=None, callback_step=None): self._n_steps_per_fit = None self._n_episodes_per_fit = None + if record_dictionary is None: + record_dictionary = dict() + self._record = self._build_recorder_class(**record_dictionary) + def learn(self, n_steps=None, n_episodes=None, n_steps_per_fit=None, - n_episodes_per_fit=None, render=False, quiet=False): + n_episodes_per_fit=None, render=False, quiet=False, record=False): """ - This function moves the agent in the environment and fits the policy - using the collected samples. The agent can be moved for a given number - of steps or a given number of episodes and, independently from this - choice, the policy can be fitted after a given number of steps or a - given number of episodes. By default, the environment is reset. + This function moves the agent in the environment and fits the policy using the collected samples. + The agent can be moved for a given number of steps or a given number of episodes and, independently from this + choice, the policy can be fitted after a given number of steps or a given number of episodes. + The environment is reset at the beginning of the learning process. Args: n_steps (int, None): number of steps to move the agent; @@ -53,7 +56,8 @@ def learn(self, n_steps=None, n_episodes=None, n_steps_per_fit=None, n_episodes_per_fit (int, None): number of episodes between each fit of the policy; render (bool, False): whether to render the environment or not; - quiet (bool, False): whether to show the progress bar or not. 
+ quiet (bool, False): whether to show the progress bar or not; + record (bool, False): whether to record a video of the environment or not. """ assert (n_episodes_per_fit is not None and n_steps_per_fit is None)\ @@ -63,31 +67,27 @@ def learn(self, n_steps=None, n_episodes=None, n_steps_per_fit=None, self._n_episodes_per_fit = n_episodes_per_fit if n_steps_per_fit is not None: - fit_condition =\ - lambda: self._current_steps_counter >= self._n_steps_per_fit + fit_condition = lambda: self._current_steps_counter >= self._n_steps_per_fit else: - fit_condition = lambda: self._current_episodes_counter\ - >= self._n_episodes_per_fit + fit_condition = lambda: self._current_episodes_counter >= self._n_episodes_per_fit - self._run(n_steps, n_episodes, fit_condition, render, quiet, get_env_info=False) + self._run(n_steps, n_episodes, fit_condition, render, quiet, record, get_env_info=False) def evaluate(self, initial_states=None, n_steps=None, n_episodes=None, - render=False, quiet=False, get_env_info=False): + render=False, quiet=False, record=False, get_env_info=False): """ This function moves the agent in the environment using its policy. - The agent is moved for a provided number of steps, episodes, or from - a set of initial states for the whole episode. By default, the - environment is reset. + The agent is moved for a provided number of steps, episodes, or from a set of initial states for the whole + episode. The environment is reset at the beginning of the learning process. Args: - initial_states (np.ndarray, None): the starting states of each - episode; + initial_states (np.ndarray, None): the starting states of each episode; n_steps (int, None): number of steps to move the agent; n_episodes (int, None): number of episodes to move the agent; render (bool, False): whether to render the environment or not; quiet (bool, False): whether to show the progress bar or not; - get_env_info (bool, False): whether to return the environment - info list or not. 
+ record (bool, False): whether to record a video of the environment or not; + get_env_info (bool, False): whether to return the environment info list or not. Returns: The collected dataset and, optionally, an extra dataset of @@ -96,45 +96,36 @@ def evaluate(self, initial_states=None, n_steps=None, n_episodes=None, """ fit_condition = lambda: False - return self._run(n_steps, n_episodes, fit_condition, render, quiet, get_env_info, - initial_states) + return self._run(n_steps, n_episodes, fit_condition, render, quiet, record, get_env_info, initial_states) - def _run(self, n_steps, n_episodes, fit_condition, render, quiet, get_env_info, - initial_states=None): + def _run(self, n_steps, n_episodes, fit_condition, render, quiet, record, get_env_info, initial_states=None): assert n_episodes is not None and n_steps is None and initial_states is None\ or n_episodes is None and n_steps is not None and initial_states is None\ or n_episodes is None and n_steps is None and initial_states is not None - self._n_episodes = len( - initial_states) if initial_states is not None else n_episodes + self._n_episodes = len( initial_states) if initial_states is not None else n_episodes if n_steps is not None: - move_condition =\ - lambda: self._total_steps_counter < n_steps + move_condition = lambda: self._total_steps_counter < n_steps - steps_progress_bar = tqdm(total=n_steps, - dynamic_ncols=True, disable=quiet, - leave=False) + steps_progress_bar = tqdm(total=n_steps, dynamic_ncols=True, disable=quiet, leave=False) episodes_progress_bar = tqdm(disable=True) else: - move_condition =\ - lambda: self._total_episodes_counter < self._n_episodes + move_condition = lambda: self._total_episodes_counter < self._n_episodes steps_progress_bar = tqdm(disable=True) - episodes_progress_bar = tqdm(total=self._n_episodes, - dynamic_ncols=True, disable=quiet, - leave=False) + episodes_progress_bar = tqdm(total=self._n_episodes, dynamic_ncols=True, disable=quiet, leave=False) - dataset, dataset_info = 
self._run_impl(move_condition, fit_condition, steps_progress_bar, - episodes_progress_bar, render, initial_states) + dataset, dataset_info = self._run_impl(move_condition, fit_condition, steps_progress_bar, episodes_progress_bar, + render, record, initial_states) if get_env_info: return dataset, dataset_info else: return dataset - def _run_impl(self, move_condition, fit_condition, steps_progress_bar, - episodes_progress_bar, render, initial_states): + def _run_impl(self, move_condition, fit_condition, steps_progress_bar, episodes_progress_bar, render, record, + initial_states): self._total_episodes_counter = 0 self._total_steps_counter = 0 self._current_episodes_counter = 0 @@ -148,7 +139,7 @@ def _run_impl(self, move_condition, fit_condition, steps_progress_bar, if last: self.reset(initial_states) - sample, step_info = self._step(render) + sample, step_info = self._step(render, record) self.callback_step([sample]) @@ -182,12 +173,15 @@ def _run_impl(self, move_condition, fit_condition, steps_progress_bar, self.agent.stop() self.mdp.stop() + if record: + self._record.stop() + steps_progress_bar.close() episodes_progress_bar.close() return dataset, dataset_info - def _step(self, render): + def _step(self, render, record): """ Single step. @@ -195,9 +189,8 @@ def _step(self, render): render (bool): whether to render or not. Returns: - A tuple containing the previous state, the action sampled by the - agent, the reward obtained, the reached state, the absorbing flag - of the reached state and the last step flag. + A tuple containing the previous state, the action sampled by the agent, the reward obtained, the reached + state, the absorbing flag of the reached state and the last step flag. 
""" action = self.agent.draw_action(self._state) @@ -206,7 +199,8 @@ def _step(self, render): self._episode_steps += 1 if render: - self.mdp.render() + frame = self.mdp.render(record) + self._record(frame) last = not( self._episode_steps < self.mdp.info.horizon and not absorbing) @@ -222,8 +216,7 @@ def reset(self, initial_states=None): Reset the state of the agent. """ - if initial_states is None\ - or self._total_episodes_counter == self._n_episodes: + if initial_states is None or self._total_episodes_counter == self._n_episodes: initial_state = None else: initial_state = initial_states[self._total_episodes_counter] @@ -249,3 +242,22 @@ def _preprocess(self, state): state = p(state) return state + + @staticmethod + def _build_recorder_class(recorder_class=None, **kwargs): + """ + Method to create a video recorder class. + + Args: + recorder_class (class): the class used to record the video. By default, we use the ``VideoRecorder`` class + from mushroom. The class must implement the ``__call__`` and ``stop`` methods. + + Returns: + The recorder object. + + """ + + if not recorder_class: + recorder_class = VideoRecorder + + return recorder_class(**kwargs) diff --git a/mushroom_rl/core/environment.py b/mushroom_rl/core/environment.py index 12e497f84..5da1c01ca 100644 --- a/mushroom_rl/core/environment.py +++ b/mushroom_rl/core/environment.py @@ -36,8 +36,7 @@ def __init__(self, observation_space, action_space, gamma, horizon): def size(self): """ Returns: - The sum of the number of discrete states and discrete actions. Only - works for discrete spaces. + The sum of the number of discrete states and discrete actions. Only works for discrete spaces. """ return self.observation_space.size + self.action_space.size @@ -46,8 +45,7 @@ def size(self): def shape(self): """ Returns: - The concatenation of the shape tuple of the state and action - spaces. + The concatenation of the shape tuple of the state and action spaces. 
""" return self.observation_space.shape + self.action_space.shape @@ -86,10 +84,9 @@ def make(env_name, *args, **kwargs): """ Generate an environment given an environment name and parameters. The environment is created using the generate method, if available. Otherwise, the constructor is used. - The generate method has a simpler interface than the constructor, making it easier to generate - a standard version of the environment. If the environment name contains a '.' separator, the string - is splitted, the first element is used to select the environment and the other elements are passed as - positional parameters. + The generate method has a simpler interface than the constructor, making it easier to generate a standard + version of the environment. If the environment name contains a '.' separator, the string is splitted, the first + element is used to select the environment and the other elements are passed as positional parameters. Args: env_name (str): Name of the environment, @@ -118,8 +115,7 @@ def __init__(self, mdp_info): Constructor. Args: - mdp_info (MDPInfo): an object containing the info of the - environment. + mdp_info (MDPInfo): an object containing the info of the environment. """ self._mdp_info = mdp_info @@ -135,8 +131,7 @@ def seed(self, seed): if hasattr(self, 'env') and hasattr(self.env, 'seed'): self.env.seed(seed) else: - warnings.warn('This environment has no custom seed. ' - 'The call will have no effect. ' + warnings.warn('This environment has no custom seed. The call will have no effect. ' 'You can set the seed manually by setting numpy/torch seed') def reset(self, state=None): @@ -160,21 +155,28 @@ def step(self, action): action (np.ndarray): the action to execute. Returns: - The state reached by the agent executing ``action`` in its current - state, the reward obtained in the transition and a flag to signal - if the next state is absorbing. 
Also, an additional dictionary is + The state reached by the agent executing ``action`` in its current state, the reward obtained in the + transition and a flag to signal if the next state is absorbing. Also, an additional dictionary is returned (possibly empty). """ raise NotImplementedError - def render(self): + def render(self, record): + """ + Args: + record (bool): whether the visualized image should be returned or not. + + Returns: + The visualized image, or None if the record flag is set to false. + + """ raise NotImplementedError def stop(self): """ - Method used to stop an mdp. Useful when dealing with real world - environments, simulators, or when using openai-gym rendering + Method used to stop an mdp. Useful when dealing with real world environments, simulators, or when using + openai-gym rendering """ pass diff --git a/mushroom_rl/environments/atari.py b/mushroom_rl/environments/atari.py index 659e2d387..1153bc959 100644 --- a/mushroom_rl/environments/atari.py +++ b/mushroom_rl/environments/atari.py @@ -136,8 +136,13 @@ def step(self, action): return LazyFrames(list(self._state), self._history_length), reward, absorbing, info - def render(self, mode='human'): - self.env.render(mode=mode) + def render(self, record): + self.env.render(mode='human') + + if record: + return self.env.render(mode='rgb_array') + else: + return None def stop(self): self.env.close() diff --git a/mushroom_rl/environments/car_on_hill.py b/mushroom_rl/environments/car_on_hill.py index bf822d92a..5ccaad557 100644 --- a/mushroom_rl/environments/car_on_hill.py +++ b/mushroom_rl/environments/car_on_hill.py @@ -69,7 +69,7 @@ def step(self, action): return self._state, reward, absorbing, {} - def render(self): + def render(self, record): # Slope self._viewer.function(0, 1, self._height) @@ -93,6 +93,8 @@ def render(self): self._viewer.display(self._dt) + return None + @staticmethod def _angle(x): if x < 0.5: diff --git a/mushroom_rl/environments/cart_pole.py 
b/mushroom_rl/environments/cart_pole.py index 467e251b3..c899b5508 100644 --- a/mushroom_rl/environments/cart_pole.py +++ b/mushroom_rl/environments/cart_pole.py @@ -91,7 +91,7 @@ def step(self, action): return self._state, reward, absorbing, {} - def render(self, mode='human'): + def render(self, record): start = 1.25 * self._l * np.ones(2) end = 1.25 * self._l * np.ones(2) @@ -109,6 +109,8 @@ def render(self, mode='human'): self._viewer.display(self._dt) + return None + def stop(self): self._viewer.close() diff --git a/mushroom_rl/environments/dm_control_env.py b/mushroom_rl/environments/dm_control_env.py index 4fc510b99..61912b877 100644 --- a/mushroom_rl/environments/dm_control_env.py +++ b/mushroom_rl/environments/dm_control_env.py @@ -88,12 +88,17 @@ def step(self, action): return self._state, reward, absorbing, {} - def render(self): + def render(self, record): img = self.env.physics.render(self._viewer.size[1], self._viewer.size[0], self._camera_id) self._viewer.display(img) + if record: + return img + else: + return None + def stop(self): self._viewer.close() diff --git a/mushroom_rl/environments/grid_world.py b/mushroom_rl/environments/grid_world.py index 01cfb45b0..20754be74 100644 --- a/mushroom_rl/environments/grid_world.py +++ b/mushroom_rl/environments/grid_world.py @@ -23,8 +23,7 @@ def __init__(self, mdp_info, height, width, start, goal): """ assert not np.array_equal(start, goal) - assert goal[0] < height and goal[1] < width,\ - 'Goal position not suitable for the grid world dimension.' + assert goal[0] < height and goal[1] < width, 'Goal position not suitable for the grid world dimension.' 
self._state = None self._height = height @@ -54,7 +53,7 @@ def step(self, action): return self._state, reward, absorbing, info - def render(self): + def render(self, record): for row in range(1, self._height): for col in range(1, self._width): self._viewer.line(np.array([col, 0]), @@ -78,6 +77,8 @@ def render(self): self._viewer.display(.1) + return None + def _step(self, state, action): raise NotImplementedError('AbstractGridWorld is an abstract class.') diff --git a/mushroom_rl/environments/gym_env.py b/mushroom_rl/environments/gym_env.py index db2e9bd90..a7018fb03 100644 --- a/mushroom_rl/environments/gym_env.py +++ b/mushroom_rl/environments/gym_env.py @@ -97,12 +97,20 @@ def step(self, action): return np.atleast_1d(obs), reward, absorbing, info - def render(self, mode='human'): + def render(self, record): if self._first or self._not_pybullet: - self.env.render(mode=mode) + self.env.render(mode='human') + self._first = False time.sleep(self._render_dt) + if record: + self.env.render(mode='rgb_array') + else: + return None + + return None + def stop(self): try: if self._not_pybullet: diff --git a/mushroom_rl/environments/habitat_env.py b/mushroom_rl/environments/habitat_env.py index 48b48006d..b27758910 100644 --- a/mushroom_rl/environments/habitat_env.py +++ b/mushroom_rl/environments/habitat_env.py @@ -260,16 +260,18 @@ def step(self, action): def stop(self): self._viewer.close() - def render(self, mode='rgb_array'): - if mode == "rgb_array": - frame = observations_to_image( - self.env._last_full_obs, self.env.unwrapped._env.get_metrics() - ) - else: - raise ValueError(f"Render mode {mode} not currently supported.") + def render(self, record): + frame = observations_to_image( + self.env._last_full_obs, self.env.unwrapped._env.get_metrics() + ) self._viewer.display(frame) + if record: + return frame + else: + return None + @staticmethod def _convert_observation(observation): return observation.transpose((2, 0, 1)) diff --git 
a/mushroom_rl/environments/igibson_env.py b/mushroom_rl/environments/igibson_env.py index fb681afee..a2b20c049 100644 --- a/mushroom_rl/environments/igibson_env.py +++ b/mushroom_rl/environments/igibson_env.py @@ -126,9 +126,14 @@ def close(self): def stop(self): self._viewer.close() - def render(self, mode='human'): + def render(self, record): self._viewer.display(self._image) + if record: + return self._image + else: + return None + @staticmethod def _convert_observation(observation): return observation.transpose((2, 0, 1)) diff --git a/mushroom_rl/environments/inverted_pendulum.py b/mushroom_rl/environments/inverted_pendulum.py index a80023da8..c2cb300f8 100644 --- a/mushroom_rl/environments/inverted_pendulum.py +++ b/mushroom_rl/environments/inverted_pendulum.py @@ -85,7 +85,7 @@ def step(self, action): return self._state, reward, False, {} - def render(self, mode='human'): + def render(self, record): start = 1.25 * self._l * np.ones(2) end = 1.25 * self._l * np.ones(2) @@ -99,6 +99,8 @@ def render(self, mode='human'): self._viewer.display(self._dt) + return None + def stop(self): self._viewer.close() diff --git a/mushroom_rl/environments/minigrid_env.py b/mushroom_rl/environments/minigrid_env.py index 0c0a55858..36724ae9c 100644 --- a/mushroom_rl/environments/minigrid_env.py +++ b/mushroom_rl/environments/minigrid_env.py @@ -15,6 +15,7 @@ from mushroom_rl.utils.spaces import Discrete, Box from mushroom_rl.utils.frames import LazyFrames, preprocess_frame + class MiniGrid(Gym): """ Interface for gym_minigrid environments. 
It makes it possible to diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 2e3b28072..1aac9747f 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -166,12 +166,14 @@ def step(self, action): return self._modify_observation(cur_obs), reward, absorbing, info - def render(self): + def render(self, record): if self._viewer is None: self._viewer = MujocoGlfwViewer(self._model, self.dt, **self._viewer_params) self._viewer.render(self._data) + return None + def stop(self): if self._viewer is not None: self._viewer.stop() diff --git a/mushroom_rl/environments/puddle_world.py b/mushroom_rl/environments/puddle_world.py index bc9738087..5a86c44bd 100644 --- a/mushroom_rl/environments/puddle_world.py +++ b/mushroom_rl/environments/puddle_world.py @@ -86,7 +86,7 @@ def step(self, action): return self._state, reward, absorbing, {} - def render(self): + def render(self, record): if self._pixels is None: img_size = 100 pixels = np.zeros((img_size, img_size, 3)) @@ -116,6 +116,8 @@ def render(self): self._viewer.display(0.1) + return None + def stop(self): if self._viewer is not None: self._viewer.close() diff --git a/mushroom_rl/environments/pybullet.py b/mushroom_rl/environments/pybullet.py index ff4ad3bae..f3cf6cb96 100644 --- a/mushroom_rl/environments/pybullet.py +++ b/mushroom_rl/environments/pybullet.py @@ -95,8 +95,13 @@ def reset(self, state=None): return observation - def render(self): - self._viewer.display() + def render(self, record): + frame = self._viewer.display() + + if record: + return frame + else: + return None def stop(self): self._viewer.close() diff --git a/mushroom_rl/environments/segway.py b/mushroom_rl/environments/segway.py index 24bb64116..a0a8b2b10 100644 --- a/mushroom_rl/environments/segway.py +++ b/mushroom_rl/environments/segway.py @@ -116,7 +116,7 @@ def _dynamics(self, state, t, u): return dx - def render(self, mode='human'): + def render(self, record): start = 2.5 * 
self._l * np.ones(2) end = 2.5 * self._l * np.ones(2) @@ -132,8 +132,7 @@ def render(self, mode='human'): end[0] += -2 * self._l * np.sin(self._state[0]) + self._last_x end[1] += 2 * self._l * np.cos(self._state[0]) - if (start[0] > 5 * self._l and end[0] > 5 * self._l) \ - or (start[0] < 0 and end[0] < 0): + if (start[0] > 5 * self._l and end[0] > 5 * self._l) or (start[0] < 0 and end[0] < 0): start[0] = start[0] % 5 * self._l end[0] = end[0] % 5 * self._l @@ -142,5 +141,7 @@ def render(self, mode='human'): self._viewer.display(self._dt) + return None + diff --git a/mushroom_rl/environments/ship_steering.py b/mushroom_rl/environments/ship_steering.py index ecc4a77a9..05d5bc6e8 100644 --- a/mushroom_rl/environments/ship_steering.py +++ b/mushroom_rl/environments/ship_steering.py @@ -9,8 +9,7 @@ class ShipSteering(Environment): """ The Ship Steering environment as presented in: - "Hierarchical Policy Gradient Algorithms". Ghavamzadeh M. and Mahadevan S.. - 2013. + "Hierarchical Policy Gradient Algorithms". Ghavamzadeh M. and Mahadevan S.. 2013. 
""" def __init__(self, small=True, n_steps_action=3): @@ -107,7 +106,7 @@ def step(self, action): return self._state, reward, absorbing, {} - def render(self, mode='human'): + def render(self, record): self._viewer.line(self._gate_s, self._gate_e, width=3) @@ -123,6 +122,8 @@ def render(self, mode='human'): self._viewer.display(self._dt) + return None + def stop(self): self._viewer.close() diff --git a/mushroom_rl/utils/pybullet/viewer.py b/mushroom_rl/utils/pybullet/viewer.py index d66ac72f7..ffaa9755c 100644 --- a/mushroom_rl/utils/pybullet/viewer.py +++ b/mushroom_rl/utils/pybullet/viewer.py @@ -21,6 +21,8 @@ def display(self): img = self._get_image() super().display(img) + return img + def _get_image(self): view_matrix = self._client.computeViewMatrixFromYawPitchRoll(cameraTargetPosition=self._origin, distance=self._distance, diff --git a/mushroom_rl/utils/record.py b/mushroom_rl/utils/record.py new file mode 100644 index 000000000..4830d410e --- /dev/null +++ b/mushroom_rl/utils/record.py @@ -0,0 +1,9 @@ +class VideoRecorder(object): + def __init__(self, **kwargs): + pass + + def __call__(self, frame): + pass + + def stop(self): + pass \ No newline at end of file From 7d0718a3e834b9e4e613185a1fbe929ea5260e8e Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Thu, 22 Jun 2023 15:58:21 +0200 Subject: [PATCH 02/50] Added assertion in core - ensure render is true when record is true - updated doc --- mushroom_rl/core/core.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mushroom_rl/core/core.py b/mushroom_rl/core/core.py index 44c50ddc8..1b7043df9 100644 --- a/mushroom_rl/core/core.py +++ b/mushroom_rl/core/core.py @@ -57,12 +57,15 @@ def learn(self, n_steps=None, n_episodes=None, n_steps_per_fit=None, of the policy; render (bool, False): whether to render the environment or not; quiet (bool, False): whether to show the progress bar or not; - record (bool, False): whether to record a video of the environment or not. 
+ record (bool, False): whether to record a video of the environment or not. If True, also the render flag + should be set to True. """ assert (n_episodes_per_fit is not None and n_steps_per_fit is None)\ or (n_episodes_per_fit is None and n_steps_per_fit is not None) + assert (render and record) or (not record), "To record, the render flag must be set to true" + self._n_steps_per_fit = n_steps_per_fit self._n_episodes_per_fit = n_episodes_per_fit @@ -86,7 +89,8 @@ def evaluate(self, initial_states=None, n_steps=None, n_episodes=None, n_episodes (int, None): number of episodes to move the agent; render (bool, False): whether to render the environment or not; quiet (bool, False): whether to show the progress bar or not; - record (bool, False): whether to record a video of the environment or not; + record (bool, False): whether to record a video of the environment or not. If True, also the render flag + should be set to True; get_env_info (bool, False): whether to return the environment info list or not. Returns: @@ -94,6 +98,8 @@ def evaluate(self, initial_states=None, n_steps=None, n_episodes=None, environment info, collected at each step. 
""" + assert (render and record) or (not record), "To record, the render flag must be set to true" + fit_condition = lambda: False return self._run(n_steps, n_episodes, fit_condition, render, quiet, record, get_env_info, initial_states) From 45c59e3cfaa22544ebfdb9c61e5863877e3a10af Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Thu, 22 Jun 2023 16:28:35 +0200 Subject: [PATCH 03/50] Work on recording and visualization - added stop function to all environments where it was missing - fixed bug in recording core, now we only record when the record flag is set - Improved pybullet viewer to get the current frame - now all pybullet environment can record the data --- mushroom_rl/core/core.py | 4 +++- mushroom_rl/environments/car_on_hill.py | 7 ++++++- mushroom_rl/environments/cart_pole.py | 4 +++- mushroom_rl/environments/grid_world.py | 7 ++++++- mushroom_rl/environments/inverted_pendulum.py | 4 +++- mushroom_rl/environments/puddle_world.py | 4 +++- mushroom_rl/environments/segway.py | 7 ++++++- mushroom_rl/environments/ship_steering.py | 4 +++- mushroom_rl/utils/viewer.py | 15 +++++++++++++++ 9 files changed, 48 insertions(+), 8 deletions(-) diff --git a/mushroom_rl/core/core.py b/mushroom_rl/core/core.py index 1b7043df9..67573c796 100644 --- a/mushroom_rl/core/core.py +++ b/mushroom_rl/core/core.py @@ -206,7 +206,9 @@ def _step(self, render, record): if render: frame = self.mdp.render(record) - self._record(frame) + + if record: + self._record(frame) last = not( self._episode_steps < self.mdp.info.horizon and not absorbing) diff --git a/mushroom_rl/environments/car_on_hill.py b/mushroom_rl/environments/car_on_hill.py index 5ccaad557..1f8c7d0c9 100644 --- a/mushroom_rl/environments/car_on_hill.py +++ b/mushroom_rl/environments/car_on_hill.py @@ -91,9 +91,14 @@ def render(self, record): angle = self._angle(x_car) self._viewer.polygon(c_car, angle, car_body, color=(32, 193, 54)) + frame = self._viewer.get_frame() if record else None + self._viewer.display(self._dt) - 
return None + return frame + + def stop(self): + self._viewer.close() @staticmethod def _angle(x): diff --git a/mushroom_rl/environments/cart_pole.py b/mushroom_rl/environments/cart_pole.py index c899b5508..324e3f9da 100644 --- a/mushroom_rl/environments/cart_pole.py +++ b/mushroom_rl/environments/cart_pole.py @@ -107,9 +107,11 @@ def render(self, record): self._viewer.force_arrow(start, direction, value, self._max_u, self._l / 5) + frame = self._viewer.get_frame() if record else None + self._viewer.display(self._dt) - return None + return frame def stop(self): self._viewer.close() diff --git a/mushroom_rl/environments/grid_world.py b/mushroom_rl/environments/grid_world.py index 20754be74..1fce454bd 100644 --- a/mushroom_rl/environments/grid_world.py +++ b/mushroom_rl/environments/grid_world.py @@ -75,9 +75,14 @@ def render(self, record): self._height - (.5 + state_grid[0])]) self._viewer.circle(state_center, .4, (0, 0, 255)) + frame = self._viewer.get_frame() if record else None + self._viewer.display(.1) - return None + return frame + + def stop(self): + self._viewer.close() def _step(self, state, action): raise NotImplementedError('AbstractGridWorld is an abstract class.') diff --git a/mushroom_rl/environments/inverted_pendulum.py b/mushroom_rl/environments/inverted_pendulum.py index c2cb300f8..c95b9faca 100644 --- a/mushroom_rl/environments/inverted_pendulum.py +++ b/mushroom_rl/environments/inverted_pendulum.py @@ -97,9 +97,11 @@ def render(self, record): self._viewer.circle(end, self._l / 20) self._viewer.torque_arrow(start, -self._last_u, self._max_u, self._l / 5) + frame = self._viewer.get_frame() if record else None + self._viewer.display(self._dt) - return None + return frame def stop(self): self._viewer.close() diff --git a/mushroom_rl/environments/puddle_world.py b/mushroom_rl/environments/puddle_world.py index 5a86c44bd..ed1ef76dc 100644 --- a/mushroom_rl/environments/puddle_world.py +++ b/mushroom_rl/environments/puddle_world.py @@ -114,9 +114,11 @@ 
def render(self, record): self._viewer.polygon(self._goal, 0, goal_area, color=(255, 0, 0), width=1) + frame = self._viewer.get_frame() if record else None + self._viewer.display(0.1) - return None + return frame def stop(self): if self._viewer is not None: diff --git a/mushroom_rl/environments/segway.py b/mushroom_rl/environments/segway.py index a0a8b2b10..121b2c083 100644 --- a/mushroom_rl/environments/segway.py +++ b/mushroom_rl/environments/segway.py @@ -139,9 +139,14 @@ def render(self, record): self._viewer.line(start, end) self._viewer.circle(start, self._r) + frame = self._viewer.get_frame() if record else None + self._viewer.display(self._dt) - return None + return frame + + def stop(self): + self._viewer.close() diff --git a/mushroom_rl/environments/ship_steering.py b/mushroom_rl/environments/ship_steering.py index 05d5bc6e8..2f7dd9b36 100644 --- a/mushroom_rl/environments/ship_steering.py +++ b/mushroom_rl/environments/ship_steering.py @@ -120,9 +120,11 @@ def render(self, record): self._viewer.polygon(self._state[:2], self._state[2], boat, color=(32, 193, 54)) + frame = self._viewer.get_frame() if record else None + self._viewer.display(self._dt) - return None + return frame def stop(self): self._viewer.close() diff --git a/mushroom_rl/utils/viewer.py b/mushroom_rl/utils/viewer.py index c5c231456..d29c02659 100644 --- a/mushroom_rl/utils/viewer.py +++ b/mushroom_rl/utils/viewer.py @@ -314,6 +314,21 @@ def function(self, x_s, x_e, f, n_points=100, width=1, color=(255, 255, 255)): points = [self._transform([a, b]) for a, b in zip(x,y)] pygame.draw.lines(self.screen, color, False, points, width) + @staticmethod + def get_frame(): + """ + Getter. + + Returns: + The current Pygame surface as an RGB array. 
+ + """ + surf = pygame.display.get_surface() + pygame_frame = pygame.surfarray.array3d(surf) + frame = pygame_frame.swapaxes(0, 1) + + return frame + def display(self, s): """ Display current frame and initialize the next frame to the background From f8da8fdb16e72be51dc57cd57fc5652ce0cc2b0b Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Thu, 22 Jun 2023 16:35:56 +0200 Subject: [PATCH 04/50] Updated tutorials to include the recording --- docs/source/tutorials/code/room_env.py | 7 ++++++- docs/source/tutorials/tutorials.5_environments.rst | 10 +++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/source/tutorials/code/room_env.py b/docs/source/tutorials/code/room_env.py index cfc85a86b..6ae2e335f 100644 --- a/docs/source/tutorials/code/room_env.py +++ b/docs/source/tutorials/code/room_env.py @@ -86,16 +86,21 @@ def step(self, action): # Return all the information + empty dictionary (used to pass additional information) return self._state, reward, absorbing, {} - def render(self): + def render(self, record): # Draw a red circle for the agent self._viewer.circle(self._state, 0.1, color=(255, 0, 0)) # Draw a green circle for the goal self._viewer.circle(self._goal, self._goal_radius, color=(0, 255, 0)) + # Get the image if the record flag is set to true + frame = self._viewer.get_frame() if record else None + # Display the image for 0.1 seconds self._viewer.display(0.1) + return frame + # Register the class RoomToyEnv.register() diff --git a/docs/source/tutorials/tutorials.5_environments.rst b/docs/source/tutorials/tutorials.5_environments.rst index ca50e0a8a..754b3f04a 100644 --- a/docs/source/tutorials/tutorials.5_environments.rst +++ b/docs/source/tutorials/tutorials.5_environments.rst @@ -159,14 +159,14 @@ visualization tool for 2D Reinforcement Learning algorithms. The viewer class ha simply draw two circles representing the agent and the goal area: .. 
literalinclude:: code/room_env.py - :lines: 89-97 + :lines: 89-102 For more information about the viewer, refer to the class documentation. To conclude our environment, it's also possible to register it as specified in the previous section of this tutorial: .. literalinclude:: code/room_env.py - :lines: 100-101 + :lines: 105-106 Learning in the toy environment @@ -179,17 +179,17 @@ We first import all necessary classes and utilities, then we construct the envir reproducibility). .. literalinclude:: code/room_env.py - :lines: 103-116 + :lines: 108-121 We now proceed then to create the agent policy, which is a linear policy using tiles features, similar to the one used by the Mountain Car experiment from R. Sutton book. .. literalinclude:: code/room_env.py - :lines: 118-139 + :lines: 123-144 Finally, using the ``Core`` class we set up an RL experiment. We first evaluate the initial policy for three episodes on the environment. Then we learn the task using the algorithm build above for 20000 steps. In the end, we evaluate the learned policy for 3 more episodes. .. literalinclude:: code/room_env.py - :lines: 141- + :lines: 146- From bd456035c4104ec97166cb4754bc34fe818b8710 Mon Sep 17 00:00:00 2001 From: robfiras Date: Thu, 22 Jun 2023 17:04:06 +0200 Subject: [PATCH 05/50] Added video recorder, and adapted MuJoCo environment and viewer to support it. 
--- mushroom_rl/environments/mujoco.py | 6 +-- mushroom_rl/utils/mujoco/viewer.py | 39 +++++++++++++----- mushroom_rl/utils/record.py | 65 ++++++++++++++++++++++++++++-- 3 files changed, 92 insertions(+), 18 deletions(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 1aac9747f..01c46691e 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -168,11 +168,9 @@ def step(self, action): def render(self, record): if self._viewer is None: - self._viewer = MujocoGlfwViewer(self._model, self.dt, **self._viewer_params) + self._viewer = MujocoGlfwViewer(self._model, self.dt, record=record, **self._viewer_params) - self._viewer.render(self._data) - - return None + return self._viewer.render(self._data, record) def stop(self): if self._viewer is not None: diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index ecef6f8b8..337a7daa9 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -13,7 +13,9 @@ class MujocoGlfwViewer: c: Turn contact force and constraint visualisation on / off t: Make models transparent """ - def __init__(self, model, dt, width=1920, height=1080, start_paused=False, custom_render_callback=None): + def __init__(self, model, dt, width=1920, height=1080, start_paused=False, + custom_render_callback=None, record=False): + self.button_left = False self.button_right = False self.button_middle = False @@ -26,6 +28,10 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, custo glfw.init() glfw.window_hint(glfw.COCOA_RETINA_FRAMEBUFFER, 0) + if record: + # dont allow to change the window size to have equal frame size during recording + glfw.window_hint(glfw.RESIZABLE, False) + self._loop_count = 0 self._time_per_render = 1 / 60. 
self._paused = start_paused @@ -52,8 +58,6 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, custo self._viewport = mujoco.MjrRect(0, 0, width, height) self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(100)) - self.rgb_buffer = np.empty((width, height, 3), dtype=np.uint8) - self.custom_render_callback = custom_render_callback def mouse_button(self, window, button, act, mods): @@ -106,7 +110,7 @@ def keyboard(self, window, key, scancode, act, mods): def scroll(self, window, x_offset, y_offset): mujoco.mjv_moveCamera(self._model, mujoco.mjtMouse.mjMOUSE_ZOOM, 0, 0.05 * y_offset, self._scene, self._camera) - def render(self, data): + def render(self, data, record): def render_inner_loop(self): render_start = time.time() @@ -132,21 +136,36 @@ def render_inner_loop(self): exit(0) self._time_per_render = 0.9 * self._time_per_render + 0.1 * (time.time() - render_start) - """ - if return_img: - mujoco.mjr_readPixels(self.rgb_buffer, None, self._viewport, self._context) - return self.rgb_buffer - """ if self._paused: while self._paused: render_inner_loop(self) - self._loop_count += self.dt / self._time_per_render + if record: + self._loop_count = 1 + else: + self._loop_count += self.dt / self._time_per_render while self._loop_count > 0: render_inner_loop(self) self._loop_count -= 1 + if record: + return self.read_pixels() + + def read_pixels(self, depth=False): + + shape = glfw.get_framebuffer_size(self._window) + + if depth: + rgb_img = np.zeros((shape[1], shape[0], 3), dtype=np.uint8) + depth_img = np.zeros((shape[1], shape[0], 1), dtype=np.float32) + mujoco.mjr_readPixels(rgb_img, depth_img, self._viewport, self._context) + return (np.flipud(rgb_img), np.flipud(depth_img)) + else: + img = np.zeros((shape[1], shape[0], 3), dtype=np.uint8) + mujoco.mjr_readPixels(img, None, self._viewport, self._context) + return np.flipud(img) + def stop(self): glfw.destroy_window(self._window) diff --git a/mushroom_rl/utils/record.py 
b/mushroom_rl/utils/record.py index 4830d410e..fb411e732 100644 --- a/mushroom_rl/utils/record.py +++ b/mushroom_rl/utils/record.py @@ -1,9 +1,66 @@ +import os +import cv2 +import datetime +from pathlib import Path + + class VideoRecorder(object): - def __init__(self, **kwargs): - pass + """ + Simple video record that creates a video from a stream of images. + """ + + def __init__(self, path="./mushroom_rl_recordings", tag=None, video_name=None, fps=60): + """ + Constructor. + + Args: + path: Path at which videos will be stored. + tag: Name of the directory at path in which the video will be stored. If None, a timestamp will be created. + fps: Frame rate of the video. + """ + + if not os.path.isdir(path): + os.mkdir(path) + + if tag is None: + date_time = datetime.datetime.now() + tag = date_time.strftime("%d-%m-%Y_%H-%M-%S") + + path = path + "/" + tag + + if not os.path.isdir(path): + os.mkdir(path) + + if video_name: + suffix = Path(video_name).suffix + if suffix == "": + video_name += ".mp4" + elif suffix != ".mp4": + raise ValueError("Provided video name has unsupported suffix \"%s\"! " + "Please use \".mp4\" or don't provide suffix." 
% suffix) + + path += "/" + video_name + else: + path += "/" + "recording.mp4" + + self._path = path + + self._fps = fps + + self._video_writer = None def __call__(self, frame): - pass + """ + Args: + frame (np.ndarray): Frame to be added to the video (H, W, RGB) + """ + if self._video_writer is None: + height, width = frame.shape[:2] + self._video_writer = cv2.VideoWriter(self._path, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), + self._fps, (width, height)) + + self._video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)) def stop(self): - pass \ No newline at end of file + cv2.destroyAllWindows() + self._video_writer.release() \ No newline at end of file From bb0f7c9d533f4d8092de5bed410145c581bd2d70 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Thu, 22 Jun 2023 17:05:45 +0200 Subject: [PATCH 06/50] Fixed environment interface - render has default parameter set to false - updated interface in all environments and in the tutorial - fixed tests --- docs/source/tutorials/code/room_env.py | 2 +- mushroom_rl/core/environment.py | 4 ++-- mushroom_rl/environments/atari.py | 2 +- mushroom_rl/environments/car_on_hill.py | 2 +- mushroom_rl/environments/cart_pole.py | 2 +- mushroom_rl/environments/dm_control_env.py | 2 +- mushroom_rl/environments/grid_world.py | 2 +- mushroom_rl/environments/gym_env.py | 2 +- mushroom_rl/environments/habitat_env.py | 2 +- mushroom_rl/environments/igibson_env.py | 2 +- mushroom_rl/environments/inverted_pendulum.py | 2 +- mushroom_rl/environments/mujoco.py | 2 +- mushroom_rl/environments/puddle_world.py | 2 +- mushroom_rl/environments/pybullet.py | 2 +- mushroom_rl/environments/segway.py | 2 +- mushroom_rl/environments/ship_steering.py | 2 +- 16 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/source/tutorials/code/room_env.py b/docs/source/tutorials/code/room_env.py index 6ae2e335f..7ea5edf0c 100644 --- a/docs/source/tutorials/code/room_env.py +++ b/docs/source/tutorials/code/room_env.py @@ -86,7 +86,7 @@ def 
step(self, action): # Return all the information + empty dictionary (used to pass additional information) return self._state, reward, absorbing, {} - def render(self, record): + def render(self, record=False): # Draw a red circle for the agent self._viewer.circle(self._state, 0.1, color=(255, 0, 0)) diff --git a/mushroom_rl/core/environment.py b/mushroom_rl/core/environment.py index 5da1c01ca..0703dac25 100644 --- a/mushroom_rl/core/environment.py +++ b/mushroom_rl/core/environment.py @@ -162,10 +162,10 @@ def step(self, action): """ raise NotImplementedError - def render(self, record): + def render(self, record=False): """ Args: - record (bool): whether the visualized image should be returned or not. + record (bool, False): whether the visualized image should be returned or not. Returns: The visualized image, or None if the record flag is set to false. diff --git a/mushroom_rl/environments/atari.py b/mushroom_rl/environments/atari.py index 1153bc959..5abc56118 100644 --- a/mushroom_rl/environments/atari.py +++ b/mushroom_rl/environments/atari.py @@ -136,7 +136,7 @@ def step(self, action): return LazyFrames(list(self._state), self._history_length), reward, absorbing, info - def render(self, record): + def render(self, record=False): self.env.render(mode='human') if record: diff --git a/mushroom_rl/environments/car_on_hill.py b/mushroom_rl/environments/car_on_hill.py index 1f8c7d0c9..6af526e68 100644 --- a/mushroom_rl/environments/car_on_hill.py +++ b/mushroom_rl/environments/car_on_hill.py @@ -69,7 +69,7 @@ def step(self, action): return self._state, reward, absorbing, {} - def render(self, record): + def render(self, record=False): # Slope self._viewer.function(0, 1, self._height) diff --git a/mushroom_rl/environments/cart_pole.py b/mushroom_rl/environments/cart_pole.py index 324e3f9da..1ba0a03c5 100644 --- a/mushroom_rl/environments/cart_pole.py +++ b/mushroom_rl/environments/cart_pole.py @@ -91,7 +91,7 @@ def step(self, action): return self._state, reward, 
absorbing, {} - def render(self, record): + def render(self, record=False): start = 1.25 * self._l * np.ones(2) end = 1.25 * self._l * np.ones(2) diff --git a/mushroom_rl/environments/dm_control_env.py b/mushroom_rl/environments/dm_control_env.py index 61912b877..795e0c349 100644 --- a/mushroom_rl/environments/dm_control_env.py +++ b/mushroom_rl/environments/dm_control_env.py @@ -88,7 +88,7 @@ def step(self, action): return self._state, reward, absorbing, {} - def render(self, record): + def render(self, record=False): img = self.env.physics.render(self._viewer.size[1], self._viewer.size[0], self._camera_id) diff --git a/mushroom_rl/environments/grid_world.py b/mushroom_rl/environments/grid_world.py index 1fce454bd..9e27b1890 100644 --- a/mushroom_rl/environments/grid_world.py +++ b/mushroom_rl/environments/grid_world.py @@ -53,7 +53,7 @@ def step(self, action): return self._state, reward, absorbing, info - def render(self, record): + def render(self, record=False): for row in range(1, self._height): for col in range(1, self._width): self._viewer.line(np.array([col, 0]), diff --git a/mushroom_rl/environments/gym_env.py b/mushroom_rl/environments/gym_env.py index a7018fb03..18b5c969e 100644 --- a/mushroom_rl/environments/gym_env.py +++ b/mushroom_rl/environments/gym_env.py @@ -97,7 +97,7 @@ def step(self, action): return np.atleast_1d(obs), reward, absorbing, info - def render(self, record): + def render(self, record=False): if self._first or self._not_pybullet: self.env.render(mode='human') diff --git a/mushroom_rl/environments/habitat_env.py b/mushroom_rl/environments/habitat_env.py index b27758910..13050689f 100644 --- a/mushroom_rl/environments/habitat_env.py +++ b/mushroom_rl/environments/habitat_env.py @@ -260,7 +260,7 @@ def step(self, action): def stop(self): self._viewer.close() - def render(self, record): + def render(self, record=False): frame = observations_to_image( self.env._last_full_obs, self.env.unwrapped._env.get_metrics() ) diff --git 
a/mushroom_rl/environments/igibson_env.py b/mushroom_rl/environments/igibson_env.py index a2b20c049..050fb506c 100644 --- a/mushroom_rl/environments/igibson_env.py +++ b/mushroom_rl/environments/igibson_env.py @@ -126,7 +126,7 @@ def close(self): def stop(self): self._viewer.close() - def render(self, record): + def render(self, record=False): self._viewer.display(self._image) if record: diff --git a/mushroom_rl/environments/inverted_pendulum.py b/mushroom_rl/environments/inverted_pendulum.py index c95b9faca..8d6d56813 100644 --- a/mushroom_rl/environments/inverted_pendulum.py +++ b/mushroom_rl/environments/inverted_pendulum.py @@ -85,7 +85,7 @@ def step(self, action): return self._state, reward, False, {} - def render(self, record): + def render(self, record=False): start = 1.25 * self._l * np.ones(2) end = 1.25 * self._l * np.ones(2) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 1aac9747f..60f338f38 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -166,7 +166,7 @@ def step(self, action): return self._modify_observation(cur_obs), reward, absorbing, info - def render(self, record): + def render(self, record=False): if self._viewer is None: self._viewer = MujocoGlfwViewer(self._model, self.dt, **self._viewer_params) diff --git a/mushroom_rl/environments/puddle_world.py b/mushroom_rl/environments/puddle_world.py index ed1ef76dc..e74ff06ed 100644 --- a/mushroom_rl/environments/puddle_world.py +++ b/mushroom_rl/environments/puddle_world.py @@ -86,7 +86,7 @@ def step(self, action): return self._state, reward, absorbing, {} - def render(self, record): + def render(self, record=False): if self._pixels is None: img_size = 100 pixels = np.zeros((img_size, img_size, 3)) diff --git a/mushroom_rl/environments/pybullet.py b/mushroom_rl/environments/pybullet.py index f3cf6cb96..b36dc0753 100644 --- a/mushroom_rl/environments/pybullet.py +++ b/mushroom_rl/environments/pybullet.py @@ -95,7 
+95,7 @@ def reset(self, state=None): return observation - def render(self, record): + def render(self, record=False): frame = self._viewer.display() if record: diff --git a/mushroom_rl/environments/segway.py b/mushroom_rl/environments/segway.py index 121b2c083..9c792f0fa 100644 --- a/mushroom_rl/environments/segway.py +++ b/mushroom_rl/environments/segway.py @@ -116,7 +116,7 @@ def _dynamics(self, state, t, u): return dx - def render(self, record): + def render(self, record=False): start = 2.5 * self._l * np.ones(2) end = 2.5 * self._l * np.ones(2) diff --git a/mushroom_rl/environments/ship_steering.py b/mushroom_rl/environments/ship_steering.py index 2f7dd9b36..f5967ee1a 100644 --- a/mushroom_rl/environments/ship_steering.py +++ b/mushroom_rl/environments/ship_steering.py @@ -106,7 +106,7 @@ def step(self, action): return self._state, reward, absorbing, {} - def render(self, record): + def render(self, record=False): self._viewer.line(self._gate_s, self._gate_e, width=3) From 5d348ec4699c2d14ef76672ab91569ad76986e91 Mon Sep 17 00:00:00 2001 From: robfiras Date: Thu, 22 Jun 2023 18:56:19 +0200 Subject: [PATCH 07/50] Bugfix in recording in gym environments. Updated the video recorder to allow saving multiple videos in a row. 
--- mushroom_rl/environments/gym_env.py | 2 +- mushroom_rl/utils/record.py | 48 +++++++++++++++-------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/mushroom_rl/environments/gym_env.py b/mushroom_rl/environments/gym_env.py index 18b5c969e..71dfdd9c3 100644 --- a/mushroom_rl/environments/gym_env.py +++ b/mushroom_rl/environments/gym_env.py @@ -105,7 +105,7 @@ def render(self, record=False): time.sleep(self._render_dt) if record: - self.env.render(mode='rgb_array') + return self.env.render(mode='rgb_array') else: return None diff --git a/mushroom_rl/utils/record.py b/mushroom_rl/utils/record.py index fb411e732..996704d56 100644 --- a/mushroom_rl/utils/record.py +++ b/mushroom_rl/utils/record.py @@ -16,34 +16,20 @@ def __init__(self, path="./mushroom_rl_recordings", tag=None, video_name=None, f Args: path: Path at which videos will be stored. tag: Name of the directory at path in which the video will be stored. If None, a timestamp will be created. + video_name: Name of the video without extension. Default is "recording". fps: Frame rate of the video. """ - if not os.path.isdir(path): - os.mkdir(path) - if tag is None: date_time = datetime.datetime.now() tag = date_time.strftime("%d-%m-%Y_%H-%M-%S") - path = path + "/" + tag - - if not os.path.isdir(path): - os.mkdir(path) - - if video_name: - suffix = Path(video_name).suffix - if suffix == "": - video_name += ".mp4" - elif suffix != ".mp4": - raise ValueError("Provided video name has unsupported suffix \"%s\"! " - "Please use \".mp4\" or don't provide suffix." 
% suffix) + self._path = Path(path) + self._path = self._path / tag + self._path.mkdir(parents=True, exist_ok=True) - path += "/" + video_name - else: - path += "/" + "recording.mp4" - - self._path = path + self._video_name = video_name if video_name else "recording" + self._counter = 0 self._fps = fps @@ -54,13 +40,29 @@ def __call__(self, frame): Args: frame (np.ndarray): Frame to be added to the video (H, W, RGB) """ + assert frame is not None + if self._video_writer is None: height, width = frame.shape[:2] - self._video_writer = cv2.VideoWriter(self._path, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), - self._fps, (width, height)) + self._create_video_writer(height, width) self._video_writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)) + def _create_video_writer(self, height, width): + + name = self._video_name + if self._counter > 0: + name += f"-{self._counter}.mp4" + else: + name += ".mp4" + + path = self._path / name + + self._video_writer = cv2.VideoWriter(str(path), cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), + self._fps, (width, height)) + def stop(self): cv2.destroyAllWindows() - self._video_writer.release() \ No newline at end of file + self._video_writer.release() + self._video_writer = None + self._counter += 1 From d53b5e6b2080e6b87cfd38978f6351a92a0048d5 Mon Sep 17 00:00:00 2001 From: robfiras Date: Thu, 22 Jun 2023 18:58:18 +0200 Subject: [PATCH 08/50] removed recording todo. 
--- TODO.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/TODO.txt b/TODO.txt index afc80021d..2208cca03 100644 --- a/TODO.txt +++ b/TODO.txt @@ -17,7 +17,6 @@ Approximator: * add neural network generator For Mushroom 2.0: - * Record method in environment and record option in the core * Simplify Regressor interface: drop GenericRegressor, remove facade pattern * vectorize basis functions and simplify interface, simplify facade pattern * remove custom save for plotting, use Serializable From cef388b027df201cf3d972c3f6966308e7c9e177 Mon Sep 17 00:00:00 2001 From: robfiras Date: Mon, 26 Jun 2023 16:04:20 +0200 Subject: [PATCH 09/50] Added new MultiMujoco interface that allows to pass a list of Mujoco XMLs to create N environments at a time. This class allows to randomly sample one environment at a time to perform a rollout. --- mushroom_rl/environments/__init__.py | 2 +- mushroom_rl/environments/mujoco.py | 221 ++++++++++++++++++++++++--- mushroom_rl/utils/mujoco/viewer.py | 14 +- 3 files changed, 218 insertions(+), 19 deletions(-) diff --git a/mushroom_rl/environments/__init__.py b/mushroom_rl/environments/__init__.py index 0b4c53cba..c966a67d6 100644 --- a/mushroom_rl/environments/__init__.py +++ b/mushroom_rl/environments/__init__.py @@ -43,7 +43,7 @@ try: MuJoCo = None - from .mujoco import MuJoCo + from .mujoco import MuJoCo, MultiMuJoCo from .mujoco_envs import * except ImportError: pass diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 486fe9a14..46f299f23 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -77,23 +77,9 @@ def __init__(self, file_name, actuation_spec, observation_spec, gamma, horizon, # Read the actuation spec and build the mapping between actions and ids # as well as their limits - if len(actuation_spec) == 0: - self._action_indices = [i for i in range(0, len(self._data.actuator_force))] - else: - self._action_indices = [] - for name in actuation_spec: - 
self._action_indices.append(self._model.actuator(name).id) + self._action_indices = self.get_action_indices(self._model, self._data, actuation_spec) - low = [] - high = [] - for index in self._action_indices: - if self._model.actuator_ctrllimited[index]: - low.append(self._model.actuator_ctrlrange[index][0]) - high.append(self._model.actuator_ctrlrange[index][1]) - else: - low.append(-np.inf) - high.append(np.inf) - action_space = Box(np.array(low), np.array(high)) + action_space = self.get_action_space(self._action_indices, self._model) # Read the observation spec to build a mapping at every step. It is # ensured that the values appear in the order they are specified. @@ -436,7 +422,6 @@ def setup(self, obs): if obs is not None: self.obs_helper._modify_data(self._data, obs) - def get_all_observation_keys(self): """ A function that returns all observation keys defined in the observation specification. @@ -451,6 +436,55 @@ def get_all_observation_keys(self): def dt(self): return self._timestep * self._n_intermediate_steps * self._n_substeps + @staticmethod + def get_action_indices(model, data, actuation_spec): + """ + Returns the action indices given the MuJoCo model, data, and actuation_spec. + + Args: + model: MuJoCo model. + data: MuJoCo data structure. + actuation_spec (list): A list specifying the names of the joints + which should be controllable by the agent. Can be left empty + when all actuators should be used; + + Returns: + A list of actuator indices. + + """ + if len(actuation_spec) == 0: + action_indices = [i for i in range(0, len(data.actuator_force))] + else: + action_indices = [] + for name in actuation_spec: + action_indices.append(model.actuator(name).id) + return action_indices + + @staticmethod + def get_action_space(action_indices, model): + """ + Returns the action space bounding box given the action_indices and the model. + + Args: + action_indices (list): A list of actuator indices. + model: MuJoCo model. 
+ + Returns: + A bounding box for the action space. + + """ + low = [] + high = [] + for index in action_indices: + if model.actuator_ctrllimited[index]: + low.append(model.actuator_ctrlrange[index][0]) + high.append(model.actuator_ctrlrange[index][1]) + else: + low.append(-np.inf) + high.append(np.inf) + action_space = Box(np.array(low), np.array(high)) + return action_space + @staticmethod def user_warning_raise_exception(warning): """ @@ -468,3 +502,156 @@ def user_warning_raise_exception(warning): raise RuntimeError(warning + 'Check for NaN in simulation.') else: raise RuntimeError('Got MuJoCo Warning: ' + warning) + + +class MultiMuJoCo(MuJoCo): + """ + Class to create N environments at the same time using the MuJoCo simulator. This class is not meant to run + N environments in parallel, but to load and create N environments, and randomly sample one of the + environment every episode. + + """ + + def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, timestep=None, + n_substeps=1, n_intermediate_steps=1, additional_data_spec=None, collision_groups=None, **viewer_params): + """ + Constructor. + + Args: + file_names (list): The list of paths to the XML files to create the + environments; + actuation_spec (list): A list specifying the names of the joints + which should be controllable by the agent. Can be left empty + when all actuators should be used; + observation_spec (list): A list containing the names of data that + should be made available to the agent as an observation and + their type (ObservationType). They are combined with a key, + which is used to access the data. An entry in the list + is given by: (key, name, type); + gamma (float): The discounting factor of the environment; + horizon (int): The maximum horizon for the environment; + timestep (float): The timestep used by the MuJoCo + simulator. 
If None, the default timestep specified in the XML will be used; + n_substeps (int, 1): The number of substeps to use by the MuJoCo + simulator. An action given by the agent will be applied for + n_substeps before the agent receives the next observation and + can act accordingly; + n_intermediate_steps (int, 1): The number of steps between every action + taken by the agent. Similar to n_substeps but allows the user + to modify, control and access intermediate states. + additional_data_spec (list, None): A list containing the data fields of + interest, which should be read from or written to during + simulation. The entries are given as the following tuples: + (key, name, type) key is a string for later referencing in the + "read_data" and "write_data" methods. The name is the name of + the object in the XML specification and the type is the + ObservationType; + collision_groups (list, None): A list containing groups of geoms for + which collisions should be checked during simulation via + ``check_collision``. The entries are given as: + ``(key, geom_names)``, where key is a string for later + referencing in the "check_collision" method, and geom_names is + a list of geom names in the XML specification. + **viewer_params: other parameters to be passed to the viewer. + See MujocoGlfwViewer documentation for the available options. 
+ + """ + # Create the simulation + assert type(file_names) + self._models = [mujoco.MjModel.from_xml_path(f) for f in file_names] + self._current_model_idx = 0 + self._model = self._models[self._current_model_idx] + if timestep is not None: + self._model.opt.timestep = timestep + self._timestep = timestep + else: + self._timestep = self._model.opt.timestep + + self._datas = [mujoco.MjData(m) for m in self._models] + self._data = self._datas[self._current_model_idx] + + self._n_intermediate_steps = n_intermediate_steps + self._n_substeps = n_substeps + self._viewer_params = viewer_params + self._viewer = None + self._obs = None + + # Read the actuation spec and build the mapping between actions and ids + # as well as their limits + self._action_indices = self.get_action_indices(self._model, self._data, actuation_spec) + + action_space = self.get_action_space(self._action_indices, self._model) + + # all env need to have the same action space, do sanity check + for m, d in zip(self._models, self._datas): + action_ind = self.get_action_indices(m, d, actuation_spec) + action_sp = self.get_action_space(action_ind, m) + if not np.array_equal(action_ind, self._action_indices) or \ + not np.array_equal(action_space.low, action_sp.low) or\ + not np.array_equal(action_space.high, action_sp.high): + raise ValueError("The provided environments differ in the their action spaces. " + "This is not allowed.") + + # Read the observation spec to build a mapping at every step. It is + # ensured that the values appear in the order they are specified. 
+ self.obs_helpers = [ObservationHelper(observation_spec, m, d, max_joint_velocity=3) + for m, d in zip(self._models, self._datas)] + self.obs_helper = self.obs_helpers[0] + + observation_space = Box(*self.obs_helper.get_obs_limits()) + + # multi envs with different obs limits are now allowed, do sanity check + for oh in self.obs_helpers: + low, high = self.obs_helper.get_obs_limits() + if not np.array_equal(low, observation_space.low) or not np.array_equal(high, observation_space.high): + raise ValueError("The provided environments differ in the their observation limits. " + "This is not allowed.") + + # Pre-process the additional data to allow easier writing and reading + # to and from arrays in MuJoCo + self.additional_data = {} + if additional_data_spec is not None: + for key, name, ot in additional_data_spec: + self.additional_data[key] = (name, ot) + + # Pre-process the collision groups for "fast" detection of contacts + self.collision_groups = {} + if collision_groups is not None: + for name, geom_names in collision_groups: + self.collision_groups[name] = {mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_GEOM, geom_name) + for geom_name in geom_names} + + # Finally, we create the MDP information and call the constructor of + # the parent class + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + + mdp_info = self._modify_mdp_info(mdp_info) + + # set the warning callback to stop the simulation when a mujoco warning occurs + mujoco.set_mju_user_warning(self.user_warning_raise_exception) + + # needed for recording + self._record_video = False + self._video = None + + # call grad-parent class, not MuJoCo + super(MuJoCo, self).__init__(mdp_info) + + def reset(self, obs=None): + mujoco.mj_resetData(self._model, self._data) + + self._current_model_idx = np.random.randint(0, len(self._models)) + self._model = self._models[self._current_model_idx] + self._data = self._datas[self._current_model_idx] + self.obs_helper = 
self.obs_helpers[self._current_model_idx] + self.setup() + + if self._viewer is not None: + self._viewer.load_new_model(self._model) + + self._obs = self._create_observation(self.obs_helper.build_obs(self._data)) + return self._modify_observation(self._obs) + + @property + def more_than_one_env(self): + return len(self._models) > 1 \ No newline at end of file diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index 337a7daa9..c245028e0 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -60,6 +60,19 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self.custom_render_callback = custom_render_callback + def load_new_model(self, model): + """ Loads a new model to the viewer, and resets the scene, camera, viewport, and context. """ + self._model = model + self._scene = mujoco.MjvScene(model, 1000) + self._scene_option = mujoco.MjvOption() + + self._camera = mujoco.MjvCamera() + mujoco.mjv_defaultFreeCamera(model, self._camera) + self._camera.distance *= 0.3 + + self._viewport = mujoco.MjrRect(0, 0, self._width, self._height) + self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(100)) + def mouse_button(self, window, button, act, mods): self.button_left = glfw.get_mouse_button(self._window, glfw.MOUSE_BUTTON_LEFT) == glfw.PRESS self.button_right = glfw.get_mouse_button(self._window, glfw.MOUSE_BUTTON_RIGHT) == glfw.PRESS @@ -168,4 +181,3 @@ def read_pixels(self, depth=False): def stop(self): glfw.destroy_window(self._window) - From f96fda03f104fafe363fff3efc82113ff010c4c9 Mon Sep 17 00:00:00 2001 From: robfiras Date: Tue, 27 Jun 2023 14:45:50 +0200 Subject: [PATCH 10/50] Bugfix MultiMujoco. - fixed bug in which the MultiMujoco env was relying on an older version of the ObservationHelper. 
--- mushroom_rl/environments/mujoco.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 46f299f23..5729ba576 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -513,7 +513,8 @@ class MultiMuJoCo(MuJoCo): """ def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, timestep=None, - n_substeps=1, n_intermediate_steps=1, additional_data_spec=None, collision_groups=None, **viewer_params): + n_substeps=1, n_intermediate_steps=1, additional_data_spec=None, collision_groups=None, + max_joint_vel=None, **viewer_params): """ Constructor. @@ -552,6 +553,9 @@ def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, ``(key, geom_names)``, where key is a string for later referencing in the "check_collision" method, and geom_names is a list of geom names in the XML specification. + max_joint_vel (list, None): A list with the maximum joint velocities which are provided in the mdp_info. + The list has to define a maximum velocity for every occurrence of JOINT_VEL in the observation_spec. The + velocity will not be limited in mujoco **viewer_params: other parameters to be passed to the viewer. See MujocoGlfwViewer documentation for the available options. @@ -594,9 +598,10 @@ def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, # Read the observation spec to build a mapping at every step. It is # ensured that the values appear in the order they are specified. 
- self.obs_helpers = [ObservationHelper(observation_spec, m, d, max_joint_velocity=3) + self.obs_helpers = [ObservationHelper(observation_spec, self._model, self._data, + max_joint_velocity=max_joint_vel) for m, d in zip(self._models, self._datas)] - self.obs_helper = self.obs_helpers[0] + self.obs_helper = self.obs_helpers[self._current_model_idx] observation_space = Box(*self.obs_helper.get_obs_limits()) @@ -649,7 +654,7 @@ def reset(self, obs=None): if self._viewer is not None: self._viewer.load_new_model(self._model) - self._obs = self._create_observation(self.obs_helper.build_obs(self._data)) + self._obs = self._create_observation(self.obs_helper._build_obs(self._data)) return self._modify_observation(self._obs) @property From e201338bf0263028ad0f1846cfc6eb0533d71b64 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Wed, 28 Jun 2023 14:17:23 +0200 Subject: [PATCH 11/50] Added dt to MDPInfo - now MDPInfo has a field dt representing the control timestep - all environment set correctly or using a default value dt - removed _dt variables in the mushroom environments - code and comments refactoring - fixed docstring in viewer --- mushroom_rl/core/environment.py | 9 ++++-- mushroom_rl/environments/atari.py | 3 +- mushroom_rl/environments/car_on_hill.py | 8 ++--- mushroom_rl/environments/cart_pole.py | 9 +++--- mushroom_rl/environments/dm_control_env.py | 2 +- mushroom_rl/environments/finite_mdp.py | 7 +++-- mushroom_rl/environments/grid_world.py | 30 ++++++++++++++++--- mushroom_rl/environments/gym_env.py | 7 ++--- mushroom_rl/environments/habitat_env.py | 5 ++-- mushroom_rl/environments/igibson_env.py | 5 ++-- mushroom_rl/environments/inverted_pendulum.py | 8 ++--- mushroom_rl/environments/lqr.py | 15 +++++----- mushroom_rl/environments/minigrid_env.py | 3 +- mushroom_rl/environments/mujoco.py | 2 +- mushroom_rl/environments/puddle_world.py | 5 ++-- mushroom_rl/environments/pybullet.py | 2 +- .../pybullet_envs/air_hockey/defend.py | 28 ----------------- 
.../pybullet_envs/air_hockey/hit.py | 29 ------------------ .../pybullet_envs/air_hockey/prepare.py | 30 ------------------- .../pybullet_envs/air_hockey/repel.py | 28 ----------------- mushroom_rl/environments/segway.py | 11 ++++--- mushroom_rl/environments/ship_steering.py | 14 ++++----- mushroom_rl/utils/viewer.py | 4 +-- 23 files changed, 89 insertions(+), 175 deletions(-) diff --git a/mushroom_rl/core/environment.py b/mushroom_rl/core/environment.py index 0703dac25..7a2fa485b 100644 --- a/mushroom_rl/core/environment.py +++ b/mushroom_rl/core/environment.py @@ -9,7 +9,7 @@ class MDPInfo(Serializable): This class is used to store the information of the environment. """ - def __init__(self, observation_space, action_space, gamma, horizon): + def __init__(self, observation_space, action_space, gamma, horizon, dt=1e-1): """ Constructor. @@ -17,19 +17,22 @@ def __init__(self, observation_space, action_space, gamma, horizon): observation_space ([Box, Discrete]): the state space; action_space ([Box, Discrete]): the action space; gamma (float): the discount factor; - horizon (int): the horizon. + horizon (int): the horizon; + dt (float, 1e-1): the control timestep of the environment. """ self.observation_space = observation_space self.action_space = action_space self.gamma = gamma self.horizon = horizon + self.dt = dt self._add_save_attr( observation_space='mushroom', action_space='mushroom', gamma='primitive', - horizon='primitive' + horizon='primitive', + dt='primitive' ) @property diff --git a/mushroom_rl/environments/atari.py b/mushroom_rl/environments/atari.py index 5abc56118..c11321ea9 100644 --- a/mushroom_rl/environments/atari.py +++ b/mushroom_rl/environments/atari.py @@ -91,7 +91,8 @@ def __init__(self, name, width=84, height=84, ends_at_life=False, low=0., high=255., shape=(history_length, self._img_size[1], self._img_size[0])) horizon = np.inf # the gym time limit is used. 
gamma = .99 - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + dt = 1/60 + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) super().__init__(mdp_info) diff --git a/mushroom_rl/environments/car_on_hill.py b/mushroom_rl/environments/car_on_hill.py index 6af526e68..32eacdc02 100644 --- a/mushroom_rl/environments/car_on_hill.py +++ b/mushroom_rl/environments/car_on_hill.py @@ -27,13 +27,13 @@ def __init__(self, horizon=100, gamma=.95): high = np.array([self.max_pos, self.max_velocity]) self._g = 9.81 self._m = 1. - self._dt = .1 self._discrete_actions = [-4., 4.] # MDP properties + dt = .1 observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Discrete(2) - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) # Visualization self._viewer = Viewer(1, 1) @@ -51,7 +51,7 @@ def reset(self, state=None): def step(self, action): action = self._discrete_actions[action[0]] sa = np.append(self._state, action) - new_state = odeint(self._dpds, sa, [0, self._dt]) + new_state = odeint(self._dpds, sa, [0, self.info.dt]) self._state = new_state[-1, :-1] @@ -93,7 +93,7 @@ def render(self, record=False): frame = self._viewer.get_frame() if record else None - self._viewer.display(self._dt) + self._viewer.display(self.info.dt) return frame diff --git a/mushroom_rl/environments/cart_pole.py b/mushroom_rl/environments/cart_pole.py index 1ba0a03c5..fe15afe37 100644 --- a/mushroom_rl/environments/cart_pole.py +++ b/mushroom_rl/environments/cart_pole.py @@ -36,15 +36,15 @@ def __init__(self, m=2., M=8., l=.5, g=9.8, mu=1e-2, max_u=50., noise_u=10., self._g = g self._alpha = 1 / (self._m + self._M) self._mu = mu - self._dt = .1 self._max_u = max_u self._noise_u = noise_u high = np.array([np.inf, np.inf]) # MDP properties + dt = .1 observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Discrete(3) - mdp_info = 
MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) # Visualization self._viewer = Viewer(2.5 * l, 2.5 * l) @@ -76,8 +76,7 @@ def step(self, action): self._last_u = u u += np.random.uniform(-self._noise_u, self._noise_u) - new_state = odeint(self._dynamics, self._state, [0, self._dt], - (u,)) + new_state = odeint(self._dynamics, self._state, [0, self.info.dt], (u,)) self._state = np.array(new_state[-1]) self._state[0] = normalize_angle(self._state[0]) @@ -109,7 +108,7 @@ def render(self, record=False): frame = self._viewer.get_frame() if record else None - self._viewer.display(self._dt) + self._viewer.display(self.info.dt) return frame diff --git a/mushroom_rl/environments/dm_control_env.py b/mushroom_rl/environments/dm_control_env.py index 795e0c349..987e00fc4 100644 --- a/mushroom_rl/environments/dm_control_env.py +++ b/mushroom_rl/environments/dm_control_env.py @@ -62,7 +62,7 @@ def __init__(self, domain_name, task_name, horizon=None, gamma=0.99, task_kwargs # MDP properties action_space = self._convert_action_space(self.env.action_spec()) observation_space = self._convert_observation_space(self.env.observation_spec()) - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) self._viewer = ImageViewer((width_screen, height_screen), dt) self._camera_id = camera_id diff --git a/mushroom_rl/environments/finite_mdp.py b/mushroom_rl/environments/finite_mdp.py index 71dff6bec..b49995c00 100644 --- a/mushroom_rl/environments/finite_mdp.py +++ b/mushroom_rl/environments/finite_mdp.py @@ -9,7 +9,7 @@ class FiniteMDP(Environment): Finite Markov Decision Process. """ - def __init__(self, p, rew, mu=None, gamma=.9, horizon=np.inf): + def __init__(self, p, rew, mu=None, gamma=.9, horizon=np.inf, dt=1e-1): """ Constructor. 
@@ -18,7 +18,8 @@ def __init__(self, p, rew, mu=None, gamma=.9, horizon=np.inf): rew (np.ndarray): reward matrix; mu (np.ndarray, None): initial state probability distribution; gamma (float, .9): discount factor; - horizon (int, np.inf): the horizon. + horizon (int, np.inf): the horizon; + dt (float, 1e-1): the control timestep of the environment. """ assert p.shape == rew.shape @@ -34,7 +35,7 @@ def __init__(self, p, rew, mu=None, gamma=.9, horizon=np.inf): action_space = spaces.Discrete(p.shape[1]) horizon = horizon gamma = gamma - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) super().__init__(mdp_info) diff --git a/mushroom_rl/environments/grid_world.py b/mushroom_rl/environments/grid_world.py index 9e27b1890..360aead01 100644 --- a/mushroom_rl/environments/grid_world.py +++ b/mushroom_rl/environments/grid_world.py @@ -116,13 +116,24 @@ class GridWorld(AbstractGridWorld): Standard grid world. """ - def __init__(self, height, width, goal, start=(0, 0)): + def __init__(self, height, width, goal, start=(0, 0), dt=0.1): + """ + Constructor + + Args: + height (int): height of the grid; + width (int): width of the grid; + goal (tuple): 2D coordinates of the goal state; + start (tuple, (0, 0)): 2D coordinates of the starting state; + dt (float, 0.1): the control timestep of the environment. + + """ # MDP properties observation_space = spaces.Discrete(height * width) action_space = spaces.Discrete(4) horizon = 100 gamma = .9 - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) super().__init__(mdp_info, height, width, start, goal) @@ -145,13 +156,24 @@ class GridWorldVanHasselt(AbstractGridWorld): "Double Q-Learning". Hasselt H. V.. 2010. 
""" - def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0)): + def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0), dt=0.1): + """ + Constructor + + Args: + height (int, 3): height of the grid; + width (int, 3): width of the grid; + goal (tuple, (0, 2)): 2D coordinates of the goal state; + start (tuple, (2, 0)): 2D coordinates of the starting state; + dt (float, 0.1): the control timestep of the environment. + + """ # MDP properties observation_space = spaces.Discrete(height * width) action_space = spaces.Discrete(4) horizon = np.inf gamma = .95 - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) super().__init__(mdp_info, height, width, start, goal) diff --git a/mushroom_rl/environments/gym_env.py b/mushroom_rl/environments/gym_env.py index 71dfdd9c3..66f3e12bf 100644 --- a/mushroom_rl/environments/gym_env.py +++ b/mushroom_rl/environments/gym_env.py @@ -53,8 +53,6 @@ def __init__(self, name, horizon=None, gamma=0.99, wrappers=None, wrappers_args= self.env = gym.make(name, **env_args) - self._render_dt = self.env.unwrapped.dt if hasattr(self.env.unwrapped, "dt") else 0.0 - if wrappers is not None: if wrappers_args is None: wrappers_args = [dict()] * len(wrappers) @@ -71,9 +69,10 @@ def __init__(self, name, horizon=None, gamma=0.99, wrappers=None, wrappers_args= gym_spaces.MultiDiscrete) assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete) + dt = self.env.unwrapped.dt if hasattr(self.env.unwrapped, "dt") else 0.1 action_space = self._convert_gym_space(self.env.action_space) observation_space = self._convert_gym_space(self.env.observation_space) - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) if isinstance(action_space, Discrete): self._convert_action = lambda a: a[0] @@ -102,7 +101,7 @@ def render(self, record=False): 
self.env.render(mode='human') self._first = False - time.sleep(self._render_dt) + time.sleep(self.info.dt) if record: return self.env.render(mode='rgb_array') diff --git a/mushroom_rl/environments/habitat_env.py b/mushroom_rl/environments/habitat_env.py index 13050689f..685550517 100644 --- a/mushroom_rl/environments/habitat_env.py +++ b/mushroom_rl/environments/habitat_env.py @@ -233,17 +233,18 @@ def __init__(self, wrapper, config_file, base_config_file=None, horizon=None, ga self._img_size = env.observation_space.shape[0:2] # MDP properties + dt = 1/10 action_space = self.env.action_space observation_space = Box( low=0., high=255., shape=(3, self._img_size[1], self._img_size[0])) - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) if isinstance(action_space, Discrete): self._convert_action = lambda a: a[0] else: self._convert_action = lambda a: a - self._viewer = ImageViewer((self._img_size[1], self._img_size[0]), 1/10) + self._viewer = ImageViewer((self._img_size[1], self._img_size[0]), dt) Environment.__init__(self, mdp_info) diff --git a/mushroom_rl/environments/igibson_env.py b/mushroom_rl/environments/igibson_env.py index 050fb506c..a9d7b80e6 100644 --- a/mushroom_rl/environments/igibson_env.py +++ b/mushroom_rl/environments/igibson_env.py @@ -95,17 +95,18 @@ def __init__(self, config_file, horizon=None, gamma=0.99, is_discrete=False, self._img_size = env.observation_space.shape[0:2] # MDP properties + dt = 1/60 action_space = self.env.action_space observation_space = Box( low=0., high=255., shape=(3, self._img_size[1], self._img_size[0])) - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) if isinstance(action_space, Discrete): self._convert_action = lambda a: a[0] else: self._convert_action = lambda a: a - self._viewer = ImageViewer((self._img_size[1], self._img_size[0]), 
1/60) + self._viewer = ImageViewer((self._img_size[1], self._img_size[0]), dt) self._image = None Environment.__init__(self, mdp_info) diff --git a/mushroom_rl/environments/inverted_pendulum.py b/mushroom_rl/environments/inverted_pendulum.py index 8d6d56813..75af58d61 100644 --- a/mushroom_rl/environments/inverted_pendulum.py +++ b/mushroom_rl/environments/inverted_pendulum.py @@ -38,15 +38,15 @@ def __init__(self, random_start=False, m=1., l=1., g=9.8, mu=1e-2, self._g = g self._mu = mu self._random = random_start - self._dt = .01 self._max_u = max_u self._max_omega = 5 / 2 * np.pi high = np.array([np.pi, self._max_omega]) # MDP properties + dt = .01 observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Box(low=np.array([-max_u]), high=np.array([max_u])) - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) # Visualization self._viewer = Viewer(2.5 * l, 2.5 * l) @@ -73,7 +73,7 @@ def reset(self, state=None): def step(self, action): u = self._bound(action[0], -self._max_u, self._max_u) - new_state = odeint(self._dynamics, self._state, [0, self._dt], args=(u.item(),)) + new_state = odeint(self._dynamics, self._state, [0, self.info.dt], args=(u.item(),)) self._state = np.array(new_state[-1]) self._state[0] = normalize_angle(self._state[0]) @@ -99,7 +99,7 @@ def render(self, record=False): frame = self._viewer.get_frame() if record else None - self._viewer.display(self._dt) + self._viewer.display(self.info.dt) return frame diff --git a/mushroom_rl/environments/lqr.py b/mushroom_rl/environments/lqr.py index 43949338e..8e317d693 100644 --- a/mushroom_rl/environments/lqr.py +++ b/mushroom_rl/environments/lqr.py @@ -25,9 +25,8 @@ class LQR(Environment): Parisi S., Pirotta M., Smacchia N., Bascetta L., Restelli M.. 
2014 """ - def __init__(self, A, B, Q, R, max_pos=np.inf, max_action=np.inf, - random_init=False, episodic=False, gamma=0.9, horizon=50, - initial_state=None): + def __init__(self, A, B, Q, R, max_pos=np.inf, max_action=np.inf, random_init=False, episodic=False, gamma=0.9, + horizon=50, initial_state=None, dt=0.1): """ Constructor. @@ -42,7 +41,8 @@ def __init__(self, A, B, Q, R, max_pos=np.inf, max_action=np.inf, episodic (bool, False): end the episode when the state goes over the threshold; gamma (float, 0.9): discount factor; - horizon (int, 50): horizon of the mdp. + horizon (int, 50): horizon of the mdp; + dt (float, 0.1): the control timestep of the environment. """ self.A = A @@ -65,7 +65,9 @@ def __init__(self, A, B, Q, R, max_pos=np.inf, max_action=np.inf, observation_space = spaces.Box(low=low_x, high=high_x) action_space = spaces.Box(low=low_u, high=high_u) - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) + + self._state = None super().__init__(mdp_info) @@ -143,8 +145,7 @@ def step(self, action): reward = -self._max_pos ** 2 * 10 absorbing = True else: - self._state = self._bound(self._state, - self.info.observation_space.low, + self._state = self._bound(self._state, self.info.observation_space.low, self.info.observation_space.high) return self._state, reward, absorbing, {} diff --git a/mushroom_rl/environments/minigrid_env.py b/mushroom_rl/environments/minigrid_env.py index 36724ae9c..acbb9084e 100644 --- a/mushroom_rl/environments/minigrid_env.py +++ b/mushroom_rl/environments/minigrid_env.py @@ -69,7 +69,8 @@ def __init__(self, name, horizon=None, gamma=0.99, history_length=4, observation_space = Box( low=0., high=obs_high, shape=(history_length, self._img_size[1], self._img_size[0])) self.env.max_steps = horizon + 1 # Hack to ignore gym time limit (do not use np.inf, since MiniGrid returns r(t) = 1 - 0.9t/T) - mdp_info = MDPInfo(observation_space, 
action_space, gamma, horizon) + dt = 1/self.env.unwrapped.metadata["render_fps"] + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) Environment.__init__(self, mdp_info) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 5729ba576..6f4a57e26 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -103,7 +103,7 @@ def __init__(self, file_name, actuation_spec, observation_spec, gamma, horizon, # Finally, we create the MDP information and call the constructor of # the parent class - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, self.dt) mdp_info = self._modify_mdp_info(mdp_info) diff --git a/mushroom_rl/environments/puddle_world.py b/mushroom_rl/environments/puddle_world.py index e74ff06ed..140623572 100644 --- a/mushroom_rl/environments/puddle_world.py +++ b/mushroom_rl/environments/puddle_world.py @@ -51,9 +51,10 @@ def __init__(self, start=None, goal=None, goal_threshold=.1, noise_step=.025, self._actions[i][i // 2] = thrust * (i % 2 * 2 - 1) # MDP properties + dt = 0.1 action_space = Discrete(5) observation_space = Box(0., 1., shape=(2,)) - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) # Visualization self._pixels = None @@ -116,7 +117,7 @@ def render(self, record=False): frame = self._viewer.get_frame() if record else None - self._viewer.display(0.1) + self._viewer.display(self.info.dt) return frame diff --git a/mushroom_rl/environments/pybullet.py b/mushroom_rl/environments/pybullet.py index b36dc0753..11931ae88 100644 --- a/mushroom_rl/environments/pybullet.py +++ b/mushroom_rl/environments/pybullet.py @@ -73,7 +73,7 @@ def __init__(self, files, actuation_spec, observation_spec, gamma, action_space = Box(*self._indexer.action_limits) observation_space = 
Box(*self._indexer.observation_limits) - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, self.dt) # Let the child class modify the mdp_info data structure mdp_info = self._modify_mdp_info(mdp_info) diff --git a/mushroom_rl/environments/pybullet_envs/air_hockey/defend.py b/mushroom_rl/environments/pybullet_envs/air_hockey/defend.py index 5dd0c5cb9..c11c9dfda 100644 --- a/mushroom_rl/environments/pybullet_envs/air_hockey/defend.py +++ b/mushroom_rl/environments/pybullet_envs/air_hockey/defend.py @@ -159,31 +159,3 @@ def _create_observation(self, state): obs = super(AirHockeyDefendBullet, self)._create_observation(state) return np.append(obs, [self.has_hit, self.has_bounce]) - -if __name__ == '__main__': - import time - - env = AirHockeyDefendBullet(debug_gui=True, obs_noise=False, obs_delay=False, n_intermediate_steps=4, random_init=True) - - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() - while True: - action = np.zeros(3) - observation, reward, done, info = env.step(action) - gamma *= env.info.gamma - J += gamma * reward - R += reward - steps += 1 - - - if done or steps > env.info.horizon: - print("J: ", J, " R: ", R) - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() - time.sleep(1 / 60.) 
diff --git a/mushroom_rl/environments/pybullet_envs/air_hockey/hit.py b/mushroom_rl/environments/pybullet_envs/air_hockey/hit.py index ec91fbda8..ecee285c3 100644 --- a/mushroom_rl/environments/pybullet_envs/air_hockey/hit.py +++ b/mushroom_rl/environments/pybullet_envs/air_hockey/hit.py @@ -161,32 +161,3 @@ def _create_observation(self, state): obs = super(AirHockeyHitBullet, self)._create_observation(state) return np.append(obs, [self.has_hit]) - -if __name__ == '__main__': - import time - - env = AirHockeyHitBullet(debug_gui=True, env_noise=False, obs_noise=False, obs_delay=False, n_intermediate_steps=4, - table_boundary_terminate=True, random_init=True, init_robot_state="right") - - env.reset() - R = 0. - J = 0. - gamma = 1. - steps = 0 - - while True: - # action = np.random.randn(3) * 5 - action = np.array([0] * 3) - observation, reward, done, info = env.step(action) - gamma *= env.info.gamma - J += gamma * reward - R += reward - steps += 1 - if done or steps > env.info.horizon: - print("J: ", J, " R: ", R) - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() - time.sleep(1 / 60.) diff --git a/mushroom_rl/environments/pybullet_envs/air_hockey/prepare.py b/mushroom_rl/environments/pybullet_envs/air_hockey/prepare.py index 8b33a11e0..9c5c96ea4 100644 --- a/mushroom_rl/environments/pybullet_envs/air_hockey/prepare.py +++ b/mushroom_rl/environments/pybullet_envs/air_hockey/prepare.py @@ -184,33 +184,3 @@ def _simulation_post_step(self): def _create_observation(self, state): obs = super(AirHockeyPrepareBullet, self)._create_observation(state) return np.append(obs, [self.has_hit]) - - -if __name__ == '__main__': - import time - - env = AirHockeyPrepareBullet(debug_gui=True, obs_noise=False, obs_delay=False, n_intermediate_steps=4, random_init=True, - init_state="bottom") - - R = 0. - J = 0. - gamma = 1. 
- steps = 0 - env.reset() - while True: - - # action = np.random.randn(3) * 5 - action = np.array([0, 0, 0]) - observation, reward, done, info = env.step(action) - gamma *= env.info.gamma - J += gamma * reward - R += reward - steps += 4 - if done or steps > env.info.horizon * 2: - print("J: ", J, " R: ", R) - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() - time.sleep(1 / 60.) \ No newline at end of file diff --git a/mushroom_rl/environments/pybullet_envs/air_hockey/repel.py b/mushroom_rl/environments/pybullet_envs/air_hockey/repel.py index 87b40b441..95233ac9b 100644 --- a/mushroom_rl/environments/pybullet_envs/air_hockey/repel.py +++ b/mushroom_rl/environments/pybullet_envs/air_hockey/repel.py @@ -152,31 +152,3 @@ def _simulation_post_step(self): def _create_observation(self, state): obs = super(AirHockeyRepelBullet, self)._create_observation(state) return np.append(obs, [self.has_hit]) - - -if __name__ == "__main__": - import time - - env = AirHockeyRepelBullet(debug_gui=True, env_noise=False, obs_noise=False, obs_delay=False, n_intermediate_steps=4, - random_init=True) - - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() - while True: - action = np.zeros(3) - observation, reward, done, info = env.step(action) - gamma *= env.info.gamma - J += gamma * reward - R += reward - steps += 1 - if done or steps > env.info.horizon: - print("J: ", J, " R: ", R) - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() - time.sleep(1 / 60.) 
diff --git a/mushroom_rl/environments/segway.py b/mushroom_rl/environments/segway.py index 9c792f0fa..f63958b4b 100644 --- a/mushroom_rl/environments/segway.py +++ b/mushroom_rl/environments/segway.py @@ -31,7 +31,6 @@ def __init__(self, random_start=False): self._Ir = 4.54e-4 * 2 self._l = 13.8e-2 self._r = 5.5e-2 - self._dt = 1e-2 self._g = 9.81 self._max_u = 5 @@ -40,11 +39,12 @@ def __init__(self, random_start=False): high = np.array([-np.pi / 2, 15, 75]) # MDP properties + dt = 1e-2 observation_space = spaces.Box(low=-high, high=high) action_space = spaces.Box(low=np.array([-self._max_u]), high=np.array([self._max_u])) horizon = 300 - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) # Visualization self._viewer = Viewer(5 * self._l, 5 * self._l) @@ -70,8 +70,7 @@ def reset(self, state=None): def step(self, action): u = self._bound(action[0], -self._max_u, self._max_u) - new_state = odeint(self._dynamics, self._state, [0, self._dt], - (u,)) + new_state = odeint(self._dynamics, self._state, [0, self.info.dt], (u,)) self._state = np.array(new_state[-1]) self._state[0] = normalize_angle(self._state[0]) @@ -120,7 +119,7 @@ def render(self, record=False): start = 2.5 * self._l * np.ones(2) end = 2.5 * self._l * np.ones(2) - dx = -self._state[2] * self._r * self._dt + dx = -self._state[2] * self._r * self.info.dt self._last_x += dx @@ -141,7 +140,7 @@ def render(self, record=False): frame = self._viewer.get_frame() if record else None - self._viewer.display(self._dt) + self._viewer.display(self.info.dt) return frame diff --git a/mushroom_rl/environments/ship_steering.py b/mushroom_rl/environments/ship_steering.py index f5967ee1a..e2cc7c073 100644 --- a/mushroom_rl/environments/ship_steering.py +++ b/mushroom_rl/environments/ship_steering.py @@ -29,7 +29,6 @@ def __init__(self, small=True, n_steps_action=3): self.omega_max = np.array([np.pi / 12.]) self._v = 3. self._T = 5. 
- self._dt = .2 self._gate_s = np.empty(2) self._gate_e = np.empty(2) self._gate_s[0] = 100 if small else 350 @@ -43,11 +42,12 @@ def __init__(self, small=True, n_steps_action=3): self.n_steps_action = n_steps_action # MDP properties + dt = .2 observation_space = spaces.Box(low=low, high=high) action_space = spaces.Box(low=-self.omega_max, high=self.omega_max) horizon = 5000 gamma = .99 - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) # Visualization self._viewer = Viewer(self.field_size, self.field_size, @@ -78,10 +78,10 @@ def step(self, action): for _ in range(self.n_steps_action): state = new_state new_state = np.empty(4) - new_state[0] = state[0] + self._v * np.cos(state[2]) * self._dt - new_state[1] = state[1] + self._v * np.sin(state[2]) * self._dt - new_state[2] = normalize_angle(state[2] + state[3] * self._dt) - new_state[3] = state[3] + (r - state[3]) * self._dt / self._T + new_state[0] = state[0] + self._v * np.cos(state[2]) * self.info.dt + new_state[1] = state[1] + self._v * np.sin(state[2]) * self.info.dt + new_state[2] = normalize_angle(state[2] + state[3] * self.info.dt) + new_state[3] = state[3] + (r - state[3]) * self.info.dt / self._T if new_state[0] > self.field_size \ or new_state[1] > self.field_size \ @@ -122,7 +122,7 @@ def render(self, record=False): frame = self._viewer.get_frame() if record else None - self._viewer.display(self._dt) + self._viewer.display(self.info.dt) return frame diff --git a/mushroom_rl/utils/viewer.py b/mushroom_rl/utils/viewer.py index d29c02659..7b5a74700 100644 --- a/mushroom_rl/utils/viewer.py +++ b/mushroom_rl/utils/viewer.py @@ -78,8 +78,8 @@ def __init__(self, env_width, env_height, width=500, height=500, Constructor. 
Args: - env_width (int): The x dimension limit of the desired environment; - env_height (int): The y dimension limit of the desired environment; + env_width (float): The x dimension limit of the desired environment; + env_height (float): The y dimension limit of the desired environment; width (int, 500): width of the environment window; height (int, 500): height of the environment window; background (tuple, (0, 0, 0)): background color of the screen. From bc541cc6e0be851976c54337dc86d3ee28364f34 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Wed, 28 Jun 2023 14:22:01 +0200 Subject: [PATCH 12/50] Updated tutorial on env generation - now added the dt in the mdp info - using mdp info in the viewer --- docs/source/tutorials/code/room_env.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/tutorials/code/room_env.py b/docs/source/tutorials/code/room_env.py index 7ea5edf0c..8aea07b7d 100644 --- a/docs/source/tutorials/code/room_env.py +++ b/docs/source/tutorials/code/room_env.py @@ -7,7 +7,7 @@ class RoomToyEnv(Environment): - def __init__(self, size=5., goal=[2.5, 2.5], goal_radius=0.6): + def __init__(self, size=5., goal=(2.5, 2.5), goal_radius=0.6): # Save important environment information self._size = size @@ -23,7 +23,7 @@ def __init__(self, size=5., goal=[2.5, 2.5], goal_radius=0.6): observation_space = Box(0, size, shape) # Create the MDPInfo structure, needed by the environment interface - mdp_info = MDPInfo(observation_space, action_space, gamma=0.99, horizon=100) + mdp_info = MDPInfo(observation_space, action_space, gamma=0.99, horizon=100, dt=0.1) super().__init__(mdp_info) @@ -96,8 +96,8 @@ def render(self, record=False): # Get the image if the record flag is set to true frame = self._viewer.get_frame() if record else None - # Display the image for 0.1 seconds - self._viewer.display(0.1) + # Display the image for the control time (0.1 seconds) + self._viewer.display(self.info.dt) return frame From 
f8ff7d0ee14f7e1a0ea1a58889a59020b08c2f65 Mon Sep 17 00:00:00 2001 From: robfiras Date: Wed, 28 Jun 2023 14:26:53 +0200 Subject: [PATCH 13/50] Added function to MultiMujoco. - added function that generates binary vector to identify the current task in MultiMujoco --- mushroom_rl/environments/mujoco.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 6f4a57e26..6aeea2c35 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -659,4 +659,28 @@ def reset(self, obs=None): @property def more_than_one_env(self): - return len(self._models) > 1 \ No newline at end of file + return len(self._models) > 1 + + @staticmethod + def _get_env_id_map(current_model_idx, n_models): + """ + Returns a binary vector to identify environment. This can be passed to the observation space. + + Args: + current_model_idx (int): index of the current model. + n_models (int): total number of models. + + Returns: + ndarray containing a binary vector identifying the current environment. 
+ + """ + bits_needed = 1+int(np.log((n_models-1))/np.log(2)) + id_mask = np.zeros(bits_needed) + bin_rep = np.binary_repr(current_model_idx)[::-1] + for i, b in enumerate(bin_rep): + idx = bits_needed - 1 - i # reverse idx + if int(b): + id_mask[idx] = 1.0 + else: + id_mask[idx] = 0.0 + return id_mask From 75b7719527058cf6ac4d89415fbcedc62708d7a9 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Wed, 28 Jun 2023 15:10:07 +0200 Subject: [PATCH 14/50] Added automatic fps detection for recording - if not manually specified, the fps is computed automatically from environment dt --- .gitignore | 1 + mushroom_rl/core/core.py | 8 +++++--- mushroom_rl/environments/dm_control_env.py | 7 ++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index bdd9cee37..b2ad13c21 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.DS_store build/ dist/ +examples/mushroom_rl_recordings/ examples/habitat/Replica-Dataset examples/habitat/data mushroom_rl.egg-info/ diff --git a/mushroom_rl/core/core.py b/mushroom_rl/core/core.py index 67573c796..22d6e20e3 100644 --- a/mushroom_rl/core/core.py +++ b/mushroom_rl/core/core.py @@ -251,8 +251,7 @@ def _preprocess(self, state): return state - @staticmethod - def _build_recorder_class(recorder_class=None, **kwargs): + def _build_recorder_class(self, recorder_class=None, fps=None, **kwargs): """ Method to create a video recorder class. 
@@ -268,4 +267,7 @@ def _build_recorder_class(recorder_class=None, **kwargs): if not recorder_class: recorder_class = VideoRecorder - return recorder_class(**kwargs) + if not fps: + fps = int(1 / self.mdp.info.dt) + + return recorder_class(fps=fps, **kwargs) diff --git a/mushroom_rl/environments/dm_control_env.py b/mushroom_rl/environments/dm_control_env.py index 987e00fc4..51eb7b879 100644 --- a/mushroom_rl/environments/dm_control_env.py +++ b/mushroom_rl/environments/dm_control_env.py @@ -89,9 +89,7 @@ def step(self, action): return self._state, reward, absorbing, {} def render(self, record=False): - img = self.env.physics.render(self._viewer.size[1], - self._viewer.size[0], - self._camera_id) + img = self.env.physics.render(self._viewer.size[1], self._viewer.size[0], self._camera_id) self._viewer.display(img) if record: @@ -118,8 +116,7 @@ def _convert_observation_space_vector(observation_space): @staticmethod def _convert_observation_space_pixels(observation_space): img_size = observation_space['pixels'].shape - return Box(low=0., high=255., - shape=(3, img_size[0], img_size[1])) + return Box(low=0., high=255., shape=(3, img_size[0], img_size[1])) @staticmethod def _convert_action_space(action_space): From 080f8ffa8a799bbc801978824f187af8dcc87da5 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Wed, 28 Jun 2023 15:44:13 +0200 Subject: [PATCH 15/50] Updating .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b2ad13c21..c1ae69e5b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ examples/mushroom_rl_recordings/ examples/habitat/Replica-Dataset examples/habitat/data mushroom_rl.egg-info/ +mushroom_rl_recordings/ .idea/ *.pyc *.pyd From e7c949f43ecb02238e32337518552beac6388f98 Mon Sep 17 00:00:00 2001 From: robfiras Date: Thu, 29 Jun 2023 13:41:50 +0200 Subject: [PATCH 16/50] Added new viewer for dm_control. - added new viewer using cv2. - used for dm_control. 
- resolved a problem in which the viewer was freezing in dm_control environments. This was due a compatability issue of dm_control and pygame. --- mushroom_rl/environments/dm_control_env.py | 13 +++-- mushroom_rl/utils/viewer.py | 64 ++++++++++++++++++++++ 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/mushroom_rl/environments/dm_control_env.py b/mushroom_rl/environments/dm_control_env.py index 51eb7b879..6bfb0d096 100644 --- a/mushroom_rl/environments/dm_control_env.py +++ b/mushroom_rl/environments/dm_control_env.py @@ -8,7 +8,7 @@ from mushroom_rl.core import Environment, MDPInfo from mushroom_rl.utils.spaces import * -from mushroom_rl.utils.viewer import ImageViewer +from mushroom_rl.utils.viewer import CV2Viewer class DMControl(Environment): @@ -64,7 +64,9 @@ def __init__(self, domain_name, task_name, horizon=None, gamma=0.99, task_kwargs observation_space = self._convert_observation_space(self.env.observation_spec()) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, dt) - self._viewer = ImageViewer((width_screen, height_screen), dt) + self._height_screen = height_screen + self._width_screen = width_screen + self._viewer = CV2Viewer("dm_control", dt, self._width_screen, self._height_screen) self._camera_id = camera_id super().__init__(mdp_info) @@ -89,7 +91,10 @@ def step(self, action): return self._state, reward, absorbing, {} def render(self, record=False): - img = self.env.physics.render(self._viewer.size[1], self._viewer.size[0], self._camera_id) + img = self.env.physics.render(self._width_screen, + self._height_screen, + self._camera_id) + self._viewer.display(img) if record: @@ -112,10 +117,10 @@ def _convert_observation_space_vector(observation_space): return Box(low=-np.inf, high=np.inf, shape=(observation_shape,)) - @staticmethod def _convert_observation_space_pixels(observation_space): img_size = observation_space['pixels'].shape + return Box(low=0., high=255., shape=(3, img_size[0], img_size[1])) @staticmethod diff 
--git a/mushroom_rl/utils/viewer.py b/mushroom_rl/utils/viewer.py index 7b5a74700..7eb7091c1 100644 --- a/mushroom_rl/utils/viewer.py +++ b/mushroom_rl/utils/viewer.py @@ -5,6 +5,7 @@ import pygame import time import numpy as np +import cv2 class ImageViewer: @@ -359,3 +360,66 @@ def _transform(self, p): def _rotate(p, theta): return np.array([np.cos(theta) * p[0] - np.sin(theta) * p[1], np.sin(theta) * p[0] + np.cos(theta) * p[1]]) + + +class CV2Viewer: + + """ + Simple viewer to display rendered images using cv2. + + """ + + def __init__(self, window_name, dt, width, height): + self._window_name = window_name + self._dt = dt + self._created_viewer = False + self._width = width + self._height = height + + def display(self, img): + """ + Displays an image. + + Args: + img (np.array): Image to display + + """ + + # display image the first time + if not self._created_viewer: + # Removes toolbar and status bar + cv2.namedWindow(self._window_name, flags=cv2.WINDOW_GUI_NORMAL) + cv2.resizeWindow(self._window_name, self._width, self._height) + cv2.imshow(self._window_name, cv2.cvtColor(img, cv2.COLOR_RGB2BGR)) + self._wait() + self._created_viewer = True + + # if the window is not closed yet, display another image + elif not self._window_was_closed(): + cv2.imshow(self._window_name, cv2.cvtColor(img, cv2.COLOR_RGB2BGR)) + self._wait() + + # window was closed, interrupt simulation + else: + exit() + + def _wait(self): + """ + Wait for the specified amount of time. Time is supposed to be in milliseconds. + + """ + wait_time = int(self._dt * 1000) + cv2.waitKey(wait_time) + + def _window_was_closed(self): + """ + Check if a window was closed. + + Returns: + True if the window was closed. 
+ + """ + return cv2.getWindowProperty(self._window_name, cv2.WND_PROP_VISIBLE) == 0 + + def close(self): + cv2.destroyWindow(self._window_name) From 343c9e1e5b3771322ec386e307fe7ebbb59a0c96 Mon Sep 17 00:00:00 2001 From: robfiras Date: Thu, 29 Jun 2023 14:41:59 +0200 Subject: [PATCH 17/50] Adapted reset MultiMujoco. - allowed to pass an observation from the core's inital states during reset to set the initial state in the simulation. --- mushroom_rl/environments/mujoco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 6aeea2c35..6a4be0c90 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -649,7 +649,7 @@ def reset(self, obs=None): self._model = self._models[self._current_model_idx] self._data = self._datas[self._current_model_idx] self.obs_helper = self.obs_helpers[self._current_model_idx] - self.setup() + self.setup(obs) if self._viewer is not None: self._viewer.load_new_model(self._model) From 6daf1c0f23cbce42905432cf2708129506ace991 Mon Sep 17 00:00:00 2001 From: robfiras Date: Thu, 29 Jun 2023 18:31:28 +0200 Subject: [PATCH 18/50] Updated MultiMujoco viewing settings. - viewer now saves width and height of window. - model is not reloaded to the viewer if we have only one model. 
--- mushroom_rl/environments/mujoco.py | 2 +- mushroom_rl/utils/mujoco/viewer.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 6a4be0c90..6908866c2 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -651,7 +651,7 @@ def reset(self, obs=None): self.obs_helper = self.obs_helpers[self._current_model_idx] self.setup(obs) - if self._viewer is not None: + if self._viewer is not None and self.more_than_one_env: self._viewer.load_new_model(self._model) self._obs = self._create_observation(self.obs_helper._build_obs(self._data)) diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index c245028e0..8c81b394f 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -39,6 +39,9 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self._window = glfw.create_window(width=width, height=height, title="MuJoCo", monitor=None, share=None) glfw.make_context_current(self._window) + self._width = width + self._height = height + # Disable v_sync, so swap_buffers does not block # glfw.swap_interval(0) From c269ecd224888753032ee09d8e90bd63d1f6d27f Mon Sep 17 00:00:00 2001 From: robfiras Date: Mon, 3 Jul 2023 19:31:56 +0200 Subject: [PATCH 19/50] Added more features to the MuJoCo viewer. - Now three different cameras can be chosen during rendering by pressing TAB; static, follow, and top_static. - Added more documentation for the MuJoCo viewer. 
--- mushroom_rl/utils/mujoco/viewer.py | 214 ++++++++++++++++++++++++++++- 1 file changed, 212 insertions(+), 2 deletions(-) diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index 8c81b394f..d25804ff4 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -1,6 +1,7 @@ import glfw import mujoco import time +from itertools import cycle import numpy as np @@ -12,9 +13,29 @@ class MujocoGlfwViewer: Space: Pause / Unpause simulation c: Turn contact force and constraint visualisation on / off t: Make models transparent + """ + def __init__(self, model, dt, width=1920, height=1080, start_paused=False, - custom_render_callback=None, record=False): + custom_render_callback=None, record=False, camera_params=None, + default_camera_mode="static"): + """ + Constructor. + + Args: + model: Mujoco model. + dt (float): Timestep of the environment, (not the simulation). + width (int): Width of the viewer window. + height (int): Height of the viewer window. + start_paused (bool): If True, the rendering is paused in the beginning of the simulation. + custom_render_callback (func): Custom render callback function, which is supposed to be called + during rendering. + record (bool): If true, frames are returned during rendering. + camera_params (dict): Dictionary of dictionaries including custom parameterization of the three cameras. + Checkout the function get_default_camera_params() to know what parameters are expected. Is some camera + type specification or parameter is missing, the default one is used. 
+ + """ self.button_left = False self.button_right = False @@ -57,6 +78,18 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self._camera = mujoco.MjvCamera() mujoco.mjv_defaultFreeCamera(model, self._camera) + if camera_params is None: + self._camera_params = self.get_default_camera_params() + else: + self._camera_params = self._assert_camera_params(camera_params) + self._all_camera_modes = ("static", "follow", "top_static") + self._camera_mode_iter = cycle(self._all_camera_modes) + self._camera_mode = next(self._camera_mode_iter) + self._camera_mode_target = self._camera_mode + assert default_camera_mode in self._all_camera_modes + while self._camera_mode_target != default_camera_mode: + self._camera_mode_target = next(self._camera_mode_iter) + self._set_camera() self._viewport = mujoco.MjrRect(0, 0, width, height) self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(100)) @@ -64,7 +97,15 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self.custom_render_callback = custom_render_callback def load_new_model(self, model): - """ Loads a new model to the viewer, and resets the scene, camera, viewport, and context. """ + """ + Loads a new model to the viewer, and resets the scene, camera, viewport, and context. + This is used in MultiMujoco environments. + + Args: + model: Mujoco model. + + """ + self._model = model self._scene = mujoco.MjvScene(model, 1000) self._scene_option = mujoco.MjvOption() @@ -77,6 +118,17 @@ def load_new_model(self, model): self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(100)) def mouse_button(self, window, button, act, mods): + """ + Mouse button callback for glfw. + + Args: + window: glfw window. + button: glfw button id. + act: glfw action. + mods: glfw mods. 
+ + """ + self.button_left = glfw.get_mouse_button(self._window, glfw.MOUSE_BUTTON_LEFT) == glfw.PRESS self.button_right = glfw.get_mouse_button(self._window, glfw.MOUSE_BUTTON_RIGHT) == glfw.PRESS self.button_middle = glfw.get_mouse_button(self._window, glfw.MOUSE_BUTTON_MIDDLE) == glfw.PRESS @@ -84,6 +136,16 @@ def mouse_button(self, window, button, act, mods): self.last_x, self.last_y = glfw.get_cursor_pos(self._window) def mouse_move(self, window, x_pos, y_pos): + """ + Mouse mode callback for glfw. + + Args: + window: glfw window. + x_pos: Current mouse x position. + y_pos: Current mouse y position. + + """ + if not self.button_left and not self.button_right and not self.button_middle: return @@ -107,6 +169,18 @@ def mouse_move(self, window, x_pos, y_pos): mujoco.mjv_moveCamera(self._model, action, dx / width, dy / height, self._scene, self._camera) def keyboard(self, window, key, scancode, act, mods): + """ + Keyboard callback for glfw. + + Args: + window: glfw window. + key: glfw key event. + scancode: glfw scancode. + act: glfw action. + mods: glfw mods. + + """ + if act != glfw.RELEASE: return @@ -123,10 +197,34 @@ def keyboard(self, window, key, scancode, act, mods): self._scene_option.flags[mujoco.mjtVisFlag.mjVIS_TRANSPARENT] = not self._scene_option.flags[ mujoco.mjtVisFlag.mjVIS_TRANSPARENT] + if key == glfw.KEY_TAB: + self._camera_mode_target = next(self._camera_mode_iter) + def scroll(self, window, x_offset, y_offset): + """ + Scrolling callback for glfw. + + Args: + window: glfw window. + x_offset: x scrolling offset. + y_offset: y scrolling offset. + + """ + mujoco.mjv_moveCamera(self._model, mujoco.mjtMouse.mjMOUSE_ZOOM, 0, 0.05 * y_offset, self._scene, self._camera) def render(self, data, record): + """ + Main rendering function. + + Args: + data: Mujoco data structure. + record (bool): If true, frames are returned during rendering. + + Returns: + If record is True, frames are returned during rendering, else None. 
+ + """ def render_inner_loop(self): render_start = time.time() @@ -163,12 +261,24 @@ def render_inner_loop(self): self._loop_count += self.dt / self._time_per_render while self._loop_count > 0: render_inner_loop(self) + self._set_camera() self._loop_count -= 1 if record: return self.read_pixels() def read_pixels(self, depth=False): + """ + Reads the pixels from the glfw viewer. + + Args: + depth (bool): If True, depth map is also returned. + + Returns: + If depth is True, tuple of np.arrays (rgb and depth), else just a single + np.array for the rgb image. + + """ shape = glfw.get_framebuffer_size(self._window) @@ -183,4 +293,104 @@ def read_pixels(self, depth=False): return np.flipud(img) def stop(self): + """ + Destroys the glfw image. + + """ + glfw.destroy_window(self._window) + + def _set_camera(self): + """ + Sets the camera mode to the current camera mode target. Allowed camera + modes are "follow" in which the model is tracked, "static" that is a static + camera at the default camera positon, and "top_static" that is a static + camera on top of the model. 
+ + """ + + if self._camera_mode_target == "follow": + if self._camera_mode != "follow": + self._camera.fixedcamid = -1 + self._camera.type = mujoco.mjtCamera.mjCAMERA_TRACKING + self._camera.trackbodyid = 0 + self._set_camera_properties(self._camera_mode_target) + elif self._camera_mode_target == "static": + if self._camera_mode != "static": + self._camera.fixedcamid = 0 + self._camera.type = mujoco.mjtCamera.mjCAMERA_FREE + self._camera.trackbodyid = -1 + self._set_camera_properties(self._camera_mode_target) + elif self._camera_mode_target == "top_static": + if self._camera_mode != "top_static": + self._camera.fixedcamid = 0 + self._camera.type = mujoco.mjtCamera.mjCAMERA_FREE + self._camera.trackbodyid = -1 + self._set_camera_properties(self._camera_mode_target) + + def _set_camera_properties(self, mode): + """ + Sets the camera properties "distance", "elevation", and "azimuth" + as well as the camera mode based on the provided mode. + + Args: + mode (str): Camera mode. (either "follow", "static", or "top_static") + + """ + + cam_params = self._camera_params[mode] + self._camera.distance = cam_params["distance"] + self._camera.elevation = cam_params["elevation"] + self._camera.azimuth = cam_params["azimuth"] + self._camera_mode = mode + + def _assert_camera_params(self, camera_params): + """ + Asserts if the provided camera parameters are valid or not. Also, if + properties of some camera types are not specified, the default parameters + are used. + + Args: + camera_params (dict): Dictionary of dictionaries containig parameters for each camera type. + + Returns: + Dictionary of dictionaries with parameters for each camera type. + + """ + + default_camera_params = self.get_default_camera_params() + + # check if the provided camera types and parameters are valid + for cam_type in camera_params.keys(): + assert cam_type in default_camera_params.keys(), f"Camera type \"{cam_type}\" is unknown. Allowed " \ + f"camera types are {list(default_camera_params.keys())}." 
+ for param in camera_params[cam_type].keys(): + assert param in default_camera_params[cam_type].keys(), f"Parameter \"{param}\" of camera type " \ + f"\"{cam_type}\" is unknown. Allowed " \ + f"parameters are" \ + f" {list(default_camera_params[cam_type].keys())}" + + # add default parameters if not specified + for cam_type in default_camera_params.keys(): + if cam_type not in camera_params.keys(): + camera_params[cam_type] = default_camera_params[cam_type] + else: + for param in default_camera_params[cam_type].keys(): + if param not in camera_params[cam_type].keys(): + camera_params[cam_type][param] = default_camera_params[cam_type][param] + + return camera_params + + @staticmethod + def get_default_camera_params(): + """ + Getter for default camera paramterization. + + Returns: + Dictionary of dictionaries with default parameters for each camera type. + + """ + + return dict(static=dict(distance=15.0, elevation=-45.0, azimuth=90.0), + follow=dict(distance=3.5, elevation=0.0, azimuth=90.0), + top_static=dict(distance=5.0, elevation=-90.0, azimuth=90.0)) From 9a337f18e669b54d269bc3dc578dba11774c648d Mon Sep 17 00:00:00 2001 From: robfiras Date: Tue, 4 Jul 2023 13:11:56 +0200 Subject: [PATCH 20/50] Changed default camera view in the MuJoCo air hockey environment. 
--- mushroom_rl/environments/mujoco_envs/air_hockey/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mushroom_rl/environments/mujoco_envs/air_hockey/base.py b/mushroom_rl/environments/mujoco_envs/air_hockey/base.py index e44edfad8..06de5564f 100644 --- a/mushroom_rl/environments/mujoco_envs/air_hockey/base.py +++ b/mushroom_rl/environments/mujoco_envs/air_hockey/base.py @@ -15,7 +15,8 @@ class AirHockeyBase(MuJoCo): """ def __init__(self, n_agents=1, env_noise=False, obs_noise=False, gamma=0.99, horizon=500, - timestep=1 / 240., n_substeps=1, n_intermediate_steps=1, **viewer_params): + timestep=1 / 240., n_substeps=1, n_intermediate_steps=1, default_camera_mode="top_static", + **viewer_params): """ Constructor. @@ -113,7 +114,7 @@ def __init__(self, n_agents=1, env_noise=False, obs_noise=False, gamma=0.99, hor max_joint_vel = np.concatenate([spec['max_joint_vel'] for spec in self.agents]) super().__init__(scene, action_spec, observation_spec, gamma, horizon, timestep, n_substeps, n_intermediate_steps, additional_data, collision_spec, max_joint_vel, - **viewer_params) + default_camera_mode=default_camera_mode, **viewer_params) # Get the transformations from table to robot coordinate system for i, agent_spec in enumerate(self.agents): From b85d7b7a8a974b0f2fa6ed14c45fea7e253e9945 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Wed, 5 Jul 2023 14:51:57 +0200 Subject: [PATCH 21/50] Minor refactoring of torch policies - moved custom distributions in utils.torch - implemented DiagonalMultivariateNormal distribution --- mushroom_rl/policy/torch_policy.py | 11 ++-------- mushroom_rl/utils/torch.py | 32 +++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/mushroom_rl/policy/torch_policy.py b/mushroom_rl/policy/torch_policy.py index e1710b777..2cd673598 100644 --- a/mushroom_rl/policy/torch_policy.py +++ b/mushroom_rl/policy/torch_policy.py @@ -6,7 +6,7 @@ from mushroom_rl.policy import Policy from 
mushroom_rl.approximators import Regressor from mushroom_rl.approximators.parametric import TorchApproximator -from mushroom_rl.utils.torch import to_float_tensor +from mushroom_rl.utils.torch import to_float_tensor, CategoricalWrapper from mushroom_rl.utils.parameters import to_parameter from itertools import chain @@ -259,13 +259,6 @@ class BoltzmannTorchPolicy(TorchPolicy): Torch policy implementing a Boltzmann policy. """ - class CategoricalWrapper(torch.distributions.Categorical): - def __init__(self, logits): - super().__init__(logits=logits) - - def log_prob(self, value): - return super().log_prob(value.squeeze()) - def __init__(self, network, input_shape, output_shape, beta, use_cuda=False, **params): """ Constructor. @@ -314,7 +307,7 @@ def entropy_t(self, state): def distribution_t(self, state): logits = self._logits(state, **self._predict_params, output_tensor=True) * self._beta(state.numpy()) - return BoltzmannTorchPolicy.CategoricalWrapper(logits) + return CategoricalWrapper(logits) def set_weights(self, weights): self._logits.set_weights(weights) diff --git a/mushroom_rl/utils/torch.py b/mushroom_rl/utils/torch.py index d586c67dc..461fa7ecf 100644 --- a/mushroom_rl/utils/torch.py +++ b/mushroom_rl/utils/torch.py @@ -131,4 +131,34 @@ def update_optimizer_parameters(optimizer, new_parameters): del optimizer.state[p_old] optimizer.state[p_new] = data - optimizer.param_groups[0]['params'] = new_parameters \ No newline at end of file + optimizer.param_groups[0]['params'] = new_parameters + + +class CategoricalWrapper(torch.distributions.Categorical): + """ + Wrapper for the Torch Categorical distribution. 
+ + Needed to convert a vector of mushroom discrete action in an input with the proper shape of the original + distribution implemented in torch + + """ + def __init__(self, logits): + super().__init__(logits=logits) + + def log_prob(self, value): + return super().log_prob(value.squeeze()) + + +class DiagonalMultivariateGaussian(torch.distributions.Normal): + """ + Wrapper for the Torch Normal distribution, implementing a diagonal distribution. + + It behaves as the MultivariateNormal distribution, but avoids the computation of the full covariance matrix, + optimizing the computation time, particulalrly when a high dimensional vector is sampled. + + """ + def __init__(self, loc, scale): + super().__init__(loc=loc, scale=scale) + + def log_prob(self, value): + return torch.sum(super().log_prob(value), -1) \ No newline at end of file From 1a3780b472fc84ed77305db19594cd251d60ac0c Mon Sep 17 00:00:00 2001 From: Davide Tateo Date: Wed, 5 Jul 2023 17:44:41 +0200 Subject: [PATCH 22/50] Update setup.py - box2d not installed anymore when using all --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cc49d99b8..10fd52869 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ def glob_data_files(data_package, data_type=None): all_deps = [] for group_name in extras: - if group_name not in ['plots']: + if group_name not in ['plots','box2d']: all_deps += extras[group_name] extras['all'] = all_deps From 94b5fbd561b2d9e3059ab0c13e00a152b7d3ea44 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Thu, 6 Jul 2023 10:51:10 +0200 Subject: [PATCH 23/50] Code cleanup - Removed prints from all mushroom main code - removed __main__ test code from air hockey environments --- mushroom_rl/environments/inverted_pendulum.py | 1 - .../mujoco_envs/air_hockey/defend.py | 26 ------------------- .../mujoco_envs/air_hockey/hit.py | 26 +------------------ .../mujoco_envs/air_hockey/prepare.py | 26 ------------------- 
.../mujoco_envs/air_hockey/repel.py | 26 ------------------- 5 files changed, 1 insertion(+), 104 deletions(-) diff --git a/mushroom_rl/environments/inverted_pendulum.py b/mushroom_rl/environments/inverted_pendulum.py index 75af58d61..3fc7ed1b9 100644 --- a/mushroom_rl/environments/inverted_pendulum.py +++ b/mushroom_rl/environments/inverted_pendulum.py @@ -107,7 +107,6 @@ def stop(self): self._viewer.close() def _dynamics(self, state, t, u): - print(t) theta = state[0] omega = self._bound(state[1], -self._max_omega, self._max_omega) diff --git a/mushroom_rl/environments/mujoco_envs/air_hockey/defend.py b/mushroom_rl/environments/mujoco_envs/air_hockey/defend.py index 2caacc739..d86e277ea 100644 --- a/mushroom_rl/environments/mujoco_envs/air_hockey/defend.py +++ b/mushroom_rl/environments/mujoco_envs/air_hockey/defend.py @@ -102,29 +102,3 @@ def is_absorbing(self, state): if (self.has_hit or self.has_bounce) and puck_pos_x > 0: return True return super().is_absorbing(state) - - -if __name__ == '__main__': - env = AirHockeyDefend(obs_noise=False, n_intermediate_steps=4, random_init=True) - - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() - while True: - action = np.zeros(3) - observation, reward, done, info = env.step(action) - env.render() - gamma *= env.info.gamma - J += gamma * reward - R += reward - steps += 1 - - if done or steps > env.info.horizon: - print("J: ", J, " R: ", R) - R = 0. - J = 0. - gamma = 1. 
- steps = 0 - env.reset() diff --git a/mushroom_rl/environments/mujoco_envs/air_hockey/hit.py b/mushroom_rl/environments/mujoco_envs/air_hockey/hit.py index 44614eafb..d1c55b029 100644 --- a/mushroom_rl/environments/mujoco_envs/air_hockey/hit.py +++ b/mushroom_rl/environments/mujoco_envs/air_hockey/hit.py @@ -102,30 +102,6 @@ def reward(self, state, action, next_state, absorbing): r = 2 * r_hit + 10 * r_goal r -= self.action_penalty * np.linalg.norm(action) - return r + return r -if __name__ == '__main__': - env = AirHockeyHit(env_noise=False, obs_noise=False, n_intermediate_steps=4, random_init=True, - init_robot_state="right") - - env.reset() - R = 0. - J = 0. - gamma = 1. - steps = 0 - while True: - action = np.random.randn(3) * 5 - observation, reward, done, info = env.step(action) - env.render() - gamma *= env.info.gamma - J += gamma * reward - R += reward - steps += 1 - if done or steps > env.info.horizon: - print("J: ", J, " R: ", R) - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() diff --git a/mushroom_rl/environments/mujoco_envs/air_hockey/prepare.py b/mushroom_rl/environments/mujoco_envs/air_hockey/prepare.py index f7ae803a4..70803bb1a 100644 --- a/mushroom_rl/environments/mujoco_envs/air_hockey/prepare.py +++ b/mushroom_rl/environments/mujoco_envs/air_hockey/prepare.py @@ -133,29 +133,3 @@ def is_absorbing(self, state): if puck_pos[0] > 0 or abs(puck_pos[1]) < 0.01: return True return False - - -if __name__ == '__main__': - env = AirHockeyPrepare(obs_noise=False, n_intermediate_steps=4, random_init=True, - sub_problem="bottom") - - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() - while True: - action = np.random.randn(3) * 5 - observation, reward, done, info = env.step(action) - env.render() - gamma *= env.info.gamma - J += gamma * reward - R += reward - steps += 4 - if done or steps > env.info.horizon * 2: - print("J: ", J, " R: ", R) - R = 0. - J = 0. - gamma = 1. 
- steps = 0 - env.reset() diff --git a/mushroom_rl/environments/mujoco_envs/air_hockey/repel.py b/mushroom_rl/environments/mujoco_envs/air_hockey/repel.py index a81569174..acd19b93b 100644 --- a/mushroom_rl/environments/mujoco_envs/air_hockey/repel.py +++ b/mushroom_rl/environments/mujoco_envs/air_hockey/repel.py @@ -101,29 +101,3 @@ def is_absorbing(self, state): if super().is_absorbing(state): return True return self.has_bounce - - -if __name__ == "__main__": - env = AirHockeyRepel(env_noise=False, obs_noise=False, n_intermediate_steps=4, - random_init=True) - - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() - while True: - action = np.zeros(3) - observation, reward, done, info = env.step(action) - env.render() - gamma *= env.info.gamma - J += gamma * reward - R += reward - steps += 1 - if done or steps > env.info.horizon: - print("J: ", J, " R: ", R) - R = 0. - J = 0. - gamma = 1. - steps = 0 - env.reset() From 0dbefa8f14d1f1a4f7a1167f7f3ff18019eb88d9 Mon Sep 17 00:00:00 2001 From: robfiras Date: Mon, 10 Jul 2023 20:59:27 +0200 Subject: [PATCH 24/50] Fixed bug in Mujoco viewer with MultiEnv. - now the viewer does not reset to different view once the environment is changed. --- mushroom_rl/utils/mujoco/viewer.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index d25804ff4..39b30f5f3 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -98,7 +98,7 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, def load_new_model(self, model): """ - Loads a new model to the viewer, and resets the scene, camera, viewport, and context. + Loads a new model to the viewer, and resets the scene and context. This is used in MultiMujoco environments. 
Args: @@ -109,12 +109,6 @@ def load_new_model(self, model): self._model = model self._scene = mujoco.MjvScene(model, 1000) self._scene_option = mujoco.MjvOption() - - self._camera = mujoco.MjvCamera() - mujoco.mjv_defaultFreeCamera(model, self._camera) - self._camera.distance *= 0.3 - - self._viewport = mujoco.MjrRect(0, 0, self._width, self._height) self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(100)) def mouse_button(self, window, button, act, mods): From cf137d4dc506e0fe4d088e7a28d9340c22a253b1 Mon Sep 17 00:00:00 2001 From: robfiras Date: Wed, 12 Jul 2023 14:37:03 +0200 Subject: [PATCH 25/50] Fix in env_id_map used in MultiMujoco. --- mushroom_rl/environments/mujoco.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 6908866c2..639df6132 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -674,6 +674,7 @@ def _get_env_id_map(current_model_idx, n_models): ndarray containing a binary vector identifying the current environment. 
""" + n_models = np.maximum(n_models, 2) bits_needed = 1+int(np.log((n_models-1))/np.log(2)) id_mask = np.zeros(bits_needed) bin_rep = np.binary_repr(current_model_idx)[::-1] From 6059e72297ce307ad0e4f85c7404924dfb460bb8 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Wed, 12 Jul 2023 16:55:07 +0200 Subject: [PATCH 26/50] Fixed Plotter, updated to pyqtgraph 13 - some deprecated classes where removed, replaced in the code - using directly PyQt5 data - cleanup of init file that was causing an import error --- mushroom_rl/utils/plots/__init__.py | 3 --- mushroom_rl/utils/plots/window.py | 13 ++++++------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/mushroom_rl/utils/plots/__init__.py b/mushroom_rl/utils/plots/__init__.py index 7a684cd12..512b99973 100644 --- a/mushroom_rl/utils/plots/__init__.py +++ b/mushroom_rl/utils/plots/__init__.py @@ -16,8 +16,5 @@ __all__ += ['Actions', 'LenOfEpisodeTraining', 'Observations', 'RewardPerEpisode', 'RewardPerStep'] - from ._implementations import common_buffers - __all__.append('common_buffers') - except ImportError: pass diff --git a/mushroom_rl/utils/plots/window.py b/mushroom_rl/utils/plots/window.py index 8669982ff..b05375789 100644 --- a/mushroom_rl/utils/plots/window.py +++ b/mushroom_rl/utils/plots/window.py @@ -1,14 +1,13 @@ import time -from PyQt5.QtGui import QBrush, QColor -from PyQt5.QtWidgets import QTreeWidgetItem +from PyQt5.QtGui import QGuiApplication, QBrush, QColor +from PyQt5.QtWidgets import QTreeWidget, QTreeWidgetItem, QSplitter from PyQt5 import QtCore import pyqtgraph as pg -from pyqtgraph.Qt import QtGui -class Window(QtGui.QSplitter): +class Window(QSplitter): """ This class is used creating windows for plotting. 
@@ -48,7 +47,7 @@ def __init__(self, plot_list, track_if_deactivated=False, super().__init__(QtCore.Qt.Horizontal) - self._activation_widget = QtGui.QTreeWidget() + self._activation_widget = QTreeWidget() self._activation_widget.setHeaderLabels(["Plots"]) self._activation_widget.itemClicked.connect(self.clicked) @@ -75,7 +74,7 @@ def __init__(self, plot_list, track_if_deactivated=False, plot_instance, plot_instance.plot_data_items_list[i] ] - self._GraphicsWindow = pg.GraphicsWindow(title=title) + self._GraphicsWindow = pg.GraphicsLayoutWidget(title=title) self.addWidget(self._activation_widget) self.addWidget(self._GraphicsWindow) @@ -120,7 +119,7 @@ def refresh(self): for plot_instance in self.plot_list: plot_instance.refresh() - QtGui.QGuiApplication.processEvents() + QGuiApplication.processEvents() def activate(self, item): """ From bb887c4428bedd863f520e282b88ebb252de9737 Mon Sep 17 00:00:00 2001 From: robfiras Date: Wed, 12 Jul 2023 17:02:46 +0200 Subject: [PATCH 27/50] Updated video recorder. - now recording directory is not created until a video writer is created. 
--- mushroom_rl/utils/record.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mushroom_rl/utils/record.py b/mushroom_rl/utils/record.py index 996704d56..aa5761ba0 100644 --- a/mushroom_rl/utils/record.py +++ b/mushroom_rl/utils/record.py @@ -26,7 +26,6 @@ def __init__(self, path="./mushroom_rl_recordings", tag=None, video_name=None, f self._path = Path(path) self._path = self._path / tag - self._path.mkdir(parents=True, exist_ok=True) self._video_name = video_name if video_name else "recording" self._counter = 0 @@ -56,6 +55,8 @@ def _create_video_writer(self, height, width): else: name += ".mp4" + self._path.mkdir(parents=True, exist_ok=True) + path = self._path / name self._video_writer = cv2.VideoWriter(str(path), cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), From 3fc832a27a93b5ba8e8b336b319616443e81794e Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Wed, 19 Jul 2023 11:39:46 +0200 Subject: [PATCH 28/50] Fixed minor bug in dm_control rendering - height and width where exchanghed --- mushroom_rl/environments/dm_control_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mushroom_rl/environments/dm_control_env.py b/mushroom_rl/environments/dm_control_env.py index 6bfb0d096..030d4977d 100644 --- a/mushroom_rl/environments/dm_control_env.py +++ b/mushroom_rl/environments/dm_control_env.py @@ -91,8 +91,8 @@ def step(self, action): return self._state, reward, absorbing, {} def render(self, record=False): - img = self.env.physics.render(self._width_screen, - self._height_screen, + img = self.env.physics.render(self._height_screen, + self._width_screen, self._camera_id) self._viewer.display(img) From bea851a6edb694afa4e15e04040f5f128cc18449 Mon Sep 17 00:00:00 2001 From: Firas Al-Hafez Date: Mon, 17 Jul 2023 22:24:34 +0200 Subject: [PATCH 29/50] Added overlays to the Mujoco viewer. - added overlays with very basic information to the Mujoco viewer. - more information can be easily added now. 
--- mushroom_rl/utils/mujoco/viewer.py | 69 +++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index 39b30f5f3..1c350dc53 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -18,7 +18,7 @@ class MujocoGlfwViewer: def __init__(self, model, dt, width=1920, height=1080, start_paused=False, custom_render_callback=None, record=False, camera_params=None, - default_camera_mode="static"): + default_camera_mode="static", hide_menue_on_startup=False): """ Constructor. @@ -34,6 +34,7 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, camera_params (dict): Dictionary of dictionaries including custom parameterization of the three cameras. Checkout the function get_default_camera_params() to know what parameters are expected. Is some camera type specification or parameter is missing, the default one is used. + hide_menue_on_startup (bool): If True, the menue is hidden on startup. """ @@ -96,6 +97,9 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self.custom_render_callback = custom_render_callback + self._overlay = {} + self._hide_menue = hide_menue_on_startup + def load_new_model(self, model): """ Loads a new model to the viewer, and resets the scene and context. @@ -194,6 +198,12 @@ def keyboard(self, window, key, scancode, act, mods): if key == glfw.KEY_TAB: self._camera_mode_target = next(self._camera_mode_iter) + if key == glfw.KEY_H: + if self._hide_menue: + self._hide_menue = False + else: + self._hide_menue = True + def scroll(self, window, x_offset, y_offset): """ Scrolling callback for glfw. 
@@ -221,6 +231,9 @@ def render(self, data, record): """ def render_inner_loop(self): + + self._create_overlay() + render_start = time.time() mujoco.mjv_updateScene(self._model, data, self._scene_option, None, self._camera, @@ -231,6 +244,19 @@ def render_inner_loop(self): mujoco.mjr_render(self._viewport, self._scene, self._context) + for gridpos, [t1, t2] in self._overlay.items(): + + if self._hide_menue: + continue + + mujoco.mjr_overlay( + mujoco.mjtFontScale.mjFONTSCALE_150, # todo: it seems like the font scale has no effect in MacOS. + gridpos, + self._viewport, + t1, + t2, + self._context) + if self.custom_render_callback is not None: self.custom_render_callback(self._viewport, self._context) @@ -239,6 +265,8 @@ def render_inner_loop(self): self.frames += 1 + self._overlay.clear() + if glfw.window_should_close(self._window): self.stop() exit(0) @@ -294,6 +322,45 @@ def stop(self): glfw.destroy_window(self._window) + def _create_overlay(self): + """ + This function creates and adds all overlays used in the viewer. + + """ + + topleft = mujoco.mjtGridPos.mjGRID_TOPLEFT + topright = mujoco.mjtGridPos.mjGRID_TOPRIGHT + bottomleft = mujoco.mjtGridPos.mjGRID_BOTTOMLEFT + bottomright = mujoco.mjtGridPos.mjGRID_BOTTOMRIGHT + + def add_overlay(gridpos, text1, text2="", make_new_line=True): + if gridpos not in self._overlay: + self._overlay[gridpos] = ["", ""] + if make_new_line: + self._overlay[gridpos][0] += text1 + "\n" + self._overlay[gridpos][1] += text2 + "\n" + else: + self._overlay[gridpos][0] += text1 + self._overlay[gridpos][1] += text2 + + add_overlay( + bottomright, + "Framerate:", + str(1/self._time_per_render)) + + add_overlay( + topleft, + "Press H to hide the menue.") + + add_overlay( + topleft, + "Press TAB to switch cameras.") + + add_overlay( + topleft, + "Camera mode:", + self._camera_mode, make_new_line=False) + def _set_camera(self): """ Sets the camera mode to the current camera mode target. 
Allowed camera From 43b7de4ec9d224f4cfdae11106b9f35de6f6b9ba Mon Sep 17 00:00:00 2001 From: robfiras Date: Fri, 28 Jul 2023 16:42:18 +0200 Subject: [PATCH 30/50] Minor fixed Mujoco viewer. --- mushroom_rl/utils/mujoco/viewer.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index 1c350dc53..b4037236f 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -18,7 +18,7 @@ class MujocoGlfwViewer: def __init__(self, model, dt, width=1920, height=1080, start_paused=False, custom_render_callback=None, record=False, camera_params=None, - default_camera_mode="static", hide_menue_on_startup=False): + default_camera_mode="static", hide_menu_on_startup=False): """ Constructor. @@ -34,7 +34,7 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, camera_params (dict): Dictionary of dictionaries including custom parameterization of the three cameras. Checkout the function get_default_camera_params() to know what parameters are expected. Is some camera type specification or parameter is missing, the default one is used. - hide_menue_on_startup (bool): If True, the menue is hidden on startup. + hide_menu_on_startup (bool): If True, the menu is hidden on startup. 
""" @@ -93,12 +93,12 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self._set_camera() self._viewport = mujoco.MjrRect(0, 0, width, height) - self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(100)) + self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(150)) self.custom_render_callback = custom_render_callback self._overlay = {} - self._hide_menue = hide_menue_on_startup + self._hide_menu = hide_menu_on_startup def load_new_model(self, model): """ @@ -199,10 +199,10 @@ def keyboard(self, window, key, scancode, act, mods): self._camera_mode_target = next(self._camera_mode_iter) if key == glfw.KEY_H: - if self._hide_menue: - self._hide_menue = False + if self._hide_menu: + self._hide_menu = False else: - self._hide_menue = True + self._hide_menu = True def scroll(self, window, x_offset, y_offset): """ @@ -246,11 +246,11 @@ def render_inner_loop(self): for gridpos, [t1, t2] in self._overlay.items(): - if self._hide_menue: + if self._hide_menu: continue mujoco.mjr_overlay( - mujoco.mjtFontScale.mjFONTSCALE_150, # todo: it seems like the font scale has no effect in MacOS. + mujoco.mjtFont.mjFONT_SHADOW, gridpos, self._viewport, t1, @@ -346,11 +346,11 @@ def add_overlay(gridpos, text1, text2="", make_new_line=True): add_overlay( bottomright, "Framerate:", - str(1/self._time_per_render)) + str(int(1/self._time_per_render)), make_new_line=False) add_overlay( topleft, - "Press H to hide the menue.") + "Press H to hide the menu.") add_overlay( topleft, From fc804cba75782be7fc1d4828edfdc07a7738f5bb Mon Sep 17 00:00:00 2001 From: robfiras Date: Fri, 28 Jul 2023 17:00:33 +0200 Subject: [PATCH 31/50] Added more information to the Mujoco viewer overlay. 
--- mushroom_rl/utils/mujoco/viewer.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index b4037236f..d22b87f4f 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -9,10 +9,6 @@ class MujocoGlfwViewer: """ Class that creates a Glfw viewer for mujoco environments. - Controls: - Space: Pause / Unpause simulation - c: Turn contact force and constraint visualisation on / off - t: Make models transparent """ @@ -93,7 +89,8 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self._set_camera() self._viewport = mujoco.MjrRect(0, 0, width, height) - self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(150)) + self._font_scale = 100 + self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(self._font_scale)) self.custom_render_callback = custom_render_callback @@ -112,8 +109,7 @@ def load_new_model(self, model): self._model = model self._scene = mujoco.MjvScene(model, 1000) - self._scene_option = mujoco.MjvOption() - self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(100)) + self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(self._font_scale)) def mouse_button(self, window, button, act, mods): """ @@ -348,6 +344,10 @@ def add_overlay(gridpos, text1, text2="", make_new_line=True): "Framerate:", str(int(1/self._time_per_render)), make_new_line=False) + add_overlay( + topleft, + "Press SPACE to pause.") + add_overlay( topleft, "Press H to hide the menu.") @@ -356,6 +356,15 @@ def add_overlay(gridpos, text1, text2="", make_new_line=True): topleft, "Press TAB to switch cameras.") + add_overlay( + topleft, + "Press T to make the model transparent.") + + visualize_contact = "On" if self._scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] else "Off" + add_overlay( + topleft, + "Contact force visualization (Press C):", visualize_contact) + add_overlay( topleft, 
"Camera mode:", From 3c922696eb348af9e5bc412b4dbd49f09bd3cc8e Mon Sep 17 00:00:00 2001 From: robfiras Date: Fri, 28 Jul 2023 19:14:35 +0200 Subject: [PATCH 32/50] Added flag to MultiMujoco. - added the option to iterate sequentially through all environments/models instead of randomly choosing one. --- mushroom_rl/environments/mujoco.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 639df6132..d2c8bc708 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -514,7 +514,7 @@ class MultiMuJoCo(MuJoCo): def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, timestep=None, n_substeps=1, n_intermediate_steps=1, additional_data_spec=None, collision_groups=None, - max_joint_vel=None, **viewer_params): + max_joint_vel=None, random_env_reset=True, **viewer_params): """ Constructor. @@ -555,13 +555,16 @@ def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, a list of geom names in the XML specification. max_joint_vel (list, None): A list with the maximum joint velocities which are provided in the mdp_info. The list has to define a maximum velocity for every occurrence of JOINT_VEL in the observation_spec. The - velocity will not be limited in mujoco + velocity will not be limited in mujoco. + random_env_reset (bool): If True, a random environment/model is chosen after each episode. If False, it is + sequentially iterated through the environment/model list. **viewer_params: other parameters to be passed to the viewer. See MujocoGlfwViewer documentation for the available options. 
""" # Create the simulation assert type(file_names) + self._random_env_reset = random_env_reset self._models = [mujoco.MjModel.from_xml_path(f) for f in file_names] self._current_model_idx = 0 self._model = self._models[self._current_model_idx] @@ -645,7 +648,12 @@ def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, def reset(self, obs=None): mujoco.mj_resetData(self._model, self._data) - self._current_model_idx = np.random.randint(0, len(self._models)) + if self._random_env_reset: + self._current_model_idx = np.random.randint(0, len(self._models)) + else: + self._current_model_idx = self._current_model_idx + 1 \ + if self._current_model_idx < len(self._models) - 1 else 0 + self._model = self._models[self._current_model_idx] self._data = self._datas[self._current_model_idx] self.obs_helper = self.obs_helpers[self._current_model_idx] From dc5a565b53e4626d2272b0173963c84b474f162c Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Wed, 2 Aug 2023 18:20:20 +0200 Subject: [PATCH 33/50] Updated angles utils to support rotation matrices --- mushroom_rl/utils/angles.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/mushroom_rl/utils/angles.py b/mushroom_rl/utils/angles.py index 974a25ccc..16fa606c8 100644 --- a/mushroom_rl/utils/angles.py +++ b/mushroom_rl/utils/angles.py @@ -118,3 +118,31 @@ def euler_to_quat(euler): return R.from_euler('xyz', euler).as_quat()[[3, 0, 1, 2]] else: return R.from_euler('xyz', euler.T).as_quat()[:, [3, 0, 1, 2]].T + + +def mat_to_euler(mat): + """ + Convert a rotation matrix to euler angles. + + Args: + mat (np.ndarray): a 3d rotation matrix. + + Returns: + The euler angles [x, y, z] representation of the quaternion + + """ + return R.from_matrix(mat).as_euler('xyz') + + +def euler_to_mat(euler): + """ + Convert euler angles into a a rotation matrix. + + Args: + euler (np.ndarray): euler angles [x, y, z] to be converted. 
+ + Returns: + The rotation matrix representation of the euler angles + + """ + return R.from_euler('xyz', euler).as_matrix() From 2c95196d44cc76a2bd4944a01cd32d88f850ae2e Mon Sep 17 00:00:00 2001 From: robfiras Date: Fri, 4 Aug 2023 16:39:05 +0200 Subject: [PATCH 34/50] Minor fix MultiMujoco. - added dt to mdp_info. --- mushroom_rl/environments/mujoco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index d2c8bc708..2abc9ac83 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -631,7 +631,7 @@ def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, # Finally, we create the MDP information and call the constructor of # the parent class - mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) + mdp_info = MDPInfo(observation_space, action_space, gamma, horizon, self.dt) mdp_info = self._modify_mdp_info(mdp_info) From 2decae31459a3481130afe1263bc0a5ba7954a99 Mon Sep 17 00:00:00 2001 From: Nico Bohlinger <34777138+nico-bohlinger@users.noreply.github.com> Date: Sat, 5 Aug 2023 17:24:13 +0200 Subject: [PATCH 35/50] Updated MuJoCo viewer (#129) * Enable toggle geom groups and slow down / speed up rendering * Add reference frames toggle --- mushroom_rl/utils/mujoco/viewer.py | 35 +++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index d22b87f4f..10783256d 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -52,6 +52,7 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self._loop_count = 0 self._time_per_render = 1 / 60. 
+ self._run_speed_factor = 1.0 self._paused = start_paused self._window = glfw.create_window(width=width, height=height, title="MuJoCo", monitor=None, share=None) @@ -194,6 +195,19 @@ def keyboard(self, window, key, scancode, act, mods): if key == glfw.KEY_TAB: self._camera_mode_target = next(self._camera_mode_iter) + if key == glfw.KEY_S: + self._run_speed_factor /= 2.0 + + if key == glfw.KEY_F: + self._run_speed_factor *= 2.0 + + if key == glfw.KEY_G: + for i in range(len(self._scene_option.geomgroup)): + self._scene_option.geomgroup[i] = not self._scene_option.geomgroup[i] + + if key == glfw.KEY_E: + self._scene_option.frame = not self._scene_option.frame + if key == glfw.KEY_H: if self._hide_menu: self._hide_menu = False @@ -276,7 +290,7 @@ def render_inner_loop(self): if record: self._loop_count = 1 else: - self._loop_count += self.dt / self._time_per_render + self._loop_count += self.dt / (self._time_per_render * self._run_speed_factor) while self._loop_count > 0: render_inner_loop(self) self._set_camera() @@ -342,7 +356,7 @@ def add_overlay(gridpos, text1, text2="", make_new_line=True): add_overlay( bottomright, "Framerate:", - str(int(1/self._time_per_render)), make_new_line=False) + str(int(1/self._time_per_render * self._run_speed_factor)), make_new_line=False) add_overlay( topleft, @@ -368,7 +382,22 @@ def add_overlay(gridpos, text1, text2="", make_new_line=True): add_overlay( topleft, "Camera mode:", - self._camera_mode, make_new_line=False) + self._camera_mode) + + add_overlay( + topleft, + "Run speed = %.3f x real time" % + self._run_speed_factor, + "[S]lower, [F]aster") + + add_overlay( + topleft, + "Press E to toggle reference frames.") + + add_overlay( + topleft, + "Press G to toggle geom groups.", + make_new_line=False) def _set_camera(self): """ From c85c7321552f258a93f1b5ba4299cf3719be1f3d Mon Sep 17 00:00:00 2001 From: robfiras Date: Mon, 4 Sep 2023 18:37:22 +0200 Subject: [PATCH 36/50] Added new features to the Mujoco viewer. 
- viewer can now toggle the visualization of specific geom groups. - viewer now allows to set visualizaiton of specific geom groups on startup. --- mushroom_rl/utils/mujoco/viewer.py | 66 +++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index 10783256d..1db075460 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -14,7 +14,8 @@ class MujocoGlfwViewer: def __init__(self, model, dt, width=1920, height=1080, start_paused=False, custom_render_callback=None, record=False, camera_params=None, - default_camera_mode="static", hide_menu_on_startup=False): + default_camera_mode="static", hide_menu_on_startup=False, + geom_group_visualization_on_startup=None): """ Constructor. @@ -31,6 +32,8 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, Checkout the function get_default_camera_params() to know what parameters are expected. Is some camera type specification or parameter is missing, the default one is used. hide_menu_on_startup (bool): If True, the menu is hidden on startup. + geom_group_visualization_on_startup (int/list): int or list defining which geom group_ids should be + visualized on startup. If None, all are visualized. 
""" @@ -98,6 +101,14 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self._overlay = {} self._hide_menu = hide_menu_on_startup + if geom_group_visualization_on_startup is not None: + assert type(geom_group_visualization_on_startup) == list or type(geom_group_visualization_on_startup) == int + if type(geom_group_visualization_on_startup) is not list: + geom_group_visualization_on_startup = [geom_group_visualization_on_startup] + for group_id, _ in enumerate(self._scene_option.geomgroup): + if group_id not in geom_group_visualization_on_startup: + self._scene_option.geomgroup[group_id] = False + def load_new_model(self, model): """ Loads a new model to the viewer, and resets the scene and context. @@ -192,6 +203,36 @@ def keyboard(self, window, key, scancode, act, mods): self._scene_option.flags[mujoco.mjtVisFlag.mjVIS_TRANSPARENT] = not self._scene_option.flags[ mujoco.mjtVisFlag.mjVIS_TRANSPARENT] + if key == glfw.KEY_0: + self._scene_option.geomgroup[0] = not self._scene_option.geomgroup[0] + + if key == glfw.KEY_1: + self._scene_option.geomgroup[1] = not self._scene_option.geomgroup[1] + + if key == glfw.KEY_2: + self._scene_option.geomgroup[2] = not self._scene_option.geomgroup[2] + + if key == glfw.KEY_3: + self._scene_option.geomgroup[3] = not self._scene_option.geomgroup[3] + + if key == glfw.KEY_4: + self._scene_option.geomgroup[4] = not self._scene_option.geomgroup[4] + + if key == glfw.KEY_5: + self._scene_option.geomgroup[5] = not self._scene_option.geomgroup[5] + + if key == glfw.KEY_6: + self._scene_option.geomgroup[6] = not self._scene_option.geomgroup[6] + + if key == glfw.KEY_7: + self._scene_option.geomgroup[7] = not self._scene_option.geomgroup[7] + + if key == glfw.KEY_8: + self._scene_option.geomgroup[8] = not self._scene_option.geomgroup[8] + + if key == glfw.KEY_9: + self._scene_option.geomgroup[9] = not self._scene_option.geomgroup[9] + if key == glfw.KEY_TAB: self._camera_mode_target = 
next(self._camera_mode_iter) @@ -201,10 +242,6 @@ def keyboard(self, window, key, scancode, act, mods): if key == glfw.KEY_F: self._run_speed_factor *= 2.0 - if key == glfw.KEY_G: - for i in range(len(self._scene_option.geomgroup)): - self._scene_option.geomgroup[i] = not self._scene_option.geomgroup[i] - if key == glfw.KEY_E: self._scene_option.frame = not self._scene_option.frame @@ -374,6 +411,14 @@ def add_overlay(gridpos, text1, text2="", make_new_line=True): topleft, "Press T to make the model transparent.") + add_overlay( + topleft, + "Press E to toggle reference frames.") + + add_overlay( + topleft, + "Press 0-9 to disable/enable geom group visualization.") + visualize_contact = "On" if self._scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] else "Off" add_overlay( topleft, @@ -388,16 +433,7 @@ def add_overlay(gridpos, text1, text2="", make_new_line=True): topleft, "Run speed = %.3f x real time" % self._run_speed_factor, - "[S]lower, [F]aster") - - add_overlay( - topleft, - "Press E to toggle reference frames.") - - add_overlay( - topleft, - "Press G to toggle geom groups.", - make_new_line=False) + "[S]lower, [F]aster", make_new_line=False) def _set_camera(self): """ From 4df0e08de2063c7aba6cb5647dda58b133088cec Mon Sep 17 00:00:00 2001 From: robfiras Date: Tue, 5 Sep 2023 12:51:00 +0200 Subject: [PATCH 37/50] Updated Mujoco step function. - now the _compute_action is only called if its overridden. - minor clean-up in Multi-Mujoco. 
--- mushroom_rl/environments/mujoco.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 2abc9ac83..2933d1da5 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -110,6 +110,13 @@ def __init__(self, file_name, actuation_spec, observation_spec, gamma, horizon, # set the warning callback to stop the simulation when a mujoco warning occurs mujoco.set_mju_user_warning(self.user_warning_raise_exception) + # check whether the function compute_action was overridden or not. If yes, we want to compute + # the action at simulation frequency, if not we do it at control frequency. + if type(self)._compute_action == MuJoCo._compute_action: + self._recompute_action_per_step = False + else: + self._recompute_action_per_step = True + super().__init__(mdp_info) def seed(self, seed): @@ -129,10 +136,13 @@ def step(self, action): self._step_init(cur_obs, action) + ctrl_action = None + for i in range(self._n_intermediate_steps): - ctrl_action = self._compute_action(cur_obs, action) - self._data.ctrl[self._action_indices] = ctrl_action + if self._recompute_action_per_step or ctrl_action is None: + ctrl_action = self._compute_action(cur_obs, action) + self._data.ctrl[self._action_indices] = ctrl_action self._simulation_pre_step() @@ -140,6 +150,10 @@ def step(self, action): self._simulation_post_step() + if self._recompute_action_per_step: + cur_obs = self._create_observation(self.obs_helper._build_obs(self._data)) + + if not self._recompute_action_per_step: cur_obs = self._create_observation(self.obs_helper._build_obs(self._data)) self._step_finalize() @@ -638,9 +652,12 @@ def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, # set the warning callback to stop the simulation when a mujoco warning occurs mujoco.set_mju_user_warning(self.user_warning_raise_exception) - # needed for recording - 
self._record_video = False - self._video = None + # check whether the function compute_action was overridden or not. If yes, we want to compute + # the action at simulation frequency, if not we do it at control frequency. + if type(self)._compute_action == MuJoCo._compute_action: + self._recompute_action_per_step = False + else: + self._recompute_action_per_step = True # call grad-parent class, not MuJoCo super(MuJoCo, self).__init__(mdp_info) From 69d3313110a242f27c6eb9c696901bef72b3171b Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Fri, 29 Sep 2023 16:46:55 +0200 Subject: [PATCH 38/50] Fixed bug of otimizer loader - fixes #130 - the optimizer loader was creating a spurious state when the optimizer was not used before save, now we check if thet state is empty, in that scenario we don't mess up with the state dictionary. --- mushroom_rl/utils/torch.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mushroom_rl/utils/torch.py b/mushroom_rl/utils/torch.py index 461fa7ecf..41b6caeea 100644 --- a/mushroom_rl/utils/torch.py +++ b/mushroom_rl/utils/torch.py @@ -126,10 +126,11 @@ def to_int_tensor(x, use_cuda=False): def update_optimizer_parameters(optimizer, new_parameters): - for p_old, p_new in zip(optimizer.param_groups[0]['params'], new_parameters): - data = optimizer.state[p_old] - del optimizer.state[p_old] - optimizer.state[p_new] = data + if len(optimizer.state) > 0: + for p_old, p_new in zip(optimizer.param_groups[0]['params'], new_parameters): + data = optimizer.state[p_old] + del optimizer.state[p_old] + optimizer.state[p_new] = data optimizer.param_groups[0]['params'] = new_parameters From dcc6415b81ed48df0bbee81de942611acd1dbcef Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Tue, 3 Oct 2023 18:19:36 +0200 Subject: [PATCH 39/50] Removed numpy_ml dependency - numpy_ml is unmantained and breaks in newer version of python - implemented Adam optimizer natively in mushroom - removed Adagrad and RMSProp optimizers --- 
mushroom_rl/utils/optimizers.py | 104 ++++++++----------------------- requirements.txt | 1 - tests/algorithms/helper/utils.py | 37 ++--------- 3 files changed, 33 insertions(+), 109 deletions(-) diff --git a/mushroom_rl/utils/optimizers.py b/mushroom_rl/utils/optimizers.py index 657f09fc4..596acfd56 100644 --- a/mushroom_rl/utils/optimizers.py +++ b/mushroom_rl/utils/optimizers.py @@ -1,5 +1,4 @@ import numpy as np -import numpy_ml as npml from mushroom_rl.core import Serializable from mushroom_rl.utils.parameters import Parameter @@ -124,98 +123,49 @@ class AdamOptimizer(Optimizer): This class implements the Adam optimizer. """ - def __init__(self, lr=0.001, decay1=0.9, decay2=0.999, maximize=True): + def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-7, maximize=True): """ Constructor. Args: lr ([float, Parameter], 0.001): the learning rate; - decay1 (float, 0.9): Adam beta1 parameter; - decay2 (float, 0.999): Adam beta2 parameter; + beta1 (float, 0.9): Adam beta1 parameter; + beta2 (float, 0.999): Adam beta2 parameter; maximize (bool, True): by default Optimizers do a gradient ascent step. Set to False for gradient descent. """ super().__init__(lr, maximize) # lr_scheduler must be set to None, as we have our own scheduler - self._optimizer = npml.neural_nets.optimizers.Adam( - lr=self._lr.initial_value, - decay1=decay1, - decay2=decay2, - lr_scheduler=None - ) - - self._add_save_attr(_optimizer='pickle') - - def __call__(self, params, grads): - if self._maximize: - # -1*grads because numpy_ml does gradient descent by default, not ascent - grads *= -1 - # Fix the numpy_ml optimizer lr to the one we computed - self._optimizer.lr_scheduler.lr = self._lr() - return self._optimizer.update(params, grads, 'theta') - - -class AdaGradOptimizer(Optimizer): - """ - This class implements the AdaGrad optimizer. - - """ - def __init__(self, lr=0.001, maximize=True): - """ - Constructor. 
- - Args: - lr ([float, Parameter], 0.001): the learning rate; - maximize (bool, True): by default Optimizers do a gradient ascent step. Set to False for gradient descent. - - """ - super().__init__(lr, maximize) - # lr_scheduler must be set to None, as we have our own scheduler - self._optimizer = npml.neural_nets.optimizers.AdaGrad( - lr=self._lr.initial_value, - lr_scheduler=None - ) + self._m = None + self._v = None + self._beta1 = beta1 + self._beta2 = beta2 + self._eps = eps + self._t = 0 - self._add_save_attr(_optimizer='pickle') + self._add_save_attr(_m='numpy', + _v='numpy', + _beta1='primitive', + _beta2='primitive', + _t='primitive' + ) def __call__(self, params, grads): - if self._maximize: - # -1*grads because numpy_ml does gradient descent by default, not ascent + if not self._maximize: grads *= -1 - # Fix the numpy_ml optimizer lr to the one we computed - self._optimizer.lr_scheduler.lr = self._lr() - return self._optimizer.update(params, grads, 'theta') + if self._m is None: + self._t = 0 + self._m = np.zeros_like(params) + self._v = np.zeros_like(params) -class RMSPropOptimizer(Optimizer): - """ - This class implements the RMSProp optimizer. + self._t += 1 + self._m = self._beta1 * self._m + (1 - self._beta1) * grads + self._v = self._beta2 * self._v + (1 - self._beta2) * grads ** 2 - """ - def __init__(self, lr=0.001, decay=0.9, maximize=True): - """ - Constructor. + m_hat = self._m / (1 - self._beta1 ** self._t) + v_hat = self._v / (1 - self._beta2 ** self._t) - Args: - lr ([float, Parameter], 0.001): the learning rate; - decay (float, 0.9): rate of decay for the moving average; - maximize (bool, True): by default Optimizers do a gradient ascent step. Set to False for gradient descent. 
+ update = self._lr() * m_hat / (np.sqrt(v_hat) + self._eps) - """ - super().__init__(lr, maximize) - # lr_scheduler must be set to None, as we have our own scheduler - self._optimizer = npml.neural_nets.optimizers.RMSProp( - lr=self._lr.initial_value, - decay=decay, - lr_scheduler=None - ) - - self._add_save_attr(_optimizer='pickle') - - def __call__(self, params, grads): - if self._maximize: - # -1*grads because numpy_ml does gradient descent by default, not ascent - grads *= -1 - # Fix the numpy_ml optimizer lr to the one we computed - self._optimizer.lr_scheduler.lr = self._lr() - return self._optimizer.update(params, grads, 'theta') + return params + update diff --git a/requirements.txt b/requirements.txt index 2b52c94c7..9d677fda2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ numpy -numpy_ml scipy scikit-learn matplotlib diff --git a/tests/algorithms/helper/utils.py b/tests/algorithms/helper/utils.py index 30b490d43..2fed7444a 100644 --- a/tests/algorithms/helper/utils.py +++ b/tests/algorithms/helper/utils.py @@ -1,8 +1,3 @@ -# Uncomment to run tests locally -# import sys -# import os -# sys.path = [os.getcwd()] + sys.path - import torch import numpy as np from sklearn.ensemble import ExtraTreesRegressor @@ -21,14 +16,14 @@ from mushroom_rl.policy.noise_policy import OrnsteinUhlenbeckPolicy from mushroom_rl.features._implementations.tiles_features import TilesFeatures from mushroom_rl.utils.parameters import Parameter, LinearParameter -from mushroom_rl.utils.optimizers import AdaptiveOptimizer, SGDOptimizer, AdamOptimizer, AdaGradOptimizer, \ - RMSPropOptimizer +from mushroom_rl.utils.optimizers import AdaptiveOptimizer, SGDOptimizer, AdamOptimizer from mushroom_rl.distributions.gaussian import GaussianDiagonalDistribution from mushroom_rl.utils.table import Table from mushroom_rl.utils.spaces import Discrete from mushroom_rl.features._implementations.functional_features import FunctionalFeatures from 
mushroom_rl.features._implementations.basis_features import BasisFeatures + class TestUtils: @classmethod @@ -49,11 +44,13 @@ def assert_eq(cls, this, that): this = this.model that = that.model for i in range(0, len(this)): - if cls._check_type(this[i], that[i], list) or cls._check_type(this[i], that[i], Ensemble) or cls._check_type(this[i], that[i], ExtraTreesRegressor): + if cls._check_type(this[i], that[i], list) or cls._check_type(this[i], that[i], Ensemble) \ + or cls._check_type(this[i], that[i], ExtraTreesRegressor): cls.assert_eq(this[i], that[i]) else: assert cls.eq_weights(this[i], that[i]) - elif cls._check_subtype(this, that, TorchPolicy) or cls._check_type(this, that, SACPolicy) or cls._check_subtype(this, that, ParametricPolicy): + elif cls._check_subtype(this, that, TorchPolicy) or cls._check_type(this, that, SACPolicy) \ + or cls._check_subtype(this, that, ParametricPolicy): assert cls.eq_weights(this, that) elif cls._check_subtype(this, that, TDPolicy): cls.assert_eq(this.get_q(), that.get_q()) @@ -81,10 +78,6 @@ def assert_eq(cls, this, that): assert cls.eq_sgd_optimizer(this, that) elif cls._check_type(this, that, AdamOptimizer): assert cls.eq_adam_optimizer(this, that) - elif cls._check_type(this, that, AdaGradOptimizer): - assert cls.eq_adagrad_optimizer(this, that) - elif cls._check_type(this, that, RMSPropOptimizer): - assert cls.eq_rmsprop_optimizer(this, that) elif cls._check_type(this, that, GaussianDiagonalDistribution): assert cls.eq_gaussian_diagonal_dist(this, that) elif cls._check_type(this, that, Table): @@ -314,24 +307,6 @@ def eq_adam_optimizer(cls, this, that): res = cls._eq_numpy(this._eps, that._eps) return res - @classmethod - def eq_adagrad_optimizer(cls, this, that): - """ - Compare two AdagradParameterOptimizer objects for equality - """ - - res = cls._eq_numpy(this._eps, that._eps) - return res - - @classmethod - def eq_rmsprop_optimizer(cls, this, that): - """ - Compare two RMSPropParameterOptimizer objects for equality - 
""" - - res = cls._eq_numpy(this._eps, that._eps) - return res - @classmethod def eq_gaussian_diagonal_dist(cls, this, that): """ From 71c124b33ede88682614d8402522b2ec06f2931b Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Tue, 3 Oct 2023 19:55:10 +0200 Subject: [PATCH 40/50] Improvements in the SAC algorithm - Added support for paper and original implementation of alpha loss - Default loss for alpha changed to the paper one - Fixed entropy calculation, now the algorithm uses samples --- .../actor_critic/deep_actor_critic/sac.py | 151 +++++++----------- 1 file changed, 58 insertions(+), 93 deletions(-) diff --git a/mushroom_rl/algorithms/actor_critic/deep_actor_critic/sac.py b/mushroom_rl/algorithms/actor_critic/deep_actor_critic/sac.py index 157d4bb96..b4236f2b3 100644 --- a/mushroom_rl/algorithms/actor_critic/deep_actor_critic/sac.py +++ b/mushroom_rl/algorithms/actor_critic/deep_actor_critic/sac.py @@ -17,27 +17,20 @@ class SACPolicy(Policy): """ - Class used to implement the policy used by the Soft Actor-Critic - algorithm. The policy is a Gaussian policy squashed by a tanh. - This class implements the compute_action_and_log_prob and the - compute_action_and_log_prob_t methods, that are fundamental for - the internals calculations of the SAC algorithm. + Class used to implement the policy used by the Soft Actor-Critic algorithm. + The policy is a Gaussian policy squashed by a tanh. This class implements the compute_action_and_log_prob and the + compute_action_and_log_prob_t methods, that are fundamental for the internals calculations of the SAC algorithm. """ - def __init__(self, mu_approximator, sigma_approximator, min_a, max_a, - log_std_min, log_std_max): + def __init__(self, mu_approximator, sigma_approximator, min_a, max_a, log_std_min, log_std_max): """ Constructor. 
Args: - mu_approximator (Regressor): a regressor computing mean in given a - state; - sigma_approximator (Regressor): a regressor computing the variance - in given a state; - min_a (np.ndarray): a vector specifying the minimum action value - for each component; - max_a (np.ndarray): a vector specifying the maximum action value - for each component. + mu_approximator (Regressor): a regressor computing mean in given a state; + sigma_approximator (Regressor): a regressor computing the variance in given a state; + min_a (np.ndarray): a vector specifying the minimum action value for each component; + max_a (np.ndarray): a vector specifying the maximum action value for each component. log_std_min ([float, Parameter]): min value for the policy log std; log_std_max ([float, Parameter]): max value for the policy log std. @@ -78,8 +71,7 @@ def draw_action(self, state): def compute_action_and_log_prob(self, state): """ - Function that samples actions using the reparametrization trick and - the log probability for such actions. + Function that samples actions using the reparametrization trick and the log probability for such actions. Args: state (np.ndarray): the state in which the action is sampled. @@ -93,17 +85,15 @@ def compute_action_and_log_prob(self, state): def compute_action_and_log_prob_t(self, state, compute_log_prob=True): """ - Function that samples actions using the reparametrization trick and, - optionally, the log probability for such actions. + Function that samples actions using the reparametrization trick and, optionally, the log probability for such + actions. Args: state (np.ndarray): the state in which the action is sampled; - compute_log_prob (bool, True): whether to compute the log - probability or not. + compute_log_prob (bool, True): whether to compute the log probability or not. Returns: - The actions sampled and, optionally, the log probability as torch - tensors. + The actions sampled and, optionally, the log probability as torch tensors. 
""" dist = self.distribution(state) @@ -123,8 +113,7 @@ def distribution(self, state): Compute the policy distribution in the given states. Args: - state (np.ndarray): the set of states where the distribution is - computed. + state (np.ndarray): the set of states where the distribution is computed. Returns: The torch distribution for the provided states. @@ -147,19 +136,15 @@ def entropy(self, state=None): The value of the entropy of the policy. """ - - return torch.mean(self.distribution(state).entropy()).detach().cpu().numpy().item() - - def reset(self): - pass + _, log_pi = self.compute_action_and_log_prob(state) + return -log_pi.mean() def set_weights(self, weights): """ Setter. Args: - weights (np.ndarray): the vector of the new weights to be used by - the policy. + weights (np.ndarray): the vector of the new weights to be used by the policy. """ mu_weights = weights[:self._mu_approximator.weights_size] @@ -190,8 +175,7 @@ def use_cuda(self): def parameters(self): """ - Returns the trainable policy parameters, as expected by torch - optimizers. + Returns the trainable policy parameters, as expected by torch optimizers. Returns: List of parameters to be optimized. @@ -208,38 +192,30 @@ class SAC(DeepAC): Haarnoja T. et al.. 2019. """ - def __init__(self, mdp_info, actor_mu_params, actor_sigma_params, - actor_optimizer, critic_params, batch_size, - initial_replay_size, max_replay_size, warmup_transitions, tau, - lr_alpha, log_std_min=-20, log_std_max=2, target_entropy=None, - critic_fit_params=None): + def __init__(self, mdp_info, actor_mu_params, actor_sigma_params, actor_optimizer, critic_params, batch_size, + initial_replay_size, max_replay_size, warmup_transitions, tau, lr_alpha, use_log_alpha_loss=False, + log_std_min=-20, log_std_max=2, target_entropy=None,critic_fit_params=None): """ Constructor. 
Args: - actor_mu_params (dict): parameters of the actor mean approximator - to build; - actor_sigma_params (dict): parameters of the actor sigm - approximator to build; - actor_optimizer (dict): parameters to specify the actor - optimizer algorithm; - critic_params (dict): parameters of the critic approximator to - build; + actor_mu_params (dict): parameters of the actor mean approximator to build; + actor_sigma_params (dict): parameters of the actor sigma approximator to build; + actor_optimizer (dict): parameters to specify the actor optimizer algorithm; + critic_params (dict): parameters of the critic approximator to build; batch_size ((int, Parameter)): the number of samples in a batch; - initial_replay_size (int): the number of samples to collect before - starting the learning; - max_replay_size (int): the maximum number of samples in the replay - memory; - warmup_transitions ([int, Parameter]): number of samples to accumulate in the - replay memory to start the policy fitting; + initial_replay_size (int): the number of samples to collect before starting the learning; + max_replay_size (int): the maximum number of samples in the replay memory; + warmup_transitions ([int, Parameter]): number of samples to accumulate in the replay memory to start the + policy fitting; tau ([float, Parameter]): value of coefficient for soft updates; lr_alpha ([float, Parameter]): Learning rate for the entropy coefficient; + use_log_alpha_loss (bool, False): whether to use the original implementation loss or the one from the + paper; log_std_min ([float, Parameter]): Min value for the policy log std; log_std_max ([float, Parameter]): Max value for the policy log std; - target_entropy (float, None): target entropy for the policy, if - None a default value is computed ; - critic_fit_params (dict, None): parameters of the fitting algorithm - of the critic approximator. 
+ target_entropy (float, None): target entropy for the policy, if None a default value is computed; + critic_fit_params (dict, None): parameters of the fitting algorithm of the critic approximator. """ self._critic_fit_params = dict() if critic_fit_params is None else critic_fit_params @@ -248,6 +224,8 @@ def __init__(self, mdp_info, actor_mu_params, actor_sigma_params, self._warmup_transitions = to_parameter(warmup_transitions) self._tau = to_parameter(tau) + self._use_log_alpha_loss = use_log_alpha_loss + if target_entropy is None: self._target_entropy = -np.prod(mdp_info.action_space.shape).astype(np.float32) else: @@ -261,25 +239,16 @@ def __init__(self, mdp_info, actor_mu_params, actor_sigma_params, critic_params['n_models'] = 2 target_critic_params = deepcopy(critic_params) - self._critic_approximator = Regressor(TorchApproximator, - **critic_params) - self._target_critic_approximator = Regressor(TorchApproximator, - **target_critic_params) - - actor_mu_approximator = Regressor(TorchApproximator, - **actor_mu_params) - actor_sigma_approximator = Regressor(TorchApproximator, - **actor_sigma_params) - - policy = SACPolicy(actor_mu_approximator, - actor_sigma_approximator, - mdp_info.action_space.low, - mdp_info.action_space.high, - log_std_min, - log_std_max) - - self._init_target(self._critic_approximator, - self._target_critic_approximator) + self._critic_approximator = Regressor(TorchApproximator, **critic_params) + self._target_critic_approximator = Regressor(TorchApproximator, **target_critic_params) + + actor_mu_approximator = Regressor(TorchApproximator, **actor_mu_params) + actor_sigma_approximator = Regressor(TorchApproximator, **actor_sigma_params) + + policy = SACPolicy(actor_mu_approximator, actor_sigma_approximator, mdp_info.action_space.low, + mdp_info.action_space.high, log_std_min, log_std_max) + + self._init_target(self._critic_approximator, self._target_critic_approximator) self._log_alpha = torch.tensor(0., dtype=torch.float32) @@ -302,6 
+271,7 @@ def __init__(self, mdp_info, actor_mu_params, actor_sigma_params, _replay_memory='mushroom', _critic_approximator='mushroom', _target_critic_approximator='mushroom', + _use_log_alpha_loss='primitive', _log_alpha='torch', _alpha_optim='torch' ) @@ -311,8 +281,7 @@ def __init__(self, mdp_info, actor_mu_params, actor_sigma_params, def fit(self, dataset, **info): self._replay_memory.add(dataset) if self._replay_memory.initialized: - state, action, reward, next_state, absorbing, _ = \ - self._replay_memory.get(self._batch_size()) + state, action, reward, next_state, absorbing, _ = self._replay_memory.get(self._batch_size()) if self._replay_memory.size > self._warmup_transitions(): action_new, log_prob = self.policy.compute_action_and_log_prob_t(state) @@ -323,24 +292,23 @@ def fit(self, dataset, **info): q_next = self._next_q(next_state, absorbing) q = reward + self.mdp_info.gamma * q_next - self._critic_approximator.fit(state, action, q, - **self._critic_fit_params) + self._critic_approximator.fit(state, action, q, **self._critic_fit_params) - self._update_target(self._critic_approximator, - self._target_critic_approximator) + self._update_target(self._critic_approximator, self._target_critic_approximator) def _loss(self, state, action_new, log_prob): - q_0 = self._critic_approximator(state, action_new, - output_tensor=True, idx=0) - q_1 = self._critic_approximator(state, action_new, - output_tensor=True, idx=1) + q_0 = self._critic_approximator(state, action_new, output_tensor=True, idx=0) + q_1 = self._critic_approximator(state, action_new, output_tensor=True, idx=1) q = torch.min(q_0, q_1) return (self._alpha * log_prob - q).mean() def _update_alpha(self, log_prob): - alpha_loss = - (self._log_alpha * (log_prob + self._target_entropy)).mean() + if self._use_log_alpha_loss: + alpha_loss = - (self._log_alpha * (log_prob + self._target_entropy)).mean() + else: + alpha_loss = - (self._alpha * (log_prob + self._target_entropy)).mean() 
self._alpha_optim.zero_grad() alpha_loss.backward() self._alpha_optim.step() @@ -348,14 +316,11 @@ def _update_alpha(self, log_prob): def _next_q(self, next_state, absorbing): """ Args: - next_state (np.ndarray): the states where next action has to be - evaluated; - absorbing (np.ndarray): the absorbing flag for the states in - ``next_state``. + next_state (np.ndarray): the states where next action has to be evaluated; + absorbing (np.ndarray): the absorbing flag for the states in ``next_state``. Returns: - Action-values returned by the critic for ``next_state`` and the - action returned by the actor. + Action-values returned by the critic for ``next_state`` and the action returned by the actor. """ a, log_prob_next = self.policy.compute_action_and_log_prob(next_state) From e69cb97f258775e734c6a5b88ba0e87a2db2fbbf Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Fri, 6 Oct 2023 12:11:34 +0200 Subject: [PATCH 41/50] Using cholesky decomposition for torch gaussian - should be much faster for higher action size --- mushroom_rl/policy/torch_policy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mushroom_rl/policy/torch_policy.py b/mushroom_rl/policy/torch_policy.py index 2cd673598..b152300b8 100644 --- a/mushroom_rl/policy/torch_policy.py +++ b/mushroom_rl/policy/torch_policy.py @@ -230,11 +230,11 @@ def entropy_t(self, state=None): return self._action_dim / 2 * np.log(2 * np.pi * np.e) + torch.sum(self._log_sigma) def distribution_t(self, state): - mu, sigma = self.get_mean_and_covariance(state) - return torch.distributions.MultivariateNormal(loc=mu, covariance_matrix=sigma) + mu, chol_sigma = self.get_mean_and_chol(state) + return torch.distributions.MultivariateNormal(loc=mu, scale_tril=chol_sigma) - def get_mean_and_covariance(self, state): - return self._mu(state, **self._predict_params, output_tensor=True), torch.diag(torch.exp(2 * self._log_sigma)) + def get_mean_and_chol(self, state): + return self._mu(state, 
**self._predict_params, output_tensor=True), torch.diag(torch.exp(self._log_sigma)) def set_weights(self, weights): log_sigma_data = torch.from_numpy(weights[-self._action_dim:]) From 5087dcb5fd28b64d40c0c9576fd0c06af56768d7 Mon Sep 17 00:00:00 2001 From: robfiras Date: Fri, 6 Oct 2023 18:25:06 +0200 Subject: [PATCH 42/50] Updated Mujoco interface. - now the interface also accept dm_control mjcf xml_handles as input. - paths can be passed still. --- mushroom_rl/environments/mujoco.py | 41 +++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 2933d1da5..2e9ae0457 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -1,5 +1,6 @@ import mujoco import numpy as np +from dm_control import mjcf from mushroom_rl.core import Environment, MDPInfo from mushroom_rl.utils.spaces import Box from mushroom_rl.utils.mujoco import * @@ -10,15 +11,14 @@ class MuJoCo(Environment): Class to create a Mushroom environment using the MuJoCo simulator. """ - def __init__(self, file_name, actuation_spec, observation_spec, gamma, horizon, timestep=None, n_substeps=1, + def __init__(self, xml_file, actuation_spec, observation_spec, gamma, horizon, timestep=None, n_substeps=1, n_intermediate_steps=1, additional_data_spec=None, collision_groups=None, max_joint_vel=None, **viewer_params): """ Constructor. Args: - file_name (string): The path to the XML file with which the - environment should be created; + xml_file (str/xml handle): A string with a path to the xml or an Mujoco xml handle. actuation_spec (list): A list specifying the names of the joints which should be controllable by the agent. 
Can be left empty when all actuators should be used; @@ -60,7 +60,7 @@ def __init__(self, file_name, actuation_spec, observation_spec, gamma, horizon, """ # Create the simulation - self._model = mujoco.MjModel.from_xml_path(file_name) + self._model = self.load_model(xml_file) if timestep is not None: self._model.opt.timestep = timestep self._timestep = timestep @@ -517,6 +517,30 @@ def user_warning_raise_exception(warning): else: raise RuntimeError('Got MuJoCo Warning: ' + warning) + @staticmethod + def load_model(xml_file): + """ + Takes an xml_file and compiles and loads the model. + + Args: + xml_file (str/xml handle): A string with a path to the xml or an Mujoco xml handle. + + Returns: + Mujoco model. + + """ + if type(xml_file) == mjcf.element.RootElement: + # load from xml handle + model = mujoco.MjModel.from_xml_string(xml=xml_file.to_xml_string(), + assets=xml_file.get_assets()) + elif type(xml_file) == str: + # load from path + model = mujoco.MjModel.from_xml_path(xml_file) + else: + raise ValueError(f"Unsupported type for xml_file {type(xml_file)}.") + + return model + class MultiMuJoCo(MuJoCo): """ @@ -526,15 +550,14 @@ class MultiMuJoCo(MuJoCo): """ - def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, timestep=None, + def __init__(self, xml_files, actuation_spec, observation_spec, gamma, horizon, timestep=None, n_substeps=1, n_intermediate_steps=1, additional_data_spec=None, collision_groups=None, max_joint_vel=None, random_env_reset=True, **viewer_params): """ Constructor. Args: - file_names (list): The list of paths to the XML files to create the - environments; + xml_files (str/xml handle): A list containing strings with a path to the xml or Mujoco xml handles; actuation_spec (list): A list specifying the names of the joints which should be controllable by the agent. 
Can be left empty when all actuators should be used; @@ -577,9 +600,9 @@ def __init__(self, file_names, actuation_spec, observation_spec, gamma, horizon, """ # Create the simulation - assert type(file_names) self._random_env_reset = random_env_reset - self._models = [mujoco.MjModel.from_xml_path(f) for f in file_names] + self._models = [self.load_model(f) for f in xml_files] + self._current_model_idx = 0 self._model = self._models[self._current_model_idx] if timestep is not None: From c2de9cfeb1bfb42aa00d04e1bb1242bc7f698a5c Mon Sep 17 00:00:00 2001 From: robfiras Date: Fri, 6 Oct 2023 18:34:29 +0200 Subject: [PATCH 43/50] Updated GaussianTorchPolicy. - set validate_args flag to False to speed-up construction of Gaussian. - added an assert to check if sigma > 0.0. --- mushroom_rl/policy/torch_policy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mushroom_rl/policy/torch_policy.py b/mushroom_rl/policy/torch_policy.py index b152300b8..3a1a6f32b 100644 --- a/mushroom_rl/policy/torch_policy.py +++ b/mushroom_rl/policy/torch_policy.py @@ -231,9 +231,10 @@ def entropy_t(self, state=None): def distribution_t(self, state): mu, chol_sigma = self.get_mean_and_chol(state) - return torch.distributions.MultivariateNormal(loc=mu, scale_tril=chol_sigma) + return torch.distributions.MultivariateNormal(loc=mu, scale_tril=chol_sigma, validate_args=False) def get_mean_and_chol(self, state): + assert torch.all(torch.exp(self._log_sigma) > 0) return self._mu(state, **self._predict_params, output_tensor=True), torch.diag(torch.exp(self._log_sigma)) def set_weights(self, weights): From f2566e02591452000564a48308fd461865d7b439 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Mon, 9 Oct 2023 14:11:50 +0200 Subject: [PATCH 44/50] Version bump to 1.10.0 --- mushroom_rl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mushroom_rl/__init__.py b/mushroom_rl/__init__.py index 4175d3998..52af183e5 100644 --- a/mushroom_rl/__init__.py +++ 
b/mushroom_rl/__init__.py @@ -1 +1 @@ -__version__ = '1.9.2' +__version__ = '1.10.0' From ce59c7ce45c25207e04ae209dda5ff82c7c6c1f8 Mon Sep 17 00:00:00 2001 From: robfiras Date: Wed, 11 Oct 2023 15:28:17 +0200 Subject: [PATCH 45/50] Bugfix Mujoco camera. - initial mode couldn't be set for "static" and "top_static" - also "static" and "top_static" have the "lookat" parameter. --- mushroom_rl/utils/mujoco/viewer.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index 1db075460..6c7ed3390 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -85,8 +85,8 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self._camera_params = self._assert_camera_params(camera_params) self._all_camera_modes = ("static", "follow", "top_static") self._camera_mode_iter = cycle(self._all_camera_modes) - self._camera_mode = next(self._camera_mode_iter) - self._camera_mode_target = self._camera_mode + self._camera_mode = None + self._camera_mode_target = next(self._camera_mode_iter) assert default_camera_mode in self._all_camera_modes while self._camera_mode_target != default_camera_mode: self._camera_mode_target = next(self._camera_mode_iter) @@ -477,6 +477,8 @@ def _set_camera_properties(self, mode): self._camera.distance = cam_params["distance"] self._camera.elevation = cam_params["elevation"] self._camera.azimuth = cam_params["azimuth"] + if "lookat" in cam_params: + self._camera.lookat = np.array(cam_params["lookat"]) self._camera_mode = mode def _assert_camera_params(self, camera_params): @@ -526,6 +528,6 @@ def get_default_camera_params(): """ - return dict(static=dict(distance=15.0, elevation=-45.0, azimuth=90.0), + return dict(static=dict(distance=15.0, elevation=-45.0, azimuth=90.0, lookat=np.array([0.0, 0.0, 0.0])), follow=dict(distance=3.5, elevation=0.0, azimuth=90.0), - top_static=dict(distance=5.0, elevation=-90.0, 
azimuth=90.0)) + top_static=dict(distance=5.0, elevation=-90.0, azimuth=90.0, lookat=np.array([0.0, 0.0, 0.0]))) From bb22d6ca7041fe6935b822e21bdf437d5ee5264c Mon Sep 17 00:00:00 2001 From: Puze Liu Date: Thu, 12 Oct 2023 12:07:07 +0200 Subject: [PATCH 46/50] Change from_numpy to as_tensor in torch_approximator --- .../approximators/parametric/torch_approximator.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mushroom_rl/approximators/parametric/torch_approximator.py b/mushroom_rl/approximators/parametric/torch_approximator.py index 2300e54b9..51a896451 100644 --- a/mushroom_rl/approximators/parametric/torch_approximator.py +++ b/mushroom_rl/approximators/parametric/torch_approximator.py @@ -96,7 +96,7 @@ def predict(self, *args, output_tensor=False, **kwargs): """ if not self._use_cuda: - torch_args = [torch.from_numpy(x) if isinstance(x, np.ndarray) else x + torch_args = [torch.as_tensor(x) if isinstance(x, np.ndarray) else x for x in args] val = self.network(*torch_args, **kwargs) @@ -107,7 +107,7 @@ def predict(self, *args, output_tensor=False, **kwargs): else: val = val.detach().numpy() else: - torch_args = [torch.from_numpy(x).cuda() + torch_args = [torch.as_tensor(x).cuda() if isinstance(x, np.ndarray) else x.cuda() for x in args] val = self.network(*torch_args, **kwargs) @@ -239,15 +239,15 @@ def _fit_batch(self, batch, use_weights, kwargs): def _compute_batch_loss(self, batch, use_weights, kwargs): if use_weights: - weights = torch.from_numpy(batch[-1]).type(torch.float) + weights = torch.as_tensor(batch[-1]).type(torch.float) if self._use_cuda: weights = weights.cuda() batch = batch[:-1] if not self._use_cuda: - torch_args = [torch.from_numpy(x) for x in batch] + torch_args = [torch.as_tensor(x) for x in batch] else: - torch_args = [torch.from_numpy(x).cuda() for x in batch] + torch_args = [torch.as_tensor(x).cuda() for x in batch] x = torch_args[:-self._n_fit_targets] @@ -317,9 +317,9 @@ def diff(self, *args, 
**kwargs): """ if not self._use_cuda: - torch_args = [torch.from_numpy(np.atleast_2d(x)) for x in args] + torch_args = [torch.as_tensor(np.atleast_2d(x)) for x in args] else: - torch_args = [torch.from_numpy(np.atleast_2d(x)).cuda() + torch_args = [torch.as_tensor(np.atleast_2d(x)).cuda() for x in args] y_hat = self.network(*torch_args, **kwargs) From dcc4bff95f790c22589e5bdf3bbb9012f109c333 Mon Sep 17 00:00:00 2001 From: robfiras Date: Thu, 19 Oct 2023 16:26:22 +0200 Subject: [PATCH 47/50] Updated Mujoco interface. - now the generation of collision groups includes an assert to check wheter the specified geoms exists. --- mushroom_rl/environments/mujoco.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 2e9ae0457..39527412f 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -98,8 +98,12 @@ def __init__(self, xml_file, actuation_spec, observation_spec, gamma, horizon, t self.collision_groups = {} if collision_groups is not None: for name, geom_names in collision_groups: - self.collision_groups[name] = {mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_GEOM, geom_name) - for geom_name in geom_names} + col_group = list() + for geom_name in geom_names: + mj_id = mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_GEOM, geom_name) + assert mj_id != -1, f"geom \"{geom_name}\" not found! Can't be used for collision-checking." 
+ col_group.append(mj_id) + self.collision_groups[name] = set(col_group) # Finally, we create the MDP information and call the constructor of # the parent class @@ -663,8 +667,12 @@ def __init__(self, xml_files, actuation_spec, observation_spec, gamma, horizon, self.collision_groups = {} if collision_groups is not None: for name, geom_names in collision_groups: - self.collision_groups[name] = {mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_GEOM, geom_name) - for geom_name in geom_names} + col_group = list() + for geom_name in geom_names: + mj_id = mujoco.mj_name2id(self._model, mujoco.mjtObj.mjOBJ_GEOM, geom_name) + assert mj_id != -1, f"geom \"{geom_name}\" not found! Can't be used for collision-checking." + col_group.append(mj_id) + self.collision_groups[name] = set(col_group) # Finally, we create the MDP information and call the constructor of # the parent class From 544b6a37e22c94ae76602d9ea9801b4cefae7e10 Mon Sep 17 00:00:00 2001 From: Michael Drolet <32950163+mdrolet01@users.noreply.github.com> Date: Wed, 25 Oct 2023 15:55:24 +0200 Subject: [PATCH 48/50] Headless (#132) * add offscreen render capability * add viewer changes for cluster * refactor headless feature * update glfw init order --- mushroom_rl/utils/mujoco/viewer.py | 141 ++++++++++++++++++++++------- 1 file changed, 110 insertions(+), 31 deletions(-) diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index 6c7ed3390..8bab2c306 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -2,8 +2,8 @@ import mujoco import time from itertools import cycle - import numpy as np +import os class MujocoGlfwViewer: @@ -15,7 +15,7 @@ class MujocoGlfwViewer: def __init__(self, model, dt, width=1920, height=1080, start_paused=False, custom_render_callback=None, record=False, camera_params=None, default_camera_mode="static", hide_menu_on_startup=False, - geom_group_visualization_on_startup=None): + geom_group_visualization_on_startup=None, 
headless=False): """ Constructor. @@ -46,37 +46,42 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self.frames = 0 self.start_time = time.time() - glfw.init() - glfw.window_hint(glfw.COCOA_RETINA_FRAMEBUFFER, 0) + self._headless = headless + self._model = model + self._font_scale = 100 + + if headless: + self._opengl_context = self.get_opengl_backend(width, height) + self._opengl_context.make_current() + self._width, self._height = self.update_headless_size(width, height) + else: + self._width, self._height = width, height + glfw.init() + glfw.window_hint(glfw.COCOA_RETINA_FRAMEBUFFER, 0) + self._window = glfw.create_window(width=self._width, height=self._height, title="MuJoCo", monitor=None, share=None) + glfw.make_context_current(self._window) + glfw.set_mouse_button_callback(self._window, self.mouse_button) + glfw.set_cursor_pos_callback(self._window, self.mouse_move) + glfw.set_key_callback(self._window, self.keyboard) + glfw.set_scroll_callback(self._window, self.scroll) + self._set_mujoco_buffers() + if record: # dont allow to change the window size to have equal frame size during recording glfw.window_hint(glfw.RESIZABLE, False) + self._viewport = mujoco.MjrRect(0, 0, self._width, self._height) self._loop_count = 0 self._time_per_render = 1 / 60. 
self._run_speed_factor = 1.0 self._paused = start_paused - self._window = glfw.create_window(width=width, height=height, title="MuJoCo", monitor=None, share=None) - glfw.make_context_current(self._window) - - self._width = width - self._height = height - # Disable v_sync, so swap_buffers does not block # glfw.swap_interval(0) - glfw.set_mouse_button_callback(self._window, self.mouse_button) - glfw.set_cursor_pos_callback(self._window, self.mouse_move) - glfw.set_key_callback(self._window, self.keyboard) - glfw.set_scroll_callback(self._window, self.scroll) - - self._model = model - - self._scene = mujoco.MjvScene(model, 1000) + self._scene = mujoco.MjvScene(self._model, 1000) self._scene_option = mujoco.MjvOption() - self._camera = mujoco.MjvCamera() mujoco.mjv_defaultFreeCamera(model, self._camera) if camera_params is None: @@ -92,10 +97,6 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self._camera_mode_target = next(self._camera_mode_iter) self._set_camera() - self._viewport = mujoco.MjrRect(0, 0, width, height) - self._font_scale = 100 - self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(self._font_scale)) - self.custom_render_callback = custom_render_callback self._overlay = {} @@ -264,6 +265,30 @@ def scroll(self, window, x_offset, y_offset): mujoco.mjv_moveCamera(self._model, mujoco.mjtMouse.mjMOUSE_ZOOM, 0, 0.05 * y_offset, self._scene, self._camera) + def _set_mujoco_buffers(self): + self._context = mujoco.MjrContext(self._model, mujoco.mjtFontScale(self._font_scale)) + if self._headless: + mujoco.mjr_setBuffer(mujoco.mjtFramebuffer.mjFB_OFFSCREEN, self._context) + if self._context.currentBuffer != mujoco.mjtFramebuffer.mjFB_OFFSCREEN: + raise RuntimeError("Offscreen rendering not supported") + else: + mujoco.mjr_setBuffer(mujoco.mjtFramebuffer.mjFB_WINDOW, self._context) + if self._context.currentBuffer != mujoco.mjtFramebuffer.mjFB_WINDOW: + raise RuntimeError("Window rendering not supported") + + def 
update_headless_size(self, width, height): + _context = mujoco.MjrContext(self._model, mujoco.mjtFontScale(self._font_scale)) + if width > _context.offWidth or height > _context.offHeight: + width = max(width, self._model.vis.global_.offwidth) + height = max(height, self._model.vis.global_.offheight) + + if width != _context.offWidth or height != _context.offHeight: + self._model.vis.global_.offwidth = width + self._model.vis.global_.offheight = height + + self._set_mujoco_buffers() + return width, height + def render(self, data, record): """ Main rendering function. @@ -279,7 +304,8 @@ def render(self, data, record): def render_inner_loop(self): - self._create_overlay() + if not self._headless: + self._create_overlay() render_start = time.time() @@ -287,7 +313,8 @@ def render_inner_loop(self): mujoco.mjtCatBit.mjCAT_ALL, self._scene) - self._viewport.width, self._viewport.height = glfw.get_window_size(self._window) + if not self._headless: + self._viewport.width, self._viewport.height = glfw.get_window_size(self._window) mujoco.mjr_render(self._viewport, self._scene, self._context) @@ -307,16 +334,18 @@ def render_inner_loop(self): if self.custom_render_callback is not None: self.custom_render_callback(self._viewport, self._context) - glfw.swap_buffers(self._window) - glfw.poll_events() + if not self._headless: + glfw.swap_buffers(self._window) + glfw.poll_events() self.frames += 1 self._overlay.clear() - if glfw.window_should_close(self._window): - self.stop() - exit(0) + if not self._headless: + if glfw.window_should_close(self._window): + self.stop() + exit(0) self._time_per_render = 0.9 * self._time_per_render + 0.1 * (time.time() - render_start) @@ -349,7 +378,10 @@ def read_pixels(self, depth=False): """ - shape = glfw.get_framebuffer_size(self._window) + if self._headless: + shape = [self._width, self._height] + else: + shape = glfw.get_framebuffer_size(self._window) if depth: rgb_img = np.zeros((shape[1], shape[0], 3), dtype=np.uint8) @@ -531,3 +563,50 @@ 
def get_default_camera_params(): return dict(static=dict(distance=15.0, elevation=-45.0, azimuth=90.0, lookat=np.array([0.0, 0.0, 0.0])), follow=dict(distance=3.5, elevation=0.0, azimuth=90.0), top_static=dict(distance=5.0, elevation=-90.0, azimuth=90.0, lookat=np.array([0.0, 0.0, 0.0]))) + + + def get_opengl_backend(self, width, height): + # Reference: https://github.com/openai/gym/blob/master/gym/envs/mujoco/mujoco_rendering.py + def _import_egl(width, height): + from mujoco.egl import GLContext + return GLContext(width, height) + + def _import_glfw(width, height): + from mujoco.glfw import GLContext + return GLContext(width, height) + + def _import_osmesa(width, height): + from mujoco.osmesa import GLContext + return GLContext(width, height) + + _ALL_RENDERERS = { + "glfw" : _import_glfw, + "osmesa" : _import_osmesa, + "egl" : _import_egl + } + + backend = os.environ.get("MUJOCO_GL") + if backend is not None: + try: + opengl_context = _ALL_RENDERERS[backend](width, height) + except KeyError: + raise RuntimeError( + "Environment variable {} must be one of {!r}: got {!r}.".format( + "MUJOCO_GL", _ALL_RENDERERS.keys(), backend + ) + ) + + else: + for name, _ in _ALL_RENDERERS.items(): + try: + opengl_context = _ALL_RENDERERS[name](width, height) + backend = name + break + except: # noqa:E722 + pass + if backend is None: + raise RuntimeError( + "No OpenGL backend could be imported. Attempting to create a " + "rendering context will result in a RuntimeError." + ) + return opengl_context \ No newline at end of file From 656b2db9c1a50dd68c837a942a37f306468a0028 Mon Sep 17 00:00:00 2001 From: robfiras Date: Wed, 25 Oct 2023 15:58:24 +0200 Subject: [PATCH 49/50] Clean-up headless mode Mujoco Viewer. - fixed some bugs. - cleaned-up code. 
--- mushroom_rl/environments/mujoco.py | 6 +- mushroom_rl/utils/mujoco/__init__.py | 2 +- mushroom_rl/utils/mujoco/viewer.py | 96 +++++++++++++++++----------- 3 files changed, 61 insertions(+), 43 deletions(-) diff --git a/mushroom_rl/environments/mujoco.py b/mushroom_rl/environments/mujoco.py index 39527412f..cbc59802f 100644 --- a/mushroom_rl/environments/mujoco.py +++ b/mushroom_rl/environments/mujoco.py @@ -56,7 +56,7 @@ def __init__(self, xml_file, actuation_spec, observation_spec, gamma, horizon, t The list has to define a maximum velocity for every occurrence of JOINT_VEL in the observation_spec. The velocity will not be limited in mujoco **viewer_params: other parameters to be passed to the viewer. - See MujocoGlfwViewer documentation for the available options. + See MujocoViewer documentation for the available options. """ # Create the simulation @@ -172,7 +172,7 @@ def step(self, action): def render(self, record=False): if self._viewer is None: - self._viewer = MujocoGlfwViewer(self._model, self.dt, record=record, **self._viewer_params) + self._viewer = MujocoViewer(self._model, self.dt, record=record, **self._viewer_params) return self._viewer.render(self._data, record) @@ -600,7 +600,7 @@ def __init__(self, xml_files, actuation_spec, observation_spec, gamma, horizon, random_env_reset (bool): If True, a random environment/model is chosen after each episode. If False, it is sequentially iterated through the environment/model list. **viewer_params: other parameters to be passed to the viewer. - See MujocoGlfwViewer documentation for the available options. + See MujocoViewer documentation for the available options. 
""" # Create the simulation diff --git a/mushroom_rl/utils/mujoco/__init__.py b/mushroom_rl/utils/mujoco/__init__.py index 4fca999d5..0b5917b5b 100644 --- a/mushroom_rl/utils/mujoco/__init__.py +++ b/mushroom_rl/utils/mujoco/__init__.py @@ -1,3 +1,3 @@ -from .viewer import MujocoGlfwViewer +from .viewer import MujocoViewer from .observation_helper import ObservationHelper, ObservationType from .kinematics import forward_kinematics diff --git a/mushroom_rl/utils/mujoco/viewer.py b/mushroom_rl/utils/mujoco/viewer.py index 8bab2c306..fcdda9de2 100644 --- a/mushroom_rl/utils/mujoco/viewer.py +++ b/mushroom_rl/utils/mujoco/viewer.py @@ -1,20 +1,48 @@ +import os import glfw import mujoco import time +import collections from itertools import cycle import numpy as np -import os -class MujocoGlfwViewer: +def _import_egl(width, height): + from mujoco.egl import GLContext + + return GLContext(width, height) + + +def _import_glfw(width, height): + from mujoco.glfw import GLContext + + return GLContext(width, height) + + +def _import_osmesa(width, height): + from mujoco.osmesa import GLContext + + return GLContext(width, height) + + +_ALL_RENDERERS = collections.OrderedDict( + [ + ("glfw", _import_glfw), + ("egl", _import_egl), + ("osmesa", _import_osmesa), + ] +) + + +class MujocoViewer: """ - Class that creates a Glfw viewer for mujoco environments. + Class that creates a viewer for mujoco environments. """ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, custom_render_callback=None, record=False, camera_params=None, - default_camera_mode="static", hide_menu_on_startup=False, + default_camera_mode="static", hide_menu_on_startup=None, geom_group_visualization_on_startup=None, headless=False): """ Constructor. @@ -34,9 +62,15 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, hide_menu_on_startup (bool): If True, the menu is hidden on startup. 
geom_group_visualization_on_startup (int/list): int or list defining which geom group_ids should be visualized on startup. If None, all are visualized. + headless (bool): If True, render will be done in headless mode. """ + if hide_menu_on_startup is None and headless: + hide_menu_on_startup = True + elif hide_menu_on_startup is None and not headless: + hide_menu_on_startup = False + self.button_left = False self.button_right = False self.button_middle = False @@ -52,22 +86,26 @@ def __init__(self, model, dt, width=1920, height=1080, start_paused=False, self._font_scale = 100 if headless: - self._opengl_context = self.get_opengl_backend(width, height) + # use the OpenGL render that is available on the machine + self._opengl_context = self.setup_opengl_backend_headless(width, height) self._opengl_context.make_current() self._width, self._height = self.update_headless_size(width, height) else: + # use glfw self._width, self._height = width, height glfw.init() glfw.window_hint(glfw.COCOA_RETINA_FRAMEBUFFER, 0) - self._window = glfw.create_window(width=self._width, height=self._height, title="MuJoCo", monitor=None, share=None) + self._window = glfw.create_window(width=self._width, height=self._height, + title="MuJoCo", monitor=None, share=None) glfw.make_context_current(self._window) glfw.set_mouse_button_callback(self._window, self.mouse_button) glfw.set_cursor_pos_callback(self._window, self.mouse_move) glfw.set_key_callback(self._window, self.keyboard) glfw.set_scroll_callback(self._window, self.scroll) - self._set_mujoco_buffers() + + self._set_mujoco_buffers() - if record: + if record and not headless: # dont allow to change the window size to have equal frame size during recording glfw.window_hint(glfw.RESIZABLE, False) @@ -124,6 +162,7 @@ def load_new_model(self, model): self._scene = mujoco.MjvScene(model, 1000) self._context = mujoco.MjrContext(model, mujoco.mjtFontScale(self._font_scale)) + def mouse_button(self, window, button, act, mods): """ Mouse button 
callback for glfw. @@ -285,8 +324,7 @@ def update_headless_size(self, width, height): if width != _context.offWidth or height != _context.offHeight: self._model.vis.global_.offwidth = width self._model.vis.global_.offheight = height - - self._set_mujoco_buffers() + return width, height def render(self, data, record): @@ -337,16 +375,12 @@ def render_inner_loop(self): if not self._headless: glfw.swap_buffers(self._window) glfw.poll_events() - - self.frames += 1 - - self._overlay.clear() - - if not self._headless: if glfw.window_should_close(self._window): self.stop() exit(0) + self.frames += 1 + self._overlay.clear() self._time_per_render = 0.9 * self._time_per_render + 0.1 * (time.time() - render_start) if self._paused: @@ -379,7 +413,7 @@ def read_pixels(self, depth=False): """ if self._headless: - shape = [self._width, self._height] + shape = (self._width, self._height) else: shape = glfw.get_framebuffer_size(self._window) @@ -398,8 +432,8 @@ def stop(self): Destroys the glfw image. """ - - glfw.destroy_window(self._window) + if not self._headless: + glfw.destroy_window(self._window) def _create_overlay(self): """ @@ -565,25 +599,7 @@ def get_default_camera_params(): top_static=dict(distance=5.0, elevation=-90.0, azimuth=90.0, lookat=np.array([0.0, 0.0, 0.0]))) - def get_opengl_backend(self, width, height): - # Reference: https://github.com/openai/gym/blob/master/gym/envs/mujoco/mujoco_rendering.py - def _import_egl(width, height): - from mujoco.egl import GLContext - return GLContext(width, height) - - def _import_glfw(width, height): - from mujoco.glfw import GLContext - return GLContext(width, height) - - def _import_osmesa(width, height): - from mujoco.osmesa import GLContext - return GLContext(width, height) - - _ALL_RENDERERS = { - "glfw" : _import_glfw, - "osmesa" : _import_osmesa, - "egl" : _import_egl - } + def setup_opengl_backend_headless(self, width, height): backend = os.environ.get("MUJOCO_GL") if backend is not None: @@ -597,6 +613,7 @@ def 
_import_osmesa(width, height): ) else: + # iterate through all OpenGL backends to see which one is available for name, _ in _ALL_RENDERERS.items(): try: opengl_context = _ALL_RENDERERS[name](width, height) @@ -609,4 +626,5 @@ def _import_osmesa(width, height): "No OpenGL backend could be imported. Attempting to create a " "rendering context will result in a RuntimeError." ) - return opengl_context \ No newline at end of file + + return opengl_context From ce77935123f041aa1e118381451fa998bb5b0b80 Mon Sep 17 00:00:00 2001 From: boris-il-forte Date: Thu, 26 Oct 2023 15:44:51 +0200 Subject: [PATCH 50/50] Mushroom 1.10 - fixed makefile - minimum requirement for opencv - last commit in the 1.10 series --- Makefile | 2 ++ requirements.txt | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3c30854f2..7f13efeef 100644 --- a/Makefile +++ b/Makefile @@ -13,3 +13,5 @@ upload: clean: rm -rf dist rm -rf build + +.NOTPARALLEL: diff --git a/requirements.txt b/requirements.txt index 9d677fda2..519f4f848 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,6 @@ matplotlib joblib tqdm pygame -opencv-python +opencv-python>=4.7 torch pytest