From 6bc754981617cc3fc623f16c788d5af086169a35 Mon Sep 17 00:00:00 2001
From: okiwi6 <45100017+oleflb@users.noreply.github.com>
Date: Mon, 27 Jan 2025 07:32:22 +0100
Subject: [PATCH] behead the action space

---
 .../nao_env/src/nao_env/nao_base_env.py       | 153 ++++++++++++++
 .../nao_env/src/nao_env/nao_standing.py       | 125 +++---------
 .../nao_env/src/nao_env/nao_standup.py        | 190 ++++--------------
 .../nao_env/src/nao_env/nao_walking.py        | 103 +---------
 .../nao_env/src/nao_env/wrappers/__init__.py  |   2 -
 .../src/nao_interface/nao_interface.py        |   7 +
 .../packages/rewards/src/rewards/base.py      |   2 +-
 .../throwing/src/throwing/__init__.py         |   2 +-
 .../mujoco/scripts/mujoco-walking.py          |   4 +-
 9 files changed, 231 insertions(+), 357 deletions(-)
 create mode 100644 tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_base_env.py

diff --git a/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_base_env.py b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_base_env.py
new file mode 100644
index 0000000000..2bd8ab7df7
--- /dev/null
+++ b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_base_env.py
@@ -0,0 +1,153 @@
+from pathlib import Path
+from typing import Any, ClassVar, override
+
+import numpy as np
+from gymnasium import utils
+from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
+from gymnasium.spaces import Box
+from nao_interface import Nao
+from numpy.typing import NDArray
+from throwing import ThrowableObject
+
+SENSOR_NAMES = [
+    "accelerometer",
+    "gyroscope",
+    "head.yaw",
+    "head.pitch",
+    "left_leg.hip_yaw_pitch",
+    "left_leg.hip_roll",
+    "left_leg.hip_pitch",
+    "left_leg.knee_pitch",
+    "left_leg.ankle_pitch",
+    "left_leg.ankle_roll",
+    "right_leg.hip_roll",
+    "right_leg.hip_pitch",
+    "right_leg.knee_pitch",
+    "right_leg.ankle_pitch",
+    "right_leg.ankle_roll",
+    "left_arm.shoulder_pitch",
+    "left_arm.shoulder_roll",
+    "left_arm.elbow_yaw",
+    "left_arm.elbow_roll",
+    "left_arm.wrist_yaw",
+    "right_arm.shoulder_pitch",
+    "right_arm.shoulder_roll",
+    "right_arm.elbow_yaw",
+    "right_arm.elbow_roll",
+    "right_arm.wrist_yaw",
+]
+
+ACTUATOR_NAMES = [
+    "left_leg.hip_yaw_pitch",
+    "left_leg.hip_roll",
+    "left_leg.hip_pitch",
+    "left_leg.knee_pitch",
+    "left_leg.ankle_pitch",
+    "left_leg.ankle_roll",
+    "right_leg.hip_roll",
+    "right_leg.hip_pitch",
+    "right_leg.knee_pitch",
+    "right_leg.ankle_pitch",
+    "right_leg.ankle_roll",
+    "left_arm.shoulder_pitch",
+    "left_arm.shoulder_roll",
+    "left_arm.elbow_yaw",
+    "left_arm.elbow_roll",
+    "left_arm.wrist_yaw",
+    "right_arm.shoulder_pitch",
+    "right_arm.shoulder_roll",
+    "right_arm.elbow_yaw",
+    "right_arm.elbow_roll",
+    "right_arm.wrist_yaw",
+]
+
+DEFAULT_CAMERA_CONFIG = {
+    "trackbodyid": 1,
+    "distance": 4.0,
+    "lookat": np.array((0.0, 0.0, 0.8925)),
+    "elevation": -20.0,
+}
+
+
+class NaoBaseEnv(MujocoEnv, utils.EzPickle):
+    metadata: ClassVar[dict[str, Any]] = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 83,
+    }
+
+    def __init__(self, *, throw_tomatoes: bool = False, **kwargs: Any) -> None:
+        observation_space = Box(
+            low=-np.inf,
+            high=np.inf,
+            shape=(31,),
+            dtype=np.float64,
+        )
+        MujocoEnv.__init__(
+            self,
+            str(Path.cwd().joinpath("model", "scene.xml")),
+            frame_skip=4,
+            observation_space=observation_space,
+            default_camera_config=DEFAULT_CAMERA_CONFIG,
+            **kwargs,
+        )
+
+        self.throw_tomatoes = throw_tomatoes
+        self.projectile = ThrowableObject(
+            model=self.model,
+            data=self.data,
+            plane_body="floor",
+            throwable_body="tomato",
+        )
+        self._actuation_mask = self._get_actuation_mask()
+        self.action_space_size = len(ACTUATOR_NAMES)
+        utils.EzPickle.__init__(self, **kwargs)
+
+    def _get_actuation_mask(self) -> NDArray[np.bool_]:
+        actuation_mask = np.zeros(self.model.nu, dtype=np.bool_)
+        for name in ACTUATOR_NAMES:
+            actuation_mask[self.model.actuator(name).id] = True
+        return actuation_mask
+
+    @override
+    def _set_action_space(self) -> Box:
+        bounds = (
+            np.stack(
+                [self.model.actuator(name).ctrlrange for name in ACTUATOR_NAMES]
+            )
+            .copy()
+            .astype(np.float32)
+        )
+        low, high = bounds.T
+        self.action_space = Box(low=low, high=high, dtype=np.float32)
+        return self.action_space
+
+    def _get_obs(self) -> NDArray[np.floating]:
+        nao = Nao(self.model, self.data)
+
+        force_sensing_resistors_right = nao.right_fsr_values().sum()
+        force_sensing_resistors_left = nao.left_fsr_values().sum()
+
+        sensors = np.concatenate(
+            [
+                self.data.sensor(sensor_name).data
+                for sensor_name in SENSOR_NAMES
+            ],
+        )
+        fsrs = np.array(
+            [force_sensing_resistors_right, force_sensing_resistors_left],
+        )
+
+        return np.concatenate([sensors, fsrs])
+
+    @override
+    def do_simulation(
+        self,
+        ctrl: NDArray[np.floating],
+        n_frames: int,
+    ) -> None:
+        self.data.ctrl[self._actuation_mask] = ctrl
+        self._step_mujoco_simulation(self.data.ctrl, n_frames)
diff --git a/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standing.py b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standing.py
index b88e72ead0..d1221c2697 100644
--- a/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standing.py
+++ b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standing.py
@@ -1,27 +1,21 @@
-from pathlib import Path
-from typing import Any, ClassVar, override
+from typing import Any, override
 
 import numpy as np
-import rewards
-from gymnasium import utils
-from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
-from gymnasium.spaces import Box
 from nao_interface.nao_interface import Nao
 from nao_interface.poses import PENALIZED_POSE
 from numpy.typing import NDArray
-from throwing import ThrowableObject
+from rewards import (
+    ConstantReward,
+    HeadOverTorsoPenalty,
+    RewardComposer,
+    RewardContext,
+    TorqueChangeRatePenalty,
+)
 
-DEFAULT_CAMERA_CONFIG = {
-    "trackbodyid": 1,
-    "distance": 4.0,
-    "lookat": np.array((0.0, 0.0, 0.8925)),
-    "elevation": -20.0,
-}
+from .nao_base_env import NaoBaseEnv
 
 OFFSET_QPOS = np.array(
     [
-        0.0,
-        0.0,
         0.0,
         0.0,
         0.09,
@@ -48,104 +42,36 @@
 
 HEAD_SET_HEIGHT = 0.51
 
-SENSOR_NAMES = [
-    "accelerometer",
-    "gyroscope",
-    "head.yaw",
-    "head.pitch",
-    "left_leg.hip_yaw_pitch",
-    "left_leg.hip_roll",
-    "left_leg.hip_pitch",
-    "left_leg.knee_pitch",
-    "left_leg.ankle_pitch",
-    "left_leg.ankle_roll",
-    "right_leg.hip_roll",
-    "right_leg.hip_pitch",
-    "right_leg.knee_pitch",
-    "right_leg.ankle_pitch",
-    "right_leg.ankle_roll",
-    "left_arm.shoulder_pitch",
-    "left_arm.shoulder_roll",
-    "left_arm.elbow_yaw",
-    "left_arm.elbow_roll",
-    "left_arm.wrist_yaw",
-    "right_arm.shoulder_pitch",
-    "right_arm.shoulder_roll",
-    "right_arm.elbow_yaw",
-    "right_arm.elbow_roll",
-    "right_arm.wrist_yaw",
-]
-
-
-class NaoStanding(MujocoEnv, utils.EzPickle):
-    metadata: ClassVar = {
-        "render_modes": [
-            "human",
-            "rgb_array",
-            "depth_array",
-        ],
-        "render_fps": 83,
-    }
-
+class NaoStanding(NaoBaseEnv):
     def __init__(
         self,
         *,
         throw_tomatoes: bool,
         **kwargs: Any,
     ) -> None:
-        observation_space = Box(
-            low=-np.inf,
-            high=np.inf,
-            shape=(31,),
-            dtype=np.float64,
-        )
-
-        MujocoEnv.__init__(
-            self,
-            str(Path.cwd() / "model/scene.xml"),
-            frame_skip=4,
-            observation_space=observation_space,
-            default_camera_config=DEFAULT_CAMERA_CONFIG,
+        super().__init__(
+            throw_tomatoes=throw_tomatoes,
             **kwargs,
         )
-        self.throw_tomatoes = throw_tomatoes
-        self.projectile = ThrowableObject(
-            model=self.model,
-            data=self.data,
-            plane_body="floor",
-            throwable_body="tomato",
-        )
+
         self.current_step = 0
         self.termination_penalty = 10.0
-        utils.EzPickle.__init__(self, **kwargs)
-
-    @override
-    def _get_obs(self) -> NDArray[np.floating]:
-        nao = Nao(self.model, self.data)
-        force_sensing_resistors_right = nao.right_fsr_values().sum()
-        force_sensing_resistors_left = nao.left_fsr_values().sum()
-
-        sensors = np.concatenate(
-            [
-                self.data.sensor(sensor_name).data
-                for sensor_name in SENSOR_NAMES
-            ],
-        )
-        fsrs = np.array(
-            [force_sensing_resistors_right, force_sensing_resistors_left],
+        self.reward = (
+            RewardComposer()
+            .add(0.05, ConstantReward())
+            .add(-0.01, TorqueChangeRatePenalty(self.model.nu, self.dt))
+            .add(-1.0, HeadOverTorsoPenalty())
         )
-        return np.concatenate([sensors, fsrs])
 
     @override
     def step(self, action: NDArray[np.floating]) -> tuple:
         self.current_step += 1
         nao = Nao(self.model, self.data)
 
         if self.throw_tomatoes and self.projectile.has_ground_contact():
-            robot_site_id = self.model.site("Robot").id
-            target = self.data.site_xpos[robot_site_id]
+            target = self.data.site("Robot").xpos
             alpha = self.current_step / 2500
             time_to_reach = 0.2 * (1 - alpha) + 0.1 * alpha
             self.projectile.random_throw(
@@ -154,21 +80,16 @@ def step(self, action: NDArray[np.floating]) -> tuple:
             distance=1.0,
         )
 
-        last_actuator_force = self.data.actuator_force.copy()
         self.do_simulation(action + OFFSET_QPOS, self.frame_skip)
         head_center_z = self.data.site("head_center").xpos[2]
 
-        torque_penalty = 0.01 * rewards.torque_change_rate(
-            nao,
-            last_actuator_force,
-        )
-        head_over_torso_penalty = 1.0 * rewards.head_over_torso_error(nao)
-
         if self.render_mode == "human":
             self.render()
 
         terminated = head_center_z < 0.3
-        reward = 0.05 - torque_penalty - head_over_torso_penalty
+
+        distinct_rewards = self.reward.rewards(RewardContext(nao, action))
+        reward = sum(distinct_rewards.values())
 
         if terminated:
             reward -= self.termination_penalty
@@ -179,7 +100,7 @@ def step(self, action: NDArray[np.floating]) -> tuple:
             reward,
             terminated,
             False,
-            {},
+            distinct_rewards,
         )
 
     @override
diff --git a/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standup.py b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standup.py
index fde5a9169e..ce0cdec9a3 100644
--- a/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standup.py
+++ b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_standup.py
@@ -1,176 +1,56 @@
-from pathlib import Path
-from typing import Any, ClassVar, override
+from typing import Any, override
 
 import numpy as np
-import rewards
-from gymnasium import utils
-from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
-from gymnasium.spaces import Box
 from nao_interface import Nao
+from nao_interface.poses import PENALIZED_POSE
 from numpy.typing import NDArray
-
-DEFAULT_CAMERA_CONFIG = {
-    "trackbodyid": 1,
-    "distance": 4.0,
-    "lookat": np.array((0.0, 0.0, 0.8925)),
-    "elevation": -20.0,
-}
-
-SENSOR_NAMES = [
-    "accelerometer",
-    "gyroscope",
-    "head.yaw",
-    "head.pitch",
-    "left_leg.hip_yaw_pitch",
-    "left_leg.hip_roll",
-    "left_leg.hip_pitch",
-    "left_leg.knee_pitch",
-    "left_leg.ankle_pitch",
-    "left_leg.ankle_roll",
-    "right_leg.hip_roll",
-    "right_leg.hip_pitch",
"right_leg.knee_pitch", - "right_leg.ankle_pitch", - "right_leg.ankle_roll", - "left_arm.shoulder_pitch", - "left_arm.shoulder_roll", - "left_arm.elbow_yaw", - "left_arm.elbow_roll", - "left_arm.wrist_yaw", - "right_arm.shoulder_pitch", - "right_arm.shoulder_roll", - "right_arm.elbow_yaw", - "right_arm.elbow_roll", - "right_arm.wrist_yaw", -] - - -class NaoStandup(MujocoEnv, utils.EzPickle): - metadata: ClassVar = { - "render_modes": [ - "human", - "rgb_array", - "depth_array", - ], - "render_fps": 67, - } - - def __init__(self, **kwargs: Any) -> None: - observation_space = Box( - low=-np.inf, - high=np.inf, - shape=(31,), - dtype=np.float64, +from rewards import ( + ControlAmplitudePenalty, + ExternalImpactForcesPenalty, + HeadHeightReward, + RewardComposer, + TorqueChangeRatePenalty, +) +from rewards.base import RewardContext + +from .nao_base_env import NaoBaseEnv + + +class NaoStandup(NaoBaseEnv): + def __init__(self, *, throw_tomatoes: bool = False, **kwargs: Any) -> None: + super().__init__(throw_tomatoes=throw_tomatoes, **kwargs) + self.reward = ( + RewardComposer() + .add(1.0, HeadHeightReward()) + .add(-0.1, ControlAmplitudePenalty()) + .add(-0.5e-6, ExternalImpactForcesPenalty()) + .add(-0.01, TorqueChangeRatePenalty(self.model.nu, self.dt)) ) - MujocoEnv.__init__( - self, - str(Path.cwd() / "model/scene.xml"), - 5, - observation_space=observation_space, - default_camera_config=DEFAULT_CAMERA_CONFIG, - **kwargs, - ) - utils.EzPickle.__init__(self, **kwargs) - - @override - def _get_obs(self) -> NDArray[np.floating]: - nao = Nao(self.model, self.data) - - force_sensing_resistors_right = nao.right_fsr_values().sum() - force_sensing_resistors_left = nao.left_fsr_values().sum() - - sensors = np.concatenate( - [ - self.data.sensor(sensor_name).data - for sensor_name in SENSOR_NAMES - ], - ) - frs = np.array( - [force_sensing_resistors_right, force_sensing_resistors_left], - ) - - return np.concatenate([sensors, frs]) - @override def step(self, action: NDArray[np.floating]) -> tuple: self.do_simulation(action, self.frame_skip) nao = Nao(self.model, self.data) - head_elevation_reward = rewards.head_height(nao) - control_amplitude_penalty = 0.1 * rewards.ctrl_amplitude(nao) - impact_penalty = min(0.5e-6 * rewards.impact_forces(nao), 10) - - reward = ( - head_elevation_reward - - control_amplitude_penalty - - impact_penalty - + 1 - ) + distinct_rewards = self.reward.rewards(RewardContext(nao, action)) + reward = sum(distinct_rewards.values()) if self.render_mode == "human": self.render() - return ( - self._get_obs(), - reward, - False, - False, - { - "head_elevation_reward": head_elevation_reward, - "control_amplitude_penalty": control_amplitude_penalty, - "impact_penalty": impact_penalty, - }, - ) + return (self._get_obs(), reward, False, False, distinct_rewards) @override def reset_model(self) -> NDArray[np.floating]: - half_random_offset = 0.03 - face_down_keyframe_qpos = [ - 0.452845, - 0.219837, - 0.0556939, - 0.710551, - -0.0810676, - 0.693965, - 0.0834173, - -0.000571484, - 0.0239414, - 0.000401842, - -3.89047e-05, - -0.00175077, - 0.357233, - 0.0114063, - 0.000212495, - 0.000422366, - 3.92127e-05, - -0.00133669, - 0.356939, - 0.0112884, - -0.000206283, - 1.46985, - 0.110264, - 0.000766453, - -0.034298, - 3.65047e-05, - 1.47067, - -0.110094, - -0.00201064, - 0.0342998, - -0.00126886, - ] self.set_state( - face_down_keyframe_qpos - + self.np_random.uniform( - low=-half_random_offset, - high=half_random_offset, - size=self.model.nq, - ), - self.init_qvel - + self.np_random.uniform( - 
-                low=-half_random_offset,
-                high=half_random_offset,
-                size=self.model.nv,
-            ),
+            self.init_qpos,
+            self.init_qvel,
         )
+        nao = Nao(self.model, self.data)
+        nao.reset(PENALIZED_POSE)
+        nao.set_transform(
+            np.array([-0.13252355, -0.0909888, 0.05897925]),
+            np.array([0.69360432, 0.13973604, -0.692682, 0.13992331]),
+        )
+
         return self._get_obs()
diff --git a/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_walking.py b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_walking.py
index 1fb1b05d81..f6bb736b05 100644
--- a/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_walking.py
+++ b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/nao_walking.py
@@ -1,11 +1,7 @@
-from pathlib import Path
-from typing import Any, ClassVar, override
+from typing import Any, override
 
 import numpy as np
 import walking_engine
-from gymnasium import utils
-from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
-from gymnasium.spaces import Box
 from nao_interface.nao_interface import Nao
 from nao_interface.poses import READY_POSE
 from numpy.typing import NDArray
@@ -22,7 +18,6 @@
     TorqueChangeRatePenalty,
     XDistanceReward,
 )
-from throwing import ThrowableObject
 from transforms.transforms import Pose2
 from walking_engine import (
     Control,
@@ -31,13 +26,7 @@
 )
 from walking_engine.walking_types import Feet, Side, State
 
-DEFAULT_CAMERA_CONFIG = {
-    "trackbodyid": 1,
-    "distance": 4.0,
-    "lookat": np.array((0.0, 0.0, 0.8925)),
-    # "type": 1,
-    "elevation": -20.0,
-}
+from .nao_base_env import NaoBaseEnv
 
 
 HEAD_THRESHOLD_HEIGHT = 0.4
@@ -69,34 +58,6 @@
     ],
 )
 
-SENSOR_NAMES = [
-    "accelerometer",
-    "gyroscope",
-    "head.yaw",
-    "head.pitch",
-    "left_leg.hip_yaw_pitch",
-    "left_leg.hip_roll",
-    "left_leg.hip_pitch",
-    "left_leg.knee_pitch",
-    "left_leg.ankle_pitch",
-    "left_leg.ankle_roll",
-    "right_leg.hip_roll",
-    "right_leg.hip_pitch",
-    "right_leg.knee_pitch",
-    "right_leg.ankle_pitch",
-    "right_leg.ankle_roll",
-    "left_arm.shoulder_pitch",
-    "left_arm.shoulder_roll",
-    "left_arm.elbow_yaw",
-    "left_arm.elbow_roll",
-    "left_arm.wrist_yaw",
-    "right_arm.shoulder_pitch",
-    "right_arm.shoulder_roll",
-    "right_arm.elbow_yaw",
-    "right_arm.elbow_roll",
-    "right_arm.wrist_yaw",
-]
-
 
 def initial_state(parameters: Parameters) -> State:
     return State(
@@ -113,38 +74,13 @@ def initial_state(parameters: Parameters) -> State:
     )
 
 
-class NaoWalking(MujocoEnv, utils.EzPickle):
-    metadata: ClassVar[dict[str, Any]] = {
-        "render_modes": [
-            "human",
-            "rgb_array",
-            "depth_array",
-        ],
-        "render_fps": 83,
-    }
-
+class NaoWalking(NaoBaseEnv):
     def __init__(self, *, throw_tomatoes: bool, **kwargs: Any) -> None:
-        observation_space = Box(
-            low=-np.inf,
-            high=np.inf,
-            shape=(31,),
-            dtype=np.float64,
-        )
-        MujocoEnv.__init__(
-            self,
-            str(Path.cwd().joinpath("model", "scene.xml")),
-            frame_skip=4,
-            observation_space=observation_space,
-            default_camera_config=DEFAULT_CAMERA_CONFIG,
+        super().__init__(
+            throw_tomatoes=throw_tomatoes,
             **kwargs,
         )
-        self.throw_tomatoes = throw_tomatoes
-        self.projectile = ThrowableObject(
-            model=self.model,
-            data=self.data,
-            plane_body="floor",
-            throwable_body="tomato",
-        )
+
         self.current_step = 0
 
         self.parameters = Parameters(
@@ -177,26 +113,6 @@ def __init__(self, *, throw_tomatoes: bool, **kwargs: Any) -> None:
             .add(-0.001, ControlAmplitudePenalty())
         )
 
-        utils.EzPickle.__init__(self, **kwargs)
-
-    def _get_obs(self) -> NDArray[np.floating]:
-        nao = Nao(self.model, self.data)
-
-        force_sensing_resistors_right = nao.right_fsr_values().sum()
-        force_sensing_resistors_left = nao.left_fsr_values().sum()
-
-        sensors = np.concatenate(
-            [
-                self.data.sensor(sensor_name).data
-                for sensor_name in SENSOR_NAMES
-            ],
-        )
-        fsrs = np.array(
-            [force_sensing_resistors_right, force_sensing_resistors_left],
-        )
-
-        return np.concatenate([sensors, fsrs])
-
     @override
     def step(self, action: NDArray[np.floating]) -> tuple:
         nao = Nao(self.model, self.data)
@@ -219,7 +135,7 @@ def step(self, action: NDArray[np.floating]) -> tuple:
             self.render()
 
         distinct_rewards = self.reward.rewards(
-            RewardContext(nao, self.state, action)
+            RewardContext(nao, action, self.state)
         )
         reward = sum(distinct_rewards.values())
 
@@ -264,8 +180,7 @@ def do_simulation(
             control=control,
             dt=dt,
         )
-        nao.data.ctrl[:] += ctrl
-
+        nao.data.ctrl[self._actuation_mask] += ctrl
         self._step_mujoco_simulation(nao.data.ctrl, n_frames)
 
     @override
@@ -296,7 +211,7 @@ def reset_model(self) -> NDArray[np.floating]:
         self.enable_walking = False
         self.do_simulation(
-            np.zeros(self.model.nu),
+            np.zeros(self.action_space_size),
             self.frame_skip * self.initialization_rounds,
         )
         self.enable_walking = True
 
diff --git a/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/wrappers/__init__.py b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/wrappers/__init__.py
index 6f5e3a208c..6f31be318b 100644
--- a/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/wrappers/__init__.py
+++ b/tools/machine-learning/mujoco/packages/nao_env/src/nao_env/wrappers/__init__.py
@@ -1,7 +1,5 @@
-from .wandb_log_episode_statistics import WandbLogEpisodeStatistics
 from .wandb_log_video_recorder import SingleEpisodeVideoRecorder
 
 __all__ = [
     "SingleEpisodeVideoRecorder",
-    "WandbLogEpisodeStatistics",
 ]
diff --git a/tools/machine-learning/mujoco/packages/nao_interface/src/nao_interface/nao_interface.py b/tools/machine-learning/mujoco/packages/nao_interface/src/nao_interface/nao_interface.py
index 551e4450b8..75bfa6ee82 100644
--- a/tools/machine-learning/mujoco/packages/nao_interface/src/nao_interface/nao_interface.py
+++ b/tools/machine-learning/mujoco/packages/nao_interface/src/nao_interface/nao_interface.py
@@ -215,6 +215,13 @@ def __init__(self, model: mujoco.MjModel, data: mujoco.MjData) -> None:
             ),
         )
 
+    def set_transform(
+        self, position: NDArray[np.floating], quaternion: NDArray[np.floating]
+    ) -> None:
+        nao = self.data.body("Nao")
+        nao.xpos = position
+        nao.xquat = quaternion
+
     def reset(self, positions: dict[str, dict[str, float]]) -> None:
         mujoco.mj_resetData(self.model, self.data)
 
diff --git a/tools/machine-learning/mujoco/packages/rewards/src/rewards/base.py b/tools/machine-learning/mujoco/packages/rewards/src/rewards/base.py
index 009030d6ef..41e344ad1e 100644
--- a/tools/machine-learning/mujoco/packages/rewards/src/rewards/base.py
+++ b/tools/machine-learning/mujoco/packages/rewards/src/rewards/base.py
@@ -9,8 +9,8 @@
 @dataclass
 class RewardContext:
     nao: Nao
-    walk_state: State
     action: NDArray[np.floating]
+    walk_state: State | None = None
 
 
 class BaseReward:
diff --git a/tools/machine-learning/mujoco/packages/throwing/src/throwing/__init__.py b/tools/machine-learning/mujoco/packages/throwing/src/throwing/__init__.py
index 1d6c3a2e6f..03bd728c2b 100644
--- a/tools/machine-learning/mujoco/packages/throwing/src/throwing/__init__.py
+++ b/tools/machine-learning/mujoco/packages/throwing/src/throwing/__init__.py
@@ -88,7 +88,7 @@ def random_throw(
     ) -> None:
         throwable_radius = self.model.geom_rbound[
             self.model.body_geomadr[self.throwable_index]
-        ]
+        ].item()
         qpos_index = self.model.jnt_qposadr[
             self.model.body_jntadr[self.throwable_index]
         ]
diff --git a/tools/machine-learning/mujoco/scripts/mujoco-walking.py b/tools/machine-learning/mujoco/scripts/mujoco-walking.py
index 72f8a4ec93..a5318bd1be 100644
--- a/tools/machine-learning/mujoco/scripts/mujoco-walking.py
+++ b/tools/machine-learning/mujoco/scripts/mujoco-walking.py
@@ -19,7 +19,7 @@
 )
 def main(*, throw_tomatoes: bool, load_policy: str | None) -> None:
     env = NaoWalking(throw_tomatoes=throw_tomatoes)
-    _, _, _, _, infos = env.step(np.zeros(env.model.nu))
+    _, _, _, _, infos = env.step(np.zeros(env.action_space_size))
     env.reset()
 
     model = None
@@ -47,7 +47,7 @@ def main(*, throw_tomatoes: bool, load_policy: str | None) -> None:
     viewer.set_grid_divisions(x_div=10, y_div=5, x_axis_time=10.0, fig_idx=1)
 
     total_reward = 0.0
-    action = np.zeros(env.model.nu)
+    action = np.zeros(env.action_space_size)
 
     while viewer.is_alive:
         start_time = time.time()
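
Usage sketch (supplementary, placed after the diff so patch tooling ignores
it; not part of the change itself): how a training loop sees the beheaded
action space. Names follow the patch; the exact import path and the
model/scene.xml working-directory layout are assumptions.

    import numpy as np

    from nao_env.nao_standup import NaoStandup

    # Requires model/scene.xml relative to the working directory,
    # as in the scripts above.
    env = NaoStandup()
    obs, info = env.reset()

    # 21 actuators: both legs and arms, but no head.yaw/head.pitch --
    # the head joints stay out of ACTUATOR_NAMES, hence the subject line.
    assert env.action_space_size == 21

    action = np.zeros(env.action_space_size, dtype=np.float32)
    obs, reward, terminated, truncated, distinct_rewards = env.step(action)
    # distinct_rewards maps each reward term to its weighted contribution,
    # replacing the hand-assembled info dicts of the old environments.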