Skip to content

Commit

Permalink
behead the action space
Browse files Browse the repository at this point in the history
  • Loading branch information
oleflb committed Jan 28, 2025
1 parent 955ba91 commit 6bc7549
Show file tree
Hide file tree
Showing 9 changed files with 231 additions and 357 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
from pathlib import Path
from typing import Any, ClassVar, override

import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
from gymnasium.spaces import Box
from nao_interface import Nao
from numpy.typing import NDArray
from throwing import ThrowableObject

# MuJoCo sensor names read by `NaoBaseEnv._get_obs`, in observation order.
# The accelerometer and gyroscope are presumably 3-axis each, with the joint
# position sensors contributing one value apiece (29 values total, matching
# the 31-dim observation space once the two FSR sums are appended) — TODO
# confirm per-sensor dimensionality against the MJCF model.
SENSOR_NAMES = [
    "accelerometer",
    "gyroscope",
    "head.yaw",
    "head.pitch",
    "left_leg.hip_yaw_pitch",
    "left_leg.hip_roll",
    "left_leg.hip_pitch",
    "left_leg.knee_pitch",
    "left_leg.ankle_pitch",
    "left_leg.ankle_roll",
    "right_leg.hip_roll",
    "right_leg.hip_pitch",
    "right_leg.knee_pitch",
    "right_leg.ankle_pitch",
    "right_leg.ankle_roll",
    "left_arm.shoulder_pitch",
    "left_arm.shoulder_roll",
    "left_arm.elbow_yaw",
    "left_arm.elbow_roll",
    "left_arm.wrist_yaw",
    "right_arm.shoulder_pitch",
    "right_arm.shoulder_roll",
    "right_arm.elbow_yaw",
    "right_arm.elbow_roll",
    "right_arm.wrist_yaw",
]

# Actuators exposed through the action space. Unlike SENSOR_NAMES, the head
# joints (head.yaw / head.pitch) are deliberately absent, so the policy cannot
# drive the head. NOTE(review): there is no "right_leg.hip_yaw_pitch" entry —
# presumably the NAO's hip yaw-pitch is a single shared actuator listed under
# left_leg; verify against the MJCF actuator definitions.
ACTUATOR_NAMES = [
    "left_leg.hip_yaw_pitch",
    "left_leg.hip_roll",
    "left_leg.hip_pitch",
    "left_leg.knee_pitch",
    "left_leg.ankle_pitch",
    "left_leg.ankle_roll",
    "right_leg.hip_roll",
    "right_leg.hip_pitch",
    "right_leg.knee_pitch",
    "right_leg.ankle_pitch",
    "right_leg.ankle_roll",
    "left_arm.shoulder_pitch",
    "left_arm.shoulder_roll",
    "left_arm.elbow_yaw",
    "left_arm.elbow_roll",
    "left_arm.wrist_yaw",
    "right_arm.shoulder_pitch",
    "right_arm.shoulder_roll",
    "right_arm.elbow_yaw",
    "right_arm.elbow_roll",
    "right_arm.wrist_yaw",
]

# Default free-camera settings passed to MujocoEnv: track body id 1 from
# 4 m away at -20° elevation. The look-at z of 0.8925 is presumably around
# the robot's torso height — confirm against the scene model.
DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 1,
    "distance": 4.0,
    "lookat": np.array((0.0, 0.0, 0.8925)),
    "elevation": -20.0,
}


class NaoBaseEnv(MujocoEnv, utils.EzPickle):
    """Base MuJoCo environment for NAO tasks.

    Loads ``model/scene.xml`` relative to the working directory, restricts the
    action space to the actuators in ``ACTUATOR_NAMES`` (the head is excluded),
    and exposes a 31-dimensional observation: the sensors in ``SENSOR_NAMES``
    followed by the summed right and left foot FSR readings. Optionally hosts
    a throwable "tomato" projectile for perturbation training.
    """

    metadata: ClassVar[dict[str, Any]] = {
        "render_modes": ["human", "rgb_array", "depth_array"],
        "render_fps": 83,
    }

    def __init__(self, *, throw_tomatoes: bool = False, **kwargs: Any) -> None:
        """Set up the MuJoCo scene, projectile, and actuation mask.

        Args:
            throw_tomatoes: enable the tomato-throwing perturbation.
            **kwargs: forwarded to both ``MujocoEnv`` and ``EzPickle``.
        """
        MujocoEnv.__init__(
            self,
            str(Path.cwd() / "model" / "scene.xml"),
            frame_skip=4,
            # 29 sensor values + 2 FSR sums = 31 unbounded observation entries.
            observation_space=Box(
                low=-np.inf,
                high=np.inf,
                shape=(31,),
                dtype=np.float64,
            ),
            default_camera_config=DEFAULT_CAMERA_CONFIG,
            **kwargs,
        )

        self.throw_tomatoes = throw_tomatoes
        self.projectile = ThrowableObject(
            model=self.model,
            data=self.data,
            plane_body="floor",
            throwable_body="tomato",
        )
        self._actuation_mask = self._get_actuation_mask()
        self.action_space_size = len(ACTUATOR_NAMES)
        utils.EzPickle.__init__(self, **kwargs)

    def _get_actuation_mask(self) -> NDArray[np.bool_]:
        """Boolean mask over all model actuators, True for ACTUATOR_NAMES."""
        mask = np.zeros(self.model.nu, dtype=np.bool_)
        actuated_ids = [self.model.actuator(name).id for name in ACTUATOR_NAMES]
        mask[actuated_ids] = True
        return mask

    @override
    def _set_action_space(self) -> Box:
        """Build the action space from the controlled actuators' ctrl ranges.

        Called by ``MujocoEnv.__init__``; only ACTUATOR_NAMES contribute, so
        the action dimension matches the actuation mask, not ``model.nu``.
        """
        ctrl_ranges = np.array(
            [self.model.actuator(name).ctrlrange for name in ACTUATOR_NAMES],
            dtype=np.float32,
        )
        self.action_space = Box(
            low=ctrl_ranges[:, 0],
            high=ctrl_ranges[:, 1],
            dtype=np.float32,
        )
        return self.action_space

    def _get_obs(self) -> NDArray[np.floating]:
        """Return sensor readings followed by the summed right/left FSR values."""
        nao = Nao(self.model, self.data)
        sensor_values = [
            self.data.sensor(sensor_name).data for sensor_name in SENSOR_NAMES
        ]
        fsr_sums = np.array(
            [nao.right_fsr_values().sum(), nao.left_fsr_values().sum()],
        )
        return np.concatenate([*sensor_values, fsr_sums])

    @override
    def do_simulation(
        self,
        ctrl: NDArray[np.floating],
        n_frames: int,
    ) -> None:
        """Apply ``ctrl`` to the masked actuators and advance the simulation.

        NOTE(review): ``ctrl`` is *added* onto the current control signal
        rather than assigned — callers appear to pass offsets/deltas; confirm
        the control buffer is reset between steps if absolute targets are
        intended.
        """
        self.data.ctrl[self._actuation_mask] += ctrl
        self._step_mujoco_simulation(self.data.ctrl, n_frames)
Original file line number Diff line number Diff line change
@@ -1,27 +1,21 @@
from pathlib import Path
from typing import Any, ClassVar, override
from typing import Any, override

import numpy as np
import rewards
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
from gymnasium.spaces import Box
from nao_interface.nao_interface import Nao
from nao_interface.poses import PENALIZED_POSE
from numpy.typing import NDArray
from throwing import ThrowableObject
from rewards import (
ConstantReward,
HeadOverTorsoPenalty,
RewardComposer,
RewardContext,
TorqueChangeRatePenalty,
)

DEFAULT_CAMERA_CONFIG = {
"trackbodyid": 1,
"distance": 4.0,
"lookat": np.array((0.0, 0.0, 0.8925)),
"elevation": -20.0,
}
from .nao_base_env import NaoBaseEnv

OFFSET_QPOS = np.array(
[
0.0,
0.0,
0.0,
0.0,
0.09,
Expand All @@ -48,104 +42,36 @@

HEAD_SET_HEIGHT = 0.51

SENSOR_NAMES = [
"accelerometer",
"gyroscope",
"head.yaw",
"head.pitch",
"left_leg.hip_yaw_pitch",
"left_leg.hip_roll",
"left_leg.hip_pitch",
"left_leg.knee_pitch",
"left_leg.ankle_pitch",
"left_leg.ankle_roll",
"right_leg.hip_roll",
"right_leg.hip_pitch",
"right_leg.knee_pitch",
"right_leg.ankle_pitch",
"right_leg.ankle_roll",
"left_arm.shoulder_pitch",
"left_arm.shoulder_roll",
"left_arm.elbow_yaw",
"left_arm.elbow_roll",
"left_arm.wrist_yaw",
"right_arm.shoulder_pitch",
"right_arm.shoulder_roll",
"right_arm.elbow_yaw",
"right_arm.elbow_roll",
"right_arm.wrist_yaw",
]


class NaoStanding(MujocoEnv, utils.EzPickle):
metadata: ClassVar = {
"render_modes": [
"human",
"rgb_array",
"depth_array",
],
"render_fps": 83,
}

class NaoStanding(NaoBaseEnv):
def __init__(
self,
*,
throw_tomatoes: bool,
**kwargs: Any,
) -> None:
observation_space = Box(
low=-np.inf,
high=np.inf,
shape=(31,),
dtype=np.float64,
)

MujocoEnv.__init__(
self,
str(Path.cwd() / "model/scene.xml"),
frame_skip=4,
observation_space=observation_space,
default_camera_config=DEFAULT_CAMERA_CONFIG,
super().__init__(
throw_tomatoes=throw_tomatoes,
**kwargs,
)
self.throw_tomatoes = throw_tomatoes
self.projectile = ThrowableObject(
model=self.model,
data=self.data,
plane_body="floor",
throwable_body="tomato",
)

self.current_step = 0
self.termination_penalty = 10.0
utils.EzPickle.__init__(self, **kwargs)

@override
def _get_obs(self) -> NDArray[np.floating]:
nao = Nao(self.model, self.data)

force_sensing_resistors_right = nao.right_fsr_values().sum()
force_sensing_resistors_left = nao.left_fsr_values().sum()

sensors = np.concatenate(
[
self.data.sensor(sensor_name).data
for sensor_name in SENSOR_NAMES
],
)
fsrs = np.array(
[force_sensing_resistors_right, force_sensing_resistors_left],
self.reward = (
RewardComposer()
.add(0.05, ConstantReward())
.add(-0.01, TorqueChangeRatePenalty(self.model.nu, self.dt))
.add(1.0, HeadOverTorsoPenalty())
)

return np.concatenate([sensors, fsrs])

@override
def step(self, action: NDArray[np.floating]) -> tuple:
self.current_step += 1
nao = Nao(self.model, self.data)

if self.throw_tomatoes and self.projectile.has_ground_contact():
robot_site_id = self.model.site("Robot").id
target = self.data.site_xpos[robot_site_id]
target = self.data.site("Robot").xpos
alpha = self.current_step / 2500
time_to_reach = 0.2 * (1 - alpha) + 0.1 * alpha
self.projectile.random_throw(
Expand All @@ -154,21 +80,16 @@ def step(self, action: NDArray[np.floating]) -> tuple:
distance=1.0,
)

last_actuator_force = self.data.actuator_force.copy()
self.do_simulation(action + OFFSET_QPOS, self.frame_skip)
head_center_z = self.data.site("head_center").xpos[2]

torque_penalty = 0.01 * rewards.torque_change_rate(
nao,
last_actuator_force,
)
head_over_torso_penalty = 1.0 * rewards.head_over_torso_error(nao)

if self.render_mode == "human":
self.render()

terminated = head_center_z < 0.3
reward = 0.05 - torque_penalty - head_over_torso_penalty

distinct_rewards = self.reward.rewards(RewardContext(nao, action))
reward = sum(distinct_rewards.values())

if terminated:
reward -= self.termination_penalty
Expand All @@ -179,7 +100,7 @@ def step(self, action: NDArray[np.floating]) -> tuple:
reward,
terminated,
False,
{},
distinct_rewards,
)

@override
Expand Down
Loading

0 comments on commit 6bc7549

Please sign in to comment.