Updates for kbot walking setup. #9

Status: Open. Wants to merge 14 commits into base: master.
1 change: 1 addition & 0 deletions README.md
@@ -8,3 +8,4 @@
# K-Scale Isaac Lab Playground Library

This repository implements training for robotic control policies in the Isaac Sim simulator. For more information, see the [documentation](https://docs.kscale.dev/docs/isaac-lab#/).

4 changes: 2 additions & 2 deletions exts/kbot/kbot/assets/Robots/kbot.usd
Git LFS file not shown
95 changes: 55 additions & 40 deletions exts/kbot/kbot/assets/kbot.py
@@ -3,47 +3,47 @@
from omni.isaac.lab.assets.articulation import ArticulationCfg

from kbot.assets import ISAAC_ASSET_DIR
import math

KBOT_04_ACTUATOR_CFG = IdentifiedActuatorCfg(
joint_names_expr=[".*hip_y", ".*knee"],
joint_names_expr=[".*_04"],
effort_limit=120.0,
velocity_limit=14,
saturation_effort=560,
stiffness={".*": 15.0},
damping={".*": 1.5},
armature={".*": 1.5e-4 * 81},
friction_static=0.8,
velocity_limit=100.0,
saturation_effort=240.0,
stiffness={".*": 200.0},
damping={".*": 5.0},
armature={".*": 0.01},
friction_static=0.0,
activation_vel=0.1,
friction_dynamic=0.02,
friction_dynamic=0.0,
)
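
The 04-class values above imply a torque-speed curve in which saturation_effort sets the short-burst peak and effort_limit the hard cap. A rough sketch of that clamp, assuming the usual DC-motor interpretation of these fields rather than Isaac Lab's exact implementation:

```python
# Illustrative only: mirrors the general idea behind effort_limit /
# saturation_effort / velocity_limit in IdentifiedActuatorCfg, using the
# KBOT_04 numbers above; not Isaac Lab's actual actuator code.
def max_available_torque(joint_vel: float,
                         saturation_effort: float = 240.0,
                         effort_limit: float = 120.0,
                         velocity_limit: float = 100.0) -> float:
    # peak torque falls off linearly with speed (torque-speed line) ...
    peak = saturation_effort * (1.0 - joint_vel / velocity_limit)
    # ... and is always capped by the continuous effort limit
    return max(0.0, min(peak, effort_limit))

print(max_available_torque(0.0))   # 120.0 (capped by effort_limit)
print(max_available_torque(60.0))  # 96.0 (on the torque-speed line)
```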

KBOT_03_ACTUATOR_CFG = IdentifiedActuatorCfg(
joint_names_expr=[".*hip_z", ".*hip_x"],
joint_names_expr=[".*_03"],
effort_limit=60.0,
velocity_limit=14,
saturation_effort=560,
stiffness={".*": 15.0},
damping={".*": 1.5},
armature={".*": 1.5e-4 * 81},
friction_static=0.8,
velocity_limit=100.0,
saturation_effort=120.0,
stiffness={".*": 150.0},
damping={".*": 5.0},
armature={".*": 0.01},
friction_static=0.0,
activation_vel=0.1,
friction_dynamic=0.02,
friction_dynamic=0.0,
)

KBOT_01_ACTUATOR_CFG = IdentifiedActuatorCfg(
joint_names_expr=[".*ankle_y"],
KBOT_02_ACTUATOR_CFG = IdentifiedActuatorCfg(
joint_names_expr=[".*_02"],
effort_limit=17.0,
velocity_limit=14,
saturation_effort=560,
stiffness={".*": 15.0},
damping={".*": 1.5},
armature={".*": 1.5e-4 * 81},
friction_static=0.8,
velocity_limit=100.0,
saturation_effort=34.0,
stiffness={".*": 20.0},
damping={".*": 2.0},
armature={".*": 0.01},
friction_static=0.0,
activation_vel=0.1,
friction_dynamic=0.02,
friction_dynamic=0.0,
)
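
The three actuator groups are now keyed by the numeric suffix in the joint names rather than by joint function. A quick standalone check of which joints from the init_state below each joint_names_expr pattern captures (Isaac Lab resolves these as full-string regex matches, which re.fullmatch approximates here):

```python
import re

# a few joint names taken from the init_state further down in this file
joints = [
    "left_shoulder_pitch_03", "left_elbow_02", "left_hip_pitch_04",
    "left_knee_04", "left_ankle_02", "right_hip_yaw_03",
]

patterns = {"kbot_04": r".*_04", "kbot_03": r".*_03", "kbot_02": r".*_02"}

for group, pattern in patterns.items():
    matched = [name for name in joints if re.fullmatch(pattern, name)]
    print(group, matched)
# kbot_04 ['left_hip_pitch_04', 'left_knee_04']
# kbot_03 ['left_shoulder_pitch_03', 'right_hip_yaw_03']
# kbot_02 ['left_elbow_02', 'left_ankle_02']
```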


KBOT_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_ASSET_DIR}/Robots/kbot.usd",
@@ -58,28 +58,43 @@
max_depenetration_velocity=1.0,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=True, solver_position_iteration_count=4, solver_velocity_iteration_count=0
enabled_self_collisions=True,
solver_position_iteration_count=8,
Contributor commented: Unitree's velocity solver has 4 steps fyi.

        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
            enabled_self_collisions=False, solver_position_iteration_count=8, solver_velocity_iteration_count=4
        ),
    ),

Contributor (author) replied: hmm, not sure what the solver does/is. Put a TODO to try 4 steps.
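
A minimal sketch of what that TODO could look like in the config, using only names already present in this file; the 4-iteration value is the reviewer's suggestion and is not validated in this PR:

```python
import omni.isaac.lab.sim as sim_utils  # assumed to match kbot.py's existing sim_utils usage

articulation_props = sim_utils.ArticulationRootPropertiesCfg(
    enabled_self_collisions=True,
    solver_position_iteration_count=8,
    # TODO: try solver_velocity_iteration_count=4 (reviewer notes Unitree configs use 4 velocity steps)
    solver_velocity_iteration_count=0,
)
```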

solver_velocity_iteration_count=0,
),
),
init_state=ArticulationCfg.InitialStateCfg(
pos=(0.0, 0.0, 1.07),
pos=(0.0, 0.0, 1.2),
joint_pos={
'L_hip_y': 0.0,
'L_hip_z': 0.0,
'L_hip_x': 0.0,
'L_knee': 0.0,
'L_ankle_y': 0.0,
'R_hip_y': 0.0,
'R_hip_z': 0.0,
'R_hip_x': 0.0,
'R_knee': 0.0,
'R_ankle_y': 0.0,
"left_shoulder_pitch_03": 0.0,
"left_shoulder_roll_03": 0.0,
"left_shoulder_yaw_02": 0.0,
"left_elbow_02": -math.radians(90),
"left_wrist_02": 0.0,

"right_shoulder_pitch_03": 0.0,
"right_shoulder_roll_03": 0.0,
"right_shoulder_yaw_02": 0.0,
"right_elbow_02": math.radians(90),
"right_wrist_02": 0.0,

'left_hip_pitch_04': math.radians(30),
'left_hip_roll_03': 0.0,
'left_hip_yaw_03': 0.0,
'left_knee_04': math.radians(60),
'left_ankle_02': -math.radians(30),

'right_hip_pitch_04': -math.radians(30),
'right_hip_roll_03': 0.0,
'right_hip_yaw_03': 0.0,
'right_knee_04': -math.radians(60),
'right_ankle_02': -math.radians(30),
},
),
actuators={
"kbot_04": KBOT_04_ACTUATOR_CFG,
"kbot_03": KBOT_03_ACTUATOR_CFG,
"kbot_01": KBOT_01_ACTUATOR_CFG,
"kbot_02": KBOT_02_ACTUATOR_CFG,
},
soft_joint_pos_limit_factor=0.95,
)
)
@@ -25,7 +25,7 @@ class KbotRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
entropy_coef=0.008,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
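
entropy_coef scales the entropy bonus in the PPO objective, so raising it from 0.005 to 0.008 nudges the policy toward more exploration. A toy sketch of where the coefficient enters the loss, using the values from this config; rsl_rl's actual implementation differs in details:

```python
import torch

def ppo_loss(ratio, advantages, value_pred, value_target, entropy,
             clip_param=0.2, value_loss_coef=1.0, entropy_coef=0.008):
    # clipped surrogate objective
    surr1 = ratio * advantages
    surr2 = torch.clamp(ratio, 1 - clip_param, 1 + clip_param) * advantages
    policy_loss = -torch.min(surr1, surr2).mean()
    # value-function regression loss
    value_loss = (value_pred - value_target).pow(2).mean()
    # a larger entropy_coef pushes toward higher-entropy (more exploratory) policies
    return policy_loss + value_loss_coef * value_loss - entropy_coef * entropy.mean()
```
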
@@ -16,11 +16,9 @@ def __post_init__(self):
super().__post_init__()

self.scene.robot = KBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
self.rewards.flat_orientation_l2.weight = -0.5
self.rewards.dof_pos_limits.weight = -1.0

self.observations.policy.height_scan = None
self.scene.height_scanner = None
# self.observations.policy.height_scan = None
# self.scene.height_scanner = None

@configclass
class KbotRoughEnvCfg_PLAY(KbotRoughEnvCfg):
@@ -42,5 +40,5 @@ def __post_init__(self):
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing
self.randomization.base_external_force_torque = None
self.randomization.push_robot = None
self.events.base_external_force_torque = None
self.events.push_robot = None
2 changes: 2 additions & 0 deletions exts/kbot/kbot/tasks/locomotion/velocity/mdp/__init__.py
@@ -2,6 +2,8 @@

from omni.isaac.lab.envs.mdp import * # noqa: F401, F403

from omni.isaac.lab_tasks.manager_based.locomotion.velocity.mdp.rewards import feet_air_time_positive_biped, feet_slide, track_lin_vel_xy_yaw_frame_exp, track_ang_vel_z_world_exp # noqa: F401, F403

from .curriculums import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .events import * # noqa: F401, F403
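
With this import, the upstream Isaac Lab implementations of the biped air-time, foot-slide, and velocity-tracking rewards become available through this mdp package, which is presumably why the local copies in rewards.py below are commented out. A hypothetical wiring of one of them into a rewards config; the term name, weight, sensor name, and body-name pattern are illustrative, not taken from this PR:

```python
from omni.isaac.lab.managers import RewardTermCfg as RewTerm
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.utils import configclass

import kbot.tasks.locomotion.velocity.mdp as mdp  # re-exports the upstream reward terms


@configclass
class ExampleRewardsCfg:
    # penalize feet sliding while in contact; weight and ".*ankle.*" body regex are guesses
    feet_slide = RewTerm(
        func=mdp.feet_slide,
        weight=-0.1,
        params={
            "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*ankle.*"),
            "asset_cfg": SceneEntityCfg("robot", body_names=".*ankle.*"),
        },
    )
```
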
100 changes: 50 additions & 50 deletions exts/kbot/kbot/tasks/locomotion/velocity/mdp/rewards.py
@@ -10,61 +10,61 @@
from omni.isaac.lab.envs import RLTaskEnv


def feet_air_time(env: RLTaskEnv, command_name: str, sensor_cfg: SceneEntityCfg, threshold_min: float,
threshold_max: float) -> torch.Tensor:
"""Reward long steps taken by the feet using L2-kernel.
# def feet_air_time(env: RLTaskEnv, command_name: str, sensor_cfg: SceneEntityCfg, threshold_min: float,
# threshold_max: float) -> torch.Tensor:
# """Reward long steps taken by the feet using L2-kernel.

This function rewards the agent for taking steps that are longer than a threshold. This helps ensure
that the robot lifts its feet off the ground and takes steps. The reward is computed as the sum of
the time for which the feet are in the air.
# This function rewards the agent for taking steps that are longer than a threshold. This helps ensure
# that the robot lifts its feet off the ground and takes steps. The reward is computed as the sum of
# the time for which the feet are in the air.

If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
"""
# extract the used quantities (to enable type-hinting)
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# compute the reward
first_contact = contact_sensor.compute_first_contact(env.step_dt)[:, sensor_cfg.body_ids]
last_air_time = contact_sensor.data.last_air_time[:, sensor_cfg.body_ids]
# negative reward for small steps
air_time = (last_air_time - threshold_min) * first_contact
# no reward for large steps
air_time = torch.clamp(air_time, max=threshold_max - threshold_min)
reward = torch.sum(air_time, dim=1)
# no reward for zero command
reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
return reward
# If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
# """
# # extract the used quantities (to enable type-hinting)
# contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# # compute the reward
# first_contact = contact_sensor.compute_first_contact(env.step_dt)[:, sensor_cfg.body_ids]
# last_air_time = contact_sensor.data.last_air_time[:, sensor_cfg.body_ids]
# # negative reward for small steps
# air_time = (last_air_time - threshold_min) * first_contact
# # no reward for large steps
# air_time = torch.clamp(air_time, max=threshold_max - threshold_min)
# reward = torch.sum(air_time, dim=1)
# # no reward for zero command
# reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
# return reward


def feet_air_time_positive_biped(env, command_name: str, threshold_min: float, threshold_max: float,
sensor_cfg: SceneEntityCfg) -> torch.Tensor:
"""Reward long steps taken by the feet for bipeds.
# def feet_air_time_positive_biped(env, command_name: str, threshold_min: float, threshold_max: float,
# sensor_cfg: SceneEntityCfg) -> torch.Tensor:
# """Reward long steps taken by the feet for bipeds.

This function rewards the agent for taking steps up to a specified threshold and also keep one foot at
a time in the air.
# This function rewards the agent for taking steps up to a specified threshold and also keep one foot at
# a time in the air.

If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
"""
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# compute the reward
air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
in_contact = contact_time > 0.0
in_mode_time = torch.where(in_contact, contact_time, air_time)
single_stance = torch.sum(in_contact.int(), dim=1) == 1
reward = torch.min(torch.where(single_stance.unsqueeze(-1), in_mode_time, 0.0), dim=1)[0]
reward = torch.clamp(reward, max=threshold_max)
# no reward for small steps
reward *= reward > threshold_min
# no reward for zero command
reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
return reward
# If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
# """
# contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# # compute the reward
# air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
# contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
# in_contact = contact_time > 0.0
# in_mode_time = torch.where(in_contact, contact_time, air_time)
# single_stance = torch.sum(in_contact.int(), dim=1) == 1
# reward = torch.min(torch.where(single_stance.unsqueeze(-1), in_mode_time, 0.0), dim=1)[0]
# reward = torch.clamp(reward, max=threshold_max)
# # no reward for small steps
# reward *= reward > threshold_min
# # no reward for zero command
# reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
# return reward


def feet_slide(env, sensor_cfg: SceneEntityCfg, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Penalize feet sliding"""
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
contacts = contact_sensor.data.net_forces_w_history[:, :, sensor_cfg.body_ids, :].norm(dim=-1).max(dim=1)[0] > 1.0
asset = env.scene[asset_cfg.name]
body_vel = asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2]
reward = torch.sum(body_vel.norm(dim=-1) * contacts, dim=1)
return reward
# def feet_slide(env, sensor_cfg: SceneEntityCfg, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
# """Penalize feet sliding"""
# contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# contacts = contact_sensor.data.net_forces_w_history[:, :, sensor_cfg.body_ids, :].norm(dim=-1).max(dim=1)[0] > 1.0
# asset = env.scene[asset_cfg.name]
# body_vel = asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2]
# reward = torch.sum(body_vel.norm(dim=-1) * contacts, dim=1)
# return reward