Updates for kbot walking setup. #9

Status: Open. Wants to merge 14 commits into base: master.
1 change: 1 addition & 0 deletions README.md
@@ -8,3 +8,4 @@
# K-Scale Isaac Lab Playground Library

This repository implements training for robotic control policies in the Isaac Sim simulator. For more information, see the [documentation](https://docs.kscale.dev/docs/isaac-lab#/).

4 changes: 2 additions & 2 deletions exts/kbot/kbot/assets/Robots/kbot.usd
Git LFS file not shown
95 changes: 55 additions & 40 deletions exts/kbot/kbot/assets/kbot.py
@@ -3,47 +3,47 @@
from omni.isaac.lab.assets.articulation import ArticulationCfg

from kbot.assets import ISAAC_ASSET_DIR
import math

KBOT_04_ACTUATOR_CFG = IdentifiedActuatorCfg(
joint_names_expr=[".*hip_y", ".*knee"],
joint_names_expr=[".*_04"],
effort_limit=120.0,
velocity_limit=14,
saturation_effort=560,
stiffness={".*": 15.0},
damping={".*": 1.5},
armature={".*": 1.5e-4 * 81},
friction_static=0.8,
velocity_limit=100.0,
saturation_effort=240.0,
stiffness={".*": 200.0},
damping={".*": 5.0},
armature={".*": 0.01},
friction_static=0.0,
activation_vel=0.1,
friction_dynamic=0.02,
friction_dynamic=0.0,
)
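
The 04-class values above imply a torque-speed curve in which saturation_effort sets the short-burst peak and effort_limit the hard cap. A rough sketch of that clamp, assuming the usual DC-motor interpretation of these fields rather than Isaac Lab's exact implementation:

```python
# Illustrative only: mirrors the general idea behind effort_limit /
# saturation_effort / velocity_limit in IdentifiedActuatorCfg, using the
# KBOT_04 numbers above; not Isaac Lab's actual actuator code.
def max_available_torque(joint_vel: float,
                         saturation_effort: float = 240.0,
                         effort_limit: float = 120.0,
                         velocity_limit: float = 100.0) -> float:
    # peak torque falls off linearly with speed (torque-speed line) ...
    peak = saturation_effort * (1.0 - joint_vel / velocity_limit)
    # ... and is always capped by the continuous effort limit
    return max(0.0, min(peak, effort_limit))

print(max_available_torque(0.0))   # 120.0 (capped by effort_limit)
print(max_available_torque(60.0))  # 96.0 (on the torque-speed line)
```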

KBOT_03_ACTUATOR_CFG = IdentifiedActuatorCfg(
joint_names_expr=[".*hip_z", ".*hip_x"],
joint_names_expr=[".*_03"],
effort_limit=60.0,
velocity_limit=14,
saturation_effort=560,
stiffness={".*": 15.0},
damping={".*": 1.5},
armature={".*": 1.5e-4 * 81},
friction_static=0.8,
velocity_limit=100.0,
saturation_effort=120.0,
stiffness={".*": 150.0},
damping={".*": 5.0},
armature={".*": 0.01},
friction_static=0.0,
activation_vel=0.1,
friction_dynamic=0.02,
friction_dynamic=0.0,
)

KBOT_01_ACTUATOR_CFG = IdentifiedActuatorCfg(
joint_names_expr=[".*ankle_y"],
KBOT_02_ACTUATOR_CFG = IdentifiedActuatorCfg(
joint_names_expr=[".*_02"],
effort_limit=17.0,
velocity_limit=14,
saturation_effort=560,
stiffness={".*": 15.0},
damping={".*": 1.5},
armature={".*": 1.5e-4 * 81},
friction_static=0.8,
velocity_limit=100.0,
saturation_effort=34.0,
stiffness={".*": 20.0},
damping={".*": 2.0},
armature={".*": 0.01},
friction_static=0.0,
activation_vel=0.1,
friction_dynamic=0.02,
friction_dynamic=0.0,
)
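
The three actuator groups are now keyed by the numeric suffix in the joint names rather than by joint function. A quick standalone check of which joints from the init_state below each joint_names_expr pattern captures (Isaac Lab resolves these as full-string regex matches, which re.fullmatch approximates here):

```python
import re

# a few joint names taken from the init_state further down in this file
joints = [
    "left_shoulder_pitch_03", "left_elbow_02", "left_hip_pitch_04",
    "left_knee_04", "left_ankle_02", "right_hip_yaw_03",
]

patterns = {"kbot_04": r".*_04", "kbot_03": r".*_03", "kbot_02": r".*_02"}

for group, pattern in patterns.items():
    matched = [name for name in joints if re.fullmatch(pattern, name)]
    print(group, matched)
# kbot_04 ['left_hip_pitch_04', 'left_knee_04']
# kbot_03 ['left_shoulder_pitch_03', 'right_hip_yaw_03']
# kbot_02 ['left_elbow_02', 'left_ankle_02']
```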


KBOT_CFG = ArticulationCfg(
spawn=sim_utils.UsdFileCfg(
usd_path=f"{ISAAC_ASSET_DIR}/Robots/kbot.usd",
@@ -58,28 +58,43 @@
max_depenetration_velocity=1.0,
),
articulation_props=sim_utils.ArticulationRootPropertiesCfg(
enabled_self_collisions=True, solver_position_iteration_count=4, solver_velocity_iteration_count=0
enabled_self_collisions=True,
solver_position_iteration_count=8,
Contributor commented: Unitree's velocity solver has 4 steps fyi.

        articulation_props=sim_utils.ArticulationRootPropertiesCfg(
            enabled_self_collisions=False, solver_position_iteration_count=8, solver_velocity_iteration_count=4
        ),
    ),

Contributor (author) replied: hmm, not sure what the solver does/is. Put a TODO to try 4 steps.
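
A minimal sketch of what that TODO could look like in the config, using only names already present in this file; the 4-iteration value is the reviewer's suggestion and is not validated in this PR:

```python
import omni.isaac.lab.sim as sim_utils  # assumed to match kbot.py's existing sim_utils usage

articulation_props = sim_utils.ArticulationRootPropertiesCfg(
    enabled_self_collisions=True,
    solver_position_iteration_count=8,
    # TODO: try solver_velocity_iteration_count=4 (reviewer notes Unitree configs use 4 velocity steps)
    solver_velocity_iteration_count=0,
)
```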

solver_velocity_iteration_count=0,
),
),
init_state=ArticulationCfg.InitialStateCfg(
pos=(0.0, 0.0, 1.07),
pos=(0.0, 0.0, 1.2),
joint_pos={
'L_hip_y': 0.0,
'L_hip_z': 0.0,
'L_hip_x': 0.0,
'L_knee': 0.0,
'L_ankle_y': 0.0,
'R_hip_y': 0.0,
'R_hip_z': 0.0,
'R_hip_x': 0.0,
'R_knee': 0.0,
'R_ankle_y': 0.0,
"left_shoulder_pitch_03": 0.0,
"left_shoulder_roll_03": 0.0,
"left_shoulder_yaw_02": 0.0,
"left_elbow_02": -math.radians(90),
"left_wrist_02": 0.0,

"right_shoulder_pitch_03": 0.0,
"right_shoulder_roll_03": 0.0,
"right_shoulder_yaw_02": 0.0,
"right_elbow_02": math.radians(90),
"right_wrist_02": 0.0,

'left_hip_pitch_04': math.radians(30),
'left_hip_roll_03': 0.0,
'left_hip_yaw_03': 0.0,
'left_knee_04': math.radians(60),
'left_ankle_02': -math.radians(30),

'right_hip_pitch_04': -math.radians(30),
'right_hip_roll_03': 0.0,
'right_hip_yaw_03': 0.0,
'right_knee_04': -math.radians(60),
'right_ankle_02': -math.radians(30),
},
),
actuators={
"kbot_04": KBOT_04_ACTUATOR_CFG,
"kbot_03": KBOT_03_ACTUATOR_CFG,
"kbot_01": KBOT_01_ACTUATOR_CFG,
"kbot_02": KBOT_02_ACTUATOR_CFG,
},
soft_joint_pos_limit_factor=0.95,
)
)
@@ -25,7 +25,7 @@ class KbotRoughPPORunnerCfg(RslRlOnPolicyRunnerCfg):
value_loss_coef=1.0,
use_clipped_value_loss=True,
clip_param=0.2,
entropy_coef=0.005,
entropy_coef=0.008,
num_learning_epochs=5,
num_mini_batches=4,
learning_rate=1.0e-3,
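
entropy_coef scales the entropy bonus in the PPO objective, so raising it from 0.005 to 0.008 nudges the policy toward more exploration. A toy sketch of where the coefficient enters the loss, using the values from this config; rsl_rl's actual implementation differs in details:

```python
import torch

def ppo_loss(ratio, advantages, value_pred, value_target, entropy,
             clip_param=0.2, value_loss_coef=1.0, entropy_coef=0.008):
    # clipped surrogate objective
    surr1 = ratio * advantages
    surr2 = torch.clamp(ratio, 1 - clip_param, 1 + clip_param) * advantages
    policy_loss = -torch.min(surr1, surr2).mean()
    # value-function regression loss
    value_loss = (value_pred - value_target).pow(2).mean()
    # a larger entropy_coef pushes toward higher-entropy (more exploratory) policies
    return policy_loss + value_loss_coef * value_loss - entropy_coef * entropy.mean()
```
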
@@ -16,11 +16,9 @@ def __post_init__(self):
super().__post_init__()

self.scene.robot = KBOT_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
self.rewards.flat_orientation_l2.weight = -0.5
self.rewards.dof_pos_limits.weight = -1.0

self.observations.policy.height_scan = None
self.scene.height_scanner = None
# self.observations.policy.height_scan = None
# self.scene.height_scanner = None

@configclass
class KbotRoughEnvCfg_PLAY(KbotRoughEnvCfg):
@@ -42,5 +40,5 @@ def __post_init__(self):
# disable randomization for play
self.observations.policy.enable_corruption = False
# remove random pushing
self.randomization.base_external_force_torque = None
self.randomization.push_robot = None
self.events.base_external_force_torque = None
self.events.push_robot = None
2 changes: 2 additions & 0 deletions exts/kbot/kbot/tasks/locomotion/velocity/mdp/__init__.py
@@ -2,6 +2,8 @@

from omni.isaac.lab.envs.mdp import * # noqa: F401, F403

from omni.isaac.lab_tasks.manager_based.locomotion.velocity.mdp.rewards import feet_air_time_positive_biped, feet_slide, track_lin_vel_xy_yaw_frame_exp, track_ang_vel_z_world_exp # noqa: F401, F403

from .curriculums import * # noqa: F401, F403
from .rewards import * # noqa: F401, F403
from .events import * # noqa: F401, F403
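
With this import, the upstream Isaac Lab implementations of the biped air-time, foot-slide, and velocity-tracking rewards become available through this mdp package, which is presumably why the local copies in rewards.py below are commented out. A hypothetical wiring of one of them into a rewards config; the term name, weight, sensor name, and body-name pattern are illustrative, not taken from this PR:

```python
from omni.isaac.lab.managers import RewardTermCfg as RewTerm
from omni.isaac.lab.managers import SceneEntityCfg
from omni.isaac.lab.utils import configclass

import kbot.tasks.locomotion.velocity.mdp as mdp  # re-exports the upstream reward terms


@configclass
class ExampleRewardsCfg:
    # penalize feet sliding while in contact; weight and ".*ankle.*" body regex are guesses
    feet_slide = RewTerm(
        func=mdp.feet_slide,
        weight=-0.1,
        params={
            "sensor_cfg": SceneEntityCfg("contact_forces", body_names=".*ankle.*"),
            "asset_cfg": SceneEntityCfg("robot", body_names=".*ankle.*"),
        },
    )
```
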
100 changes: 50 additions & 50 deletions exts/kbot/kbot/tasks/locomotion/velocity/mdp/rewards.py
@@ -10,61 +10,61 @@
from omni.isaac.lab.envs import RLTaskEnv


def feet_air_time(env: RLTaskEnv, command_name: str, sensor_cfg: SceneEntityCfg, threshold_min: float,
threshold_max: float) -> torch.Tensor:
"""Reward long steps taken by the feet using L2-kernel.
# def feet_air_time(env: RLTaskEnv, command_name: str, sensor_cfg: SceneEntityCfg, threshold_min: float,
# threshold_max: float) -> torch.Tensor:
# """Reward long steps taken by the feet using L2-kernel.

This function rewards the agent for taking steps that are longer than a threshold. This helps ensure
that the robot lifts its feet off the ground and takes steps. The reward is computed as the sum of
the time for which the feet are in the air.
# This function rewards the agent for taking steps that are longer than a threshold. This helps ensure
# that the robot lifts its feet off the ground and takes steps. The reward is computed as the sum of
# the time for which the feet are in the air.

If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
"""
# extract the used quantities (to enable type-hinting)
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# compute the reward
first_contact = contact_sensor.compute_first_contact(env.step_dt)[:, sensor_cfg.body_ids]
last_air_time = contact_sensor.data.last_air_time[:, sensor_cfg.body_ids]
# negative reward for small steps
air_time = (last_air_time - threshold_min) * first_contact
# no reward for large steps
air_time = torch.clamp(air_time, max=threshold_max - threshold_min)
reward = torch.sum(air_time, dim=1)
# no reward for zero command
reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
return reward
# If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
# """
# # extract the used quantities (to enable type-hinting)
# contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# # compute the reward
# first_contact = contact_sensor.compute_first_contact(env.step_dt)[:, sensor_cfg.body_ids]
# last_air_time = contact_sensor.data.last_air_time[:, sensor_cfg.body_ids]
# # negative reward for small steps
# air_time = (last_air_time - threshold_min) * first_contact
# # no reward for large steps
# air_time = torch.clamp(air_time, max=threshold_max - threshold_min)
# reward = torch.sum(air_time, dim=1)
# # no reward for zero command
# reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
# return reward


def feet_air_time_positive_biped(env, command_name: str, threshold_min: float, threshold_max: float,
sensor_cfg: SceneEntityCfg) -> torch.Tensor:
"""Reward long steps taken by the feet for bipeds.
# def feet_air_time_positive_biped(env, command_name: str, threshold_min: float, threshold_max: float,
# sensor_cfg: SceneEntityCfg) -> torch.Tensor:
# """Reward long steps taken by the feet for bipeds.

This function rewards the agent for taking steps up to a specified threshold and also keep one foot at
a time in the air.
# This function rewards the agent for taking steps up to a specified threshold and also keep one foot at
# a time in the air.

If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
"""
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# compute the reward
air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
in_contact = contact_time > 0.0
in_mode_time = torch.where(in_contact, contact_time, air_time)
single_stance = torch.sum(in_contact.int(), dim=1) == 1
reward = torch.min(torch.where(single_stance.unsqueeze(-1), in_mode_time, 0.0), dim=1)[0]
reward = torch.clamp(reward, max=threshold_max)
# no reward for small steps
reward *= reward > threshold_min
# no reward for zero command
reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
return reward
# If the commands are small (i.e. the agent is not supposed to take a step), then the reward is zero.
# """
# contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# # compute the reward
# air_time = contact_sensor.data.current_air_time[:, sensor_cfg.body_ids]
# contact_time = contact_sensor.data.current_contact_time[:, sensor_cfg.body_ids]
# in_contact = contact_time > 0.0
# in_mode_time = torch.where(in_contact, contact_time, air_time)
# single_stance = torch.sum(in_contact.int(), dim=1) == 1
# reward = torch.min(torch.where(single_stance.unsqueeze(-1), in_mode_time, 0.0), dim=1)[0]
# reward = torch.clamp(reward, max=threshold_max)
# # no reward for small steps
# reward *= reward > threshold_min
# # no reward for zero command
# reward *= torch.norm(env.command_manager.get_command(command_name)[:, :2], dim=1) > 0.1
# return reward


def feet_slide(env, sensor_cfg: SceneEntityCfg, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
"""Penalize feet sliding"""
contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
contacts = contact_sensor.data.net_forces_w_history[:, :, sensor_cfg.body_ids, :].norm(dim=-1).max(dim=1)[0] > 1.0
asset = env.scene[asset_cfg.name]
body_vel = asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2]
reward = torch.sum(body_vel.norm(dim=-1) * contacts, dim=1)
return reward
# def feet_slide(env, sensor_cfg: SceneEntityCfg, asset_cfg: SceneEntityCfg = SceneEntityCfg("robot")) -> torch.Tensor:
# """Penalize feet sliding"""
# contact_sensor: ContactSensor = env.scene.sensors[sensor_cfg.name]
# contacts = contact_sensor.data.net_forces_w_history[:, :, sensor_cfg.body_ids, :].norm(dim=-1).max(dim=1)[0] > 1.0
# asset = env.scene[asset_cfg.name]
# body_vel = asset.data.body_lin_vel_w[:, asset_cfg.body_ids, :2]
# reward = torch.sum(body_vel.norm(dim=-1) * contacts, dim=1)
# return reward