HULKs · oleflb · Feb 1, 2025 · Feb 1, 2025 · Feb 1, 2025 · Feb 1, 2025
diff --git a/tools/machine-learning/mujoco/.gitignore b/tools/machine-learning/mujoco/.gitignore
@@ -3,3 +3,4 @@ videos/
 runs/
 models/
 MUJOCO_LOG.TXT
+result/
diff --git a/tools/machine-learning/mujoco/.python-version b/tools/machine-learning/mujoco/.python-version
@@ -1 +1 @@
-3.13
+3.12
diff --git a/tools/machine-learning/mujoco/packages/common_types/pyproject.toml b/tools/machine-learning/mujoco/packages/common_types/pyproject.toml
@@ -2,7 +2,7 @@
 name = "common_types"
 version = "0.1.0"
 description = "Add your description here"
-requires-python = ">=3.13"
+requires-python = ">=3.12"
 dependencies = []
 
 [build-system]

diff --git a/tools/machine-learning/mujoco/packages/kinematics/pyproject.toml b/tools/machine-learning/mujoco/packages/kinematics/pyproject.toml
@@ -2,7 +2,7 @@
 name = "kinematics"
 version = "0.1.0"
 description = "Add your description here"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 
 dependencies = ["common-types", "numpy>=1.2", "robot-dimensions", "transforms"]
 

diff --git a/tools/machine-learning/mujoco/packages/mujoco-interactive-viewer/pyproject.toml b/tools/machine-learning/mujoco/packages/mujoco-interactive-viewer/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 version = "0.1.0"
 name = "mujoco-interactive-viewer"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = ["glfw>=2.8.0", "mujoco>=3.2.6", "numpy>=1.26.4"]
 
 [build-system]

diff --git a/tools/machine-learning/mujoco/packages/nao_env/pyproject.toml b/tools/machine-learning/mujoco/packages/nao_env/pyproject.toml
@@ -2,7 +2,7 @@
 name = "nao-env"
 version = "0.1.0"
 description = "Add your description here"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 
 dependencies = [
   "gymnasium[mujoco]>=1.0.0",

diff --git a/tools/machine-learning/mujoco/packages/nao_interface/pyproject.toml b/tools/machine-learning/mujoco/packages/nao_interface/pyproject.toml
@@ -2,7 +2,7 @@
 name = "nao-interface"
 version = "0.1.0"
 description = "Add your description here"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = ["mujoco>=3.2.6", "numpy>=1.2.0"]
 
 [build-system]

diff --git a/tools/machine-learning/mujoco/packages/rewards/pyproject.toml b/tools/machine-learning/mujoco/packages/rewards/pyproject.toml
@@ -2,7 +2,7 @@
 version = "0.1.0"
 name = "rewards"
 description = "Add your description here"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = [
   "mujoco>=3.2.6",
   "nao-interface",

diff --git a/tools/machine-learning/mujoco/packages/robot_dimensions/pyproject.toml b/tools/machine-learning/mujoco/packages/robot_dimensions/pyproject.toml
@@ -2,7 +2,7 @@
 name = "robot-dimensions"
 version = "0.1.0"
 description = "Add your description here"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = ["numpy>=1.2.0"]
 
 [build-system]

diff --git a/tools/machine-learning/mujoco/packages/throwing/pyproject.toml b/tools/machine-learning/mujoco/packages/throwing/pyproject.toml
@@ -2,7 +2,7 @@
 name = "throwing"
 version = "0.1.0"
 description = "Add your description here"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = ["mujoco>=3.2.6", "numpy>=1.2.0"]
 
 [build-system]

diff --git a/tools/machine-learning/mujoco/packages/transforms/pyproject.toml b/tools/machine-learning/mujoco/packages/transforms/pyproject.toml
@@ -2,7 +2,7 @@
 name = "transforms"
 version = "0.1.0"
 description = "Add your description here"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = ["numpy>=1.2.0", "scipy>=1.14.1"]
 
 [build-system]

diff --git a/tools/machine-learning/mujoco/packages/walking_engine/pyproject.toml b/tools/machine-learning/mujoco/packages/walking_engine/pyproject.toml
@@ -2,7 +2,7 @@
 name = "walking-engine"
 version = "0.1.0"
 description = "Add your description here"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = [
   "common-types",
   "kinematics",

diff --git a/tools/machine-learning/mujoco/pyproject.toml b/tools/machine-learning/mujoco/pyproject.toml
@@ -2,7 +2,7 @@
 name = "mujoco-env"
 version = "0.1.0"
 description = "Add your description here"
-requires-python = ">=3.13"
+requires-python = ">=3.12"
 
 dependencies = [
   "click>=8.1.7",
@@ -11,6 +11,8 @@ dependencies = [
   "moviepy>=1.0.3",
   "mujoco>=3.2.7",
   "numpy>=1.26",
+  "onnx==1.17.0",
+  "openvino==2024.1.0",
   "scipy>=1.14.1",
   "stable-baselines3[extra]==2.4.1",
   "tensorboard>=2.18.0",

diff --git a/tools/machine-learning/mujoco/scripts/convert_model.py b/tools/machine-learning/mujoco/scripts/convert_model.py
@@ -0,0 +1,127 @@
+from pathlib import Path
+
+import click
+import openvino as ov
+import torch
+from nao_env import nao_standing, nao_walking
+from stable_baselines3 import PPO
+from stable_baselines3.common.policies import ActorCriticPolicy
+from torch import nn
+
+
+class UndefinedObservationSpaceError(ValueError):
+    """Raised when the loaded model's observation space has no shape."""
+
+    def __init__(self) -> None:
+        super().__init__("observation space must have a fixed size.")
+
+
+class UndefinedActionSpaceError(ValueError):
+    """Raised when the loaded model's action space has no shape."""
+
+    def __init__(self) -> None:
+        super().__init__("action space must have a fixed size.")
+
+
+class OnnxableSB3Policy(nn.Module):
+    """Wrap an SB3 actor-critic policy so that it can be exported to ONNX.
+
+    The forward pass runs the deterministic policy, fits the result to the
+    bounds of the policy's action space and adds a constant offset.
+    """
+
+    def __init__(self, policy: ActorCriticPolicy, offset: torch.Tensor) -> None:
+        """Store the wrapped policy and the offset added to every action."""
+        super().__init__()
+        self.offset = offset
+        self.policy = policy
+
+    def _action_bounds(self) -> tuple[torch.Tensor, torch.Tensor]:
+        """Return the action space's (low, high) bounds as float32 tensors."""
+        space = self.policy.action_space
+        return (
+            torch.from_numpy(space.low).to(torch.float32),
+            torch.from_numpy(space.high).to(torch.float32),
+        )
+
+    def unscale_action(self, scaled_action: torch.Tensor) -> torch.Tensor:
+        """Map an action linearly so that -1 becomes low and +1 becomes high."""
+        low, high = self._action_bounds()
+        return low + (0.5 * (scaled_action + 1.0) * (high - low))
+
+    def clip_action(self, action: torch.Tensor) -> torch.Tensor:
+        """Clamp an action element-wise to the action space's bounds."""
+        low, high = self._action_bounds()
+        return torch.clamp(action, low, high)
+
+    def forward(self, observation: torch.Tensor) -> torch.Tensor:
+        """Return the deterministic action for the observation plus the offset."""
+        actions = self.policy._predict(observation, deterministic=True)
+
+        # Policies with squash_output are rescaled into the bounds; all
+        # others are only clipped to them.
+        if self.policy.squash_output:
+            actions = self.unscale_action(actions)
+        else:
+            actions = self.clip_action(actions)
+
+        return actions + self.offset
+
+
+# click.argument() accepts no ``help=`` (it raises TypeError), so the
+# arguments are described in the docstring of ``main`` instead.
+@click.command()
+@click.argument("policy", type=click.Path(exists=True))
+@click.argument(
+    "environment-type",
+    type=click.Choice(["NaoStanding", "NaoStandup", "NaoWalking"]),
+)
+def main(policy: str, environment_type: str) -> None:
+    """Convert a saved PPO policy to ONNX and to an OpenVINO model.
+
+    POLICY is the path of the policy to convert to ONNX. ENVIRONMENT_TYPE
+    selects the offset that is added to the actions of the exported policy.
+
+    The results are written to the directory "result" and are named after
+    the directory that contains POLICY.
+    """
+    path = Path(policy)
+    name = path.parent.name
+    model = PPO.load(policy)
+
+    # Without a shape neither the sample input nor the zero offset can be built.
+    observation_size = model.observation_space.shape
+    if observation_size is None:
+        raise UndefinedObservationSpaceError()
+    action_size = model.action_space.shape
+    if action_size is None:
+        raise UndefinedActionSpaceError()
+
+    offset = {
+        "NaoStanding": torch.from_numpy(nao_standing.OFFSET_QPOS),
+        "NaoStandup": torch.zeros(action_size),
+        "NaoWalking": torch.from_numpy(nao_walking.OFFSET_QPOS),
+    }[environment_type]
+
+    network = OnnxableSB3Policy(model.policy, offset)
+    Path("result").mkdir(exist_ok=True)
+
+    with torch.inference_mode():
+        # NOTE(review): the sample input has no batch dimension; confirm that
+        # the exported model is meant to take a single, unbatched observation.
+        torch.onnx.export(
+            network,
+            (torch.randn(observation_size),),
+            f"result/{name}-model.onnx",
+            input_names=["input"],
+            output_names=["output"],
+            opset_version=17,
+        )
+
+    # Convert the ONNX file written above with OpenVINO and save the result.
+    ov_model = ov.convert_model(f"result/{name}-model.onnx")
+    ov.save_model(ov_model, f"result/{name}-policy-ov.xml")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/machine-learning/mujoco/sync.sh b/tools/machine-learning/mujoco/sync.sh
@@ -0,0 +1,3 @@
+#! /usr/bin/env sh
+# Usage: sync.sh DESTINATION -- rsync this directory to DESTINATION, skipping .gitignore patterns and .venv.
+rsync -rP --info=progress2 --exclude-from=.gitignore --exclude=.venv . "${1:?usage: sync.sh DESTINATION}"
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,3 +3,4 @@ videos/ @@
     runs/
     models/
     MUJOCO_LOG.TXT
+    result/
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		#! /usr/bin/env sh

		rsync -rP --info=progress2 --exclude-from=.gitignore --exclude=.venv . $1