Picovoice HMI server (#1)

Part of tue-robotics/tue_robocup#1177
tue-robotics · Nov 15, 2022 · e0996a0 · e0996a0
2 parents f849b8a + 2ba6050
commit e0996a0
Show file tree

Hide file tree

Showing 9 changed files with 407 additions and 675 deletions.
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -0,0 +1,14 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: recursive
+      - uses: psf/black@stable
+        with:
+          options: --check --diff --color -l 120 --exclude docs
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -0,0 +1,13 @@
+name: CI
+
+on: [push, pull_request]
+
+jobs:
+  tue-ci:
+    name: TUe CI - ${{ github.event_name }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: TUe CI
+        uses: tue-robotics/tue-env/ci/main@master
+        with:
+          package: ${{ github.event.repository.name }}
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,22 @@
+cmake_minimum_required(VERSION 3.0.2)
+project(hmi_picovoice)
+
+find_package(catkin REQUIRED COMPONENTS
+  actionlib
+  hmi
+  picovoice_msgs
+  rospy
+)
+
+catkin_python_setup()
+
+catkin_package()
+
+#############
+## Install ##
+#############
+
+catkin_install_python(PROGRAMS
+  scripts/${PROJECT_NAME}_node.py
+  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+)
diff --git a/LICENSE b/LICENSE
diff --git a/README.md b/README.md
@@ -1,2 +1,42 @@
 # hmi_picovoice
-Provides an hmi interface connecting to picovoice speech recognition
+
+[![CI](https://github.com/tue-robotics/hmi_picovoice/actions/workflows/main.yml/badge.svg)](https://github.com/tue-robotics/hmi_picovoice/actions/workflows/main.yml) [![Lint](https://github.com/tue-robotics/hmi_picovoice/actions/workflows/lint.yml/badge.svg)](https://github.com/tue-robotics/hmi_picovoice/actions/workflows/lint.yml)
+
+Provides an hmi interface connecting to picovoice rhino speech recognition.
+
+It depends on [`picovoice_driver`](https://github.com/reinzor/picovoice_ros) to interface with PicoVoice's backend.
+
+## Usage with `hmi` framework
+
+```bash
+export PICOVOICE_KEY=...  # Get from https://console.picovoice.ai, and choose e.g. the coffeeMaker example context
+rosrun picovoice_driver picovoice_driver_rhino _access_key:=$PICOVOICE_KEY __ns:=/robot/hmi
+```
+
+```bash
+rosrun hmi_picovoice hmi_picovoice_node.py _context_url:=coffee_maker_linux __ns:=/robot/hmi
+```
+
+```bash
+rosrun hmi multi_client __name:=/robot/hmi
+```
+
+```bash
+rosrun actionlib_tools axclient.py /robot/hmi
+```
+
+and in the `axclient.py` GUI client specify `grammar: 'orderBeverage'` and hit SEND GOAL.
+Then say e.g. 'large cappuccino' out loud to your computer; if all is well, the Result field should show something like:
+
+```yaml
+talker_id: ''
+sentence: "orderBeverage"
+semantics: "{\"size\": \"large\", \"beverage\": \"cappuccino\"}"
+```
+
+## Parameters
+
+For `hmi_picovoice_node.py`:
+
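+- `context_url` (required): which model to use. **TODO**: Not sure where e.g. the string `coffee_maker_linux` comes from
+- `require_endpoint` (default `false`): ["If set to False, Rhino does not require an endpoint (chunk of silence) before finishing inference"](https://picovoice.ai/docs/api/rhino-python/)
+- `rate` (default `10.0`): frequency in Hz at which the node polls for the recognition result and for preempt requests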
diff --git a/package.xml b/package.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<?xml-model
+  href="http://download.ros.org/schema/package_format3.xsd"
+  schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>hmi_picovoice</name>
+  <version>0.0.0</version>
+  <description>
+    Provides a hmi interface connecting to picovoice rhino speech recognition
+  </description>
+
+  <maintainer email="[email protected]">Rein Appeldoorn</maintainer>
+
+  <license>Apache 2.0</license>
+
+  <buildtool_depend>catkin</buildtool_depend>
+
+  <exec_depend>actionlib</exec_depend>
+  <exec_depend>hmi</exec_depend>
+  <exec_depend>picovoice_msgs</exec_depend>
+  <exec_depend>rospy</exec_depend>
+</package>
diff --git a/scripts/hmi_picovoice_node.py b/scripts/hmi_picovoice_node.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2022, TU/e Robotics
+# All rights reserved.
+#
+# Author: Rein Appeldoorn
+
+import rospy
+
+from hmi_picovoice.server import Server
+
+if __name__ == "__main__":
+    rospy.init_node("hmi_picovoice")
+
+    # Private node parameters; only `~context_url` has no usable default.
+    context_url = rospy.get_param("~context_url", "")
+    require_endpoint = rospy.get_param("~require_endpoint", False)
+    rate = rospy.get_param("~rate", 10.0)
+
+    if not context_url:
+        rospy.logfatal("Missing required parameter '~context_url'")
+        # The `exit` builtin is injected by the `site` module for interactive sessions and is not guaranteed
+        # to exist (e.g. when running with `python -S`); raising SystemExit is the equivalent that always works.
+        raise SystemExit(1)
+
+    # The constructor blocks until the `get_intent` action server is available; afterwards keep the node alive.
+    Server(rospy.get_name(), context_url, require_endpoint, rate)
+    rospy.spin()
diff --git a/setup.py b/setup.py
@@ -0,0 +1,15 @@
+#
+# Copyright (c) 2022, TU/e Robotics
+# All rights reserved.
+#
+# Author: Rein Appeldoorn
+
+from distutils.core import setup
+from catkin_pkg.python_setup import generate_distutils_setup
+
+# Only the package layout is specified here; generate_distutils_setup() produces the keyword arguments for setup().
+setup_args = generate_distutils_setup(
+    package_dir={"": "src"},
+    packages=["hmi_picovoice"],
+)
+
+setup(**setup_args)
diff --git a/src/hmi_picovoice/server.py b/src/hmi_picovoice/server.py
@@ -0,0 +1,55 @@
+#
+# Copyright (c) 2022, TU/e Robotics
+# All rights reserved.
+#
+# Author: Rein Appeldoorn
+
+import actionlib
+import rospy
+from picovoice_msgs.msg import GetIntentAction, GetIntentGoal
+
+from hmi import AbstractHMIServer, HMIResult
+
+
+class Server(AbstractHMIServer):
+    """HMI server that answers queries by sending a `GetIntentGoal` to the `get_intent` action server."""
+
+    def __init__(self, name: str, context_url: str, require_endpoint: bool, rate: float):
+        """
+        Set up the HMI server and wait for the `get_intent` action server to become available.
+
+        :param name: Name handed to the `AbstractHMIServer` constructor
+        :param context_url: Context that is forwarded in every `GetIntentGoal`
+        :param require_endpoint: Flag that is forwarded in every `GetIntentGoal`
+        :param rate: Frequency (Hz) at which the result and the preempt request are polled, must be positive
+        :raises ValueError: If `rate` is not positive
+        """
+        # Fail at construction instead of at the first query: rospy.Rate(0) divides by zero and a negative rate
+        # would turn the polling loop into a busy loop.
+        if rate <= 0:
+            raise ValueError(f"rate must be positive, got {rate}")
+
+        super().__init__(name)
+
+        self._context_url = context_url
+        self._require_endpoint = require_endpoint
+        self._rate = rate
+
+        self._intent_client = actionlib.SimpleActionClient("get_intent", GetIntentAction)
+        rospy.loginfo(f"Waiting for {self._intent_client.action_client.ns} ..")
+        self._intent_client.wait_for_server()
+
+        rospy.loginfo(f"Server initialized (context_url={context_url}, require_endpoint={require_endpoint})")
+
+    def _determine_answer(self, description, grammar, target, is_preempt_requested):
+        """
+        Request an intent from the `get_intent` action server and convert the result to a `HMIResult`.
+
+        :param description: Not used by this implementation
+        :param grammar: Sent as the only entry of the goal's `intents`
+        :param target: Not used by this implementation
+        :param is_preempt_requested: Callable that is polled every cycle; when it returns a truthy value the
+            goal is cancelled
+        :return: `HMIResult` with the intent as sentence and the slots as semantics when the intent was
+            understood; `None` when it was not understood, the query was preempted or ROS is shutting down
+        """
+        self._intent_client.send_goal(
+            GetIntentGoal(
+                context_url=self._context_url,
+                require_endpoint=self._require_endpoint,
+                intents=[grammar],
+            )
+        )
+
+        r = rospy.Rate(self._rate)
+        while not rospy.is_shutdown():
+            if is_preempt_requested():
+                # Cancel and wait until the action server has acknowledged it, so no goal is left running
+                self._intent_client.cancel_all_goals()
+                self._intent_client.wait_for_result()
+                return None
+
+            # get_result() keeps returning None as long as no result has been received for the goal
+            result = self._intent_client.get_result()
+            if result is not None:
+                if not result.is_understood:
+                    return None
+
+                return HMIResult(
+                    semantics={kv.key: kv.value for kv in result.slots},
+                    sentence=result.intent,
+                )
+
+            r.sleep()
+
+        # ROS is shutting down before a result was received
+        return None