badai-nguyen · badai-nguyen · Apr 12, 2024 · Jun 4, 2024 · Jun 12, 2024 · Jun 24, 2024
diff --git a/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp b/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp
@@ -187,6 +187,7 @@ class TrtCommon  // NOLINT
   bool isInitialized();
 
   nvinfer1::Dims getBindingDimensions(const int32_t index) const;
+  int32_t getBindingIndex(const char *name);
   int32_t getNbBindings();
   bool setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const;
   bool enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed);

diff --git a/common/tensorrt_common/src/tensorrt_common.cpp b/common/tensorrt_common/src/tensorrt_common.cpp
@@ -556,6 +556,11 @@ nvinfer1::Dims TrtCommon::getBindingDimensions(const int32_t index) const
 #endif
 }
 
+int32_t TrtCommon::getBindingIndex(const char * name)
+{
+  return engine_->getBindingIndex(name);
+}
+
 int32_t TrtCommon::getNbBindings()
 {
   return engine_->getNbBindings();

diff --git a/perception/tensorrt_yolox/config/yoloXP-sPlus-T4-960x960-pseudo-finetune-semseg.param.yaml b/perception/tensorrt_yolox/config/yoloXP-sPlus-T4-960x960-pseudo-finetune-semseg.param.yaml
@@ -0,0 +1,33 @@
+/**:
+  ros__parameters:
+    model_path: "$(var data_path)/tensorrt_yolox/$(var model_name).onnx"
+    label_path: "$(var data_path)/tensorrt_yolox/label.txt"
+    score_threshold: 0.35
+    nms_threshold: 0.7
+    precision: "int8" # Operation precision to be used on inference. Valid value is one of: [fp32, fp16, int8].
+    calibration_algorithm: "Entropy" # Calibration algorithm to be used for quantization when precision==int8. Valid value is one of: [Entropy, (Legacy | Percentile), MinMax].
+    dla_core_id: -1 # If positive ID value is specified, the node assign inference task to the DLA core.
+    quantize_first_layer: false # If true, set the operating precision for the first (input) layer to be fp16. This option is valid only when precision==int8.
+    quantize_last_layer: false # If true, set the operating precision for the last (output) layer to be fp16. This option is valid only when precision==int8.
+    profile_per_layer: false # If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose.
+    clip_value: 6.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
+    preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
+    calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
+    # refine segmentation mask by overlay roi class
+    # disable when sematic segmentation accuracy is good enough
+    is_roi_overlap_segment: true
+
+    # minimum existence_probability of detected roi considered to replace segmentation
+    overlap_roi_score_threshold: 0.3
+
+    # publish color mask for result visualization
+    is_publish_color_mask: false
+
+    roi_overlay_segment_label:
+      UNKNOWN : true
+      CAR : true
+      TRUCK : true
+      BUS : true
+      MOTORCYCLE : true
+      BICYCLE : true
+      PEDESTRIAN : true
diff --git a/perception/tensorrt_yolox/config/yolox_semseg_backbonefeature.param.yaml b/perception/tensorrt_yolox/config/yolox_semseg_backbonefeature.param.yaml
@@ -0,0 +1,34 @@
+/**:
+  ros__parameters:
+    model_path: "$(var data_path)/tensorrt_yolox/$(var model_name).onnx"
+    label_path: "$(var data_path)/tensorrt_yolox/label.txt"
+    score_threshold: 0.35
+    nms_threshold: 0.7
+    precision: "int8" # Operation precision to be used on inference. Valid value is one of: [fp32, fp16, int8].
+    calibration_algorithm: "Entropy" # Calibration algorithm to be used for quantization when precision==int8. Valid value is one of: [Entropy, (Legacy | Percentile), MinMax].
+    dla_core_id: -1 # If positive ID value is specified, the node assign inference task to the DLA core.
+    quantize_first_layer: false # If true, set the operating precision for the first (input) layer to be fp16. This option is valid only when precision==int8.
+    quantize_last_layer: false # If true, set the operating precision for the last (output) layer to be fp16. This option is valid only when precision==int8.
+    profile_per_layer: false # If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose.
+    clip_value: 6.0 # If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration.
+    preprocess_on_gpu: true # If true, pre-processing is performed on GPU.
+    calibration_image_list_path: "" # Path to a file which contains path to images. Those images will be used for int8 quantization.
+    # refine segmentation mask by overlay roi class
+    # disable when sematic segmentation accuracy is good enough
+    is_roi_overlap_segment: true
+
+    # minimum existence_probability of detected roi considered to replace segmentation
+    overlap_roi_score_threshold: 0.3
+
+    # publish color mask for result visualization
+    is_publish_color_mask: false
+    color_map_path: "$(var data_path)/tensorrt_yolox/bdd100k_semseg.csv"
+    roi_overlay_segment_label:
+      UNKNOWN : true
+      CAR : true
+      TRUCK : true
+      BUS : true
+      MOTORCYCLE : true
+      BICYCLE : true
+      PEDESTRIAN : true
+      ANIMAL: true
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
@@ -44,7 +44,7 @@ struct Roi
  * @param[in] stream cuda stream
  */
 extern void resize_bilinear_gpu(
-  unsigned char * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c,
+  float * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c,
   cudaStream_t stream);
 
 /**
@@ -121,6 +121,25 @@ extern void resize_bilinear_letterbox_nhwc_to_nchw32_gpu(
   float * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c,
   float norm, cudaStream_t stream);
 
+/**
+ * @brief Optimized preprocessing including resize, letterbox, nhwc2nchw, toFloat and normalization
+ * with batching for YOLOX on gpus
+ * @param[out] dst processed image
+ * @param[in] src image
+ * @param[in] d_w width for output
+ * @param[in] d_h height for output
+ * @param[in] d_c channel for output
+ * @param[in] s_w width for input
+ * @param[in] s_h height for input
+ * @param[in] s_c channel for input
+ * @param[in] batch batch size
+ * @param[in] norm normalization
+ * @param[in] stream cuda stream
+ */
+extern void resize_bilinear_nhwc_to_nchw32_batch_gpu(
+  float * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, int batch,
+  float norm, cudaStream_t stream);
+
 /**
  * @brief Optimized preprocessing including resize, letterbox, nhwc2nchw, toFloat and normalization
  * with batching for YOLOX on gpus
@@ -140,6 +159,25 @@ extern void resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
   float * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, int batch,
   float norm, cudaStream_t stream);
 
+/**
+ * @brief Optimized preprocessing including resize, letterbox, nhwc2nchw, toFloat and normalization
+ * with batching for YOLOX on gpus
+ * @param[out] dst processed image
+ * @param[in] src image
+ * @param[in] d_w width for output
+ * @param[in] d_h height for output
+ * @param[in] d_c channel for output
+ * @param[in] s_w width for input
+ * @param[in] s_h height for input
+ * @param[in] s_c channel for input
+ * @param[in] batch batch size
+ * @param[in] norm normalization
+ * @param[in] stream cuda stream
+ */
+extern void resize_bilinear_batch_gpu(
+  float * dst, float * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, int batch,
+  float image_ratio, float norm, cudaStream_t stream);
+
 /**
  * @brief Optimized preprocessing including crop, resize, letterbox, nhwc2nchw, toFloat and
  * normalization with batching for YOLOX on gpus

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -168,6 +168,12 @@
    */
   void preprocessGpu(const std::vector<cv::Mat> & images);
 
+  /**
+   * @brief run preprocess on GPU
+   * @param[in] images batching images
+   */
+  void dacup_preprocessGpu(const std::vector<cv::Mat> & images);
+
   /**
    * @brief run preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
    * @param[in] images batching images
@@ -205,6 +211,9 @@
   bool feedforwardAndDecode(
     const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
     std::vector<cv::Mat> & color_masks);
+  bool dacup_feedforwardAndDecode(
+    const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
+    std::vector<cv::Mat> & color_masks);
   void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
   void generateGridsAndStride(
     const int target_w, const int target_h, const std::vector<int> & strides,
@@ -253,7 +262,14 @@
    */
   cv::Mat getMaskImageGpu(float * d_prob, nvinfer1::Dims dims, int out_w, int out_h, int b);
 
+  cv::Mat getAnomalyImage(float * anomaly, nvinfer1::Dims dims, int out_w, int out_h);
+
+  cv::Mat getAnomalyBox(
+    float * segmentation, float * anomaly, ObjectArray & objects, cv::Size & img_size, float scale,
+    nvinfer1::Dims dims, int out_w, int out_h);
+
   std::unique_ptr<tensorrt_common::TrtCommon> trt_common_;
+  std::unique_ptr<tensorrt_common::TrtCommon> trt_common_dacup_;
 
   std::vector<float> input_h_;
   CudaUniquePtr<float[]> input_d_;
@@ -310,6 +326,22 @@
   // device buffer for argmax postprocessing  on GPU
   CudaUniquePtr<unsigned char[]> argmax_buf_d_;
   std::vector<tensorrt_yolox::Colormap> sematic_color_map_;
+
+  // buff size for backbonefeature heads
+  CudaUniquePtr<float[]> backbonefeature_out_prob_d_;
+  CudaUniquePtrHost<float[]> backbonefeature_out_prob_h_;
+  size_t backbonefeature_out_elem_num_;
+  size_t backbonefeature_out_elem_num_per_batch_;
+
+  CudaUniquePtr<float[]> dacup_backbonefeatures_in_d_;
+  CudaUniquePtr<float[]> dacup_segmentation_in_d_;
+  CudaUniquePtr<float[]> dacup_image_in_d_;
+  // output
+  CudaUniquePtr<float[]> anomaly_out_d_;
+  CudaUniquePtrHost<float[]> anomaly_out_h_;
+  size_t anomaly_out_elem_num_;
+  size_t anomaly_out_elem_num_per_batch_;
+  std::vector<cv::Mat> anomaly_masks_;
 };
 
 }  // namespace tensorrt_yolox

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -15,6 +15,7 @@
 #ifndef TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
 #define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
 
+#include "detected_object_validation/utils/utils.hpp"
 #include "object_recognition_utils/object_recognition_utils.hpp"
 
 #include <autoware/universe_utils/ros/debug_publisher.hpp>

diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -9,7 +9,7 @@
     default="yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls"
     description="options `yolox-sPlus-T4-960x960-pseudo-finetune` if only detection is needed, `yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls` if sematic segmentation is also needed"
   />
-  <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data_dacup" description="packages data and artifacts directory path"/>
   <arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
   <arg name="use_decompress" default="true" description="use image decompress"/>
   <arg name="build_only" default="false" description="exit after trt engine is built"/>

diff --git a/perception/tensorrt_yolox/launch/yolox_semseg_backbonefeature.launch.xml b/perception/tensorrt_yolox/launch/yolox_semseg_backbonefeature.launch.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<launch>
+  <!-- cspell:ignore finetune -->
+  <arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
+  <arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
+  <arg name="model_name" default="yolox_semseg_backbonefeature"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data_dacup" description="packages data and artifacts directory path"/>
+  <arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_semseg_backbonefeature.param.yaml"/>
+  <arg name="use_decompress" default="true" description="use image decompress"/>
+  <arg name="build_only" default="false" description="exit after trt engine is built"/>
+  <arg name="param_file" default="$(find-pkg-share image_transport_decompressor)/config/image_transport_decompressor.param.yaml"/>
+
+  <node pkg="image_transport_decompressor" exec="image_transport_decompressor_node" name="image_transport_decompressor_node" if="$(var use_decompress)">
+    <remap from="~/input/compressed_image" to="$(var input/image)/compressed"/>
+    <remap from="~/output/raw_image" to="$(var input/image)"/>
+    <param from="$(var param_file)" allow_substs="true"/>
+  </node>
+
+  <node pkg="tensorrt_yolox" exec="tensorrt_yolox_node_exe" name="tensorrt_yolox" output="screen">
+    <remap from="~/in/image" to="$(var input/image)"/>
+    <remap from="~/out/objects" to="$(var output/objects)"/>
+    <param from="$(var yolox_param_path)" allow_substs="true"/>
+    <param name="build_only" value="$(var build_only)"/>
+  </node>
+  <!-- <node pkg="rviz2" exec="rviz2" name="rviz2" output="screen" args="-d $(find-pkg-share autoware_launch)/rviz/autoware.rviz -s $(find-pkg-share autoware_launch)/rviz/image/autoware.png" if="true"/> -->
+</launch>
diff --git a/perception/tensorrt_yolox/launch/yolox_semseg_backbonefeature_feature0.launch.xml b/perception/tensorrt_yolox/launch/yolox_semseg_backbonefeature_feature0.launch.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<launch>
+  <!-- cspell:ignore finetune -->
+  <arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
+  <arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
+  <arg name="model_name" default="yolox_semseg_feature0_16cls"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data_dacup" description="packages data and artifacts directory path"/>
+  <arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_semseg_backbonefeature.param.yaml"/>
+  <arg name="use_decompress" default="true" description="use image decompress"/>
+  <arg name="build_only" default="false" description="exit after trt engine is built"/>
+  <arg name="param_file" default="$(find-pkg-share image_transport_decompressor)/config/image_transport_decompressor.param.yaml"/>
+
+  <node pkg="image_transport_decompressor" exec="image_transport_decompressor_node" name="image_transport_decompressor_node" if="$(var use_decompress)">
+    <remap from="~/input/compressed_image" to="$(var input/image)/compressed"/>
+    <remap from="~/output/raw_image" to="$(var input/image)"/>
+    <param from="$(var param_file)" allow_substs="true"/>
+  </node>
+
+  <node pkg="tensorrt_yolox" exec="tensorrt_yolox_node_exe" name="tensorrt_yolox" output="screen">
+    <remap from="~/in/image" to="$(var input/image)"/>
+    <remap from="~/out/objects" to="$(var output/objects)"/>
+    <param from="$(var yolox_param_path)" allow_substs="true"/>
+    <param name="build_only" value="$(var build_only)"/>
+  </node>
+  <!-- <node pkg="rviz2" exec="rviz2" name="rviz2" output="screen" args="-d $(find-pkg-share autoware_launch)/rviz/autoware.rviz -s $(find-pkg-share autoware_launch)/rviz/image/autoware.png" if="true"/> -->
+</launch>
diff --git a/perception/tensorrt_yolox/launch/yolox_tiny.launch.xml b/perception/tensorrt_yolox/launch/yolox_tiny.launch.xml
@@ -3,7 +3,7 @@
   <arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
   <arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
   <arg name="model_name" default="yolox-tiny"/>
-  <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
+  <arg name="data_path" default="$(env HOME)/autoware_data_dacup" description="packages data and artifacts directory path"/>
   <arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_tiny.param.yaml"/>
   <arg name="use_decompress" default="true" description="use image decompress"/>
   <arg name="build_only" default="false" description="exit after trt engine is built"/>

diff --git a/perception/tensorrt_yolox/package.xml b/perception/tensorrt_yolox/package.xml
@@ -24,6 +24,7 @@
   <depend>image_transport</depend>
   <depend>libopencv-dev</depend>
   <depend>object_recognition_utils</depend>
+  <depend>detected_object_validation</depend>
   <depend>rclcpp</depend>
   <depend>rclcpp_components</depend>
   <depend>sensor_msgs</depend>