Merge pull request #2 from ZHEQIUSHUI/auto_size

auto input size
AXERA-TECH · Oct 30, 2023 · 9663e11
2 parents f57eb93 + 1e0a45b
commit 9663e11
Show file tree

Hide file tree

Showing 4 changed files with 23 additions and 7 deletions.
diff --git a/qtproj/CLIPQT/mainwindow.ui b/qtproj/CLIPQT/mainwindow.ui
@@ -14,7 +14,7 @@
    <enum>Qt::DefaultContextMenu</enum>
   </property>
   <property name="windowTitle">
-   <string>MainWindow</string>
+   <string>CLIP</string>
   </property>
   <property name="styleSheet">
    <string notr="true"/>
@@ -38,13 +38,21 @@
               <height>40</height>
              </size>
             </property>
+            <property name="font">
+             <font>
+              <pointsize>14</pointsize>
+             </font>
+            </property>
             <property name="styleSheet">
              <string notr="true">border-radius: 20px;
 background-color: rgb(99, 122, 125);</string>
             </property>
             <property name="alignment">
              <set>Qt::AlignCenter</set>
             </property>
+            <property name="placeholderText">
+             <string>Type in Chinese or English and click Enter to search.</string>
+            </property>
            </widget>
           </item>
           <item>

diff --git a/src/Runner/CLIP.hpp b/src/Runner/CLIP.hpp
@@ -48,7 +48,7 @@ class CLIP
     int LEN_IMAGE_FEATURE = 512;
     int LEN_TEXT_FEATURE = 512;
     int LEN_TEXT_TOKEN = 77;
-
+    int input_height, input_width;
 public:
     CLIP()
     {

diff --git a/src/Runner/CLIPAX650.hpp b/src/Runner/CLIPAX650.hpp
@@ -13,7 +13,10 @@ class CLIPAX650 : public CLIP
     {
         m_encoder.reset(new ax_runner_ax650);
         m_encoder->init(encoder_path.c_str());
-        input = cv::Mat(224, 224, CV_8UC3, m_encoder->get_input(0).pVirAddr);
+        input_height = m_encoder->get_algo_height();
+        input_width = m_encoder->get_algo_width();
+        ALOGI("input size %d %d", input_height, input_width);
+        input = cv::Mat(input_height, input_width, CV_8UC3, m_encoder->get_input(0).pVirAddr);
 
         LEN_IMAGE_FEATURE = m_encoder->get_output(0).vShape[1];
         ALOGI("image feature len %d", LEN_IMAGE_FEATURE);
@@ -27,7 +30,7 @@ class CLIPAX650 : public CLIP
             ALOGE("encoder not init");
             return;
         }
-        cv::resize(image, input, cv::Size(224, 224));
+        cv::resize(image, input, cv::Size(input_width, input_height));
         cv::cvtColor(input, input, cv::COLOR_BGR2RGB);
         auto ret = m_encoder->inference();
 

diff --git a/src/Runner/CLIPOnnx.hpp b/src/Runner/CLIPOnnx.hpp
@@ -16,6 +16,11 @@ class CLIPOnnx : public CLIP
         config.nthread = 8;
         config.onnx_model = encoder_path;
         m_encoder->load(config);
+
+        input_width = m_encoder->getInputShape(0)[3];
+        input_height = m_encoder->getInputShape(0)[2];
+        ALOGI("input size %d %d", input_height, input_width);
+
         LEN_IMAGE_FEATURE = m_encoder->getOutputShape(0)[1];
         ALOGI("image feature len %d", LEN_IMAGE_FEATURE);
         image_features_input = std::vector<float>(1024 * LEN_IMAGE_FEATURE);
@@ -28,15 +33,15 @@ class CLIPOnnx : public CLIP
             ALOGE("encoder not init");
             return;
         }
-        cv::resize(image, input, cv::Size(224, 224));
+        cv::resize(image, input, cv::Size(input_width, input_height));
         cv::cvtColor(input, input, cv::COLOR_BGR2RGB);
 
         float *inputPtr = (float *)m_encoder->getInputPtr(0);
 
         uchar *img_data = input.data;
 
-        int letterbox_cols = 224;
-        int letterbox_rows = 224;
+        int letterbox_cols = input_width;
+        int letterbox_rows = input_height;
         for (int c = 0; c < 3; c++)
         {
             for (int h = 0; h < letterbox_rows; h++)