Merge branch 'branch-25.02' into rhdong/cagra-merge

rapidsai · Feb 5, 2025 · b49e04a · b49e04a
2 parents afb6026 + d7c258e
commit b49e04a
Show file tree

Hide file tree

Showing 32 changed files with 957 additions and 82 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -26,12 +26,23 @@ jobs:
       - wheel-build-cuvs
       - wheel-tests-cuvs
       - devcontainer
+      - telemetry-setup
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.02
     if: always()
     with:
       needs: ${{ toJSON(needs) }}
+  telemetry-setup:
+    continue-on-error: true
+    runs-on: ubuntu-latest
+    env:
+      OTEL_SERVICE_NAME: 'pr-cuvs'
+    steps:
+      - name: Telemetry setup
+        if: ${{ vars.TELEMETRY_ENABLED == 'true' }}
+        uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main
   check-nightly-ci:
+    needs: telemetry-setup
     # Switch to ubuntu-latest once it defaults to a version of Ubuntu that
     # provides at least Python 3.11 (see
     # https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat)
@@ -44,6 +55,7 @@ jobs:
         with:
           repo: cuvs
   changed-files:
+    needs: telemetry-setup
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.02
     with:
@@ -77,10 +89,12 @@ jobs:
           - '!rust/**'
           - '!thirdparty/LICENSES/**'
   checks:
+    needs: telemetry-setup
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02
     with:
       enable_check_generated_files: false
+      ignored_pr_jobs: "telemetry-summarize"
   conda-cpp-build:
     needs: checks
     secrets: inherit
@@ -162,6 +176,7 @@ jobs:
       script: ci/test_wheel_cuvs.sh
   devcontainer:
     secrets: inherit
+    needs: telemetry-setup
     uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.02
     with:
       arch: '["amd64"]'
@@ -170,3 +185,13 @@ jobs:
         sccache -z;
         build-all --verbose;
         sccache -s;
+
+  telemetry-summarize:
+    # This job must use a self-hosted runner to record telemetry traces.
+    runs-on: linux-amd64-cpu4
+    needs: pr-builder
+    if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }}
+    continue-on-error: true
+    steps:
+      - name: Telemetry summarize
+        uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -695,6 +695,7 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$<BOOL:${CUVS_NVTX}>:NVTX_ENAB
       src/neighbors/ivf_pq_c.cpp
       src/neighbors/cagra_c.cpp
       $<$<BOOL:${BUILD_CAGRA_HNSWLIB}>:src/neighbors/hnsw_c.cpp>
+      src/neighbors/nn_descent_c.cpp
       src/neighbors/refine/refine_c.cpp
       src/preprocessing/quantize/scalar_c.cpp
       src/distance/pairwise_distance_c.cpp

diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp
@@ -519,12 +519,10 @@ void register_search(std::shared_ptr<const dataset<T>> dataset,
 
 template <typename T>
 void dispatch_benchmark(std::string cmdline,
-                        const configuration& conf,
+                        configuration& conf,
                         bool force_overwrite,
                         bool build_mode,
                         bool search_mode,
-                        std::string data_prefix,
-                        std::string index_prefix,
                         kv_series override_kv,
                         Mode metric_objective,
                         const std::vector<int>& threads,
@@ -539,11 +537,10 @@ void dispatch_benchmark(std::string cmdline,
       ::benchmark::AddCustomContext(key, value);
     }
   }
-  const auto dataset_conf = conf.get_dataset_conf();
-  auto base_file          = combine_path(data_prefix, dataset_conf.base_file);
-  auto query_file         = combine_path(data_prefix, dataset_conf.query_file);
-  auto gt_file            = dataset_conf.groundtruth_neighbors_file;
-  if (gt_file.has_value()) { gt_file.emplace(combine_path(data_prefix, gt_file.value())); }
+  auto& dataset_conf = conf.get_dataset_conf();
+  auto base_file     = dataset_conf.base_file;
+  auto query_file    = dataset_conf.query_file;
+  auto gt_file       = dataset_conf.groundtruth_neighbors_file;
   auto dataset =
     std::make_shared<bench::dataset<T>>(dataset_conf.name,
                                         base_file,
@@ -555,7 +552,7 @@ void dispatch_benchmark(std::string cmdline,
                                         search_mode ? dataset_conf.filtering_rate : std::nullopt);
   ::benchmark::AddCustomContext("dataset", dataset_conf.name);
   ::benchmark::AddCustomContext("distance", dataset_conf.distance);
-  std::vector<configuration::index> indices = conf.get_indices();
+  std::vector<configuration::index>& indices = conf.get_indices();
   if (build_mode) {
     if (file_exists(base_file)) {
       log_info("Using the dataset file '%s'", base_file.c_str());
@@ -570,11 +567,11 @@ void dispatch_benchmark(std::string cmdline,
       for (auto param : apply_overrides(index.build_param, override_kv)) {
         auto modified_index        = index;
         modified_index.build_param = param;
-        modified_index.file        = combine_path(index_prefix, modified_index.file);
         more_indices.push_back(modified_index);
       }
     }
-    register_build<T>(dataset, more_indices, force_overwrite, no_lap_sync);
+    std::swap(more_indices, indices);  // update the config in case algorithms need to access it
+    register_build<T>(dataset, indices, force_overwrite, no_lap_sync);
   } else if (search_mode) {
     if (file_exists(query_file)) {
       log_info("Using the query file '%s'", query_file.c_str());
@@ -601,7 +598,6 @@ void dispatch_benchmark(std::string cmdline,
     }
     for (auto& index : indices) {
       index.search_params = apply_overrides(index.search_params, override_kv);
-      index.file          = combine_path(index_prefix, index.file);
     }
     register_search<T>(dataset, indices, metric_objective, threads, no_lap_sync);
   }
@@ -726,7 +722,7 @@ inline auto run_main(int argc, char** argv) -> int
     log_warn("cudart library is not found, GPU-based indices won't work.");
   }
 
-  configuration conf(conf_stream);
+  auto& conf        = bench::configuration::initialize(conf_stream, data_prefix, index_prefix);
   std::string dtype = conf.get_dataset_conf().dtype;
 
   if (dtype == "float") {
@@ -735,8 +731,6 @@ inline auto run_main(int argc, char** argv) -> int
                               force_overwrite,
                               build_mode,
                               search_mode,
-                              data_prefix,
-                              index_prefix,
                               override_kv,
                               metric_objective,
                               threads,
@@ -747,8 +741,6 @@ inline auto run_main(int argc, char** argv) -> int
                              force_overwrite,
                              build_mode,
                              search_mode,
-                             data_prefix,
-                             index_prefix,
                              override_kv,
                              metric_objective,
                              threads,
@@ -759,8 +751,6 @@ inline auto run_main(int argc, char** argv) -> int
                                      force_overwrite,
                                      build_mode,
                                      search_mode,
-                                     data_prefix,
-                                     index_prefix,
                                      override_kv,
                                      metric_objective,
                                      threads,
@@ -771,8 +761,6 @@ inline auto run_main(int argc, char** argv) -> int
                                     force_overwrite,
                                     build_mode,
                                     search_mode,
-                                    data_prefix,
-                                    index_prefix,
                                     override_kv,
                                     metric_objective,
                                     threads,

diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp
@@ -57,31 +57,53 @@ class configuration {
     std::optional<double> filtering_rate{std::nullopt};
   };
 
-  explicit inline configuration(std::istream& conf_stream)
+  [[nodiscard]] inline auto get_dataset_conf() const -> const dataset_conf&
+  {
+    return dataset_conf_;
+  }
+  [[nodiscard]] inline auto get_dataset_conf() -> dataset_conf& { return dataset_conf_; }
+  [[nodiscard]] inline auto get_indices() const -> const std::vector<index>& { return indices_; };
+  [[nodiscard]] inline auto get_indices() -> std::vector<index>& { return indices_; };
+
+  /** The benchmark initializes the configuration once and has a chance to modify it during the
+   * setup. */
+  static inline auto initialize(std::istream& conf_stream,
+                                std::string data_prefix,
+                                std::string index_prefix) -> configuration&
+  {
+    singleton_ =
+      std::unique_ptr<configuration>(new configuration{conf_stream, data_prefix, index_prefix});
+    return *singleton_;
+  }
+
+  /** Any algorithm can access the benchmark configuration as an immutable context. */
+  [[nodiscard]] static inline auto singleton() -> const configuration& { return *singleton_; }
+
+ private:
+  explicit inline configuration(std::istream& conf_stream,
+                                std::string data_prefix,
+                                std::string index_prefix)
   {
     // to enable comments in json
     auto conf = nlohmann::json::parse(conf_stream, nullptr, true, true);
 
-    parse_dataset(conf.at("dataset"));
-    parse_index(conf.at("index"), conf.at("search_basic_param"));
+    parse_dataset(conf.at("dataset"), data_prefix);
+    parse_index(conf.at("index"), conf.at("search_basic_param"), index_prefix);
   }
 
-  [[nodiscard]] inline auto get_dataset_conf() const -> dataset_conf { return dataset_conf_; }
-  [[nodiscard]] inline auto get_indices() const -> std::vector<index> { return indices_; };
-
- private:
-  inline void parse_dataset(const nlohmann::json& conf)
+  inline void parse_dataset(const nlohmann::json& conf, std::string data_prefix)
   {
     dataset_conf_.name       = conf.at("name");
-    dataset_conf_.base_file  = conf.at("base_file");
-    dataset_conf_.query_file = conf.at("query_file");
+    dataset_conf_.base_file  = combine_path(data_prefix, conf.at("base_file"));
+    dataset_conf_.query_file = combine_path(data_prefix, conf.at("query_file"));
     dataset_conf_.distance   = conf.at("distance");
     if (conf.contains("filtering_rate")) {
       dataset_conf_.filtering_rate.emplace(conf.at("filtering_rate"));
     }
 
     if (conf.contains("groundtruth_neighbors_file")) {
-      dataset_conf_.groundtruth_neighbors_file = conf.at("groundtruth_neighbors_file");
+      dataset_conf_.groundtruth_neighbors_file =
+        combine_path(data_prefix, conf.at("groundtruth_neighbors_file"));
     }
     if (conf.contains("subset_first_row")) {
       dataset_conf_.subset_first_row = conf.at("subset_first_row");
@@ -108,7 +130,9 @@ class configuration {
       }
     }
   }
-  inline void parse_index(const nlohmann::json& index_conf, const nlohmann::json& search_basic_conf)
+  inline void parse_index(const nlohmann::json& index_conf,
+                          const nlohmann::json& search_basic_conf,
+                          std::string index_prefix)
   {
     const int batch_size = search_basic_conf.at("batch_size");
     const int k          = search_basic_conf.at("k");
@@ -118,7 +142,7 @@ class configuration {
       index.name        = conf.at("name");
       index.algo        = conf.at("algo");
       index.build_param = conf.at("build_param");
-      index.file        = conf.at("file");
+      index.file        = combine_path(index_prefix, conf.at("file"));
       index.batch_size  = batch_size;
       index.k           = k;
 
@@ -147,6 +171,8 @@ class configuration {
 
   dataset_conf dataset_conf_;
   std::vector<index> indices_;
+
+  static inline std::unique_ptr<configuration> singleton_ = nullptr;
 };
 
 }  // namespace cuvs::bench
diff --git a/cpp/cmake/patches/faiss_override.json b/cpp/cmake/patches/faiss_override.json
@@ -1,9 +1,9 @@
 {
     "packages" : {
       "faiss" : {
-        "version": "1.7.4",
+        "version": "1.10.0",
         "git_url": "https://github.com/facebookresearch/faiss.git",
-        "git_tag": "main"
+        "git_tag": "v1.10.0"
       }
     }
   }
diff --git a/cpp/include/cuvs/neighbors/brute_force.h b/cpp/include/cuvs/neighbors/brute_force.h
@@ -68,8 +68,7 @@ cuvsError_t cuvsBruteForceIndexDestroy(cuvsBruteForceIndex_t index);
  *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
  *        or `kDLCPU`. Also, acceptable underlying types are:
  *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *        2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *        3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
  *
  * @code {.c}
  * #include <cuvs/core/c_api.h>
@@ -120,7 +119,8 @@ cuvsError_t cuvsBruteForceBuild(cuvsResources_t res,
  *        It is also important to note that the BRUTEFORCE index must have been built
  *        with the same type of `queries`, such that `index.dtype.code ==
  *        queries.dl_tensor.dtype.code` Types for input are:
- *        1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ *        1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or
+ *          `kDLDataType.bits = 16`
  *        2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
  *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
  *

diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h
@@ -333,8 +333,9 @@ cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int* dim);
  *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
  *        or `kDLCPU`. Also, acceptable underlying types are:
  *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *        2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *        3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *        3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *        4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
  *
  * @code {.c}
  * #include <cuvs/core/c_api.h>
@@ -421,8 +422,9 @@ cuvsError_t cuvsCagraExtend(cuvsResources_t res,
  * queries.dl_tensor.dtype.code` Types for input are:
  *        1. `queries`:
  *          a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *          b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *          c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *          b. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *          c. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *          d. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
  *        2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
  *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
  *

diff --git a/cpp/include/cuvs/neighbors/ivf_pq.h b/cpp/include/cuvs/neighbors/ivf_pq.h
@@ -258,8 +258,9 @@ cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index);
  *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
  *        or `kDLCPU`. Also, acceptable underlying types are:
  *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *        2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *        3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *        3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *        4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
  *
  * @code {.c}
  * #include <cuvs/core/c_api.h>
@@ -314,6 +315,7 @@ cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
  *        with the same type of `queries`, such that `index.dtype.code ==
  * queries.dl_tensor.dtype.code` Types for input are:
  *        1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ *            or `kDLDataType.bits = 16`
  *        2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
  *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
  *