Merge branch 'branch-25.02' into diskann-wrapper

rapidsai · Feb 5, 2025 · 5d95616 · 5d95616
2 parents 3b5e9e2 + 45703bf
commit 5d95616
Show file tree

Hide file tree

Showing 28 changed files with 882 additions and 46 deletions.
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -691,6 +691,7 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$<BOOL:${CUVS_NVTX}>:NVTX_ENAB
       src/neighbors/ivf_pq_c.cpp
       src/neighbors/cagra_c.cpp
       $<$<BOOL:${BUILD_CAGRA_HNSWLIB}>:src/neighbors/hnsw_c.cpp>
+      src/neighbors/nn_descent_c.cpp
       src/neighbors/refine/refine_c.cpp
       src/preprocessing/quantize/scalar_c.cpp
       src/distance/pairwise_distance_c.cpp

diff --git a/cpp/include/cuvs/neighbors/brute_force.h b/cpp/include/cuvs/neighbors/brute_force.h
@@ -68,8 +68,7 @@ cuvsError_t cuvsBruteForceIndexDestroy(cuvsBruteForceIndex_t index);
  *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
  *        or `kDLCPU`. Also, acceptable underlying types are:
  *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *        2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *        3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
  *
  * @code {.c}
  * #include <cuvs/core/c_api.h>
@@ -120,7 +119,8 @@ cuvsError_t cuvsBruteForceBuild(cuvsResources_t res,
  *        It is also important to note that the BRUTEFORCE index must have been built
  *        with the same type of `queries`, such that `index.dtype.code ==
  *        queries.dl_tensor.dtype.code` Types for input are:
- *        1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ *        1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` or
+ *          `kDLDataType.bits = 16`
  *        2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
  *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
  *

diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h
@@ -333,8 +333,9 @@ cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int* dim);
  *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
  *        or `kDLCPU`. Also, acceptable underlying types are:
  *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *        2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *        3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *        3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *        4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
  *
  * @code {.c}
  * #include <cuvs/core/c_api.h>
@@ -421,8 +422,9 @@ cuvsError_t cuvsCagraExtend(cuvsResources_t res,
  * queries.dl_tensor.dtype.code` Types for input are:
  *        1. `queries`:
  *          a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *          b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *          c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *          b. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *          c. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *          d. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
  *        2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
  *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
  *

diff --git a/cpp/include/cuvs/neighbors/ivf_pq.h b/cpp/include/cuvs/neighbors/ivf_pq.h
@@ -258,8 +258,9 @@ cuvsError_t cuvsIvfPqIndexDestroy(cuvsIvfPqIndex_t index);
  *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
  *        or `kDLCPU`. Also, acceptable underlying types are:
  *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
- *        2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
- *        3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *        3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *        4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
  *
  * @code {.c}
  * #include <cuvs/core/c_api.h>
@@ -314,6 +315,7 @@ cuvsError_t cuvsIvfPqBuild(cuvsResources_t res,
  *        with the same type of `queries`, such that `index.dtype.code ==
  * queries.dl_tensor.dtype.code` Types for input are:
  *        1. `queries`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ *            or `kDLDataType.bits = 16`
  *        2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
  *        3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
  *

diff --git a/cpp/include/cuvs/neighbors/nn_descent.h b/cpp/include/cuvs/neighbors/nn_descent.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuvs/core/c_api.h>
+#include <cuvs/distance/distance.h>
+#include <dlpack/dlpack.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup nn_descent_c_index_params The nn-descent algorithm parameters.
+ * @{
+ */
+/**
+ * @brief Parameters used to build an nn-descent index
+ *
+ * `metric`: The distance metric to use
+ * `metric_arg`: The argument used by distance metrics like Minkowski distance
+ * `graph_degree`: For an input dataset of dimensions (N, D),
+ * determines the final dimensions of the all-neighbors knn graph
+ * which turns out to be of dimensions (N, graph_degree)
+ * `intermediate_graph_degree`: Internally, nn-descent builds an
+ * all-neighbors knn graph of dimensions (N, intermediate_graph_degree)
+ * before selecting the final `graph_degree` neighbors. It's recommended
+ * that `intermediate_graph_degree` >= 1.5 * graph_degree
+ * `max_iterations`: The number of iterations that nn-descent will refine
+ * the graph for. More iterations produce a better quality graph at the cost of performance
+ * `termination_threshold`: The delta at which nn-descent will terminate its iterations
+ */
+struct cuvsNNDescentIndexParams {
+  cuvsDistanceType metric;
+  float metric_arg;
+  size_t graph_degree;
+  size_t intermediate_graph_degree;
+  size_t max_iterations;
+  float termination_threshold;
+  bool return_distances;
+  size_t n_clusters;
+};
+
+typedef struct cuvsNNDescentIndexParams* cuvsNNDescentIndexParams_t;
+
+/**
+ * @brief Allocate NN-Descent Index params, and populate with default values
+ *
+ * @param[in] index_params cuvsNNDescentIndexParams_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsNNDescentIndexParamsCreate(cuvsNNDescentIndexParams_t* index_params);
+
+/**
+ * @brief De-allocate NN-Descent Index params
+ *
+ * @param[in] index_params
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsNNDescentIndexParamsDestroy(cuvsNNDescentIndexParams_t index_params);
+/**
+ * @}
+ */
+
+/**
+ * @defgroup nn_descent_c_index NN-Descent index
+ * @{
+ */
+/**
+ * @brief Struct to hold address of cuvs::neighbors::nn_descent::index and its active trained dtype
+ *
+ */
+typedef struct {
+  uintptr_t addr;
+  DLDataType dtype;
+} cuvsNNDescentIndex;
+
+typedef cuvsNNDescentIndex* cuvsNNDescentIndex_t;
+
+/**
+ * @brief Allocate NN-Descent index
+ *
+ * @param[in] index cuvsNNDescentIndex_t to allocate
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsNNDescentIndexCreate(cuvsNNDescentIndex_t* index);
+
+/**
+ * @brief De-allocate NN-Descent index
+ *
+ * @param[in] index cuvsNNDescentIndex_t to de-allocate
+ */
+cuvsError_t cuvsNNDescentIndexDestroy(cuvsNNDescentIndex_t index);
+/**
+ * @}
+ */
+
+/**
+ * @defgroup nn_descent_c_index_build NN-Descent index build
+ * @{
+ */
+/**
+ * @brief Build a NN-Descent index with a `DLManagedTensor` which has underlying
+ *        `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`,
+ *        or `kDLCPU`. Also, acceptable underlying types are:
+ *        1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ *        2. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 16`
+ *        3. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
+ *        4. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
+ *
+ * @code {.c}
+ * #include <cuvs/core/c_api.h>
+ * #include <cuvs/neighbors/nn_descent.h>
+ *
+ * // Create cuvsResources_t
+ * cuvsResources_t res;
+ * cuvsError_t res_create_status = cuvsResourcesCreate(&res);
+ *
+ * // Assume a populated `DLManagedTensor` type here
+ * DLManagedTensor dataset;
+ *
+ * // Create default index params
+ * cuvsNNDescentIndexParams_t index_params;
+ * cuvsError_t params_create_status = cuvsNNDescentIndexParamsCreate(&index_params);
+ *
+ * // Create NN-Descent index
+ * cuvsNNDescentIndex_t index;
+ * cuvsError_t index_create_status = cuvsNNDescentIndexCreate(&index);
+ *
+ * // Build the NN-Descent Index (the optional preallocated `graph` is omitted here)
+ * cuvsError_t build_status = cuvsNNDescentBuild(res, index_params, &dataset, NULL, index);
+ *
+ * // de-allocate `index_params`, `index` and `res`
+ * cuvsError_t params_destroy_status = cuvsNNDescentIndexParamsDestroy(index_params);
+ * cuvsError_t index_destroy_status = cuvsNNDescentIndexDestroy(index);
+ * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
+ * @endcode
+ *
+ * @param[in] res cuvsResources_t opaque C handle
+ * @param[in] index_params cuvsNNDescentIndexParams_t used to build NN-Descent index
+ * @param[in] dataset DLManagedTensor* training dataset on host or device memory
+ * @param[inout] graph Optional preallocated graph on host memory to store output
+ * @param[out] index cuvsNNDescentIndex_t Newly built NN-Descent index
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
+                               cuvsNNDescentIndexParams_t index_params,
+                               DLManagedTensor* dataset,
+                               DLManagedTensor* graph,
+                               cuvsNNDescentIndex_t index);
+/**
+ * @}
+ */
+
+/**
+ * @brief Get the KNN graph from a built NN-Descent index
+ *
+ * @param[in] index cuvsNNDescentIndex_t Built NN-Descent index
+ * @param[inout] graph Optional preallocated graph on host memory to store output
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsNNDescentIndexGetGraph(cuvsNNDescentIndex_t index, DLManagedTensor* graph);
+#ifdef __cplusplus
+}
+#endif
diff --git a/cpp/src/neighbors/brute_force_c.cpp b/cpp/src/neighbors/brute_force_c.cpp
@@ -33,7 +33,7 @@
 
 namespace {
 
-template <typename T, typename LayoutT = raft::row_major>
+template <typename T, typename LayoutT = raft::row_major, typename DistT = float>
 void* _build(cuvsResources_t res,
              DLManagedTensor* dataset_tensor,
              cuvsDistanceType metric,
@@ -49,11 +49,11 @@ void* _build(cuvsResources_t res,
   params.metric_arg = metric_arg;
 
   auto index_on_stack = cuvs::neighbors::brute_force::build(*res_ptr, params, mds);
-  auto index_on_heap  = new cuvs::neighbors::brute_force::index<T>(std::move(index_on_stack));
+  auto index_on_heap = new cuvs::neighbors::brute_force::index<T, DistT>(std::move(index_on_stack));
   return index_on_heap;
 }
 
-template <typename T, typename QueriesLayoutT = raft::row_major>
+template <typename T, typename QueriesLayoutT = raft::row_major, typename DistT = float>
 void _search(cuvsResources_t res,
              cuvsBruteForceIndex index,
              DLManagedTensor* queries_tensor,
@@ -62,11 +62,11 @@ void _search(cuvsResources_t res,
              cuvsFilter prefilter)
 {
   auto res_ptr   = reinterpret_cast<raft::resources*>(res);
-  auto index_ptr = reinterpret_cast<cuvs::neighbors::brute_force::index<T>*>(index.addr);
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::brute_force::index<T, DistT>*>(index.addr);
 
   using queries_mdspan_type   = raft::device_matrix_view<T const, int64_t, QueriesLayoutT>;
   using neighbors_mdspan_type = raft::device_matrix_view<int64_t, int64_t, raft::row_major>;
-  using distances_mdspan_type = raft::device_matrix_view<float, int64_t, raft::row_major>;
+  using distances_mdspan_type = raft::device_matrix_view<DistT, int64_t, raft::row_major>;
   using prefilter_mds_type    = raft::device_vector_view<uint32_t, int64_t>;
   using prefilter_bmp_type    = cuvs::core::bitmap_view<uint32_t, int64_t>;
 
@@ -98,19 +98,19 @@ void _search(cuvsResources_t res,
   }
 }
 
-template <typename T>
+template <typename T, typename DistT = float>
 void _serialize(cuvsResources_t res, const char* filename, cuvsBruteForceIndex index)
 {
   auto res_ptr   = reinterpret_cast<raft::resources*>(res);
-  auto index_ptr = reinterpret_cast<cuvs::neighbors::brute_force::index<T>*>(index.addr);
+  auto index_ptr = reinterpret_cast<cuvs::neighbors::brute_force::index<T, DistT>*>(index.addr);
   cuvs::neighbors::brute_force::serialize(*res_ptr, std::string(filename), *index_ptr);
 }
 
-template <typename T>
+template <typename T, typename DistT = float>
 void* _deserialize(cuvsResources_t res, const char* filename)
 {
   auto res_ptr = reinterpret_cast<raft::resources*>(res);
-  auto index   = new cuvs::neighbors::brute_force::index<T>(*res_ptr);
+  auto index   = new cuvs::neighbors::brute_force::index<T, DistT>(*res_ptr);
   cuvs::neighbors::brute_force::deserialize(*res_ptr, std::string(filename), index);
   return index;
 }
@@ -126,14 +126,13 @@ extern "C" cuvsError_t cuvsBruteForceIndexDestroy(cuvsBruteForceIndex_t index_c_
   return cuvs::core::translate_exceptions([=] {
     auto index = *index_c_ptr;
 
-    if (index.dtype.code == kDLFloat) {
-      auto index_ptr = reinterpret_cast<cuvs::neighbors::brute_force::index<float>*>(index.addr);
+    if ((index.dtype.code == kDLFloat) && index.dtype.bits == 32) {
+      auto index_ptr =
+        reinterpret_cast<cuvs::neighbors::brute_force::index<float, float>*>(index.addr);
       delete index_ptr;
-    } else if (index.dtype.code == kDLInt) {
-      auto index_ptr = reinterpret_cast<cuvs::neighbors::brute_force::index<int8_t>*>(index.addr);
-      delete index_ptr;
-    } else if (index.dtype.code == kDLUInt) {
-      auto index_ptr = reinterpret_cast<cuvs::neighbors::brute_force::index<uint8_t>*>(index.addr);
+    } else if ((index.dtype.code == kDLFloat) && index.dtype.bits == 16) {
+      auto index_ptr =
+        reinterpret_cast<cuvs::neighbors::brute_force::index<half, float>*>(index.addr);
       delete index_ptr;
     }
     delete index_c_ptr;
@@ -148,6 +147,7 @@ extern "C" cuvsError_t cuvsBruteForceBuild(cuvsResources_t res,
 {
   return cuvs::core::translate_exceptions([=] {
     auto dataset = dataset_tensor->dl_tensor;
+    index->dtype = dataset.dtype;
 
     if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) {
       if (cuvs::core::is_c_contiguous(dataset_tensor)) {
@@ -159,7 +159,16 @@ extern "C" cuvsError_t cuvsBruteForceBuild(cuvsResources_t res,
       } else {
         RAFT_FAIL("dataset input to cuvsBruteForceBuild must be contiguous (non-strided)");
       }
-      index->dtype = dataset.dtype;
+    } else if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 16) {
+      if (cuvs::core::is_c_contiguous(dataset_tensor)) {
+        index->addr =
+          reinterpret_cast<uintptr_t>(_build<half>(res, dataset_tensor, metric, metric_arg));
+      } else if (cuvs::core::is_f_contiguous(dataset_tensor)) {
+        index->addr = reinterpret_cast<uintptr_t>(
+          _build<half, raft::col_major>(res, dataset_tensor, metric, metric_arg));
+      } else {
+        RAFT_FAIL("dataset input to cuvsBruteForceBuild must be contiguous (non-strided)");
+      }
     } else {
       RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d",
                 dataset.dtype.code,
@@ -204,6 +213,15 @@ extern "C" cuvsError_t cuvsBruteForceSearch(cuvsResources_t res,
       } else {
         RAFT_FAIL("queries input to cuvsBruteForceSearch must be contiguous (non-strided)");
       }
+    } else if (queries.dtype.code == kDLFloat && queries.dtype.bits == 16) {
+      if (cuvs::core::is_c_contiguous(queries_tensor)) {
+        _search<half>(res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter);
+      } else if (cuvs::core::is_f_contiguous(queries_tensor)) {
+        _search<half, raft::col_major>(
+          res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter);
+      } else {
+        RAFT_FAIL("queries input to cuvsBruteForceSearch must be contiguous (non-strided)");
+      }
     } else {
       RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d",
                 queries.dtype.code,
@@ -228,6 +246,9 @@ extern "C" cuvsError_t cuvsBruteForceDeserialize(cuvsResources_t res,
     if (dtype.kind == 'f' && dtype.itemsize == 4) {
       index->dtype.code = kDLFloat;
       index->addr       = reinterpret_cast<uintptr_t>(_deserialize<float>(res, filename));
+    } else if (dtype.kind == 'f' && dtype.itemsize == 2) {
+      index->dtype.code = kDLFloat;
+      index->addr       = reinterpret_cast<uintptr_t>(_deserialize<half>(res, filename));
     } else {
       RAFT_FAIL("Unsupported index dtype: %d and bits: %d", index->dtype.code, index->dtype.bits);
     }
@@ -241,6 +262,8 @@ extern "C" cuvsError_t cuvsBruteForceSerialize(cuvsResources_t res,
   return cuvs::core::translate_exceptions([=] {
     if (index->dtype.code == kDLFloat && index->dtype.bits == 32) {
       _serialize<float>(res, filename, *index);
+    } else if (index->dtype.code == kDLFloat && index->dtype.bits == 16) {
+      _serialize<half>(res, filename, *index);
     } else {
       RAFT_FAIL("Unsupported index dtype: %d and bits: %d", index->dtype.code, index->dtype.bits);
     }