Skip to content

Commit

Permalink
Merge branch 'fea-cuvs-bench-tests' of github.com:dantegd/cuvs into f…
Browse files Browse the repository at this point in the history
…ea-cuvs-bench-tests
  • Loading branch information
dantegd committed Feb 5, 2025
2 parents 36a6094 + fd35d75 commit d4d21a1
Show file tree
Hide file tree
Showing 21 changed files with 839 additions and 63 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.8.0-ubuntu24.04-py3.12"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
rust-build:
needs: conda-cpp-build
Expand Down
4 changes: 4 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,9 @@ if(BUILD_SHARED_LIBS)
src/neighbors/cagra_search_int8.cu
src/neighbors/cagra_search_uint8.cu
src/neighbors/detail/cagra/compute_distance.cu
src/neighbors/detail/cagra/compute_distance_standard_BitwiseHamming_u8_uint32_dim128_t8.cu
src/neighbors/detail/cagra/compute_distance_standard_BitwiseHamming_u8_uint32_dim256_t16.cu
src/neighbors/detail/cagra/compute_distance_standard_BitwiseHamming_u8_uint32_dim512_t32.cu
src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim128_t8.cu
src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim256_t16.cu
src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim512_t32.cu
Expand Down Expand Up @@ -469,6 +472,7 @@ if(BUILD_SHARED_LIBS)
src/neighbors/vamana_serialize_uint8.cu
src/neighbors/vamana_serialize_int8.cu
src/preprocessing/quantize/scalar.cu
src/preprocessing/quantize/binary.cu
src/selection/select_k_float_int64_t.cu
src/selection/select_k_float_int32_t.cu
src/selection/select_k_float_uint32_t.cu
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cuvs/distance/distance.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ typedef enum {
RusselRaoExpanded = 18,
/** Dice-Sorensen distance **/
DiceExpanded = 19,
/** Bitstring Hamming distance **/
BitwiseHamming = 20,
/** Precomputed (special value) **/
Precomputed = 100
} cuvsDistanceType;
Expand Down
170 changes: 170 additions & 0 deletions cpp/include/cuvs/preprocessing/quantize/binary.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/core/device_mdarray.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/handle.hpp>
#include <raft/core/host_mdarray.hpp>
#include <raft/core/host_mdspan.hpp>

#include <cuda_fp16.h>

namespace cuvs::preprocessing::quantize::binary {

/**
* @defgroup binary Binary quantizer utilities
* @{
*/

/**
* @brief Applies binary quantization transform to given dataset. If a dataset element is positive,
* set the corresponding bit to 1.
*
* This overload takes a `double` dataset and `uint8_t` output, both as device matrix views.
*
* NOTE(review): the usage examples of the float/half overloads in this header size the output
* as (n_rows, ceil(n_cols / 8)), i.e. one output bit per input element; the example below is
* sized the same way. The bit order inside each output byte and the value written for zero or
* negative elements are not visible from this header -- confirm against the implementation.
*
* Usage example:
* @code{.cpp}
* raft::handle_t handle;
* // `dataset` is a device matrix view of shape (samples, features)
* int64_t quantized_dim = raft::div_rounding_up_safe(features, sizeof(uint8_t) * 8);
* auto quantized_dataset = raft::make_device_matrix<uint8_t, int64_t>(
* handle, samples, quantized_dim);
* cuvs::preprocessing::quantize::binary::transform(handle, dataset, quantized_dataset.view());
* @endcode
*
* @param[in] res raft resource
* @param[in] dataset a row-major matrix view on device
* @param[out] out a row-major matrix view on device that receives the quantized bits
*
*/
void transform(raft::resources const& res,
raft::device_matrix_view<const double, int64_t> dataset,
raft::device_matrix_view<uint8_t, int64_t> out);

/**
* @brief Applies binary quantization transform to given dataset. If a dataset element is positive,
* set the corresponding bit to 1.
*
* This overload takes a `double` dataset and `uint8_t` output, both as host matrix views.
*
* NOTE(review): the usage examples of the float/half overloads in this header size the output
* as (n_rows, ceil(n_cols / 8)), i.e. one output bit per input element; the example below is
* sized the same way. The bit order inside each output byte and the value written for zero or
* negative elements are not visible from this header -- confirm against the implementation.
*
* Usage example:
* @code{.cpp}
* raft::handle_t handle;
* // `dataset` is a host matrix view of shape (samples, features)
* int64_t quantized_dim = raft::div_rounding_up_safe(features, sizeof(uint8_t) * 8);
* auto quantized_dataset = raft::make_host_matrix<uint8_t, int64_t>(
* handle, samples, quantized_dim);
* cuvs::preprocessing::quantize::binary::transform(handle, dataset, quantized_dataset.view());
* @endcode
*
* @param[in] res raft resource
* @param[in] dataset a row-major matrix view on host
* @param[out] out a row-major matrix view on host that receives the quantized bits
*
*/
void transform(raft::resources const& res,
raft::host_matrix_view<const double, int64_t> dataset,
raft::host_matrix_view<uint8_t, int64_t> out);

/**
* @brief Applies binary quantization transform to given dataset. If a dataset element is positive,
* set the corresponding bit to 1.
*
* This overload takes a `float` dataset and `uint8_t` output, both as device matrix views.
* As the usage example shows, the output is sized with one bit per input element:
* (n_rows, ceil(n_cols / 8)) bytes.
*
* NOTE(review): the bit order inside each output byte and the value written for zero or
* negative elements are not visible from this header -- confirm against the implementation.
* NOTE(review): the example declares the dataset with `uint64_t` indexing and passes the
* matrix object itself, while this overload takes an `int64_t`-indexed view -- confirm the
* example compiles as written.
*
* Usage example:
* @code{.cpp}
* raft::handle_t handle;
* raft::device_matrix<float, uint64_t> dataset = read_dataset(filename);
* int64_t quantized_dim = raft::div_rounding_up_safe(dataset.extent(1), sizeof(uint8_t) * 8);
* auto quantized_dataset = raft::make_device_matrix<uint8_t, int64_t>(
* handle, dataset.extent(0), quantized_dim);
* cuvs::preprocessing::quantize::binary::transform(handle, dataset, quantized_dataset.view());
* @endcode
*
* @param[in] res raft resource
* @param[in] dataset a row-major matrix view on device
* @param[out] out a row-major matrix view on device that receives the quantized bits
*
*/
void transform(raft::resources const& res,
raft::device_matrix_view<const float, int64_t> dataset,
raft::device_matrix_view<uint8_t, int64_t> out);

/**
* @brief Applies binary quantization transform to given dataset. If a dataset element is positive,
* set the corresponding bit to 1.
*
* This overload takes a `float` dataset and `uint8_t` output, both as host matrix views.
* As the usage example shows, the output is sized with one bit per input element:
* (n_rows, ceil(n_cols / 8)) bytes.
*
* NOTE(review): the bit order inside each output byte and the value written for zero or
* negative elements are not visible from this header -- confirm against the implementation.
* NOTE(review): the example declares the dataset with `uint64_t` indexing and passes the
* matrix object itself, while this overload takes an `int64_t`-indexed view -- confirm the
* example compiles as written.
*
* Usage example:
* @code{.cpp}
* raft::handle_t handle;
* raft::host_matrix<float, uint64_t> dataset = read_dataset(filename);
* int64_t quantized_dim = raft::div_rounding_up_safe(dataset.extent(1), sizeof(uint8_t) * 8);
* auto quantized_dataset = raft::make_host_matrix<uint8_t, int64_t>(
* handle, dataset.extent(0), quantized_dim);
* cuvs::preprocessing::quantize::binary::transform(handle, dataset, quantized_dataset.view());
* @endcode
*
* @param[in] res raft resource
* @param[in] dataset a row-major matrix view on host
* @param[out] out a row-major matrix view on host that receives the quantized bits
*
*/
void transform(raft::resources const& res,
raft::host_matrix_view<const float, int64_t> dataset,
raft::host_matrix_view<uint8_t, int64_t> out);

/**
* @brief Applies binary quantization transform to given dataset. If a dataset element is positive,
* set the corresponding bit to 1.
*
* This overload takes a `half` dataset and `uint8_t` output, both as device matrix views.
* As the usage example shows, the output is sized with one bit per input element:
* (n_rows, ceil(n_cols / 8)) bytes.
*
* NOTE(review): the bit order inside each output byte and the value written for zero or
* negative elements are not visible from this header -- confirm against the implementation.
* NOTE(review): the example declares the dataset with `uint64_t` indexing and passes the
* matrix object itself, while this overload takes an `int64_t`-indexed view -- confirm the
* example compiles as written.
*
* Usage example:
* @code{.cpp}
* raft::handle_t handle;
* raft::device_matrix<half, uint64_t> dataset = read_dataset(filename);
* int64_t quantized_dim = raft::div_rounding_up_safe(dataset.extent(1), sizeof(uint8_t) * 8);
* auto quantized_dataset = raft::make_device_matrix<uint8_t, int64_t>(
* handle, dataset.extent(0), quantized_dim);
* cuvs::preprocessing::quantize::binary::transform(handle, dataset, quantized_dataset.view());
* @endcode
*
* @param[in] res raft resource
* @param[in] dataset a row-major matrix view on device
* @param[out] out a row-major matrix view on device that receives the quantized bits
*
*/
void transform(raft::resources const& res,
raft::device_matrix_view<const half, int64_t> dataset,
raft::device_matrix_view<uint8_t, int64_t> out);

/**
* @brief Applies binary quantization transform to given dataset. If a dataset element is positive,
* set the corresponding bit to 1.
*
* This overload takes a `half` dataset and `uint8_t` output, both as host matrix views.
* As the usage example shows, the output is sized with one bit per input element:
* (n_rows, ceil(n_cols / 8)) bytes.
*
* NOTE(review): the bit order inside each output byte and the value written for zero or
* negative elements are not visible from this header -- confirm against the implementation.
* NOTE(review): the example declares the dataset with `uint64_t` indexing and passes the
* matrix object itself, while this overload takes an `int64_t`-indexed view -- confirm the
* example compiles as written.
*
* Usage example:
* @code{.cpp}
* raft::handle_t handle;
* raft::host_matrix<half, uint64_t> dataset = read_dataset(filename);
* int64_t quantized_dim = raft::div_rounding_up_safe(dataset.extent(1), sizeof(uint8_t) * 8);
* auto quantized_dataset = raft::make_host_matrix<uint8_t, int64_t>(
* handle, dataset.extent(0), quantized_dim);
* cuvs::preprocessing::quantize::binary::transform(handle, dataset, quantized_dataset.view());
* @endcode
*
* @param[in] res raft resource
* @param[in] dataset a row-major matrix view on host
* @param[out] out a row-major matrix view on host that receives the quantized bits
*
*/
void transform(raft::resources const& res,
raft::host_matrix_view<const half, int64_t> dataset,
raft::host_matrix_view<uint8_t, int64_t> out);

/** @} */ // end of group binary

} // namespace cuvs::preprocessing::quantize::binary
16 changes: 12 additions & 4 deletions cpp/internal/cuvs_internal/neighbors/naive_knn.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,27 @@ RAFT_KERNEL naive_distance_kernel(EvalT* dist,
for (IdxT i = 0; i < k; ++i) {
IdxT xidx = i + midx * k;
IdxT yidx = i + nidx * k;
auto xv = EvalT(x[xidx]);
auto yv = EvalT(y[yidx]);
auto xv = x[xidx];
auto yv = y[yidx];
switch (metric) {
case cuvs::distance::DistanceType::InnerProduct: {
acc += xv * yv;
acc += static_cast<EvalT>(xv) * static_cast<EvalT>(yv);
} break;
case cuvs::distance::DistanceType::CosineExpanded: {
acc += static_cast<EvalT>(xv) * static_cast<EvalT>(yv);
} break;
case cuvs::distance::DistanceType::L2SqrtExpanded:
case cuvs::distance::DistanceType::L2SqrtUnexpanded:
case cuvs::distance::DistanceType::L2Expanded:
case cuvs::distance::DistanceType::L2Unexpanded: {
auto diff = xv - yv;
auto diff = static_cast<EvalT>(xv) - static_cast<EvalT>(yv);
acc += diff * diff;
} break;
case cuvs::distance::DistanceType::BitwiseHamming: {
if constexpr (std::is_same_v<uint8_t, DataT>) {
acc += __popc(static_cast<uint32_t>(xv ^ yv) & 0xff);
}
} break;
default: break;
}
}
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/neighbors/detail/cagra/cagra_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,11 @@ index<T, IdxT> build(
knn_build_params = cagra::graph_build_params::ivf_pq_params(dataset.extents(), params.metric);
}
}
RAFT_EXPECTS(
params.metric != BitwiseHamming ||
std::holds_alternative<cagra::graph_build_params::iterative_search_params>(knn_build_params),
"IVF_PQ and NN_DESCENT for CAGRA graph build do not support BitwiseHamming as a metric. Please "
"use the iterative CAGRA search build.");

auto cagra_graph = raft::make_host_matrix<IdxT, int64_t>(0, 0);

Expand Down
28 changes: 26 additions & 2 deletions cpp/src/neighbors/detail/cagra/compute_distance-ext.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,24 @@ extern template struct vpq_descriptor_spec<DistanceType::L2Expanded,
uint8_t,
uint32_t,
float>;
extern template struct standard_descriptor_spec<DistanceType::BitwiseHamming,
8,
128,
uint8_t,
uint32_t,
float>;
extern template struct standard_descriptor_spec<DistanceType::BitwiseHamming,
16,
256,
uint8_t,
uint32_t,
float>;
extern template struct standard_descriptor_spec<DistanceType::BitwiseHamming,
32,
512,
uint8_t,
uint32_t,
float>;

extern template struct instance_selector<
standard_descriptor_spec<DistanceType::L2Expanded, 8, 128, float, uint32_t, float>,
Expand Down Expand Up @@ -441,7 +459,10 @@ extern template struct instance_selector<
standard_descriptor_spec<DistanceType::L2Expanded, 32, 512, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::InnerProduct, 32, 512, uint8_t, uint32_t, float>,
vpq_descriptor_spec<DistanceType::L2Expanded, 32, 512, 8, 2, half, uint8_t, uint32_t, float>,
vpq_descriptor_spec<DistanceType::L2Expanded, 32, 512, 8, 4, half, uint8_t, uint32_t, float>>;
vpq_descriptor_spec<DistanceType::L2Expanded, 32, 512, 8, 4, half, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::BitwiseHamming, 8, 128, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::BitwiseHamming, 16, 256, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::BitwiseHamming, 32, 512, uint8_t, uint32_t, float>>;

using descriptor_instances = instance_selector<
standard_descriptor_spec<DistanceType::L2Expanded, 8, 128, float, uint32_t, float>,
Expand Down Expand Up @@ -491,7 +512,10 @@ using descriptor_instances = instance_selector<
standard_descriptor_spec<DistanceType::L2Expanded, 32, 512, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::InnerProduct, 32, 512, uint8_t, uint32_t, float>,
vpq_descriptor_spec<DistanceType::L2Expanded, 32, 512, 8, 2, half, uint8_t, uint32_t, float>,
vpq_descriptor_spec<DistanceType::L2Expanded, 32, 512, 8, 4, half, uint8_t, uint32_t, float>>;
vpq_descriptor_spec<DistanceType::L2Expanded, 32, 512, 8, 4, half, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::BitwiseHamming, 8, 128, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::BitwiseHamming, 16, 256, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::BitwiseHamming, 32, 512, uint8_t, uint32_t, float>>;

template <typename DataT, typename IndexT, typename DistanceT, typename DatasetT>
auto dataset_descriptor_init(const cagra::search_params& params,
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/neighbors/detail/cagra/compute_distance.cu
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ template struct instance_selector<
standard_descriptor_spec<DistanceType::L2Expanded, 32, 512, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::InnerProduct, 32, 512, uint8_t, uint32_t, float>,
vpq_descriptor_spec<DistanceType::L2Expanded, 32, 512, 8, 2, half, uint8_t, uint32_t, float>,
vpq_descriptor_spec<DistanceType::L2Expanded, 32, 512, 8, 4, half, uint8_t, uint32_t, float>>;
vpq_descriptor_spec<DistanceType::L2Expanded, 32, 512, 8, 4, half, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::BitwiseHamming, 8, 128, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::BitwiseHamming, 16, 256, uint8_t, uint32_t, float>,
standard_descriptor_spec<DistanceType::BitwiseHamming, 32, 512, uint8_t, uint32_t, float>>;

} // namespace cuvs::neighbors::cagra::detail
18 changes: 18 additions & 0 deletions cpp/src/neighbors/detail/cagra/compute_distance_00_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,24 @@
f.write(template.format(includes=includes, content=content))
cmake_list.append(f" src/neighbors/detail/cagra/{path}")

# CAGRA (Bitwise Hamming distance)
for (mxdim, team) in mxdim_team:
metric = 'BitwiseHamming'
type_path = 'u8_uint32'
idx_t = 'uint32_t'
distance_t = 'float'
data_t = 'uint8_t'

path = f"compute_distance_standard_{metric}_{type_path}_dim{mxdim}_t{team}.cu"
includes = '#include "compute_distance_standard-impl.cuh"'
params = f"{metric_prefix}{metric}, {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}"
spec = f"standard_descriptor_spec<{params}>"
content = f"""template struct {spec};"""
specs.append(spec)
with open(path, "w") as f:
f.write(template.format(includes=includes, content=content))
cmake_list.append(f" src/neighbors/detail/cagra/{path}")

with open("compute_distance-ext.cuh", "w") as f:
includes = '''
#pragma once
Expand Down
Loading

0 comments on commit d4d21a1

Please sign in to comment.