Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into rhdong/bf-bitset
Browse files Browse the repository at this point in the history
  • Loading branch information
rhdong authored Jan 30, 2025
2 parents 3ecccfb + 4ca47c9 commit 6cc5059
Show file tree
Hide file tree
Showing 93 changed files with 26,437 additions and 362 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,6 @@ ivf_pq_index
# cuvs_bench
datasets/
/*.json

# java
.classpath
15 changes: 13 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@ ARGS=$*
# scripts, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcuvs python rust docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h"
VALIDARGS="clean libcuvs python rust java docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h"
HELP="$0 [<target> ...] [<flag> ...] [--cmake-args=\"<args>\"] [--cache-tool=<tool>] [--limit-tests=<targets>] [--limit-bench-ann=<targets>] [--build-metrics=<filename>]
where <target> is:
clean - remove all existing build artifacts and configuration (start over)
libcuvs - build the cuvs C++ code only. Also builds the C-wrapper library
around the C++ code.
python - build the cuvs Python package
rust - build the cuvs Rust bindings
java - build the cuvs Java bindings
docs - build the documentation
tests - build the tests
bench-ann - build end-to-end ann benchmarks
Expand Down Expand Up @@ -61,7 +62,8 @@ SPHINX_BUILD_DIR=${REPODIR}/docs
DOXYGEN_BUILD_DIR=${REPODIR}/cpp/doxygen
PYTHON_BUILD_DIR=${REPODIR}/python/cuvs/_skbuild
RUST_BUILD_DIR=${REPODIR}/rust/target
BUILD_DIRS="${LIBCUVS_BUILD_DIR} ${PYTHON_BUILD_DIR} ${RUST_BUILD_DIR}"
JAVA_BUILD_DIR=${REPODIR}/java/cuvs-java/target
BUILD_DIRS="${LIBCUVS_BUILD_DIR} ${PYTHON_BUILD_DIR} ${RUST_BUILD_DIR} ${JAVA_BUILD_DIR}"

# Set defaults for vars modified by flags to this script
CMAKE_LOG_LEVEL=""
Expand Down Expand Up @@ -445,6 +447,15 @@ if (( ${NUMARGS} == 0 )) || hasArg rust; then
cargo test
fi

# Build the cuvs Java bindings.
# Runs when no target was given (build everything) or when `java` was requested.
if (( ${NUMARGS} == 0 )) || hasArg java; then
    # Advisory only: the Java build expects the libcuvs libraries to exist.
    # Print it only when explicit targets were given without `libcuvs`; a
    # no-argument run presumably builds libcuvs itself, so the hint would be
    # misleading there. NOTE(review): confirm against the libcuvs section.
    if (( ${NUMARGS} != 0 )) && ! hasArg libcuvs; then
        echo "Please add 'libcuvs' to this script's arguments (ex. './build.sh libcuvs java') if libcuvs libraries are not already built"
    fi
    # Quote the path to avoid word splitting/globbing, and stop if the
    # directory cannot be entered: otherwise `./build.sh` would be resolved
    # relative to whatever the current directory happens to be.
    cd "${REPODIR}/java" || exit 1
    ./build.sh
fi

export RAPIDS_VERSION="$(sed -E -e 's/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2.\3/' "${REPODIR}/VERSION")"
export RAPIDS_VERSION_MAJOR_MINOR="$(sed -E -e 's/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2/' "${REPODIR}/VERSION")"

Expand Down
8 changes: 8 additions & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ NEXT_UCXX_SHORT_TAG="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG
# Need to distutils-normalize the original version
# Each value is passed through packaging.version.Version and printed back,
# which yields the normalized string form of that version.
NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))")
NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_UCXX_SHORT_TAG}'))")
# Same normalization applied to the patch component alone; it is appended to
# the short tag further down to form the Java API version.
PATCH_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_PATCH}'))")

echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"

Expand Down Expand Up @@ -96,3 +97,10 @@ find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r
sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
sed_runner "s@rapids-\${localWorkspaceFolderBasename}-${CURRENT_SHORT_TAG}@rapids-\${localWorkspaceFolderBasename}-${NEXT_SHORT_TAG}@g" "${filename}"
done

# Update Java API version
# The Java version is the short tag followed by the normalized patch component
# (PATCH_PEP440 is computed earlier in this script).
NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}"
# Rewrite the quoted VERSION="..." assignment in the Java build script.
sed_runner "s/VERSION=\".*\"/VERSION=\"${NEXT_FULL_JAVA_TAG}\"/g" java/build.sh
# For every java/*/pom.xml: on lines that contain both CUVS_JAVA version
# markers, replace the markers and everything between them with the markers
# wrapping a fresh <version> element. The empty pattern in `s//.../` reuses the
# address regex that selected the line.
# NOTE(review): if no pom.xml matches, the glob is presumably passed through
# unexpanded as a literal path — confirm sed_runner's behavior in that case.
for FILE in java/*/pom.xml; do
sed_runner "/<!--CUVS_JAVA#VERSION_UPDATE_MARKER_START-->.*<!--CUVS_JAVA#VERSION_UPDATE_MARKER_END-->/s//<!--CUVS_JAVA#VERSION_UPDATE_MARKER_START--><version>${NEXT_FULL_JAVA_TAG}<\/version><!--CUVS_JAVA#VERSION_UPDATE_MARKER_END-->/g" "${FILE}"
done
9 changes: 8 additions & 1 deletion cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

#include "cuvs_cagra_wrapper.h"
#include <cuvs/neighbors/hnsw.hpp>
#include <raft/core/logger.hpp>

#include <chrono>
#include <memory>

namespace cuvs::bench {
Expand Down Expand Up @@ -90,8 +92,13 @@ void cuvs_cagra_hnswlib<T, IdxT>::build(const T* dataset, size_t nrow)
auto host_dataset_view = raft::make_host_matrix_view<const T, int64_t>(dataset, nrow, this->dim_);
auto opt_dataset_view =
std::optional<raft::host_matrix_view<const T, int64_t>>(std::move(host_dataset_view));
hnsw_index_ = cuvs::neighbors::hnsw::from_cagra(
// Time the CAGRA -> HNSW conversion with a monotonic clock. system_clock follows
// wall-clock time and may be adjusted while the conversion runs, which would
// skew (or even negate) the measured interval; steady_clock never goes backwards.
const auto start_clock = std::chrono::steady_clock::now();
hnsw_index_ = cuvs::neighbors::hnsw::from_cagra(
  handle_, build_param_.hnsw_index_params, *cagra_index, opt_dataset_view);
// Whole seconds elapsed; narrowed explicitly to int to match the %d conversions below.
const auto elapsed_sec = static_cast<int>(
  std::chrono::duration_cast<std::chrono::seconds>(std::chrono::steady_clock::now() - start_clock)
    .count());
// Zero-pad the seconds field so 1 min 5 s is reported as "1:05" rather than "1:5".
RAFT_LOG_DEBUG("Graph saved to HNSW format in %d:%02d min", elapsed_sec / 60, elapsed_sec % 60);
}

template <typename T, typename IdxT>
Expand Down
66 changes: 48 additions & 18 deletions cpp/include/cuvs/neighbors/cagra.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1599,11 +1599,16 @@ void deserialize(raft::resources const& handle,
* @param[in] handle the raft handle
* @param[in] os output stream
* @param[in] index CAGRA index
* @param[in] dataset [optional] host array that stores the dataset, required if the index
* does not contain the dataset.
*
*/
void serialize_to_hnswlib(raft::resources const& handle,
std::ostream& os,
const cuvs::neighbors::cagra::index<float, uint32_t>& index);
void serialize_to_hnswlib(
raft::resources const& handle,
std::ostream& os,
const cuvs::neighbors::cagra::index<float, uint32_t>& index,
std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>> dataset =
std::nullopt);

/**
* Save a CAGRA build index in hnswlib base-layer-only serialized format
Expand All @@ -1628,11 +1633,16 @@ void serialize_to_hnswlib(raft::resources const& handle,
* @param[in] handle the raft handle
* @param[in] filename the file name for saving the index
* @param[in] index CAGRA index
* @param[in] dataset [optional] host array that stores the dataset, required if the index
* does not contain the dataset.
*
*/
void serialize_to_hnswlib(raft::resources const& handle,
const std::string& filename,
const cuvs::neighbors::cagra::index<float, uint32_t>& index);
void serialize_to_hnswlib(
raft::resources const& handle,
const std::string& filename,
const cuvs::neighbors::cagra::index<float, uint32_t>& index,
std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>> dataset =
std::nullopt);

/**
* Write the CAGRA built index as a base layer HNSW index to an output stream
Expand All @@ -1656,11 +1666,16 @@ void serialize_to_hnswlib(raft::resources const& handle,
* @param[in] handle the raft handle
* @param[in] os output stream
* @param[in] index CAGRA index
* @param[in] dataset [optional] host array that stores the dataset, required if the index
* does not contain the dataset.
*
*/
void serialize_to_hnswlib(raft::resources const& handle,
std::ostream& os,
const cuvs::neighbors::cagra::index<int8_t, uint32_t>& index);
void serialize_to_hnswlib(
raft::resources const& handle,
std::ostream& os,
const cuvs::neighbors::cagra::index<int8_t, uint32_t>& index,
std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>> dataset =
std::nullopt);

/**
* Save a CAGRA build index in hnswlib base-layer-only serialized format
Expand All @@ -1685,11 +1700,16 @@ void serialize_to_hnswlib(raft::resources const& handle,
* @param[in] handle the raft handle
* @param[in] filename the file name for saving the index
* @param[in] index CAGRA index
* @param[in] dataset [optional] host array that stores the dataset, required if the index
* does not contain the dataset.
*
*/
void serialize_to_hnswlib(raft::resources const& handle,
const std::string& filename,
const cuvs::neighbors::cagra::index<int8_t, uint32_t>& index);
void serialize_to_hnswlib(
raft::resources const& handle,
const std::string& filename,
const cuvs::neighbors::cagra::index<int8_t, uint32_t>& index,
std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>> dataset =
std::nullopt);

/**
* Write the CAGRA built index as a base layer HNSW index to an output stream
Expand All @@ -1713,11 +1733,16 @@ void serialize_to_hnswlib(raft::resources const& handle,
* @param[in] handle the raft handle
* @param[in] os output stream
* @param[in] index CAGRA index
* @param[in] dataset [optional] host array that stores the dataset, required if the index
* does not contain the dataset.
*
*/
void serialize_to_hnswlib(raft::resources const& handle,
std::ostream& os,
const cuvs::neighbors::cagra::index<uint8_t, uint32_t>& index);
void serialize_to_hnswlib(
raft::resources const& handle,
std::ostream& os,
const cuvs::neighbors::cagra::index<uint8_t, uint32_t>& index,
std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>> dataset =
std::nullopt);

/**
* Save a CAGRA build index in hnswlib base-layer-only serialized format
Expand All @@ -1742,11 +1767,16 @@ void serialize_to_hnswlib(raft::resources const& handle,
* @param[in] handle the raft handle
* @param[in] filename the file name for saving the index
* @param[in] index CAGRA index
* @param[in] dataset [optional] host array that stores the dataset, required if the index
* does not contain the dataset.
*
*/
void serialize_to_hnswlib(raft::resources const& handle,
const std::string& filename,
const cuvs::neighbors::cagra::index<uint8_t, uint32_t>& index);
void serialize_to_hnswlib(
raft::resources const& handle,
const std::string& filename,
const cuvs::neighbors::cagra::index<uint8_t, uint32_t>& index,
std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>> dataset =
std::nullopt);

/**
* @}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cuvs/neighbors/hnsw.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ enum cuvsHnswHierarchy {

struct cuvsHnswIndexParams {
/* hierarchy of the hnsw index */
cuvsHnswHierarchy hierarchy;
enum cuvsHnswHierarchy hierarchy;
/** Size of the candidate list during hierarchy construction when hierarchy is `CPU`*/
int ef_construction;
/** Number of host threads to use to construct hierarchy when hierarchy is `CPU`
Expand Down
95 changes: 50 additions & 45 deletions cpp/src/neighbors/cagra_serialize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,51 +20,56 @@

namespace cuvs::neighbors::cagra {

#define CUVS_INST_CAGRA_SERIALIZE(DTYPE) \
void serialize(raft::resources const& handle, \
const std::string& filename, \
const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index, \
bool include_dataset) \
{ \
cuvs::neighbors::cagra::detail::serialize<DTYPE, uint32_t>( \
handle, filename, index, include_dataset); \
}; \
\
void deserialize(raft::resources const& handle, \
const std::string& filename, \
cuvs::neighbors::cagra::index<DTYPE, uint32_t>* index) \
{ \
cuvs::neighbors::cagra::detail::deserialize<DTYPE, uint32_t>(handle, filename, index); \
}; \
void serialize(raft::resources const& handle, \
std::ostream& os, \
const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index, \
bool include_dataset) \
{ \
cuvs::neighbors::cagra::detail::serialize<DTYPE, uint32_t>( \
handle, os, index, include_dataset); \
} \
\
void deserialize(raft::resources const& handle, \
std::istream& is, \
cuvs::neighbors::cagra::index<DTYPE, uint32_t>* index) \
{ \
cuvs::neighbors::cagra::detail::deserialize<DTYPE, uint32_t>(handle, is, index); \
} \
\
void serialize_to_hnswlib(raft::resources const& handle, \
std::ostream& os, \
const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index) \
{ \
cuvs::neighbors::cagra::detail::serialize_to_hnswlib<DTYPE, uint32_t>(handle, os, index); \
} \
\
void serialize_to_hnswlib(raft::resources const& handle, \
const std::string& filename, \
const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index) \
{ \
cuvs::neighbors::cagra::detail::serialize_to_hnswlib<DTYPE, uint32_t>( \
handle, filename, index); \
/**
 * Generates the non-template CAGRA (de)serialization entry points for one element type.
 *
 * For the given DTYPE (index type fixed to uint32_t) the expansion defines six functions, each of
 * which forwards its arguments unchanged to the same-named template in
 * cuvs::neighbors::cagra::detail:
 *   - serialize            : to a file name, and to an output stream
 *   - deserialize          : from a file name, and from an input stream
 *   - serialize_to_hnswlib : to an output stream, and to a file name; both take an optional host
 *                            dataset view that is passed through to the detail implementation
 *
 * The function definitions carry no trailing semicolon, so the expansion does not introduce empty
 * declarations at namespace scope.
 *
 * @param DTYPE element type of the CAGRA index to instantiate the functions for
 */
#define CUVS_INST_CAGRA_SERIALIZE(DTYPE) \
  void serialize(raft::resources const& handle, \
                 const std::string& filename, \
                 const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index, \
                 bool include_dataset) \
  { \
    cuvs::neighbors::cagra::detail::serialize<DTYPE, uint32_t>( \
      handle, filename, index, include_dataset); \
  } \
  \
  void deserialize(raft::resources const& handle, \
                   const std::string& filename, \
                   cuvs::neighbors::cagra::index<DTYPE, uint32_t>* index) \
  { \
    cuvs::neighbors::cagra::detail::deserialize<DTYPE, uint32_t>(handle, filename, index); \
  } \
  \
  void serialize(raft::resources const& handle, \
                 std::ostream& os, \
                 const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index, \
                 bool include_dataset) \
  { \
    cuvs::neighbors::cagra::detail::serialize<DTYPE, uint32_t>( \
      handle, os, index, include_dataset); \
  } \
  \
  void deserialize(raft::resources const& handle, \
                   std::istream& is, \
                   cuvs::neighbors::cagra::index<DTYPE, uint32_t>* index) \
  { \
    cuvs::neighbors::cagra::detail::deserialize<DTYPE, uint32_t>(handle, is, index); \
  } \
  \
  void serialize_to_hnswlib( \
    raft::resources const& handle, \
    std::ostream& os, \
    const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index, \
    std::optional<raft::host_matrix_view<const DTYPE, int64_t, raft::row_major>> dataset) \
  { \
    cuvs::neighbors::cagra::detail::serialize_to_hnswlib<DTYPE, uint32_t>( \
      handle, os, index, dataset); \
  } \
  \
  void serialize_to_hnswlib( \
    raft::resources const& handle, \
    const std::string& filename, \
    const cuvs::neighbors::cagra::index<DTYPE, uint32_t>& index, \
    std::optional<raft::host_matrix_view<const DTYPE, int64_t, raft::row_major>> dataset) \
  { \
    cuvs::neighbors::cagra::detail::serialize_to_hnswlib<DTYPE, uint32_t>( \
      handle, filename, index, dataset); \
  }

} // namespace cuvs::neighbors::cagra
Loading

0 comments on commit 6cc5059

Please sign in to comment.