Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into nvcomp-error-msgs
Browse files Browse the repository at this point in the history
  • Loading branch information
vuule authored Jan 30, 2025
2 parents 92e2a00 + 5c179d1 commit b31931c
Show file tree
Hide file tree
Showing 12 changed files with 92 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,55 +3,65 @@
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "12.5",
"CUDA": "12.8",
"PYTHON_PACKAGE_MANAGER": "conda",
"BASE": "rapidsai/devcontainers:25.04-cpp-mambaforge-ubuntu22.04"
}
},
"runArgs": [
"--rm",
"--name",
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.5-conda"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.8-conda"
],
"hostRequirements": {"gpu": "optional"},
"hostRequirements": {
"gpu": "optional"
},
"features": {
"ghcr.io/rapidsai/devcontainers/features/cuda:25.4": {
"version": "12.5",
"installCompilers": false,
"installProfilers": true,
"installDevPackages": false,
"installcuDNN": false,
"installcuTensor": false,
"installNCCL": false,
"installCUDARuntime": false,
"installNVRTC": false,
"installOpenCL": false,
"installcuBLAS": false,
"installcuSPARSE": false,
"installcuFFT": false,
"installcuFile": false,
"installcuRAND": false,
"installcuSOLVER": false,
"installNPP": false,
"installnvJPEG": false,
"pruneStaticLibs": true
},
"version": "12.8",
"installCompilers": false,
"installProfilers": true,
"installDevPackages": false,
"installcuDNN": false,
"installcuTensor": false,
"installNCCL": false,
"installCUDARuntime": false,
"installNVRTC": false,
"installOpenCL": false,
"installcuBLAS": false,
"installcuSPARSE": false,
"installcuFFT": false,
"installcuFile": false,
"installcuRAND": false,
"installcuSOLVER": false,
"installNPP": false,
"installnvJPEG": false,
"pruneStaticLibs": true
},
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.4": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/cuda",
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"],
"postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.8-envs}"
],
"postAttachCommand": [
"/bin/bash",
"-c",
"if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"
],
"workspaceFolder": "/home/coder",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cudf,type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.5-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
"source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,42 @@
"context": "${localWorkspaceFolder}/.devcontainer",
"dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
"args": {
"CUDA": "12.5",
"CUDA": "12.8",
"PYTHON_PACKAGE_MANAGER": "pip",
"BASE": "rapidsai/devcontainers:25.04-cpp-cuda12.5-ubuntu22.04"
"BASE": "rapidsai/devcontainers:25.04-cpp-cuda12.8-ubuntu22.04"
}
},
"runArgs": [
"--rm",
"--name",
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.5-pip"
"${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-25.04-cuda12.8-pip"
],
"hostRequirements": {"gpu": "optional"},
"hostRequirements": {
"gpu": "optional"
},
"features": {
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:25.4": {}
},
"overrideFeatureInstallOrder": [
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
],
"initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs}"],
"postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
"initializeCommand": [
"/bin/bash",
"-c",
"mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs}"
],
"postAttachCommand": [
"/bin/bash",
"-c",
"if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"
],
"workspaceFolder": "/home/coder",
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cudf,type=bind,consistency=consistent",
"mounts": [
"source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
"source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.5-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
"source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
],
"customizations": {
"vscode": {
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ jobs:
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.04
with:
build_type: pull-request
node_type: "cpu16"
cpp-linters:
secrets: inherit
needs: checks
Expand Down Expand Up @@ -227,6 +228,7 @@ jobs:
# build for every combination of arch and CUDA version, but only for the latest Python
matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
build_type: pull-request
node_type: "cpu16"
script: "ci/build_wheel_libcudf.sh"
wheel-build-pylibcudf:
needs: [checks, wheel-build-libcudf]
Expand Down Expand Up @@ -302,8 +304,9 @@ jobs:
needs: telemetry-setup
uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.04
with:
node_type: "cpu32"
arch: '["amd64"]'
cuda: '["12.5"]'
cuda: '["12.8"]'
build_command: |
sccache -z;
build-all -DBUILD_BENCHMARKS=ON --verbose;
Expand Down Expand Up @@ -336,7 +339,7 @@ jobs:
needs: pandas-tests
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.04
with:
node_type: cpu4
node_type: "cpu4"
build_type: pull-request
run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"

Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ Instructions for a minimal build environment without conda are included below.
# create the conda environment (assuming in base `cudf` directory)
# note: RAPIDS currently doesn't support `channel_priority: strict`;
# use `channel_priority: flexible` instead
conda env create --name cudf_dev --file conda/environments/all_cuda-125_arch-x86_64.yaml
conda env create --name cudf_dev --file conda/environments/all_cuda-128_arch-x86_64.yaml
# activate the environment
conda activate cudf_dev
```
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge

```bash
conda install -c rapidsai -c conda-forge -c nvidia \
cudf=25.04 python=3.12 cuda-version=12.5
cudf=25.04 python=3.12 cuda-version=12.8
```

We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dependencies:
- cuda-nvtx-dev
- cuda-python>=12.6.2,<13.0a0
- cuda-sanitizer-api
- cuda-version=12.5
- cuda-version=12.8
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.3
Expand Down Expand Up @@ -99,4 +99,4 @@ dependencies:
- transformers==4.39.3
- typing_extensions>=4.0.0
- zlib>=1.2.13
name: all_cuda-125_arch-x86_64
name: all_cuda-128_arch-x86_64
18 changes: 11 additions & 7 deletions cpp/src/groupby/hash/compute_aggregations.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -67,13 +67,17 @@ rmm::device_uvector<cudf::size_type> compute_aggregations(
auto const grid_size =
max_occupancy_grid_size<typename SetType::ref_type<cuco::insert_and_find_tag>>(num_rows);
auto const available_shmem_size = get_available_shared_memory_size(grid_size);
auto const has_sufficient_shmem =
available_shmem_size > (compute_shmem_offsets_size(flattened_values.num_columns()) * 2);
auto const has_dictionary_request = std::any_of(
requests.begin(), requests.end(), [](cudf::groupby::aggregation_request const& request) {
return cudf::is_dictionary(request.values.type());
auto const offsets_buffer_size = compute_shmem_offsets_size(flattened_values.num_columns()) * 2;
auto const data_buffer_size = available_shmem_size - offsets_buffer_size;
auto const is_shared_memory_compatible = std::all_of(
requests.begin(), requests.end(), [&](cudf::groupby::aggregation_request const& request) {
if (cudf::is_dictionary(request.values.type())) { return false; }
// Ensure there is enough buffer space to store local aggregations up to the max cardinality
// for shared memory aggregations
auto const size = cudf::type_dispatcher<cudf::dispatch_storage_type>(request.values.type(),
size_of_functor{});
return static_cast<size_type>(data_buffer_size) >= (size * GROUPBY_CARDINALITY_THRESHOLD);
});
auto const is_shared_memory_compatible = !has_dictionary_request and has_sufficient_shmem;

// Performs naive global memory aggregations when the workload is not compatible with shared
// memory, such as when aggregating dictionary columns or when there is insufficient dynamic
Expand Down
11 changes: 1 addition & 10 deletions cpp/src/groupby/hash/compute_shared_memory_aggs.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -35,15 +35,6 @@

namespace cudf::groupby::detail::hash {
namespace {
/// Functor used by type dispatcher returning the size of the underlying C++ type
struct size_of_functor {
template <typename T>
__device__ constexpr cudf::size_type operator()()
{
return sizeof(T);
}
};

/// Shared memory data alignment
CUDF_HOST_DEVICE cudf::size_type constexpr ALIGNMENT = 8;

Expand Down
11 changes: 10 additions & 1 deletion cpp/src/groupby/hash/single_pass_functors.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,6 +25,15 @@
#include <cuda/std/cstddef>

namespace cudf::groupby::detail::hash {
/// Functor used by type dispatcher returning the size of the underlying C++ type
struct size_of_functor {
template <typename T>
CUDF_HOST_DEVICE constexpr cudf::size_type operator()()
{
return sizeof(T);
}
};

// TODO: TO BE REMOVED issue tracked via #17171
template <typename T, cudf::aggregation::Kind k>
__device__ constexpr bool is_supported()
Expand Down
6 changes: 5 additions & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ files:
all:
output: conda
matrix:
cuda: ["11.8", "12.5"]
cuda: ["11.8", "12.8"]
arch: [x86_64]
includes:
# Note that clang-tidy is not included here because cudf's preferred
Expand Down Expand Up @@ -525,6 +525,10 @@ dependencies:
cuda: "12.5"
packages:
- cuda-version=12.5
- matrix:
cuda: "12.8"
packages:
- cuda-version=12.8
cuda:
specific:
- output_types: conda
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,9 @@ dependencies:
packages:
- cuda-version=12.5
- matrix:
cuda: "12"
cuda: "12.8"
packages:
- cuda-version=12.5
- cuda-version=12.8
py_version:
specific:
- output_types: conda
Expand Down
2 changes: 1 addition & 1 deletion python/libcudf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ select = [
]

# PyPI limit is 600 MiB, fail CI before we get too close to that
max_allowed_size_compressed = '525M'
max_allowed_size_compressed = '575M'

[tool.scikit-build]
build-dir = "build/{wheel_tag}"
Expand Down

0 comments on commit b31931c

Please sign in to comment.