Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into cudf-polars-multi-groupby
Browse files Browse the repository at this point in the history
  • Loading branch information
rjzamora authored Jan 29, 2025
2 parents f5205bd + 43fc535 commit a7cd29f
Show file tree
Hide file tree
Showing 164 changed files with 2,039 additions and 833 deletions.
12 changes: 6 additions & 6 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ java/ @rapidsai/cudf-java-codeowners
#CI code owners
/.github/ @rapidsai/ci-codeowners
/ci/ @rapidsai/ci-codeowners
/.pre-commit-config.yaml @rapidsai/ci-codeowners

#packaging code owners
/.devcontainer/ @rapidsai/packaging-codeowners
/conda/ @rapidsai/packaging-codeowners
/dependencies.yaml @rapidsai/packaging-codeowners
/build.sh @rapidsai/packaging-codeowners
pyproject.toml @rapidsai/packaging-codeowners
/.pre-commit-config.yaml @rapidsai/packaging-codeowners
/.devcontainer/ @rapidsai/packaging-codeowners
/conda/ @rapidsai/packaging-codeowners
dependencies.yaml @rapidsai/packaging-codeowners
/build.sh @rapidsai/packaging-codeowners
pyproject.toml @rapidsai/packaging-codeowners
8 changes: 4 additions & 4 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
package-name: libcudf
package-type: cpp
wheel-build-pylibcudf:
needs: [wheel-publish-libcudf]
needs: [wheel-build-libcudf]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand All @@ -111,7 +111,7 @@ jobs:
package-name: pylibcudf
package-type: python
wheel-build-cudf:
needs: wheel-publish-pylibcudf
needs: wheel-build-pylibcudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand All @@ -132,7 +132,7 @@ jobs:
package-name: cudf
package-type: python
wheel-build-dask-cudf:
needs: wheel-publish-cudf
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand All @@ -155,7 +155,7 @@ jobs:
package-name: dask_cudf
package-type: python
wheel-build-cudf-polars:
needs: wheel-publish-pylibcudf
needs: wheel-build-pylibcudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand Down
23 changes: 11 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,6 @@ repos:
"python/cudf_polars/cudf_polars",
"python/dask_cudf/dask_cudf"]
pass_filenames: false
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.9.1
hooks:
- id: nbqa-isort
# Use the cudf_kafka isort orderings in notebooks so that dask
# and RAPIDS packages have their own sections.
args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.6
hooks:
Expand Down Expand Up @@ -153,15 +146,13 @@ repos:
^CHANGELOG.md$
)
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.0
rev: v0.9.3
hooks:
- id: ruff
args: ["--fix"]
files: python/.*$
- id: ruff-format
files: python/.*$
- repo: https://github.com/rapidsai/pre-commit-hooks
rev: v0.4.0
rev: v0.5.0
hooks:
- id: verify-copyright
exclude: |
Expand All @@ -172,11 +163,19 @@ repos:
cpp/cmake/Modules/FindCUDAToolkit[.]cmake$
)
- id: verify-alpha-spec
- id: verify-codeowners
args: [--fix, --project-prefix=cudf]
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.16.0
rev: v1.17.0
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.10.0.1
hooks:
- id: shellcheck
args: ["--severity=warning"]
files: ^ci/

default_language_version:
python: python3
11 changes: 7 additions & 4 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

set -euo pipefail

export RAPIDS_VERSION="$(rapids-version)"
export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
RAPIDS_VERSION="$(rapids-version)"
RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
export RAPIDS_VERSION
export RAPIDS_VERSION_MAJOR_MINOR

rapids-logger "Create test conda environment"
. /opt/conda/etc/profile.d/conda.sh
Expand Down Expand Up @@ -33,15 +35,16 @@ rapids-mamba-retry install \
"cudf=${RAPIDS_VERSION}" \
"dask-cudf=${RAPIDS_VERSION}"

export RAPIDS_DOCS_DIR="$(mktemp -d)"
RAPIDS_DOCS_DIR="$(mktemp -d)"
export RAPIDS_DOCS_DIR

EXITCODE=0
trap "EXITCODE=1" ERR
set +e

rapids-logger "Build CPP docs"
pushd cpp/doxygen
aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_MAJOR_MINOR}/rmm.tag . || echo "Failed to download rmm Doxygen tag"
aws s3 cp s3://rapidsai-docs/librmm/html/"${RAPIDS_VERSION_MAJOR_MINOR}"/rmm.tag . || echo "Failed to download rmm Doxygen tag"
doxygen Doxyfile
mkdir -p "${RAPIDS_DOCS_DIR}/libcudf/html"
mv html/* "${RAPIDS_DOCS_DIR}/libcudf/html"
Expand Down
4 changes: 2 additions & 2 deletions ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

set -euo pipefail

package_dir="python/cudf"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"

# Downloads libcudf and pylibcudf wheels from this current build,
# then ensures 'cudf' wheel builds always use the 'libcudf' and 'pylibcudf' just built in the same CI run.
Expand Down
4 changes: 2 additions & 2 deletions ci/build_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -8,5 +8,5 @@ package_dir="python/cudf_polars"
./ci/build_wheel.sh cudf-polars ${package_dir}
./ci/validate_wheel.sh ${package_dir} dist

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
4 changes: 2 additions & 2 deletions ci/build_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -8,5 +8,5 @@ package_dir="python/dask_cudf"
./ci/build_wheel.sh dask-cudf ${package_dir}
./ci/validate_wheel.sh ${package_dir} dist

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 python ${package_dir}/dist
2 changes: 1 addition & 1 deletion ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ set -euo pipefail
package_name="libcudf"
package_dir="python/libcudf"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"

rapids-logger "Generating build requirements"

Expand Down
4 changes: 2 additions & 2 deletions ci/build_wheel_pylibcudf.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

set -euo pipefail

package_dir="python/pylibcudf"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"

# Downloads libcudf wheel from this current build,
# then ensures 'pylibcudf' wheel builds always use the 'libcudf' just built in the same CI run.
Expand Down
6 changes: 3 additions & 3 deletions ci/check_style.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -20,8 +20,8 @@ RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

FORMAT_FILE_URL="https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION_MAJOR_MINOR}/cmake-format-rapids-cmake.json"
export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
mkdir -p "$(dirname "${RAPIDS_CMAKE_FORMAT_FILE}")"
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} "${FORMAT_FILE_URL}"

# Run pre-commit checks
pre-commit run --all-files --show-diff-on-failure
16 changes: 9 additions & 7 deletions ci/checks/doxygen.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
###############################
# cuDF doxygen warnings check #
###############################
Expand All @@ -14,25 +14,27 @@ fi
function version { echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'; }

# doxygen supported version 1.9.1
DOXYGEN_VERSION=`doxygen --version`
if [ ! $(version "$DOXYGEN_VERSION") -eq $(version "1.9.1") ] ; then
DOXYGEN_VERSION=$(doxygen --version)
if [ ! "$(version "$DOXYGEN_VERSION")" -eq "$(version "1.9.1")" ] ; then
echo -e "warning: Unsupported doxygen version $DOXYGEN_VERSION"
echo -e "Expecting doxygen version 1.9.1"
exit 0
fi

# Set variables for doxygen
# We can't use gha-tools' rapids-version and rapids-version-major-minor here because this script can run outside of CI
export RAPIDS_VERSION="$(sed -E -e "s/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2.\3/" VERSION)"
export RAPIDS_VERSION_MAJOR_MINOR="$(sed -E -e "s/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2/" VERSION)"
RAPIDS_VERSION="$(sed -E -e "s/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2.\3/" VERSION)"
RAPIDS_VERSION_MAJOR_MINOR="$(sed -E -e "s/^([0-9]{2})\.([0-9]{2})\.([0-9]{2}).*$/\1.\2/" VERSION)"
export RAPIDS_VERSION
export RAPIDS_VERSION_MAJOR_MINOR

# Run doxygen, ignore missing tag files error
TAG_ERROR1="error: Tag file '.*.tag' does not exist or is not a file. Skipping it..."
TAG_ERROR2="error: cannot open tag file .*.tag for writing"
DOXYGEN_STDERR=`cd cpp/doxygen && { cat Doxyfile ; echo QUIET = YES; echo GENERATE_HTML = NO; } | doxygen - 2>&1 | sed "/\($TAG_ERROR1\|$TAG_ERROR2\)/d"`
DOXYGEN_STDERR=$(cd cpp/doxygen && { cat Doxyfile ; echo QUIET = YES; echo GENERATE_HTML = NO; } | doxygen - 2>&1 | sed "/\($TAG_ERROR1\|$TAG_ERROR2\)/d")
RETVAL=$?

if [ "$RETVAL" != "0" ] || [ ! -z "$DOXYGEN_STDERR" ]; then
if [ "$RETVAL" != "0" ] || [ -n "$DOXYGEN_STDERR" ]; then
echo -e "$DOXYGEN_STDERR"
RETVAL=1 #because return value is not generated by doxygen 1.8.20
fi
Expand Down
4 changes: 1 addition & 3 deletions ci/cpp_linters.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -20,8 +20,6 @@ set +u
conda activate clang_tidy
set -u

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

source rapids-configure-sccache

# Run the build via CMake, which will run clang-tidy when CUDF_STATIC_LINTERS is enabled.
Expand Down
26 changes: 18 additions & 8 deletions ci/cudf_pandas_scripts/fetch_pandas_versions.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,34 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

import argparse

import requests
from packaging.version import Version
from packaging.specifiers import SpecifierSet
import argparse
from packaging.version import Version


def get_pandas_versions(pandas_range):
url = "https://pypi.org/pypi/pandas/json"
response = requests.get(url)
data = response.json()
versions = [Version(v) for v in data['releases']]
versions = [Version(v) for v in data["releases"]]
specifier = SpecifierSet(pandas_range.lstrip("pandas"))
matching_versions = [v for v in versions if v in specifier]
matching_minors = sorted(set(".".join((str(v.major), str(v.minor))) for v in matching_versions), key=Version)
matching_minors = sorted(
set(".".join((str(v.major), str(v.minor))) for v in matching_versions),
key=Version,
)
return matching_minors


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Filter pandas versions by prefix.")
parser.add_argument("pandas_range", type=str, help="The version prefix to filter by.")
parser = argparse.ArgumentParser(
description="Filter pandas versions by prefix."
)
parser.add_argument(
"pandas_range", type=str, help="The version prefix to filter by."
)
args = parser.parse_args()

versions = get_pandas_versions(args.pandas_range)
print(','.join(versions))
print(",".join(versions))
5 changes: 2 additions & 3 deletions ci/cudf_pandas_scripts/pandas-tests/diff.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -13,7 +13,6 @@ rapids-logger "Github job name: ${GH_JOB_NAME}"
rapids-logger "Rapids version: ${RAPIDS_FULL_VERSION}"

PY_VER="310"
MAIN_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py${PY_VER}.main-${RAPIDS_FULL_VERSION}-results.json
PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py${PY_VER}.pr-${RAPIDS_FULL_VERSION}-results.json

rapids-logger "Fetching latest available results from nightly"
Expand All @@ -22,7 +21,7 @@ COMPARE_ENV=$(tail -n 1 s3_output.txt)
rapids-logger "Latest available results from nightly: ${COMPARE_ENV}"

aws s3 cp "s3://rapids-downloads/${COMPARE_ENV}" main-results.json
aws s3 cp $PR_ARTIFACT pr-results.json
aws s3 cp "$PR_ARTIFACT" pr-results.json

# Compute the diff and prepare job summary:
python -m pip install pandas tabulate
Expand Down
30 changes: 22 additions & 8 deletions ci/cudf_pandas_scripts/pandas-tests/job-summary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

Expand Down Expand Up @@ -68,17 +68,27 @@ def emoji_failed(x):
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
total_usage = main_df["_slow_function_call"] + main_df["_fast_function_call"]
main_df["CPU Usage"] = ((main_df["_slow_function_call"] / total_usage) * 100.0).round(1)
main_df["GPU Usage"] = ((main_df["_fast_function_call"] / total_usage) * 100.0).round(1)
main_df["CPU Usage"] = (
(main_df["_slow_function_call"] / total_usage) * 100.0
).round(1)
main_df["GPU Usage"] = (
(main_df["_fast_function_call"] / total_usage) * 100.0
).round(1)

total_usage = pr_df["_slow_function_call"] + pr_df["_fast_function_call"]
pr_df["CPU Usage"] = ((pr_df["_slow_function_call"] / total_usage) * 100.0).round(1)
pr_df["GPU Usage"] = ((pr_df["_fast_function_call"] / total_usage) * 100.0).round(1)
pr_df["CPU Usage"] = (
(pr_df["_slow_function_call"] / total_usage) * 100.0
).round(1)
pr_df["GPU Usage"] = (
(pr_df["_fast_function_call"] / total_usage) * 100.0
).round(1)

cpu_usage_mean = pr_df["CPU Usage"].mean().round(2)
gpu_usage_mean = pr_df["GPU Usage"].mean().round(2)

gpu_usage_rate_change = abs(pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean())
gpu_usage_rate_change = abs(
pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean()
)
pr_df["CPU Usage"] = pr_df["CPU Usage"].fillna(0)
pr_df["GPU Usage"] = pr_df["GPU Usage"].fillna(0)
main_df["CPU Usage"] = main_df["CPU Usage"].fillna(0)
Expand All @@ -92,8 +102,12 @@ def emoji_failed(x):
pr_df["CPU Usage"] = pr_df["CPU Usage"].astype(str) + "%"
pr_df["GPU Usage"] = pr_df["GPU Usage"].astype(str) + "%"

pr_df = pr_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
diff_df = diff_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
pr_df = pr_df[
["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]
]
diff_df = diff_df[
["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]
]
diff_df.columns = diff_df.columns + "_diff"
diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
diff_df["failed_diff"] = diff_df["failed_diff"].map(emoji_failed)
Expand Down
Loading

0 comments on commit a7cd29f

Please sign in to comment.