Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into streams-final
Browse files Browse the repository at this point in the history
  • Loading branch information
shrshi authored Jan 29, 2025
2 parents cf8fc92 + 39bcd16 commit 77e3ae4
Show file tree
Hide file tree
Showing 66 changed files with 767 additions and 332 deletions.
13 changes: 2 additions & 11 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ci:
autoupdate_branch: ""
autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
autoupdate_schedule: quarterly
skip: ["verify-alpha-spec", "nbqa-isort"]
skip: ["verify-alpha-spec"]
submodules: false

repos:
Expand Down Expand Up @@ -41,13 +41,6 @@ repos:
"python/cudf_polars/cudf_polars",
"python/dask_cudf/dask_cudf"]
pass_filenames: false
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.9.1
hooks:
- id: nbqa-isort
# Use the cudf_kafka isort orderings in notebooks so that dask
# and RAPIDS packages have their own sections.
args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.6
hooks:
Expand Down Expand Up @@ -153,13 +146,11 @@ repos:
^CHANGELOG.md$
)
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.8.0
rev: v0.9.3
hooks:
- id: ruff
args: ["--fix"]
files: python/.*$
- id: ruff-format
files: python/.*$
- repo: https://github.com/rapidsai/pre-commit-hooks
rev: v0.4.0
hooks:
Expand Down
26 changes: 18 additions & 8 deletions ci/cudf_pandas_scripts/fetch_pandas_versions.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,34 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

import argparse

import requests
from packaging.version import Version
from packaging.specifiers import SpecifierSet
import argparse
from packaging.version import Version


def get_pandas_versions(pandas_range):
url = "https://pypi.org/pypi/pandas/json"
response = requests.get(url)
data = response.json()
versions = [Version(v) for v in data['releases']]
versions = [Version(v) for v in data["releases"]]
specifier = SpecifierSet(pandas_range.lstrip("pandas"))
matching_versions = [v for v in versions if v in specifier]
matching_minors = sorted(set(".".join((str(v.major), str(v.minor))) for v in matching_versions), key=Version)
matching_minors = sorted(
set(".".join((str(v.major), str(v.minor))) for v in matching_versions),
key=Version,
)
return matching_minors


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Filter pandas versions by prefix.")
parser.add_argument("pandas_range", type=str, help="The version prefix to filter by.")
parser = argparse.ArgumentParser(
description="Filter pandas versions by prefix."
)
parser.add_argument(
"pandas_range", type=str, help="The version prefix to filter by."
)
args = parser.parse_args()

versions = get_pandas_versions(args.pandas_range)
print(','.join(versions))
print(",".join(versions))
30 changes: 22 additions & 8 deletions ci/cudf_pandas_scripts/pandas-tests/job-summary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

Expand Down Expand Up @@ -68,17 +68,27 @@ def emoji_failed(x):
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
total_usage = main_df["_slow_function_call"] + main_df["_fast_function_call"]
main_df["CPU Usage"] = ((main_df["_slow_function_call"] / total_usage) * 100.0).round(1)
main_df["GPU Usage"] = ((main_df["_fast_function_call"] / total_usage) * 100.0).round(1)
main_df["CPU Usage"] = (
(main_df["_slow_function_call"] / total_usage) * 100.0
).round(1)
main_df["GPU Usage"] = (
(main_df["_fast_function_call"] / total_usage) * 100.0
).round(1)

total_usage = pr_df["_slow_function_call"] + pr_df["_fast_function_call"]
pr_df["CPU Usage"] = ((pr_df["_slow_function_call"] / total_usage) * 100.0).round(1)
pr_df["GPU Usage"] = ((pr_df["_fast_function_call"] / total_usage) * 100.0).round(1)
pr_df["CPU Usage"] = (
(pr_df["_slow_function_call"] / total_usage) * 100.0
).round(1)
pr_df["GPU Usage"] = (
(pr_df["_fast_function_call"] / total_usage) * 100.0
).round(1)

cpu_usage_mean = pr_df["CPU Usage"].mean().round(2)
gpu_usage_mean = pr_df["GPU Usage"].mean().round(2)

gpu_usage_rate_change = abs(pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean())
gpu_usage_rate_change = abs(
pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean()
)
pr_df["CPU Usage"] = pr_df["CPU Usage"].fillna(0)
pr_df["GPU Usage"] = pr_df["GPU Usage"].fillna(0)
main_df["CPU Usage"] = main_df["CPU Usage"].fillna(0)
Expand All @@ -92,8 +102,12 @@ def emoji_failed(x):
pr_df["CPU Usage"] = pr_df["CPU Usage"].astype(str) + "%"
pr_df["GPU Usage"] = pr_df["GPU Usage"].astype(str) + "%"

pr_df = pr_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
diff_df = diff_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
pr_df = pr_df[
["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]
]
diff_df = diff_df[
["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]
]
diff_df.columns = diff_df.columns + "_diff"
diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
diff_df["failed_diff"] = diff_df["failed_diff"].map(emoji_failed)
Expand Down
34 changes: 18 additions & 16 deletions ci/utils/nbtestlog2junitxml.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
# Generate a junit-xml file from parsing a nbtest log

import re
from xml.etree.ElementTree import Element, ElementTree
from os import path
import string
from enum import Enum

from os import path
from xml.etree.ElementTree import Element, ElementTree

startingPatt = re.compile(r"^STARTING: ([\w\.\-]+)$")
skippingPatt = re.compile(r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$")
skippingPatt = re.compile(
r"^SKIPPING: ([\w\.\-]+)\s*(\(([\w\.\-\ \,]+)\))?\s*$"
)
exitCodePatt = re.compile(r"^EXIT CODE: (\d+)$")
folderPatt = re.compile(r"^FOLDER: ([\w\.\-]+)$")
timePatt = re.compile(r"^real\s+([\d\.ms]+)$")
Expand Down Expand Up @@ -37,12 +38,8 @@ def makeFailureElement(outputLines):


def setFileNameAttr(attrDict, fileName):
attrDict.update(file=fileName,
classname="",
line="",
name="",
time=""
)
attrDict.update(file=fileName, classname="", line="", name="", time="")


def setClassNameAttr(attrDict, className):
attrDict["classname"] = className
Expand Down Expand Up @@ -76,11 +73,12 @@ def parseLog(logFile, testSuiteElement):
testSuiteElement.attrib["timestamp"] = ""

attrDict = {}
#setFileNameAttr(attrDict, logFile)
# setFileNameAttr(attrDict, logFile)
setFileNameAttr(attrDict, "nbtest")

parserStateEnum = Enum("parserStateEnum",
"newTest startingLine finishLine exitCode")
parserStateEnum = Enum(
"parserStateEnum", "newTest startingLine finishLine exitCode"
)
parserState = parserStateEnum.newTest

testOutput = ""
Expand All @@ -98,7 +96,9 @@ def parseLog(logFile, testSuiteElement):
setTimeAttr(attrDict, "0m0s")
skippedElement = makeTestCaseElement(attrDict)
message = m.group(3) or ""
skippedElement.append(Element("skipped", message=message, type=""))
skippedElement.append(
Element("skipped", message=message, type="")
)
testSuiteElement.append(skippedElement)
incrNumAttr(testSuiteElement, "skipped")
incrNumAttr(testSuiteElement, "tests")
Expand Down Expand Up @@ -160,4 +160,6 @@ def parseLog(logFile, testSuiteElement):
testSuiteElement = Element("testsuite", name="nbtest", hostname="")
parseLog(sys.argv[1], testSuiteElement)
testSuitesElement.append(testSuiteElement)
ElementTree(testSuitesElement).write(sys.argv[1]+".xml", xml_declaration=True)
ElementTree(testSuitesElement).write(
sys.argv[1] + ".xml", xml_declaration=True
)
5 changes: 4 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,10 @@ ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp)
# ##################################################################################################
# * rolling benchmark
# ---------------------------------------------------------------------------------
ConfigureNVBench(ROLLING_NVBENCH rolling/grouped_rolling_sum.cpp rolling/rolling_sum.cpp)
ConfigureNVBench(
ROLLING_NVBENCH rolling/grouped_range_rolling_sum.cu rolling/grouped_rolling_sum.cpp
rolling/range_rolling_sum.cu rolling/rolling_sum.cpp
)

add_custom_target(
run_benchmarks
Expand Down
11 changes: 5 additions & 6 deletions cpp/benchmarks/io/parquet/parquet_reader_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ void BM_parquet_read_data(nvbench::state& state,
void BM_parquet_read_long_strings(nvbench::state& state)
{
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));

auto const d_type = get_type_or_group(static_cast<int32_t>(data_type::STRING));
auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
Expand All @@ -106,14 +105,15 @@ void BM_parquet_read_long_strings(nvbench::state& state)

auto const avg_string_length = static_cast<cudf::size_type>(state.get_int64("avg_string_length"));
// corresponds to 3 sigma (full width 6 sigma: 99.7% of range)
auto const half_width = static_cast<cudf::size_type>(state.get_int64("half_width_string_length"));
auto const half_width =
avg_string_length >> 3; // 32 +/- 4, 128 +/- 16, 1024 +/- 128, 8k +/- 1k, etc.
auto const length_min = avg_string_length - half_width;
auto const length_max = avg_string_length + half_width;

data_profile profile =
data_profile_builder()
.cardinality(cardinality)
.avg_run_length(run_length)
.avg_run_length(1)
.distribution(data_type::STRING, distribution_id::NORMAL, length_min, length_max);

auto const num_rows_written = [&]() {
Expand Down Expand Up @@ -414,6 +414,5 @@ NVBENCH_BENCH(BM_parquet_read_long_strings)
.add_string_axis("io_type", {"DEVICE_BUFFER"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32})
.add_int64_axis("avg_string_length", {16, 48, 96})
.add_int64_axis("half_width_string_length", {16, 32, 64}); // length = avg +/- half_width
.add_int64_power_of_two_axis("avg_string_length",
nvbench::range(4, 16, 2)); // 16, 64, ... -> 64k
135 changes: 135 additions & 0 deletions cpp/benchmarks/rolling/grouped_range_rolling_sum.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>

#include <cudf/aggregation.hpp>
#include <cudf/binaryop.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/rolling.hpp>
#include <cudf/rolling/range_window_bounds.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/sorting.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/memory_resource.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/tabulate.h>

#include <nvbench/nvbench.cuh>

#include <algorithm>
#include <cstdint>
#include <optional>

/**
 * Benchmark cudf::grouped_range_rolling_window with a SUM aggregation.
 *
 * Builds `num_rows` random int32 values, a sorted grouping-key column with
 * `cardinality` distinct keys, and an orderby column of rows spaced ~1000
 * units apart (plus normal noise, optionally with nulls). Window extents are
 * taken from the "preceding_range"/"following_range" axes, scaled by the
 * 1000-unit row spacing so the axis value approximates the number of rows in
 * each window.
 */
void bench_grouped_range_rolling_sum(nvbench::state& state)
{
  auto const num_rows    = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
  // Configurable parameter is window range.
  // Since orderby column is approximately equally spaced at unit
  // intervals, this approximately controls the number of entries in
  // the window.
  auto const preceding_range = cudf::numeric_scalar<cudf::size_type>{
    static_cast<cudf::size_type>(state.get_int64("preceding_range") * 1000), true};
  // BUG FIX: this previously read the "preceding_range" axis, making the
  // "following_range" axis a silent no-op — e.g. the large-windows variant
  // requests following_range == 0 but was benchmarked with the preceding value.
  auto const following_range = cudf::numeric_scalar<cudf::size_type>{
    static_cast<cudf::size_type>(state.get_int64("following_range") * 1000), true};
  auto const has_nulls = static_cast<bool>(state.get_int64("has_nulls"));

  // Aggregation values: uniform int32 in [0, 100], no nulls.
  auto vals = [&] {
    data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution(
      cudf::type_to_id<std::int32_t>(), distribution_id::UNIFORM, 0, 100);
    return create_random_column(cudf::type_to_id<std::int32_t>(), row_count{num_rows}, profile);
  }();
  // Grouping keys: `cardinality` distinct values, sorted so groups are contiguous.
  auto const keys = [&] {
    data_profile const profile =
      data_profile_builder()
        .cardinality(cardinality)
        .no_validity()
        .distribution(cudf::type_to_id<cudf::size_type>(), distribution_id::UNIFORM, 0, num_rows);
    auto keys =
      create_random_column(cudf::type_to_id<cudf::size_type>(), row_count{num_rows}, profile);
    return cudf::sort(cudf::table_view{{keys->view()}});
  }();
  auto orderby = [&] {
    auto seq =
      cudf::make_numeric_column(cudf::data_type{cudf::type_to_id<cudf::size_type>()}, num_rows);
    // Equally spaced rows separated by 1000 unit intervals
    thrust::tabulate(
      rmm::exec_policy(cudf::get_default_stream()),
      seq->mutable_view().begin<cudf::size_type>(),
      seq->mutable_view().end<cudf::size_type>(),
      [] __device__(cudf::size_type i) { return static_cast<cudf::size_type>(i) * 1000; });
    // Add some units of noise
    // NOTE(review): the distribution is registered for duration_ms but the
    // column is created with size_type — confirm the NORMAL(-2000, 2000) range
    // actually applies to the generated column.
    data_profile profile = data_profile_builder().cardinality(0).distribution(
      cudf::type_to_id<cudf::duration_ms>(), distribution_id::NORMAL, -2000, 2000);
    // Roughly 400 nulls regardless of num_rows when has_nulls is requested.
    profile.set_null_probability(has_nulls ? std::optional<double>{400.0 / num_rows}
                                           : std::nullopt);
    auto noise =
      create_random_column(cudf::type_to_id<cudf::size_type>(), row_count{num_rows}, profile);
    auto result =
      cudf::binary_operation(seq->view(), noise->view(), cudf::binary_operator::ADD, seq->type());
    // Sort orderby within (key, orderby) order so windows are well-defined;
    // nulls sort after non-null values.
    auto columns = cudf::sort_by_key(cudf::table_view{{result->view()}},
                                     cudf::table_view{{keys->get_column(0).view(), result->view()}},
                                     {cudf::order::ASCENDING, cudf::order::ASCENDING},
                                     {cudf::null_order::AFTER, cudf::null_order::AFTER})
                     ->release();
    return std::move(columns[0]);
  }();

  auto req = cudf::make_sum_aggregation<cudf::rolling_aggregation>();

  auto const mem_stats_logger = cudf::memory_stats_logger();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    auto const result =
      cudf::grouped_range_rolling_window(keys->view(),
                                         orderby->view(),
                                         cudf::order::ASCENDING,
                                         vals->view(),
                                         cudf::range_window_bounds::get(preceding_range),
                                         cudf::range_window_bounds::get(following_range),
                                         1,
                                         *req);
  });
  // Report throughput and peak memory alongside the timing.
  auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
  state.add_element_count(static_cast<double>(num_rows) / elapsed_time / 1'000'000., "Mrows/s");
  state.add_buffer_size(
    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
}

// Main sweep: symmetric windows (preceding == following == 100 row-units)
// across row counts 2^14..2^28, with and without nulls, over four key
// cardinalities.
NVBENCH_BENCH(bench_grouped_range_rolling_sum)
  .set_name("range_grouped_rolling_sum")
  .add_int64_power_of_two_axis("num_rows", {14, 22, 28})
  .add_int64_axis("preceding_range", {100})
  .add_int64_axis("following_range", {100})
  .add_int64_axis("has_nulls", {0, 1})
  .add_int64_axis("cardinality", {10, 100, 1'000'000, 100'000'000});

// Large-window variant: preceding-only windows (following_range == 0) of 10k
// and 40k row-units at the largest row count, fixed cardinality 100.
NVBENCH_BENCH(bench_grouped_range_rolling_sum)
  .set_name("range_grouped_rolling_sum_large_windows")
  .add_int64_power_of_two_axis("num_rows", {28})
  .add_int64_axis("preceding_range", {10'000, 40'000})
  .add_int64_axis("following_range", {0})
  .add_int64_axis("has_nulls", {0, 1})
  .add_int64_axis("cardinality", {100});
Loading

0 comments on commit 77e3ae4

Please sign in to comment.