Skip to content

Commit

Permalink
Add bytes_per_second to groupby max benchmark.
Browse files Browse the repository at this point in the history
To calculate the number of bytes read and written by the benchmark, a few
helper functions are introduced that calculate the payload size of a
column, a table, and the results of a groupby.

This patch relates to rapidsai#13735.
  • Loading branch information
Martin Marenz committed Aug 28, 2023
1 parent 3c8ce98 commit 8a474b6
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 11 deletions.
5 changes: 3 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ target_include_directories(

# Use an OBJECT library so we only compile these helper source files only once
add_library(
cudf_benchmark_common OBJECT "${CUDF_SOURCE_DIR}/tests/utilities/base_fixture.cpp"
synchronization/synchronization.cpp io/cuio_common.cpp
cudf_benchmark_common OBJECT
"${CUDF_SOURCE_DIR}/tests/utilities/base_fixture.cpp" synchronization/synchronization.cpp
io/cuio_common.cpp common/memory_statistics.cpp
)
target_link_libraries(cudf_benchmark_common PRIVATE cudf_datagen $<TARGET_NAME_IF_EXISTS:conda_env>)
add_custom_command(
Expand Down
51 changes: 51 additions & 0 deletions cpp/benchmarks/common/memory_statistics.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "memory_statistics.hpp"

#include <cudf/column/column.hpp>
#include <cudf/null_mask.hpp>

#include <numeric>

// Payload size in bytes of a single column: the element buffer plus, for
// nullable columns, the validity bitmask allocation.
uint64_t required_bytes(const cudf::column_view& column)
{
  // Element buffer: number of rows times the size of one element of the column's type.
  uint64_t const data_bytes = column.size() * cudf::size_of(column.type());

  // The validity bitmask is only counted when the column is nullable.
  uint64_t const mask_bytes =
    column.nullable() ? cudf::bitmask_allocation_size_bytes(column.size()) : 0;

  return data_bytes + mask_bytes;
}

// Payload size in bytes of a table: the sum of the payload sizes of all of its columns.
uint64_t required_bytes(const cudf::table_view& table)
{
  // The type of the initial value determines std::accumulate's accumulator type.
  // Seeding with a plain `0` would make it `int`, so every partial sum would be
  // narrowed back to `int` and tables with more than INT_MAX bytes of payload would
  // report a corrupted size. Seed with a 64-bit zero instead.
  return std::accumulate(
    table.begin(), table.end(), uint64_t{0}, [](uint64_t acc, const auto& col) {
      return acc + required_bytes(col);
    });
}

// Payload size in bytes of a sequence of groupby aggregation results: the sum of the
// payload sizes of every result column of every aggregation result.
uint64_t required_bytes(
  const cudf::host_span<cudf::groupby::aggregation_result>& aggregation_results)
{
  // Outer fold walks the aggregation results; the inner fold walks the columns
  // held by one result and continues from the running total.
  return std::accumulate(
    aggregation_results.begin(),
    aggregation_results.end(),
    uint64_t{0},
    [](uint64_t total, auto const& aggregation) {
      return std::accumulate(
        aggregation.results.begin(),
        aggregation.results.end(),
        total,
        [](uint64_t acc, auto const& col) { return acc + required_bytes(col->view()); });
    });
}
57 changes: 57 additions & 0 deletions cpp/benchmarks/common/memory_statistics.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/column/column_view.hpp>
#include <cudf/groupby.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/utilities/span.hpp>

/**
 * @brief Calculate the number of bytes needed to completely read/write the provided column.
 *
 * The function computes only the size of the payload of the column in bytes; it excludes
 * any metadata of the column structure itself. The payload is the number of rows multiplied
 * by the size of one element of the column's type, plus the size of the null mask allocation
 * if the column is nullable.
 *
 * NOTE(review): the element size is obtained via `cudf::size_of`, so this presumably only
 * supports fixed-width column types -- confirm before using it with nested or string columns.
 *
 * @param column View of the input column.
 * @returns Number of bytes needed to read or write the column.
 */
uint64_t required_bytes(const cudf::column_view& column);

/**
 * @brief Calculate the number of bytes needed to completely read/write the provided table.
 *
 * The function computes only the size of the payload of the table in bytes; it excludes
 * any metadata of the column structure itself. The result is the sum of the payload sizes
 * of all columns in the table.
 *
 * @param table View of the input table.
 * @returns Number of bytes needed to read or write the table.
 */
uint64_t required_bytes(const cudf::table_view& table);

/**
 * @brief Calculate the number of bytes needed to completely read/write the provided range of
 * aggregation results.
 *
 * The function computes only the size of the payload of the aggregation results in bytes; it
 * excludes any metadata of the column structure itself. The result is the sum of the payload
 * sizes of every result column of every aggregation result in the range.
 *
 * @param aggregation_results Sequence of aggregation results from groupby execution.
 * @returns Number of bytes needed to read or write the aggregation results.
 */
uint64_t required_bytes(
const cudf::host_span<cudf::groupby::aggregation_result>& aggregation_results);
34 changes: 25 additions & 9 deletions cpp/benchmarks/groupby/group_max.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/common/memory_statistics.hpp>

#include <cudf/groupby.hpp>

#include <nvbench/nvbench.cuh>

#include <optional>

template <typename Type>
void bench_groupby_max(nvbench::state& state, nvbench::type_list<Type>)
{
Expand All @@ -31,25 +34,38 @@ void bench_groupby_max(nvbench::state& state, nvbench::type_list<Type>)
return create_random_column(cudf::type_to_id<int32_t>(), row_count{size}, profile);
}();

auto const null_freq = state.get_float64("null_probability");
bool const has_null = null_freq > 0;

auto const vals = [&] {
auto builder = data_profile_builder().cardinality(0).distribution(
cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 1000);
if (const auto null_freq = state.get_float64("null_probability"); null_freq > 0) {
builder.null_probability(null_freq);
} else {
builder.no_validity();
}
auto builder = data_profile_builder()
.cardinality(0)
.null_probability(has_null ? std::optional<double>(null_freq) : std::nullopt)
.distribution(cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 1000);

return create_random_column(cudf::type_to_id<Type>(), row_count{size}, data_profile{builder});
}();

auto keys_view = keys->view();
auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys_view, keys_view, keys_view}));
auto keys_view = keys->view();
auto const keys_table = cudf::table_view({keys_view, keys_view, keys_view});
auto gb_obj = cudf::groupby::groupby(keys_table);

std::vector<cudf::groupby::aggregation_request> requests;
requests.emplace_back(cudf::groupby::aggregation_request());
requests[0].values = vals->view();
requests[0].aggregations.push_back(cudf::make_max_aggregation<cudf::groupby_aggregation>());

// add memory statistics
state.add_global_memory_reads<nvbench::uint8_t>(required_bytes(vals->view()));
state.add_global_memory_reads<nvbench::uint8_t>(required_bytes(keys_table));

// The number of written bytes depends on the random distribution of keys;
// for larger `size` it converges to the number of elements
// in the input distribution (101 elements).
auto [res_table, res_agg] = gb_obj.aggregate(requests);
state.add_global_memory_writes<uint8_t>(required_bytes(res_table->view()));
state.add_global_memory_writes<uint8_t>(required_bytes(res_agg));

state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); });
Expand Down

0 comments on commit 8a474b6

Please sign in to comment.