From a7f5261208f67e414ea0fdab924792bdb1283b50 Mon Sep 17 00:00:00 2001 From: Martin Marenz Date: Fri, 25 Aug 2023 20:22:00 +0200 Subject: [PATCH] Enable fractional null probability for hashing benchmark In the past, the HASHING_NVBENCH benchmark treated the nulls parameter as a boolean. Any value other than 0.0 resulted in a null probability of 1.0. Now, the nulls parameter directly determines the null probability. For instance, a value of 0.1 will generate 10% of the data as null. Moreover, setting nulls to 0.0 produces data without a null bitmask. Additionally, `bytes_per_second` is added to the benchmark. This patch relates to #13735. --- cpp/benchmarks/hashing/hash.cpp | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp index f0e9202612e..86ea17316b8 100644 --- a/cpp/benchmarks/hashing/hash.cpp +++ b/cpp/benchmarks/hashing/hash.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -24,25 +25,49 @@ static void bench_hash(nvbench::state& state) { - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const nulls = static_cast(state.get_float64("nulls")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const nulls = state.get_float64("nulls"); + // disable null bitmask if probability is exactly 0.0 + bool const no_nulls = nulls == 0.0; auto const hash_name = state.get_string("hash_name"); - data_profile const profile = data_profile_builder().null_probability(nulls); - auto const data = create_random_table( + data_profile const profile = + data_profile_builder().null_probability(no_nulls ? 
std::nullopt : std::optional{nulls}); + auto const data = create_random_table( {cudf::type_id::INT64, cudf::type_id::STRING}, row_count{num_rows}, profile); auto stream = cudf::get_default_stream(); state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + // collect statistics + cudf::strings_column_view input(data->get_column(1).view()); + auto const chars_size = input.chars_size(); + // add memory read from string column + state.add_global_memory_reads(chars_size); + // add memory read from int64_t column + state.add_global_memory_reads(num_rows); + // add memory read from bitmaks + if (!no_nulls) { + state.add_global_memory_reads(2 * + cudf::bitmask_allocation_size_bytes(num_rows)); + } + // memory written depends on used hash + if (hash_name == "murmurhash3_x86_32") { + state.add_global_memory_reads(num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::murmurhash3_x86_32(data->view()); }); } else if (hash_name == "md5") { + // md5 writes out string with 32bytes + state.add_global_memory_reads(32 * num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::md5(data->view()); }); } else if (hash_name == "spark_murmurhash3_x86_32") { + state.add_global_memory_reads(num_rows); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::spark_murmurhash3_x86_32(data->view()); });