Vectorized hash grouping by a single text column #7586

Open · wants to merge 31 commits into base: main

Changes from all commits · 31 commits
9390673  Vectorized hash grouping by a single text column (akuzm, Jan 10, 2025)
a68572f  forgotten files (akuzm, Jan 10, 2025)
4aeecac  cleanup (akuzm, Jan 10, 2025)
979e59b  cleanup (akuzm, Jan 10, 2025)
3311861  bytes view (akuzm, Jan 10, 2025)
ef106b7  fix the test (akuzm, Jan 10, 2025)
89fd4ad  disable clang-tidy for imported code (akuzm, Jan 10, 2025)
192ec2c  try to detect i386 differently (akuzm, Jan 10, 2025)
37b4cca  fix cmake (akuzm, Jan 10, 2025)
16d4908  clang-tidy (akuzm, Jan 10, 2025)
e4b79ef  fix? (akuzm, Jan 22, 2025)
5b08c99  cmake (akuzm, Jan 22, 2025)
c4fc939  changelog (akuzm, Jan 22, 2025)
8cf8c69  license cleanup (akuzm, Jan 22, 2025)
8d45d71  disable on windows (akuzm, Jan 22, 2025)
2bac2c4  Merge branch 'main' into hash-text (akuzm, Jan 23, 2025)
dc188e6  fix (akuzm, Jan 23, 2025)
b63c057  fix (akuzm, Jan 23, 2025)
17d4124  fixes (akuzm, Jan 23, 2025)
cfbf9d4  more fixes (akuzm, Jan 23, 2025)
7cd0424  Merge remote-tracking branch 'origin/main' into HEAD (akuzm, Jan 24, 2025)
dddd0ce  Merge remote-tracking branch 'origin/main' into HEAD (akuzm, Jan 29, 2025)
1919e38  add a test for groupagg mode with nulls (akuzm, Jan 29, 2025)
1529ed9  format (akuzm, Jan 29, 2025)
c7efeb0  ignore the test (akuzm, Jan 29, 2025)
d0e2431  yaml (akuzm, Jan 29, 2025)
621500d  comment (akuzm, Feb 3, 2025)
04e2776  Merge remote-tracking branch 'origin/main' into HEAD (akuzm, Feb 3, 2025)
18f51f5  fix after merge (akuzm, Feb 3, 2025)
09d1036  Update tsl/src/nodes/vector_agg/hashing/hash_strategy_single_text.c (akuzm, Feb 4, 2025)
679c56d  Merge remote-tracking branch 'origin/main' into HEAD (akuzm, Feb 4, 2025)
8 changes: 7 additions & 1 deletion .github/workflows/linux-32bit-build-and-test.yaml
@@ -48,7 +48,13 @@ jobs:
DEBIAN_FRONTEND: noninteractive
# vectorized_aggregation has different output on i386 because int8 is by
# reference and currently it cannot be used for vectorized hash grouping.
IGNORES: "append-* transparent_decompression-* transparent_decompress_chunk-* pg_dump telemetry bgw_db_scheduler* hypercore_vacuum vectorized_aggregation"
# vector_agg_text and vector_agg_groupagg use the UMASH hashing library
# that we can't compile on i386.
IGNORES: >-
append-* transparent_decompression-*
transparent_decompress_chunk-* pg_dump telemetry bgw_db_scheduler*
hypercore_vacuum vectorized_aggregation vector_agg_text
vector_agg_groupagg
SKIPS: chunk_adaptive histogram_test-*
EXTENSIONS: "postgres_fdw test_decoding pageinspect pgstattuple"
strategy:
2 changes: 1 addition & 1 deletion .github/workflows/windows-build-and-test.yaml
@@ -59,7 +59,7 @@ jobs:
build_type: ${{ fromJson(needs.config.outputs.build_type) }}
ignores: ["chunk_adaptive metadata telemetry"]
tsl_ignores: ["compression_algos"]
tsl_skips: ["bgw_db_scheduler bgw_db_scheduler_fixed"]
tsl_skips: ["vector_agg_text vector_agg_groupagg bgw_db_scheduler bgw_db_scheduler_fixed"]
Contributor:
Why do we need to skip these tests on windows? Is it also because of UMASH?

Member (Author):
Right, I didn't get it to compile there, so decided to disable for now.

pg_config: ["-cfsync=off -cstatement_timeout=60s"]
include:
- pg: 14
1 change: 1 addition & 0 deletions .unreleased/vectorized-text-grouping
@@ -0,0 +1 @@
Implements: #7586 Vectorized aggregation with grouping by a single text column.
3 changes: 2 additions & 1 deletion tsl/src/nodes/vector_agg/grouping_policy.h
@@ -65,7 +65,8 @@ typedef enum
VAGT_Batch,
VAGT_HashSingleFixed2,
VAGT_HashSingleFixed4,
-VAGT_HashSingleFixed8
+VAGT_HashSingleFixed8,
+VAGT_HashSingleText
} VectorAggGroupingType;

extern GroupingPolicy *create_grouping_policy_batch(int num_agg_defs, VectorAggDef *agg_defs,
10 changes: 10 additions & 0 deletions tsl/src/nodes/vector_agg/grouping_policy_hash.c
@@ -36,6 +36,9 @@
extern HashingStrategy single_fixed_2_strategy;
extern HashingStrategy single_fixed_4_strategy;
extern HashingStrategy single_fixed_8_strategy;
+#ifdef TS_USE_UMASH
+extern HashingStrategy single_text_strategy;
+#endif

static const GroupingPolicy grouping_policy_hash_functions;

@@ -70,6 +73,11 @@ create_grouping_policy_hash(int num_agg_defs, VectorAggDef *agg_defs, int num_gr

switch (grouping_type)
{
+#ifdef TS_USE_UMASH
+case VAGT_HashSingleText:
+policy->hashing = single_text_strategy;
+break;
+#endif
case VAGT_HashSingleFixed8:
policy->hashing = single_fixed_8_strategy;
break;
@@ -84,6 +92,8 @@ create_grouping_policy_hash(int num_agg_defs, VectorAggDef *agg_defs, int num_gr
break;
}

+policy->hashing.key_body_mctx = policy->agg_extra_mctx;

policy->hashing.init(&policy->hashing, policy);

return &policy->funcs;
5 changes: 5 additions & 0 deletions tsl/src/nodes/vector_agg/hashing/CMakeLists.txt
@@ -3,4 +3,9 @@ set(SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/hash_strategy_single_fixed_4.c
${CMAKE_CURRENT_SOURCE_DIR}/hash_strategy_single_fixed_8.c
${CMAKE_CURRENT_SOURCE_DIR}/hash_strategy_common.c)

+if(USE_UMASH)
+  list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/hash_strategy_single_text.c)
+endif()

target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES})
135 changes: 135 additions & 0 deletions tsl/src/nodes/vector_agg/hashing/hash_strategy_single_text.c
@@ -0,0 +1,135 @@
/*
* This file and its contents are licensed under the Timescale License.
* Please see the included NOTICE for copyright information and
* LICENSE-TIMESCALE for a copy of the license.
*/

/*
* Implementation of column hashing for a single text column.
*/

#include <postgres.h>

#include <common/hashfn.h>

#include "compression/arrow_c_data_interface.h"
#include "nodes/decompress_chunk/compressed_batch.h"
#include "nodes/vector_agg/exec.h"
#include "nodes/vector_agg/grouping_policy_hash.h"
#include "template_helper.h"

#include "batch_hashing_params.h"

#include "umash_fingerprint_key.h"

#define EXPLAIN_NAME "single text"
#define KEY_VARIANT single_text
#define OUTPUT_KEY_TYPE BytesView

static void
single_text_key_hashing_init(HashingStrategy *hashing)
{
hashing->umash_params = umash_key_hashing_init();
}

typedef struct BytesView
{
const uint8 *data;
uint32 len;
} BytesView;
Comment on lines +35 to +39

Contributor:
Instead of defining a new type, can we use StringInfo and initReadOnlyStringInfo? Just an idea.

Member (Author):
Well, it's not a complex type, and StringInfo has some unrelated things, so I'd keep it as is.

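For reference, a minimal sketch of the reviewer's alternative, assuming PostgreSQL 16's initReadOnlyStringInfo(), which points a StringInfo at an existing buffer without allocating or copying. The name get_string_view is hypothetical and mirrors get_bytes_view() below:

static StringInfoData
get_string_view(CompressedColumnValues *column_values, int arrow_row)
{
	const uint32 start = ((uint32 *) column_values->buffers[1])[arrow_row];
	const int32 value_bytes = ((uint32 *) column_values->buffers[1])[arrow_row + 1] - start;

	/* Wrap the Arrow body buffer read-only; no allocation, no copy. */
	StringInfoData si;
	initReadOnlyStringInfo(&si, (char *) column_values->buffers[2] + start, value_bytes);
	return si;
}

As the author notes, StringInfoData also carries maxlen and cursor fields that this hot path would never touch, which is the argument for the leaner BytesView.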

static BytesView
get_bytes_view(CompressedColumnValues *column_values, int arrow_row)
{
/*
 * Arrow variable-length layout: buffers[1] holds the offsets array and
 * buffers[2] the string bodies; row i occupies bytes
 * [offsets[i], offsets[i + 1]) of the body buffer.
 */
const uint32 start = ((uint32 *) column_values->buffers[1])[arrow_row];
const int32 value_bytes = ((uint32 *) column_values->buffers[1])[arrow_row + 1] - start;
Assert(value_bytes >= 0);

return (BytesView){ .len = value_bytes, .data = &((uint8 *) column_values->buffers[2])[start] };
}

static pg_attribute_always_inline void
single_text_key_hashing_get_key(BatchHashingParams params, int row, void *restrict output_key_ptr,
void *restrict hash_table_key_ptr, bool *restrict valid)
{
Assert(params.policy->num_grouping_columns == 1);

BytesView *restrict output_key = (BytesView *) output_key_ptr;
HASH_TABLE_KEY_TYPE *restrict hash_table_key = (HASH_TABLE_KEY_TYPE *) hash_table_key_ptr;

if (unlikely(params.single_grouping_column.decompression_type == DT_Scalar))
Contributor:
Question for my own understanding (not asking for any changes for this PR):

This is deep into vector aggregation, so I would expect that this function only gets passed an arrow array. But now we need to check for different non-array formats, including impossible cases (e.g., DT_Iterator). If we only passed in arrays, these checks would not be necessary.

The arrow array format already supports everything we need. Even scalar/segmentby values can be represented by arrow arrays (e.g., run-end encoded).

Now we need this extra code to check for different formats/cases everywhere we reach into the data. Some of them shouldn't even be possible here.

IMO, the API to retrieve a value should be something like:

Datum d = arrow_array_get_value_at(array, rownum, &isnull, &valuelen);

This function can easily check the encoding of the array (dict, runend, etc.) to retrieve the value requested.

Member (Author):
Yeah, I have already regretted supporting the scalar values throughout aggregation. Technically it should perform better, because it avoids creating e.g. an arrow array with the same constant value for every row, and sometimes we perform the computations in a different way for scalar values. But the implementation complexity might be a little too high. Maybe I should look into removing this at least for the key column, and always materializing them into arrow arrays. I'm going to consider this after we merge the multi-column aggregation.

The external interface might still turn out to be more complex than what you suggest, and closer to the current CompressedColumnValues, because sometimes we have to statically generate the function that works e.g. with dictionary encoding specifically, and that won't be possible if we determine this inside an opaque callback. We can't call an opaque callback (i.e. a non-inlinable dynamic function) for every row because it's going to produce significantly less performant code.

{
*valid = !*params.single_grouping_column.output_isnull;
if (*valid)
{
output_key->len = VARSIZE_ANY_EXHDR(*params.single_grouping_column.output_value);
output_key->data =
(const uint8 *) VARDATA_ANY(*params.single_grouping_column.output_value);
}
else
{
output_key->len = 0;
output_key->data = NULL;
}
}
else if (params.single_grouping_column.decompression_type == DT_ArrowText)
{
*output_key = get_bytes_view(&params.single_grouping_column, row);
*valid = arrow_row_is_valid(params.single_grouping_column.buffers[0], row);
}
else if (params.single_grouping_column.decompression_type == DT_ArrowTextDict)
{
const int16 index = ((int16 *) params.single_grouping_column.buffers[3])[row];
*output_key = get_bytes_view(&params.single_grouping_column, index);
*valid = arrow_row_is_valid(params.single_grouping_column.buffers[0], row);
}
else
{
pg_unreachable();

Codecov / codecov/patch warning on tsl/src/nodes/vector_agg/hashing/hash_strategy_single_text.c#L88: added line #L88 (the pg_unreachable() above) was not covered by tests.
}

DEBUG_PRINT("%p consider key row %d key index %d is %d bytes: ",
params.policy,
row,
params.policy->last_used_key_index + 1,
output_key->len);
for (size_t i = 0; i < output_key->len; i++)
{
DEBUG_PRINT("%.2x.", output_key->data[i]);
}
DEBUG_PRINT("\n");

const struct umash_fp fp = umash_fprint(params.policy->hashing.umash_params,
/* seed = */ ~0ULL,
output_key->data,
output_key->len);
*hash_table_key = umash_fingerprint_get_key(fp);
}

static pg_attribute_always_inline void
single_text_key_hashing_store_new(GroupingPolicyHash *restrict policy, uint32 new_key_index,
BytesView output_key)
{
const int total_bytes = output_key.len + VARHDRSZ;
text *restrict stored = (text *) MemoryContextAlloc(policy->hashing.key_body_mctx, total_bytes);
SET_VARSIZE(stored, total_bytes);
memcpy(VARDATA(stored), output_key.data, output_key.len);
Comment on lines +113 to +116

Contributor:
Suggest making use of a PostgreSQL builtin function:

Suggested change:
-const int total_bytes = output_key.len + VARHDRSZ;
-text *restrict stored = (text *) MemoryContextAlloc(policy->hashing.key_body_mctx, total_bytes);
-SET_VARSIZE(stored, total_bytes);
-memcpy(VARDATA(stored), output_key.data, output_key.len);
+MemoryContext oldmcxt = MemoryContextSwitchTo(policy->hashing.key_body_mctx);
+text *stored = cstring_to_text_with_len((const char *) output_key.data, output_key.len);
+MemoryContextSwitchTo(oldmcxt);

Member (Author):
I think it's better to have something that can be inlined here, because it's part of a hot loop that builds the hash table.

policy->hashing.output_keys[new_key_index] = PointerGetDatum(stored);
}

/*
* We use the standard single-key key output functions.
*/
static void
single_text_emit_key(GroupingPolicyHash *policy, uint32 current_key,
TupleTableSlot *aggregated_slot)
{
return hash_strategy_output_key_single_emit(policy, current_key, aggregated_slot);
}

static void
single_text_key_hashing_prepare_for_batch(GroupingPolicyHash *policy, TupleTableSlot *vector_slot)
{
}

#include "hash_strategy_impl.c"
11 changes: 10 additions & 1 deletion tsl/src/nodes/vector_agg/hashing/hashing_strategy.h
@@ -42,10 +42,12 @@ typedef struct HashingStrategy
* This is stored separately from hash table keys, because they might not
* have the full column values, and also storing them contiguously here
* leads to better memory access patterns when emitting the results.
-* The details of the key storage are managed by the hashing strategy.
+* The details of the key storage are managed by the hashing strategy. The
+* by-reference keys can use a separate memory context for dense storage.
*/
Datum *restrict output_keys;
uint64 num_allocated_output_keys;
+MemoryContext key_body_mctx;

/*
* In single-column grouping, we store the null key outside of the hash
@@ -54,6 +56,13 @@ typedef struct HashingStrategy
* to reduce the hash table size.
*/
uint32 null_key_index;

+#ifdef TS_USE_UMASH
+/*
+ * UMASH fingerprinting parameters.
+ */
+struct umash_params *umash_params;
+#endif
} HashingStrategy;

void hash_strategy_output_key_alloc(GroupingPolicyHash *policy, uint16 nrows);
45 changes: 45 additions & 0 deletions tsl/src/nodes/vector_agg/hashing/umash_fingerprint_key.h
@@ -0,0 +1,45 @@
/*
* This file and its contents are licensed under the Timescale License.
* Please see the included NOTICE for copyright information and
* LICENSE-TIMESCALE for a copy of the license.
*/
#pragma once

/*
* Helpers to use the umash fingerprint as a hash table key in our hashing
* strategies for vectorized grouping.
*/

#include "import/umash.h"

/*
* The struct is packed so that the hash table entry fits into 16
* bytes with the uint32 key index that goes before.
*/
struct umash_fingerprint_key
{
uint32 hash;
uint64 rest;
} pg_attribute_packed();

#define HASH_TABLE_KEY_TYPE struct umash_fingerprint_key
#define KEY_HASH(X) (X.hash)
#define KEY_EQUAL(a, b) (a.hash == b.hash && a.rest == b.rest)

static inline struct umash_fingerprint_key
umash_fingerprint_get_key(struct umash_fp fp)
{
const struct umash_fingerprint_key key = {
.hash = fp.hash[0] & (~(uint32) 0),
.rest = fp.hash[1],
};
return key;
}

static inline struct umash_params *
umash_key_hashing_init()
{
struct umash_params *params = palloc0(sizeof(struct umash_params));
umash_params_derive(params, 0xabcdef1234567890ull, NULL);
return params;
}
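To spell out the size arithmetic behind the packing comment above, here is a sketch; the entry struct is an editor's illustration of "the uint32 key index that goes before", not the actual hash table entry type:

#include <assert.h>
#include <stdint.h>

struct fp_key_packed
{
	uint32_t hash;
	uint64_t rest;
} __attribute__((packed)); /* what pg_attribute_packed() amounts to on GCC/Clang */

struct example_entry /* hypothetical: key index followed by the packed key */
{
	uint32_t key_index;
	struct fp_key_packed key;
};

static_assert(sizeof(struct fp_key_packed) == 12, "packed key is 12 bytes");
static_assert(sizeof(struct example_entry) == 16, "entry fits in 16 bytes");

/* Without the packed attribute, the uint64 member would be 8-byte aligned:
 * the key would grow to 16 bytes (4-byte hole) and the entry to 24. */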
11 changes: 11 additions & 0 deletions tsl/src/nodes/vector_agg/plan.c
@@ -505,6 +505,8 @@ get_vectorized_grouping_type(Agg *agg, CustomScan *custom, List *resolved_target
/*
* We support hashed vectorized grouping by one fixed-size by-value
* compressed column.
+* We can use our hash table for GroupAggregate as well, because it preserves
+* the input order of the keys.
*/
if (num_grouping_columns == 1)
{
@@ -526,6 +528,15 @@ get_vectorized_grouping_type(Agg *agg, CustomScan *custom, List *resolved_target
break;
}
}
+#ifdef TS_USE_UMASH
+else
+{
+	Ensure(single_grouping_var->vartype == TEXTOID,
+		"invalid vector type %d for grouping",
+		single_grouping_var->vartype);
+	return VAGT_HashSingleText;
+}
+#endif
}

return VAGT_Invalid;
79 changes: 79 additions & 0 deletions tsl/test/expected/vector_agg_groupagg.out
@@ -0,0 +1,79 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
-- Check that the vectorized aggregation works properly in the GroupAggregate
-- mode.
create table groupagg(t int, s text, value int);
select create_hypertable('groupagg', 't', chunk_time_interval => 10000);
NOTICE: adding not-null constraint to column "t"
create_hypertable
-----------------------
(1,public,groupagg,t)
(1 row)

insert into groupagg
select
xfast * 100 + xslow,
case when xfast = 13 then null else xfast end,
xfast * 7 + xslow * 3
from generate_series(10, 99) xfast,
generate_series(1, 10000) xslow
;
alter table groupagg set (timescaledb.compress, timescaledb.compress_segmentby = '',
timescaledb.compress_orderby = 's');
select count(compress_chunk(x)) from show_chunks('groupagg') x;
count
-------
2
(1 row)

set enable_hashagg to off;
set timescaledb.debug_require_vector_agg to 'allow';
select s, sum(value) from groupagg group by s order by s limit 10;
s | sum
----+-----------
10 | 150715000
11 | 150785000
12 | 150855000
14 | 150995000
15 | 151065000
16 | 151135000
17 | 151205000
18 | 151275000
19 | 151345000
20 | 151415000
(10 rows)

reset timescaledb.debug_require_vector_agg;
select count(decompress_chunk(x)) from show_chunks('groupagg') x;
count
-------
2
(1 row)

alter table groupagg set (timescaledb.compress, timescaledb.compress_segmentby = '',
timescaledb.compress_orderby = 's nulls first');
select count(compress_chunk(x)) from show_chunks('groupagg') x;
count
-------
2
(1 row)

set timescaledb.debug_require_vector_agg to 'require';
select s , sum(value) from groupagg group by s order by s nulls first limit 10;
s | sum
----+-----------
| 150925000
10 | 150715000
11 | 150785000
12 | 150855000
14 | 150995000
15 | 151065000
16 | 151135000
17 | 151205000
18 | 151275000
19 | 151345000
(10 rows)

reset enable_hashagg;
reset timescaledb.debug_require_vector_agg;