Skip to content

Commit

Permalink
optimize scan to flat vector
Browse files Browse the repository at this point in the history
  • Loading branch information
Tishj committed Jan 14, 2025
1 parent 327336c commit 808bcba
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 10 deletions.
31 changes: 23 additions & 8 deletions src/storage/compression/dict_fsst/decompression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,30 @@ void CompressedStringScanState::ScanToFlatVector(Vector &result, idx_t result_of

BitpackingPrimitives::UnPackBuffer<sel_t>(data_ptr_cast(sel_vec_ptr), src, decompress_count, current_width);

for (idx_t i = 0; i < scan_count; i++) {
// Lookup dict offset in index buffer
auto string_number = sel_vec->get_index(i + start_offset);
if (string_number == 0) {
validity.SetInvalid(result_offset + i);
if (dictionary) {
// We have prepared the full dictionary, we can reference these strings directly
auto dictionary_values = FlatVector::GetData<string_t>(*dictionary);
for (idx_t i = 0; i < scan_count; i++) {
// Look up this row's string number in the selection vector; it indexes the prepared dictionary directly
auto string_number = sel_vec->get_index(i + start_offset);
if (string_number == 0) {
validity.SetInvalid(result_offset + i);
}
result_data[result_offset + i] = dictionary_values[string_number];
}
} else {
// This path is taken for fetch, where we don't want to decompress the full dictionary
for (idx_t i = 0; i < scan_count; i++) {
// Lookup dict offset in index buffer
auto string_number = sel_vec->get_index(i + start_offset);
if (string_number == 0) {
validity.SetInvalid(result_offset + i);
}
auto dict_offset = index_buffer_ptr[string_number];
auto str_len = GetStringLength(UnsafeNumericCast<sel_t>(string_number));
result_data[result_offset + i] =
FetchStringFromDict(result, UnsafeNumericCast<int32_t>(dict_offset), str_len);
}
auto dict_offset = index_buffer_ptr[string_number];
auto str_len = GetStringLength(UnsafeNumericCast<sel_t>(string_number));
result_data[result_offset + i] = FetchStringFromDict(result, UnsafeNumericCast<int32_t>(dict_offset), str_len);
}
}

Expand Down
4 changes: 2 additions & 2 deletions test/sql/storage/compression/dict_fsst/fetch_row.test
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ SELECT
WHEN 0 THEN 'aaaa'
WHEN 1 THEN 'bbbb'
WHEN 2 THEN 'cccc'
WHEN 3 THEN 'dddd'
WHEN 3 THEN 'this is not an inlined string'
WHEN 4 THEN NULL
END AS b
FROM range(10_000) t(x);
Expand All @@ -48,5 +48,5 @@ select distinct b from test order by a % 5;
aaaa
bbbb
cccc
dddd
this is not an inlined string
NULL

0 comments on commit 808bcba

Please sign in to comment.