From 808bcba975b1e994bc9dd9e318c6cf855f26f1a2 Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 14 Jan 2025 12:46:41 +0100 Subject: [PATCH] optimize scan to flat vector --- .../compression/dict_fsst/decompression.cpp | 31 ++++++++++++++----- .../compression/dict_fsst/fetch_row.test | 4 +-- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/storage/compression/dict_fsst/decompression.cpp b/src/storage/compression/dict_fsst/decompression.cpp index f0a957596be..0df5f3d43bd 100644 --- a/src/storage/compression/dict_fsst/decompression.cpp +++ b/src/storage/compression/dict_fsst/decompression.cpp @@ -106,15 +106,30 @@ void CompressedStringScanState::ScanToFlatVector(Vector &result, idx_t result_of BitpackingPrimitives::UnPackBuffer(data_ptr_cast(sel_vec_ptr), src, decompress_count, current_width); - for (idx_t i = 0; i < scan_count; i++) { - // Lookup dict offset in index buffer - auto string_number = sel_vec->get_index(i + start_offset); - if (string_number == 0) { - validity.SetInvalid(result_offset + i); + if (dictionary) { + // We have prepared the full dictionary, we can reference these strings directly + auto dictionary_values = FlatVector::GetData(*dictionary); + for (idx_t i = 0; i < scan_count; i++) { + // Lookup dict offset in index buffer + auto string_number = sel_vec->get_index(i + start_offset); + if (string_number == 0) { + validity.SetInvalid(result_offset + i); + } + result_data[result_offset + i] = dictionary_values[string_number]; + } + } else { + // This path is taken for fetch, where we don't want to decompress the full dictionary + for (idx_t i = 0; i < scan_count; i++) { + // Lookup dict offset in index buffer + auto string_number = sel_vec->get_index(i + start_offset); + if (string_number == 0) { + validity.SetInvalid(result_offset + i); + } + auto dict_offset = index_buffer_ptr[string_number]; + auto str_len = GetStringLength(UnsafeNumericCast(string_number)); + result_data[result_offset + i] = + FetchStringFromDict(result, UnsafeNumericCast(dict_offset), str_len); } - auto dict_offset = index_buffer_ptr[string_number]; - auto str_len = GetStringLength(UnsafeNumericCast(string_number)); - result_data[result_offset + i] = FetchStringFromDict(result, UnsafeNumericCast(dict_offset), str_len); } } diff --git a/test/sql/storage/compression/dict_fsst/fetch_row.test b/test/sql/storage/compression/dict_fsst/fetch_row.test index 1d3f85bffe0..e60eb440ca0 100644 --- a/test/sql/storage/compression/dict_fsst/fetch_row.test +++ b/test/sql/storage/compression/dict_fsst/fetch_row.test @@ -24,7 +24,7 @@ SELECT WHEN 0 THEN 'aaaa' WHEN 1 THEN 'bbbb' WHEN 2 THEN 'cccc' - WHEN 3 THEN 'dddd' + WHEN 3 THEN 'this is not an inlined string' WHEN 4 THEN NULL END AS b FROM range(10_000) t(x); @@ -48,5 +48,5 @@ select distinct b from test order by a % 5; aaaa bbbb cccc -dddd +this is not an inlined string NULL