Skip to content

Commit

Permalink
some more encapsulation of the scan code
Browse files Browse the repository at this point in the history
  • Loading branch information
Tishj committed Dec 12, 2024
1 parent 41f1157 commit 427ccc8
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 46 deletions.
3 changes: 0 additions & 3 deletions src/include/duckdb/storage/compression/dictionary/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ struct DictionaryCompression {

static StringDictionaryContainer GetDictionary(ColumnSegment &segment, BufferHandle &handle);
static void SetDictionary(ColumnSegment &segment, BufferHandle &handle, StringDictionaryContainer container);
static string_t FetchStringFromDict(ColumnSegment &segment, StringDictionaryContainer dict, data_ptr_t baseptr,
int32_t dict_offset, uint16_t string_len);
static uint16_t GetStringLength(uint32_t *index_buffer_ptr, sel_t index);
};

//! Abstract class managing the compression state for size analysis or compression.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ struct CompressedStringScanState : public StringScanState {
}

public:
void ScanToFlatVector(ColumnSegment &segment, Vector &result, idx_t result_offset, idx_t start, idx_t scan_count);
void Initialize(ColumnSegment &segment, bool initialize_dictionary = true);
void ScanToFlatVector(Vector &result, idx_t result_offset, idx_t start, idx_t scan_count);
void ScanToDictionaryVector(ColumnSegment &segment, Vector &result, idx_t result_offset, idx_t start,
idx_t scan_count);

public:
void Initialize(ColumnSegment &segment, bool initialize_dictionary = true);
private:
string_t FetchStringFromDict(int32_t dict_offset, uint16_t string_len);
uint16_t GetStringLength(sel_t index);

public:
BufferHandle owned_handle;
Expand All @@ -42,6 +44,7 @@ struct CompressedStringScanState : public StringScanState {
buffer_ptr<Vector> dictionary;
idx_t dictionary_size;
StringDictionaryContainer dict;
idx_t block_size;
};

} // namespace duckdb
24 changes: 0 additions & 24 deletions src/storage/compression/dictionary/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,30 +36,6 @@ void DictionaryCompression::SetDictionary(ColumnSegment &segment, BufferHandle &
Store<uint32_t>(container.end, data_ptr_cast(&header_ptr->dict_end));
}

//! Builds a string_t that points at a string stored inside the dictionary of this block.
//! \param segment     only consulted (in the assertion) for the block size
//! \param dict        dictionary container; its `end` member locates the end of the dictionary
//! \param baseptr     pointer that `dict.end` is applied to
//! \param dict_offset distance of the string start, measured backwards from the dictionary end
//! \param string_len  length handed to the resulting string_t
//! \return string_t(nullptr, 0) when dict_offset is 0, otherwise a string_t over the dictionary bytes
string_t DictionaryCompression::FetchStringFromDict(ColumnSegment &segment, StringDictionaryContainer dict,
                                                    data_ptr_t baseptr, int32_t dict_offset, uint16_t string_len) {
	D_ASSERT(dict_offset >= 0 && dict_offset <= NumericCast<int32_t>(segment.GetBlockManager().GetBlockSize()));
	if (dict_offset == 0) {
		return string_t(nullptr, 0);
	}

	// regular string: it lives in this block, dict_offset bytes before the end of the dictionary
	auto string_start = char_ptr_cast(baseptr + dict.end - dict_offset);
	return string_t(string_start, string_len);
}

//! Returns the length of the string registered at position `index` of the index buffer.
//! Position 0 always reports length 0; every other position reports the difference between
//! its own index-buffer entry and the entry directly before it.
uint16_t DictionaryCompression::GetStringLength(uint32_t *index_buffer_ptr, sel_t index) {
	if (index == 0) {
		return 0;
	}
	const auto current_entry = index_buffer_ptr[index];
	const auto previous_entry = index_buffer_ptr[index - 1];
	return UnsafeNumericCast<uint16_t>(current_entry - previous_entry);
}

DictionaryCompressionState::DictionaryCompressionState(const CompressionInfo &info) : CompressionState(info) {
}
DictionaryCompressionState::~DictionaryCompressionState() {
Expand Down
38 changes: 29 additions & 9 deletions src/storage/compression/dictionary/decompression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,28 @@

namespace duckdb {

//! Returns the length of the string registered at position `index` of this state's index buffer.
//! Position 0 always reports length 0; any other position reports the gap between its
//! index-buffer entry and the preceding entry.
uint16_t CompressedStringScanState::GetStringLength(sel_t index) {
	if (index == 0) {
		return 0;
	}
	const auto entry = index_buffer_ptr[index];
	const auto preceding_entry = index_buffer_ptr[index - 1];
	return UnsafeNumericCast<uint16_t>(entry - preceding_entry);
}

//! Builds a string_t that points at a string inside the dictionary referenced by this scan state.
//! \param dict_offset distance of the string start, measured backwards from the dictionary end
//! \param string_len  length handed to the resulting string_t
//! \return string_t(nullptr, 0) when dict_offset is 0, otherwise a string_t over the dictionary bytes
string_t CompressedStringScanState::FetchStringFromDict(int32_t dict_offset, uint16_t string_len) {
	D_ASSERT(dict_offset >= 0 && dict_offset <= NumericCast<int32_t>(block_size));
	if (dict_offset == 0) {
		return string_t(nullptr, 0);
	}

	// regular string: stored in this block, dict_offset bytes before the end of the dictionary
	auto dictionary_end = baseptr + dict.end;
	auto string_start = char_ptr_cast(dictionary_end - dict_offset);
	return string_t(string_start, string_len);
}

void CompressedStringScanState::Initialize(ColumnSegment &segment, bool initialize_dictionary) {
baseptr = handle->Ptr() + segment.GetBlockOffset();

Expand All @@ -18,6 +40,8 @@ void CompressedStringScanState::Initialize(ColumnSegment &segment, bool initiali
index_buffer_ptr = reinterpret_cast<uint32_t *>(baseptr + index_buffer_offset);
base_data = data_ptr_cast(baseptr + DictionaryCompression::DICTIONARY_HEADER_SIZE);

block_size = segment.GetBlockManager().GetBlockSize();

if (!initialize_dictionary) {
// Used by fetch, as fetch will never produce a DictionaryVector
return;
Expand All @@ -30,14 +54,12 @@ void CompressedStringScanState::Initialize(ColumnSegment &segment, bool initiali

for (uint32_t i = 0; i < index_buffer_count; i++) {
// NOTE: the passing of dict_child_vector, will not be used, its for big strings
uint16_t str_len = DictionaryCompression::GetStringLength(index_buffer_ptr, i);
dict_child_data[i] = DictionaryCompression::FetchStringFromDict(
segment, dict, baseptr, UnsafeNumericCast<int32_t>(index_buffer_ptr[i]), str_len);
uint16_t str_len = GetStringLength(i);
dict_child_data[i] = FetchStringFromDict(UnsafeNumericCast<int32_t>(index_buffer_ptr[i]), str_len);
}
}

void CompressedStringScanState::ScanToFlatVector(ColumnSegment &segment, Vector &result, idx_t result_offset,
idx_t start, idx_t scan_count) {
void CompressedStringScanState::ScanToFlatVector(Vector &result, idx_t result_offset, idx_t start, idx_t scan_count) {
auto result_data = FlatVector::GetData<string_t>(result);

// Handling non-bitpacking-group-aligned start values;
Expand All @@ -61,10 +83,8 @@ void CompressedStringScanState::ScanToFlatVector(ColumnSegment &segment, Vector
// Lookup dict offset in index buffer
auto string_number = sel_vec->get_index(i + start_offset);
auto dict_offset = index_buffer_ptr[string_number];
auto str_len =
DictionaryCompression::GetStringLength(index_buffer_ptr, UnsafeNumericCast<sel_t>(string_number));
result_data[result_offset + i] = DictionaryCompression::FetchStringFromDict(
segment, dict, baseptr, UnsafeNumericCast<int32_t>(dict_offset), str_len);
auto str_len = GetStringLength(UnsafeNumericCast<sel_t>(string_number));
result_data[result_offset + i] = FetchStringFromDict(UnsafeNumericCast<int32_t>(dict_offset), str_len);
}
}

Expand Down
10 changes: 3 additions & 7 deletions src/storage/compression/dictionary_compression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ void DictionaryCompressionStorage::StringScanPartial(ColumnSegment &segment, Col
auto start = segment.GetRelativeIndex(state.row_index);
if (!ALLOW_DICT_VECTORS || scan_count != STANDARD_VECTOR_SIZE ||
start % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE != 0) {
scan_state.ScanToFlatVector(segment, result, result_offset, start, scan_count);
scan_state.ScanToFlatVector(result, result_offset, start, scan_count);
} else {
scan_state.ScanToDictionaryVector(segment, result, result_offset, start, scan_count);
}
Expand All @@ -148,13 +148,9 @@ void DictionaryCompressionStorage::StringScan(ColumnSegment &segment, ColumnScan
//! Fetches the single row `row_id` of a dictionary-compressed string segment and writes it to
//! position `result_idx` of `result`, using a scan state built on the handle obtained from `state`.
void DictionaryCompressionStorage::StringFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id,
Vector &result, idx_t result_idx) {
// fetch a single row from the string segment
// first pin the main buffer if it is not already pinned
CompressedStringScanState scan_state(state.GetOrInsertHandle(segment));
auto baseptr = scan_state.handle->Ptr() + segment.GetBlockOffset();
auto header_ptr = reinterpret_cast<dictionary_compression_header_t *>(baseptr);
scan_state.current_width = (bitpacking_width_t)Load<uint32_t>(data_ptr_cast(&header_ptr->bitpacking_width));

scan_state.ScanToFlatVector(segment, result, result_idx, row_id, 1);
scan_state.Initialize(segment, false);
scan_state.ScanToFlatVector(result, result_idx, row_id, 1);
}

//===--------------------------------------------------------------------===//
Expand Down

0 comments on commit 427ccc8

Please sign in to comment.