Skip to content

Commit

Permalink
create an actual compression_type entry for it, and register it in th…
Browse files Browse the repository at this point in the history
…e CompressionFunctionSet like any other compression function
  • Loading branch information
Tishj committed Dec 16, 2024
1 parent e4db440 commit eb342dd
Show file tree
Hide file tree
Showing 13 changed files with 56 additions and 25 deletions.
24 changes: 22 additions & 2 deletions src/common/enum_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
#include "duckdb/execution/operator/csv_scanner/quote_rules.hpp"
#include "duckdb/execution/reservoir_sample.hpp"
#include "duckdb/function/aggregate_state.hpp"
#include "duckdb/function/compression_function.hpp"
#include "duckdb/function/copy_function.hpp"
#include "duckdb/function/function.hpp"
#include "duckdb/function/macro_function.hpp"
Expand Down Expand Up @@ -858,19 +859,38 @@ const StringUtil::EnumStringLiteral *GetCompressionTypeValues() {
{ static_cast<uint32_t>(CompressionType::COMPRESSION_ALPRD), "COMPRESSION_ALPRD" },
{ static_cast<uint32_t>(CompressionType::COMPRESSION_ZSTD), "COMPRESSION_ZSTD" },
{ static_cast<uint32_t>(CompressionType::COMPRESSION_ROARING), "COMPRESSION_ROARING" },
{ static_cast<uint32_t>(CompressionType::COMPRESSION_EMPTY), "COMPRESSION_EMPTY" },
{ static_cast<uint32_t>(CompressionType::COMPRESSION_COUNT), "COMPRESSION_COUNT" }
};
return values;
}

template<>
const char* EnumUtil::ToChars<CompressionType>(CompressionType value) {
return StringUtil::EnumToString(GetCompressionTypeValues(), 15, "CompressionType", static_cast<uint32_t>(value));
return StringUtil::EnumToString(GetCompressionTypeValues(), 16, "CompressionType", static_cast<uint32_t>(value));
}

template<>
CompressionType EnumUtil::FromString<CompressionType>(const char *value) {
return static_cast<CompressionType>(StringUtil::StringToEnum(GetCompressionTypeValues(), 15, "CompressionType", value));
return static_cast<CompressionType>(StringUtil::StringToEnum(GetCompressionTypeValues(), 16, "CompressionType", value));
}

//! Provides the value/name table for the CompressionValidity enum.
//! The table holds exactly two entries (REQUIRES_VALIDITY, NO_VALIDITY_REQUIRED);
//! callers pass that entry count alongside the returned pointer.
const StringUtil::EnumStringLiteral *GetCompressionValidityValues() {
	static constexpr StringUtil::EnumStringLiteral compression_validity_entries[] {
		{ static_cast<uint32_t>(CompressionValidity::REQUIRES_VALIDITY), "REQUIRES_VALIDITY" },
		{ static_cast<uint32_t>(CompressionValidity::NO_VALIDITY_REQUIRED), "NO_VALIDITY_REQUIRED" }
	};
	return compression_validity_entries;
}

//! Converts a CompressionValidity value to its string name by looking it up
//! in the two-entry table returned by GetCompressionValidityValues().
template<>
const char* EnumUtil::ToChars<CompressionValidity>(CompressionValidity value) {
	const auto *entries = GetCompressionValidityValues();
	const auto raw_value = static_cast<uint32_t>(value);
	return StringUtil::EnumToString(entries, 2, "CompressionValidity", raw_value);
}

//! Converts a string name to the matching CompressionValidity value by looking it up
//! in the two-entry table returned by GetCompressionValidityValues().
template<>
CompressionValidity EnumUtil::FromString<CompressionValidity>(const char *value) {
	const auto *entries = GetCompressionValidityValues();
	const auto raw_value = StringUtil::StringToEnum(entries, 2, "CompressionValidity", value);
	return static_cast<CompressionValidity>(raw_value);
}

const StringUtil::EnumStringLiteral *GetConflictManagerModeValues() {
Expand Down
4 changes: 4 additions & 0 deletions src/common/enums/compression_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ bool CompressionTypeIsDeprecated(CompressionType compression_type) {

CompressionType CompressionTypeFromString(const string &str) {
auto compression = StringUtil::Lower(str);
//! NOTE: this explicitly does not include 'constant' and 'empty validity'; these are internal compression functions,
//! not general-purpose ones
if (compression == "uncompressed") {
return CompressionType::COMPRESSION_UNCOMPRESSED;
} else if (compression == "rle") {
Expand Down Expand Up @@ -83,6 +85,8 @@ string CompressionTypeToString(CompressionType type) {
return "ALPRD";
case CompressionType::COMPRESSION_ROARING:
return "Roaring";
case CompressionType::COMPRESSION_EMPTY:
return "Empty Validity";
default:
throw InternalException("Unrecognized compression type!");
}
Expand Down
6 changes: 2 additions & 4 deletions src/function/compression_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ static const DefaultCompressionMethod internal_compression_methods[] = {
{CompressionType::COMPRESSION_FSST, FSSTFun::GetFunction, FSSTFun::TypeIsSupported},
{CompressionType::COMPRESSION_ZSTD, ZSTDFun::GetFunction, ZSTDFun::TypeIsSupported},
{CompressionType::COMPRESSION_ROARING, RoaringCompressionFun::GetFunction, RoaringCompressionFun::TypeIsSupported},
{CompressionType::COMPRESSION_EMPTY, EmptyValidityCompressionFun::GetFunction,
EmptyValidityCompressionFun::TypeIsSupported},
{CompressionType::COMPRESSION_AUTO, nullptr, nullptr}};

static optional_ptr<CompressionFunction> FindCompressionFunction(CompressionFunctionSet &set, CompressionType type,
Expand Down Expand Up @@ -101,8 +103,4 @@ optional_ptr<CompressionFunction> DBConfig::GetCompressionFunction(CompressionTy
return LoadCompressionFunction(*compression_functions, type, physical_type);
}

CompressionFunction &DBConfig::GetEmptyValidity() {
return *empty_validity;
}

} // namespace duckdb
8 changes: 8 additions & 0 deletions src/include/duckdb/common/enum_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ enum class CompressedMaterializationDirection : uint8_t;

enum class CompressionType : uint8_t;

enum class CompressionValidity : uint8_t;

enum class ConflictManagerMode : uint8_t;

enum class ConstraintType : uint8_t;
Expand Down Expand Up @@ -482,6 +484,9 @@ const char* EnumUtil::ToChars<CompressedMaterializationDirection>(CompressedMate
template<>
const char* EnumUtil::ToChars<CompressionType>(CompressionType value);

template<>
const char* EnumUtil::ToChars<CompressionValidity>(CompressionValidity value);

template<>
const char* EnumUtil::ToChars<ConflictManagerMode>(ConflictManagerMode value);

Expand Down Expand Up @@ -999,6 +1004,9 @@ CompressedMaterializationDirection EnumUtil::FromString<CompressedMaterializatio
template<>
CompressionType EnumUtil::FromString<CompressionType>(const char *value);

template<>
CompressionValidity EnumUtil::FromString<CompressionValidity>(const char *value);

template<>
ConflictManagerMode EnumUtil::FromString<ConflictManagerMode>(const char *value);

Expand Down
5 changes: 3 additions & 2 deletions src/include/duckdb/common/enums/compression_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ namespace duckdb {
enum class CompressionType : uint8_t {
COMPRESSION_AUTO = 0,
COMPRESSION_UNCOMPRESSED = 1,
COMPRESSION_CONSTANT = 2,
COMPRESSION_CONSTANT = 2, // internal only
COMPRESSION_RLE = 3,
COMPRESSION_DICTIONARY = 4,
COMPRESSION_PFOR_DELTA = 5,
Expand All @@ -28,7 +28,8 @@ enum class CompressionType : uint8_t {
COMPRESSION_ALPRD = 11,
COMPRESSION_ZSTD = 12,
COMPRESSION_ROARING = 13,
COMPRESSION_COUNT // This has to stay the last entry of the type!
COMPRESSION_EMPTY = 14, // internal only
COMPRESSION_COUNT // This has to stay the last entry of the type!
};

bool CompressionTypeIsDeprecated(CompressionType compression_type);
Expand Down
5 changes: 5 additions & 0 deletions src/include/duckdb/function/compression/compression.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,9 @@ struct RoaringCompressionFun {
static bool TypeIsSupported(const PhysicalType physical_type);
};

//! Static entry points for the "Empty Validity" compression method.
//! These two functions are paired with CompressionType::COMPRESSION_EMPTY in the
//! internal compression method table (internal_compression_methods).
struct EmptyValidityCompressionFun {
//! Produces the CompressionFunction object for the given physical type
static CompressionFunction GetFunction(PhysicalType type);
//! NOTE(review): presumably reports whether this method can handle the given physical type - confirm against the definition
static bool TypeIsSupported(const PhysicalType physical_type);
};

} // namespace duckdb
3 changes: 0 additions & 3 deletions src/include/duckdb/main/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
#include "duckdb/parser/parser_extension.hpp"
#include "duckdb/planner/operator_extension.hpp"
#include "duckdb/storage/compression/bitpacking.hpp"
#include "duckdb/function/compression_function.hpp"
#include "duckdb/function/encoding_function.hpp"

namespace duckdb {
Expand Down Expand Up @@ -363,7 +362,6 @@ struct DBConfig {
//! Returns the compression function matching the compression and physical type.
DUCKDB_API optional_ptr<CompressionFunction> GetCompressionFunction(CompressionType type,
const PhysicalType physical_type);
DUCKDB_API CompressionFunction &GetEmptyValidity();

//! Returns the encode function matching the encoding name.
DUCKDB_API optional_ptr<EncodingFunction> GetEncodeFunction(const string &name) const;
Expand Down Expand Up @@ -405,7 +403,6 @@ struct DBConfig {
string SanitizeAllowedPath(const string &path) const;

private:
unique_ptr<CompressionFunction> empty_validity;
unique_ptr<CompressionFunctionSet> compression_functions;
unique_ptr<EncodingFunctionSet> encoding_functions;
unique_ptr<CastFunctionSet> cast_functions;
Expand Down
10 changes: 5 additions & 5 deletions src/include/duckdb/storage/compression/empty_validity.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ class EmptyValidityCompression {
};

public:
static unique_ptr<CompressionFunction> CreateFunction() {
return make_uniq<CompressionFunction>(CompressionType::COMPRESSION_AUTO, PhysicalType::BIT, InitAnalyze,
Analyze, FinalAnalyze, InitCompression, Compress, FinalizeCompress,
InitScan, Scan, ScanPartial, FetchRow, Skip, InitSegment);
static CompressionFunction CreateFunction() {
return CompressionFunction(CompressionType::COMPRESSION_EMPTY, PhysicalType::BIT, InitAnalyze, Analyze,
FinalAnalyze, InitCompression, Compress, FinalizeCompress, InitScan, Scan,
ScanPartial, FetchRow, Skip, InitSegment);
}

public:
Expand All @@ -54,7 +54,7 @@ class EmptyValidityCompression {
auto function = CreateFunction();
auto &info = state.info;
auto compressed_segment =
ColumnSegment::CreateTransientSegment(db, *function, type, 0, info.GetBlockSize(), info.GetBlockSize());
ColumnSegment::CreateTransientSegment(db, function, type, 0, info.GetBlockSize(), info.GetBlockSize());
compressed_segment->count = state.count;

auto &buffer_manager = BufferManager::GetBufferManager(checkpointer.GetDatabase());
Expand Down
2 changes: 1 addition & 1 deletion src/include/duckdb/storage/table/column_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class ColumnData {
return *compression;
}

bool IsEmptyValidity() const {
bool DoesNotRequireValidity() const {
if (type.id() != LogicalTypeId::VALIDITY) {
return false;
}
Expand Down
1 change: 0 additions & 1 deletion src/main/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
namespace duckdb {

DBConfig::DBConfig() {
empty_validity = EmptyValidityCompression::CreateFunction();
compression_functions = make_uniq<CompressionFunctionSet>();
encoding_functions = make_uniq<EncodingFunctionSet>();
encoding_functions->Initialize(*this);
Expand Down
3 changes: 2 additions & 1 deletion src/storage/compression/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ add_library_unity(
patas.cpp
zstd.cpp
alprd.cpp
fsst.cpp)
fsst.cpp
empty_validity.cpp)
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:duckdb_storage_compression>
PARENT_SCOPE)
6 changes: 4 additions & 2 deletions src/storage/table/column_data_checkpointer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ ColumnDataCheckpointer::ColumnDataCheckpointer(ColumnData &col_data_p, RowGroup
checkpoint_info(checkpoint_info_p) {

auto &config = DBConfig::GetConfig(GetDatabase());
if (is_validity && col_data_p.IsEmptyValidity()) {
compression_functions.push_back(config.GetEmptyValidity());
if (is_validity && col_data_p.DoesNotRequireValidity()) {
auto empty_validity =
config.GetCompressionFunction(CompressionType::COMPRESSION_EMPTY, GetType().InternalType());
compression_functions.push_back(empty_validity);
} else {
auto functions = config.GetCompressionFunctions(GetType().InternalType());
for (auto &func : functions) {
Expand Down
4 changes: 0 additions & 4 deletions src/storage/table/column_segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,6 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc

if (block_id == INVALID_BLOCK) {
function = config.GetCompressionFunction(CompressionType::COMPRESSION_CONSTANT, type.InternalType());
} else if (type.id() == LogicalTypeId::VALIDITY && compression_type == CompressionType::COMPRESSION_AUTO) {
// The validity is not actually stored in this block, this is just a dummy
function = config.GetEmptyValidity();
block = block_manager.RegisterBlock(block_id);
} else {
function = config.GetCompressionFunction(compression_type, type.InternalType());
block = block_manager.RegisterBlock(block_id);
Expand Down

0 comments on commit eb342dd

Please sign in to comment.