diff --git a/test/sql/storage/compression/dictionary/dictionary_covers_validity.test b/test/sql/storage/compression/dictionary/dictionary_covers_validity.test new file mode 100644 index 000000000000..5b707d8c4af2 --- /dev/null +++ b/test/sql/storage/compression/dictionary/dictionary_covers_validity.test @@ -0,0 +1,72 @@ +# name: test/sql/storage/compression/dictionary/dictionary_covers_validity.test +# group: [dictionary] + +load __TEST_DIR__/dictionary_covers_validity + +statement ok +set checkpoint_threshold='10mb'; + +statement ok +CREATE TABLE tbl AS SELECT + { + 'a': i, + 'b': NULL::VARCHAR + } col +FROM range(5000) t(i); + +statement ok +INSERT INTO tbl VALUES ( + { + 'a': 10000, + 'b': 'hello' + } +); + +statement ok +set force_compression='dictionary'; + +statement ok +force checkpoint; + +# Dictionary covers the validity, so the validity gets replaced with "Empty Validity" +query II +select segment_type, compression from pragma_storage_info('tbl'); +---- +VALIDITY Constant +BIGINT BitPacking +VALIDITY Constant +VARCHAR Dictionary +VALIDITY Empty Validity + +statement ok +INSERT INTO tbl VALUES ( + { + 'a': 10000, + 'b': 'hello' + } +); + +# Now force a different compression method, that doesn't cover the validity +statement ok +set force_compression='fsst'; + +statement ok +force checkpoint; + +# During checkpoint this will scan the dictionary compressed segments to get the validity +# this then gets compressed as normal (since FSST does not cover the validity) +query II +select segment_type, compression from pragma_storage_info('tbl'); +---- +VALIDITY Constant +BIGINT BitPacking +VALIDITY Constant +VARCHAR FSST +VALIDITY Roaring + +query I +SELECT col FROM tbl ORDER BY col.a DESC LIMIT 3; +---- +{'a': 10000, 'b': hello} +{'a': 10000, 'b': hello} +{'a': 4999, 'b': NULL}