forked from duckdb/duckdb
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
40 changed files
with
426 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
test/sql/storage/compression/dictionary/dictionary_compression_ratio.test_slow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# name: test/sql/storage/compression/dictionary/dictionary_compression_ratio.test_slow | ||
# description: Assert dictionary compression ratio is within reasonable margins | ||
# group: [dictionary] | ||
|
||
load __TEST_DIR__/test_dictionary.db | ||
|
||
# Store the same data twice (dictionary-compressed, then uncompressed) and compare the block counts | ||
statement ok | ||
PRAGMA force_compression='dictionary'; | ||
|
||
# Assuming 10 chars at 1 byte each, plus a 4-byte offset and a 2-byte length per uncompressed string (16 bytes): | ||
# the absolute maximum ratio at 3 bits per value (ignoring the dictionary size) is 16 / (3/8) = 42.6666666667 | ||
statement ok | ||
CREATE TABLE test_dictionary AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i); | ||
|
||
statement ok | ||
CHECKPOINT; | ||
|
||
# Baseline: the identical data, written without compression | ||
statement ok | ||
PRAGMA force_compression='uncompressed'; | ||
|
||
statement ok | ||
CREATE TABLE test_uncompressed AS SELECT concat('BEEPBOOP-', (i%3)::VARCHAR) AS i FROM range(0, 1250000) tbl(i); | ||
|
||
statement ok | ||
CHECKPOINT; | ||
|
||
# keep a wide margin for the compression ratio to account for changes (like the block size) that | ||
# influence the compression ratio | ||
|
||
# The ratio is computed from the number of distinct blocks holding VARCHAR segments in each table | ||
query I | ||
SELECT uncompressed::FLOAT / dictionary::FLOAT > 30 AND uncompressed::FLOAT / dictionary::FLOAT < 55 FROM | ||
(SELECT count(DISTINCT block_id) AS dictionary FROM pragma_storage_info('test_dictionary') WHERE segment_type IN ('VARCHAR')) AS dictionary, | ||
(SELECT count(DISTINCT block_id) AS uncompressed FROM pragma_storage_info('test_uncompressed') WHERE segment_type IN ('VARCHAR')) AS uncompressed; | ||
---- | ||
True |
72 changes: 72 additions & 0 deletions
72
test/sql/storage/compression/dictionary/dictionary_covers_validity.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# name: test/sql/storage/compression/dictionary/dictionary_covers_validity.test | ||
# description: Test that Dictionary compression covers the validity, and that the validity is compressed separately again under FSST | ||
# group: [dictionary] | ||
|
||
load __TEST_DIR__/dictionary_covers_validity | ||
|
||
# NOTE(review): presumably raised so the inserts below do not trigger an automatic checkpoint | ||
# before the explicit 'force checkpoint' - TODO confirm | ||
statement ok | ||
set checkpoint_threshold='10mb'; | ||
|
||
# 5000 struct rows whose VARCHAR field 'b' is NULL in every row | ||
statement ok | ||
CREATE TABLE tbl AS SELECT | ||
{ | ||
'a': i, | ||
'b': NULL::VARCHAR | ||
} col | ||
FROM range(5000) t(i); | ||
|
||
statement ok | ||
set force_compression='dictionary'; | ||
|
||
# Add a single row with a non-NULL 'b' while dictionary compression is forced | ||
statement ok | ||
INSERT INTO tbl VALUES ( | ||
{ | ||
'a': 10000, | ||
'b': 'hello' | ||
} | ||
); | ||
|
||
statement ok | ||
force checkpoint; | ||
|
||
# Dictionary covers the validity, so the validity gets replaced with "Empty Validity" | ||
query II | ||
select segment_type, compression from pragma_storage_info('tbl'); | ||
---- | ||
VALIDITY Constant | ||
BIGINT BitPacking | ||
VALIDITY Constant | ||
VARCHAR Dictionary | ||
VALIDITY Empty Validity | ||
|
||
# Now force a different compression method, that doesn't cover the validity | ||
statement ok | ||
set force_compression='fsst'; | ||
|
||
statement ok | ||
INSERT INTO tbl VALUES ( | ||
{ | ||
'a': 10000, | ||
'b': 'hello' | ||
} | ||
); | ||
|
||
statement ok | ||
force checkpoint; | ||
|
||
# During the checkpoint, the dictionary-compressed segments are scanned to recover the validity, | ||
# which is then compressed as normal (since FSST does not cover the validity) | ||
query II | ||
select segment_type, compression from pragma_storage_info('tbl'); | ||
---- | ||
VALIDITY Constant | ||
BIGINT BitPacking | ||
VALIDITY Constant | ||
VARCHAR FSST | ||
VALIDITY Roaring | ||
|
||
# The stored values must still read back correctly: both inserted rows and a row with a NULL 'b' | ||
query I | ||
SELECT col FROM tbl ORDER BY col.a DESC LIMIT 3; | ||
---- | ||
{'a': 10000, 'b': hello} | ||
{'a': 10000, 'b': hello} | ||
{'a': 4999, 'b': NULL} |
23 changes: 23 additions & 0 deletions
23
test/sql/storage/compression/dictionary/dictionary_storage_info.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# name: test/sql/storage/compression/dictionary/dictionary_storage_info.test | ||
# description: Test storage with Dictionary compression | ||
# group: [dictionary] | ||
|
||
# load the DB from disk | ||
load __TEST_DIR__/test_dictionary.db | ||
|
||
statement ok | ||
PRAGMA force_compression = 'dictionary' | ||
|
||
statement ok | ||
CREATE TABLE test (a VARCHAR, b VARCHAR); | ||
|
||
# Four rows: a repeated pair, a second pair, and one row that is NULL in both columns | ||
statement ok | ||
INSERT INTO test VALUES ('11', '22'), ('11', '22'), ('12', '21'), (NULL, NULL) | ||
|
||
statement ok | ||
CHECKPOINT | ||
|
||
# Check the compression reported for a VARCHAR segment (LIMIT 1: only one row is inspected) | ||
query I | ||
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1 | ||
---- | ||
Dictionary |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# name: test/sql/storage/compression/dictionary/fetch_row.test | ||
# description: Test fetching rows from storage with Dictionary compression | ||
# group: [dictionary] | ||
|
||
# Only runs when the block size is 262144 | ||
require block_size 262144 | ||
|
||
# load the DB from disk | ||
load __TEST_DIR__/test_dictionary_fetchrow.db | ||
|
||
statement ok | ||
PRAGMA force_compression = 'dictionary' | ||
|
||
statement ok | ||
CREATE TABLE test ( | ||
a INTEGER, | ||
b VARCHAR | ||
); | ||
|
||
# 10,000 rows cycling through four strings and NULL (every fifth row has a NULL 'b') | ||
statement ok | ||
INSERT INTO test (a, b) | ||
SELECT | ||
x AS a, | ||
CASE x % 5 | ||
WHEN 0 THEN 'aaaa' | ||
WHEN 1 THEN 'bbbb' | ||
WHEN 2 THEN 'cccc' | ||
WHEN 3 THEN 'dddd' | ||
WHEN 4 THEN NULL | ||
END AS b | ||
FROM range(10_000) t(x); | ||
|
||
statement ok | ||
CHECKPOINT | ||
|
||
# Restart so the queries below run against the checkpointed database | ||
restart | ||
|
||
query I | ||
SELECT compression FROM pragma_storage_info('test') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1 | ||
---- | ||
Dictionary | ||
|
||
# Turn on verify_fetch_row before reading the compressed column | ||
statement ok | ||
pragma verify_fetch_row; | ||
|
||
# b is derived from a % 5, so ordering by a % 5 returns the values in the order of the CASE branches | ||
query I | ||
select distinct b from test order by a % 5; | ||
---- | ||
aaaa | ||
bbbb | ||
cccc | ||
dddd | ||
NULL |
24 changes: 24 additions & 0 deletions
24
test/sql/storage/compression/dictionary/force_dictionary.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# name: test/sql/storage/compression/dictionary/force_dictionary.test | ||
# description: Test forcing dictionary encoding as the compression scheme | ||
# group: [dictionary] | ||
|
||
# Only runs when the vector size is 2048 | ||
require vector_size 2048 | ||
|
||
load __TEST_DIR__/force_dictionary.db | ||
|
||
statement ok | ||
PRAGMA force_compression = 'dictionary' | ||
|
||
statement ok | ||
CREATE TABLE test_dict (a VARCHAR); | ||
|
||
# 2000 rows, each holding a different string ('0' through '1999') | ||
statement ok | ||
INSERT INTO test_dict SELECT i::VARCHAR FROM range(0, 2000) tbl(i); | ||
|
||
statement ok | ||
CHECKPOINT | ||
|
||
# With the compression forced above, the VARCHAR segment is expected to report Dictionary | ||
query I | ||
SELECT compression FROM pragma_storage_info('test_dict') WHERE segment_type ILIKE 'VARCHAR' LIMIT 1 | ||
---- | ||
Dictionary |
Oops, something went wrong.