From 1c1743d67a34bbe4098fda60f5ae374f12a6b518 Mon Sep 17 00:00:00 2001 From: Sven Klemm Date: Mon, 3 Feb 2025 15:59:39 +0100 Subject: [PATCH] Allow non-btree operator pushdown in UPDATE/DELETE queries on compressed chunks When pushing down expressions into the compressed scan we assumed all valid expressions use btree operators and dropped any that weren't. This patch changes the behaviour to keep those expressions and use them as heap filter on the compressed scan for UPDATE and DELETE on compressed chunks. --- .unreleased/pr_7649 | 1 + tsl/src/compression/compression_scankey.c | 66 +++-- tsl/test/shared/expected/compression_dml.out | 248 ++++++++++++++++++- tsl/test/shared/sql/compression_dml.sql | 63 ++++- 4 files changed, 345 insertions(+), 33 deletions(-) create mode 100644 .unreleased/pr_7649 diff --git a/.unreleased/pr_7649 b/.unreleased/pr_7649 new file mode 100644 index 00000000000..d9b72419d6b --- /dev/null +++ b/.unreleased/pr_7649 @@ -0,0 +1 @@ +Fixes: #7649 Allow non-btree operator pushdown in UPDATE/DELETE queries on compressed chunks diff --git a/tsl/src/compression/compression_scankey.c b/tsl/src/compression/compression_scankey.c index 9fc6c320354..5240457d0a9 100644 --- a/tsl/src/compression/compression_scankey.c +++ b/tsl/src/compression/compression_scankey.c @@ -17,7 +17,7 @@ static Oid deduce_filter_subtype(BatchFilter *filter, Oid att_typoid); static bool create_segment_filter_scankey(Relation in_rel, char *segment_filter_col_name, - StrategyNumber strategy, Oid subtype, + StrategyNumber strategy, Oid subtype, Oid opcode, ScanKeyData *scankeys, int *num_scankeys, Bitmapset **null_columns, Datum value, bool is_null_check, bool is_array_op); @@ -191,6 +191,7 @@ build_heap_scankeys(Oid hypertable_relid, Relation in_rel, Relation out_rel, attname, BTEqualStrategyNumber, InvalidOid, + InvalidOid, scankeys, &key_index, null_columns, @@ -212,6 +213,7 @@ build_heap_scankeys(Oid hypertable_relid, Relation in_rel, Relation out_rel, column_segment_min_name(index), BTLessEqualStrategyNumber, InvalidOid, + InvalidOid, scankeys, &key_index, null_columns, @@ -223,6 +225,7 @@ build_heap_scankeys(Oid hypertable_relid, Relation in_rel, Relation out_rel, column_segment_max_name(index), BTGreaterEqualStrategyNumber, InvalidOid, + InvalidOid, scankeys, &key_index, null_columns, @@ -461,6 +464,7 @@ build_update_delete_scankeys(Relation in_rel, List *heap_filters, int *num_scank NameStr(filter->column_name), filter->strategy, deduce_filter_subtype(filter, typoid), + filter->opcode, scankeys, &key_index, null_columns, @@ -482,9 +486,9 @@ build_update_delete_scankeys(Relation in_rel, List *heap_filters, int *num_scank static bool create_segment_filter_scankey(Relation in_rel, char *segment_filter_col_name, - StrategyNumber strategy, Oid subtype, ScanKeyData *scankeys, - int *num_scankeys, Bitmapset **null_columns, Datum value, - bool is_null_check, bool is_array_op) + StrategyNumber strategy, Oid subtype, Oid opcode, + ScanKeyData *scankeys, int *num_scankeys, Bitmapset **null_columns, + Datum value, bool is_null_check, bool is_array_op) { AttrNumber cmp_attno = get_attnum(in_rel->rd_id, segment_filter_col_name); Assert(cmp_attno != InvalidAttrNumber); @@ -510,30 +514,48 @@ create_segment_filter_scankey(Relation in_rel, char *segment_filter_col_name, return false; } - Oid atttypid = in_rel->rd_att->attrs[AttrNumberGetAttrOffset(cmp_attno)].atttypid; - - TypeCacheEntry *tce = lookup_type_cache(atttypid, TYPECACHE_BTREE_OPFAMILY); - if (!OidIsValid(tce->btree_opf)) - elog(ERROR, "no btree 
opfamily for type \"%s\"", format_type_be(atttypid)); - - Oid opr = get_opfamily_member(tce->btree_opf, atttypid, atttypid, strategy); - + Oid opr; /* - * Fall back to btree operator input type when it is binary compatible with - * the column type and no operator for column type could be found. + * All btree operators will have a valid strategy here. For + * non-btree operators e.g. <> we directly take the opcode + * here. We could do the same for btree in certain cases + * but some filters get transformed to min/max filters and + * won't keep the initial opcode so we would need to disambiguate + * between them. */ - if (!OidIsValid(opr) && IsBinaryCoercible(atttypid, tce->btree_opintype)) + if (strategy == InvalidStrategy) { - opr = - get_opfamily_member(tce->btree_opf, tce->btree_opintype, tce->btree_opintype, strategy); + opr = opcode; } + else + { + Oid atttypid = in_rel->rd_att->attrs[AttrNumberGetAttrOffset(cmp_attno)].atttypid; - /* No operator could be found so we can't create the scankey. */ - if (!OidIsValid(opr)) - return false; + TypeCacheEntry *tce = lookup_type_cache(atttypid, TYPECACHE_BTREE_OPFAMILY); + if (!OidIsValid(tce->btree_opf)) + elog(ERROR, "no btree opfamily for type \"%s\"", format_type_be(atttypid)); + + opr = get_opfamily_member(tce->btree_opf, atttypid, atttypid, strategy); + + /* + * Fall back to btree operator input type when it is binary compatible with + * the column type and no operator for column type could be found. + */ + if (!OidIsValid(opr) && IsBinaryCoercible(atttypid, tce->btree_opintype)) + { + opr = get_opfamily_member(tce->btree_opf, + tce->btree_opintype, + tce->btree_opintype, + strategy); + } + + /* No operator could be found so we can't create the scankey. */ + if (!OidIsValid(opr)) + return false; + + opr = get_opcode(opr); + } - opr = get_opcode(opr); - Assert(OidIsValid(opr)); /* We should never end up here but: no opcode, no optimization */ if (!OidIsValid(opr)) return false; diff --git a/tsl/test/shared/expected/compression_dml.out b/tsl/test/shared/expected/compression_dml.out index 7a3eda60d9d..02647506e4f 100644 --- a/tsl/test/shared/expected/compression_dml.out +++ b/tsl/test/shared/expected/compression_dml.out @@ -489,14 +489,12 @@ BEGIN; :ANALYZE DELETE FROM direct_delete WHERE reading <> 'r2'; QUERY PLAN Custom Scan (HypertableModify) (actual rows=0 loops=1) - Batches decompressed: 8 - Tuples decompressed: 8 + Batches deleted: 4 -> Delete on direct_delete (actual rows=0 loops=1) Delete on _hyper_X_X_chunk direct_delete_1 - -> Seq Scan on _hyper_X_X_chunk direct_delete_1 (actual rows=4 loops=1) + -> Seq Scan on _hyper_X_X_chunk direct_delete_1 (actual rows=0 loops=1) Filter: (reading <> 'r2'::text) - Rows Removed by Filter: 4 -(8 rows) +(6 rows) -- 4 tuples should still be there SELECT count(*) FROM direct_delete; @@ -569,14 +567,12 @@ BEGIN; :ANALYZE DELETE FROM direct_delete WHERE reading NOT IN ('r1'); QUERY PLAN Custom Scan (HypertableModify) (actual rows=0 loops=1) - Batches decompressed: 8 - Tuples decompressed: 8 + Batches deleted: 4 -> Delete on direct_delete (actual rows=0 loops=1) Delete on _hyper_X_X_chunk direct_delete_1 - -> Seq Scan on _hyper_X_X_chunk direct_delete_1 (actual rows=4 loops=1) + -> Seq Scan on _hyper_X_X_chunk direct_delete_1 (actual rows=0 loops=1) Filter: (reading <> 'r1'::text) - Rows Removed by Filter: 4 -(8 rows) +(6 rows) -- 4 tuples should still be there SELECT count(*) FROM direct_delete; @@ -688,3 +684,235 @@ QUERY PLAN DROP TRIGGER direct_delete_trigger ON direct_delete; DROP TABLE 
direct_delete; +-- test DML on metadata columns +CREATE TABLE compress_dml(time timestamptz NOT NULL, device text, reading text, value float); +SELECT table_name FROM create_hypertable('compress_dml', 'time'); + table_name + compress_dml +(1 row) + +ALTER TABLE compress_dml SET (timescaledb.compress, timescaledb.compress_segmentby='device', timescaledb.compress_orderby='time DESC, reading'); +INSERT INTO compress_dml VALUES +('2025-01-01','d1','r1',0.01), +('2025-01-01','d2','r2',0.01), +('2025-01-01','d3','r1',0.01), +('2025-01-01','d3','r2',0.01), +('2025-01-01','d4','r1',0.01), +('2025-01-01','d4',NULL,0.01), +('2025-01-01','d5','r2',0.01), +('2025-01-01','d5',NULL,0.01), +('2025-01-01','d6','r1',0.01), +('2025-01-01','d6','r2',0.01), +('2025-01-01','d6',NULL,0.01); +SELECT compress_chunk(show_chunks('compress_dml')); + compress_chunk + _timescaledb_internal._hyper_X_X_chunk +(1 row) + +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading = 'r1'; +QUERY PLAN + Custom Scan (HypertableModify) (actual rows=0 loops=1) + Batches decompressed: 4 + Tuples decompressed: 8 + -> Delete on compress_dml (actual rows=0 loops=1) + Delete on _hyper_X_X_chunk compress_dml_1 + -> Seq Scan on _hyper_X_X_chunk compress_dml_1 (actual rows=4 loops=1) + Filter: (reading = 'r1'::text) + Rows Removed by Filter: 4 +(8 rows) + +SELECT * FROM compress_dml t ORDER BY t; + time | device | reading | value +------------------------------+--------+---------+------- + Wed Jan 01 00:00:00 2025 PST | d2 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d3 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d4 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d5 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d5 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | | 0.01 +(7 rows) + +ROLLBACK; +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading <> 'r1'; +QUERY PLAN + Custom Scan (HypertableModify) (actual rows=0 loops=1) + Batches filtered: 2 + Batches decompressed: 4 + Tuples decompressed: 8 + -> Delete on compress_dml (actual rows=0 loops=1) + Delete on _hyper_X_X_chunk compress_dml_1 + -> Seq Scan on _hyper_X_X_chunk compress_dml_1 (actual rows=4 loops=1) + Filter: (reading <> 'r1'::text) + Rows Removed by Filter: 4 +(9 rows) + +SELECT * FROM compress_dml t ORDER BY t; + time | device | reading | value +------------------------------+--------+---------+------- + Wed Jan 01 00:00:00 2025 PST | d1 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d3 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d4 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d4 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d5 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | | 0.01 +(7 rows) + +ROLLBACK; +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading IS NULL; +QUERY PLAN + Custom Scan (HypertableModify) (actual rows=0 loops=1) + Batches decompressed: 6 + Tuples decompressed: 11 + -> Delete on compress_dml (actual rows=0 loops=1) + Delete on _hyper_X_X_chunk compress_dml_1 + -> Seq Scan on _hyper_X_X_chunk compress_dml_1 (actual rows=3 loops=1) + Filter: (reading IS NULL) + Rows Removed by Filter: 8 +(8 rows) + +SELECT * FROM compress_dml t ORDER BY t; + time | device | reading | value +------------------------------+--------+---------+------- + Wed Jan 01 00:00:00 2025 PST | d1 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d2 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d3 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d3 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d4 | r1 | 0.01 + Wed 
Jan 01 00:00:00 2025 PST | d5 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | r2 | 0.01 +(8 rows) + +ROLLBACK; +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading IS NOT NULL; +QUERY PLAN + Custom Scan (HypertableModify) (actual rows=0 loops=1) + Batches decompressed: 6 + Tuples decompressed: 11 + -> Delete on compress_dml (actual rows=0 loops=1) + Delete on _hyper_X_X_chunk compress_dml_1 + -> Seq Scan on _hyper_X_X_chunk compress_dml_1 (actual rows=8 loops=1) + Filter: (reading IS NOT NULL) + Rows Removed by Filter: 3 +(8 rows) + +SELECT * FROM compress_dml t ORDER BY t; + time | device | reading | value +------------------------------+--------+---------+------- + Wed Jan 01 00:00:00 2025 PST | d4 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d5 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | | 0.01 +(3 rows) + +ROLLBACK; +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading IN ('r2','r3'); +QUERY PLAN + Custom Scan (HypertableModify) (actual rows=0 loops=1) + Batches decompressed: 6 + Tuples decompressed: 11 + -> Delete on compress_dml (actual rows=0 loops=1) + Delete on _hyper_X_X_chunk compress_dml_1 + -> Seq Scan on _hyper_X_X_chunk compress_dml_1 (actual rows=4 loops=1) + Filter: (reading = ANY ('{r2,r3}'::text[])) + Rows Removed by Filter: 7 +(8 rows) + +SELECT * FROM compress_dml t ORDER BY t; + time | device | reading | value +------------------------------+--------+---------+------- + Wed Jan 01 00:00:00 2025 PST | d1 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d3 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d4 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d4 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d5 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | | 0.01 +(7 rows) + +ROLLBACK; +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading = ANY('{r2,r3}'); +QUERY PLAN + Custom Scan (HypertableModify) (actual rows=0 loops=1) + Batches decompressed: 6 + Tuples decompressed: 11 + -> Delete on compress_dml (actual rows=0 loops=1) + Delete on _hyper_X_X_chunk compress_dml_1 + -> Seq Scan on _hyper_X_X_chunk compress_dml_1 (actual rows=4 loops=1) + Filter: (reading = ANY ('{r2,r3}'::text[])) + Rows Removed by Filter: 7 +(8 rows) + +SELECT * FROM compress_dml t ORDER BY t; + time | device | reading | value +------------------------------+--------+---------+------- + Wed Jan 01 00:00:00 2025 PST | d1 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d3 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d4 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d4 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d5 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | r1 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | | 0.01 +(7 rows) + +ROLLBACK; +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading NOT IN ('r2','r3'); +QUERY PLAN + Custom Scan (HypertableModify) (actual rows=0 loops=1) + Batches decompressed: 6 + Tuples decompressed: 11 + -> Delete on compress_dml (actual rows=0 loops=1) + Delete on _hyper_X_X_chunk compress_dml_1 + -> Seq Scan on _hyper_X_X_chunk compress_dml_1 (actual rows=4 loops=1) + Filter: (reading <> ALL ('{r2,r3}'::text[])) + Rows Removed by Filter: 7 +(8 rows) + +SELECT * FROM compress_dml t ORDER BY t; + time | device | reading | value +------------------------------+--------+---------+------- + Wed Jan 01 00:00:00 2025 PST | d2 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d3 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d4 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d5 | r2 | 0.01 + Wed Jan 01 
00:00:00 2025 PST | d5 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | | 0.01 +(7 rows) + +ROLLBACK; +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading <> ALL('{r2,r3}'); +QUERY PLAN + Custom Scan (HypertableModify) (actual rows=0 loops=1) + Batches decompressed: 6 + Tuples decompressed: 11 + -> Delete on compress_dml (actual rows=0 loops=1) + Delete on _hyper_X_X_chunk compress_dml_1 + -> Seq Scan on _hyper_X_X_chunk compress_dml_1 (actual rows=4 loops=1) + Filter: (reading <> ALL ('{r2,r3}'::text[])) + Rows Removed by Filter: 7 +(8 rows) + +SELECT * FROM compress_dml t ORDER BY t; + time | device | reading | value +------------------------------+--------+---------+------- + Wed Jan 01 00:00:00 2025 PST | d2 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d3 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d4 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d5 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d5 | | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | r2 | 0.01 + Wed Jan 01 00:00:00 2025 PST | d6 | | 0.01 +(7 rows) + +ROLLBACK; +DROP TABLE compress_dml; diff --git a/tsl/test/shared/sql/compression_dml.sql b/tsl/test/shared/sql/compression_dml.sql index 90ee67c1c2a..7f31dbe6a26 100644 --- a/tsl/test/shared/sql/compression_dml.sql +++ b/tsl/test/shared/sql/compression_dml.sql @@ -286,6 +286,67 @@ CREATE TRIGGER direct_delete_trigger AFTER DELETE ON direct_delete FOR EACH ROW BEGIN; :ANALYZE DELETE FROM direct_delete WHERE device = 'd1'; ROLLBACK; DROP TRIGGER direct_delete_trigger ON direct_delete; - DROP TABLE direct_delete; +-- test DML on metadata columns +CREATE TABLE compress_dml(time timestamptz NOT NULL, device text, reading text, value float); +SELECT table_name FROM create_hypertable('compress_dml', 'time'); +ALTER TABLE compress_dml SET (timescaledb.compress, timescaledb.compress_segmentby='device', timescaledb.compress_orderby='time DESC, reading'); + +INSERT INTO compress_dml VALUES +('2025-01-01','d1','r1',0.01), +('2025-01-01','d2','r2',0.01), +('2025-01-01','d3','r1',0.01), +('2025-01-01','d3','r2',0.01), +('2025-01-01','d4','r1',0.01), +('2025-01-01','d4',NULL,0.01), +('2025-01-01','d5','r2',0.01), +('2025-01-01','d5',NULL,0.01), +('2025-01-01','d6','r1',0.01), +('2025-01-01','d6','r2',0.01), +('2025-01-01','d6',NULL,0.01); + +SELECT compress_chunk(show_chunks('compress_dml')); + +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading = 'r1'; +SELECT * FROM compress_dml t ORDER BY t; +ROLLBACK; + +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading <> 'r1'; +SELECT * FROM compress_dml t ORDER BY t; +ROLLBACK; + +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading IS NULL; +SELECT * FROM compress_dml t ORDER BY t; +ROLLBACK; + +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading IS NOT NULL; +SELECT * FROM compress_dml t ORDER BY t; +ROLLBACK; + +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading IN ('r2','r3'); +SELECT * FROM compress_dml t ORDER BY t; +ROLLBACK; + +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading = ANY('{r2,r3}'); +SELECT * FROM compress_dml t ORDER BY t; +ROLLBACK; + +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading NOT IN ('r2','r3'); +SELECT * FROM compress_dml t ORDER BY t; +ROLLBACK; + +BEGIN; +:ANALYZE DELETE FROM compress_dml WHERE reading <> ALL('{r2,r3}'); +SELECT * FROM compress_dml t ORDER BY t; +ROLLBACK; + +DROP TABLE compress_dml; +
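
For context, the decision the new code in create_segment_filter_scankey() makes can be summarized as: filters with a valid btree strategy are still resolved through the column type's btree opfamily (with the binary-coercible fallback), while filters that arrive with InvalidStrategy, e.g. <>, reuse the operator's function Oid that the caller now passes in as opcode. Below is a minimal sketch of that branch against PostgreSQL's typcache/lsyscache APIs; the helper name resolve_filter_proc is invented for illustration, it assumes opcode already holds the operator's pg_proc Oid as prepared by the filter-building caller (not shown in this hunk), and the NULL-check and array-operator handling of the real function are omitted.

	#include "postgres.h"

	#include "access/stratnum.h"
	#include "parser/parse_coerce.h"
	#include "utils/builtins.h"
	#include "utils/lsyscache.h"
	#include "utils/rel.h"
	#include "utils/typcache.h"

	/*
	 * Simplified sketch of the operator resolution added by this patch.
	 * resolve_filter_proc is a hypothetical helper; error paths and the
	 * NULL/array-operator handling of create_segment_filter_scankey() are
	 * left out for brevity.
	 */
	static RegProcedure
	resolve_filter_proc(Relation in_rel, AttrNumber cmp_attno, StrategyNumber strategy,
						Oid opcode)
	{
		/*
		 * Non-btree operators (e.g. <>) carry no btree strategy; use the
		 * operator's underlying comparison function directly.
		 */
		if (strategy == InvalidStrategy)
			return (RegProcedure) opcode;

		/* Btree strategies go through the column type's btree opfamily. */
		Oid atttypid = in_rel->rd_att->attrs[AttrNumberGetAttrOffset(cmp_attno)].atttypid;
		TypeCacheEntry *tce = lookup_type_cache(atttypid, TYPECACHE_BTREE_OPFAMILY);

		if (!OidIsValid(tce->btree_opf))
			elog(ERROR, "no btree opfamily for type \"%s\"", format_type_be(atttypid));

		Oid opr = get_opfamily_member(tce->btree_opf, atttypid, atttypid, strategy);

		/*
		 * Fall back to the opfamily's declared input type when it is binary
		 * compatible with the column type and no operator was found.
		 */
		if (!OidIsValid(opr) && IsBinaryCoercible(atttypid, tce->btree_opintype))
			opr = get_opfamily_member(tce->btree_opf,
									  tce->btree_opintype,
									  tce->btree_opintype,
									  strategy);

		/* No operator found: the caller skips the scankey (no optimization). */
		return OidIsValid(opr) ? get_opcode(opr) : InvalidOid;
	}

In both paths the resulting procedure Oid is what the scankey is initialized with, so a qual such as reading <> 'r2' that previously forced full decompression can now be applied directly on the compressed scan; the updated direct_delete plans above reflect this as "Batches deleted: 4" instead of "Batches decompressed: 8".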