From c5bb43a61d5eb72cfac3025269f3ca2ddd0346c2 Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:37:35 +0000 Subject: [PATCH] Wrap regex terms in eval only marker instead of rewriting into filter function --- .../jexl/visitors/RewriteRegexVisitor.java | 8 +- .../pushdown/AnchorDetectionVisitor.java | 4 +- .../test/java/datawave/query/ShapesTest.java | 4 +- .../visitors/RewriteRegexVisitorTest.java | 158 +++++++++--------- .../pushdown/AnchorDetectionVisitorTest.java | 4 +- 5 files changed, 89 insertions(+), 89 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RewriteRegexVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RewriteRegexVisitor.java index aaefde6d34..2a1195e71b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RewriteRegexVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/RewriteRegexVisitor.java @@ -12,13 +12,13 @@ import datawave.query.Constants; import datawave.query.jexl.JexlASTHelper; -import datawave.query.jexl.JexlNodeFactory; import datawave.query.jexl.NodeTypeCount; import datawave.query.jexl.nodes.QueryPropertyMarker; +import datawave.query.jexl.nodes.QueryPropertyMarker.MarkerType; import datawave.query.jexl.visitors.pushdown.AnchorDetectionVisitor; /** - * Rewrites regex terms as filter functions provided an anchor exists. + * Rewrites regex terms provided an anchor exists. Regex terms are wrapped in an EvalOnly marker. *

* An anchor is an executable term or subtree. *

@@ -175,8 +175,8 @@ public Object visit(ASTERNode node, Object data) { String literal = (String) JexlASTHelper.getLiteralValue(node); if (isNodeRewritableFromRules(field, literal)) { - JexlNode rewrite = JexlNodeFactory.buildFunctionNode("filter", "includeRegex", field, literal); - JexlNodes.replaceChild(node.jjtGetParent(), node, rewrite); + JexlNode marker = QueryPropertyMarker.create(node, MarkerType.EVALUATION_ONLY); + JexlNodes.replaceChild(node.jjtGetParent(), node, marker); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java index 018658b1eb..4740b29894 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java @@ -194,12 +194,12 @@ private Object visitMarker(QueryPropertyMarker.Instance instance) { switch (instance.getType()) { case BOUNDED_RANGE: - case DELAYED: - case EVALUATION_ONLY: case EXCEEDED_OR: case EXCEEDED_TERM: case EXCEEDED_VALUE: return true; + case DELAYED: + case EVALUATION_ONLY: default: return false; } diff --git a/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java b/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java index c51fe8154a..d8661daa4c 100644 --- a/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java @@ -893,7 +893,7 @@ public void testRewriteRegexFromIncludes() throws Exception { withQuery("ONLY_HEX == 'hexa' && TYPE =~ 'reg.*'"); withExpected(Sets.newHashSet(ShapesIngest.hexagonUid)); planAndExecuteQuery(); - assertPlannedQuery("ONLY_HEX == 'hexa' && filter:includeRegex(TYPE, 'reg.*')"); + assertPlannedQuery("ONLY_HEX == 'hexa' && ((_Eval_ = true) && (TYPE =~ 'reg.*'))"); } @Test @@ -909,6 
+909,6 @@ public void testRewriteRegexWithExcludedFieldBecauseOfPatternMatch() throws Exce withQuery("ONLY_HEX == 'hexa' && SHAPE =~ 'hexag.*'"); withExpected(Sets.newHashSet(ShapesIngest.hexagonUid)); planAndExecuteQuery(); - assertPlannedQuery("ONLY_HEX == 'hexa' && filter:includeRegex(SHAPE, 'hexag.*')"); + assertPlannedQuery("ONLY_HEX == 'hexa' && ((_Eval_ = true) && (SHAPE =~ 'hexag.*'))"); } } diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java index fd6add4b90..e77c8d6fc5 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java @@ -34,8 +34,8 @@ public void beforeEach() { @Test public void testSingleTermAndRegex() { // term and indexed regex - test("F == 'a' && F =~ 'ba.*'", "F == 'a' && filter:includeRegex(F, 'ba.*')"); - test("IO == 'a' && F =~ 'ba.*'", "IO == 'a' && filter:includeRegex(F, 'ba.*')"); + test("F == 'a' && F =~ 'ba.*'", "F == 'a' && ((_Eval_ = true) && (F =~ 'ba.*'))"); + test("IO == 'a' && F =~ 'ba.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'ba.*'))"); test("NA == 'a' && F =~ 'ba.*'"); // term and index only regex is never rewritten @@ -44,9 +44,9 @@ public void testSingleTermAndRegex() { test("NA == 'a' && IO =~ 'ba.*'"); // term and non-indexed regex is always rewritten - test("F == 'a' && NA =~ 'ba.*'", "F == 'a' && filter:includeRegex(NA, 'ba.*')"); - test("IO == 'a' && NA =~ 'ba.*'", "IO == 'a' && filter:includeRegex(NA, 'ba.*')"); - test("NA == 'a' && NA =~ 'ba.*'", "NA == 'a' && filter:includeRegex(NA, 'ba.*')"); + test("F == 'a' && NA =~ 'ba.*'", "F == 'a' && ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("IO == 'a' && NA =~ 'ba.*'", "IO == 'a' && ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("NA == 'a' && NA =~ 'ba.*'", "NA == 'a' && 
((_Eval_ = true) && (NA =~ 'ba.*'))"); } // A or regex @@ -63,9 +63,9 @@ public void testSingleTermOrRegex() { test("NA == 'a' || IO =~ 'ba.*'"); // top level union with non-indexed regex is a full table scan, do not rewrite - test("F == 'a' || NA =~ 'ba.*'", "F == 'a' || filter:includeRegex(NA, 'ba.*')"); - test("IO == 'a' || NA =~ 'ba.*'", "IO == 'a' || filter:includeRegex(NA, 'ba.*')"); - test("NA == 'a' || NA =~ 'ba.*'", "NA == 'a' || filter:includeRegex(NA, 'ba.*')"); + test("F == 'a' || NA =~ 'ba.*'", "F == 'a' || ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("IO == 'a' || NA =~ 'ba.*'", "IO == 'a' || ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("NA == 'a' || NA =~ 'ba.*'", "NA == 'a' || ((_Eval_ = true) && (NA =~ 'ba.*'))"); } // (A and B) or regex @@ -89,22 +89,22 @@ public void testNestedIntersectionOrRegex() { // the input queries are non-executable, non-indexed field still gets rewritten // all combinations of nested intersection and non-indexed regex - test("(F == 'a' && F == 'b') || NA =~ 'ba.*'", "(F == 'a' && F == 'b') || filter:includeRegex(NA, 'ba.*')"); - test("(F == 'a' && IO == 'b') || NA =~ 'ba.*'", "(F == 'a' && IO == 'b') || filter:includeRegex(NA, 'ba.*')"); - test("(F == 'a' && NA == 'b') || NA =~ 'ba.*'", "(F == 'a' && NA == 'b') || filter:includeRegex(NA, 'ba.*')"); - test("(IO == 'a' && IO == 'b') || NA =~ 'ba.*'", "(IO == 'a' && IO == 'b') || filter:includeRegex(NA, 'ba.*')"); - test("(IO == 'a' && NA == 'b') || NA =~ 'ba.*'", "(IO == 'a' && NA == 'b') || filter:includeRegex(NA, 'ba.*')"); - test("(NA == 'a' && NA == 'b') || Na =~ 'ba.*'", "(NA == 'a' && NA == 'b') || filter:includeRegex(Na, 'ba.*')"); + test("(F == 'a' && F == 'b') || NA =~ 'ba.*'", "(F == 'a' && F == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("(F == 'a' && IO == 'b') || NA =~ 'ba.*'", "(F == 'a' && IO == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("(F == 'a' && NA == 'b') || NA =~ 'ba.*'", "(F == 'a' && NA == 'b') || ((_Eval_ = true) && (NA =~ 
'ba.*'))"); + test("(IO == 'a' && IO == 'b') || NA =~ 'ba.*'", "(IO == 'a' && IO == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("(IO == 'a' && NA == 'b') || NA =~ 'ba.*'", "(IO == 'a' && NA == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("(NA == 'a' && NA == 'b') || NA =~ 'ba.*'", "(NA == 'a' && NA == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))"); } // (A or B) and regex @Test public void testNestedUnionAndRegex() { // all combinations of nested intersection and indexed regex - test("(F == 'a' || F == 'b') && F =~ 'ba.*'", "(F == 'a' || F == 'b') && filter:includeRegex(F, 'ba.*')"); - test("(F == 'a' || IO == 'b') && F =~ 'ba.*'", "(F == 'a' || IO == 'b') && filter:includeRegex(F, 'ba.*')"); + test("(F == 'a' || F == 'b') && F =~ 'ba.*'", "(F == 'a' || F == 'b') && ((_Eval_ = true) && (F =~ 'ba.*'))"); + test("(F == 'a' || IO == 'b') && F =~ 'ba.*'", "(F == 'a' || IO == 'b') && ((_Eval_ = true) && (F =~ 'ba.*'))"); test("(F == 'a' || NA == 'b') && F =~ 'ba.*'"); - test("(IO == 'a' || IO == 'b') && F =~ 'ba.*'", "(IO == 'a' || IO == 'b') && filter:includeRegex(F, 'ba.*')"); + test("(IO == 'a' || IO == 'b') && F =~ 'ba.*'", "(IO == 'a' || IO == 'b') && ((_Eval_ = true) && (F =~ 'ba.*'))"); test("(IO == 'a' || NA == 'b') && F =~ 'ba.*'"); test("(NA == 'a' || NA == 'b') && F =~ 'ba.*'"); @@ -117,21 +117,21 @@ public void testNestedUnionAndRegex() { test("(NA == 'a' || NA == 'b') && IO =~ 'ba.*'"); // all combinations of nested intersection and non-indexed regex - test("(F == 'a' || F == 'b') && NA =~ 'ba.*'", "(F == 'a' || F == 'b') && filter:includeRegex(NA, 'ba.*')"); - test("(F == 'a' || IO == 'b') && NA =~ 'ba.*'", "(F == 'a' || IO == 'b') && filter:includeRegex(NA, 'ba.*')"); - test("(F == 'a' || NA == 'b') && NA =~ 'ba.*'", "(F == 'a' || NA == 'b') && filter:includeRegex(NA, 'ba.*')"); - test("(IO == 'a' || IO == 'b') && NA =~ 'ba.*'", "(IO == 'a' || IO == 'b') && filter:includeRegex(NA, 'ba.*')"); - test("(IO == 'a' || NA == 'b') && NA =~ 
'ba.*'", "(IO == 'a' || NA == 'b') && filter:includeRegex(NA, 'ba.*')"); - test("(NA == 'a' || NA == 'b') && Na =~ 'ba.*'", "(NA == 'a' || NA == 'b') && filter:includeRegex(Na, 'ba.*')"); + test("(F == 'a' || F == 'b') && NA =~ 'ba.*'", "(F == 'a' || F == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("(F == 'a' || IO == 'b') && NA =~ 'ba.*'", "(F == 'a' || IO == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("(F == 'a' || NA == 'b') && NA =~ 'ba.*'", "(F == 'a' || NA == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("(IO == 'a' || IO == 'b') && NA =~ 'ba.*'", "(IO == 'a' || IO == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("(IO == 'a' || NA == 'b') && NA =~ 'ba.*'", "(IO == 'a' || NA == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))"); + test("(NA == 'a' || NA == 'b') && NA =~ 'ba.*'", "(NA == 'a' || NA == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))"); } // A and (B or regex) @Test public void testIntersectionWithNestedUnionWithSingleRegex() { // top level indexed term, variable indexed state for nested term, indexed regex - test("F == 'a' && (F == 'b' || F =~ 'ba.*')", "F == 'a' && (F == 'b' || filter:includeRegex(F, 'ba.*'))"); - test("F == 'a' && (IO == 'b' || F =~ 'ba.*')", "F == 'a' && (IO == 'b' || filter:includeRegex(F, 'ba.*'))"); - test("F == 'a' && (NA == 'b' || F =~ 'ba.*')", "F == 'a' && (NA == 'b' || filter:includeRegex(F, 'ba.*'))"); + test("F == 'a' && (F == 'b' || F =~ 'ba.*')", "F == 'a' && (F == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))"); + test("F == 'a' && (IO == 'b' || F =~ 'ba.*')", "F == 'a' && (IO == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))"); + test("F == 'a' && (NA == 'b' || F =~ 'ba.*')", "F == 'a' && (NA == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))"); // top level indexed term, variable indexed state for nested term, index only regex test("F == 'a' && (F == 'b' || IO =~ 'ba.*')"); @@ -139,14 +139,14 @@ public void testIntersectionWithNestedUnionWithSingleRegex() { test("F == 'a' && (NA == 'b' || IO =~ 
'ba.*')"); // top level indexed term, variable indexed state for nested term, non-indexed regex - test("F == 'a' && (F == 'b' || NA =~ 'ba.*')", "F == 'a' && (F == 'b' || filter:includeRegex(NA, 'ba.*'))"); - test("F == 'a' && (IO == 'b' || NA =~ 'ba.*')", "F == 'a' && (IO == 'b' || filter:includeRegex(NA, 'ba.*'))"); - test("F == 'a' && (NA == 'b' || NA =~ 'ba.*')", "F == 'a' && (NA == 'b' || filter:includeRegex(NA, 'ba.*'))"); + test("F == 'a' && (F == 'b' || NA =~ 'ba.*')", "F == 'a' && (F == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))"); + test("F == 'a' && (IO == 'b' || NA =~ 'ba.*')", "F == 'a' && (IO == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))"); + test("F == 'a' && (NA == 'b' || NA =~ 'ba.*')", "F == 'a' && (NA == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))"); // top level index only term, variable indexed state for nested term, indexed regex - test("IO == 'a' && (F == 'b' || F =~ 'ba.*')", "IO == 'a' && (F == 'b' || filter:includeRegex(F, 'ba.*'))"); - test("IO == 'a' && (IO == 'b' || F =~ 'ba.*')", "IO == 'a' && (IO == 'b' || filter:includeRegex(F, 'ba.*'))"); - test("IO == 'a' && (NA == 'b' || F =~ 'ba.*')", "IO == 'a' && (NA == 'b' || filter:includeRegex(F, 'ba.*'))"); + test("IO == 'a' && (F == 'b' || F =~ 'ba.*')", "IO == 'a' && (F == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))"); + test("IO == 'a' && (IO == 'b' || F =~ 'ba.*')", "IO == 'a' && (IO == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))"); + test("IO == 'a' && (NA == 'b' || F =~ 'ba.*')", "IO == 'a' && (NA == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))"); // top level index only term, variable indexed state for nested term, index only regex test("IO == 'a' && (F == 'b' || IO =~ 'ba.*')"); @@ -154,9 +154,9 @@ public void testIntersectionWithNestedUnionWithSingleRegex() { test("IO == 'a' && (NA == 'b' || IO =~ 'ba.*')"); // top level index only term, variable indexed state for nested term, non-indexed regex - test("IO == 'a' && (F == 'b' || NA =~ 'ba.*')", "IO == 'a' && (F == 'b' || 
filter:includeRegex(NA, 'ba.*'))"); - test("IO == 'a' && (IO == 'b' || NA =~ 'ba.*')", "IO == 'a' && (IO == 'b' || filter:includeRegex(NA, 'ba.*'))"); - test("IO == 'a' && (NA == 'b' || NA =~ 'ba.*')", "IO == 'a' && (NA == 'b' || filter:includeRegex(NA, 'ba.*'))"); + test("IO == 'a' && (F == 'b' || NA =~ 'ba.*')", "IO == 'a' && (F == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))"); + test("IO == 'a' && (IO == 'b' || NA =~ 'ba.*')", "IO == 'a' && (IO == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))"); + test("IO == 'a' && (NA == 'b' || NA =~ 'ba.*')", "IO == 'a' && (NA == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))"); // top level non-indexed term, variable indexed state for nested term, indexed regex test("NA == 'a' && (F == 'b' || F =~ 'ba.*')"); @@ -169,9 +169,9 @@ public void testIntersectionWithNestedUnionWithSingleRegex() { test("NA == 'a' && (NA == 'b' || IO =~ 'ba.*')"); // top level non-indexed term, variable indexed state for nested term, non-indexed regex - test("NA == 'a' && (F == 'b' || NA =~ 'ba.*')", "NA == 'a' && (F == 'b' || filter:includeRegex(NA, 'ba.*'))"); - test("NA == 'a' && (IO == 'b' || NA =~ 'ba.*')", "NA == 'a' && (IO == 'b' || filter:includeRegex(NA, 'ba.*'))"); - test("NA == 'a' && (NA == 'b' || NA =~ 'ba.*')", "NA == 'a' && (NA == 'b' || filter:includeRegex(NA, 'ba.*'))"); + test("NA == 'a' && (F == 'b' || NA =~ 'ba.*')", "NA == 'a' && (F == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))"); + test("NA == 'a' && (IO == 'b' || NA =~ 'ba.*')", "NA == 'a' && (IO == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))"); + test("NA == 'a' && (NA == 'b' || NA =~ 'ba.*')", "NA == 'a' && (NA == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))"); } // A or (B and regex) @@ -227,67 +227,67 @@ public void testUnionWithNestedIntersectionWithSingleRegex() { @Test public void testIntersectionWithNestedUnionOfRegexes() { // indexed term and union of regexes with all possible index states - test("F == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "F == 'a' && 
(filter:includeRegex(F, 'ab.*') || filter:includeRegex(F, 'ac.*'))"); - test("F == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "F == 'a' && (filter:includeRegex(F, 'ab.*') || IO =~ 'ac.*')"); - test("F == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(NA, 'ac.*'))"); + test("F == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (F =~ 'ac.*')))"); + test("F == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || IO =~ 'ac.*')"); + test("F == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))"); test("F == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')"); - test("F == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (IO =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))"); - test("F == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (filter:includeRegex(NA, 'ab.*') || filter:includeRegex(NA, 'ac.*'))"); + test("F == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (IO =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))"); + test("F == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (NA =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))"); // index only term and union of regexes with all possible index states - test("IO == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(F, 'ac.*'))"); - test("IO == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(F, 'ab.*') || IO =~ 'ac.*')"); - test("IO == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(NA, 'ac.*'))"); + test("IO == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (F =~ 'ac.*')))"); + test("IO == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "IO == 'a' && (((_Eval_ = 
true) && (F =~ 'ab.*')) || IO =~ 'ac.*')"); + test("IO == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))"); test("IO == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')"); - test("IO == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (IO =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))"); - test("IO == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (filter:includeRegex(NA, 'ab.*') || filter:includeRegex(NA, 'ac.*'))"); + test("IO == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (IO =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))"); + test("IO == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (NA =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))"); // non-indexed tem and union of regexes with all possible index states test("NA == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')"); test("NA == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')"); - test("NA == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (F =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))"); + test("NA == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (F =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))"); test("NA == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')"); - test("NA == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (IO =~ 'ab.*' || filter:includeRegex(NA, 'ac.*'))"); - test("NA == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (filter:includeRegex(NA, 'ab.*') || filter:includeRegex(NA, 'ac.*'))"); + test("NA == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (IO =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))"); + test("NA == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (((_Eval_ = true) && (NA =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))"); } // A or (regex and regex) @Test public void testUnionWithNestedIntersectionOfRegexes() { // indexed term or intersection of regexes with all possible index states - test("F == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "F 
== 'a' || (filter:includeRegex(F, 'ab.*') && F =~ 'ac.*')"); - test("F == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "F == 'a' || (filter:includeRegex(F, 'ab.*') && IO =~ 'ac.*')"); - test("F == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (F =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))"); + test("F == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "F == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && F =~ 'ac.*')"); + test("F == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "F == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && IO =~ 'ac.*')"); + test("F == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (F =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))"); test("F == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')"); - test("F == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (IO =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))"); - test("F == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (filter:includeRegex(NA, 'ab.*') && filter:includeRegex(NA, 'ac.*'))"); + test("F == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (IO =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))"); + test("F == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (((_Eval_ = true) && (NA =~ 'ab.*')) && ((_Eval_ = true) && (NA =~ 'ac.*')))"); // index only term or intersection of regexes with all possible index states - test("IO == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "IO == 'a' || (filter:includeRegex(F, 'ab.*') && F =~ 'ac.*')"); - test("IO == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "IO == 'a' || (filter:includeRegex(F, 'ab.*') && IO =~ 'ac.*')"); - test("IO == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (F =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))"); + test("IO == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "IO == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && F =~ 'ac.*')"); + test("IO == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "IO == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && IO =~ 'ac.*')"); + test("IO == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (F =~ 'ab.*' && 
((_Eval_ = true) && (NA =~ 'ac.*')))"); test("IO == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')"); - test("IO == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (IO =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))"); - test("IO == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (filter:includeRegex(NA, 'ab.*') && filter:includeRegex(NA, 'ac.*'))"); + test("IO == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (IO =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))"); + test("IO == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (((_Eval_ = true) && (NA =~ 'ab.*')) && ((_Eval_ = true) && (NA =~ 'ac.*')))"); // non-indexed tem or intersection of regexes with all possible index states - test("NA == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "NA == 'a' || (filter:includeRegex(F, 'ab.*') && F =~ 'ac.*')"); - test("NA == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "NA == 'a' || (filter:includeRegex(F, 'ab.*') && IO =~ 'ac.*')"); - test("NA == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (F =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))"); + test("NA == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "NA == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && F =~ 'ac.*')"); + test("NA == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "NA == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && IO =~ 'ac.*')"); + test("NA == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (F =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))"); test("NA == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')"); - test("NA == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (IO =~ 'ab.*' && filter:includeRegex(NA, 'ac.*'))"); - test("NA == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (filter:includeRegex(NA, 'ab.*') && filter:includeRegex(NA, 'ac.*'))"); + test("NA == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (IO =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))"); + test("NA == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (((_Eval_ = true) && (NA =~ 'ab.*')) && ((_Eval_ = true) && (NA =~ 
'ac.*')))"); } // (A or regex) and (B or regex) @Test public void testNestedUnionsWithDistributedRegexes() { String query = "(F == 'a' || F =~ 'ab.*') && (F == 'b' || F =~ 'ac.*')"; - String expected = "(F == 'a' || filter:includeRegex(F, 'ab.*')) && (F == 'b' || F =~ 'ac.*')"; + String expected = "(F == 'a' || ((_Eval_ = true) && (F =~ 'ab.*'))) && (F == 'b' || F =~ 'ac.*')"; test(query, expected); query = "(F == 'a' || NA =~ 'ab.*') && (F == 'b' || F =~ 'ac.*')"; - expected = "(F == 'a' || filter:includeRegex(NA, 'ab.*')) && (F == 'b' || F =~ 'ac.*')"; + expected = "(F == 'a' || ((_Eval_ = true) && (NA =~ 'ab.*'))) && (F == 'b' || F =~ 'ac.*')"; test(query, expected); } @@ -295,7 +295,7 @@ public void testNestedUnionsWithDistributedRegexes() { @Test public void testNestedIntersectionsWithDistributedRegexes() { String query = "(F == 'a' && F =~ 'ab.*') || (F == 'b' && F =~ 'ac.*')"; - String expected = "(F == 'a' && filter:includeRegex(F, 'ab.*')) || (F == 'b' && filter:includeRegex(F, 'ac.*'))"; + String expected = "(F == 'a' && ((_Eval_ = true) && (F =~ 'ab.*'))) || (F == 'b' && ((_Eval_ = true) && (F =~ 'ac.*')))"; test(query, expected); } @@ -303,7 +303,7 @@ public void testNestedIntersectionsWithDistributedRegexes() { @Test public void testPartialAnchorAndNestedUnionRegex() { String query = "(F == 'a' || F == 'b') && (F =~ 'ab.*' || F =~ 'ac.*')"; - String expected = "(F == 'a' || F == 'b') && (filter:includeRegex(F, 'ab.*') || filter:includeRegex(F, 'ac.*'))"; + String expected = "(F == 'a' || F == 'b') && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (F =~ 'ac.*')))"; test(query, expected); } @@ -311,7 +311,7 @@ public void testPartialAnchorAndNestedUnionRegex() { @Test public void testLeftAnchorAndDeeplyNestedRegex() { String query = "F == 'a' && (F == 'b' || (F == 'c' && F =~ 'ab.*'))"; - String expected = "F == 'a' && (F == 'b' || (F == 'c' && filter:includeRegex(F, 'ab.*')))"; + String expected = "F == 'a' && (F == 'b' || (F == 'c' && 
((_Eval_ = true) && (F =~ 'ab.*'))))"; test(query, expected); } @@ -319,14 +319,14 @@ public void testLeftAnchorAndDeeplyNestedRegex() { @Test public void testRightAnchorAndDeeplyNestedRegex() { String query = "((F =~ 'ab.*' && F == 'c') || F == 'b') && F == 'a'"; - String expected = "((filter:includeRegex(F, 'ab.*') && F == 'c') || F == 'b') && F == 'a'"; + String expected = "((((_Eval_ = true) && (F =~ 'ab.*')) && F == 'c') || F == 'b') && F == 'a'"; test(query, expected); } @Test public void testUnionOfTwoLegalRewrites() { String query = "(F == 'a' && F =~ 'ab.*') || (F == 'b' && F =~ 'ac.*')"; - String expected = "(F == 'a' && filter:includeRegex(F, 'ab.*')) || (F == 'b' && filter:includeRegex(F, 'ac.*'))"; + String expected = "(F == 'a' && ((_Eval_ = true) && (F =~ 'ab.*'))) || (F == 'b' && ((_Eval_ = true) && (F =~ 'ac.*')))"; test(query, expected); } @@ -340,13 +340,13 @@ public void testUnionOfTwoIllegalRewrites() { @Test public void testIncludeFieldsPreventNoRewrites() { withIncludeFields(Set.of("F", "F2")); - test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && filter:includeRegex(F, 'ab.*') && filter:includeRegex(F2, 'ac.*')"); + test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'ab.*')) && ((_Eval_ = true) && (F2 =~ 'ac.*'))"); } @Test public void testIncludeFieldsPreventSomeLegalRewrites() { withIncludeFields(Set.of("F2")); - test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && F =~ 'ab.*' && filter:includeRegex(F2, 'ac.*')"); + test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && F =~ 'ab.*' && ((_Eval_ = true) && (F2 =~ 'ac.*'))"); } @Test @@ -358,7 +358,7 @@ public void testExcludeFieldsPreventAllLegalRewrites() { @Test public void testExcludeFieldsPreventSomeLegalRewrites() { withExcludeFields(Set.of("F2")); - test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && filter:includeRegex(F, 'ab.*') && F2 =~ 'ac.*'"); + test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 
'a' && ((_Eval_ = true) && (F =~ 'ab.*')) && F2 =~ 'ac.*'"); } @Test @@ -374,7 +374,7 @@ public void testPatternBeatsExcludeFields() { withPattern("F", "zz.*"); withExcludeFields(Set.of("F")); // pattern beats exclude fields - test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && filter:includeRegex(F, 'zz.*')"); + test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'zz.*'))"); } @Test @@ -382,7 +382,7 @@ public void testPatternBeatsIncludeFields() { withPattern("F", "zz.*"); withIncludeFields(Set.of("F2")); // pattern beats include fields - test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && filter:includeRegex(F, 'zz.*')"); + test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'zz.*'))"); } @Test @@ -391,7 +391,7 @@ public void testPatternBeatsIncludeAndExcludeFields() { withIncludeFields(Set.of("F2")); withExcludeFields(Set.of("F")); // pattern beats include fields - test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && filter:includeRegex(F, 'zz.*')"); + test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'zz.*'))"); } /** diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java index 6b63061f7a..a9be296a03 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java @@ -115,8 +115,6 @@ void testMarkers() { // @formatter:off String[] anchorMarkers = new String[] { "((_Bounded_ = true) && (F > '2' && F < '5'))", - "((_Delayed_ = true) && (F == '1'))", - "((_Eval_ = true) && (F == '1'))", "((_List_ = true) && ((id = 'id') && (field = 'F') && (params = '{\"ranges\":[[\"[r1\",\"r2]\"],[\"[r3\",\"f4]\"]]}')))", "((_Value_ = true) && (F =~ 'ba.*'))", "((_Term_ = true) && (_ANYFIELD_ =~ 
'ba.*'))" @@ -127,6 +125,8 @@ void testMarkers() { // @formatter:off String[] nonAnchorMarkers = new String[]{ + "((_Delayed_ = true) && (F == '1'))", + "((_Eval_ = true) && (F == '1'))", "((_Hole_ = true) && (F == '1'))", "((_Drop_ = true) && (F == '1'))", "((_Lenient_ = true) && (F == '1'))",