otherCandidates = new LinkedList<>();
+
+ for (int i = 0; i < node.jjtGetNumChildren(); i++) {
+ JexlNode child = node.jjtGetChild(i);
+
+ // this seems expensive; a visitor that returned raw counts, depth, and complexity would be nice to have
+ NodeTypeCount counts = NodeTypeCountVisitor.countNodes(child, ASTERNode.class);
+
+ if (anchorDetectionVisitor.isAnchor(child)) {
+ if (counts.getTotal(ASTERNode.class) > 0) {
+ anchorCandidates.add(child);
+ } else {
+ anchorNonCandidates.add(child);
+ }
+ } else if (counts.getTotal(ASTERNode.class) > 0) {
+ otherCandidates.add(child);
+ }
+ }
+
+ if (!anchorCandidates.isEmpty() || !anchorNonCandidates.isEmpty()) {
+
+ if (!anchorNonCandidates.isEmpty()) {
+ // rewrite all anchor candidates
+ for (JexlNode candidate : anchorCandidates) {
+ candidate.jjtAccept(this, true);
+ }
+ } else {
+ // rewrite all anchor candidates except the last one, to preserve executability
+ for (int i = 0; i < anchorCandidates.size() - 1; i++) {
+ anchorCandidates.get(i).jjtAccept(this, true);
+ }
+ }
+
+ // if any anchor exists, rewrite other candidates
+ for (JexlNode otherCandidate : otherCandidates) {
+ otherCandidate.jjtAccept(this, true);
+ }
+ }
+
+ return data;
+ }
+
+ @Override
+ public Object visit(ASTERNode node, Object data) {
+ String field = JexlASTHelper.getIdentifier(node);
+
+ if (isLegalRewrite(field, data)) {
+
+ // the rewrite is legal for this field; now confirm the configured pattern/include/exclude rules do not filter it out
+ String literal = (String) JexlASTHelper.getLiteralValue(node);
+
+ if (isNodeRewritableFromRules(field, literal)) {
+ JexlNode marker = QueryPropertyMarker.create(node, MarkerType.EVALUATION_ONLY);
+ JexlNodes.replaceChild(node.jjtGetParent(), node, marker);
+ }
+ }
+
+ return data;
+ }
+
+ private boolean isLegalRewrite(String field, Object data) {
+ // never rewrite ANY_FIELD or index-only fields
+ if (field.equals(Constants.ANY_FIELD) || indexOnlyFields.contains(field)) {
+ return false;
+ }
+
+ // otherwise the rewrite is legal when either: 1. an anchor exists elsewhere (signalled by a Boolean passed as the data argument), or
+ // 2. the field is not indexed
+ return data instanceof Boolean || !indexedFields.contains(field);
+ }
+
+ /**
+ * Determine if the node can be rewritten given any configured rules. Precedence: a matching pattern always allows the rewrite; otherwise an excluded field blocks it; otherwise a non-empty include list must contain the field; with no applicable rule the rewrite is allowed.
+ *
+ * @param field
+ * the field of the regex term
+ * @param literal
+ * the literal of the regex term
+ * @return true if the node can be rewritten
+ */
+ private boolean isNodeRewritableFromRules(String field, String literal) {
+ // check patterns first because they supersede include/exclude rules
+ for (RegexRewritePattern pattern : patterns) {
+ if (pattern.matches(field, literal)) {
+ return true;
+ }
+ }
+
+ // exclude fields beat include fields
+ if (!excludeFields.isEmpty() && excludeFields.contains(field)) {
+ return false;
+ }
+
+ if (!includeFields.isEmpty()) {
+ return includeFields.contains(field);
+ }
+
+ return true;
+ }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java
new file mode 100644
index 0000000000..4740b29894
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitor.java
@@ -0,0 +1,232 @@
+package datawave.query.jexl.visitors.pushdown;
+
+import java.util.Set;
+
+import org.apache.commons.jexl3.parser.ASTAndNode;
+import org.apache.commons.jexl3.parser.ASTAssignment;
+import org.apache.commons.jexl3.parser.ASTEQNode;
+import org.apache.commons.jexl3.parser.ASTERNode;
+import org.apache.commons.jexl3.parser.ASTFunctionNode;
+import org.apache.commons.jexl3.parser.ASTGENode;
+import org.apache.commons.jexl3.parser.ASTGTNode;
+import org.apache.commons.jexl3.parser.ASTJexlScript;
+import org.apache.commons.jexl3.parser.ASTLENode;
+import org.apache.commons.jexl3.parser.ASTLTNode;
+import org.apache.commons.jexl3.parser.ASTNENode;
+import org.apache.commons.jexl3.parser.ASTNRNode;
+import org.apache.commons.jexl3.parser.ASTNotNode;
+import org.apache.commons.jexl3.parser.ASTOrNode;
+import org.apache.commons.jexl3.parser.ASTReference;
+import org.apache.commons.jexl3.parser.ASTReferenceExpression;
+import org.apache.commons.jexl3.parser.JexlNode;
+
+import datawave.query.jexl.JexlASTHelper;
+import datawave.query.jexl.nodes.QueryPropertyMarker;
+import datawave.query.jexl.visitors.ShortCircuitBaseVisitor;
+
+/**
+ * Determines if a subtree is an anchor for a given query
+ * <p>
+ * An anchor is defined as an executable leaf or subtree: a comparison leaf is an anchor when its field is indexed or index only, a union is an anchor only when
+ * every child is an anchor, and an intersection is an anchor when at least one child is an anchor. Negations, assignments and functions are never anchors.
+ */
+public class AnchorDetectionVisitor extends ShortCircuitBaseVisitor {
+
+ private final Set<String> indexedFields;
+ private final Set<String> indexOnlyFields;
+
+ /**
+ * Default constructor
+ *
+ * @param indexedFields
+ * the set of indexed query fields
+ * @param indexOnlyFields
+ * the set of index only query fields
+ */
+ public AnchorDetectionVisitor(Set<String> indexedFields, Set<String> indexOnlyFields) {
+ this.indexedFields = indexedFields;
+ this.indexOnlyFields = indexOnlyFields;
+ }
+
+ /**
+ * Determine if the subtree rooted at the provided node is an anchor
+ *
+ * @param node
+ * the root of the subtree
+ * @return true if the subtree is an anchor
+ */
+ public boolean isAnchor(JexlNode node) {
+ // a visit result other than Boolean.TRUE (including null) counts as "not an anchor"; this avoids failing on an unboxing cast
+ return Boolean.TRUE.equals(node.jjtAccept(this, null));
+ }
+
+ // pass through nodes
+
+ @Override
+ public Object visit(ASTJexlScript node, Object data) {
+ return node.jjtGetChild(0).jjtAccept(this, data);
+ }
+
+ @Override
+ public Object visit(ASTReference node, Object data) {
+ return node.jjtGetChild(0).jjtAccept(this, data);
+ }
+
+ @Override
+ public Object visit(ASTReferenceExpression node, Object data) {
+ return node.jjtGetChild(0).jjtAccept(this, data);
+ }
+
+ @Override
+ public Object visit(ASTAssignment node, Object data) {
+ return false;
+ }
+
+ @Override
+ public Object visit(ASTNotNode node, Object data) {
+ return false;
+ }
+
+ // junction nodes
+
+ /**
+ * An OrNode is considered an anchor if and only if all children are anchor nodes
+ *
+ * @param node
+ * a JexlNode
+ * @param data
+ * an Object
+ * @return True if this node is an anchor
+ */
+ @Override
+ public Object visit(ASTOrNode node, Object data) {
+ for (int i = 0; i < node.jjtGetNumChildren(); i++) {
+ // Boolean.TRUE.equals tolerates a child visit that returns null or a non-Boolean value
+ if (!Boolean.TRUE.equals(node.jjtGetChild(i).jjtAccept(this, data))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * An AndNode is considered an anchor if at least one child node is an anchor. An AndNode that is a query property marker is judged by its marker type
+ * instead.
+ *
+ * @param node
+ * a JexlNode
+ * @param data
+ * an Object
+ * @return True if this node is an anchor
+ */
+ @Override
+ public Object visit(ASTAndNode node, Object data) {
+ QueryPropertyMarker.Instance instance = QueryPropertyMarker.findInstance(node);
+ if (instance.isAnyType()) {
+ return visitMarker(instance);
+ }
+
+ for (int i = 0; i < node.jjtGetNumChildren(); i++) {
+ if (Boolean.TRUE.equals(node.jjtGetChild(i).jjtAccept(this, data))) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // leaf nodes
+
+ @Override
+ public Object visit(ASTEQNode node, Object data) {
+ return visitLeaf(node);
+ }
+
+ @Override
+ public Object visit(ASTNENode node, Object data) {
+ return visitLeaf(node);
+ }
+
+ @Override
+ public Object visit(ASTLTNode node, Object data) {
+ return visitLeaf(node);
+ }
+
+ @Override
+ public Object visit(ASTGTNode node, Object data) {
+ return visitLeaf(node);
+ }
+
+ @Override
+ public Object visit(ASTLENode node, Object data) {
+ return visitLeaf(node);
+ }
+
+ @Override
+ public Object visit(ASTGENode node, Object data) {
+ return visitLeaf(node);
+ }
+
+ @Override
+ public Object visit(ASTERNode node, Object data) {
+ return visitLeaf(node);
+ }
+
+ @Override
+ public Object visit(ASTNRNode node, Object data) {
+ return visitLeaf(node);
+ }
+
+ @Override
+ public Object visit(ASTFunctionNode node, Object data) {
+ return false;
+ }
+
+ /**
+ * A leaf is an anchor when its field is indexed or index only. EQ and NE leaves must additionally have a non-null literal.
+ *
+ * @param node
+ * a leaf node
+ * @return true if the leaf is an anchor
+ */
+ private boolean visitLeaf(JexlNode node) {
+ String field = JexlASTHelper.getIdentifier(node, true);
+ // a leaf without a resolvable field is never an anchor; checking first also avoids contains(null) on null-hostile sets such as Set.of(..)
+ if (field == null || !(indexedFields.contains(field) || indexOnlyFields.contains(field))) {
+ return false;
+ }
+
+ if (node instanceof ASTEQNode || node instanceof ASTNENode) {
+ return JexlASTHelper.getLiteralValue(node) != null;
+ }
+ return true;
+ }
+
+ /**
+ * Determine if a marked subtree is an anchor based on its marker type
+ *
+ * @param instance
+ * the marker instance
+ * @return true for bounded range and exceeded or/term/value markers, false for every other marker type or a missing type
+ */
+ private Object visitMarker(QueryPropertyMarker.Instance instance) {
+
+ if (instance == null || instance.getType() == null) {
+ return false;
+ }
+
+ // might need to handle double markers, such as delayed bounded ranges
+
+ switch (instance.getType()) {
+ case BOUNDED_RANGE:
+ case EXCEEDED_OR:
+ case EXCEEDED_TERM:
+ case EXCEEDED_VALUE:
+ return true;
+ case DELAYED:
+ case EVALUATION_ONLY:
+ default:
+ return false;
+ }
+ }
+
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
index 8fbebd477e..c3684a2095 100644
--- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
+++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java
@@ -147,6 +147,7 @@
import datawave.query.jexl.visitors.RegexIndexExpansionVisitor;
import datawave.query.jexl.visitors.RewriteNegationsVisitor;
import datawave.query.jexl.visitors.RewriteNullFunctionsVisitor;
+import datawave.query.jexl.visitors.RewriteRegexVisitor;
import datawave.query.jexl.visitors.SetMembershipVisitor;
import datawave.query.jexl.visitors.SortedUIDsRequiredVisitor;
import datawave.query.jexl.visitors.TermCountingVisitor;
@@ -352,6 +353,11 @@ public class DefaultQueryPlanner extends QueryPlanner implements Cloneable {
*/
protected boolean showReducedQueryPrune = true;
+ /**
+ * Controls optimistic rewriting of regex terms as filter functions, preserving overall query executability
+ */
+ private RegexRewriteOptions regexRewriteOptions;
+
// handles boilerplate operations that surround a visitor's execution (e.g., timers, logging, validating)
private TimedVisitorManager visitorManager = new TimedVisitorManager();
@@ -387,6 +393,7 @@ protected DefaultQueryPlanner(DefaultQueryPlanner other) {
rangeStreamClass = other.rangeStreamClass;
setSourceLimit(other.sourceLimit);
setPushdownThreshold(other.getPushdownThreshold());
+ setRegexRewriteOptions(other.getRegexRewriteOptions());
setVisitorManager(other.getVisitorManager());
setTransformRules(other.getTransformRules() == null ? null : new ArrayList<>(other.transformRules));
}
@@ -903,6 +910,16 @@ protected ASTJexlScript updateQueryTree(ScannerFactory scannerFactory, MetadataH
config.setQueryTree(timedEnforceUniqueDisjunctionsWithinExpressions(timers, config.getQueryTree()));
}
+ // rewrite regex nodes, optimistically
+ if (regexRewriteOptions != null && regexRewriteOptions.isPreExpansionEnabled()) {
+ // @formatter:off
+ RewriteRegexVisitor.rewrite(config.getQueryTree(), getIndexedFields(), indexOnlyFields,
+ regexRewriteOptions.getPreExpansionIncludeFields(),
+ regexRewriteOptions.getPreExpansionExcludeFields(),
+ regexRewriteOptions.getPreExpansionPatterns());
+ // @formatter:on
+ }
+
if (disableBoundedLookup) {
// protection mechanism. If we disable bounded ranges and have a
// LT,GT or ER node, we should expand it
@@ -1057,6 +1074,16 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard
config.setQueryTree(timedPushFunctions(timers, config.getQueryTree(), config, metadataHelper));
}
+ // rewrite regex nodes, optimistically
+ if (regexRewriteOptions != null && regexRewriteOptions.isPostExpansionEnabled()) {
+ // @formatter:off
+ RewriteRegexVisitor.rewrite(config.getQueryTree(), indexedFields, indexOnlyFields,
+ regexRewriteOptions.getPostExpansionIncludeFields(),
+ regexRewriteOptions.getPostExpansionExcludeFields(),
+ regexRewriteOptions.getPostExpansionPatterns());
+ // @formatter:on
+ }
+
if (executableExpansion) {
config.setQueryTree(timedExecutableExpansion(timers, config.getQueryTree(), config, metadataHelper));
}
@@ -3278,6 +3305,14 @@ public void finalize() {
}
}
+ public RegexRewriteOptions getRegexRewriteOptions() {
+ return regexRewriteOptions;
+ }
+
+ public void setRegexRewriteOptions(RegexRewriteOptions regexRewriteOptions) {
+ this.regexRewriteOptions = regexRewriteOptions;
+ }
+
protected CompositeMetadata getCompositeMetadata() {
if (compositeMetadata == null && compositeMetadataCallable != null) {
TraceStopwatch stopwatch = stageStopWatch.newStartedStopwatch(compositeMetadataCallable.stageName());
diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/RegexRewriteOptions.java b/warehouse/query-core/src/main/java/datawave/query/planner/RegexRewriteOptions.java
new file mode 100644
index 0000000000..966a957ec8
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/planner/RegexRewriteOptions.java
@@ -0,0 +1,97 @@
+package datawave.query.planner;
+
+import java.util.Collections;
+import java.util.Set;
+
+import datawave.query.jexl.visitors.RegexRewritePattern;
+import datawave.query.jexl.visitors.RewriteRegexVisitor;
+
+/**
+ * Provides fine-grain control over how the {@link RewriteRegexVisitor} operates pre and post index expansion
+ * <p>
+ * Each phase has its own enabled flag (disabled by default), include fields, exclude fields and patterns. The collections default to empty sets, and a null passed
+ * to a collection setter is stored as an empty set so the getters never return null.
+ */
+public class RegexRewriteOptions {
+
+ // options applied before index expansion
+ private boolean preExpansionEnabled = false;
+ private Set<String> preExpansionIncludeFields = Collections.emptySet();
+ private Set<String> preExpansionExcludeFields = Collections.emptySet();
+ private Set<RegexRewritePattern> preExpansionPatterns = Collections.emptySet();
+
+ // options applied after index expansion
+ private boolean postExpansionEnabled = false;
+ private Set<String> postExpansionIncludeFields = Collections.emptySet();
+ private Set<String> postExpansionExcludeFields = Collections.emptySet();
+ private Set<RegexRewritePattern> postExpansionPatterns = Collections.emptySet();
+
+ public boolean isPreExpansionEnabled() {
+ return preExpansionEnabled;
+ }
+
+ public void setPreExpansionEnabled(boolean preExpansionEnabled) {
+ this.preExpansionEnabled = preExpansionEnabled;
+ }
+
+ public Set<String> getPreExpansionIncludeFields() {
+ return preExpansionIncludeFields;
+ }
+
+ public void setPreExpansionIncludeFields(Set<String> preExpansionIncludeFields) {
+ this.preExpansionIncludeFields = emptyIfNull(preExpansionIncludeFields);
+ }
+
+ public Set<String> getPreExpansionExcludeFields() {
+ return preExpansionExcludeFields;
+ }
+
+ public void setPreExpansionExcludeFields(Set<String> preExpansionExcludeFields) {
+ this.preExpansionExcludeFields = emptyIfNull(preExpansionExcludeFields);
+ }
+
+ public Set<RegexRewritePattern> getPreExpansionPatterns() {
+ return preExpansionPatterns;
+ }
+
+ public void setPreExpansionPatterns(Set<RegexRewritePattern> preExpansionPatterns) {
+ this.preExpansionPatterns = emptyIfNull(preExpansionPatterns);
+ }
+
+ public boolean isPostExpansionEnabled() {
+ return postExpansionEnabled;
+ }
+
+ public void setPostExpansionEnabled(boolean postExpansionEnabled) {
+ this.postExpansionEnabled = postExpansionEnabled;
+ }
+
+ public Set<String> getPostExpansionIncludeFields() {
+ return postExpansionIncludeFields;
+ }
+
+ public void setPostExpansionIncludeFields(Set<String> postExpansionIncludeFields) {
+ this.postExpansionIncludeFields = emptyIfNull(postExpansionIncludeFields);
+ }
+
+ public Set<String> getPostExpansionExcludeFields() {
+ return postExpansionExcludeFields;
+ }
+
+ public void setPostExpansionExcludeFields(Set<String> postExpansionExcludeFields) {
+ this.postExpansionExcludeFields = emptyIfNull(postExpansionExcludeFields);
+ }
+
+ public Set<RegexRewritePattern> getPostExpansionPatterns() {
+ return postExpansionPatterns;
+ }
+
+ public void setPostExpansionPatterns(Set<RegexRewritePattern> postExpansionPatterns) {
+ this.postExpansionPatterns = emptyIfNull(postExpansionPatterns);
+ }
+
+ // a null collection is treated as "nothing configured"
+ private static <T> Set<T> emptyIfNull(Set<T> set) {
+ return set == null ? Collections.<T>emptySet() : set;
+ }
+}
diff --git a/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java b/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java
index a44295201e..6aa887d348 100644
--- a/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/ShapesTest.java
@@ -908,11 +908,34 @@ public void testSortQueryPreIndexWithFieldCounts() throws Exception {
}
}
+ @Test
+ public void testRewriteRegexFromIncludes() throws Exception {
+ withQuery("ONLY_HEX == 'hexa' && TYPE =~ 'reg.*'");
+ withExpected(Sets.newHashSet(ShapesIngest.hexagonUid));
+ planAndExecuteQuery();
+ assertPlannedQuery("ONLY_HEX == 'hexa' && ((_Eval_ = true) && (TYPE =~ 'reg.*'))");
+ }
+
+ @Test
+ public void testDoNotRewriteRegexWithExcludedField() throws Exception {
+ withQuery("ONLY_HEX == 'hexa' && SHAPE =~ 'hex.*'");
+ withExpected(Sets.newHashSet(ShapesIngest.hexagonUid));
+ planAndExecuteQuery();
+ assertPlannedQuery("ONLY_HEX == 'hexa' && ((_Delayed_ = true) && (SHAPE =~ 'hex.*'))");
+ }
+
+ @Test
+ public void testRewriteRegexWithExcludedFieldBecauseOfPatternMatch() throws Exception {
+ withQuery("ONLY_HEX == 'hexa' && SHAPE =~ 'hexag.*'");
+ withExpected(Sets.newHashSet(ShapesIngest.hexagonUid));
+ planAndExecuteQuery();
+ assertPlannedQuery("ONLY_HEX == 'hexa' && ((_Eval_ = true) && (SHAPE =~ 'hexag.*'))");
+ }
+
private void disableAllSortOptions() {
logic.setSortQueryPreIndexWithImpliedCounts(false);
logic.setSortQueryPreIndexWithFieldCounts(false);
logic.setSortQueryPostIndexWithFieldCounts(false);
logic.setSortQueryPostIndexWithTermCounts(false);
}
-
}
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java
new file mode 100644
index 0000000000..e77c8d6fc5
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/RewriteRegexVisitorTest.java
@@ -0,0 +1,442 @@
+package datawave.query.jexl.visitors;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.commons.jexl3.parser.ASTJexlScript;
+import org.apache.commons.jexl3.parser.ParseException;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import datawave.query.jexl.JexlASTHelper;
+
+public class RewriteRegexVisitorTest {
+
+ private final Set indexedFields = Set.of("F", "F2", "IO", "IO2");
+ private final Set indexOnlyFields = Set.of("IO", "IO2");
+
+ private final Set includeFields = new HashSet<>();
+ private final Set excludeFields = new HashSet<>();
+
+ private final Set patterns = new HashSet<>();
+
+ @BeforeEach
+ public void beforeEach() {
+ includeFields.clear();
+ excludeFields.clear();
+ patterns.clear();
+ }
+
+ // A and regex
+ @Test
+ public void testSingleTermAndRegex() {
+ // term and indexed regex
+ test("F == 'a' && F =~ 'ba.*'", "F == 'a' && ((_Eval_ = true) && (F =~ 'ba.*'))");
+ test("IO == 'a' && F =~ 'ba.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'ba.*'))");
+ test("NA == 'a' && F =~ 'ba.*'");
+
+ // term and index only regex is never rewritten
+ test("F == 'a' && IO =~ 'ba.*'");
+ test("IO == 'a' && IO =~ 'ba.*'");
+ test("NA == 'a' && IO =~ 'ba.*'");
+
+ // term and non-indexed regex is always rewritten
+ test("F == 'a' && NA =~ 'ba.*'", "F == 'a' && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("IO == 'a' && NA =~ 'ba.*'", "IO == 'a' && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("NA == 'a' && NA =~ 'ba.*'", "NA == 'a' && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ }
+
+ // A or regex
+ @Test
+ public void testSingleTermOrRegex() {
+ // term or indexed regex is never rewritten
+ test("F == 'a' || F =~ 'ba.*'");
+ test("IO == 'a' || F =~ 'ba.*'");
+ test("NA == 'a' || F =~ 'ba.*'");
+
+ // term or index only regex is never rewritten
+ test("F == 'a' || IO =~ 'ba.*'");
+ test("IO == 'a' || IO =~ 'ba.*'");
+ test("NA == 'a' || IO =~ 'ba.*'");
+
+ // top level union with non-indexed regex is a full table scan; the non-indexed regex is still rewritten
+ test("F == 'a' || NA =~ 'ba.*'", "F == 'a' || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("IO == 'a' || NA =~ 'ba.*'", "IO == 'a' || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("NA == 'a' || NA =~ 'ba.*'", "NA == 'a' || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ }
+
+ // (A and B) or regex
+ @Test
+ public void testNestedIntersectionOrRegex() {
+ // all combinations of nested intersection and indexed regex
+ test("(F == 'a' && F == 'b') || F =~ 'ba.*'");
+ test("(F == 'a' && IO == 'b') || F =~ 'ba.*'");
+ test("(F == 'a' && NA == 'b') || F =~ 'ba.*'");
+ test("(IO == 'a' && IO == 'b') || F =~ 'ba.*'");
+ test("(IO == 'a' && NA == 'b') || F =~ 'ba.*'");
+ test("(NA == 'a' && NA == 'b') || F =~ 'ba.*'");
+
+ // all combinations of nested intersection and index only regex
+ test("(F == 'a' && F == 'b') || IO =~ 'ba.*'");
+ test("(F == 'a' && IO == 'b') || IO =~ 'ba.*'");
+ test("(F == 'a' && NA == 'b') || IO =~ 'ba.*'");
+ test("(IO == 'a' && IO == 'b') || IO =~ 'ba.*'");
+ test("(IO == 'a' && NA == 'b') || IO =~ 'ba.*'");
+ test("(NA == 'a' && NA == 'b') || IO =~ 'ba.*'");
+
+ // the input queries are non-executable, non-indexed field still gets rewritten
+ // all combinations of nested intersection and non-indexed regex
+ test("(F == 'a' && F == 'b') || NA =~ 'ba.*'", "(F == 'a' && F == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(F == 'a' && IO == 'b') || NA =~ 'ba.*'", "(F == 'a' && IO == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(F == 'a' && NA == 'b') || NA =~ 'ba.*'", "(F == 'a' && NA == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(IO == 'a' && IO == 'b') || NA =~ 'ba.*'", "(IO == 'a' && IO == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(IO == 'a' && NA == 'b') || NA =~ 'ba.*'", "(IO == 'a' && NA == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(NA == 'a' && NA == 'b') || NA =~ 'ba.*'", "(NA == 'a' && NA == 'b') || ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ }
+
+ // (A or B) and regex
+ @Test
+ public void testNestedUnionAndRegex() {
+ // all combinations of nested union and indexed regex
+ test("(F == 'a' || F == 'b') && F =~ 'ba.*'", "(F == 'a' || F == 'b') && ((_Eval_ = true) && (F =~ 'ba.*'))");
+ test("(F == 'a' || IO == 'b') && F =~ 'ba.*'", "(F == 'a' || IO == 'b') && ((_Eval_ = true) && (F =~ 'ba.*'))");
+ test("(F == 'a' || NA == 'b') && F =~ 'ba.*'");
+ test("(IO == 'a' || IO == 'b') && F =~ 'ba.*'", "(IO == 'a' || IO == 'b') && ((_Eval_ = true) && (F =~ 'ba.*'))");
+ test("(IO == 'a' || NA == 'b') && F =~ 'ba.*'");
+ test("(NA == 'a' || NA == 'b') && F =~ 'ba.*'");
+
+ // all combinations of nested union and index only regex
+ test("(F == 'a' || F == 'b') && IO =~ 'ba.*'");
+ test("(F == 'a' || IO == 'b') && IO =~ 'ba.*'");
+ test("(F == 'a' || NA == 'b') && IO =~ 'ba.*'");
+ test("(IO == 'a' || IO == 'b') && IO =~ 'ba.*'");
+ test("(IO == 'a' || NA == 'b') && IO =~ 'ba.*'");
+ test("(NA == 'a' || NA == 'b') && IO =~ 'ba.*'");
+
+ // all combinations of nested union and non-indexed regex
+ test("(F == 'a' || F == 'b') && NA =~ 'ba.*'", "(F == 'a' || F == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(F == 'a' || IO == 'b') && NA =~ 'ba.*'", "(F == 'a' || IO == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(F == 'a' || NA == 'b') && NA =~ 'ba.*'", "(F == 'a' || NA == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(IO == 'a' || IO == 'b') && NA =~ 'ba.*'", "(IO == 'a' || IO == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(IO == 'a' || NA == 'b') && NA =~ 'ba.*'", "(IO == 'a' || NA == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ test("(NA == 'a' || NA == 'b') && NA =~ 'ba.*'", "(NA == 'a' || NA == 'b') && ((_Eval_ = true) && (NA =~ 'ba.*'))");
+ }
+
+ // A and (B or regex)
+ @Test
+ public void testIntersectionWithNestedUnionWithSingleRegex() {
+ // top level indexed term, variable indexed state for nested term, indexed regex
+ test("F == 'a' && (F == 'b' || F =~ 'ba.*')", "F == 'a' && (F == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
+ test("F == 'a' && (IO == 'b' || F =~ 'ba.*')", "F == 'a' && (IO == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
+ test("F == 'a' && (NA == 'b' || F =~ 'ba.*')", "F == 'a' && (NA == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
+
+ // top level indexed term, variable indexed state for nested term, index only regex
+ test("F == 'a' && (F == 'b' || IO =~ 'ba.*')");
+ test("F == 'a' && (IO == 'b' || IO =~ 'ba.*')");
+ test("F == 'a' && (NA == 'b' || IO =~ 'ba.*')");
+
+ // top level indexed term, variable indexed state for nested term, non-indexed regex
+ test("F == 'a' && (F == 'b' || NA =~ 'ba.*')", "F == 'a' && (F == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("F == 'a' && (IO == 'b' || NA =~ 'ba.*')", "F == 'a' && (IO == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("F == 'a' && (NA == 'b' || NA =~ 'ba.*')", "F == 'a' && (NA == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+
+ // top level index only term, variable indexed state for nested term, indexed regex
+ test("IO == 'a' && (F == 'b' || F =~ 'ba.*')", "IO == 'a' && (F == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
+ test("IO == 'a' && (IO == 'b' || F =~ 'ba.*')", "IO == 'a' && (IO == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
+ test("IO == 'a' && (NA == 'b' || F =~ 'ba.*')", "IO == 'a' && (NA == 'b' || ((_Eval_ = true) && (F =~ 'ba.*')))");
+
+ // top level index only term, variable indexed state for nested term, index only regex
+ test("IO == 'a' && (F == 'b' || IO =~ 'ba.*')");
+ test("IO == 'a' && (IO == 'b' || IO =~ 'ba.*')");
+ test("IO == 'a' && (NA == 'b' || IO =~ 'ba.*')");
+
+ // top level index only term, variable indexed state for nested term, non-indexed regex
+ test("IO == 'a' && (F == 'b' || NA =~ 'ba.*')", "IO == 'a' && (F == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("IO == 'a' && (IO == 'b' || NA =~ 'ba.*')", "IO == 'a' && (IO == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("IO == 'a' && (NA == 'b' || NA =~ 'ba.*')", "IO == 'a' && (NA == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+
+ // top level non-indexed term, variable indexed state for nested term, indexed regex
+ test("NA == 'a' && (F == 'b' || F =~ 'ba.*')");
+ test("NA == 'a' && (IO == 'b' || F =~ 'ba.*')");
+ test("NA == 'a' && (NA == 'b' || F =~ 'ba.*')");
+
+ // top level non-indexed term, variable indexed state for nested term, index only regex
+ test("NA == 'a' && (F == 'b' || IO =~ 'ba.*')");
+ test("NA == 'a' && (IO == 'b' || IO =~ 'ba.*')");
+ test("NA == 'a' && (NA == 'b' || IO =~ 'ba.*')");
+
+ // top level non-indexed term, variable indexed state for nested term, non-indexed regex
+ test("NA == 'a' && (F == 'b' || NA =~ 'ba.*')", "NA == 'a' && (F == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("NA == 'a' && (IO == 'b' || NA =~ 'ba.*')", "NA == 'a' && (IO == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ test("NA == 'a' && (NA == 'b' || NA =~ 'ba.*')", "NA == 'a' && (NA == 'b' || ((_Eval_ = true) && (NA =~ 'ba.*')))");
+ }
+
+ // A or (B and regex)
+ @Test
+ public void testUnionWithNestedIntersectionWithSingleRegex() {
+ // top level indexed, variable index state of nested term, indexed regex (NOTE(review): the 'regex' terms throughout this test use '==' rather than '=~', so no regex node is actually exercised -- confirm intent)
+ test("F == 'a' || (F == 'b' && F == 'ab.*')");
+ test("F == 'a' || (IO == 'b' && F == 'ab.*')");
+ test("F == 'a' || (NA == 'b' && F == 'ab.*')");
+
+ // top level indexed, variable index state of nested term, index only regex
+ test("F == 'a' || (F == 'b' && IO == 'ab.*')");
+ test("F == 'a' || (IO == 'b' && IO == 'ab.*')");
+ test("F == 'a' || (NA == 'b' && IO == 'ab.*')");
+
+ // top level indexed, variable index state of nested term, non-indexed regex
+ test("F == 'a' || (F == 'b' && NA == 'ab.*')");
+ test("F == 'a' || (IO == 'b' && NA == 'ab.*')");
+ test("F == 'a' || (NA == 'b' && NA == 'ab.*')");
+
+ // top level index only, variable index state of nested term, indexed regex
+ test("IO == 'a' || (F == 'b' && F == 'ab.*')");
+ test("IO == 'a' || (IO == 'b' && F == 'ab.*')");
+ test("IO == 'a' || (NA == 'b' && F == 'ab.*')");
+
+ // top level index only, variable index state of nested term, index only regex
+ test("IO == 'a' || (F == 'b' && IO == 'ab.*')");
+ test("IO == 'a' || (IO == 'b' && IO == 'ab.*')");
+ test("IO == 'a' || (NA == 'b' && IO == 'ab.*')");
+
+ // top level index only, variable index state of nested term, non-indexed regex
+ test("IO == 'a' || (F == 'b' && NA == 'ab.*')");
+ test("IO == 'a' || (IO == 'b' && NA == 'ab.*')");
+ test("IO == 'a' || (NA == 'b' && NA == 'ab.*')");
+
+ // top level non-indexed, variable index state of nested term, indexed regex
+ test("NA == 'a' || (F == 'b' && F == 'ab.*')");
+ test("NA == 'a' || (IO == 'b' && F == 'ab.*')");
+ test("NA == 'a' || (NA == 'b' && F == 'ab.*')");
+
+ // top level non-indexed, variable index state of nested term, index only regex
+ test("NA == 'a' || (F == 'b' && IO == 'ab.*')");
+ test("NA == 'a' || (IO == 'b' && IO == 'ab.*')");
+ test("NA == 'a' || (NA == 'b' && IO == 'ab.*')");
+
+ // top level non-indexed, variable index state of nested term, non-indexed regex
+ test("NA == 'a' || (F == 'b' && NA == 'ab.*')");
+ test("NA == 'a' || (IO == 'b' && NA == 'ab.*')");
+ test("NA == 'a' || (NA == 'b' && NA == 'ab.*')");
+ }
+
+ // A and (regex or regex)
+ @Test
+ public void testIntersectionWithNestedUnionOfRegexes() {
+ // indexed term and union of regexes with all possible index states
+ test("F == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (F =~ 'ac.*')))");
+ test("F == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || IO =~ 'ac.*')");
+ test("F == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("F == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')");
+ test("F == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (IO =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("F == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "F == 'a' && (((_Eval_ = true) && (NA =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+
+ // index only term and union of regexes with all possible index states
+ test("IO == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (F =~ 'ac.*')))");
+ test("IO == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || IO =~ 'ac.*')");
+ test("IO == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("IO == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')");
+ test("IO == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (IO =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("IO == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "IO == 'a' && (((_Eval_ = true) && (NA =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+
+ // non-indexed term and union of regexes with all possible index states
+ test("NA == 'a' && (F =~ 'ab.*' || F =~ 'ac.*')");
+ test("NA == 'a' && (F =~ 'ab.*' || IO =~ 'ac.*')");
+ test("NA == 'a' && (F =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (F =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("NA == 'a' && (IO =~ 'ab.*' || IO =~ 'ac.*')");
+ test("NA == 'a' && (IO =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (IO =~ 'ab.*' || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("NA == 'a' && (NA =~ 'ab.*' || NA =~ 'ac.*')", "NA == 'a' && (((_Eval_ = true) && (NA =~ 'ab.*')) || ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ }
+
+ // A or (regex and regex)
+ @Test
+ public void testUnionWithNestedIntersectionOfRegexes() {
+ // indexed term or intersection of regexes with all possible index states
+ test("F == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "F == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && F =~ 'ac.*')");
+ test("F == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "F == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && IO =~ 'ac.*')");
+ test("F == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (F =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("F == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')");
+ test("F == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (IO =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("F == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "F == 'a' || (((_Eval_ = true) && (NA =~ 'ab.*')) && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+
+ // index only term or intersection of regexes with all possible index states
+ test("IO == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "IO == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && F =~ 'ac.*')");
+ test("IO == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "IO == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && IO =~ 'ac.*')");
+ test("IO == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (F =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("IO == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')");
+ test("IO == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (IO =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("IO == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "IO == 'a' || (((_Eval_ = true) && (NA =~ 'ab.*')) && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+
+ // non-indexed term or intersection of regexes with all possible index states
+ test("NA == 'a' || (F =~ 'ab.*' && F =~ 'ac.*')", "NA == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && F =~ 'ac.*')");
+ test("NA == 'a' || (F =~ 'ab.*' && IO =~ 'ac.*')", "NA == 'a' || (((_Eval_ = true) && (F =~ 'ab.*')) && IO =~ 'ac.*')");
+ test("NA == 'a' || (F =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (F =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("NA == 'a' || (IO =~ 'ab.*' && IO =~ 'ac.*')");
+ test("NA == 'a' || (IO =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (IO =~ 'ab.*' && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ test("NA == 'a' || (NA =~ 'ab.*' && NA =~ 'ac.*')", "NA == 'a' || (((_Eval_ = true) && (NA =~ 'ab.*')) && ((_Eval_ = true) && (NA =~ 'ac.*')))");
+ }
+
+ // (A or regex) and (B or regex)
+ @Test
+ public void testNestedUnionsWithDistributedRegexes() {
+     // a regex on F in both unions: only the regex in the first union is expected to gain the _Eval_ marker
+     test("(F == 'a' || F =~ 'ab.*') && (F == 'b' || F =~ 'ac.*')",
+                     "(F == 'a' || ((_Eval_ = true) && (F =~ 'ab.*'))) && (F == 'b' || F =~ 'ac.*')");
+
+     // a regex on NA in the first union: again only that regex is expected to gain the _Eval_ marker
+     test("(F == 'a' || NA =~ 'ab.*') && (F == 'b' || F =~ 'ac.*')",
+                     "(F == 'a' || ((_Eval_ = true) && (NA =~ 'ab.*'))) && (F == 'b' || F =~ 'ac.*')");
+ }
+
+ // (A and regex) or (B and regex)
+ @Test
+ public void testNestedIntersectionsWithDistributedRegexes() {
+     // each intersection pairs an equality term with a regex; both regexes are expected to gain the _Eval_ marker
+     test("(F == 'a' && F =~ 'ab.*') || (F == 'b' && F =~ 'ac.*')",
+                     "(F == 'a' && ((_Eval_ = true) && (F =~ 'ab.*'))) || (F == 'b' && ((_Eval_ = true) && (F =~ 'ac.*')))");
+ }
+
+ // (A or B) and (regex or regex)
+ @Test
+ public void testPartialAnchorAndNestedUnionRegex() {
+     // the union of equality terms sits next to a union of regexes; both regexes are expected to gain the _Eval_ marker
+     test("(F == 'a' || F == 'b') && (F =~ 'ab.*' || F =~ 'ac.*')",
+                     "(F == 'a' || F == 'b') && (((_Eval_ = true) && (F =~ 'ab.*')) || ((_Eval_ = true) && (F =~ 'ac.*')))");
+ }
+
+ // A and (B or (C and regex))
+ @Test
+ public void testLeftAnchorAndDeeplyNestedRegex() {
+     // the regex sits two levels below the leading equality term and is still expected to gain the _Eval_ marker
+     test("F == 'a' && (F == 'b' || (F == 'c' && F =~ 'ab.*'))",
+                     "F == 'a' && (F == 'b' || (F == 'c' && ((_Eval_ = true) && (F =~ 'ab.*'))))");
+ }
+
+ // ((regex and C) or B) and A
+ @Test
+ public void testRightAnchorAndDeeplyNestedRegex() {
+     // same shape as the left-anchor case with the operands reversed; the regex is expected to gain the _Eval_ marker
+     test("((F =~ 'ab.*' && F == 'c') || F == 'b') && F == 'a'",
+                     "((((_Eval_ = true) && (F =~ 'ab.*')) && F == 'c') || F == 'b') && F == 'a'");
+ }
+
+ @Test
+ public void testUnionOfTwoLegalRewrites() {
+     // both sides of the union pair an equality term on F with a regex; both regexes are expected to gain the _Eval_ marker
+     test("(F == 'a' && F =~ 'ab.*') || (F == 'b' && F =~ 'ac.*')",
+                     "(F == 'a' && ((_Eval_ = true) && (F =~ 'ab.*'))) || (F == 'b' && ((_Eval_ = true) && (F =~ 'ac.*')))");
+ }
+
+ // (NA and regex) or (NA and regex)
+ @Test
+ public void testUnionOfTwoIllegalRewrites() {
+     // the regexes are only paired with NA equality terms, so the query is expected to come back unchanged
+     test("(NA == 'a' && F =~ 'ab.*') || (NA == 'b' && F =~ 'ac.*')");
+ }
+
+ @Test
+ public void testIncludeFieldsPreventNoRewrites() {
+ withIncludeFields(Set.of("F", "F2"));
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'ab.*')) && ((_Eval_ = true) && (F2 =~ 'ac.*'))");
+ }
+
+ @Test
+ public void testIncludeFieldsPreventSomeLegalRewrites() {
+ withIncludeFields(Set.of("F2"));
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && F =~ 'ab.*' && ((_Eval_ = true) && (F2 =~ 'ac.*'))");
+ }
+
+ @Test
+ public void testExcludeFieldsPreventAllLegalRewrites() {
+ withExcludeFields(Set.of("F", "F2"));
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'");
+ }
+
+ @Test
+ public void testExcludeFieldsPreventSomeLegalRewrites() {
+ withExcludeFields(Set.of("F2"));
+ test("IO == 'a' && F =~ 'ab.*' && F2 =~ 'ac.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'ab.*')) && F2 =~ 'ac.*'");
+ }
+
+ @Test
+ public void testFullyInclusiveIncludeAndExcludeFields() {
+ withIncludeFields(Set.of("F"));
+ withExcludeFields(Set.of("F"));
+ // exclude fields beats include fields
+ test("IO == 'a' && F =~ 'ab.*'");
+ }
+
+ @Test
+ public void testPatternBeatsExcludeFields() {
+ withPattern("F", "zz.*");
+ withExcludeFields(Set.of("F"));
+ // pattern beats exclude fields
+ test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'zz.*'))");
+ }
+
+ @Test
+ public void testPatternBeatsIncludeFields() {
+ withPattern("F", "zz.*");
+ withIncludeFields(Set.of("F2"));
+ // pattern beats include fields
+ test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'zz.*'))");
+ }
+
+ @Test
+ public void testPatternBeatsIncludeAndExcludeFields() {
+ withPattern("F", "zz.*");
+ withIncludeFields(Set.of("F2"));
+ withExcludeFields(Set.of("F"));
+ // pattern beats include fields
+ test("IO == 'a' && F =~ 'zz.*'", "IO == 'a' && ((_Eval_ = true) && (F =~ 'zz.*'))");
+ }
+
+ /**
+  * Assert that the query is left untouched, i.e. that the expected result is the query itself
+  *
+  * @param query
+  *            the query that must come back unchanged
+  */
+ private void test(String query) {
+     String expected = query;
+     test(query, expected);
+ }
+
+ /**
+  * Parse the query, apply the {@link RewriteRegexVisitor} using this test's field sets and patterns, and assert that the rebuilt query string equals the
+  * expected one
+  *
+  * @param query
+  *            the query to parse and rewrite
+  * @param expected
+  *            the query string expected after the rewrite
+  */
+ private void test(String query, String expected) {
+     ASTJexlScript tree = parse(query);
+     // the return value is not used; the assertion is made against the tree that was passed in
+     RewriteRegexVisitor.rewrite(tree, indexedFields, indexOnlyFields, includeFields, excludeFields, patterns);
+     assertEquals(expected, JexlStringBuildingVisitor.buildQuery(tree));
+ }
+
+ private ASTJexlScript parse(String query) {
+ try {
+ return JexlASTHelper.parseAndFlattenJexlQuery(query);
+ } catch (ParseException e) {
+ fail("Failed to parse query: " + query, e);
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+  * Add the given fields to this test's include fields
+  *
+  * @param includeFields
+  *            the field names to add
+  */
+ private void withIncludeFields(Set<String> includeFields) {
+     this.includeFields.addAll(includeFields);
+ }
+
+ /**
+  * Add the given fields to this test's exclude fields
+  *
+  * @param excludeFields
+  *            the field names to add
+  */
+ private void withExcludeFields(Set<String> excludeFields) {
+     this.excludeFields.addAll(excludeFields);
+ }
+
+ /**
+  * Register a {@link RegexRewritePattern} built from the field and literal with this test's patterns
+  *
+  * @param field
+  *            the field
+  * @param literal
+  *            the regex literal
+  */
+ private void withPattern(String field, String literal) {
+     RegexRewritePattern pattern = new RegexRewritePattern(field, literal);
+     patterns.add(pattern);
+ }
+}
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java
new file mode 100644
index 0000000000..a9be296a03
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/pushdown/AnchorDetectionVisitorTest.java
@@ -0,0 +1,274 @@
+package datawave.query.jexl.visitors.pushdown;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.commons.jexl3.parser.ASTJexlScript;
+import org.apache.commons.jexl3.parser.JexlNode;
+import org.junit.jupiter.api.Test;
+
+import datawave.query.jexl.JexlASTHelper;
+
+ /**
+  * Tests for {@link AnchorDetectionVisitor#isAnchor}. The fixture declares {@code F} as the only indexed field and {@code IO} as the only index-only field;
+  * every other field name used in the queries belongs to neither set.
+  */
+ class AnchorDetectionVisitorTest {
+
+     private final Set<String> indexOnlyFields = Collections.singleton("IO");
+     private final Set<String> indexedFields = Collections.singleton("F");
+     // created on first use by getVisitor()
+     private AnchorDetectionVisitor visitor;
+
+     /** Every leaf operator on the indexed field is expected to be an anchor. */
+     @Test
+     void testIndexedLeaves() {
+         // @formatter:off
+         String[] queries = new String[]{
+                         "F == '1'",
+                         "F != '1'",
+                         "F < '2'",
+                         "F > '2'",
+                         "F <= '2'",
+                         "F >= '2'",
+                         "F =~ 'ba.*'",
+                         "F !~ 'ba.*'",
+         };
+         // @formatter:on
+
+         test(queries, true);
+     }
+
+     /** Every leaf operator on the index-only field is expected to be an anchor. */
+     @Test
+     void testIndexOnlyLeaves() {
+         // @formatter:off
+         String[] queries = new String[]{
+                         "IO == '1'",
+                         "IO != '1'",
+                         "IO < '2'",
+                         "IO > '2'",
+                         "IO <= '2'",
+                         "IO >= '2'",
+                         "IO =~ 'ba.*'",
+                         "IO !~ 'ba.*'",
+         };
+         // @formatter:on
+
+         test(queries, true);
+     }
+
+     /** No leaf operator on a field outside both field sets is expected to be an anchor. */
+     @Test
+     void testNonIndexedLeaves() {
+         // @formatter:off
+         String[] queries = new String[]{
+                         "FIELD == '1'",
+                         "FIELD != '1'",
+                         "FIELD < '2'",
+                         "FIELD > '2'",
+                         "FIELD <= '2'",
+                         "FIELD >= '2'",
+                         "FIELD =~ 'ba.*'",
+                         "FIELD !~ 'ba.*'",
+         };
+         // @formatter:on
+
+         test(queries, false);
+     }
+
+     /** Comparisons against a null literal are never expected to be anchors, whatever the field. */
+     @Test
+     void testNullLiterals() {
+         test("F == null", false);
+         test("F != null", false);
+         test("IO == null", false);
+         test("IO != null", false);
+         test("FIELD == null", false);
+         test("FIELD != null", false);
+     }
+
+     /** Filter functions are not expected to be anchors. */
+     @Test
+     void testFilterFunctions() {
+         // NOTE(review): several entries below are listed twice; possibly IO variants were intended -- confirm
+         // @formatter:off
+         String[] queries = new String[]{
+                         // index only include/exclude are rewritten to regex nodes
+                         "filter:include(F, 'ba.*')",
+                         "filter:exclude(F, 'ba.*')",
+                         "filter:include(FIELD, 'ba.*')",
+                         "filter:exclude(FIELD, 'ba.*')",
+                         // isNull functions should be rewritten to 'F == null'
+                         "filter:isNull(F)",
+                         "filter:isNull(F)",
+                         "filter:isNull(FIELD)",
+                         "filter:isNull(FIELD)",
+                         // isNotNull functions should be rewritten to !(F == null)
+                         "filter:isNotNull(F)",
+                         "filter:isNotNull(F)",
+                         "filter:isNotNull(FIELD)",
+                         "filter:isNotNull(FIELD)",
+                         "filter:compare(F,'==','any',F)",
+                         "filter:compare(IO,'==','any',IO)",
+                         "filter:compare(FIELD,'==','any',FIELD)",
+         };
+         // @formatter:on
+
+         test(queries, false);
+     }
+
+     /** Bounded, list, value and term markers are expected to be anchors; the remaining marker types are not. */
+     @Test
+     void testMarkers() {
+         // @formatter:off
+         String[] anchorMarkers = new String[] {
+                         "((_Bounded_ = true) && (F > '2' && F < '5'))",
+                         "((_List_ = true) && ((id = 'id') && (field = 'F') && (params = '{\"ranges\":[[\"[r1\",\"r2]\"],[\"[r3\",\"f4]\"]]}')))",
+                         "((_Value_ = true) && (F =~ 'ba.*'))",
+                         "((_Term_ = true) && (_ANYFIELD_ =~ 'ba.*'))"
+         };
+         // @formatter:on
+
+         test(anchorMarkers, true);
+
+         // @formatter:off
+         String[] nonAnchorMarkers = new String[]{
+                         "((_Delayed_ = true) && (F == '1'))",
+                         "((_Eval_ = true) && (F == '1'))",
+                         "((_Hole_ = true) && (F == '1'))",
+                         "((_Drop_ = true) && (F == '1'))",
+                         "((_Lenient_ = true) && (F == '1'))",
+                         "((_Strict_ = true) && (F == '1'))"
+         };
+         // @formatter:on
+
+         test(nonAnchorMarkers, false);
+     }
+
+     /** A union is expected to be an anchor only when none of its terms uses a field outside both field sets. */
+     @Test
+     void testUnions() {
+         // @formatter:off
+         String[] anchorUnions = new String[] {
+                         "F == '1' || F == '2'",
+                         "F == '1' || IO == '1'",
+                         "IO == '1' || IO == '2'"};
+         // @formatter:on
+
+         test(anchorUnions, true);
+
+         // @formatter:off
+         String[] nonAnchorUnions = new String[] {
+                         "FIELD == '1' || F == '2'",
+                         "F == '1' || IO == '1' || FIELD == '3'",
+                         "FIELD == '1' || FIELD == '2'"};
+         // @formatter:on
+
+         test(nonAnchorUnions, false);
+     }
+
+     /** An intersection is expected to be an anchor as soon as one of its terms is an anchor. */
+     @Test
+     void testIntersections() {
+         // @formatter:off
+         String[] anchorIntersections = new String[] {
+                         "F == '1' && F == '2'",
+                         "F == '1' && IO == '1'",
+                         "IO == '1' && IO == '2'",
+                         "F == '1' && IO == null",
+                         "IO == '1' && IO == null",
+                         // intersection needs just one anchor to be executable
+                         "X == '1' && F == '2'",
+                         "X == '1' && IO == '2'"
+         };
+         // @formatter:on
+
+         test(anchorIntersections, true);
+
+         // @formatter:off
+         String[] nonAnchorQueries = new String[] {
+                         "X == '1' && Y == '2' && Z == '3'",
+                         "F == null && IO == null",
+         };
+         // @formatter:on
+
+         test(nonAnchorQueries, false);
+     }
+
+     /** Intersections of unions built only from F and IO terms are expected to be anchors. */
+     @Test
+     void testNestedUnions() {
+         // @formatter:off
+         String[] anchorNestedUnions = new String[]{
+                         "(F == '1' || F == '2') && (F == '3' || F == '4')",
+                         "(F == '1' || F == '2') && (IO == '3' || IO == '4')",
+                         "(IO == '1' || IO == '2') && (F == '3' || F == '4')",
+                         "(F == '1' || IO == '2') && (F == '3' || IO == '4')",
+                         "(IO == '1' || F == '2') && (IO == '3' || F == '4')",
+         };
+         // @formatter:on
+
+         test(anchorNestedUnions, true);
+     }
+
+     /** Unions of intersections built only from F and IO terms are expected to be anchors. */
+     @Test
+     void testNestedIntersections() {
+         // @formatter:off
+         String[] anchorNestedIntersections = new String[]{
+                         "(F == '1' && F == '2') || (F == '3' && F == '4')",
+                         "(F == '1' && F == '2') || (IO == '3' && IO == '4')",
+                         "(IO == '1' && IO == '2') || (F == '3' && F == '4')",
+                         "(F == '1' && IO == '2') || (F == '3' && IO == '4')",
+                         "(IO == '1' && F == '2') || (IO == '3' && F == '4')",
+         };
+         // @formatter:on
+
+         test(anchorNestedIntersections, true);
+     }
+
+     /** A content phrase function intersected with equality terms on the indexed field is expected to be an anchor. */
+     @Test
+     void testFullContentPhraseFunction() {
+         String query = "content:phrase(F, termOffsetMap, 'foo', 'bar') && F == 'foo' && F == 'bar'";
+         test(query, true);
+     }
+
+     /** Comparisons whose left side is a filter function result or a size() method call are not expected to be anchors. */
+     @Test
+     void testArithmeticAndSizeMethods() {
+         // @formatter:off
+         String[] queries = new String[]{
+                         // filter
+                         "filter:getMinTime(F) == 1892160000000",
+                         "filter:getMinTime(F) != 1892160000000",
+                         "filter:getMinTime(F) > 1892160000000",
+                         "filter:getMinTime(F) < 1892160000000",
+                         "filter:getMinTime(F) >= 1892160000000",
+                         "filter:getMinTime(F) <= 1892160000000",
+                         // method
+                         "F.size() == 1",
+                         "F.size() != 1",
+                         "F.size() > 1",
+                         "F.size() < 1",
+                         "F.size() >= 1",
+                         "F.size() <= 1",
+         };
+         // @formatter:on
+
+         test(queries, false);
+     }
+
+     /**
+      * Assert the same anchor expectation for each query in turn
+      *
+      * @param queries
+      *            the queries
+      * @param expected
+      *            the expected result of the anchor check for every query
+      */
+     private void test(String[] queries, boolean expected) {
+         for (String query : queries) {
+             test(query, expected);
+         }
+     }
+
+     /**
+      * Parse the query and assert whether its first node is reported as an anchor
+      *
+      * @param query
+      *            the query
+      * @param expected
+      *            the expected result of the anchor check
+      */
+     private void test(String query, boolean expected) {
+         JexlNode node = parseQuery(query);
+         assertEquals(expected, getVisitor().isAnchor(node));
+     }
+
+     /**
+      * Parse and flatten the query and return the first child of the resulting script, failing the test if that is not possible
+      *
+      * @param query
+      *            the query
+      * @return the first child of the parsed script
+      */
+     private JexlNode parseQuery(String query) {
+         try {
+             ASTJexlScript script = JexlASTHelper.parseAndFlattenJexlQuery(query);
+             return script.jjtGetChild(0);
+         } catch (Exception e) {
+             // pass the cause along so the underlying error is part of the reported failure
+             fail("Could not parse query: " + query, e);
+             // fail() does not return normally; the throw only tells the compiler that this branch ends here
+             throw new IllegalStateException(e);
+         }
+     }
+
+     /**
+      * Return the visitor under test, creating it from the fixture's field sets on first use
+      *
+      * @return the visitor
+      */
+     private AnchorDetectionVisitor getVisitor() {
+         if (visitor == null) {
+             visitor = new AnchorDetectionVisitor(indexedFields, indexOnlyFields);
+         }
+         return visitor;
+     }
+ }
diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
index 7c043448b9..4c4e5f4fd3 100644
--- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
+++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml
@@ -368,10 +368,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ TYPE
+
+
+
+ SHAPE
+
+
+
+
+
+
+
+
+