From 2496aceda4ba0e19a13a1fecc67d3b44472b3a4f Mon Sep 17 00:00:00 2001 From: Juri Leino Date: Fri, 22 Mar 2024 20:00:32 +0100 Subject: [PATCH] [refactor] radical restructuring RangeIndexConfigAttributeCondition - only keep operator and two flags in memor - value, lowercasevalue and numericvalue were replaced by two functions - indexPredicate and queryPredicate --- .../RangeIndexConfigAttributeCondition.java | 353 +++++++++--------- 1 file changed, 180 insertions(+), 173 deletions(-) diff --git a/extensions/indexes/range/src/main/java/org/exist/indexing/range/RangeIndexConfigAttributeCondition.java b/extensions/indexes/range/src/main/java/org/exist/indexing/range/RangeIndexConfigAttributeCondition.java index 8dc5a8bab9b..c2dd1779451 100644 --- a/extensions/indexes/range/src/main/java/org/exist/indexing/range/RangeIndexConfigAttributeCondition.java +++ b/extensions/indexes/range/src/main/java/org/exist/indexing/range/RangeIndexConfigAttributeCondition.java @@ -36,7 +36,7 @@ import org.w3c.dom.Node; import javax.xml.XMLConstants; -import java.util.function.BiPredicate; +import java.util.function.Predicate; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -52,23 +52,27 @@ public class RangeIndexConfigAttributeCondition extends RangeIndexConfigConditio private final String attributeName; private final QName attribute; - private final String value; private final Operator operator; - private final boolean caseSensitive; - private final boolean numericComparison; - private final Pattern pattern; - private final Double numericValue; - private String lowercaseValue = null; - public RangeIndexConfigAttributeCondition(final Element elem, final NodePath parentPath) throws DatabaseConfigurationException { + private final boolean commutative; + private final boolean commutativeNegate; + + private final Predicate indexPredicate; + private final Predicate queryPredicate; + + public RangeIndexConfigAttributeCondition(final Element elem, final NodePath parentPath) throws DatabaseConfigurationException { if (parentPath.getLastComponent().getNameType() == ElementValue.ATTRIBUTE) { throw new DatabaseConfigurationException("Range index module: Attribute condition cannot be defined for an attribute:" + parentPath); } + if (!elem.hasAttribute("attribute")) { + throw new DatabaseConfigurationException("Range index module: No attribute qname in condition"); + } + attributeName = elem.getAttribute("attribute"); - if (attributeName == null || attributeName.isEmpty()) { - throw new DatabaseConfigurationException("Range index module: Empty or no attribute qname in condition"); + if (attributeName.isEmpty()) { + throw new DatabaseConfigurationException("Range index module: Attribute qname is empty"); } try { @@ -77,31 +81,23 @@ public RangeIndexConfigAttributeCondition(final Element elem, final NodePath par throw new DatabaseConfigurationException("Range index module error: " + e.getMessage(), e); } - value = elem.getAttribute("value"); - - final String caseString = elem.getAttribute("case"); - caseSensitive = (caseString != null && !caseString.equalsIgnoreCase("no")); - - final String numericString = elem.getAttribute("numeric"); - numericComparison = (numericString != null && numericString.equalsIgnoreCase("yes")); + final String value = elem.getAttribute("value"); - operator = getOperator(elem, numericComparison); + // QUESTION(JL): should we throw on here, if value == null? - // try to parse the number value if numeric comparison is specified - // store a reference to numeric value to avoid having to parse each time - numericValue = numericComparison ? getNumericValue(value) : null; + operator = getOperator(elem); + commutative = (operator == Operator.EQ || operator == Operator.NE); + commutativeNegate = (operator == Operator.GT || operator == Operator.LT || operator == Operator.GE || operator == Operator.LE); - // try to create a pattern matcher for a 'matches' condition - pattern = operator == Operator.MATCH ? getPattern(value, caseSensitive) : null; - } - - private static Pattern getPattern(final String value, final boolean caseSensitive) throws DatabaseConfigurationException { - final int flags = caseSensitive ? 0 : CASE_INSENSITIVE; - try { - return Pattern.compile(value, flags); - } catch (PatternSyntaxException e) { - RangeIndex.LOG.error(e); - throw new DatabaseConfigurationException("Range index module: Invalid regular expression in condition: " + value); + boolean numericComparison = elem.hasAttribute("numeric") && elem.getAttribute("numeric").equalsIgnoreCase("yes"); + if (numericComparison) { + final Double num = getNumericValue(value); + indexPredicate = getNumericMatcher(operator, num); + queryPredicate = getNumericTester(num); + } else { + boolean caseSensitive = elem.hasAttribute("case") && !elem.getAttribute("case").equalsIgnoreCase("no"); + indexPredicate = caseSensitive ? getCaseMatcher(operator, value) : getMatcher(operator, value); + queryPredicate = getTester(value, caseSensitive); } } @@ -120,7 +116,7 @@ private static Double getNumericValue(final String value) throws DatabaseConfigu * @return parsed operator * @throws DatabaseConfigurationException Operator not set or incompatible */ - private static Operator getOperator(final Element elem, final boolean numericComparison) throws DatabaseConfigurationException { + private static Operator getOperator(final Element elem) throws DatabaseConfigurationException { if (!elem.hasAttribute("operator")) { return Operator.EQ; } @@ -130,165 +126,90 @@ private static Operator getOperator(final Element elem, final boolean numericCom throw new DatabaseConfigurationException("Range index module: Invalid operator specified in range index condition: " + operatorName + "."); } - if (!numericComparison) { - return operator; - } + return operator; + } + private Predicate getNumericMatcher(final Operator operator, final Double value) throws DatabaseConfigurationException { return switch (operator) { - case MATCH, STARTS_WITH, ENDS_WITH, CONTAINS -> - throw new DatabaseConfigurationException("Range index module: Numeric comparison not applicable for operator: " + operator.name()); - default -> operator; + case EQ -> v -> value.equals(toDouble(v)); + case NE -> v -> !value.equals(toDouble(v)); + case GT -> v -> Double.compare(toDouble(v), value) > 0; + case LT -> v -> Double.compare(toDouble(v), value) < 0; + case GE -> v -> Double.compare(toDouble(v), value) >= 0; + case LE -> v -> Double.compare(toDouble(v), value) <= 0; + default -> throw new DatabaseConfigurationException("Range index module: Numeric comparison not applicable for operator: " + operator.name()); }; } - // lazily evaluate lowercase value to convert once when needed - private String getLowercaseValue() { - if (lowercaseValue == null && value != null) { - lowercaseValue = value.toLowerCase(); - } - return lowercaseValue; - } - - @Override - public boolean matches(final Node node) { - return node.getNodeType() == Node.ELEMENT_NODE && matchValue(((Element) node).getAttribute(attributeName)); - } - - private boolean matchValue(final String testValue) { + private static Predicate getCaseMatcher(final Operator operator, final String value) throws DatabaseConfigurationException { return switch (operator) { - case EQ -> booleanMatch(testValue); - case NE -> !booleanMatch(testValue); - case GT, LT, GE, LE -> matchOrdinal(operator, testValue); - case ENDS_WITH -> stringMatch(String::endsWith, testValue); - case STARTS_WITH -> stringMatch(String::startsWith, testValue); - case CONTAINS -> stringMatch(String::contains, testValue); - case MATCH -> pattern.matcher(testValue).matches(); + case EQ -> v -> v.equals(value); + case NE -> v -> !v.equals(value); + case GT -> v -> v.compareTo(value) > 0; + case LT -> v -> v.compareTo(value) < 0; + case GE -> v -> v.compareTo(value) >= 0; + case LE -> v -> v.compareTo(value) <= 0; + case ENDS_WITH -> v -> v.endsWith(value); + case STARTS_WITH -> v -> v.startsWith(value); + case CONTAINS -> v -> v.contains(value); + case MATCH -> { + final Pattern pattern = getPattern(value, 0); + yield v -> pattern.matcher(v).matches(); + } }; } - private boolean stringMatch(final BiPredicate predicate, final String testValue) { - return caseSensitive ? predicate.test(testValue, value) : predicate.test(testValue.toLowerCase(), getLowercaseValue()); - } - - private boolean booleanMatch(final String testValue) { - if (numericComparison) { - return numericValue.equals(toDouble(testValue)); - } - if (caseSensitive) { - return value.equals(testValue); - } - return value.equalsIgnoreCase(testValue); - } - - private boolean matchOrdinal(final Operator operator, final String testValue) { - final int result; - if (numericComparison) { - final double testDouble = toDouble(testValue); - result = Double.compare(testDouble, numericValue); - } else if (caseSensitive) { - result = testValue.compareTo(value); - } else { - result = testValue.toLowerCase().compareTo(getLowercaseValue()); - } - + private static Predicate getMatcher(final Operator operator, final String value) throws DatabaseConfigurationException { + final String lowerCased = value.toLowerCase(); return switch (operator) { - case GT -> result > 0; - case LT -> result < 0; - case GE -> result >= 0; - case LE -> result <= 0; - default -> false; + case EQ -> v -> v.equalsIgnoreCase(value); + case NE -> v -> !v.equalsIgnoreCase(value); + case GT -> v -> v.toLowerCase().compareTo(lowerCased) > 0; + case LT -> v -> v.toLowerCase().compareTo(lowerCased) < 0; + case GE -> v -> v.toLowerCase().compareTo(lowerCased) >= 0; + case LE -> v -> v.toLowerCase().compareTo(lowerCased) <= 0; + case ENDS_WITH -> v -> v.toLowerCase().endsWith(lowerCased); + case STARTS_WITH -> v -> v.toLowerCase().startsWith(lowerCased); + case CONTAINS -> v -> v.toLowerCase().contains(lowerCased); + case MATCH -> { + final Pattern pattern = getPattern(value, CASE_INSENSITIVE); + yield v -> pattern.matcher(v).matches(); + } }; } - private Double toDouble(final String value) { + private static Pattern getPattern(final String value, final int flags) throws DatabaseConfigurationException { try { - return Double.parseDouble(value); - } catch (NumberFormatException e) { - RangeIndex.LOG.debug("Non-numeric value encountered for numeric condition on @'{}': {}", attributeName, value); - return (double) 0; + return Pattern.compile(value, flags); + } catch (PatternSyntaxException e) { + RangeIndex.LOG.error(e); + throw new DatabaseConfigurationException("Range index module: Invalid regular expression in condition: " + value); } } - @Override - public boolean find(final Predicate predicate) { - final Expression inner = getInnerExpression(predicate); - if (!(inner instanceof GeneralComparison || inner instanceof InternalFunctionCall)) { - // predicate expression cannot be parsed as condition - return false; - } - - Operator currentOperator; - final Expression lhe; - final Expression rhe; + static Predicate throwingPredicateWrapper( + ThrowingPredicate throwingPredicate) { - // get the type of the expression inside the predicate and determine right and left hand arguments - if (inner instanceof InternalFunctionCall funcCall) { - // calls to matches() will not have been rewritten to a comparison, so check for function call - final Function func = funcCall.getFunction(); - if (!func.isCalledAs("matches")) { - // predicate expression cannot be parsed as condition + return v -> { + try { + return throwingPredicate.test(v); + } catch (XPathException e) { + RangeIndex.LOG.error("Value conversion error when testing predicate for condition, value: {}", v.toString()); + RangeIndex.LOG.error(e); return false; } + }; + } - lhe = unwrapSubExpression(func.getArgument(0)); - rhe = unwrapSubExpression(func.getArgument(1)); - currentOperator = Operator.MATCH; - } else { - final GeneralComparison comp = (GeneralComparison) inner; - - lhe = comp.getLeft(); - rhe = comp.getRight(); - currentOperator = RangeQueryRewriter.getOperator(comp); - } - - // find the attribute name and value pair from the predicate to check against - // first assume attribute is on the left and value is on the right - LocationStep testStep = findLocationStep(lhe); - AtomicValue testValue = findAtomicValue(rhe); - - if (testStep == null && testValue == null) { - switch (operator) { - // the equality operators are commutative so if attribute/value pair has not been found, - // check the other way around - case EQ, NE -> { - testStep = findLocationStep(rhe); - testValue = findAtomicValue(lhe); - } - // for ordinal comparisons, attribute and value can also be the other way around in the predicate - // but the operator has to be inverted - case GT, LT, GE, LE -> { - testStep = findLocationStep(rhe); - testValue = findAtomicValue(lhe); - currentOperator = invertOrdinalOperator(currentOperator); - } - } - } - - if (testStep == null || testValue == null || !currentOperator.equals(operator)) { - return false; - } - - final QName qname = testStep.getTest().getName(); - if (qname.getNameType() != ElementValue.ATTRIBUTE || !qname.equals(attribute)) { - return false; - } + private static Predicate getNumericTester (final Double value) { + return throwingPredicateWrapper(v -> v instanceof NumericValue && v.toJavaObject(Double.class).equals(value)); + } - try { - if (numericComparison) { - return testValue instanceof NumericValue && testValue.toJavaObject(Double.class).equals(numericValue); - } - if (testValue instanceof StringValue) { - final String testString = testValue.getStringValue(); - if (caseSensitive) { - return testString.equals(value); - } - return testString.equalsIgnoreCase(value); - } - } catch (XPathException e) { - RangeIndex.LOG.error("Value conversion error when testing predicate for condition, value: {}", testValue.toString()); - RangeIndex.LOG.error(e); + private static java.util.function.Predicate getTester (final String value, final boolean caseSensitive) { + if (caseSensitive) { + return throwingPredicateWrapper(v -> v instanceof StringValue && v.getStringValue().equals(value)); } - return false; + return throwingPredicateWrapper(v -> v instanceof StringValue && v.getStringValue().equalsIgnoreCase(value)); } /** @@ -300,7 +221,7 @@ public boolean find(final Predicate predicate) { * @param expr to unwrap * @return unwrapped expression */ - private Expression unwrapSubExpression(Expression expr) { + private static Expression unwrapSubExpression(Expression expr) { if (expr instanceof Atomize atomize) { expr = atomize.getExpression(); @@ -317,7 +238,7 @@ private Expression unwrapSubExpression(Expression expr) { return expr; } - private LocationStep findLocationStep(final Expression expr) { + private static LocationStep findLocationStep(final Expression expr) { if (expr instanceof LocationStep step) { return step; } @@ -325,7 +246,7 @@ private LocationStep findLocationStep(final Expression expr) { return null; } - private AtomicValue findAtomicValue(final Expression expr) { + private static AtomicValue findAtomicValue(final Expression expr) { if (expr instanceof AtomicValue atomic) { return atomic; } @@ -357,7 +278,7 @@ private AtomicValue findAtomicValue(final Expression expr) { return null; } - private Operator invertOrdinalOperator(Operator operator) { + private static Operator invertOrdinalOperator(final Operator operator) { return switch (operator) { case LE -> Operator.GE; case GE -> Operator.LE; @@ -366,4 +287,90 @@ private Operator invertOrdinalOperator(Operator operator) { default -> null; }; } + + @Override + public boolean matches(final Node node) { + return node.getNodeType() == Node.ELEMENT_NODE && indexPredicate.test(((Element) node).getAttribute(attributeName)); + } + + private Double toDouble(final String value) { + try { + return Double.parseDouble(value); + } catch (NumberFormatException e) { + RangeIndex.LOG.debug("Non-numeric value encountered for numeric condition on @'{}': {}", attributeName, value); + return (double) 0; + } + } + + private boolean match(final InternalFunctionCall functionCall) { + if (!operator.equals(Operator.MATCH)) { + return false; // nothing to do, everything else but matches is not handled here + } + final Function func = functionCall.getFunction(); + if (!func.isCalledAs("matches")) { + return false; // only calls to fn:matches() are handled here + } + final Expression lhe = unwrapSubExpression(func.getArgument(0)); + final Expression rhe = unwrapSubExpression(func.getArgument(1)); + final LocationStep testStep = findLocationStep(lhe); + final AtomicValue testValue = findAtomicValue(rhe); + + return canTest(testStep, testValue) && queryPredicate.test(testValue); + } + + private boolean compare(final GeneralComparison generalComparison) { + final Expression lhe = generalComparison.getLeft(); + final Expression rhe = generalComparison.getRight(); + + // find the attribute name and value pair from the predicate to check against + // first assume attribute is on the left and value is on the right + Operator currentOperator = RangeQueryRewriter.getOperator(generalComparison); + Operator invertedOperator = invertOrdinalOperator(currentOperator); + + if (!operator.equals(currentOperator) && !operator.equals(invertedOperator)) { + return false; // needless to do more as the operator cannot match + } + + LocationStep testStep = findLocationStep(lhe); + AtomicValue testValue = findAtomicValue(rhe); + + // the equality operators are commutative so if attribute/value pair has not been found, + // check the other way around + if (testStep == null && testValue == null && (commutative || commutativeNegate)) { + testStep = findLocationStep(rhe); + testValue = findAtomicValue(lhe); + // for LT, GT, GE and LE the operation has to be inverted + if (commutativeNegate) { + currentOperator = invertedOperator; + } + } + + return currentOperator.equals(operator) && canTest(testStep, testValue) && queryPredicate.test(testValue); + } + + private boolean canTest (final LocationStep step, final AtomicValue value) { + if (step == null || value == null) { + return false; + } + final QName qname = step.getTest().getName(); + return qname.getNameType() == ElementValue.ATTRIBUTE && qname.equals(attribute); + } + + @Override + public boolean find(final org.exist.xquery.Predicate predicate) { + final Expression inner = getInnerExpression(predicate); + if (inner instanceof InternalFunctionCall functionCall) { + return match(functionCall); + } + if (inner instanceof final GeneralComparison generalComparison) { + return compare(generalComparison); + } + // predicate expression cannot be parsed as condition + return false; + } + + @FunctionalInterface + public interface ThrowingPredicate { + boolean test(T t) throws E; + } }