From 660c31fca7957d69fd2df3a252314364fb206efe Mon Sep 17 00:00:00 2001 From: agazzarini Date: Wed, 1 Apr 2015 13:28:36 +0200 Subject: [PATCH] [ issue #28 ] Facet range queries --- .../search/faceting/RDFacetComponent.java | 5 - .../handler/search/faceting/RDFacets.java | 265 ++++++++++++++++-- .../rq/DateRangeEndpointCalculator.java | 39 +++ .../rq/DoubleRangeEndpointCalculator.java | 29 ++ .../search/faceting/rq/FacetRangeQuery.java | 43 +++ .../faceting/rq/RangeEndpointCalculator.java | 33 +++ 6 files changed, 381 insertions(+), 33 deletions(-) create mode 100644 solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/DateRangeEndpointCalculator.java create mode 100644 solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/DoubleRangeEndpointCalculator.java create mode 100644 solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/FacetRangeQuery.java create mode 100644 solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/RangeEndpointCalculator.java diff --git a/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/RDFacetComponent.java b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/RDFacetComponent.java index b3f4104..a6231b5 100644 --- a/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/RDFacetComponent.java +++ b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/RDFacetComponent.java @@ -22,14 +22,9 @@ public class RDFacetComponent extends FacetComponent { public static Logger log = LoggerFactory.getLogger(RDFacetComponent.class); public static final String COMPONENT_NAME = "facet"; private static final String PIVOT_KEY = "facet_pivot"; - - public static void main(String[] args) { - System.out.println(5007 >> 6); - } @Override public void process(ResponseBuilder rb) throws IOException { - // SolrParams params = rb.req.getParams(); if (rb.doFacets) { final ModifiableSolrParams params = new ModifiableSolrParams(); diff --git 
a/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/RDFacets.java b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/RDFacets.java index 88ce829..407ccde 100644 --- a/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/RDFacets.java +++ b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/RDFacets.java @@ -1,72 +1,281 @@ package org.gazzax.labs.solrdf.handler.search.faceting; import java.io.IOException; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.lucene.search.Query; import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.params.FacetParams.FacetRangeInclude; +import org.apache.solr.common.params.FacetParams.FacetRangeOther; import org.apache.solr.common.params.GroupParams; -import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.handler.component.ResponseBuilder; import org.apache.solr.request.SimpleFacets; -import org.apache.solr.schema.DateField; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; -import org.apache.solr.schema.SortableDoubleField; -import org.apache.solr.schema.SortableFloatField; -import org.apache.solr.schema.SortableIntField; -import org.apache.solr.schema.SortableLongField; -import org.apache.solr.schema.TrieField; +import org.apache.solr.schema.TrieDateField; +import org.apache.solr.schema.TrieDoubleField; import org.apache.solr.search.DocSet; import org.apache.solr.search.DocSetCollector; import org.apache.solr.search.QParser; import org.apache.solr.search.SyntaxError; +import 
org.gazzax.labs.solrdf.handler.search.faceting.rq.DateRangeEndpointCalculator; +import org.gazzax.labs.solrdf.handler.search.faceting.rq.DoubleRangeEndpointCalculator; +import org.gazzax.labs.solrdf.handler.search.faceting.rq.FacetRangeQuery; +import org.gazzax.labs.solrdf.handler.search.faceting.rq.RangeEndpointCalculator; public class RDFacets extends SimpleFacets { - static String FACET_RANGE_QUERY = FacetParams.FACET_RANGE + ".query"; - - private DocSet filteredDocSetForRangeQueries; + static String FACET_RANGE_QUERY = FacetParams.FACET_RANGE + ".q"; + static String FACET_RANGE_QUERY_HINT = FACET_RANGE_QUERY + ".hint"; + static String FACET_RANGE_QUERY_ALIAS = FACET_RANGE_QUERY + ".alias"; public RDFacets(final ResponseBuilder responseBuilder, final DocSet docs, final SolrParams params) { - super(responseBuilder.req, docs, new ModifiableSolrParams(params)); + super(responseBuilder.req, docs, params); } - + @Override public NamedList getFacetRangeCounts() throws IOException, SyntaxError { - // Remove any facet.range because I will use them for "workarounding" the SimpleFacets impl. 
- ((ModifiableSolrParams)params).remove(FacetParams.FACET_RANGE); + final NamedList result = new SimpleOrderedMap<>(); + final List rangeQueries = new ArrayList(); final String[] queries = params.getParams(FACET_RANGE_QUERY); - if (queries == null) { - return super.getFacetCounts(); + if (queries != null && queries.length > 0) { + final String hint = required.get(FACET_RANGE_QUERY_HINT); + final String alias = params.get(FACET_RANGE_QUERY_ALIAS); + final String start = required.get(FacetParams.FACET_RANGE_START); + final String end = required.get(FacetParams.FACET_RANGE_END); + final String gapExpression = required.get(FacetParams.FACET_RANGE_GAP); + + // TODO: add other parameters, too + for (final String query : queries) { + rangeQueries.add(new FacetRangeQuery(query, alias, hint, start, end, gapExpression)); + } + } + + int index = 0; + String facetRangeQuery = null; + while ( (facetRangeQuery = params.get(FACET_RANGE_QUERY + "." + index)) != null) { + final String hint = required.get(FACET_RANGE_QUERY_HINT + "." + index); + final String alias = params.get(FACET_RANGE_QUERY_ALIAS + "." + index); + final String start = required.get(FacetParams.FACET_RANGE_START + "." + index); + final String end = required.get(FacetParams.FACET_RANGE_END + "." + index); + final String gapExpression = required.get(FacetParams.FACET_RANGE_GAP + "." 
+ index); + + // TODO: add other parameters, too + rangeQueries.add(new FacetRangeQuery(facetRangeQuery, alias, hint, start, end, gapExpression)); + } + + if (rangeQueries.isEmpty()) { + return result; + } + + for (final FacetRangeQuery rangeQuery : rangeQueries) { + final Query query = QParser.getParser(rangeQuery.q, null, req).getQuery(); + final DocSetCollector collector = new DocSetCollector(docs.size() >> 6, docs.size()); + + req.getSearcher().search(query, docs.getTopFilter(), collector); + facetRangeCounts(rangeQuery, result, collector.getDocSet()); } + return result; + } + + /** + * + * NOTE: The same method already exists in the superclass but unfortunately it has a "default" visibility so it cannot be overridden. + * + * @param query the facet range query. + * @param result the result value object. + * @throws IOException + * @throws SyntaxError + */ + @SuppressWarnings("unchecked") + protected > void facetRangeCounts( + final FacetRangeQuery query, + final NamedList result, + final DocSet filteredDocSet) throws IOException, SyntaxError { + final IndexSchema schema = searcher.getSchema(); - // FIXME : doesn't work with more than one fqr - for (final String q : queries) { - final Query query = QParser.getParser(q, null, req).getQuery(); - final DocSetCollector collector = new DocSetCollector(docs.size() >> 6, docs.size()); + // TODO: Get a better point about this method, because I think something needs to be changed + // (i.e. 
rewritten because it cannot be overridden) + parseParams(FacetParams.FACET_RANGE, query.fieldName); + + final SchemaField schemaField = schema.getField(query.fieldName); + final RangeEndpointCalculator strategy = rangeEndpointCalculator(schemaField); + + final NamedList facetRange = new SimpleOrderedMap<>(); + final NamedList counts = new NamedList<>(); + facetRange.add("counts", counts); + + final T start = strategy.getValue(query.start); + T end = strategy.getValue(query.end); + + if (end.compareTo(start) < 0) { + throw new SolrException( + ErrorCode.BAD_REQUEST, + "range facet 'end' comes before 'start': " + end + " < " + start); + } + + final String gap = query.gap; + facetRange.add("gap", gap); + + final int minCount = params.getFieldInt(query.fieldName, FacetParams.FACET_MINCOUNT, 0); + + final EnumSet include = FacetRangeInclude.parseParam( + params.getFieldParams(query.fieldName, FacetParams.FACET_RANGE_INCLUDE)); + + T low = start; + + final boolean useHardEnd = params.getFieldBool(query.fieldName, FacetParams.FACET_RANGE_HARD_END, false); + while (low.compareTo(end) < 0) { + T high = strategy.addGap(low, gap); + if (end.compareTo(high) < 0) { + if (useHardEnd) { + high = end; + } else { + end = high; + } + } + + if (high.compareTo(low) < 0) { + throw new SolrException( + ErrorCode.BAD_REQUEST, + "range facet infinite loop (is gap negative? 
did the math overflow?)"); + } - req.getSearcher().search(query, docs.getTopFilter(), collector); - filteredDocSetForRangeQueries = collector.getDocSet(); + if (high.compareTo(low) == 0) { + throw new SolrException( + ErrorCode.BAD_REQUEST, + "range facet infinite loop: gap is either zero, or too small relative start/end and caused underflow: " + + low + " + " + gap + " = " + high); + } + + final boolean includeLower = (include + .contains(FacetRangeInclude.LOWER) || (include + .contains(FacetRangeInclude.EDGE) && 0 == low + .compareTo(start))); - ((ModifiableSolrParams)params).set(FacetParams.FACET_RANGE, "o_n"); + final boolean includeUpper = ( + include + .contains(FacetRangeInclude.UPPER) || (include + .contains(FacetRangeInclude.EDGE) && 0 == high + .compareTo(end))); + + final String lowS = strategy.format(low); + final String highS = strategy.format(high); + + final int count = rangeCount(schemaField, lowS, highS, includeLower, includeUpper, filteredDocSet); + if (count >= minCount) { + counts.add(lowS, count); + } + + low = high; } + + facetRange.add("start", strategy.format(start)); + facetRange.add("end", strategy.format(end)); - return super.getFacetRangeCounts(); - } + final String[] othersP = params.getFieldParams(query.fieldName, FacetParams.FACET_RANGE_OTHER); + if (null != othersP && 0 < othersP.length) { + Set others = EnumSet.noneOf(FacetRangeOther.class); - @Override + for (final String o : othersP) { + others.add(FacetRangeOther.get(o)); + } + + // no matter what other values are listed, we don't do + // anything if "none" is specified. 
+ if (!others.contains(FacetRangeOther.NONE)) { + + boolean all = others.contains(FacetRangeOther.ALL); + final String startS = strategy.format(start); + final String endS = strategy.format(end); + + if (all || others.contains(FacetRangeOther.BEFORE)) { + // include upper bound if "outer" or if first gap doesn't + // already include it + facetRange.add(FacetRangeOther.BEFORE.toString(), + rangeCount( + schemaField, + null, + startS, + false, + (include.contains(FacetRangeInclude.OUTER) || (!(include + .contains(FacetRangeInclude.LOWER) || include + .contains(FacetRangeInclude.EDGE)))))); + + } + if (all || others.contains(FacetRangeOther.AFTER)) { + // include lower bound if "outer" or if last gap doesn't + // already include it + facetRange.add(FacetRangeOther.AFTER.toString(), + rangeCount( + schemaField, + endS, + null, + (include.contains(FacetRangeInclude.OUTER) || (!(include + .contains(FacetRangeInclude.UPPER) || include + .contains(FacetRangeInclude.EDGE)))), + false)); + } + if (all || others.contains(FacetRangeOther.BETWEEN)) { + facetRange.add(FacetRangeOther.BETWEEN.toString(), + rangeCount( + schemaField, + startS, + endS, + (include.contains(FacetRangeInclude.LOWER) || include + .contains(FacetRangeInclude.EDGE)), + (include.contains(FacetRangeInclude.UPPER) || include + .contains(FacetRangeInclude.EDGE)))); + + } + } + } + result.add(key, facetRange); + } + + @SuppressWarnings("rawtypes") + RangeEndpointCalculator rangeEndpointCalculator(final SchemaField field) { + final FieldType fieldType = field.getType(); + + if (fieldType instanceof TrieDoubleField) { + return new DoubleRangeEndpointCalculator(field); + } else if (fieldType instanceof TrieDateField) { + return new DateRangeEndpointCalculator(field); + } + + throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to range facet on field " + field + " (not a Trie(Double|Date)Field)."); + } + + /** + * A similar method exists on the superclass but it is using a different docset. 
+ * + * @param schemaField the target field. + * @param lowBound the low bound. + * @param highBound the high bound. + * @param includeLowBound if low bound must be included. + * @param includeHighBound if high bound must be included. + * @param domain the {@link DocSet} resulting from the range query. + * @return the number of occurrences for a given range, for a given range query. + * @throws IOException in case of I/O failure. + */ protected int rangeCount( final SchemaField schemaField, final String lowBound, final String highBound, final boolean includeLowBound, - final boolean includeHighBound) throws IOException { + final boolean includeHighBound, + final DocSet domain) throws IOException { final Query rangeQ = schemaField.getType().getRangeQuery( null, schemaField, @@ -77,7 +286,7 @@ protected int rangeCount( if (params.getBool(GroupParams.GROUP_FACET, false)) { return getGroupedFacetQueryCount(rangeQ); } else { - return searcher.numDocs(rangeQ, filteredDocSetForRangeQueries); + return searcher.numDocs(rangeQ, domain); } } } \ No newline at end of file diff --git a/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/DateRangeEndpointCalculator.java b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/DateRangeEndpointCalculator.java new file mode 100644 index 0000000..5b9e836 --- /dev/null +++ b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/DateRangeEndpointCalculator.java @@ -0,0 +1,39 @@ +package org.gazzax.labs.solrdf.handler.search.faceting.rq; + +import java.text.ParseException; +import java.util.Date; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.schema.DateField; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.util.DateMathParser; + +@SuppressWarnings("deprecation") +public class DateRangeEndpointCalculator extends RangeEndpointCalculator { + + public DateRangeEndpointCalculator(final 
SchemaField f) { + super(f); + } + + @Override + public String format(Date value) { + return ((DateField)field.getType()).toExternal(value); + } + + @Override + public Date getValue(String rawval) { + return ((DateField) field.getType()).parseMath(null, rawval); + } + + @Override + public Date addGap(Date value, String gap) { + final DateMathParser dmp = new DateMathParser(); + dmp.setNow(value); + try { + return dmp.parseMath(gap); + } catch (final ParseException exception) { + throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse date expression " + gap); + } + } +} \ No newline at end of file diff --git a/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/DoubleRangeEndpointCalculator.java b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/DoubleRangeEndpointCalculator.java new file mode 100644 index 0000000..7797f9e --- /dev/null +++ b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/DoubleRangeEndpointCalculator.java @@ -0,0 +1,29 @@ +package org.gazzax.labs.solrdf.handler.search.faceting.rq; + +import org.apache.solr.schema.SchemaField; + +public class DoubleRangeEndpointCalculator extends RangeEndpointCalculator { + /** + * Builds a new {@link RangeEndpointCalculator} associated with the given field. + * + * @param f the (schema) field. + */ + public DoubleRangeEndpointCalculator(final SchemaField f) { + super(f); + } + + @Override + public String format(final Double value) { + return (value % 1.0 > 0) ? 
String.valueOf(value) : String.valueOf(value.intValue()); + } + + @Override + public Double getValue(final String rawValue) { + return Double.valueOf(rawValue); + } + + @Override + public Double addGap(final Double value, final String gapRawValue) { + return value + Double.parseDouble(gapRawValue); + } + } \ No newline at end of file diff --git a/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/FacetRangeQuery.java b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/FacetRangeQuery.java new file mode 100644 index 0000000..741961f --- /dev/null +++ b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/FacetRangeQuery.java @@ -0,0 +1,43 @@ +package org.gazzax.labs.solrdf.handler.search.faceting.rq; + +import org.gazzax.labs.solrdf.Field; + +/** + * A stupid value object for encapsulating a facet query with all related parameters. + * + * @author Andrea Gazzarini + * @since 1.0 + */ +public class FacetRangeQuery { + public final String q; + public final String fieldName; + public final String alias; + public final String start; + public final String end; + public final String gap; + + /** + * Builds a new {@link FacetRangeQuery}. + * + * @param q the query. + * @param alias the query alias. + * @param hint the facet range type hint. + * @param start the start bound. + * @param end the end bound. + * @param gap the gap expression. + */ + public FacetRangeQuery( + final String q, + final String alias, + final String hint, + final String start, + final String end, + final String gap) { + this.q = q; + this.alias = alias; + this.fieldName = "date".equals(hint) ? 
Field.DATE_OBJECT : Field.NUMERIC_OBJECT; + this.start = start; + this.end = end; + this.gap = gap; + } +} \ No newline at end of file diff --git a/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/RangeEndpointCalculator.java b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/RangeEndpointCalculator.java new file mode 100644 index 0000000..5ab97b3 --- /dev/null +++ b/solrdf/src/main/java/org/gazzax/labs/solrdf/handler/search/faceting/rq/RangeEndpointCalculator.java @@ -0,0 +1,33 @@ +package org.gazzax.labs.solrdf.handler.search.faceting.rq; + +import org.apache.solr.schema.SchemaField; + +public abstract class RangeEndpointCalculator> { + + protected final SchemaField field; + + public RangeEndpointCalculator(final SchemaField field) { + this.field = field; + } + + /** + * Formats a Range endpoint for use as a range label name in the response. + * Default Impl just uses toString() + */ + public String format(final T value) { + return value.toString(); + } + + /** + * Parses a String param into a Range endpoint value throwing + * a useful exception if not possible + */ + public abstract T getValue(final String rawval); + + /** + * Adds the String gap param to a low Range endpoint value to determine + * the corresponding high Range endpoint value, throwing + * a useful exception if not possible. + */ + public abstract T addGap(T value, String gap); + } \ No newline at end of file