Skip to content

Commit

Permalink
Update code to new Lucene version.
Browse files Browse the repository at this point in the history
  • Loading branch information
rhubner committed Dec 23, 2024
1 parent 7ebeada commit 74ffc42
Show file tree
Hide file tree
Showing 14 changed files with 403 additions and 235 deletions.
4 changes: 2 additions & 2 deletions .idea/runConfigurations/Java_Admin_Client.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions .idea/runConfigurations/Jetty_Server.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions .idea/runConfigurations/Local_Jetty_Server.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion exist-parent/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@
<appassembler.version>2.1.0</appassembler.version>
<aspectj.version>1.9.22.1</aspectj.version>
<exquery.distribution.version>0.2.1</exquery.distribution.version>
<icu.version>59.1</icu.version>
<icu.version>74.2</icu.version>
<izpack.version>5.2.3</izpack.version>
<jansi.version>2.4.1</jansi.version>
<jaxb.api.version>4.0.2</jaxb.api.version>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Copyright (C) 2024 Evolved Binary Ltd
*
* This code is proprietary and is not Open Source.
*/
package org.exist.indexing.lucene;

import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.index.LeafReaderContext;

import java.io.IOException;

/**
 * A {@link FacetsCollector} specialization for eXist's Lucene integration.
 *
 * NOTE(review): the single override below adds no behavior of its own — it
 * only delegates to the superclass. Presumably its purpose is to widen the
 * visibility of {@code doSetNextReader} from {@code protected} to
 * {@code public} so that code outside the collector hierarchy can drive the
 * reader transitions directly — confirm against the callers; if no external
 * caller needs it, the override (and possibly this class) can be removed.
 */
public class ExistFacetsCollector extends FacetsCollector {

    /**
     * Delegates directly to {@link FacetsCollector#doSetNextReader}.
     *
     * @param context the context of the next leaf reader to collect from
     * @throws IOException propagated from the superclass implementation
     */
    @Override
    public void doSetNextReader(LeafReaderContext context) throws IOException {
        super.doSetNextReader(context);
    }
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,6 @@ protected NoDiacriticsStandardAnalyzer() {
this((CharArraySet)null);
}

@Override
protected TokenStreamComponents createComponents(String fieldName) {
return null;
}

/**
* Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
*
Expand Down Expand Up @@ -153,8 +148,8 @@ public int getMaxTokenLength() {
return maxTokenLength;
}

//@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
@Override
protected TokenStreamComponents createComponents(final String fieldName) {

final StandardTokenizer src = new StandardTokenizer();
src.setMaxTokenLength(maxTokenLength);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
/*
* Copyright (C) 2024 Evolved Binary Ltd
*
* Changes made by Evolved Binary are proprietary and are not Open Source.
*
* NOTE: Parts of this file contain code from The eXist-db Authors.
* The original license header is included below.
*
* ----------------------------------------------------------------------------
*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
Expand Down Expand Up @@ -502,7 +511,7 @@ public IntegerAndSetConstructorMockAnalyzer(final Version luceneVersion, final I
}

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
protected TokenStreamComponents createComponents(String fieldName) {
throw new UnsupportedOperationException("This analyzer is a mock for testing");
}
}
Expand All @@ -523,7 +532,7 @@ public IntAndSetConstructorMockAnalyzer(final Version luceneVersion, final int a
}

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
protected TokenStreamComponents createComponents(String fieldName) {
throw new UnsupportedOperationException("This analyzer is a mock for testing");
}
}
Expand All @@ -544,7 +553,7 @@ public BooleanAndSetConstructorMockAnalyzer(final Version luceneVersion, final B
}

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
protected TokenStreamComponents createComponents(String fieldName) {
throw new UnsupportedOperationException("This analyzer is a mock for testing");
}
}
Expand All @@ -565,7 +574,7 @@ public PrimitiveBooleanAndSetConstructorMockAnalyzer(final Version luceneVersion
}

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
protected TokenStreamComponents createComponents(String fieldName) {
throw new UnsupportedOperationException("This analyzer is a mock for testing");
}
}
Expand All @@ -584,7 +593,7 @@ public CharArrayConstructorMockAnalyzer(final Version luceneVersion, final char[
}

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
protected TokenStreamComponents createComponents(String fieldName) {
throw new UnsupportedOperationException("This analyzer is a mock for testing");
}
}
Expand All @@ -603,7 +612,7 @@ public StringArrayConstructorMockAnalyzer(final Version luceneVersion, final Str
}

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
protected TokenStreamComponents createComponents(String fieldName) {
throw new UnsupportedOperationException("This analyzer is a mock for testing");
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright (C) 2024 Evolved Binary Ltd
*
* This code is proprietary and is not Open Source.
*/
package org.exist.indexing.lucene.analyzers;

import org.junit.Test;
import static org.junit.Assert.*;

/**
 * Smoke test for {@link NoDiacriticsStandardAnalyzer} after the Lucene upgrade.
 */
public class NoDiacriticsStandardAnalyzerTest {

    /**
     * Verifies that the analyzer can build its token-stream components.
     *
     * Constructing the components exercises the ICU tokenizer path, so this
     * test fails fast when an incompatible ICU4J version is on the classpath.
     *
     * @throws Exception if the analyzer cannot be created or closed
     */
    @Test
    public void createComponents() throws Exception {
        // Analyzer is Closeable; close it so the test does not leak resources.
        try (var analyzer = new NoDiacriticsStandardAnalyzer()) {
            // This call will fail on a wrong ICU4J dependency.
            var result = analyzer.createComponents("myFieldName");
            assertNotNull(result);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
/*
* Copyright (C) 2024 Evolved Binary Ltd
*
* Changes made by Evolved Binary are proprietary and are not Open Source.
*
* NOTE: Parts of this file contain code from The eXist-db Authors.
* The original license header is included below.
*
* ----------------------------------------------------------------------------
*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
Expand Down Expand Up @@ -27,7 +36,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.collation.ICUCollationAttributeFactory;
import org.apache.lucene.analysis.icu.ICUCollationAttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import org.exist.util.Collations;
import org.exist.util.DatabaseConfigurationException;
Expand Down Expand Up @@ -104,12 +113,12 @@ public void addCollation(String uri) throws DatabaseConfigurationException {
}

@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
protected TokenStreamComponents createComponents(final String fieldName) {
AttributeFactory factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
if (collator != null) {
factory = new ICUCollationAttributeFactory(collator);
}
final Tokenizer src = new KeywordTokenizer(factory, reader, 256);
final Tokenizer src = new KeywordTokenizer(factory, 256);
TokenStream tok = src;
for (final FilterConfig filter: filterConfigs) {
tok = filter.constructor.apply(tok);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
/*
* Copyright (C) 2024 Evolved Binary Ltd
*
* Changes made by Evolved Binary are proprietary and are not Open Source.
*
* NOTE: Parts of this file contain code from The eXist-db Authors.
* The original license header is included below.
*
* ----------------------------------------------------------------------------
*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
Expand All @@ -23,6 +32,9 @@

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
Expand Down Expand Up @@ -147,91 +159,81 @@ public Field convertToField(String fieldName, String content) throws IOException
case Type.LONG:
case Type.UNSIGNED_LONG:
long lvalue = Long.parseLong(content);
return new LongField(fieldName, lvalue, LongField.TYPE_NOT_STORED);
return new LongPoint(fieldName, lvalue);
case Type.INT:
case Type.UNSIGNED_INT:
case Type.SHORT:
case Type.UNSIGNED_SHORT:
int ivalue = Integer.parseInt(content);
return new IntField(fieldName, ivalue, IntField.TYPE_NOT_STORED);
return new IntPoint(fieldName, ivalue);
case Type.DECIMAL:
case Type.DOUBLE:
double dvalue = Double.parseDouble(content);
return new DoubleField(fieldName, dvalue, DoubleField.TYPE_NOT_STORED);
return new DoublePoint(fieldName, dvalue);
case Type.FLOAT:
float fvalue = Float.parseFloat(content);
return new FloatField(fieldName, fvalue, FloatField.TYPE_NOT_STORED);
return new FloatPoint(fieldName, fvalue);
case Type.DATE:
DateValue dv = new DateValue(content);
long dl = dateToLong(dv);
return new LongField(fieldName, dl, LongField.TYPE_NOT_STORED);
return new LongPoint(fieldName, dl);
case Type.TIME:
TimeValue tv = new TimeValue(content);
long tl = timeToLong(tv);
return new LongField(fieldName, tl, LongField.TYPE_NOT_STORED);
return new LongPoint(fieldName, tl);
case Type.DATE_TIME:
DateTimeValue dtv = new DateTimeValue(content);
String dateStr = dateTimeToString(dtv);
return new TextField(fieldName, dateStr, Field.Store.NO);
return new RangeIndexTextField(fieldName, dateStr); //TODO - This should be also Long.
default:
return new TextField(fieldName, content, Field.Store.NO);
return new RangeIndexTextField(fieldName, content);
}
} catch (NumberFormatException | XPathException e) {
// wrong type: ignore
}
return null;
}

public static BytesRef convertToBytes(final AtomicValue content) throws XPathException {
final BytesRefBuilder bytes = new BytesRefBuilder();
public static Query convertToQuery(final AtomicValue content, String filedName) throws XPathException {
switch(content.getType()) {
case Type.INTEGER:
case Type.LONG:
case Type.UNSIGNED_LONG:
NumericUtils.longToPrefixCoded(((IntegerValue)content).getLong(), 0, bytes);
break;
return LongPoint.newExactQuery(filedName, ((IntegerValue)content).getLong());

case Type.SHORT:
case Type.UNSIGNED_SHORT:
case Type.INT:
case Type.UNSIGNED_INT:
NumericUtils.intToPrefixCoded(((IntegerValue)content).getInt(), 0, bytes);
break;
return IntPoint.newExactQuery(filedName, ((IntegerValue)content).getInt());


case Type.DECIMAL:
final long dv = NumericUtils.doubleToSortableLong(((DecimalValue)content).getDouble());
NumericUtils.longToPrefixCoded(dv, 0, bytes);
break;
return DoublePoint.newExactQuery(filedName, ((DecimalValue) content).getDouble());

case Type.DOUBLE:
final long lv = NumericUtils.doubleToSortableLong(((DoubleValue)content).getDouble());
NumericUtils.longToPrefixCoded(lv, 0, bytes);
break;
return DoublePoint.newExactQuery(filedName,((DoubleValue)content).getDouble());

case Type.FLOAT:
final int iv = NumericUtils.floatToSortableInt(((FloatValue)content).getValue());
NumericUtils.longToPrefixCoded(iv, 0, bytes);
break;
return FloatPoint.newExactQuery(filedName, ((FloatValue)content).getValue());

case Type.DATE:
final long dl = dateToLong((DateValue)content);
NumericUtils.longToPrefixCoded(dl, 0, bytes);
break;
return LongPoint.newExactQuery(filedName, dl);

case Type.TIME:
final long tl = timeToLong((TimeValue) content);
NumericUtils.longToPrefixCoded(tl, 0, bytes);
break;
return LongPoint.newExactQuery(filedName, tl);

case Type.DATE_TIME:
case Type.DATE_TIME: {
final String dt = dateTimeToString((DateTimeValue) content);
bytes.copyChars(dt);
break;
return new TermQuery(new Term(filedName, dt));
}

default:
bytes.copyChars(content.getStringValue());
default: {
return new TermQuery(new Term(filedName, content.getStringValue()));
}
}
return bytes.toBytesRef();
}

public static long dateToLong(DateValue date) {
Expand Down
Loading

0 comments on commit 74ffc42

Please sign in to comment.