From 6c2f6071b20633fafc383212331f79146613011b Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Mon, 2 Dec 2024 16:04:31 -0500 Subject: [PATCH 01/28] Refactor/bbq format (#117847) * Refactor bbq format to be contained in a package * fixing license headers * fixing module * fix style --- server/src/main/java/module-info.java | 4 ++-- .../{ => es816}/BinarizedByteVectorValues.java | 3 ++- .../codec/vectors/{ => es816}/BinaryQuantizer.java | 4 +++- .../{ => es816}/ES816BinaryFlatVectorsScorer.java | 14 ++++++++------ .../ES816BinaryQuantizedVectorsFormat.java | 2 +- .../ES816BinaryQuantizedVectorsReader.java | 7 ++++--- .../ES816BinaryQuantizedVectorsWriter.java | 10 ++++++---- .../ES816HnswBinaryQuantizedVectorsFormat.java | 2 +- .../{ => es816}/OffHeapBinarizedVectorValues.java | 9 +++++---- .../mapper/vectors/DenseVectorFieldMapper.java | 4 ++-- .../org.apache.lucene.codecs.KnnVectorsFormat | 4 ++-- .../{ => es816}/BinaryQuantizationTests.java | 4 +++- .../ES816BinaryFlatVectorsScorerTests.java | 4 +++- .../ES816BinaryQuantizedVectorsFormatTests.java | 3 ++- ...ES816HnswBinaryQuantizedVectorsFormatTests.java | 2 +- 15 files changed, 45 insertions(+), 31 deletions(-) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/BinarizedByteVectorValues.java (96%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/BinaryQuantizer.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryFlatVectorsScorer.java (95%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsFormat.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsReader.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsWriter.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816HnswBinaryQuantizedVectorsFormat.java (99%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/OffHeapBinarizedVectorValues.java (97%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/BinaryQuantizationTests.java (99%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryFlatVectorsScorerTests.java (99%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsFormatTests.java (98%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816HnswBinaryQuantizedVectorsFormatTests.java (99%) diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index d572d3b90fec8..5acc202ebb294 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -457,8 +457,8 @@ org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat, org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat, - org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat, - org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat; + org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat, + org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat; provides org.apache.lucene.codecs.Codec with diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinarizedByteVectorValues.java 
b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinarizedByteVectorValues.java similarity index 96% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/BinarizedByteVectorValues.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinarizedByteVectorValues.java index cf69ab0862949..d5f968af3e738 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinarizedByteVectorValues.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinarizedByteVectorValues.java @@ -17,11 +17,12 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinaryQuantizer.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizer.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/BinaryQuantizer.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizer.java index aa72904fe1341..768c6d526e468 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinaryQuantizer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizer.java @@ -17,11 +17,13 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorer.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java similarity index 95% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorer.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java index 72c5da4880e75..445bdadab2354 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.index.KnnVectorValues; @@ -26,6 +26,8 @@ import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import org.elasticsearch.simdvec.ESVectorUtil; import java.io.IOException; @@ -35,10 +37,10 @@ import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; /** Vector scorer over binarized vector values */ -public class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer { +class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer { private final FlatVectorsScorer nonQuantizedDelegate; - public ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { + ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { this.nonQuantizedDelegate = nonQuantizedDelegate; } @@ -144,10 +146,10 @@ public RandomVectorScorerSupplier copy() throws IOException { } /** A binarized query representing its quantized form along with factors */ - public record BinaryQueryVector(byte[] vector, BinaryQuantizer.QueryFactors factors) {} + record BinaryQueryVector(byte[] vector, BinaryQuantizer.QueryFactors factors) {} /** Vector scorer over binarized vector values */ - public static class BinarizedRandomVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer { + static class BinarizedRandomVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer { private final BinaryQueryVector queryVector; private final BinarizedByteVectorValues targetVectors; private final VectorSimilarityFunction similarityFunction; @@ -155,7 +157,7 @@ public static class BinarizedRandomVectorScorer extends RandomVectorScorer.Abstr private final float sqrtDimensions; private final float maxX1; - public BinarizedRandomVectorScorer( + BinarizedRandomVectorScorer( BinaryQueryVector queryVectors, BinarizedByteVectorValues targetVectors, VectorSimilarityFunction similarityFunction diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormat.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java index e32aea0fb04ae..d864ec5dee8c5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsReader.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java index 21c4a5c449387..fc20809ea7eed 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; @@ -43,6 +43,7 @@ import org.apache.lucene.util.SuppressForbidden; import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import org.apache.lucene.util.hnsw.RandomVectorScorer; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; import java.util.HashMap; @@ -55,7 +56,7 @@ * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ @SuppressForbidden(reason = "Lucene classes") -public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { +class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES816BinaryQuantizedVectorsReader.class); @@ -64,7 +65,7 @@ public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { private final FlatVectorsReader rawVectorsReader; private final ES816BinaryFlatVectorsScorer vectorScorer; - public ES816BinaryQuantizedVectorsReader( + ES816BinaryQuantizedVectorsReader( SegmentReadState state, FlatVectorsReader rawVectorsReader, ES816BinaryFlatVectorsScorer vectorsScorer diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsWriter.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsWriter.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsWriter.java index a7774b850b64c..31ae977e81118 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsWriter.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; @@ -48,6 +48,8 @@ import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.Closeable; import java.io.IOException; @@ -61,14 +63,14 @@ import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.apache.lucene.util.RamUsageEstimator.shallowSizeOfInstance; -import static org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat.BINARIZED_VECTOR_COMPONENT; -import static org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; +import static org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat.BINARIZED_VECTOR_COMPONENT; +import static org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ @SuppressForbidden(reason = "Lucene classes") -public class ES816BinaryQuantizedVectorsWriter extends FlatVectorsWriter { +class ES816BinaryQuantizedVectorsWriter extends FlatVectorsWriter { private static final long SHALLOW_RAM_BYTES_USED = shallowSizeOfInstance(ES816BinaryQuantizedVectorsWriter.class); private final SegmentWriteState segmentWriteState; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java similarity index 99% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormat.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java index 097cdffff6ae4..52f9f14b7bf97 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/OffHeapBinarizedVectorValues.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/OffHeapBinarizedVectorValues.java similarity index 97% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/OffHeapBinarizedVectorValues.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/OffHeapBinarizedVectorValues.java index e7d818bb752d6..12bf962d314bd 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/OffHeapBinarizedVectorValues.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/OffHeapBinarizedVectorValues.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene90.IndexedDISI; @@ -29,6 +29,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.packed.DirectMonotonicReader; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; import java.nio.ByteBuffer; @@ -37,7 +38,7 @@ import static org.elasticsearch.index.codec.vectors.BQVectorUtils.constSqrt; /** Binarized vector values loaded from off-heap */ -public abstract class OffHeapBinarizedVectorValues extends BinarizedByteVectorValues { +abstract class OffHeapBinarizedVectorValues extends BinarizedByteVectorValues { protected final int dimension; protected final int size; @@ -251,8 +252,8 @@ public static OffHeapBinarizedVectorValues load( } /** Dense off-heap binarized vector values */ - public static class DenseOffHeapVectorValues extends OffHeapBinarizedVectorValues { - public DenseOffHeapVectorValues( + static class DenseOffHeapVectorValues extends OffHeapBinarizedVectorValues { + DenseOffHeapVectorValues( int dimension, int size, float[] centroid, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index dea9368a9377e..0a6a24f727572 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -46,8 +46,8 @@ import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat; import org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat; -import org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat; -import org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat; +import org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat; +import org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.mapper.ArraySourceValueFetcher; diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index c2201f5b1c319..389555e60b43b 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -3,5 +3,5 @@ org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat -org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat -org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat diff --git 
a/server/src/test/java/org/elasticsearch/index/codec/vectors/BinaryQuantizationTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizationTests.java similarity index 99% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/BinaryQuantizationTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizationTests.java index 32d717bd76f91..205cbb4119dd6 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/BinaryQuantizationTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizationTests.java @@ -17,11 +17,13 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.util.Random; diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorerTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorerTests.java similarity index 99% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorerTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorerTests.java index cef5e5358f3d5..a75b9bc6064d1 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorerTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorerTests.java @@ -17,13 +17,15 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.util.VectorUtil; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java similarity index 98% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormatTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java index 42f2fbb383ac9..681f615653d40 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; @@ -41,6 +41,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; import java.util.Locale; diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java similarity index 99% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormatTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java index ca96e093b7b28..a25fa2836ee34 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; From 12be8203d3efd1ed62a838aaa1b379c592a7aaec Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 3 Dec 2024 09:31:51 +1100 Subject: [PATCH 02/28] Mute org.elasticsearch.xpack.test.rest.XPackRestIT test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} #117862 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 73d9a29e275b3..57db22feba059 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -236,6 +236,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117815 - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT issue: https://github.com/elastic/elasticsearch/issues/111319 +- class: org.elasticsearch.xpack.test.rest.XPackRestIT + method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} + issue: https://github.com/elastic/elasticsearch/issues/117862 # Examples: # From af7d3f911fbacaa1f4b1be68398cc59cbfdc89e2 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 2 Dec 2024 17:57:02 -0800 Subject: [PATCH 03/28] Add cluster level reduction (#117731) This change introduces cluster-level reduction. Unlike data-node-level reduction, it does not require pragmas because the network latency and throughput across clusters differ significantly from those within a cluster. As a result, the benefits of this reduction should outweigh the risks. 
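To make the mechanism concrete, here is a minimal sketch (plain Java, not the actual ES|QL operator code; all names are hypothetical) of what cluster-level reduction buys for a query like `FROM *:test | STATS total=sum(const)`: each remote data node emits a partial aggregation state, and the remote coordinator folds those partials into a single intermediate page before anything crosses the cluster boundary, so the querying cluster receives one page per remote cluster instead of one per data node.

```java
// Hypothetical sketch, not Elasticsearch code: partial-to-partial reduction
// at the remote coordinator for a query like `STATS total = sum(const)`.
import java.util.List;

public class ClusterLevelReductionSketch {

    /** Intermediate aggregation state for a sum: just a running total. */
    record PartialSum(long sum) {
        PartialSum combine(PartialSum other) {
            return new PartialSum(sum + other.sum);
        }
    }

    /** Remote coordinator: fold all data-node pages into one intermediate page. */
    static PartialSum reduceOnRemoteCluster(List<PartialSum> dataNodePages) {
        return dataNodePages.stream().reduce(new PartialSum(0), PartialSum::combine);
    }

    public static void main(String[] args) {
        // Three data nodes in the remote cluster each emit a partial sum...
        List<PartialSum> remotePages = List.of(new PartialSum(10), new PartialSum(32), new PartialSum(7));
        // ...but only one reduced page crosses the cluster boundary.
        PartialSum crossClusterPage = reduceOnRemoteCluster(remotePages);
        // The querying cluster performs the final reduction/finishing step.
        System.out.println("total = " + crossClusterPage.sum()); // total = 49
    }
}
```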
--- docs/changelog/117731.yaml | 5 ++ .../action/CrossClustersCancellationIT.java | 37 ++++++++++++ .../xpack/esql/planner/PlannerUtils.java | 47 ++++++---------- .../xpack/esql/plugin/ComputeService.java | 56 +++++++------------ 4 files changed, 78 insertions(+), 67 deletions(-) create mode 100644 docs/changelog/117731.yaml diff --git a/docs/changelog/117731.yaml b/docs/changelog/117731.yaml new file mode 100644 index 0000000000000..f69cd5bf31100 --- /dev/null +++ b/docs/changelog/117731.yaml @@ -0,0 +1,5 @@ +pr: 117731 +summary: Add cluster level reduction +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java index 5ffc92636b272..f29f79976dc0d 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java @@ -238,4 +238,41 @@ public void testSameRemoteClusters() throws Exception { } } } + + public void testTasks() throws Exception { + createRemoteIndex(between(10, 100)); + EsqlQueryRequest request = EsqlQueryRequest.syncEsqlQueryRequest(); + request.query("FROM *:test | STATS total=sum(const) | LIMIT 1"); + request.pragmas(randomPragmas()); + ActionFuture requestFuture = client().execute(EsqlQueryAction.INSTANCE, request); + assertTrue(PauseFieldPlugin.startEmitting.await(30, TimeUnit.SECONDS)); + try { + assertBusy(() -> { + List clusterTasks = client(REMOTE_CLUSTER).admin() + .cluster() + .prepareListTasks() + .setActions(ComputeService.CLUSTER_ACTION_NAME) + .get() + .getTasks(); + assertThat(clusterTasks.size(), equalTo(1)); + List drivers = client(REMOTE_CLUSTER).admin() + .cluster() + .prepareListTasks() + .setTargetParentTaskId(clusterTasks.getFirst().taskId()) + .setActions(DriverTaskRunner.ACTION_NAME) + .setDetailed(true) + .get() + .getTasks(); + assertThat(drivers.size(), equalTo(1)); + TaskInfo driver = drivers.getFirst(); + assertThat(driver.description(), equalTo(""" + \\_ExchangeSourceOperator[] + \\_AggregationOperator[mode = INTERMEDIATE, aggs = sum of longs] + \\_ExchangeSinkOperator""")); + }); + } finally { + PauseFieldPlugin.allowEmitting.countDown(); + } + requestFuture.actionGet(30, TimeUnit.SECONDS).close(); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index c998af2215169..f4ada1442efe5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -29,14 +29,8 @@ import org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalPlanOptimizer; -import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.Filter; -import org.elasticsearch.xpack.esql.plan.logical.Limit; -import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; -import org.elasticsearch.xpack.esql.plan.logical.OrderBy; -import 
org.elasticsearch.xpack.esql.plan.logical.TopN; -import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.EsSourceExec; import org.elasticsearch.xpack.esql.plan.physical.EstimatesRowSize; @@ -44,10 +38,7 @@ import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec; import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; -import org.elasticsearch.xpack.esql.plan.physical.LimitExec; -import org.elasticsearch.xpack.esql.plan.physical.OrderExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; -import org.elasticsearch.xpack.esql.plan.physical.TopNExec; import org.elasticsearch.xpack.esql.planner.mapper.LocalMapper; import org.elasticsearch.xpack.esql.planner.mapper.Mapper; import org.elasticsearch.xpack.esql.session.Configuration; @@ -83,29 +74,25 @@ public static Tuple breakPlanBetweenCoordinatorAndDa return new Tuple<>(coordinatorPlan, dataNodePlan.get()); } - public static PhysicalPlan dataNodeReductionPlan(LogicalPlan plan, PhysicalPlan unused) { - var pipelineBreakers = plan.collectFirstChildren(Mapper::isPipelineBreaker); + public static PhysicalPlan reductionPlan(PhysicalPlan plan) { + // find the logical fragment + var fragments = plan.collectFirstChildren(p -> p instanceof FragmentExec); + if (fragments.isEmpty()) { + return null; + } + final FragmentExec fragment = (FragmentExec) fragments.getFirst(); - if (pipelineBreakers.isEmpty() == false) { - UnaryPlan pipelineBreaker = (UnaryPlan) pipelineBreakers.get(0); - if (pipelineBreaker instanceof TopN) { - LocalMapper mapper = new LocalMapper(); - var physicalPlan = EstimatesRowSize.estimateRowSize(0, mapper.map(plan)); - return physicalPlan.collectFirstChildren(TopNExec.class::isInstance).get(0); - } else if (pipelineBreaker instanceof Limit limit) { - return new LimitExec(limit.source(), unused, limit.limit()); - } else if (pipelineBreaker instanceof OrderBy order) { - return new OrderExec(order.source(), unused, order.order()); - } else if (pipelineBreaker instanceof Aggregate) { - LocalMapper mapper = new LocalMapper(); - var physicalPlan = EstimatesRowSize.estimateRowSize(0, mapper.map(plan)); - var aggregate = (AggregateExec) physicalPlan.collectFirstChildren(AggregateExec.class::isInstance).get(0); - return aggregate.withMode(AggregatorMode.INITIAL); - } else { - throw new EsqlIllegalArgumentException("unsupported unary physical plan node [" + pipelineBreaker.nodeName() + "]"); - } + final var pipelineBreakers = fragment.fragment().collectFirstChildren(Mapper::isPipelineBreaker); + if (pipelineBreakers.isEmpty()) { + return null; + } + final var pipelineBreaker = pipelineBreakers.getFirst(); + final LocalMapper mapper = new LocalMapper(); + PhysicalPlan reducePlan = mapper.map(pipelineBreaker); + if (reducePlan instanceof AggregateExec agg) { + reducePlan = agg.withMode(AggregatorMode.INITIAL); // force to emit intermediate outputs } - return null; + return EstimatesRowSize.estimateRowSize(fragment.estimatedRowSize(), reducePlan); } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java index b06dd3cdb64d3..9aea1577a4137 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java @@ -60,12 +60,10 @@ import org.elasticsearch.xpack.esql.action.EsqlQueryAction; import org.elasticsearch.xpack.esql.action.EsqlSearchShardsAction; import org.elasticsearch.xpack.esql.core.expression.Attribute; -import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; import org.elasticsearch.xpack.esql.enrich.LookupFromIndexService; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec; -import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; import org.elasticsearch.xpack.esql.plan.physical.OutputExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; @@ -780,35 +778,24 @@ private void runComputeOnDataNode( } } + private static PhysicalPlan reductionPlan(ExchangeSinkExec plan, boolean enable) { + PhysicalPlan reducePlan = new ExchangeSourceExec(plan.source(), plan.output(), plan.isIntermediateAgg()); + if (enable) { + PhysicalPlan p = PlannerUtils.reductionPlan(plan); + if (p != null) { + reducePlan = p.replaceChildren(List.of(reducePlan)); + } + } + return new ExchangeSinkExec(plan.source(), plan.output(), plan.isIntermediateAgg(), reducePlan); + } + private class DataNodeRequestHandler implements TransportRequestHandler { @Override public void messageReceived(DataNodeRequest request, TransportChannel channel, Task task) { final ActionListener listener = new ChannelActionListener<>(channel); - final ExchangeSinkExec reducePlan; + final PhysicalPlan reductionPlan; if (request.plan() instanceof ExchangeSinkExec plan) { - var fragments = plan.collectFirstChildren(FragmentExec.class::isInstance); - if (fragments.isEmpty()) { - listener.onFailure(new IllegalStateException("expected a fragment plan for a remote compute; got " + request.plan())); - return; - } - var localExchangeSource = new ExchangeSourceExec(plan.source(), plan.output(), plan.isIntermediateAgg()); - Holder reducePlanHolder = new Holder<>(); - if (request.pragmas().nodeLevelReduction()) { - PhysicalPlan dataNodePlan = request.plan(); - request.plan() - .forEachUp( - FragmentExec.class, - f -> { reducePlanHolder.set(PlannerUtils.dataNodeReductionPlan(f.fragment(), dataNodePlan)); } - ); - } - reducePlan = new ExchangeSinkExec( - plan.source(), - plan.output(), - plan.isIntermediateAgg(), - reducePlanHolder.get() != null - ? reducePlanHolder.get().replaceChildren(List.of(localExchangeSource)) - : localExchangeSource - ); + reductionPlan = reductionPlan(plan, request.pragmas().nodeLevelReduction()); } else { listener.onFailure(new IllegalStateException("expected exchange sink for a remote compute; got " + request.plan())); return; @@ -825,7 +812,7 @@ public void messageReceived(DataNodeRequest request, TransportChannel channel, T request.indicesOptions() ); try (var computeListener = ComputeListener.create(transportService, (CancellableTask) task, listener)) { - runComputeOnDataNode((CancellableTask) task, sessionId, reducePlan, request, computeListener); + runComputeOnDataNode((CancellableTask) task, sessionId, reductionPlan, request, computeListener); } } } @@ -871,10 +858,10 @@ public void messageReceived(ClusterComputeRequest request, TransportChannel chan * Performs a compute on a remote cluster. The output pages are placed in an exchange sink specified by * {@code globalSessionId}. 
The coordinator on the main cluster will poll pages from there. * <p>
- * Currently, the coordinator on the remote cluster simply collects pages from data nodes in the remote cluster - * and places them in the exchange sink. We can achieve this by using a single exchange buffer to minimize overhead. - * However, here we use two exchange buffers so that we can run an actual plan on this coordinator to perform partial - * reduce operations, such as limit, topN, and partial-to-partial aggregation in the future. + * Currently, the coordinator on the remote cluster polls pages from data nodes within the remote cluster + * and performs cluster-level reduction before sending pages to the querying cluster. This reduction aims + * to minimize data transfers across clusters but may require additional CPU resources for operations like + * aggregations. */ void runComputeOnRemoteCluster( String clusterAlias, @@ -892,6 +879,7 @@ void runComputeOnRemoteCluster( () -> exchangeService.finishSinkHandler(globalSessionId, new TaskCancelledException(parentTask.getReasonCancelled())) ); final String localSessionId = clusterAlias + ":" + globalSessionId; + final PhysicalPlan coordinatorPlan = reductionPlan(plan, true); var exchangeSource = new ExchangeSourceHandler( configuration.pragmas().exchangeBufferSize(), transportService.getThreadPool().executor(ThreadPool.Names.SEARCH), @@ -899,12 +887,6 @@ void runComputeOnRemoteCluster( ); try (Releasable ignored = exchangeSource.addEmptySink()) { exchangeSink.addCompletionListener(computeListener.acquireAvoid()); - PhysicalPlan coordinatorPlan = new ExchangeSinkExec( - plan.source(), - plan.output(), - plan.isIntermediateAgg(), - new ExchangeSourceExec(plan.source(), plan.output(), plan.isIntermediateAgg()) - ); runCompute( parentTask, new ComputeContext(localSessionId, clusterAlias, List.of(), configuration, exchangeSource, exchangeSink), From 4a9f632fab7571e198f5030dd30acc80c436c58b Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 2 Dec 2024 19:53:08 -0800 Subject: [PATCH 04/28] Bypass cancellation when closing sinks (#117797) > **java.lang.AssertionError: Leftover exchanges ExchangeService{sinks=[veZSyrPATq2Sg83dtgK3Jg:700/3]} on node node_s4** I looked into the test failure described in https://github.com/elastic/elasticsearch/issues/117253. The reason we don't clean up the exchange sink quickly is that, once a failure occurs, we cancel the request along with all its child requests. These exchange sinks will be cleaned up only after they become inactive, which by default takes 5 minutes. We could override the `esql.exchange.sink_inactive_interval` setting in the test to remove these exchange sinks faster. However, I think we should allow exchange requests that close exchange sinks to bypass cancellation, enabling quicker resource cleanup than the default inactive interval.
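The core of the fix is small; below is a minimal sketch of the pattern (assuming the elasticsearch server classes on the classpath; `CloseSinkRequest` is a hypothetical name, the real change lives in `ExchangeRequest`): a request whose only purpose is to drain and close the remote sink reports no parent task, so cancelling the query cannot cancel the cleanup round-trip.

```java
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.transport.TransportRequest;

// Hypothetical illustration of the parent-task masking pattern.
class CloseSinkRequest extends TransportRequest {
    private final boolean sourcesFinished;

    CloseSinkRequest(boolean sourcesFinished) {
        this.sourcesFinished = sourcesFinished;
    }

    @Override
    public TaskId getParentTask() {
        // sourcesFinished=true means "complete the remote sink and return":
        // masking the parent lets this request bypass task cancellation.
        return sourcesFinished ? TaskId.EMPTY_TASK_ID : super.getParentTask();
    }
}
```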
Closes #117253 --- .../operator/exchange/ExchangeRequest.java | 17 ++++++-- .../operator/exchange/ExchangeService.java | 43 ++++++++++--------- .../exchange/ExchangeSourceHandler.java | 8 ++-- .../compute/operator/exchange/RemoteSink.java | 10 +++++ .../exchange/ExchangeRequestTests.java | 27 ++++++++++++ .../exchange/ExchangeServiceTests.java | 6 ++- 6 files changed, 82 insertions(+), 29 deletions(-) create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java index 6ed2cc7e587be..1e8700bcd4030 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java @@ -40,6 +40,17 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(sourcesFinished); } + @Override + public TaskId getParentTask() { + // Exchange requests with `sourcesFinished=true` complete the remote sink and return without blocking. + // Masking the parent task allows these requests to bypass task cancellation, ensuring cleanup of the remote sink. + // TODO: Maybe add a separate action/request for closing exchange sinks? + if (sourcesFinished) { + return TaskId.EMPTY_TASK_ID; + } + return super.getParentTask(); + } + /** * True if the {@link ExchangeSourceHandler} has enough input. * The corresponding {@link ExchangeSinkHandler} can drain pages and finish itself. @@ -70,9 +81,9 @@ public int hashCode() { @Override public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { - if (parentTaskId.isSet() == false) { - assert false : "ExchangeRequest must have a parent task"; - throw new IllegalStateException("ExchangeRequest must have a parent task"); + if (sourcesFinished == false && parentTaskId.isSet() == false) { + assert false : "ExchangeRequest with sourcesFinished=false must have a parent task"; + throw new IllegalStateException("ExchangeRequest with sourcesFinished=false must have a parent task"); } return new CancellableTask(id, type, action, "", parentTaskId, headers) { @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java index a943a90d02e87..00c68c4f48e86 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java @@ -314,28 +314,20 @@ static final class TransportRemoteSink implements RemoteSink { @Override public void fetchPageAsync(boolean allSourcesFinished, ActionListener listener) { if (allSourcesFinished) { - if (finished.compareAndSet(false, true)) { - doFetchPageAsync(true, listener); - } else { - // already finished or promised - listener.onResponse(new ExchangeResponse(blockFactory, null, true)); - } - } else { - // already finished - if (finished.get()) { - listener.onResponse(new ExchangeResponse(blockFactory, null, true)); - return; - } - doFetchPageAsync(false, ActionListener.wrap(r -> { - if (r.finished()) { - finished.set(true); - } - 
listener.onResponse(r); - }, e -> { - finished.set(true); - listener.onFailure(e); - })); + close(listener.map(unused -> new ExchangeResponse(blockFactory, null, true))); + return; + } + // already finished + if (finished.get()) { + listener.onResponse(new ExchangeResponse(blockFactory, null, true)); + return; } + doFetchPageAsync(false, ActionListener.wrap(r -> { + if (r.finished()) { + finished.set(true); + } + listener.onResponse(r); + }, e -> close(ActionListener.running(() -> listener.onFailure(e))))); } private void doFetchPageAsync(boolean allSourcesFinished, ActionListener listener) { @@ -361,6 +353,15 @@ private void doFetchPageAsync(boolean allSourcesFinished, ActionListener listener) { + if (finished.compareAndSet(false, true)) { + doFetchPageAsync(true, listener.delegateFailure((l, unused) -> l.onResponse(null))); + } else { + listener.onResponse(null); + } + } } // For testing diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java index 61b3386ce0274..375016a5d51d5 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java @@ -224,8 +224,10 @@ void onSinkFailed(Exception e) { buffer.waitForReading().listener().onResponse(null); // resume the Driver if it is being blocked on reading if (finished == false) { finished = true; - outstandingSinks.finishInstance(); - completionListener.onFailure(e); + remoteSink.close(ActionListener.running(() -> { + outstandingSinks.finishInstance(); + completionListener.onFailure(e); + })); } } @@ -262,7 +264,7 @@ public void onFailure(Exception e) { failure.unwrapAndCollect(e); } buffer.waitForReading().listener().onResponse(null); // resume the Driver if it is being blocked on reading - sinkListener.onFailure(e); + remoteSink.close(ActionListener.running(() -> sinkListener.onFailure(e))); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java index 7d81cd3f66600..aaa937ef17c0e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java @@ -12,4 +12,14 @@ public interface RemoteSink { void fetchPageAsync(boolean allSourcesFinished, ActionListener listener); + + default void close(ActionListener listener) { + fetchPageAsync(true, listener.delegateFailure((l, r) -> { + try { + r.close(); + } finally { + l.onResponse(null); + } + })); + } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java new file mode 100644 index 0000000000000..8a0891651a497 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.exchange; + +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class ExchangeRequestTests extends ESTestCase { + + public void testParentTask() { + ExchangeRequest r1 = new ExchangeRequest("1", true); + r1.setParentTask(new TaskId("node-1", 1)); + assertSame(TaskId.EMPTY_TASK_ID, r1.getParentTask()); + + ExchangeRequest r2 = new ExchangeRequest("1", false); + r2.setParentTask(new TaskId("node-2", 2)); + assertTrue(r2.getParentTask().isSet()); + assertThat(r2.getParentTask(), equalTo((new TaskId("node-2", 2)))); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java index 4178f02898d79..fc6c850ba187b 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java @@ -491,7 +491,7 @@ public void testConcurrentWithTransportActions() { } } - public void testFailToRespondPage() { + public void testFailToRespondPage() throws Exception { Settings settings = Settings.builder().build(); MockTransportService node0 = newTransportService(); ExchangeService exchange0 = new ExchangeService(settings, threadPool, ESQL_TEST_EXECUTOR, blockFactory()); @@ -558,7 +558,9 @@ public void sendResponse(TransportResponse transportResponse) { Throwable cause = ExceptionsHelper.unwrap(err, IOException.class); assertNotNull(cause); assertThat(cause.getMessage(), equalTo("page is too large")); - sinkHandler.onFailure(new RuntimeException(cause)); + PlainActionFuture<Void> sinkCompletionFuture = new PlainActionFuture<>(); + sinkHandler.addCompletionListener(sinkCompletionFuture); + assertBusy(() -> assertTrue(sinkCompletionFuture.isDone())); expectThrows(Exception.class, () -> sourceCompletionFuture.actionGet(10, TimeUnit.SECONDS)); } } From af9a57ec66770530cf45aefd842e86a810b13947 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Tue, 3 Dec 2024 07:18:44 +0100 Subject: [PATCH 05/28] Remove supersetSize and subsetSize from InternalSignificantTerms.Bucket (#117574) Those fields are only used to update the score and are not serialized in the bucket, so they can be removed.
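The shape of the change, in a simplified sketch (a hypothetical `Bucket` stripped down to the relevant members): the subset/superset sizes are constants of the whole aggregation, so `updateScore` now receives them as parameters instead of reading per-bucket copies.

```java
// Simplified sketch of the refactor; the real classes live in
// org.elasticsearch.search.aggregations.bucket.terms.
class Bucket {
    long subsetDf;
    long supersetDf;
    double score;

    // Before: `long subsetSize; long supersetSize;` were fields on every
    // bucket, used only here. After: callers pass the aggregation-level sizes.
    void updateScore(SignificanceHeuristic heuristic, long subsetSize, long supersetSize) {
        score = heuristic.getScore(subsetDf, subsetSize, supersetDf, supersetSize);
    }

    // Minimal stand-in for the real SignificanceHeuristic contract.
    interface SignificanceHeuristic {
        double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize);
    }
}
```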
--- .../SignificantTermsSignificanceScoreIT.java | 2 +- .../GlobalOrdinalsStringTermsAggregator.java | 9 +-- .../terms/InternalMappedSignificantTerms.java | 6 +- .../terms/InternalSignificantTerms.java | 50 ++------------ .../terms/MapStringTermsAggregator.java | 64 +++++++++-------- .../bucket/terms/NumericTermsAggregator.java | 69 ++++++++++--------- .../bucket/terms/SignificantLongTerms.java | 39 ++--------- .../bucket/terms/SignificantStringTerms.java | 30 ++------ .../bucket/terms/SignificantTerms.java | 24 +++---- .../terms/UnmappedSignificantTerms.java | 25 ++----- .../InternalSignificantTermsTestCase.java | 2 - .../terms/SignificantLongTermsTests.java | 15 +--- .../terms/SignificantStringTermsTests.java | 15 +--- ...AbstractSignificanceHeuristicTestCase.java | 39 +++-------- 14 files changed, 127 insertions(+), 262 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java index bf11c1d69bcc6..671f60e2b9d5e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java @@ -495,7 +495,7 @@ public void testScriptScore() throws ExecutionException, InterruptedException, I for (SignificantTerms.Bucket bucket : sigTerms.getBuckets()) { assertThat( bucket.getSignificanceScore(), - is((double) bucket.getSubsetDf() + bucket.getSubsetSize() + bucket.getSupersetDf() + bucket.getSupersetSize()) + is((double) bucket.getSubsetDf() + sigTerms.getSubsetSize() + bucket.getSupersetDf() + sigTerms.getSupersetSize()) ); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 5a79155d1d4f5..4cf710232c7a0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -989,7 +989,7 @@ SignificantStringTerms.Bucket[] buildBuckets(int size) { @Override SignificantStringTerms.Bucket buildEmptyTemporaryBucket() { - return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format, 0); + return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, null, format, 0); } private long subsetSize(long owningBucketOrd) { @@ -998,22 +998,19 @@ private long subsetSize(long owningBucketOrd) { } @Override - BucketUpdater bucketUpdater(long owningBucketOrd, GlobalOrdLookupFunction lookupGlobalOrd) - throws IOException { + BucketUpdater bucketUpdater(long owningBucketOrd, GlobalOrdLookupFunction lookupGlobalOrd) { long subsetSize = subsetSize(owningBucketOrd); return (spare, globalOrd, bucketOrd, docCount) -> { spare.bucketOrd = bucketOrd; oversizedCopy(lookupGlobalOrd.apply(globalOrd), spare.termBytes); spare.subsetDf = docCount; - spare.subsetSize = subsetSize; spare.supersetDf = backgroundFrequencies.freq(spare.termBytes); - spare.supersetSize = supersetSize; /* * During shard-local down-selection we use subset/superset stats * that are for this shard only. 
Back at the central reducer these * properties will be updated with global stats. */ - spare.updateScore(significanceHeuristic); + spare.updateScore(significanceHeuristic, subsetSize, supersetSize); }; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java index 3f75a27306ab4..8c6d21cc74119 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java @@ -59,7 +59,7 @@ protected InternalMappedSignificantTerms(StreamInput in, Bucket.Reader bucket subsetSize = in.readVLong(); supersetSize = in.readVLong(); significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class); - buckets = in.readCollectionAsList(stream -> bucketReader.read(stream, subsetSize, supersetSize, format)); + buckets = in.readCollectionAsList(stream -> bucketReader.read(stream, format)); } @Override @@ -91,12 +91,12 @@ public B getBucketByKey(String term) { } @Override - protected long getSubsetSize() { + public long getSubsetSize() { return subsetSize; } @Override - protected long getSupersetSize() { + public long getSupersetSize() { return supersetSize; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java index 6c0eb465d1f80..78ae2481f5d99 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java @@ -53,13 +53,11 @@ public abstract static class Bucket> extends InternalMultiBu */ @FunctionalInterface public interface Reader> { - B read(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException; + B read(StreamInput in, DocValueFormat format) throws IOException; } long subsetDf; - long subsetSize; long supersetDf; - long supersetSize; /** * Ordinal of the bucket while it is being built. Not used after it is * returned from {@link Aggregator#buildAggregations(org.elasticsearch.common.util.LongArray)} and not @@ -70,16 +68,7 @@ public interface Reader> { protected InternalAggregations aggregations; final transient DocValueFormat format; - protected Bucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - DocValueFormat format - ) { - this.subsetSize = subsetSize; - this.supersetSize = supersetSize; + protected Bucket(long subsetDf, long supersetDf, InternalAggregations aggregations, DocValueFormat format) { this.subsetDf = subsetDf; this.supersetDf = supersetDf; this.aggregations = aggregations; @@ -89,9 +78,7 @@ protected Bucket( /** * Read from a stream. 
*/ - protected Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) { - this.subsetSize = subsetSize; - this.supersetSize = supersetSize; + protected Bucket(StreamInput in, DocValueFormat format) { this.format = format; } @@ -105,20 +92,10 @@ public long getSupersetDf() { return supersetDf; } - @Override - public long getSupersetSize() { - return supersetSize; - } - - @Override - public long getSubsetSize() { - return subsetSize; - } - // TODO we should refactor to remove this, since buckets should be immutable after they are generated. // This can lead to confusing bugs if the bucket is re-created (via createBucket() or similar) without // the score - void updateScore(SignificanceHeuristic significanceHeuristic) { + void updateScore(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize) { score = significanceHeuristic.getScore(subsetDf, subsetSize, supersetDf, supersetSize); } @@ -262,13 +239,11 @@ public InternalAggregation get() { buckets.forEach(entry -> { final B b = createBucket( entry.value.subsetDf[0], - globalSubsetSize, entry.value.supersetDf[0], - globalSupersetSize, entry.value.reducer.getAggregations(), entry.value.reducer.getProto() ); - b.updateScore(heuristic); + b.updateScore(heuristic, globalSubsetSize, globalSupersetSize); if (((b.score > 0) && (b.subsetDf >= minDocCount)) || reduceContext.isFinalReduce() == false) { final B removed = ordered.insertWithOverflow(b); if (removed == null) { @@ -317,9 +292,7 @@ public InternalAggregation finalizeSampling(SamplingContext samplingContext) { .map( b -> createBucket( samplingContext.scaleUp(b.subsetDf), - subsetSize, samplingContext.scaleUp(b.supersetDf), - supersetSize, InternalAggregations.finalizeSampling(b.aggregations, samplingContext), b ) @@ -328,14 +301,7 @@ public InternalAggregation finalizeSampling(SamplingContext samplingContext) { ); } - abstract B createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - B prototype - ); + abstract B createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, B prototype); protected abstract A create(long subsetSize, long supersetSize, List buckets); @@ -344,10 +310,6 @@ abstract B createBucket( */ protected abstract B[] createBucketsArray(int size); - protected abstract long getSubsetSize(); - - protected abstract long getSupersetSize(); - protected abstract SignificanceHeuristic getSignificanceHeuristic(); @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java index 6ae47d5975479..b96c495d37489 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java @@ -47,7 +47,6 @@ import java.util.function.BiConsumer; import java.util.function.Function; import java.util.function.LongConsumer; -import java.util.function.Supplier; import static org.elasticsearch.search.aggregations.InternalOrder.isKeyOrder; @@ -296,7 +295,7 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro try (ObjectArrayPriorityQueue ordered = buildPriorityQueue(size)) { B spare = null; BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningOrd); - Supplier emptyBucketBuilder = 
emptyBucketBuilder(owningOrd); + BucketUpdater bucketUpdater = bucketUpdater(owningOrd); while (ordsEnum.next()) { long docCount = bucketDocCount(ordsEnum.ord()); otherDocCounts.increment(ordIdx, docCount); @@ -305,9 +304,9 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro } if (spare == null) { checkRealMemoryCBForInternalBucket(); - spare = emptyBucketBuilder.get(); + spare = buildEmptyBucket(); } - updateBucket(spare, ordsEnum, docCount); + bucketUpdater.updateBucket(spare, ordsEnum, docCount); spare = ordered.insertWithOverflow(spare); } @@ -348,9 +347,9 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedDocs) throws IOException; /** - * Build an empty temporary bucket. + * Build an empty bucket. */ - abstract Supplier emptyBucketBuilder(long owningBucketOrd); + abstract B buildEmptyBucket(); /** * Build a {@link PriorityQueue} to sort the buckets. After we've @@ -362,7 +361,7 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro * Update fields in {@code spare} to reflect information collected for * this bucket ordinal. */ - abstract void updateBucket(B spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) throws IOException; + abstract BucketUpdater bucketUpdater(long owningBucketOrd); /** * Build an array to hold the "top" buckets for each ordinal. @@ -399,6 +398,10 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract R buildEmptyResult(); } + interface BucketUpdater { + void updateBucket(B spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) throws IOException; + } + /** * Builds results for the standard {@code terms} aggregation. 
*/ @@ -490,8 +493,8 @@ private void collectZeroDocEntries(BinaryDocValues values, Bits liveDocs, int ma } @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - return () -> new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format); + StringTerms.Bucket buildEmptyBucket() { + return new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format); } @Override @@ -500,10 +503,12 @@ ObjectArrayPriorityQueue buildPriorityQueue(int size) { } @Override - void updateBucket(StringTerms.Bucket spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) throws IOException { - ordsEnum.readValue(spare.termBytes); - spare.docCount = docCount; - spare.bucketOrd = ordsEnum.ord(); + BucketUpdater bucketUpdater(long owningBucketOrd) { + return (spare, ordsEnum, docCount) -> { + ordsEnum.readValue(spare.termBytes); + spare.docCount = docCount; + spare.bucketOrd = ordsEnum.ord(); + }; } @Override @@ -615,9 +620,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException { void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedDocs) throws IOException {} @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - long subsetSize = subsetSizes.get(owningBucketOrd); - return () -> new SignificantStringTerms.Bucket(new BytesRef(), 0, subsetSize, 0, 0, null, format, 0); + SignificantStringTerms.Bucket buildEmptyBucket() { + return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, null, format, 0); } @Override @@ -626,20 +630,20 @@ ObjectArrayPriorityQueue buildPriorityQueue(int s } @Override - void updateBucket(SignificantStringTerms.Bucket spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) - throws IOException { - - ordsEnum.readValue(spare.termBytes); - spare.bucketOrd = ordsEnum.ord(); - spare.subsetDf = docCount; - spare.supersetDf = backgroundFrequencies.freq(spare.termBytes); - spare.supersetSize = supersetSize; - /* - * During shard-local down-selection we use subset/superset stats - * that are for this shard only. Back at the central reducer these - * properties will be updated with global stats. - */ - spare.updateScore(significanceHeuristic); + BucketUpdater bucketUpdater(long owningBucketOrd) { + long subsetSize = subsetSizes.get(owningBucketOrd); + return (spare, ordsEnum, docCount) -> { + ordsEnum.readValue(spare.termBytes); + spare.bucketOrd = ordsEnum.ord(); + spare.subsetDf = docCount; + spare.supersetDf = backgroundFrequencies.freq(spare.termBytes); + /* + * During shard-local down-selection we use subset/superset stats + * that are for this shard only. Back at the central reducer these + * properties will be updated with global stats. 
+ */ + spare.updateScore(significanceHeuristic, subsetSize, supersetSize); + }; } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java index ce89b95b76a05..5d4c15d8a3b80 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java @@ -43,7 +43,6 @@ import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; -import java.util.function.Supplier; import static java.util.Collections.emptyList; import static org.elasticsearch.search.aggregations.InternalOrder.isKeyOrder; @@ -177,7 +176,7 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro try (ObjectArrayPriorityQueue ordered = buildPriorityQueue(size)) { B spare = null; BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrd); - Supplier emptyBucketBuilder = emptyBucketBuilder(owningBucketOrd); + BucketUpdater bucketUpdater = bucketUpdater(owningBucketOrd); while (ordsEnum.next()) { long docCount = bucketDocCount(ordsEnum.ord()); otherDocCounts.increment(ordIdx, docCount); @@ -186,9 +185,9 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro } if (spare == null) { checkRealMemoryCBForInternalBucket(); - spare = emptyBucketBuilder.get(); + spare = buildEmptyBucket(); } - updateBucket(spare, ordsEnum, docCount); + bucketUpdater.updateBucket(spare, ordsEnum, docCount); spare = ordered.insertWithOverflow(spare); } @@ -240,17 +239,16 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract B[] buildBuckets(int size); /** - * Build a {@linkplain Supplier} that can be used to build "empty" - * buckets. Those buckets will then be {@link #updateBucket updated} + * Build an empty bucket. Those buckets will then be {@link #bucketUpdater(long)} updated} * for each collected bucket. */ - abstract Supplier emptyBucketBuilder(long owningBucketOrd); + abstract B buildEmptyBucket(); /** * Update fields in {@code spare} to reflect information collected for * this bucket ordinal. */ - abstract void updateBucket(B spare, BucketOrdsEnum ordsEnum, long docCount) throws IOException; + abstract BucketUpdater bucketUpdater(long owningBucketOrd); /** * Build a {@link ObjectArrayPriorityQueue} to sort the buckets. 
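// Design note on bucketUpdater(owningBucketOrd) above (inferred from this
// refactor, not stated in the source): per-owning-bucket state, such as
// subsetSizes.get(owningBucketOrd) in the significant-terms strategies, is now
// fetched once and captured by the returned lambda instead of being stored on
// every spare bucket. A sketch of the collection loop the two new pieces plug
// into (shape follows buildAggregations above; B stands for the bucket type):
//
//     B spare = buildEmptyBucket();
//     BucketUpdater<B> updater = bucketUpdater(owningBucketOrd);
//     while (ordsEnum.next()) {
//         updater.updateBucket(spare, ordsEnum, bucketDocCount(ordsEnum.ord()));
//     }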
After we've @@ -282,6 +280,10 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract R buildEmptyResult(); } + interface BucketUpdater { + void updateBucket(B spare, BucketOrdsEnum ordsEnum, long docCount) throws IOException; + } + abstract class StandardTermsResultStrategy, B extends InternalTerms.Bucket> extends ResultStrategy { protected final boolean showTermDocCountError; @@ -305,13 +307,6 @@ final void buildSubAggs(ObjectArray topBucketsPerOrd) throws IOException { buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); } - @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - return this::buildEmptyBucket; - } - - abstract B buildEmptyBucket(); - @Override final void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedDocs) throws IOException { if (bucketCountThresholds.getMinDocCount() != 0) { @@ -375,10 +370,12 @@ LongTerms.Bucket buildEmptyBucket() { } @Override - void updateBucket(LongTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) { - spare.term = ordsEnum.value(); - spare.docCount = docCount; - spare.bucketOrd = ordsEnum.ord(); + BucketUpdater bucketUpdater(long owningBucketOrd) { + return (LongTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) -> { + spare.term = ordsEnum.value(); + spare.docCount = docCount; + spare.bucketOrd = ordsEnum.ord(); + }; } @Override @@ -457,10 +454,12 @@ DoubleTerms.Bucket buildEmptyBucket() { } @Override - void updateBucket(DoubleTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) { - spare.term = NumericUtils.sortableLongToDouble(ordsEnum.value()); - spare.docCount = docCount; - spare.bucketOrd = ordsEnum.ord(); + BucketUpdater bucketUpdater(long owningBucketOrd) { + return (DoubleTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) -> { + spare.term = NumericUtils.sortableLongToDouble(ordsEnum.value()); + spare.docCount = docCount; + spare.bucketOrd = ordsEnum.ord(); + }; } @Override @@ -565,20 +564,22 @@ SignificantLongTerms.Bucket[] buildBuckets(int size) { } @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - long subsetSize = subsetSizes.get(owningBucketOrd); - return () -> new SignificantLongTerms.Bucket(0, subsetSize, 0, supersetSize, 0, null, format, 0); + SignificantLongTerms.Bucket buildEmptyBucket() { + return new SignificantLongTerms.Bucket(0, 0, 0, null, format, 0); } @Override - void updateBucket(SignificantLongTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) throws IOException { - spare.term = ordsEnum.value(); - spare.subsetDf = docCount; - spare.supersetDf = backgroundFrequencies.freq(spare.term); - spare.bucketOrd = ordsEnum.ord(); - // During shard-local down-selection we use subset/superset stats that are for this shard only - // Back at the central reducer these properties will be updated with global stats - spare.updateScore(significanceHeuristic); + BucketUpdater bucketUpdater(long owningBucketOrd) { + long subsetSize = subsetSizes.get(owningBucketOrd); + return (spare, ordsEnum, docCount) -> { + spare.term = ordsEnum.value(); + spare.subsetDf = docCount; + spare.supersetDf = backgroundFrequencies.freq(spare.term); + spare.bucketOrd = ordsEnum.ord(); + // During shard-local down-selection we use subset/superset stats that are for this shard only + // Back at the central reducer these properties will be updated with global stats + spare.updateScore(significanceHeuristic, subsetSize, supersetSize); + }; } @Override diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java index 2aace2a714a26..17ea290b7aaaf 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java @@ -30,23 +30,14 @@ public static class Bucket extends InternalSignificantTerms.Bucket { long term; - public Bucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - long term, - InternalAggregations aggregations, - DocValueFormat format, - double score - ) { - super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format); + public Bucket(long subsetDf, long supersetDf, long term, InternalAggregations aggregations, DocValueFormat format, double score) { + super(subsetDf, supersetDf, aggregations, format); this.term = term; this.score = score; } - Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException { - super(in, subsetSize, supersetSize, format); + Bucket(StreamInput in, DocValueFormat format) throws IOException { + super(in, format); subsetDf = in.readVLong(); supersetDf = in.readVLong(); term = in.readLong(); @@ -136,16 +127,7 @@ public SignificantLongTerms create(List buckets) { @Override public Bucket createBucket(InternalAggregations aggregations, SignificantLongTerms.Bucket prototype) { - return new Bucket( - prototype.subsetDf, - prototype.subsetSize, - prototype.supersetDf, - prototype.supersetSize, - prototype.term, - aggregations, - prototype.format, - prototype.score - ); + return new Bucket(prototype.subsetDf, prototype.supersetDf, prototype.term, aggregations, prototype.format, prototype.score); } @Override @@ -169,14 +151,7 @@ protected Bucket[] createBucketsArray(int size) { } @Override - Bucket createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - SignificantLongTerms.Bucket prototype - ) { - return new Bucket(subsetDf, subsetSize, supersetDf, supersetSize, prototype.term, aggregations, format, prototype.score); + Bucket createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, SignificantLongTerms.Bucket prototype) { + return new Bucket(subsetDf, supersetDf, prototype.term, aggregations, format, prototype.score); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java index 791c09d3cbd99..b255f17d2843b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java @@ -34,14 +34,12 @@ public static class Bucket extends InternalSignificantTerms.Bucket { public Bucket( BytesRef term, long subsetDf, - long subsetSize, long supersetDf, - long supersetSize, InternalAggregations aggregations, DocValueFormat format, double score ) { - super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format); + super(subsetDf, supersetDf, aggregations, format); this.termBytes = term; this.score = score; } @@ -49,8 +47,8 @@ public Bucket( /** * Read from a stream. 
*/ - public Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException { - super(in, subsetSize, supersetSize, format); + public Bucket(StreamInput in, DocValueFormat format) throws IOException { + super(in, format); termBytes = in.readBytesRef(); subsetDf = in.readVLong(); supersetDf = in.readVLong(); @@ -140,16 +138,7 @@ public SignificantStringTerms create(List buckets @Override public Bucket createBucket(InternalAggregations aggregations, SignificantStringTerms.Bucket prototype) { - return new Bucket( - prototype.termBytes, - prototype.subsetDf, - prototype.subsetSize, - prototype.supersetDf, - prototype.supersetSize, - aggregations, - prototype.format, - prototype.score - ); + return new Bucket(prototype.termBytes, prototype.subsetDf, prototype.supersetDf, aggregations, prototype.format, prototype.score); } @Override @@ -173,14 +162,7 @@ protected Bucket[] createBucketsArray(int size) { } @Override - Bucket createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - SignificantStringTerms.Bucket prototype - ) { - return new Bucket(prototype.termBytes, subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format, prototype.score); + Bucket createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, SignificantStringTerms.Bucket prototype) { + return new Bucket(prototype.termBytes, subsetDf, supersetDf, aggregations, format, prototype.score); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java index f02b5338eea74..e8f160193bc71 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java @@ -17,6 +17,18 @@ */ public interface SignificantTerms extends MultiBucketsAggregation, Iterable { + /** + * @return The numbers of docs in the subset (also known as "foreground set"). + * This number is equal to the document count of the containing aggregation. + */ + long getSubsetSize(); + + /** + * @return The numbers of docs in the superset (ordinarily the background count + * of the containing aggregation). + */ + long getSupersetSize(); + interface Bucket extends MultiBucketsAggregation.Bucket { /** @@ -30,24 +42,12 @@ interface Bucket extends MultiBucketsAggregation.Bucket { */ long getSubsetDf(); - /** - * @return The numbers of docs in the subset (also known as "foreground set"). - * This number is equal to the document count of the containing aggregation. - */ - long getSubsetSize(); - /** * @return The number of docs in the superset containing a particular term (also * known as the "background count" of the bucket) */ long getSupersetDf(); - /** - * @return The numbers of docs in the superset (ordinarily the background count - * of the containing aggregation). 
- */ - long getSupersetSize(); - } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java index 8bd14a46bff96..6d1370f147f36 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java @@ -40,16 +40,8 @@ public class UnmappedSignificantTerms extends InternalSignificantTerms { - private Bucket( - BytesRef term, - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - DocValueFormat format - ) { - super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format); + private Bucket(BytesRef term, long subsetDf, long supersetDf, InternalAggregations aggregations, DocValueFormat format) { + super(subsetDf, supersetDf, aggregations, format); } } @@ -95,14 +87,7 @@ protected UnmappedSignificantTerms create(long subsetSize, long supersetSize, Li } @Override - Bucket createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - Bucket prototype - ) { + Bucket createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, Bucket prototype) { throw new UnsupportedOperationException("not supported for UnmappedSignificantTerms"); } @@ -153,12 +138,12 @@ protected SignificanceHeuristic getSignificanceHeuristic() { } @Override - protected long getSubsetSize() { + public long getSubsetSize() { return 0; } @Override - protected long getSupersetSize() { + public long getSupersetSize() { return 0; } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java index 6d49d6855caca..7e5d19977fe9f 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java @@ -59,8 +59,6 @@ protected void assertSampled( InternalSignificantTerms.Bucket sampledBucket = sampledIt.next(); assertEquals(sampledBucket.subsetDf, samplingContext.scaleUp(reducedBucket.subsetDf)); assertEquals(sampledBucket.supersetDf, samplingContext.scaleUp(reducedBucket.supersetDf)); - assertEquals(sampledBucket.subsetSize, samplingContext.scaleUp(reducedBucket.subsetSize)); - assertEquals(sampledBucket.supersetSize, samplingContext.scaleUp(reducedBucket.supersetSize)); assertThat(sampledBucket.score, closeTo(reducedBucket.score, 1e-14)); } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java index a303199338783..92bfa2f6f89f4 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java @@ -49,17 +49,8 @@ public void setUp() throws Exception { Set terms = new HashSet<>(); for (int i = 0; i < numBuckets; ++i) { long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong); - 
SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket( - subsetDfs[i], - subsetSize, - supersetDfs[i], - supersetSize, - term, - aggs, - format, - 0 - ); - bucket.updateScore(significanceHeuristic); + SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket(subsetDfs[i], supersetDfs[i], term, aggs, format, 0); + bucket.updateScore(significanceHeuristic, subsetSize, supersetSize); buckets.add(bucket); } return new SignificantLongTerms(name, requiredSize, 1L, metadata, format, subsetSize, supersetSize, significanceHeuristic, buckets); @@ -90,8 +81,6 @@ public void setUp() throws Exception { randomLong(), randomNonNegativeLong(), randomNonNegativeLong(), - randomNonNegativeLong(), - randomNonNegativeLong(), InternalAggregations.EMPTY, format, 0 diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java index a91566c615eaf..7499831f371aa 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java @@ -42,17 +42,8 @@ public class SignificantStringTermsTests extends InternalSignificantTermsTestCas Set terms = new HashSet<>(); for (int i = 0; i < numBuckets; ++i) { BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAlphaOfLength(10))); - SignificantStringTerms.Bucket bucket = new SignificantStringTerms.Bucket( - term, - subsetDfs[i], - subsetSize, - supersetDfs[i], - supersetSize, - aggs, - format, - 0 - ); - bucket.updateScore(significanceHeuristic); + SignificantStringTerms.Bucket bucket = new SignificantStringTerms.Bucket(term, subsetDfs[i], supersetDfs[i], aggs, format, 0); + bucket.updateScore(significanceHeuristic, subsetSize, supersetSize); buckets.add(bucket); } return new SignificantStringTerms( @@ -93,8 +84,6 @@ public class SignificantStringTermsTests extends InternalSignificantTermsTestCas new BytesRef(randomAlphaOfLengthBetween(1, 10)), randomNonNegativeLong(), randomNonNegativeLong(), - randomNonNegativeLong(), - randomNonNegativeLong(), InternalAggregations.EMPTY, format, 0 diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java index ae5083c245538..a3c03526c9b93 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java @@ -95,22 +95,20 @@ public void testStreamResponse() throws Exception { InternalMappedSignificantTerms read = (InternalMappedSignificantTerms) in.readNamedWriteable(InternalAggregation.class); assertEquals(sigTerms.getSignificanceHeuristic(), read.getSignificanceHeuristic()); + assertThat(read.getSubsetSize(), equalTo(10L)); + assertThat(read.getSupersetSize(), equalTo(20L)); SignificantTerms.Bucket originalBucket = sigTerms.getBuckets().get(0); SignificantTerms.Bucket streamedBucket = read.getBuckets().get(0); assertThat(originalBucket.getKeyAsString(), equalTo(streamedBucket.getKeyAsString())); assertThat(originalBucket.getSupersetDf(), equalTo(streamedBucket.getSupersetDf())); 
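// For intuition (an illustrative sketch, not one of the heuristics shipped
// with Elasticsearch): the four counts that updateScore(significanceHeuristic,
// subsetSize, supersetSize) forwards to SignificanceHeuristic#getScore compare
// a term's foreground rate against its background rate, along these lines:
//
//     static double frequencyLift(long subsetDf, long subsetSize, long supersetDf, long supersetSize) {
//         double foreground = subsetSize == 0 ? 0.0 : (double) subsetDf / subsetSize;
//         double background = supersetSize == 0 ? 0.0 : (double) supersetDf / supersetSize;
//         return background == 0.0 ? 0.0 : foreground / background;
//     }
//
// Each shard scores with shard-local sizes; the central reducer re-scores with
// the globally summed sizes, which is why the sizes are now passed as arguments
// rather than kept as per-bucket fields.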
assertThat(originalBucket.getSubsetDf(), equalTo(streamedBucket.getSubsetDf())); - assertThat(streamedBucket.getSubsetSize(), equalTo(10L)); - assertThat(streamedBucket.getSupersetSize(), equalTo(20L)); } InternalMappedSignificantTerms getRandomSignificantTerms(SignificanceHeuristic heuristic) { if (randomBoolean()) { SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket( 1, - 2, 3, - 4, 123, InternalAggregations.EMPTY, DocValueFormat.RAW, @@ -121,9 +119,7 @@ public void testStreamResponse() throws Exception { SignificantStringTerms.Bucket bucket = new SignificantStringTerms.Bucket( new BytesRef("someterm"), 1, - 2, 3, - 4, InternalAggregations.EMPTY, DocValueFormat.RAW, randomDoubleBetween(0, 100, true) @@ -136,15 +132,13 @@ public void testReduce() { List aggs = createInternalAggregations(); AggregationReduceContext context = InternalAggregationTestCase.emptyReduceContextBuilder().forFinalReduction(); SignificantTerms reducedAgg = (SignificantTerms) InternalAggregationTestCase.reduce(aggs, context); + assertThat(reducedAgg.getSubsetSize(), equalTo(16L)); + assertThat(reducedAgg.getSupersetSize(), equalTo(30L)); assertThat(reducedAgg.getBuckets().size(), equalTo(2)); assertThat(reducedAgg.getBuckets().get(0).getSubsetDf(), equalTo(8L)); - assertThat(reducedAgg.getBuckets().get(0).getSubsetSize(), equalTo(16L)); assertThat(reducedAgg.getBuckets().get(0).getSupersetDf(), equalTo(10L)); - assertThat(reducedAgg.getBuckets().get(0).getSupersetSize(), equalTo(30L)); assertThat(reducedAgg.getBuckets().get(1).getSubsetDf(), equalTo(8L)); - assertThat(reducedAgg.getBuckets().get(1).getSubsetSize(), equalTo(16L)); assertThat(reducedAgg.getBuckets().get(1).getSupersetDf(), equalTo(10L)); - assertThat(reducedAgg.getBuckets().get(1).getSupersetSize(), equalTo(30L)); } public void testBasicScoreProperties() { @@ -234,9 +228,9 @@ private List createInternalAggregations() { : new AbstractSignificanceHeuristicTestCase.LongTestAggFactory(); List aggs = new ArrayList<>(); - aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 0))); - aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 1))); - aggs.add(factory.createAggregation(significanceHeuristic, 8, 10, 2, (f, i) -> f.createBucket(4, 4, 5, 10, i))); + aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 5, 0))); + aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 5, 1))); + aggs.add(factory.createAggregation(significanceHeuristic, 8, 10, 2, (f, i) -> f.createBucket(4, 5, i))); return aggs; } @@ -254,7 +248,7 @@ final A createAggregation( abstract A createAggregation(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize, List buckets); - abstract B createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label); + abstract B createBucket(long subsetDF, long supersetDF, long label); } private class StringTestAggFactory extends TestAggFactory { @@ -279,13 +273,11 @@ SignificantStringTerms createAggregation( } @Override - SignificantStringTerms.Bucket createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) { + SignificantStringTerms.Bucket createBucket(long subsetDF, long supersetDF, long label) { return new SignificantStringTerms.Bucket( new BytesRef(Long.toString(label).getBytes(StandardCharsets.UTF_8)), subsetDF, - subsetSize, supersetDF, - supersetSize, 
InternalAggregations.EMPTY, DocValueFormat.RAW, 0 @@ -315,17 +307,8 @@ SignificantLongTerms createAggregation( } @Override - SignificantLongTerms.Bucket createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) { - return new SignificantLongTerms.Bucket( - subsetDF, - subsetSize, - supersetDF, - supersetSize, - label, - InternalAggregations.EMPTY, - DocValueFormat.RAW, - 0 - ); + SignificantLongTerms.Bucket createBucket(long subsetDF, long supersetDF, long label) { + return new SignificantLongTerms.Bucket(subsetDF, supersetDF, label, InternalAggregations.EMPTY, DocValueFormat.RAW, 0); } } From fc266e5ea9e9c83c16f006f8d53ad481530273aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 3 Dec 2024 07:50:18 +0100 Subject: [PATCH 06/28] [Profiling] Switch to 19Hz sampling frequency (#117757) * [Profiling] Switch to 19Hz sampling frequency * Fix internalClusterTest --- .../xpack/profiling/action/GetStackTracesActionIT.java | 8 ++++---- .../xpack/profiling/action/CO2Calculator.java | 2 +- .../xpack/profiling/action/CostCalculator.java | 2 +- .../xpack/profiling/action/CO2CalculatorTests.java | 4 ++-- .../xpack/profiling/action/CostCalculatorTests.java | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java b/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java index 6463cda554e5b..4b3a4fb0108f7 100644 --- a/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java +++ b/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java @@ -46,8 +46,8 @@ public void testGetStackTracesUnfiltered() throws Exception { assertEquals(18, stackTrace.fileIds.length); assertEquals(18, stackTrace.frameIds.length); assertEquals(18, stackTrace.typeIds.length); - assertEquals(0.0000048475146d, stackTrace.annualCO2Tons, 0.0000000001d); - assertEquals(0.18834d, stackTrace.annualCostsUSD, 0.00001d); + assertEquals(0.0000051026469d, stackTrace.annualCO2Tons, 0.0000000001d); + assertEquals(0.19825d, stackTrace.annualCostsUSD, 0.00001d); // not determined by default assertNull(stackTrace.subGroups); @@ -91,8 +91,8 @@ public void testGetStackTracesGroupedByServiceName() throws Exception { assertEquals(18, stackTrace.fileIds.length); assertEquals(18, stackTrace.frameIds.length); assertEquals(18, stackTrace.typeIds.length); - assertEquals(0.0000048475146d, stackTrace.annualCO2Tons, 0.0000000001d); - assertEquals(0.18834d, stackTrace.annualCostsUSD, 0.00001d); + assertEquals(0.0000051026469d, stackTrace.annualCO2Tons, 0.0000000001d); + assertEquals(0.19825d, stackTrace.annualCostsUSD, 0.00001d); assertEquals(Long.valueOf(2L), stackTrace.subGroups.getCount("basket")); assertNotNull(response.getStackFrames()); diff --git a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java index fbd5f7a9b5328..0a05fc5930942 100644 --- a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java +++ b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java @@ -12,7 +12,7 @@ import java.util.Map; final class CO2Calculator { - private static 
final double DEFAULT_SAMPLING_FREQUENCY = 20.0d; + private static final double DEFAULT_SAMPLING_FREQUENCY = 19.0d; private static final double DEFAULT_CO2_TONS_PER_KWH = 0.000379069d; // unit: metric tons / kWh private static final double DEFAULT_KILOWATTS_PER_CORE_X86 = 7.0d / 1000.0d; // unit: watt / core private static final double DEFAULT_KILOWATTS_PER_CORE_ARM64 = 2.8d / 1000.0d; // unit: watt / core diff --git a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java index b8ee54f5f29e8..05b51adb6a52f 100644 --- a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java +++ b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java @@ -10,7 +10,7 @@ import java.util.Map; final class CostCalculator { - private static final double DEFAULT_SAMPLING_FREQUENCY = 20.0d; + private static final double DEFAULT_SAMPLING_FREQUENCY = 19.0d; private static final double SECONDS_PER_HOUR = 60 * 60; private static final double SECONDS_PER_YEAR = SECONDS_PER_HOUR * 24 * 365.0d; // unit: seconds public static final double DEFAULT_COST_USD_PER_CORE_HOUR = 0.0425d; // unit: USD / (core * hour) diff --git a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java index ff698465a56c5..9be98fbe4f46b 100644 --- a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java +++ b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java @@ -73,7 +73,7 @@ public void testCreateFromRegularSource() { double samplingDurationInSeconds = 1_800.0d; // 30 minutes long samples = 100_000L; // 100k samples - double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 20.0d); + double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 19.0d); CO2Calculator co2Calculator = new CO2Calculator(hostsTable, samplingDurationInSeconds, null, null, null, null); checkCO2Calculation(co2Calculator.getAnnualCO2Tons(HOST_ID_A, samples), annualCoreHours, 1.135d, 0.0002786d, 7.0d); @@ -110,7 +110,7 @@ public void testCreateFromMalformedSource() { double samplingDurationInSeconds = 1_800.0d; // 30 minutes long samples = 100_000L; // 100k samples - double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 20.0d); + double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 19.0d); CO2Calculator co2Calculator = new CO2Calculator(hostsTable, samplingDurationInSeconds, null, null, null, null); checkCO2Calculation(co2Calculator.getAnnualCO2Tons(HOST_ID_A, samples), annualCoreHours, 1.135d, 0.0002786d, 7.0d); diff --git a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java index eaf6cf618eddb..1c719c97164dc 100644 --- a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java +++ b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java @@ -63,7 +63,7 @@ public void testCreateFromRegularSource() { double 
samplingDurationInSeconds = 1_800.0d; // 30 minutes long samples = 100_000L; // 100k samples - double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 20.0d); + double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 19.0d); CostCalculator costCalculator = new CostCalculator(hostsTable, samplingDurationInSeconds, null, null, null); // Checks whether the cost calculation is based on the lookup data. From 564e13e2ba49ac78c8c142f9b29481e56c498c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 3 Dec 2024 08:56:20 +0100 Subject: [PATCH 07/28] [Profiling] Add field profiling.agent.config.sampling_frequency to profiling-hosts (#117752) --- .../profiling/component-template/profiling-hosts.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json b/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json index e58a3cbd39f97..50f3ab6bf9a08 100644 --- a/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json +++ b/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json @@ -135,6 +135,9 @@ }, "config.present_cpu_cores": { "type": "integer" + }, + "config.sampling_frequency": { + "type": "integer" + } } }, From cbb08babdbd7d8f42426df7984caa2d587b26ff7 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 3 Dec 2024 10:52:14 +0200 Subject: [PATCH 08/28] Remove RestApiVersion#V_7 references for 9.0.0 (#117572) Address and remove references to org.elasticsearch.core.RestApiVersion#V_7 from the search-related code.
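At the API level this means the multi search body must strictly alternate one metadata line and one search-body line, each terminated by a newline. The V7-only leniencies removed below (an empty line before the first metadata object, and "type"/"types" keys in the metadata section) are no longer tolerated, and an unknown metadata key is rejected with "key [...] is not supported in the metadata section". A minimal sketch of a well-formed body, borrowing the text-block style of the tests changed below (indices and queries are illustrative, not taken from the reference docs):

    // Two searches in one msearch request; every odd line must be metadata.
    String msearchBody = """
        {"index": "test"}
        {"query": {"match_all": {}}}
        {"index": "test", "ignore_unavailable": true}
        {"query": {"match_all": {}}}
        """;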
--- docs/changelog/117572.yaml | 5 + .../action/search/MultiSearchRequest.java | 27 +---- .../termvectors/TermVectorsRequest.java | 6 - .../index/query/CommonTermsQueryBuilder.java | 72 ------------ .../index/query/TypeQueryV7Builder.java | 108 ------------------ .../document/RestTermVectorsAction.java | 1 - .../action/search/RestMultiSearchAction.java | 3 - .../elasticsearch/search/SearchModule.java | 26 ----- .../MovAvgPipelineAggregationBuilder.java | 85 -------------- .../search/sort/FieldSortBuilder.java | 8 -- .../search/sort/ScriptSortBuilder.java | 9 -- .../search/sort/SortBuilder.java | 8 -- .../search/MultiSearchRequestTests.java | 30 ++--- 13 files changed, 20 insertions(+), 368 deletions(-) create mode 100644 docs/changelog/117572.yaml delete mode 100644 server/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java delete mode 100644 server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java delete mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java diff --git a/docs/changelog/117572.yaml b/docs/changelog/117572.yaml new file mode 100644 index 0000000000000..a4a2ef6c06f5d --- /dev/null +++ b/docs/changelog/117572.yaml @@ -0,0 +1,5 @@ +pr: 117572 +summary: Address and remove any references of RestApiVersion version 7 +area: Search +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java b/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java index 8467ee6fd86f3..2022180475529 100644 --- a/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java +++ b/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java @@ -18,11 +18,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.xcontent.XContentHelper; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.action.search.RestMultiSearchAction; -import org.elasticsearch.rest.action.search.RestSearchAction; import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; @@ -51,10 +47,6 @@ * A multi search API request. 
*/ public class MultiSearchRequest extends ActionRequest implements CompositeIndicesRequest { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class); - public static final String FIRST_LINE_EMPTY_DEPRECATION_MESSAGE = - "support for empty first line before any action metadata in msearch API is deprecated " - + "and will be removed in the next major version"; public static final int MAX_CONCURRENT_SEARCH_REQUESTS_DEFAULT = 0; private int maxConcurrentSearchRequests = 0; @@ -213,12 +205,6 @@ public static void readMultiLineFormat( if (nextMarker == -1) { break; } - // support first line with \n - if (parserConfig.restApiVersion() == RestApiVersion.V_7 && nextMarker == 0) { - deprecationLogger.compatibleCritical("msearch_first_line_empty", FIRST_LINE_EMPTY_DEPRECATION_MESSAGE); - from = nextMarker + 1; - continue; - } SearchRequest searchRequest = new SearchRequest(); if (indices != null) { @@ -281,14 +267,11 @@ public static void readMultiLineFormat( allowNoIndices = value; } else if ("ignore_throttled".equals(entry.getKey()) || "ignoreThrottled".equals(entry.getKey())) { ignoreThrottled = value; - } else if (parserConfig.restApiVersion() == RestApiVersion.V_7 - && ("type".equals(entry.getKey()) || "types".equals(entry.getKey()))) { - deprecationLogger.compatibleCritical("msearch_with_types", RestMultiSearchAction.TYPES_DEPRECATION_MESSAGE); - } else if (extraParamParser.apply(entry.getKey(), value, searchRequest)) { - // Skip, the parser handled the key/value - } else { - throw new IllegalArgumentException("key [" + entry.getKey() + "] is not supported in the metadata section"); - } + } else if (extraParamParser.apply(entry.getKey(), value, searchRequest)) { + // Skip, the parser handled the key/value + } else { + throw new IllegalArgumentException("key [" + entry.getKey() + "] is not supported in the metadata section"); + } } defaultOptions = IndicesOptions.fromParameters( expandWildcards, diff --git a/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java b/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java index a36158d11b5b3..7a7b2afab75d1 100644 --- a/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java @@ -20,13 +20,11 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.VersionType; -import org.elasticsearch.rest.action.document.RestTermVectorsAction; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; @@ -52,7 +50,6 @@ // It's not possible to suppress teh warning at #realtime(boolean) at a method-level. 
@SuppressWarnings("unchecked") public final class TermVectorsRequest extends SingleShardRequest implements RealtimeRequest { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(TermVectorsRequest.class); private static final ParseField INDEX = new ParseField("_index"); private static final ParseField ID = new ParseField("_id"); @@ -66,7 +63,6 @@ public final class TermVectorsRequest extends SingleShardRequest { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonTermsQueryBuilder.class); - public static final String COMMON_TERMS_QUERY_DEPRECATION_MSG = "Common Terms Query usage is not supported. " - + "Use [match] query which can efficiently skip blocks of documents if the total number of hits is not tracked."; - - @UpdateForV9(owner = UpdateForV9.Owner.SEARCH_RELEVANCE) // v7 REST API no longer exists: eliminate ref to RestApiVersion.V_7 - public static ParseField NAME_V7 = new ParseField("common").withAllDeprecated(COMMON_TERMS_QUERY_DEPRECATION_MSG) - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - - @Override - protected void doWriteTo(StreamOutput out) throws IOException { - throw new UnsupportedOperationException("common_term_query is not meant to be serialized."); - } - - @Override - protected void doXContent(XContentBuilder builder, Params params) throws IOException {} - - @Override - protected Query doToQuery(SearchExecutionContext context) throws IOException { - return null; - } - - @Override - protected boolean doEquals(CommonTermsQueryBuilder other) { - return false; - } - - @Override - protected int doHashCode() { - return 0; - } - - @Override - public String getWriteableName() { - return null; - } - - public static CommonTermsQueryBuilder fromXContent(XContentParser parser) throws IOException { - deprecationLogger.compatibleCritical("common_term_query", COMMON_TERMS_QUERY_DEPRECATION_MSG); - throw new ParsingException(parser.getTokenLocation(), COMMON_TERMS_QUERY_DEPRECATION_MSG); - } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.ZERO; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java b/server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java deleted file mode 100644 index c9aae0195acf7..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.index.query; - -import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.Query; -import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.UpdateForV9; -import org.elasticsearch.index.mapper.MapperService; -import org.elasticsearch.xcontent.ObjectParser; -import org.elasticsearch.xcontent.ParseField; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; - -import java.io.IOException; - -@UpdateForV9(owner = UpdateForV9.Owner.SEARCH_RELEVANCE) // v7 REST API no longer exists: eliminate ref to RestApiVersion.V_7 -public class TypeQueryV7Builder extends AbstractQueryBuilder { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(TypeQueryV7Builder.class); - public static final String TYPES_DEPRECATION_MESSAGE = "[types removal] Type queries are deprecated, " - + "prefer to filter on a field instead."; - - private static final String NAME = "type"; - public static final ParseField NAME_V7 = new ParseField(NAME).forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - private static final ParseField VALUE_FIELD = new ParseField("value"); - private static final ObjectParser PARSER = new ObjectParser<>(NAME, TypeQueryV7Builder::new); - - static { - PARSER.declareString( - QueryBuilder::queryName, - AbstractQueryBuilder.NAME_FIELD.forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)) - ); - PARSER.declareFloat( - QueryBuilder::boost, - AbstractQueryBuilder.BOOST_FIELD.forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)) - ); - PARSER.declareString(TypeQueryV7Builder::setValue, VALUE_FIELD.forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7))); - } - - private String value; - - public TypeQueryV7Builder() {} - - /** - * Read from a stream. 
- */ - public TypeQueryV7Builder(StreamInput in) throws IOException { - super(in); - } - - @Override - protected void doWriteTo(StreamOutput out) throws IOException {} - - @Override - protected void doXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(NAME); - builder.field(VALUE_FIELD.getPreferredName(), MapperService.SINGLE_MAPPING_NAME); - printBoostAndQueryName(builder); - builder.endObject(); - } - - @Override - protected Query doToQuery(SearchExecutionContext context) throws IOException { - return new MatchNoDocsQuery(); - } - - @Override - protected boolean doEquals(TypeQueryV7Builder other) { - return true; - } - - @Override - protected int doHashCode() { - return 0; - } - - public static TypeQueryV7Builder fromXContent(XContentParser parser) throws IOException { - deprecationLogger.compatibleCritical("type_query", TYPES_DEPRECATION_MESSAGE); - throw new ParsingException(parser.getTokenLocation(), TYPES_DEPRECATION_MESSAGE); - } - - @Override - public String getWriteableName() { - return NAME; - } - - public void setValue(String value) { - this.value = value; - } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.ZERO; - } -} diff --git a/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java b/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java index 8e41e1cd09674..d2b09af8e1f3d 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java @@ -35,7 +35,6 @@ */ @ServerlessScope(Scope.PUBLIC) public class RestTermVectorsAction extends BaseRestHandler { - public static final String TYPES_DEPRECATION_MESSAGE = "[types removal] Specifying types in term vector requests is deprecated."; @Override public List routes() { diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java b/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java index 89775b4ca8e15..24fab92ced392 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java @@ -43,9 +43,6 @@ @ServerlessScope(Scope.PUBLIC) public class RestMultiSearchAction extends BaseRestHandler { - public static final String TYPES_DEPRECATION_MESSAGE = "[types removal]" - + " Specifying types in multi search template requests is deprecated."; - private static final Set RESPONSE_PARAMS = Set.of(RestSearchAction.TYPED_KEYS_PARAM, RestSearchAction.TOTAL_HITS_AS_INT_PARAM); private final boolean allowExplicitIndex; diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 09e25350ad4fd..d282ba425b126 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -20,12 +20,10 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.BoostingQueryBuilder; import org.elasticsearch.index.query.CombinedFieldsQueryBuilder; -import 
org.elasticsearch.index.query.CommonTermsQueryBuilder; import org.elasticsearch.index.query.ConstantScoreQueryBuilder; import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.DistanceFeatureQueryBuilder; @@ -68,7 +66,6 @@ import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.TermsQueryBuilder; import org.elasticsearch.index.query.TermsSetQueryBuilder; -import org.elasticsearch.index.query.TypeQueryV7Builder; import org.elasticsearch.index.query.WildcardQueryBuilder; import org.elasticsearch.index.query.WrapperQueryBuilder; import org.elasticsearch.index.query.functionscore.ExponentialDecayFunctionBuilder; @@ -204,7 +201,6 @@ import org.elasticsearch.search.aggregations.pipeline.InternalStatsBucket; import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.SerialDiffPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregationBuilder; @@ -686,15 +682,6 @@ private ValuesSourceRegistry registerAggregations(List plugins) { .setAggregatorRegistrar(CompositeAggregationBuilder::registerAggregators), builder ); - if (RestApiVersion.minimumSupported() == RestApiVersion.V_7) { - registerQuery( - new QuerySpec<>( - CommonTermsQueryBuilder.NAME_V7, - (streamInput) -> new CommonTermsQueryBuilder(), - CommonTermsQueryBuilder::fromXContent - ) - ); - } registerFromPlugin(plugins, SearchPlugin::getAggregations, (agg) -> this.registerAggregation(agg, builder)); @@ -815,15 +802,6 @@ private void registerPipelineAggregations(List plugins) { SerialDiffPipelineAggregationBuilder::parse ) ); - if (RestApiVersion.minimumSupported() == RestApiVersion.V_7) { - registerPipelineAggregation( - new PipelineAggregationSpec( - MovAvgPipelineAggregationBuilder.NAME_V7, - MovAvgPipelineAggregationBuilder::new, - MovAvgPipelineAggregationBuilder.PARSER - ) - ); - } registerFromPlugin(plugins, SearchPlugin::getPipelineAggregations, this::registerPipelineAggregation); } @@ -1203,10 +1181,6 @@ private void registerQueryParsers(List plugins) { })); registerFromPlugin(plugins, SearchPlugin::getQueries, this::registerQuery); - - if (RestApiVersion.minimumSupported() == RestApiVersion.V_7) { - registerQuery(new QuerySpec<>(TypeQueryV7Builder.NAME_V7, TypeQueryV7Builder::new, TypeQueryV7Builder::fromXContent)); - } } private void registerIntervalsSourceProviders() { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java deleted file mode 100644 index 068487317dfe5..0000000000000 --- a/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.search.aggregations.pipeline; - -import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.UpdateForV9; -import org.elasticsearch.index.query.CommonTermsQueryBuilder; -import org.elasticsearch.xcontent.ContextParser; -import org.elasticsearch.xcontent.ParseField; -import org.elasticsearch.xcontent.XContentBuilder; - -import java.io.IOException; -import java.util.Map; - -/** - * The actual moving_avg aggregation was removed as a breaking change in 8.0. This class exists to provide a friendlier error message - * if somebody attempts to use the moving_avg aggregation via the compatible-with=7 mechanism. - * - * We can remove this class entirely when v7 rest api compatibility is dropped. - * - * @deprecated Only for 7.x rest compat - */ -@UpdateForV9(owner = UpdateForV9.Owner.SEARCH_ANALYTICS) // remove this since it's only for 7.x compat and 7.x compat will be removed in 9.0 -@Deprecated -public class MovAvgPipelineAggregationBuilder extends AbstractPipelineAggregationBuilder { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonTermsQueryBuilder.class); - public static final String MOVING_AVG_AGG_DEPRECATION_MSG = "Moving Average aggregation usage is not supported. 
" - + "Use the [moving_fn] aggregation instead."; - - public static final ParseField NAME_V7 = new ParseField("moving_avg").withAllDeprecated(MOVING_AVG_AGG_DEPRECATION_MSG) - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - - public static final ContextParser PARSER = (parser, name) -> { - deprecationLogger.compatibleCritical("moving_avg_aggregation", MOVING_AVG_AGG_DEPRECATION_MSG); - throw new ParsingException(parser.getTokenLocation(), MOVING_AVG_AGG_DEPRECATION_MSG); - }; - - public MovAvgPipelineAggregationBuilder(StreamInput in) throws IOException { - super(in, NAME_V7.getPreferredName()); - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected void doWriteTo(StreamOutput out) throws IOException { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected PipelineAggregator createInternal(Map metadata) { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected void validate(ValidationContext context) { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - public final String getWriteableName() { - return null; - } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.ZERO; - } -} diff --git a/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java index cd597f3328c0f..5691435c83ecb 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java @@ -18,7 +18,6 @@ import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.time.DateMathParser; @@ -729,13 +728,6 @@ public static FieldSortBuilder fromXContent(XContentParser parser, String fieldN PARSER.declareObject(FieldSortBuilder::setNestedSort, (p, c) -> NestedSortBuilder.fromXContent(p), NESTED_FIELD); PARSER.declareString(FieldSortBuilder::setNumericType, NUMERIC_TYPE); PARSER.declareString(FieldSortBuilder::setFormat, FORMAT); - PARSER.declareField((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_path] has been removed in favour of the [nested] parameter", c); - }, NESTED_PATH_FIELD, ValueType.STRING); - - PARSER.declareObject((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_filter] has been removed in favour of the [nested] parameter", c); - }, NESTED_FILTER_FIELD); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java index 48773eec8371b..445c55dc546bc 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java @@ -17,7 +17,6 @@ import org.apache.lucene.util.BytesRefBuilder; import 
org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -220,14 +219,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params builderParams) PARSER.declareString((b, v) -> b.order(SortOrder.fromString(v)), ORDER_FIELD); PARSER.declareString((b, v) -> b.sortMode(SortMode.fromString(v)), SORTMODE_FIELD); PARSER.declareObject(ScriptSortBuilder::setNestedSort, (p, c) -> NestedSortBuilder.fromXContent(p), NESTED_FIELD); - - PARSER.declareObject((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_path] has been removed in favour of the [nested] parameter", c); - }, NESTED_PATH_FIELD); - - PARSER.declareObject((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_filter] has been removed in favour of the [nested] parameter", c); - }, NESTED_FILTER_FIELD); } /** diff --git a/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java index 5832b93b9462f..4a8cdbcdffa55 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java @@ -18,8 +18,6 @@ import org.elasticsearch.common.io.stream.VersionedNamedWriteable; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.elasticsearch.index.mapper.NestedObjectMapper; import org.elasticsearch.index.query.QueryBuilder; @@ -52,12 +50,6 @@ public abstract class SortBuilder> // parse fields common to more than one SortBuilder public static final ParseField ORDER_FIELD = new ParseField("order"); - @UpdateForV9(owner = UpdateForV9.Owner.SEARCH_FOUNDATIONS) // v7 REST API no longer exists: eliminate ref to RestApiVersion.V_7 - public static final ParseField NESTED_FILTER_FIELD = new ParseField("nested_filter").withAllDeprecated() - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - public static final ParseField NESTED_PATH_FIELD = new ParseField("nested_path").withAllDeprecated() - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - private static final Map> PARSERS = Map.of( ScriptSortBuilder.NAME, ScriptSortBuilder::fromXContent, diff --git a/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java b/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java index f2bc561792991..9f81b999c9d98 100644 --- a/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java @@ -16,7 +16,6 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.CheckedRunnable; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.rest.RestRequest; @@ -268,12 +267,12 @@ public void testMsearchTerminatedByNewline() throws Exception { assertEquals(3, 
msearchRequest.requests().size());
     }

-    private MultiSearchRequest parseMultiSearchRequestFromString(String request, RestApiVersion restApiVersion) throws IOException {
-        return parseMultiSearchRequest(createRestRequest(request.getBytes(StandardCharsets.UTF_8), restApiVersion));
+    private MultiSearchRequest parseMultiSearchRequestFromString(String request) throws IOException {
+        return parseMultiSearchRequest(createRestRequest(request.getBytes(StandardCharsets.UTF_8)));
     }

     private MultiSearchRequest parseMultiSearchRequest(String sample) throws IOException {
-        return parseMultiSearchRequest(createRestRequest(sample, null));
+        return parseMultiSearchRequest(createRestRequest(sample));
     }

     private MultiSearchRequest parseMultiSearchRequest(RestRequest restRequest) throws IOException {
@@ -288,22 +287,13 @@ private MultiSearchRequest parseMultiSearchRequest(RestRequest restRequest) thro
         return request;
     }

-    private RestRequest createRestRequest(String sample, RestApiVersion restApiVersion) throws IOException {
+    private RestRequest createRestRequest(String sample) throws IOException {
         byte[] data = StreamsUtils.copyToBytesFromClasspath(sample);
-        return createRestRequest(data, restApiVersion);
+        return createRestRequest(data);
     }

-    private FakeRestRequest createRestRequest(byte[] data, RestApiVersion restApiVersion) {
-        if (restApiVersion != null) {
-            final List contentTypeHeader = Collections.singletonList(
-                compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)
-            );
-            return new FakeRestRequest.Builder(xContentRegistry()).withHeaders(
-                Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader)
-            ).withContent(new BytesArray(data), null).build();
-        } else {
-            return new FakeRestRequest.Builder(xContentRegistry()).withContent(new BytesArray(data), XContentType.JSON).build();
-        }
+    private FakeRestRequest createRestRequest(byte[] data) {
+        return new FakeRestRequest.Builder(xContentRegistry()).withContent(new BytesArray(data), XContentType.JSON).build();
     }

     @Override
@@ -517,7 +507,7 @@ public void testFailOnExtraCharacters() throws IOException {
             parseMultiSearchRequestFromString("""
                 {"index": "test"}{{{{{extra chars that shouldn't be here
                 { "query": {"match_all": {}}}
-                """, null);
+                """);
             fail("should have caught first line; extra open brackets");
         } catch (XContentParseException e) {
             assertEquals("[1:18] Unexpected token after end of object", e.getMessage());
@@ -526,7 +516,7 @@ public void testFailOnExtraCharacters() throws IOException {
             parseMultiSearchRequestFromString("""
                 {"index": "test"}
                 { "query": {"match_all": {}}}{{{{even more chars
-                """, null);
+                """);
             fail("should have caught second line");
         } catch (XContentParseException e) {
             assertEquals("[1:30] Unexpected token after end of object", e.getMessage());
@@ -535,7 +525,7 @@ public void testFailOnExtraCharacters() throws IOException {
             parseMultiSearchRequestFromString("""
                 {}
                 { "query": {"match_all": {}}}}}}different error message
-                """, null);
+                """);
             fail("should have caught second line; extra closing brackets");
         } catch (XContentParseException e) {
             assertThat(

From a514aad3c2da305b0b63d8545cab75bb2c2d3032 Mon Sep 17 00:00:00 2001
From: Dimitris Rempapis
Date: Tue, 3 Dec 2024 10:58:20 +0200
Subject: [PATCH 09/28] Fix/meta fields bad request (#117229)

A 400 rather than a 5xx error is returned when _source / _seq_no / _feature /
_nested_path / _field_names is requested via fields

---
 docs/changelog/117229.yaml | 6 ++
 .../extras/RankFeatureMetaFieldMapper.java | 2 +-
 rest-api-spec/build.gradle | 1 +
 .../test/search/520_fetch_fields.yml | 80 +++++++++++++++++--
 .../index/mapper/FieldNamesFieldMapper.java | 2 +-
 .../index/mapper/MapperFeatures.java | 5 +-
 .../index/mapper/NestedPathFieldMapper.java | 2 +-
 .../index/mapper/SeqNoFieldMapper.java | 2 +-
 .../index/mapper/SourceFieldMapper.java | 2 +-
 .../fetch/subphase/FieldFetcherTests.java | 2 +-
 10 files changed, 92 insertions(+), 12 deletions(-)
 create mode 100644 docs/changelog/117229.yaml

diff --git a/docs/changelog/117229.yaml b/docs/changelog/117229.yaml
new file mode 100644
index 0000000000000..f1b859c03e4fa
--- /dev/null
+++ b/docs/changelog/117229.yaml
@@ -0,0 +1,6 @@
+pr: 117229
+summary: "A 400 error is returned rather than a 5xx when _source / _seq_no / _feature /\
+  \ _nested_path / _field_names is requested via fields"
+area: Search
+type: bug
+issues: []
diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java
index 15398b1f178ee..ed1cc57b84863 100644
--- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java
+++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java
@@ -48,7 +48,7 @@ public String typeName() {

     @Override
     public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
-        throw new UnsupportedOperationException("Cannot fetch values for internal field [" + typeName() + "].");
+        throw new IllegalArgumentException("Cannot fetch values for internal field [" + typeName() + "].");
     }

     @Override
diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle
index 650d17e41de7f..e2af894eb0939 100644
--- a/rest-api-spec/build.gradle
+++ b/rest-api-spec/build.gradle
@@ -66,4 +66,5 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
   task.skipTest("logsdb/20_source_mapping/stored _source mode is supported", "no longer serialize source_mode")
   task.skipTest("logsdb/20_source_mapping/include/exclude is supported with stored _source", "no longer serialize source_mode")
   task.skipTest("logsdb/20_source_mapping/synthetic _source is default", "no longer serialize source_mode")
+  task.skipTest("search/520_fetch_fields/fetch _seq_no via fields", "error code is changed from 5xx to 400 in 9.0")
 })
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml
index 2b309f502f0c2..9a43199755d75 100644
--- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml
+++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml
@@ -128,18 +128,88 @@ fetch _seq_no via stored_fields:

---
fetch _seq_no via fields:
+  - requires:
+      cluster_features: ["meta_fetch_fields_error_code_changed"]
+      reason: The fields_api returns a 400 instead of a 5xx when _seq_no is requested via fields
   - do:
-      catch: "request"
+      catch: bad_request
       search:
         index: test
         body:
           fields: [ _seq_no ]

-  # This should be `unauthorized` (401) or `forbidden` (403) or at least `bad request` (400)
-  # while instead it is mapped to an `internal_server_error (500)`
-  - match: { status: 500 }
-  - match: { error.root_cause.0.type: unsupported_operation_exception }
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: illegal_argument_exception }
+  - match: {
error.root_cause.0.reason: "error fetching [_seq_no]: Cannot fetch values for internal field [_seq_no]." }
+
+---
+fetch _source via fields:
+  - requires:
+      cluster_features: ["meta_fetch_fields_error_code_changed"]
+      reason: The fields_api returns a 400 instead of a 5xx when _source is requested via fields
+
+  - do:
+      catch: bad_request
+      search:
+        index: test
+        body:
+          fields: [ _source ]
+
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: illegal_argument_exception }
+  - match: { error.root_cause.0.reason: "error fetching [_source]: Cannot fetch values for internal field [_source]." }
+
+---
+fetch _feature via fields:
+  - requires:
+      cluster_features: ["meta_fetch_fields_error_code_changed"]
+      reason: The fields_api returns a 400 instead of a 5xx when _feature is requested via fields
+
+  - do:
+      catch: bad_request
+      search:
+        index: test
+        body:
+          fields: [ _feature ]
+
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: illegal_argument_exception }
+  - match: { error.root_cause.0.reason: "error fetching [_feature]: Cannot fetch values for internal field [_feature]." }
+
+---
+fetch _nested_path via fields:
+  - requires:
+      cluster_features: ["meta_fetch_fields_error_code_changed"]
+      reason: The fields_api returns a 400 instead of a 5xx when _nested_path is requested via fields
+
+  - do:
+      catch: bad_request
+      search:
+        index: test
+        body:
+          fields: [ _nested_path ]
+
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: illegal_argument_exception }
+  - match: { error.root_cause.0.reason: "error fetching [_nested_path]: Cannot fetch values for internal field [_nested_path]." }
+
+---
+fetch _field_names via fields:
+  - requires:
+      cluster_features: ["meta_fetch_fields_error_code_changed"]
+      reason: The fields_api returns a 400 instead of a 5xx when _field_names is requested via fields
+
+  - do:
+      catch: bad_request
+      search:
+        index: test
+        body:
+          fields: [ _field_names ]
+
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: illegal_argument_exception }
+  - match: { error.root_cause.0.reason: "error fetching [_field_names]: Cannot fetch values for internal field [_field_names]."
} --- fetch fields with none stored_fields: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java index 565b1ff28a39f..425e3c664c262 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java @@ -135,7 +135,7 @@ public boolean isEnabled() { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "]."); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 333c37381c587..bf6c729f95653 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -61,6 +61,8 @@ public Set getFeatures() { "mapper.constant_keyword.synthetic_source_write_fix" ); + public static final NodeFeature META_FETCH_FIELDS_ERROR_CODE_CHANGED = new NodeFeature("meta_fetch_fields_error_code_changed"); + @Override public Set getTestFeatures() { return Set.of( @@ -71,7 +73,8 @@ public Set getTestFeatures() { IgnoredSourceFieldMapper.IGNORED_SOURCE_AS_TOP_LEVEL_METADATA_ARRAY_FIELD, IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS, MapperService.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT, - CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX + CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX, + META_FETCH_FIELDS_ERROR_CODE_CHANGED ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java index b22c3a12fcda3..1cd752dc34403 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java @@ -67,7 +67,7 @@ public Query existsQuery(SearchExecutionContext context) { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "]."); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java index e126102b0f3c2..66ee42dfc56f9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java @@ -168,7 +168,7 @@ public boolean mayExistInIndex(SearchExecutionContext context) { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "]."); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index b97e04fcddb5d..1cea8154aad43 100644 --- 
a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java
@@ -325,7 +325,7 @@ public String typeName() {

     @Override
     public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
-        throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "].");
+        throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "].");
     }

     @Override
diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java
index f01f760ed71c3..c5f1efe561c22 100644
--- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java
+++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java
@@ -271,7 +271,7 @@ public void testMetadataFields() throws IOException {
             FieldNamesFieldMapper.NAME,
             NestedPathFieldMapper.name(IndexVersion.current())
         )) {
-            expectThrows(UnsupportedOperationException.class, () -> fetchFields(mapperService, source, fieldname));
+            expectThrows(IllegalArgumentException.class, () -> fetchFields(mapperService, source, fieldname));
         }
     }

From b1412f65b90893c3d29756c921c32d39f3172a65 Mon Sep 17 00:00:00 2001
From: Luca Cavanna
Date: Tue, 3 Dec 2024 10:57:05 +0100
Subject: [PATCH 10/28] Clean up search timeout handling code (#116678)

TimeExceededException was made public to be able to catch it outside of the
search.internal package. That is rather dangerous, because we really need it
to be created only from `ContextIndexSearcher#throwTimeExceededException`.
This commit makes its constructor private to prevent it from being created
outside of ContextIndexSearcher. It also adds javadocs around that.
I took the chance to also share the timeout handling code that is now
copy-pasted in different places.
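To illustrate the consolidated pattern, here is a minimal, self-contained sketch of the
shared throw-or-flag decision. `handleTimeout` mirrors the helper added in the diff below,
while the tiny `QuerySearchResult` and shard-target stand-ins are assumptions made only so
the example compiles and runs on its own:

```java
// Hedged sketch only: the real classes live in org.elasticsearch.search.query and
// org.elasticsearch.search; everything here is simplified for illustration.
final class TimeoutHandlingSketch {

    // Stand-in for QuerySearchResult, which records whether the search timed out.
    static final class QuerySearchResult {
        private boolean searchTimedOut;

        void searchTimedOut(boolean timedOut) {
            this.searchTimedOut = timedOut;
        }

        boolean searchTimedOut() {
            return searchTimedOut;
        }
    }

    // Stand-in for SearchTimeoutException; the real one is a full ElasticsearchException
    // that maps to HTTP 504 (GATEWAY_TIMEOUT).
    static final class SearchTimeoutException extends RuntimeException {
        SearchTimeoutException(String shardTarget, String message) {
            super(shardTarget + ": " + message);
        }
    }

    // The shared logic: throw when partial results are disallowed, otherwise flag the
    // result and let execution continue with whatever was collected so far.
    static void handleTimeout(boolean allowPartialSearchResults, String shardTarget, QuerySearchResult result) {
        if (allowPartialSearchResults == false) {
            throw new SearchTimeoutException(shardTarget, "Time exceeded");
        }
        result.searchTimedOut(true);
    }

    public static void main(String[] args) {
        QuerySearchResult result = new QuerySearchResult();
        handleTimeout(true, "[shard-0]", result); // allowed: just flags the result
        System.out.println("timed out, partial results returned: " + result.searchTimedOut());
        try {
            handleTimeout(false, "[shard-0]", result); // disallowed: propagates
        } catch (SearchTimeoutException e) {
            System.out.println("propagated: " + e.getMessage());
        }
    }
}
```

The query, rescore, and fetch phases below all funnel through this single decision point
instead of duplicating the throw-or-flag logic.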
--- .../search/fetch/FetchPhase.java | 7 +--- .../search/fetch/FetchPhaseDocsIterator.java | 41 +++++++++---------- .../search/internal/ContextIndexSearcher.java | 18 +++++--- .../search/query/QueryPhase.java | 9 ++-- .../search/query/SearchTimeoutException.java | 13 ++++++ .../search/rescore/RescorePhase.java | 9 ++-- .../fetch/FetchPhaseDocsIteratorTests.java | 8 +++- 7 files changed, 63 insertions(+), 42 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index 546586a9ff3c3..2fbe3c1fc1532 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -195,13 +195,10 @@ protected SearchHit nextDoc(int doc) throws IOException { context.shardTarget(), context.searcher().getIndexReader(), docIdsToLoad, - context.request().allowPartialSearchResults() + context.request().allowPartialSearchResults(), + context.queryResult() ); - if (docsIterator.isTimedOut()) { - context.queryResult().searchTimedOut(true); - } - if (context.isCancelled()) { for (SearchHit hit : hits) { // release all hits that would otherwise become owned and eventually released by SearchHits below diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java index df4e7649ffd3b..4a242f70e8d02 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java @@ -16,6 +16,7 @@ import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.internal.ContextIndexSearcher; +import org.elasticsearch.search.query.QuerySearchResult; import org.elasticsearch.search.query.SearchTimeoutException; import java.io.IOException; @@ -30,12 +31,6 @@ */ abstract class FetchPhaseDocsIterator { - private boolean timedOut = false; - - public boolean isTimedOut() { - return timedOut; - } - /** * Called when a new leaf reader is reached * @param ctx the leaf reader for this set of doc ids @@ -53,7 +48,13 @@ public boolean isTimedOut() { /** * Iterate over a set of docsIds within a particular shard and index reader */ - public final SearchHit[] iterate(SearchShardTarget shardTarget, IndexReader indexReader, int[] docIds, boolean allowPartialResults) { + public final SearchHit[] iterate( + SearchShardTarget shardTarget, + IndexReader indexReader, + int[] docIds, + boolean allowPartialResults, + QuerySearchResult querySearchResult + ) { SearchHit[] searchHits = new SearchHit[docIds.length]; DocIdToIndex[] docs = new DocIdToIndex[docIds.length]; for (int index = 0; index < docIds.length; index++) { @@ -69,12 +70,10 @@ public final SearchHit[] iterate(SearchShardTarget shardTarget, IndexReader inde int[] docsInLeaf = docIdsInLeaf(0, endReaderIdx, docs, ctx.docBase); try { setNextReader(ctx, docsInLeaf); - } catch (ContextIndexSearcher.TimeExceededException timeExceededException) { - if (allowPartialResults) { - timedOut = true; - return SearchHits.EMPTY; - } - throw new SearchTimeoutException(shardTarget, "Time exceeded"); + } catch (ContextIndexSearcher.TimeExceededException e) { + SearchTimeoutException.handleTimeout(allowPartialResults, shardTarget, querySearchResult); + assert allowPartialResults; + return SearchHits.EMPTY; } for (int i = 0; i < docs.length; i++) 
{ try { @@ -88,15 +87,15 @@ public final SearchHit[] iterate(SearchShardTarget shardTarget, IndexReader inde currentDoc = docs[i].docId; assert searchHits[docs[i].index] == null; searchHits[docs[i].index] = nextDoc(docs[i].docId); - } catch (ContextIndexSearcher.TimeExceededException timeExceededException) { - if (allowPartialResults) { - timedOut = true; - SearchHit[] partialSearchHits = new SearchHit[i]; - System.arraycopy(searchHits, 0, partialSearchHits, 0, i); - return partialSearchHits; + } catch (ContextIndexSearcher.TimeExceededException e) { + if (allowPartialResults == false) { + purgeSearchHits(searchHits); } - purgeSearchHits(searchHits); - throw new SearchTimeoutException(shardTarget, "Time exceeded"); + SearchTimeoutException.handleTimeout(allowPartialResults, shardTarget, querySearchResult); + assert allowPartialResults; + SearchHit[] partialSearchHits = new SearchHit[i]; + System.arraycopy(searchHits, 0, partialSearchHits, 0, i); + return partialSearchHits; } } } catch (SearchTimeoutException e) { diff --git a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java index 78d90377cdc3f..9f990fbd97cdf 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java @@ -169,8 +169,8 @@ public void setProfiler(QueryProfiler profiler) { * Add a {@link Runnable} that will be run on a regular basis while accessing documents in the * DirectoryReader but also while collecting them and check for query cancellation or timeout. */ - public Runnable addQueryCancellation(Runnable action) { - return this.cancellable.add(action); + public void addQueryCancellation(Runnable action) { + this.cancellable.add(action); } /** @@ -425,8 +425,16 @@ public void throwTimeExceededException() { } } - public static class TimeExceededException extends RuntimeException { + /** + * Exception thrown whenever a search timeout occurs. May be thrown by {@link ContextIndexSearcher} or {@link ExitableDirectoryReader}. 
+     */
+    public static final class TimeExceededException extends RuntimeException {
         // This exception should never be re-thrown, but we fill in the stacktrace to be able to trace where it does not get properly caught
+
+        /**
+         * Created via {@link #throwTimeExceededException()}
+         */
+        private TimeExceededException() {}
     }

     @Override
@@ -570,14 +578,12 @@ public DirectoryReader getDirectoryReader() {
     }

     private static class MutableQueryTimeout implements ExitableDirectoryReader.QueryCancellation {
-
         private final List runnables = new ArrayList<>();

-        private Runnable add(Runnable action) {
+        private void add(Runnable action) {
             Objects.requireNonNull(action, "cancellation runnable should not be null");
             assert runnables.contains(action) == false : "Cancellation runnable already added";
             runnables.add(action);
-            return action;
         }

         private void remove(Runnable action) {
diff --git a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java
index af65c30b49dcf..3036a295d459a 100644
--- a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java
+++ b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java
@@ -217,10 +217,11 @@ static void addCollectorsAndSearch(SearchContext searchContext) throws QueryPhas
         queryResult.topDocs(queryPhaseResult.topDocsAndMaxScore(), queryPhaseResult.sortValueFormats());
         if (searcher.timeExceeded()) {
             assert timeoutRunnable != null : "TimeExceededException thrown even though timeout wasn't set";
-            if (searchContext.request().allowPartialSearchResults() == false) {
-                throw new SearchTimeoutException(searchContext.shardTarget(), "Time exceeded");
-            }
-            queryResult.searchTimedOut(true);
+            SearchTimeoutException.handleTimeout(
+                searchContext.request().allowPartialSearchResults(),
+                searchContext.shardTarget(),
+                searchContext.queryResult()
+            );
         }
         if (searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER) {
             queryResult.terminatedEarly(queryPhaseResult.terminatedAfter());
diff --git a/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java b/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java
index 0ed64811fee28..e006f176ff91a 100644
--- a/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java
+++ b/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java
@@ -33,4 +33,17 @@ public SearchTimeoutException(StreamInput in) throws IOException {
     public RestStatus status() {
         return RestStatus.GATEWAY_TIMEOUT;
     }
+
+    /**
+     * Propagate a timeout according to whether partial search results are allowed or not.
+     * In case partial results are allowed, a flag will be set on the provided {@link QuerySearchResult} to indicate that there was a
+     * timeout, but the execution will continue and partial results will be returned to the user.
+     * When partial results are disallowed, a {@link SearchTimeoutException} will be thrown and returned to the user.
+ */ + public static void handleTimeout(boolean allowPartialSearchResults, SearchShardTarget target, QuerySearchResult querySearchResult) { + if (allowPartialSearchResults == false) { + throw new SearchTimeoutException(target, "Time exceeded"); + } + querySearchResult.searchTimedOut(true); + } } diff --git a/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java b/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java index 1227db5d8e1db..7e3646e7689cc 100644 --- a/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java +++ b/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java @@ -73,10 +73,11 @@ public static void execute(SearchContext context) { } catch (IOException e) { throw new ElasticsearchException("Rescore Phase Failed", e); } catch (ContextIndexSearcher.TimeExceededException e) { - if (context.request().allowPartialSearchResults() == false) { - throw new SearchTimeoutException(context.shardTarget(), "Time exceeded"); - } - context.queryResult().searchTimedOut(true); + SearchTimeoutException.handleTimeout( + context.request().allowPartialSearchResults(), + context.shardTarget(), + context.queryResult() + ); } } diff --git a/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java b/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java index d5e930321db95..c8d1b6721c64b 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java @@ -17,6 +17,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.query.QuerySearchResult; import org.elasticsearch.test.ESTestCase; import java.io.IOException; @@ -77,7 +78,7 @@ protected SearchHit nextDoc(int doc) { } }; - SearchHit[] hits = it.iterate(null, reader, docs, randomBoolean()); + SearchHit[] hits = it.iterate(null, reader, docs, randomBoolean(), new QuerySearchResult()); assertThat(hits.length, equalTo(docs.length)); for (int i = 0; i < hits.length; i++) { @@ -125,7 +126,10 @@ protected SearchHit nextDoc(int doc) { } }; - Exception e = expectThrows(FetchPhaseExecutionException.class, () -> it.iterate(null, reader, docs, randomBoolean())); + Exception e = expectThrows( + FetchPhaseExecutionException.class, + () -> it.iterate(null, reader, docs, randomBoolean(), new QuerySearchResult()) + ); assertThat(e.getMessage(), containsString("Error running fetch phase for doc [" + badDoc + "]")); assertThat(e.getCause(), instanceOf(IllegalArgumentException.class)); From 76a382a78d728d90cc84fa3fbcfe61ba1c1e8db2 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 3 Dec 2024 12:24:55 +0100 Subject: [PATCH 11/28] ESQL: Enable CATEGORIZE tests on non-snapshot builds (#117881) --- .../org/elasticsearch/xpack/esql/action/EsqlCapabilities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index a93590d7a5bc2..646c4f8240c3e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -407,7 +407,7 @@ public enum Cap { /** * Supported the 
text categorization function "CATEGORIZE".
      */
-    CATEGORIZE_V4(Build.current().isSnapshot()),
+    CATEGORIZE_V4,

     /**
      * QSTR function

From cf9687f56de49bf5f07152b70b388d3f971aa9a5 Mon Sep 17 00:00:00 2001
From: Alexander Spies
Date: Tue, 3 Dec 2024 13:08:02 +0100
Subject: [PATCH 12/28] ESQL: Fix layout when aggregating with aliases (#117837)

Forward-port of #117832
Only really relevant for bwc with 8.11/8.12; port for consistency with 8.x

---
 .../planner/AbstractPhysicalOperationProviders.java | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java
index 69e2d1c45aa3c..35aba7665ec87 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java
@@ -120,10 +120,14 @@ public final PhysicalOperation groupingPhysicalOperation(
              * - before stats (keep x = a | stats by x) which requires the partial input to use a's channel
              * - after stats (stats by a | keep x = a) which causes the output layout to refer to the follow-up alias
              */
+            // TODO: This is likely required only for pre-8.14 node compatibility; confirm and remove if possible.
+            // Since https://github.com/elastic/elasticsearch/pull/104958, it shouldn't be possible to have aliases in the aggregates
+            // which the groupings refer to. Except for `BY CATEGORIZE(field)`, which remains as alias in the grouping, all aliases
+            // should've become EVALs before or after the STATS.
             for (NamedExpression agg : aggregates) {
                 if (agg instanceof Alias a) {
                     if (a.child() instanceof Attribute attr) {
-                        if (groupAttribute.id().equals(attr.id())) {
+                        if (sourceGroupAttribute.id().equals(attr.id())) {
                             groupAttributeLayout.nameIds().add(a.id());
                             // TODO: investigate whether a break could be used since it shouldn't be possible to have multiple
                             // attributes pointing to the same attribute
@@ -133,8 +137,8 @@ public final PhysicalOperation groupingPhysicalOperation(
                         // is in the output form
                         // if the group points to an alias declared in the aggregate, use the alias child as source
                         else if (aggregatorMode.isOutputPartial()) {
-                            if (groupAttribute.semanticEquals(a.toAttribute())) {
-                                groupAttribute = attr;
+                            if (sourceGroupAttribute.semanticEquals(a.toAttribute())) {
+                                sourceGroupAttribute = attr;
                                 break;
                             }
                         }

From 2a9a3a44dc8bcf71659df5893ef23df535967eea Mon Sep 17 00:00:00 2001
From: Yang Wang
Date: Wed, 4 Dec 2024 00:13:04 +1100
Subject: [PATCH 13/28] Add a not-master state for desired balance (#116904)

The new state prevents a long-running desired balance computation from
setting its result after the node stands down as master.
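As a concrete illustration of the concurrency guard, here is a minimal, self-contained
sketch of the compare-and-set publication pattern this change builds around the new
sentinel. The `NOT_MASTER` and `BECOME_MASTER_INITIAL` names mirror the diff below, while
the `Balance` record and the method names are simplified stand-ins, not the real
implementation:

```java
import java.util.concurrent.atomic.AtomicReference;

final class NotMasterGuardSketch {

    // Stand-in for DesiredBalance; only the sentinel identity matters here.
    record Balance(long lastConvergedIndex) {}

    static final Balance NOT_MASTER = new Balance(-2);            // node is not the master
    static final Balance BECOME_MASTER_INITIAL = new Balance(-1); // starting value on election

    private final AtomicReference<Balance> current = new AtomicReference<>(NOT_MASTER);

    // Runs on the elected master only: flip the sentinel exactly once.
    void onAllocate() {
        current.compareAndSet(NOT_MASTER, BECOME_MASTER_INITIAL);
    }

    // An async computation tries to publish its result; it must lose the race
    // (and discard the result) if the node stood down as master in the meantime.
    boolean publish(Balance computed) {
        while (true) {
            Balance previous = current.get();
            if (previous == NOT_MASTER) {
                return false; // never overwrite the sentinel
            }
            if (current.compareAndSet(previous, computed)) {
                return true;
            }
            // benign race with a concurrent update: reload and retry
        }
    }

    // Runs when the node stands down as master.
    void onNoLongerMaster() {
        current.set(NOT_MASTER);
    }

    public static void main(String[] args) {
        NotMasterGuardSketch guard = new NotMasterGuardSketch();
        guard.onAllocate();
        System.out.println(guard.publish(new Balance(42))); // true: still master
        guard.onNoLongerMaster();
        System.out.println(guard.publish(new Balance(43))); // false: stale result discarded
    }
}
```

Because the reference is only ever advanced via compareAndSet, a computation that finishes
after the node stood down observes the NOT_MASTER sentinel, discards its result, and can
never resurrect a stale balance.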
--- docs/changelog/116904.yaml | 5 ++ .../allocation/allocator/DesiredBalance.java | 9 ++- .../DesiredBalanceShardsAllocator.java | 71 ++++++++++++++----- ...nsportDeleteDesiredBalanceActionTests.java | 2 +- .../DesiredBalanceComputerTests.java | 51 +++++++++---- .../DesiredBalanceShardsAllocatorTests.java | 13 ++-- 6 files changed, 112 insertions(+), 39 deletions(-) create mode 100644 docs/changelog/116904.yaml diff --git a/docs/changelog/116904.yaml b/docs/changelog/116904.yaml new file mode 100644 index 0000000000000..46fa445f36154 --- /dev/null +++ b/docs/changelog/116904.yaml @@ -0,0 +1,5 @@ +pr: 116904 +summary: Add a not-master state for desired balance +area: Allocation +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java index 6ad44fdf3a9c0..406ca72868a40 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java @@ -40,7 +40,14 @@ public DesiredBalance(long lastConvergedIndex, Map ass this(lastConvergedIndex, assignments, Map.of(), ComputationFinishReason.CONVERGED); } - public static final DesiredBalance INITIAL = new DesiredBalance(-1, Map.of()); + /** + * The placeholder value for {@link DesiredBalance} when the node stands down as master. + */ + public static final DesiredBalance NOT_MASTER = new DesiredBalance(-2, Map.of()); + /** + * The starting value for {@link DesiredBalance} when the node becomes the master. + */ + public static final DesiredBalance BECOME_MASTER_INITIAL = new DesiredBalance(-1, Map.of()); public ShardAssignment getAssignment(ShardId shardId) { return assignments.get(shardId); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java index 72261df658ca1..8408386b8da58 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java @@ -29,6 +29,7 @@ import org.elasticsearch.cluster.service.MasterService; import org.elasticsearch.cluster.service.MasterServiceTaskQueue; import org.elasticsearch.common.Priority; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.metrics.CounterMetric; import org.elasticsearch.common.metrics.MeanMetric; import org.elasticsearch.common.settings.ClusterSettings; @@ -43,6 +44,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; /** * A {@link ShardsAllocator} which asynchronously refreshes the desired balance held by the {@link DesiredBalanceComputer} and then takes @@ -62,7 +64,7 @@ public class DesiredBalanceShardsAllocator implements ShardsAllocator { private final AtomicLong indexGenerator = new AtomicLong(-1); private final ConcurrentLinkedQueue> pendingDesiredBalanceMoves = new ConcurrentLinkedQueue<>(); private final MasterServiceTaskQueue masterServiceTaskQueue; - private volatile DesiredBalance currentDesiredBalance = DesiredBalance.INITIAL; + private final AtomicReference 
currentDesiredBalanceRef = new AtomicReference<>(DesiredBalance.NOT_MASTER); private volatile boolean resetCurrentDesiredBalance = false; private final Set processedNodeShutdowns = new HashSet<>(); private final DesiredBalanceMetrics desiredBalanceMetrics; @@ -129,6 +131,12 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { long index = desiredBalanceInput.index(); logger.debug("Starting desired balance computation for [{}]", index); + final DesiredBalance initialDesiredBalance = getInitialDesiredBalance(); + if (initialDesiredBalance == DesiredBalance.NOT_MASTER) { + logger.debug("Abort desired balance computation because node is no longer master"); + return; + } + recordTime( cumulativeComputationTime, // We set currentDesiredBalance back to INITIAL when the node stands down as master in onNoLongerMaster. @@ -137,7 +145,7 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { // lead to unexpected behaviours for tests. See also https://github.com/elastic/elasticsearch/pull/116904 () -> setCurrentDesiredBalance( desiredBalanceComputer.compute( - getInitialDesiredBalance(), + initialDesiredBalance, desiredBalanceInput, pendingDesiredBalanceMoves, this::isFresh @@ -146,7 +154,17 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { ); computationsExecuted.inc(); - if (currentDesiredBalance.finishReason() == DesiredBalance.ComputationFinishReason.STOP_EARLY) { + final DesiredBalance currentDesiredBalance = currentDesiredBalanceRef.get(); + if (currentDesiredBalance == DesiredBalance.NOT_MASTER || currentDesiredBalance == DesiredBalance.BECOME_MASTER_INITIAL) { + logger.debug( + () -> Strings.format( + "Desired balance computation for [%s] is discarded since master has concurrently changed. " + + "Current desiredBalance=[%s]", + index, + currentDesiredBalance + ) + ); + } else if (currentDesiredBalance.finishReason() == DesiredBalance.ComputationFinishReason.STOP_EARLY) { logger.debug( "Desired balance computation for [{}] terminated early with partial result, scheduling reconciliation", index @@ -164,10 +182,13 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { } private DesiredBalance getInitialDesiredBalance() { + final DesiredBalance currentDesiredBalance = currentDesiredBalanceRef.get(); if (resetCurrentDesiredBalance) { logger.info("Resetting current desired balance"); resetCurrentDesiredBalance = false; - return new DesiredBalance(currentDesiredBalance.lastConvergedIndex(), Map.of()); + return currentDesiredBalance == DesiredBalance.NOT_MASTER + ? DesiredBalance.NOT_MASTER + : new DesiredBalance(currentDesiredBalance.lastConvergedIndex(), Map.of()); } else { return currentDesiredBalance; } @@ -215,6 +236,10 @@ public void allocate(RoutingAllocation allocation, ActionListener listener var index = indexGenerator.incrementAndGet(); logger.debug("Executing allocate for [{}]", index); queue.add(index, listener); + // This can only run on master, so unset not-master if exists + if (currentDesiredBalanceRef.compareAndSet(DesiredBalance.NOT_MASTER, DesiredBalance.BECOME_MASTER_INITIAL)) { + logger.debug("initialized desired balance for becoming master"); + } desiredBalanceComputation.onNewInput(DesiredBalanceInput.create(index, allocation)); if (allocation.routingTable().indicesRouting().isEmpty()) { @@ -224,7 +249,7 @@ public void allocate(RoutingAllocation allocation, ActionListener listener // Starts reconciliation towards desired balance that might have not been updated with a recent calculation yet. 
// This is fine as balance should have incremental rather than radical changes. // This should speed up achieving the desired balance in cases current state is still different from it (due to THROTTLING). - reconcile(currentDesiredBalance, allocation); + reconcile(currentDesiredBalanceRef.get(), allocation); } private void processNodeShutdowns(ClusterState clusterState) { @@ -267,16 +292,26 @@ private static List getMoveCommands(AllocationCommands co } private void setCurrentDesiredBalance(DesiredBalance newDesiredBalance) { - if (logger.isTraceEnabled()) { - var diff = DesiredBalance.hasChanges(currentDesiredBalance, newDesiredBalance) - ? "Diff: " + DesiredBalance.humanReadableDiff(currentDesiredBalance, newDesiredBalance) - : "No changes"; - logger.trace("Desired balance updated: {}. {}", newDesiredBalance, diff); - } else { - logger.debug("Desired balance updated for [{}]", newDesiredBalance.lastConvergedIndex()); + while (true) { + final var oldDesiredBalance = currentDesiredBalanceRef.get(); + if (oldDesiredBalance == DesiredBalance.NOT_MASTER) { + logger.debug("discard desired balance for [{}] since node is no longer master", newDesiredBalance.lastConvergedIndex()); + return; + } + + if (currentDesiredBalanceRef.compareAndSet(oldDesiredBalance, newDesiredBalance)) { + if (logger.isTraceEnabled()) { + var diff = DesiredBalance.hasChanges(oldDesiredBalance, newDesiredBalance) + ? "Diff: " + DesiredBalance.humanReadableDiff(oldDesiredBalance, newDesiredBalance) + : "No changes"; + logger.trace("Desired balance updated: {}. {}", newDesiredBalance, diff); + } else { + logger.debug("Desired balance updated for [{}]", newDesiredBalance.lastConvergedIndex()); + } + computedShardMovements.inc(DesiredBalance.shardMovements(oldDesiredBalance, newDesiredBalance)); + break; + } } - computedShardMovements.inc(DesiredBalance.shardMovements(currentDesiredBalance, newDesiredBalance)); - currentDesiredBalance = newDesiredBalance; } protected void submitReconcileTask(DesiredBalance desiredBalance) { @@ -316,7 +351,7 @@ public void execute(RoutingAllocation allocation) { } public DesiredBalance getDesiredBalance() { - return currentDesiredBalance; + return currentDesiredBalanceRef.get(); } public void resetDesiredBalance() { @@ -325,7 +360,7 @@ public void resetDesiredBalance() { public DesiredBalanceStats getStats() { return new DesiredBalanceStats( - Math.max(currentDesiredBalance.lastConvergedIndex(), 0L), + Math.max(currentDesiredBalanceRef.get().lastConvergedIndex(), 0L), desiredBalanceComputation.isActive(), computationsSubmitted.count(), computationsExecuted.count(), @@ -342,7 +377,7 @@ public DesiredBalanceStats getStats() { private void onNoLongerMaster() { if (indexGenerator.getAndSet(-1) != -1) { - currentDesiredBalance = DesiredBalance.INITIAL; + currentDesiredBalanceRef.set(DesiredBalance.NOT_MASTER); queue.completeAllAsNotMaster(); pendingDesiredBalanceMoves.clear(); desiredBalanceReconciler.clear(); @@ -412,7 +447,7 @@ private static void discardSupersededTasks( // only for tests - in production, this happens after reconciliation protected final void completeToLastConvergedIndex() { - queue.complete(currentDesiredBalance.lastConvergedIndex()); + queue.complete(currentDesiredBalanceRef.get().lastConvergedIndex()); } private void recordTime(CounterMetric metric, Runnable action) { diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java 
b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java index 3dafc8f000f3f..385ac600666db 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java @@ -136,7 +136,7 @@ public DesiredBalance compute( safeAwait((ActionListener listener) -> allocationService.reroute(clusterState, "inital-allocate", listener)); var balanceBeforeReset = allocator.getDesiredBalance(); - assertThat(balanceBeforeReset.lastConvergedIndex(), greaterThan(DesiredBalance.INITIAL.lastConvergedIndex())); + assertThat(balanceBeforeReset.lastConvergedIndex(), greaterThan(DesiredBalance.BECOME_MASTER_INITIAL.lastConvergedIndex())); assertThat(balanceBeforeReset.assignments(), not(anEmptyMap())); var listener = new PlainActionFuture(); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java index 7b77947792bd4..679d04224aefe 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java @@ -96,7 +96,12 @@ public void testComputeBalance() { var clusterState = createInitialClusterState(3); var index = clusterState.metadata().index(TEST_INDEX).getIndex(); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance, @@ -115,7 +120,7 @@ public void testStopsComputingWhenStale() { var index = clusterState.metadata().index(TEST_INDEX).getIndex(); // if the isFresh flag is false then we only do one iteration, allocating the primaries but not the replicas - var desiredBalance0 = DesiredBalance.INITIAL; + var desiredBalance0 = DesiredBalance.BECOME_MASTER_INITIAL; var desiredBalance1 = desiredBalanceComputer.compute(desiredBalance0, createInput(clusterState), queue(), input -> false); assertDesiredAssignments( desiredBalance1, @@ -147,7 +152,7 @@ public void testIgnoresOutOfScopePrimaries() { var primaryShard = mutateAllocationStatus(clusterState.routingTable().index(TEST_INDEX).shard(0).primaryShard()); var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState, primaryShard), queue(), input -> true @@ -184,7 +189,7 @@ public void testIgnoresOutOfScopeReplicas() { var replicaShard = mutateAllocationStatus(originalReplicaShard); var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState, replicaShard), queue(), input -> true @@ -241,7 +246,7 @@ public void testAssignShardsToTheirPreviousLocationIfAvailable() { : new ShardRouting[] { clusterState.routingTable().index(TEST_INDEX).shard(0).primaryShard() }; var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState, ignored), queue(), input -> true @@ -284,7 +289,12 @@ public 
void testRespectsAssignmentOfUnknownPrimaries() { } clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(routingNodes)).build(); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance, @@ -331,7 +341,12 @@ public void testRespectsAssignmentOfUnknownReplicas() { } clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(routingNodes)).build(); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance, @@ -367,7 +382,7 @@ public void testRespectsAssignmentByGatewayAllocators() { } var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, DesiredBalanceInput.create(randomNonNegativeLong(), routingAllocation), queue(), input -> true @@ -427,7 +442,12 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing } clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(desiredRoutingNodes)).build(); - var desiredBalance1 = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance1 = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance1, Map.of( @@ -513,7 +533,12 @@ public void testNoDataNodes() { var desiredBalanceComputer = createDesiredBalanceComputer(); var clusterState = createInitialClusterState(0); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments(desiredBalance, Map.of()); } @@ -532,7 +557,7 @@ public void testAppliesMoveCommands() { clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(routingNodes)).build(); var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState), queue( new MoveAllocationCommand(index.getName(), 0, "node-1", "node-2"), @@ -662,7 +687,7 @@ public void testDesiredBalanceShouldConvergeInABigCluster() { var input = new DesiredBalanceInput(randomInt(), routingAllocationWithDecidersOf(clusterState, clusterInfo, settings), List.of()); var desiredBalance = createDesiredBalanceComputer(new BalancedShardsAllocator(settings)).compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, input, queue(), ignored -> iteration.incrementAndGet() < 1000 @@ -1243,7 +1268,7 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing assertThatLogger(() -> { var iteration = new AtomicInteger(0); desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(createInitialClusterState(3)), queue(), input -> iteration.incrementAndGet() < iterations diff --git 
a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java index 9d33b697e31ca..9caf89d4d7613 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java @@ -698,6 +698,7 @@ public void onFailure(Exception e) { try { assertTrue(listenersCalled.await(10, TimeUnit.SECONDS)); + assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), sameInstance(DesiredBalance.NOT_MASTER)); } finally { clusterService.close(); terminate(threadPool); @@ -753,7 +754,7 @@ public DesiredBalance compute( try { // initial computation is based on DesiredBalance.INITIAL rerouteAndWait(service, clusterState, "initial-allocation"); - assertThat(desiredBalanceComputer.lastComputationInput.get(), equalTo(DesiredBalance.INITIAL)); + assertThat(desiredBalanceComputer.lastComputationInput.get(), equalTo(DesiredBalance.BECOME_MASTER_INITIAL)); // any next computation is based on current desired balance var current = desiredBalanceShardsAllocator.getDesiredBalance(); @@ -806,7 +807,7 @@ public void testResetDesiredBalanceOnNoLongerMaster() { try { rerouteAndWait(service, clusterState, "initial-allocation"); - assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.INITIAL))); + assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.BECOME_MASTER_INITIAL))); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.getNodes()).localNodeId(node1.getId()).masterNodeId(node2.getId())) @@ -816,7 +817,7 @@ public void testResetDesiredBalanceOnNoLongerMaster() { assertThat( "desired balance should be resetted on no longer master", desiredBalanceShardsAllocator.getDesiredBalance(), - equalTo(DesiredBalance.INITIAL) + equalTo(DesiredBalance.NOT_MASTER) ); } finally { clusterService.close(); @@ -862,7 +863,7 @@ public void resetDesiredBalance() { try { rerouteAndWait(service, clusterState, "initial-allocation"); - assertThat(desiredBalanceAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.INITIAL))); + assertThat(desiredBalanceAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.BECOME_MASTER_INITIAL))); final var shutdownType = randomFrom(Type.SIGTERM, Type.REMOVE, Type.REPLACE); final var singleShutdownMetadataBuilder = SingleNodeShutdownMetadata.builder() @@ -938,7 +939,7 @@ public DesiredBalance compute( Queue> pendingDesiredBalanceMoves, Predicate isFresh ) { - assertThat(previousDesiredBalance, sameInstance(DesiredBalance.INITIAL)); + assertThat(previousDesiredBalance, sameInstance(DesiredBalance.BECOME_MASTER_INITIAL)); return new DesiredBalance(desiredBalanceInput.index(), Map.of()); } }, @@ -967,7 +968,7 @@ protected void submitReconcileTask(DesiredBalance desiredBalance) { lastListener.onResponse(null); } }; - assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), sameInstance(DesiredBalance.INITIAL)); + assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), sameInstance(DesiredBalance.NOT_MASTER)); try { final PlainActionFuture future = new PlainActionFuture<>(); desiredBalanceShardsAllocator.allocate( From cab6dc5d56a7fcdbbd2fe355bc6d1277094f1400 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine 
<58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 4 Dec 2024 00:26:23 +1100 Subject: [PATCH 14/28] Mute org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT #117893 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 57db22feba059..cf39eae210f88 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -239,6 +239,8 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} issue: https://github.com/elastic/elasticsearch/issues/117862 +- class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT + issue: https://github.com/elastic/elasticsearch/issues/117893 # Examples: # From cca7051e73ff089b26f3d1825e4b4e15b81e04aa Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 3 Dec 2024 14:28:07 +0100 Subject: [PATCH 15/28] ESQL: Simplify CombineProjections (#117882) Make combineUpperGroupingsAndLowerProjections a bit simpler. Also slightly improve a test and add comments to provide more context. --- .../rules/logical/CombineProjections.java | 40 ++++++++++--------- .../optimizer/LogicalPlanOptimizerTests.java | 2 +- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java index be7096538fb9a..957db4a7273e5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java @@ -22,6 +22,7 @@ import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; import java.util.ArrayList; +import java.util.LinkedHashSet; import java.util.List; public final class CombineProjections extends OptimizerRules.OptimizerRule { @@ -144,30 +145,31 @@ private static List combineUpperGroupingsAndLowerProjections( List upperGroupings, List lowerProjections ) { + assert upperGroupings.size() <= 1 + || upperGroupings.stream().anyMatch(group -> group.anyMatch(expr -> expr instanceof Categorize)) == false + : "CombineProjections only tested with a single CATEGORIZE with no additional groups"; // Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..) - AttributeMap aliases = new AttributeMap<>(); + AttributeMap aliases = new AttributeMap<>(); for (NamedExpression ne : lowerProjections) { - // record the alias - aliases.put(ne.toAttribute(), Alias.unwrap(ne)); + // Record the aliases. + // Projections are just aliases for attributes, so casting is safe. + aliases.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne)); } - // Replace any matching attribute directly with the aliased attribute from the projection. - AttributeSet seen = new AttributeSet(); - List replaced = new ArrayList<>(); + + // Propagate any renames from the lower projection into the upper groupings. + // This can lead to duplicates: e.g. + // | EVAL x = y | STATS ... BY x, y + // All substitutions happen before; groupings must be attributes at this point except for CATEGORIZE which will be an alias like + // `c = CATEGORIZE(attribute)`. + // Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet). 
+        // TODO: The deduplication based on simple equality will be insufficient in case of multiple CATEGORIZEs, e.g. for
+        // `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead.
+        LinkedHashSet<NamedExpression> resolvedGroupings = new LinkedHashSet<>();
         for (NamedExpression ne : upperGroupings) {
-            // Duplicated attributes are ignored.
-            if (ne instanceof Attribute attribute) {
-                var newExpression = aliases.resolve(attribute, attribute);
-                if (newExpression instanceof Attribute newAttribute && seen.add(newAttribute) == false) {
-                    // Already seen, skip
-                    continue;
-                }
-                replaced.add(newExpression);
-            } else {
-                // For grouping functions, this will replace nested properties too
-                replaced.add(ne.transformUp(Attribute.class, a -> aliases.resolve(a, a)));
-            }
+            NamedExpression transformed = (NamedExpression) ne.transformUp(Attribute.class, a -> aliases.resolve(a, a));
+            resolvedGroupings.add(transformed);
         }
-        return replaced;
+        return new ArrayList<>(resolvedGroupings);
     }

     /**
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java
index 57d0c7432f97b..a74efca3b3d99 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java
@@ -1217,7 +1217,7 @@ public void testCombineProjectionWithCategorizeGrouping() {
         var plan = plan("""
             from test
             | eval k = first_name, k1 = k
-            | stats s = sum(salary) by cat = CATEGORIZE(k)
+            | stats s = sum(salary) by cat = CATEGORIZE(k1)
             | keep s, cat
             """);

From 03a71d2deee7bb2788fc40b8d21d90cc75b787e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?=
Date: Tue, 3 Dec 2024 14:47:40 +0100
Subject: [PATCH 16/28] ESQL: Make Categorize usable in aggs when identical to a grouping (#117835)

Cases like `STATS MV_APPEND(cat, CATEGORIZE(x)) BY cat=CATEGORIZE(x)` should work, as they're moved to an EVAL by a rule.
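As an illustrative sketch (this is not output of the actual planner; names and plan shapes are approximations), the rule conceptually replaces the grouping-identical CATEGORIZE in the aggregate expression with a reference to the grouping key and moves the surrounding expression into an EVAL:

```
FROM sample_data
| STATS m = MV_APPEND(cat, CATEGORIZE(message)) BY cat = CATEGORIZE(message)

-- is planned roughly as --

FROM sample_data
| STATS BY cat = CATEGORIZE(message)
| EVAL m = MV_APPEND(cat, cat)
| KEEP m, cat
```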
Also, these cases were discarded, as they fail because of other verifications (which also fail for BUCKET):
```
STATS x = category BY category=CATEGORIZE(message)
STATS x = CATEGORIZE(message) BY CATEGORIZE(message)
STATS x = CATEGORIZE(message) BY category=CATEGORIZE(message)
```
---
 .../src/main/resources/bucket.csv-spec | 21 +++
 .../src/main/resources/categorize.csv-spec | 121 ++++++++++++------
 .../src/main/resources/docs.csv-spec | 2 +-
 .../xpack/esql/action/EsqlCapabilities.java | 2 +-
 .../xpack/esql/analysis/Verifier.java | 39 +++---
 ...ReplaceAggregateAggExpressionWithEval.java | 16 +++
 ...laceAggregateNestedExpressionWithEval.java | 6 +-
 .../xpack/esql/analysis/VerifierTests.java | 34 +++--
 .../optimizer/LogicalPlanOptimizerTests.java | 4 +-
 9 files changed, 167 insertions(+), 78 deletions(-)

diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
index 7bbf011176693..b29c489910f65 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
@@ -503,6 +503,27 @@ FROM employees
 //end::reuseGroupingFunctionWithExpression-result[]
 ;
+reuseGroupingFunctionImplicitAliasWithExpression#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| STATS s1 = `BUCKET(salary / 100 + 99, 50.)` + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)
+| SORT `BUCKET(salary / 100 + 99, 50.)`, b2
+| KEEP s1, `BUCKET(salary / 100 + 99, 50.)`, s2, b2
+;
+
+ s1:double | BUCKET(salary / 100 + 99, 50.):double | s2:double | b2:double
+351.0 |350.0 |1002.0 |1000.0
+401.0 |400.0 |1002.0 |1000.0
+451.0 |450.0 |1002.0 |1000.0
+501.0 |500.0 |1002.0 |1000.0
+551.0 |550.0 |1002.0 |1000.0
+601.0 |600.0 |1002.0 |1000.0
+601.0 |600.0 |1052.0 |1050.0
+651.0 |650.0 |1052.0 |1050.0
+701.0 |700.0 |1052.0 |1050.0
+751.0 |750.0 |1052.0 |1050.0
+801.0 |800.0 |1052.0 |1050.0
+;
+
 reuseGroupingFunctionWithinAggs#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
 FROM employees
 | STATS sum = 1 + MAX(1 + BUCKET(salary, 1000.)) BY BUCKET(salary, 1000.)
+ 1 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec index e45b10d1aa122..804c1c56a1eb5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec @@ -1,5 +1,5 @@ standard aggs -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS count=COUNT(), @@ -17,7 +17,7 @@ count:long | sum:long | avg:double | count_distinct:long | category:keyw ; values aggs -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS values=MV_SORT(VALUES(message)), @@ -33,7 +33,7 @@ values:keyword | top ; mv -required_capability: categorize_v4 +required_capability: categorize_v5 FROM mv_sample_data | STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(message) @@ -48,7 +48,7 @@ COUNT():long | SUM(event_duration):long | category:keyword ; row mv -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = ["connected to a", "connected to b", "disconnected"], str = ["a", "b", "c"] | STATS COUNT(), VALUES(str) BY category=CATEGORIZE(message) @@ -61,7 +61,7 @@ COUNT():long | VALUES(str):keyword | category:keyword ; skips stopwords -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = ["Mon Tue connected to a", "Jul Aug connected to b September ", "UTC connected GMT to c UTC"] | STATS COUNT() BY category=CATEGORIZE(message) @@ -73,7 +73,7 @@ COUNT():long | category:keyword ; with multiple indices -required_capability: categorize_v4 +required_capability: categorize_v5 required_capability: union_types FROM sample_data* @@ -88,7 +88,7 @@ COUNT():long | category:keyword ; mv with many values -required_capability: categorize_v4 +required_capability: categorize_v5 FROM employees | STATS COUNT() BY category=CATEGORIZE(job_positions) @@ -105,7 +105,7 @@ COUNT():long | category:keyword ; mv with many values and SUM -required_capability: categorize_v4 +required_capability: categorize_v5 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) @@ -120,7 +120,7 @@ SUM(languages):long | category:keyword ; mv with many values and nulls and SUM -required_capability: categorize_v4 +required_capability: categorize_v5 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) @@ -134,7 +134,7 @@ SUM(languages):long | category:keyword ; mv via eval -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL message = MV_APPEND(message, "Banana") @@ -150,7 +150,7 @@ COUNT():long | category:keyword ; mv via eval const -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL message = ["Banana", "Bread"] @@ -164,7 +164,7 @@ COUNT():long | category:keyword ; mv via eval const without aliases -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL message = ["Banana", "Bread"] @@ -178,7 +178,7 @@ COUNT():long | CATEGORIZE(message):keyword ; mv const in parameter -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) @@ -191,7 +191,7 @@ COUNT():long | c:keyword ; agg alias shadowing -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS c = COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) @@ -206,7 +206,7 @@ 
c:keyword ; chained aggregations using categorize -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -221,7 +221,7 @@ COUNT():long | category:keyword ; stats without aggs -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS BY category=CATEGORIZE(message) @@ -235,7 +235,7 @@ category:keyword ; text field -required_capability: categorize_v4 +required_capability: categorize_v5 FROM hosts | STATS COUNT() BY category=CATEGORIZE(host_group) @@ -253,7 +253,7 @@ COUNT():long | category:keyword ; on TO_UPPER -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(TO_UPPER(message)) @@ -267,7 +267,7 @@ COUNT():long | category:keyword ; on CONCAT -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " banana")) @@ -281,7 +281,7 @@ COUNT():long | category:keyword ; on CONCAT with unicode -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " 👍🏽😊")) @@ -295,7 +295,7 @@ COUNT():long | category:keyword ; on REVERSE(CONCAT()) -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(REVERSE(CONCAT(message, " 👍🏽😊"))) @@ -309,7 +309,7 @@ COUNT():long | category:keyword ; and then TO_LOWER -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -324,7 +324,7 @@ COUNT():long | category:keyword ; on const empty string -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE("") @@ -336,7 +336,7 @@ COUNT():long | category:keyword ; on const empty string from eval -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = "" @@ -349,7 +349,7 @@ COUNT():long | category:keyword ; on null -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = null @@ -362,7 +362,7 @@ COUNT():long | SUM(event_duration):long | category:keyword ; on null string -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = null::string @@ -375,7 +375,7 @@ COUNT():long | category:keyword ; filtering out all data -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | WHERE @timestamp < "2023-10-23T00:00:00Z" @@ -387,7 +387,7 @@ COUNT():long | category:keyword ; filtering out all data with constant -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -398,7 +398,7 @@ COUNT():long | category:keyword ; drop output columns -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message) @@ -413,7 +413,7 @@ x:integer ; category value processing -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = ["connected to a", "connected to b", "disconnected"] | STATS COUNT() BY category=CATEGORIZE(message) @@ -427,7 +427,7 @@ COUNT():long | category:keyword ; row aliases -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to xyz" | EVAL x = message @@ -441,7 
+441,7 @@ COUNT():long | category:keyword | y:keyword ; from aliases -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = message @@ -457,7 +457,7 @@ COUNT():long | category:keyword | y:keyword ; row aliases with keep -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to xyz" | EVAL x = message @@ -473,7 +473,7 @@ COUNT():long | y:keyword ; from aliases with keep -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = message @@ -491,7 +491,7 @@ COUNT():long | y:keyword ; row rename -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to xyz" | RENAME message as x @@ -505,7 +505,7 @@ COUNT():long | y:keyword ; from rename -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | RENAME message as x @@ -521,7 +521,7 @@ COUNT():long | y:keyword ; row drop -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to a" | STATS c = COUNT() BY category=CATEGORIZE(message) @@ -534,7 +534,7 @@ c:long ; from drop -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS c = COUNT() BY category=CATEGORIZE(message) @@ -547,3 +547,48 @@ c:long 3 3 ; + +categorize in aggs inside function +required_capability: categorize_v5 + +FROM sample_data + | STATS COUNT(), x = MV_APPEND(category, category) BY category=CATEGORIZE(message) + | SORT x + | KEEP `COUNT()`, x +; + +COUNT():long | x:keyword + 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] + 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] + 1 | [.*?Disconnected.*?,.*?Disconnected.*?] +; + +categorize in aggs same as grouping inside function +required_capability: categorize_v5 + +FROM sample_data + | STATS COUNT(), x = MV_APPEND(CATEGORIZE(message), `CATEGORIZE(message)`) BY CATEGORIZE(message) + | SORT x + | KEEP `COUNT()`, x +; + +COUNT():long | x:keyword + 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] + 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] + 1 | [.*?Disconnected.*?,.*?Disconnected.*?] +; + +categorize in aggs same as grouping inside function with explicit alias +required_capability: categorize_v5 + +FROM sample_data + | STATS COUNT(), x = MV_APPEND(CATEGORIZE(message), category) BY category=CATEGORIZE(message) + | SORT x + | KEEP `COUNT()`, x +; + +COUNT():long | x:keyword + 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] + 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] + 1 | [.*?Disconnected.*?,.*?Disconnected.*?] 
+; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index 24baf1263d06a..aa89c775da4cf 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -678,7 +678,7 @@ Bangalore | 9 | 72 ; docsCategorize -required_capability: categorize_v4 +required_capability: categorize_v5 // tag::docsCategorize[] FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 646c4f8240c3e..b5d6dd8584e8c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -407,7 +407,7 @@ public enum Cap { /** * Supported the text categorization function "CATEGORIZE". */ - CATEGORIZE_V4, + CATEGORIZE_V5, /** * QSTR function diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 5f8c011cff53a..49d8a5ee8caad 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -20,7 +20,6 @@ import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; -import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; import org.elasticsearch.xpack.esql.core.expression.function.Function; @@ -63,12 +62,10 @@ import java.util.ArrayList; import java.util.BitSet; import java.util.Collection; -import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Consumer; @@ -364,35 +361,35 @@ private static void checkCategorizeGrouping(Aggregate agg, Set failures ); }); - // Forbid CATEGORIZE being used in the aggregations - agg.aggregates().forEach(a -> { - a.forEachDown( - Categorize.class, - categorize -> failures.add( - fail(categorize, "cannot use CATEGORIZE grouping function [{}] within the aggregations", categorize.sourceText()) + // Forbid CATEGORIZE being used in the aggregations, unless it appears as a grouping + agg.aggregates() + .forEach( + a -> a.forEachDown( + AggregateFunction.class, + aggregateFunction -> aggregateFunction.forEachDown( + Categorize.class, + categorize -> failures.add( + fail(categorize, "cannot use CATEGORIZE grouping function [{}] within an aggregation", categorize.sourceText()) + ) + ) ) ); - }); - // Forbid CATEGORIZE being referenced in the aggregation functions - Map categorizeByAliasId = new HashMap<>(); + // Forbid CATEGORIZE being referenced as a child of an aggregation function + AttributeMap categorizeByAttribute = new AttributeMap<>(); agg.groupings().forEach(g -> { g.forEachDown(Alias.class, alias -> { if (alias.child() instanceof Categorize 
categorize) { - categorizeByAliasId.put(alias.id(), categorize); + categorizeByAttribute.put(alias.toAttribute(), categorize); } }); }); agg.aggregates() .forEach(a -> a.forEachDown(AggregateFunction.class, aggregate -> aggregate.forEachDown(Attribute.class, attribute -> { - var categorize = categorizeByAliasId.get(attribute.id()); + var categorize = categorizeByAttribute.get(attribute); if (categorize != null) { failures.add( - fail( - attribute, - "cannot reference CATEGORIZE grouping function [{}] within the aggregations", - attribute.sourceText() - ) + fail(attribute, "cannot reference CATEGORIZE grouping function [{}] within an aggregation", attribute.sourceText()) ); } }))); @@ -449,7 +446,7 @@ private static void checkInvalidNamedExpressionUsage( // check the bucketing function against the group else if (c instanceof GroupingFunction gf) { if (Expressions.anyMatch(groups, ex -> ex instanceof Alias a && a.child().semanticEquals(gf)) == false) { - failures.add(fail(gf, "can only use grouping function [{}] part of the BY clause", gf.sourceText())); + failures.add(fail(gf, "can only use grouping function [{}] as part of the BY clause", gf.sourceText())); } } }); @@ -466,7 +463,7 @@ else if (c instanceof GroupingFunction gf) { // optimizer will later unroll expressions with aggs and non-aggs with a grouping function into an EVAL, but that will no longer // be verified (by check above in checkAggregate()), so do it explicitly here if (Expressions.anyMatch(groups, ex -> ex instanceof Alias a && a.child().semanticEquals(gf)) == false) { - failures.add(fail(gf, "can only use grouping function [{}] part of the BY clause", gf.sourceText())); + failures.add(fail(gf, "can only use grouping function [{}] as part of the BY clause", gf.sourceText())); } else if (level == 0) { addFailureOnGroupingUsedNakedInAggs(failures, gf, "function"); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java index 2361b46b2be6f..c36d4caf7f599 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java @@ -9,18 +9,21 @@ import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.Project; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -51,6 +54,16 @@ protected LogicalPlan rule(Aggregate aggregate) { AttributeMap aliases = new AttributeMap<>(); 
aggregate.forEachExpressionUp(Alias.class, a -> aliases.put(a.toAttribute(), a.child())); + // Build Categorize grouping functions map. + // Functions like BUCKET() shouldn't reach this point, + // as they are moved to an early EVAL by ReplaceAggregateNestedExpressionWithEval + Map groupingAttributes = new HashMap<>(); + aggregate.forEachExpressionUp(Alias.class, a -> { + if (a.child() instanceof Categorize groupingFunction) { + groupingAttributes.put(groupingFunction, a.toAttribute()); + } + }); + // break down each aggregate into AggregateFunction and/or grouping key // preserve the projection at the end List aggs = aggregate.aggregates(); @@ -109,6 +122,9 @@ protected LogicalPlan rule(Aggregate aggregate) { return alias.toAttribute(); }); + // replace grouping functions with their references + aggExpression = aggExpression.transformUp(Categorize.class, groupingAttributes::get); + Alias alias = as.replaceChild(aggExpression); newEvals.add(alias); newProjections.add(alias.toAttribute()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java index 985e68252a1f9..4dbc43454a023 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java @@ -51,6 +51,7 @@ protected LogicalPlan rule(Aggregate aggregate) { // Exception: Categorize is internal to the aggregation and remains in the groupings. We move its child expression into an eval. if (g instanceof Alias as) { if (as.child() instanceof Categorize cat) { + // For Categorize grouping function, we only move the child expression into an eval if (cat.field() instanceof Attribute == false) { groupingChanged = true; var fieldAs = new Alias(as.source(), as.name(), cat.field(), null, true); @@ -59,7 +60,6 @@ protected LogicalPlan rule(Aggregate aggregate) { evalNames.put(fieldAs.name(), fieldAttr); Categorize replacement = cat.replaceChildren(List.of(fieldAttr)); newGroupings.set(i, as.replaceChild(replacement)); - groupingAttributes.put(cat, fieldAttr); } } else { groupingChanged = true; @@ -135,6 +135,10 @@ protected LogicalPlan rule(Aggregate aggregate) { }); // replace any grouping functions with their references pointing to the added synthetic eval replaced = replaced.transformDown(GroupingFunction.class, gf -> { + // Categorize in aggs depends on the grouping result, not on an early eval + if (gf instanceof Categorize) { + return gf; + } aggsChanged.set(true); // should never return null, as it's verified. // but even if broken, the transform will fail safely; otoh, returning `gf` will fail later due to incorrect plan. 
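In summary (the concrete cases below are taken from the VerifierTests changes that follow), CATEGORIZE may now appear among the aggregates when it is identical to a grouping, while nesting it inside an aggregation function, or using a CATEGORIZE that matches no grouping, remains an error:

```
// accepted: the aggregate references or repeats the grouping
FROM test | STATS MV_COUNT(cat), COUNT(*) BY cat = CATEGORIZE(first_name)
FROM test | STATS MV_COUNT(CATEGORIZE(first_name)), COUNT(*) BY CATEGORIZE(first_name)

// rejected: CATEGORIZE nested inside an aggregation function
FROM test | STATS COUNT(CATEGORIZE(first_name)) BY CATEGORIZE(first_name)

// rejected: CATEGORIZE that matches no grouping
FROM test | STATS MV_COUNT(CATEGORIZE(last_name)) BY CATEGORIZE(first_name)
```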
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index d02e78202e0c2..74e2de1141728 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -407,12 +407,12 @@ public void testAggFilterOnBucketingOrAggFunctions() { // but fails if it's different assertEquals( - "1:32: can only use grouping function [bucket(a, 3)] part of the BY clause", + "1:32: can only use grouping function [bucket(a, 3)] as part of the BY clause", error("row a = 1 | stats sum(a) where bucket(a, 3) > -1 by bucket(a,2)") ); assertEquals( - "1:40: can only use grouping function [bucket(salary, 10)] part of the BY clause", + "1:40: can only use grouping function [bucket(salary, 10)] as part of the BY clause", error("from test | stats max(languages) WHERE bucket(salary, 10) > 1 by emp_no") ); @@ -444,19 +444,19 @@ public void testAggWithNonBooleanFilter() { public void testGroupingInsideAggsAsAgg() { assertEquals( - "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:18: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats bucket(emp_no, 5.) by emp_no") ); assertEquals( - "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:18: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats bucket(emp_no, 5.)") ); assertEquals( - "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:18: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats bucket(emp_no, 5.) by bucket(emp_no, 6.)") ); assertEquals( - "1:22: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:22: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats 3 + bucket(emp_no, 5.) 
by bucket(emp_no, 6.)") ); } @@ -1846,7 +1846,7 @@ public void testIntervalAsString() { } public void testCategorizeSingleGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(first_name)"); query("from test | STATS COUNT(*) BY cat = CATEGORIZE(first_name)"); @@ -1875,7 +1875,7 @@ public void testCategorizeSingleGrouping() { } public void testCategorizeNestedGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(LENGTH(first_name)::string)"); @@ -1890,27 +1890,33 @@ public void testCategorizeNestedGrouping() { } public void testCategorizeWithinAggregations() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); query("from test | STATS MV_COUNT(cat), COUNT(*) BY cat = CATEGORIZE(first_name)"); + query("from test | STATS MV_COUNT(CATEGORIZE(first_name)), COUNT(*) BY cat = CATEGORIZE(first_name)"); + query("from test | STATS MV_COUNT(CATEGORIZE(first_name)), COUNT(*) BY CATEGORIZE(first_name)"); assertEquals( - "1:25: cannot use CATEGORIZE grouping function [CATEGORIZE(first_name)] within the aggregations", + "1:25: cannot use CATEGORIZE grouping function [CATEGORIZE(first_name)] within an aggregation", error("FROM test | STATS COUNT(CATEGORIZE(first_name)) BY CATEGORIZE(first_name)") ); - assertEquals( - "1:25: cannot reference CATEGORIZE grouping function [cat] within the aggregations", + "1:25: cannot reference CATEGORIZE grouping function [cat] within an aggregation", error("FROM test | STATS COUNT(cat) BY cat = CATEGORIZE(first_name)") ); assertEquals( - "1:30: cannot reference CATEGORIZE grouping function [cat] within the aggregations", + "1:30: cannot reference CATEGORIZE grouping function [cat] within an aggregation", error("FROM test | STATS SUM(LENGTH(cat::keyword) + LENGTH(last_name)) BY cat = CATEGORIZE(first_name)") ); assertEquals( - "1:25: cannot reference CATEGORIZE grouping function [`CATEGORIZE(first_name)`] within the aggregations", + "1:25: cannot reference CATEGORIZE grouping function [`CATEGORIZE(first_name)`] within an aggregation", error("FROM test | STATS COUNT(`CATEGORIZE(first_name)`) BY CATEGORIZE(first_name)") ); + + assertEquals( + "1:28: can only use grouping function [CATEGORIZE(last_name)] as part of the BY clause", + error("FROM test | STATS MV_COUNT(CATEGORIZE(last_name)) BY CATEGORIZE(first_name)") + ); } public void testSortByAggregate() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index a74efca3b3d99..b76781f76f4af 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -1212,7 +1212,7 @@ public void testCombineProjectionWithAggregationFirstAndAliasedGroupingUsedInAgg * \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..] 
*/ public void testCombineProjectionWithCategorizeGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); var plan = plan(""" from test @@ -3949,7 +3949,7 @@ public void testNestedExpressionsInGroups() { * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] */ public void testNestedExpressionsInGroupsWithCategorize() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); var plan = optimizedPlan(""" from test From ed1e3664ad6c50d2af24b09db51448072764f663 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 3 Dec 2024 13:51:07 +0000 Subject: [PATCH 17/28] Move SparseVectorQueryBuilder and TextExpansionQueryBuilder to x-pack core (#117857) This commit moves the SparseVectorQueryBuilder and TextExpansionQueryBuilder classes to the x-pack core module, enabling other modules to utilize these query builders. Additionally, it introduces a SparseVectorQueryWrapper to extract sparse vector queries from standard Lucene queries. This is needed for supporting semantic highlighting with sparse vector fields as follow up. --- .../xpack/core/XPackClientPlugin.java | 10 +++ .../ml/search}/SparseVectorQueryBuilder.java | 7 +- .../ml/search/SparseVectorQueryWrapper.java | 77 +++++++++++++++++++ .../ml/search}/TextExpansionQueryBuilder.java | 4 +- .../ml/search/WeightedTokensQueryBuilder.java | 2 +- .../core/ml/search/WeightedTokensUtils.java | 11 ++- .../SparseVectorQueryBuilderTests.java | 21 ++--- .../TextExpansionQueryBuilderTests.java | 14 ++-- .../WeightedTokensQueryBuilderTests.java | 13 +++- .../xpack/ml/MachineLearning.java | 19 ----- 10 files changed, 125 insertions(+), 53 deletions(-) rename x-pack/plugin/{ml/src/main/java/org/elasticsearch/xpack/ml/queries => core/src/main/java/org/elasticsearch/xpack/core/ml/search}/SparseVectorQueryBuilder.java (97%) create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java rename x-pack/plugin/{ml/src/main/java/org/elasticsearch/xpack/ml/queries => core/src/main/java/org/elasticsearch/xpack/core/ml/search}/TextExpansionQueryBuilder.java (98%) rename x-pack/plugin/{ml/src/test/java/org/elasticsearch/xpack/ml/queries => core/src/test/java/org/elasticsearch/xpack/core/ml/search}/SparseVectorQueryBuilderTests.java (94%) rename x-pack/plugin/{ml/src/test/java/org/elasticsearch/xpack/ml/queries => core/src/test/java/org/elasticsearch/xpack/core/ml/search}/TextExpansionQueryBuilderTests.java (96%) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index e2435c3396fa8..f5923a4942634 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -71,6 +71,8 @@ import org.elasticsearch.xpack.core.ml.job.config.JobTaskState; import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskParams; import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskState; +import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder; +import org.elasticsearch.xpack.core.ml.search.TextExpansionQueryBuilder; import 
org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage; import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage; @@ -398,6 +400,14 @@ public List getNamedXContent() { @Override public List> getQueries() { return List.of( + new QuerySpec<>(SparseVectorQueryBuilder.NAME, SparseVectorQueryBuilder::new, SparseVectorQueryBuilder::fromXContent), + new QuerySpec( + TextExpansionQueryBuilder.NAME, + TextExpansionQueryBuilder::new, + TextExpansionQueryBuilder::fromXContent + ), + // TODO: The WeightedTokensBuilder is slated for removal after the SparseVectorQueryBuilder is available. + // The logic to create a Boolean query based on weighted tokens will remain and/or be moved to server. new SearchPlugin.QuerySpec( WeightedTokensQueryBuilder.NAME, WeightedTokensQueryBuilder::new, diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java similarity index 97% rename from x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java index 5a63ad8e85e9b..e9e4e90421adc 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; @@ -33,9 +33,6 @@ import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensUtils; import java.io.IOException; import java.util.ArrayList; @@ -210,7 +207,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { return (shouldPruneTokens) ? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, queryVectors, ft, context) - : WeightedTokensUtils.queryBuilderWithAllTokens(queryVectors, ft, context); + : WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, ft, context); } @Override diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java new file mode 100644 index 0000000000000..234560f620d95 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java @@ -0,0 +1,77 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.core.ml.search; + +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; +import org.elasticsearch.index.query.SearchExecutionContext; + +import java.io.IOException; +import java.util.Objects; + +/** + * A wrapper class for the Lucene query generated by {@link SparseVectorQueryBuilder#toQuery(SearchExecutionContext)}. + * This wrapper facilitates the extraction of the complete sparse vector query using a {@link QueryVisitor}. + */ +public class SparseVectorQueryWrapper extends Query { + private final String fieldName; + private final Query termsQuery; + + public SparseVectorQueryWrapper(String fieldName, Query termsQuery) { + this.fieldName = fieldName; + this.termsQuery = termsQuery; + } + + public Query getTermsQuery() { + return termsQuery; + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + var rewrite = termsQuery.rewrite(indexSearcher); + if (rewrite != termsQuery) { + return new SparseVectorQueryWrapper(fieldName, rewrite); + } + return this; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return termsQuery.createWeight(searcher, scoreMode, boost); + } + + @Override + public String toString(String field) { + return termsQuery.toString(field); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(fieldName)) { + termsQuery.visit(visitor.getSubVisitor(BooleanClause.Occur.MUST, this)); + } + } + + @Override + public boolean equals(Object obj) { + if (sameClassAs(obj) == false) { + return false; + } + SparseVectorQueryWrapper that = (SparseVectorQueryWrapper) obj; + return fieldName.equals(that.fieldName) && termsQuery.equals(that.termsQuery); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), fieldName, termsQuery); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilder.java similarity index 98% rename from x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilder.java index 6d972bcf5863a..81758ec5f9342 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilder.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.search.Query; import org.apache.lucene.util.SetOnce; @@ -32,8 +32,6 @@ import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; import java.io.IOException; import java.util.List; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java index 256c90c3eaa62..f41fcd77ce627 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java @@ -125,7 +125,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { } return (this.tokenPruningConfig == null) - ? WeightedTokensUtils.queryBuilderWithAllTokens(tokens, ft, context) + ? WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, tokens, ft, context) : WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, tokens, ft, context); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java index 133920416d227..1c2ac23151e6e 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java @@ -24,13 +24,18 @@ public final class WeightedTokensUtils { private WeightedTokensUtils() {} - public static Query queryBuilderWithAllTokens(List tokens, MappedFieldType ft, SearchExecutionContext context) { + public static Query queryBuilderWithAllTokens( + String fieldName, + List tokens, + MappedFieldType ft, + SearchExecutionContext context + ) { var qb = new BooleanQuery.Builder(); for (var token : tokens) { qb.add(new BoostQuery(ft.termQuery(token.token(), context), token.weight()), BooleanClause.Occur.SHOULD); } - return qb.setMinimumNumberShouldMatch(1).build(); + return new SparseVectorQueryWrapper(fieldName, qb.setMinimumNumberShouldMatch(1).build()); } public static Query queryBuilderWithPrunedTokens( @@ -64,7 +69,7 @@ public static Query queryBuilderWithPrunedTokens( } } - return qb.setMinimumNumberShouldMatch(1).build(); + return new SparseVectorQueryWrapper(fieldName, qb.setMinimumNumberShouldMatch(1).build()); } /** diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java similarity index 94% rename from x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java index 13cf6d87728a8..9872d95de024a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java +++ 
b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.document.Document; import org.apache.lucene.document.FeatureField; @@ -40,9 +40,6 @@ import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.ml.MachineLearning; import java.io.IOException; import java.lang.reflect.Method; @@ -50,7 +47,7 @@ import java.util.Collection; import java.util.List; -import static org.elasticsearch.xpack.ml.queries.SparseVectorQueryBuilder.QUERY_VECTOR_FIELD; +import static org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder.QUERY_VECTOR_FIELD; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.hasSize; @@ -102,7 +99,7 @@ private SparseVectorQueryBuilder createTestQueryBuilder(TokenPruningConfig token @Override protected Collection> getPlugins() { - return List.of(MachineLearning.class, MapperExtrasPlugin.class, XPackClientPlugin.class); + return List.of(MapperExtrasPlugin.class, XPackClientPlugin.class); } @Override @@ -156,8 +153,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws @Override protected void doAssertLuceneQuery(SparseVectorQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { - assertThat(query, instanceOf(BooleanQuery.class)); - BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1); assertThat(booleanQuery.clauses(), hasSize(NUM_TOKENS)); @@ -233,11 +232,13 @@ public void testToQuery() throws IOException { private void testDoToQuery(SparseVectorQueryBuilder queryBuilder, SearchExecutionContext context) throws IOException { Query query = queryBuilder.doToQuery(context); + assertTrue(query instanceof SparseVectorQueryWrapper); + var sparseQuery = (SparseVectorQueryWrapper) query; if (queryBuilder.shouldPruneTokens()) { // It's possible that all documents were pruned for aggressive pruning configurations - assertTrue(query instanceof BooleanQuery || query instanceof MatchNoDocsQuery); + assertTrue(sparseQuery.getTermsQuery() instanceof BooleanQuery || sparseQuery.getTermsQuery() instanceof MatchNoDocsQuery); } else { - assertTrue(query instanceof BooleanQuery); + assertTrue(sparseQuery.getTermsQuery() instanceof BooleanQuery); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilderTests.java similarity index 96% rename from x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilderTests.java index 
00d50e0d0d7bb..a0263003b72db 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilderTests.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.document.Document; import org.apache.lucene.document.FeatureField; @@ -35,10 +35,6 @@ import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; -import org.elasticsearch.xpack.ml.MachineLearning; import java.io.IOException; import java.lang.reflect.Method; @@ -77,7 +73,7 @@ protected TextExpansionQueryBuilder doCreateTestQueryBuilder() { @Override protected Collection> getPlugins() { - return List.of(MachineLearning.class, MapperExtrasPlugin.class, XPackClientPlugin.class); + return List.of(MapperExtrasPlugin.class, XPackClientPlugin.class); } @Override @@ -129,8 +125,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws @Override protected void doAssertLuceneQuery(TextExpansionQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { - assertThat(query, instanceOf(BooleanQuery.class)); - BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1); assertThat(booleanQuery.clauses(), hasSize(NUM_TOKENS)); diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java index 114ad90354c61..cded9b8dce5e2 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java @@ -271,8 +271,11 @@ public void testPruningIsAppliedCorrectly() throws IOException { } private void assertCorrectLuceneQuery(String name, Query query, List expectedFeatureFields) { - assertTrue(query instanceof BooleanQuery); - List booleanClauses = ((BooleanQuery) query).clauses(); + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); + List booleanClauses = booleanQuery.clauses(); assertEquals( name + " had " + booleanClauses.size() + " clauses, expected " + expectedFeatureFields.size(), expectedFeatureFields.size(), @@ -343,8 +346,10 @@ public void testMustRewrite() throws IOException { @Override protected void doAssertLuceneQuery(WeightedTokensQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { - assertThat(query, instanceOf(BooleanQuery.class)); 
- BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1); assertThat(booleanQuery.clauses(), hasSize(NUM_TOKENS)); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 8363e0f5c19a1..c76e43790a259 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -48,7 +48,6 @@ import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; -import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.indices.AssociatedIndexDescriptor; import org.elasticsearch.indices.SystemIndexDescriptor; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; @@ -376,8 +375,6 @@ import org.elasticsearch.xpack.ml.process.MlMemoryTracker; import org.elasticsearch.xpack.ml.process.NativeController; import org.elasticsearch.xpack.ml.process.NativeStorageProvider; -import org.elasticsearch.xpack.ml.queries.SparseVectorQueryBuilder; -import org.elasticsearch.xpack.ml.queries.TextExpansionQueryBuilder; import org.elasticsearch.xpack.ml.rest.RestDeleteExpiredDataAction; import org.elasticsearch.xpack.ml.rest.RestMlInfoAction; import org.elasticsearch.xpack.ml.rest.RestMlMemoryAction; @@ -1764,22 +1761,6 @@ public List> getQueryVectorBuilders() { ); } - @Override - public List> getQueries() { - return List.of( - new QuerySpec( - TextExpansionQueryBuilder.NAME, - TextExpansionQueryBuilder::new, - TextExpansionQueryBuilder::fromXContent - ), - new QuerySpec( - SparseVectorQueryBuilder.NAME, - SparseVectorQueryBuilder::new, - SparseVectorQueryBuilder::fromXContent - ) - ); - } - private ContextParser checkAggLicense(ContextParser realParser, LicensedFeature.Momentary feature) { return (parser, name) -> { if (feature.check(getLicenseState()) == false) { From 5c1b3c7197603414614d72487c7327662d622420 Mon Sep 17 00:00:00 2001 From: mmahacek Date: Tue, 3 Dec 2024 06:10:02 -0800 Subject: [PATCH 18/28] Update email.asciidoc (#117867) Fix error in documentation. --- docs/reference/watcher/actions/email.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/watcher/actions/email.asciidoc b/docs/reference/watcher/actions/email.asciidoc index 16b9cc4be0628..efad500e0226b 100644 --- a/docs/reference/watcher/actions/email.asciidoc +++ b/docs/reference/watcher/actions/email.asciidoc @@ -129,7 +129,7 @@ killed by firewalls or load balancers in-between. | Name | Description | `format` | Attaches the watch data, equivalent to specifying `attach_data` in the watch configuration. Possible values are `json` or `yaml`. - Defaults to `json` if not specified. + Defaults to `yaml` if not specified. 
|====== From d3f0ae04e2b5e107686b9a19ffbe5312bacec753 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Tue, 3 Dec 2024 15:10:57 +0100 Subject: [PATCH 19/28] Enhance LOOKUP JOIN csv-spec tests to cover more cases and fix several bugs found (#117843) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds several more tests to lookup-join.csv-spec, and fixes the following bugs: * FieldCaps on right hand side should ignore fieldNames method and just use "*" because currently the fieldNames search cannot handle lookup fields with aliases (should be fixed in a followup PR). * Stop using the lookup index in the ComputeService (so we don’t get both indices data coming in from the left, and other weird behaviour). * Ignore failing SearchStats checks on fields from the right hand side in the logical planner (so it does not plan EVAL field = null for all right hand fields). This should be fixed properly with the correct updates to TransportSearchShardsAction (or rather to making multiple use of that for each branch of the execution model). --- .../xpack/esql/ccq/MultiClusterSpecIT.java | 4 +- .../xpack/esql/CsvTestsDataLoader.java | 8 + .../resources/clientips_lookup-settings.json | 5 + .../src/main/resources/languages.csv | 2 +- .../src/main/resources/lookup-join.csv-spec | 224 +++++++++++++++++- .../src/main/resources/mapping-clientips.json | 16 +- .../src/main/resources/mapping-languages.json | 2 +- .../main/resources/mapping-message_types.json | 10 + .../src/main/resources/message_types.csv | 6 + .../message_types_lookup-settings.json | 5 + .../xpack/esql/action/EsqlCapabilities.java | 2 +- .../esql/enrich/LookupFromIndexService.java | 11 + .../local/ReplaceMissingFieldWithNull.java | 13 +- .../physical/local/InsertFieldExtraction.java | 15 +- .../esql/plan/physical/LookupJoinExec.java | 2 +- .../esql/planner/LocalExecutionPlanner.java | 1 + .../xpack/esql/planner/PlannerUtils.java | 11 +- .../xpack/esql/plugin/ComputeService.java | 54 ++++- .../xpack/esql/session/EsqlSession.java | 4 +- .../elasticsearch/xpack/esql/CsvTests.java | 2 +- 20 files changed, 355 insertions(+), 42 deletions(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index af5eadc7358a2..19b29764559d1 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -47,7 +47,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V2; -import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V3; +import static 
org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V4; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST; import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.SYNC; @@ -125,7 +125,7 @@ protected void shouldSkipTest(String testName) throws IOException { assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName())); - assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V3.capabilityName())); + assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V4.capabilityName())); } private TestFeatureService remoteFeaturesService() throws IOException { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 9c987a02aca2d..f9d8cf00695c1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -72,6 +72,11 @@ public class CsvTestsDataLoader { .withTypeMapping(Map.of("@timestamp", "date_nanos")); private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset("missing_ip_sample_data"); private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips"); + private static final TestsDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup") + .withSetting("clientips_lookup-settings.json"); + private static final TestsDataset MESSAGE_TYPES = new TestsDataset("message_types"); + private static final TestsDataset MESSAGE_TYPES_LOOKUP = MESSAGE_TYPES.withIndex("message_types_lookup") + .withSetting("message_types_lookup-settings.json"); private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr"); private static final TestsDataset AGES = new TestsDataset("ages"); private static final TestsDataset HEIGHTS = new TestsDataset("heights"); @@ -112,6 +117,9 @@ public class CsvTestsDataLoader { Map.entry(SAMPLE_DATA_TS_NANOS.indexName, SAMPLE_DATA_TS_NANOS), Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), + Map.entry(CLIENT_IPS_LOOKUP.indexName, CLIENT_IPS_LOOKUP), + Map.entry(MESSAGE_TYPES.indexName, MESSAGE_TYPES), + Map.entry(MESSAGE_TYPES_LOOKUP.indexName, MESSAGE_TYPES_LOOKUP), Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR), Map.entry(AGES.indexName, AGES), Map.entry(HEIGHTS.indexName, HEIGHTS), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json new file mode 100644 index 0000000000000..b73d1f9accf92 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json @@ -0,0 +1,5 @@ +{ + "index": { + "mode": "lookup" + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv index 3ee60b79970ba..1c1a9776df6cc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv @@ -1,4 +1,4 @@ -language_code:keyword,language_name:keyword +language_code:integer,language_name:keyword 1,English 2,French 3,Spanish diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 5de353978b307..f2800456ceb33 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -4,8 +4,8 @@ // //TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) -basicOnTheDataNode-Ignore -required_capability: join_lookup_v3 +basicOnTheDataNode +required_capability: join_lookup_v4 FROM employees | EVAL language_code = languages @@ -21,19 +21,19 @@ emp_no:integer | language_code:integer | language_name:keyword 10093 | 3 | Spanish ; -basicRow-Ignore -required_capability: join_lookup_v3 +basicRow +required_capability: join_lookup_v4 ROW language_code = 1 | LOOKUP JOIN languages_lookup ON language_code ; -language_code:keyword | language_name:keyword +language_code:integer | language_name:keyword 1 | English ; basicOnTheCoordinator -required_capability: join_lookup_v3 +required_capability: join_lookup_v4 FROM employees | SORT emp_no @@ -49,9 +49,8 @@ emp_no:integer | language_code:integer | language_name:keyword 10003 | 4 | German ; -//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) -subsequentEvalOnTheDataNode-Ignore -required_capability: join_lookup_v3 +subsequentEvalOnTheDataNode +required_capability: join_lookup_v4 FROM employees | EVAL language_code = languages @@ -69,7 +68,7 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x ; subsequentEvalOnTheCoordinator -required_capability: join_lookup_v3 +required_capability: join_lookup_v4 FROM employees | SORT emp_no @@ -85,3 +84,208 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x 10002 | 5 | null | 10 10003 | 4 | german | 8 ; + +lookupIPFromRow +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", right = "right" +| LOOKUP JOIN clientips_lookup ON client_ip +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromRowWithShadowing +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" +| LOOKUP JOIN clientips_lookup ON client_ip +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromRowWithShadowingKeep +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP left, client_ip, right, env +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromIndex +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +; + +@timestamp:date | event_duration:long | message:keyword | 
client_ip:keyword | env:keyword +2023-10-23T13:55:01.543Z | 1756467 | Connected to 10.1.0.1 | 172.21.3.15 | Production +2023-10-23T13:53:55.832Z | 5033755 | Connection error | 172.21.3.15 | Production +2023-10-23T13:52:55.015Z | 8268153 | Connection error | 172.21.3.15 | Production +2023-10-23T13:51:54.732Z | 725448 | Connection error | 172.21.3.15 | Production +2023-10-23T13:33:34.937Z | 1232382 | Disconnected | 172.21.0.5 | Development +2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 | 172.21.2.113 | QA +2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 | 172.21.2.162 | QA +; + +lookupIPFromIndexKeep +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP @timestamp, client_ip, event_duration, message, env +; + +@timestamp:date | client_ip:keyword | event_duration:long | message:keyword | env:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Production +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Production +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Production +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Production +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Development +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | QA +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | QA +; + +lookupIPFromIndexStats +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| STATS count = count(client_ip) BY env +| SORT count DESC, env ASC +; + +count:long | env:keyword +4 | Production +2 | QA +1 | Development +; + +lookupIPFromIndexStatsKeep +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP client_ip, env +| STATS count = count(client_ip) BY env +| SORT count DESC, env ASC +; + +count:long | env:keyword +4 | Production +2 | QA +1 | Development +; + +lookupMessageFromRow +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", right = "right" +| LOOKUP JOIN message_types_lookup ON message +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromRowWithShadowing +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right" +| LOOKUP JOIN message_types_lookup ON message +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromRowWithShadowingKeep +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right" +| LOOKUP JOIN message_types_lookup ON message +| KEEP left, message, right, type +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromIndex +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +; + +@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error 
+2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success +; + +lookupMessageFromIndexKeep +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| KEEP @timestamp, client_ip, event_duration, message, type +; + +@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success +; + +lookupMessageFromIndexStats +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| STATS count = count(message) BY type +| SORT count DESC, type ASC +; + +count:long | type:keyword +3 | Error +3 | Success +1 | Disconnected +; + +lookupMessageFromIndexStatsKeep +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| KEEP message, type +| STATS count = count(message) BY type +| SORT count DESC, type ASC +; + +count:long | type:keyword +3 | Error +3 | Success +1 | Disconnected +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json index 39bd37ce26c7f..d491810f9134e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json @@ -1,10 +1,10 @@ { - "properties": { - "client_ip": { - "type": "keyword" - }, - "env": { - "type": "keyword" - } + "properties": { + "client_ip": { + "type": "keyword" + }, + "env": { + "type": "keyword" } - } \ No newline at end of file + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json index 0cec0caf17304..327b692369242 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json @@ -1,7 +1,7 @@ { "properties" : { "language_code" : { - "type" : "keyword" + "type" : "integer" }, "language_name" : { "type" : "keyword" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json new file mode 100644 index 0000000000000..af545b48da3d2 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json @@ -0,0 +1,10 @@ +{ + "properties": { + "message": { + "type": "keyword" + }, + "type": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv new file mode 100644 index 0000000000000..8e00485771445 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv @@ -0,0 +1,6 @@ +message:keyword,type:keyword +Connection error,Error +Disconnected,Disconnected +Connected to 10.1.0.1,Success +Connected to 10.1.0.2,Success +Connected to 10.1.0.3,Success diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json new file mode 100644 index 0000000000000..b73d1f9accf92 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json @@ -0,0 +1,5 @@ +{ + "index": { + "mode": "lookup" + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index b5d6dd8584e8c..4845c7061949b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -525,7 +525,7 @@ public enum Cap { /** * LOOKUP JOIN */ - JOIN_LOOKUP_V3(Build.current().isSnapshot()), + JOIN_LOOKUP_V4(Build.current().isSnapshot()), /** * Fix for https://github.com/elastic/elasticsearch/issues/117054 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java index 849e8e890e248..4f429c46b9123 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java @@ -24,6 +24,7 @@ import org.elasticsearch.tasks.TaskId; import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.security.authz.privilege.ClusterPrivilegeResolver; +import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.action.EsqlQueryAction; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -78,9 +79,19 @@ protected TransportRequest transportRequest(LookupFromIndexService.Request reque @Override protected QueryList queryList(TransportRequest request, SearchExecutionContext context, Block inputBlock, DataType inputDataType) { MappedFieldType fieldType = context.getFieldType(request.matchField); + validateTypes(request.inputDataType, fieldType); return termQueryList(fieldType, context, inputBlock, inputDataType); } + private static void validateTypes(DataType inputDataType, MappedFieldType fieldType) { + // TODO: consider supporting implicit type conversion as done in ENRICH for some types + if (fieldType.typeName().equals(inputDataType.typeName()) == false) { + throw new EsqlIllegalArgumentException( + "LOOKUP JOIN match and input types are incompatible: match[" + fieldType.typeName() + "], input[" + inputDataType + "]" + ); + } + } + public static class Request extends AbstractLookupService.Request { private final String matchField; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index 0fa6d61a0ca9b..096f72f7694e1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -9,6 +9,7 @@ import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; @@ -23,6 +24,7 @@ import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; import org.elasticsearch.xpack.esql.plan.logical.TopN; +import org.elasticsearch.xpack.esql.plan.logical.join.Join; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.stats.SearchStats; @@ -56,10 +58,13 @@ else if (plan instanceof Project project) { var projections = project.projections(); List newProjections = new ArrayList<>(projections.size()); Map nullLiteral = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); + AttributeSet joinAttributes = joinAttributes(project); for (NamedExpression projection : projections) { // Do not use the attribute name, this can deviate from the field name for union types. - if (projection instanceof FieldAttribute f && stats.exists(f.fieldName()) == false) { + if (projection instanceof FieldAttribute f && stats.exists(f.fieldName()) == false && joinAttributes.contains(f) == false) { + // TODO: Should do a searchStats lookup for join attributes instead of just ignoring them here + // See TransportSearchShardsAction DataType dt = f.dataType(); Alias nullAlias = nullLiteral.get(f.dataType()); // save the first field as null (per datatype) @@ -96,4 +101,10 @@ else if (plan instanceof Project project) { return plan; } + + private AttributeSet joinAttributes(Project project) { + var attributes = new AttributeSet(); + project.forEachDown(Join.class, j -> j.right().forEachDown(EsRelation.class, p -> attributes.addAll(p.output()))); + return attributes; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java index cafe3726f92ac..dc32a4ad3c282 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java @@ -23,14 +23,12 @@ import org.elasticsearch.xpack.esql.rule.Rule; import java.util.ArrayList; -import java.util.Collections; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; /** - * * Materialize the concrete fields that need to be extracted from the storage until the last possible moment. * Expects the local plan to already have a projection containing the fields needed upstream. *

@@ -102,15 +100,18 @@ public PhysicalPlan apply(PhysicalPlan plan) { private static Set missingAttributes(PhysicalPlan p) { var missing = new LinkedHashSet(); - var inputSet = p.inputSet(); + var input = p.inputSet(); - // TODO: We need to extract whatever fields are missing from the left hand side. - // skip the lookup join since the right side is always materialized and a projection + // For LOOKUP JOIN we only need field-extraction on left fields used to match, since the right side is always materialized if (p instanceof LookupJoinExec join) { - return Collections.emptySet(); + join.leftFields().forEach(f -> { + if (input.contains(f) == false) { + missing.add(f); + } + }); + return missing; } - var input = inputSet; // collect field attributes used inside expressions // TODO: Rather than going over all expressions manually, this should just call .references() p.forEachExpression(TypedAttribute.class, f -> { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java index 2d3caa27da4cd..8b1cc047309e7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java @@ -102,7 +102,7 @@ public List output() { @Override public PhysicalPlan estimateRowSize(State state) { - state.add(false, output()); + state.add(false, addedFields); return this; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index a8afaa4d8119b..8c0488afdd42a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -565,6 +565,7 @@ private PhysicalOperation planHashJoin(HashJoinExec join, LocalExecutionPlannerC private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlannerContext context) { PhysicalOperation source = plan(join.left(), context); + // TODO: The source builder includes incoming fields including the ones we're going to drop Layout.Builder layoutBuilder = source.layout.builder(); for (Attribute f : join.addedFields()) { layoutBuilder.append(f); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index f4ada1442efe5..37f89891860d8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -14,6 +14,7 @@ import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.SearchExecutionContext; @@ -25,6 +26,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.core.util.Queries; +import org.elasticsearch.xpack.esql.index.EsIndex; import 
org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext; import org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; @@ -117,12 +119,17 @@ public static String[] planOriginalIndices(PhysicalPlan plan) { var indices = new LinkedHashSet(); plan.forEachUp( FragmentExec.class, - f -> f.fragment() - .forEachUp(EsRelation.class, r -> indices.addAll(asList(Strings.commaDelimitedListToStringArray(r.index().name())))) + f -> f.fragment().forEachUp(EsRelation.class, r -> addOriginalIndexIfNotLookup(indices, r.index())) ); return indices.toArray(String[]::new); } + private static void addOriginalIndexIfNotLookup(Set indices, EsIndex index) { + if (index.indexNameWithModes().get(index.name()) != IndexMode.LOOKUP) { + indices.addAll(asList(Strings.commaDelimitedListToStringArray(index.name()))); + } + } + public static PhysicalPlan localPlan(List searchContexts, Configuration configuration, PhysicalPlan plan) { return localPlan(configuration, plan, SearchContextStats.from(searchContexts)); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java index 9aea1577a4137..c9c8635a60f57 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java @@ -62,8 +62,12 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; import org.elasticsearch.xpack.esql.enrich.LookupFromIndexService; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.join.Join; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec; +import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; +import org.elasticsearch.xpack.esql.plan.physical.LookupJoinExec; import org.elasticsearch.xpack.esql.plan.physical.OutputExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; @@ -76,6 +80,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -160,9 +165,11 @@ public void execute( Map clusterToConcreteIndices = transportService.getRemoteClusterService() .groupIndices(SearchRequest.DEFAULT_INDICES_OPTIONS, PlannerUtils.planConcreteIndices(physicalPlan).toArray(String[]::new)); QueryPragmas queryPragmas = configuration.pragmas(); + Set lookupIndexNames = findLookupIndexNames(physicalPlan); + Set concreteIndexNames = selectConcreteIndices(clusterToConcreteIndices, lookupIndexNames); if (dataNodePlan == null) { - if (clusterToConcreteIndices.values().stream().allMatch(v -> v.indices().length == 0) == false) { - String error = "expected no concrete indices without data node plan; got " + clusterToConcreteIndices; + if (concreteIndexNames.isEmpty() == false) { + String error = "expected no concrete indices without data node plan; got " + concreteIndexNames; assert false : error; listener.onFailure(new IllegalStateException(error)); return; @@ -185,7 +192,7 @@ public void execute( return; } } else { - if (clusterToConcreteIndices.values().stream().allMatch(v -> 
v.indices().length == 0)) { + if (concreteIndexNames.isEmpty()) { var error = "expected concrete indices with data node plan but got empty; data node plan " + dataNodePlan; assert false : error; listener.onFailure(new IllegalStateException(error)); @@ -259,6 +266,42 @@ public void execute( } } + private Set selectConcreteIndices(Map clusterToConcreteIndices, Set indexesToIgnore) { + Set concreteIndexNames = new HashSet<>(); + clusterToConcreteIndices.forEach((clusterAlias, concreteIndices) -> { + for (String index : concreteIndices.indices()) { + if (indexesToIgnore.contains(index) == false) { + concreteIndexNames.add(index); + } + } + }); + return concreteIndexNames; + } + + private Set findLookupIndexNames(PhysicalPlan physicalPlan) { + Set lookupIndexNames = new HashSet<>(); + // When planning JOIN on the coordinator node: "LookupJoinExec.lookup()->FragmentExec.fragment()->EsRelation.index()" + physicalPlan.forEachDown( + LookupJoinExec.class, + lookupJoinExec -> lookupJoinExec.lookup() + .forEachDown( + FragmentExec.class, + frag -> frag.fragment().forEachDown(EsRelation.class, esRelation -> lookupIndexNames.add(esRelation.index().name())) + ) + ); + // When planning JOIN on the data node: "FragmentExec.fragment()->Join.right()->EsRelation.index()" + // TODO this only works for LEFT join, so we still need to support RIGHT join + physicalPlan.forEachDown( + FragmentExec.class, + fragmentExec -> fragmentExec.fragment() + .forEachDown( + Join.class, + join -> join.right().forEachDown(EsRelation.class, esRelation -> lookupIndexNames.add(esRelation.index().name())) + ) + ); + return lookupIndexNames; + } + // For queries like: FROM logs* | LIMIT 0 (including cross-cluster LIMIT 0 queries) private static void updateShardCountForCoordinatorOnlyQuery(EsqlExecutionInfo execInfo) { if (execInfo.isCrossClusterSearch()) { @@ -562,8 +605,9 @@ record DataNode(Transport.Connection connection, List shardIds, Map dataNodes, int totalShards, int skippedShards) {} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 3b0f9ab578df9..3d1ed8f70eae0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -178,7 +178,7 @@ public void executeOptimizedPlan( executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener); } - private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}; + private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {} private void executeSubPlans( PhysicalPlan physicalPlan, @@ -313,7 +313,7 @@ private void preAnalyze( // First resolve the lookup indices, then the main indices preAnalyzeLookupIndices( preAnalysis.lookupIndices, - fieldNames, + Set.of("*"), // Current LOOKUP JOIN syntax does not allow for field selection l.delegateFailureAndWrap( (lx, lookupIndexResolution) -> preAnalyzeIndices( indices, diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index df974a88a4c57..2e8b856cf82a6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -263,7 +263,7 @@ public final void test() throws Throwable { ); assumeFalse( "lookup join disabled for 
csv tests", - testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V3.capabilityName()) + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V4.capabilityName()) ); if (Build.current().isSnapshot()) { assertThat( From ae1b1320996f8fb636f8f377bc9fa7b7743230a6 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Tue, 3 Dec 2024 06:34:13 -0800 Subject: [PATCH 20/28] Only check non-negative stats for active, current and queue (#117834) In SimpleThreadPoolIT, stats are gathered for each threadpool being checked, then measurements are collected. Some stats may go up or down depending on other background tasks outside the test. This commit adjusts the check for those stats to only check collecting non-negative values. closes #108320 --- muted-tests.yml | 3 --- .../org/elasticsearch/threadpool/SimpleThreadPoolIT.java | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index cf39eae210f88..3652173327e84 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -141,9 +141,6 @@ tests: - class: org.elasticsearch.xpack.shutdown.NodeShutdownIT method: testAllocationPreventedForRemoval issue: https://github.com/elastic/elasticsearch/issues/116363 -- class: org.elasticsearch.threadpool.SimpleThreadPoolIT - method: testThreadPoolMetrics - issue: https://github.com/elastic/elasticsearch/issues/108320 - class: org.elasticsearch.xpack.downsample.ILMDownsampleDisruptionIT method: testILMDownsampleRollingRestart issue: https://github.com/elastic/elasticsearch/issues/114233 diff --git a/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java b/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java index be875421e036f..d2e021a8d7436 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java @@ -167,10 +167,10 @@ public void testThreadPoolMetrics() throws Exception { tps[0].forEach(stats -> { Map threadPoolStats = List.of( Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_COMPLETED, stats.completed()), - Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_ACTIVE, (long) stats.active()), - Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_CURRENT, (long) stats.threads()), + Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_ACTIVE, 0L), + Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_CURRENT, 0L), Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_LARGEST, (long) stats.largest()), - Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_QUEUE, (long) stats.queue()) + Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_QUEUE, 0L) ).stream().collect(toUnmodifiableSortedMap(e -> stats.name() + e.getKey(), Entry::getValue)); Function> measurementExtractor = name -> { From 5ed106a79b278052842865d2e63c4817230af7ab Mon Sep 17 00:00:00 2001 From: Artem Prigoda Date: Tue, 3 Dec 2024 16:16:03 +0100 Subject: [PATCH 21/28] [test] Remove synchronization from InternalTestCluster#getInstance (#117780) The map of nodes is volatile and immutable and can be ready without synchronization. Getting a class's instance from the node's injector is also thread safe. Doing so prevents deadlocks if we restart the node and have a disruption scheme that internally calls `getInstance` from another thread. 
```
2> "elasticsearch[StatelessClusterIntegrityStressIT][server][T#1]" ID=3490 BLOCKED on org.elasticsearch.test.InternalTestCluster@18a6d098 owned by "elasticsearch[StatelessClusterIntegrityStressIT][server][T#2]" ID=3492
2> at app//org.elasticsearch.test.InternalTestCluster.getInstance(InternalTestCluster.java:1653)
2> - blocked on org.elasticsearch.test.InternalTestCluster@18a6d098
2> at app//org.elasticsearch.test.InternalTestCluster.getInstance(InternalTestCluster.java:1620)
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.transport(NetworkDisruption.java:172)
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.applyToNodes(NetworkDisruption.java:157)
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.startDisrupting(NetworkDisruption.java:133)
2> "elasticsearch[StatelessClusterIntegrityStressIT][server][T#2]" ID=3492 BLOCKED on org.elasticsearch.test.disruption.NetworkDisruption@60fd3a1e owned by "elasticsearch[StatelessClusterIntegrityStressIT][server][T#1]" ID=3490
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.applyToNode(NetworkDisruption.java:116)
2> - blocked on org.elasticsearch.test.disruption.NetworkDisruption@60fd3a1e
2> at app//org.elasticsearch.test.InternalTestCluster.applyDisruptionSchemeToNode(InternalTestCluster.java:2307)
2> at app//org.elasticsearch.test.InternalTestCluster.publishNode(InternalTestCluster.java:2258)
2> - locked org.elasticsearch.test.InternalTestCluster@18a6d098
2> at app//org.elasticsearch.test.InternalTestCluster.restartNode(InternalTestCluster.java:1901)
2> at app//org.elasticsearch.test.InternalTestCluster.restartNode(InternalTestCluster.java:1863)
2> - locked org.elasticsearch.test.InternalTestCluster@18a6d098
```
---
 .../main/java/org/elasticsearch/test/InternalTestCluster.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index 7a04384298933..6d46605e201f9 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -1649,7 +1649,7 @@ public T getAnyMasterNodeInstance(Class clazz) { return getInstance(clazz, MASTER_NODE_PREDICATE); } - private synchronized T getInstance(Class clazz, Predicate predicate) { + private T getInstance(Class clazz, Predicate predicate) { NodeAndClient randomNodeAndClient = getRandomNodeAndClient(predicate); if (randomNodeAndClient == null) { throw new AssertionError("no node matches [" + predicate + "]");

From 267dc1a41d49b11c6470ae1f83091debfc49e95f Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Tue, 3 Dec 2024 07:27:44 -0800
Subject: [PATCH 22/28] Fix BWC for ES|QL cluster request (#117865)

We identified a BWC bug in the cluster compute request. Specifically, the indices options were not properly selected for requests from an older querying cluster. This caused the search_shards API on the remote cluster to use restricted indices options, leading to failures when resolving wildcard index patterns. Our tests didn't catch this issue because the current BWC tests for cross-cluster queries only cover one direction: the querying cluster on the current version and the remote cluster on a compatible version.
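Condensed to its essence, the fix is a version-gated read with a more permissive fallback. The sketch below mirrors the `RemoteClusterPlan.from` change in the diff further down; the wrapper class is added here only to keep the snippet self-contained:

```java
import org.elasticsearch.TransportVersions;
import org.elasticsearch.action.OriginalIndices;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;

import java.io.IOException;

final class OriginalIndicesBwcSketch {
    static OriginalIndices readOriginalIndices(PlanStreamInput planIn) throws IOException {
        if (planIn.getTransportVersion().onOrAfter(TransportVersions.ESQL_ORIGINAL_INDICES)) {
            // Both clusters are new enough: the wire format carries the indices options.
            return OriginalIndices.readOriginalIndices(planIn);
        }
        // Older querying cluster: only the index names are on the wire, so the
        // remote must pick options itself. The permissive search defaults keep
        // wildcard index patterns resolvable by the search_shards API.
        return new OriginalIndices(planIn.readStringArray(), SearchRequest.DEFAULT_INDICES_OPTIONS);
    }
}
```

The key design choice is the fallback value: `SearchRequest.DEFAULT_INDICES_OPTIONS` matches what an older querying cluster would have applied to a search request, whereas the previously used `IndicesOptions.strictSingleIndexNoExpandForbidClosed()` forbade wildcard expansion and caused the resolution failures described above.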
This PR fixes the issue and expands BWC tests to support both directions: the querying cluster on the current version with the remote cluster on a compatible version, and vice versa. --- docs/changelog/117865.yaml | 5 + .../qa/server/multi-clusters/build.gradle | 17 +- .../xpack/esql/ccq/Clusters.java | 19 +- .../xpack/esql/ccq/EsqlRestValidationIT.java | 7 + .../xpack/esql/ccq/MultiClusterSpecIT.java | 7 +- .../xpack/esql/ccq/MultiClustersIT.java | 104 ++++++--- .../xpack/esql/qa/single_node/RestEsqlIT.java | 1 - .../xpack/esql/plugin/RemoteClusterPlan.java | 21 +- .../esql/plugin/ClusterRequestTests.java | 206 ++++++++++++++++++ 9 files changed, 345 insertions(+), 42 deletions(-) create mode 100644 docs/changelog/117865.yaml create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java diff --git a/docs/changelog/117865.yaml b/docs/changelog/117865.yaml new file mode 100644 index 0000000000000..33dc497725f92 --- /dev/null +++ b/docs/changelog/117865.yaml @@ -0,0 +1,5 @@ +pr: 117865 +summary: Fix BWC for ES|QL cluster request +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle b/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle index 7f3859e2229ef..d80cb764ca433 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle +++ b/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle @@ -23,9 +23,22 @@ def supportedVersion = bwcVersion -> { } buildParams.bwcVersions.withWireCompatible(supportedVersion) { bwcVersion, baseName -> - tasks.register(bwcTaskName(bwcVersion), StandaloneRestIntegTestTask) { + tasks.register("${baseName}#newToOld", StandaloneRestIntegTestTask) { + usesBwcDistribution(bwcVersion) + systemProperty("tests.version.remote_cluster", bwcVersion) + maxParallelForks = 1 + } + + tasks.register("${baseName}#oldToNew", StandaloneRestIntegTestTask) { usesBwcDistribution(bwcVersion) - systemProperty("tests.old_cluster_version", bwcVersion) + systemProperty("tests.version.local_cluster", bwcVersion) + maxParallelForks = 1 + } + + // TODO: avoid running tests twice with the current version + tasks.register(bwcTaskName(bwcVersion), StandaloneRestIntegTestTask) { + dependsOn tasks.named("${baseName}#oldToNew") + dependsOn tasks.named("${baseName}#newToOld") maxParallelForks = 1 } } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java index fa8cb49c59aed..5f3f135810322 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java @@ -20,7 +20,7 @@ public static ElasticsearchCluster remoteCluster() { return ElasticsearchCluster.local() .name(REMOTE_CLUSTER_NAME) .distribution(DistributionType.DEFAULT) - .version(Version.fromString(System.getProperty("tests.old_cluster_version"))) + .version(distributionVersion("tests.version.remote_cluster")) .nodes(2) .setting("node.roles", "[data,ingest,master]") .setting("xpack.security.enabled", "false") @@ -34,7 +34,7 @@ public static ElasticsearchCluster localCluster(ElasticsearchCluster remoteClust return ElasticsearchCluster.local() .name(LOCAL_CLUSTER_NAME) .distribution(DistributionType.DEFAULT) - .version(Version.CURRENT) + 
.version(distributionVersion("tests.version.local_cluster")) .nodes(2) .setting("xpack.security.enabled", "false") .setting("xpack.license.self_generated.type", "trial") @@ -46,7 +46,18 @@ public static ElasticsearchCluster localCluster(ElasticsearchCluster remoteClust .build(); } - public static org.elasticsearch.Version oldVersion() { - return org.elasticsearch.Version.fromString(System.getProperty("tests.old_cluster_version")); + public static org.elasticsearch.Version localClusterVersion() { + String prop = System.getProperty("tests.version.local_cluster"); + return prop != null ? org.elasticsearch.Version.fromString(prop) : org.elasticsearch.Version.CURRENT; + } + + public static org.elasticsearch.Version remoteClusterVersion() { + String prop = System.getProperty("tests.version.remote_cluster"); + return prop != null ? org.elasticsearch.Version.fromString(prop) : org.elasticsearch.Version.CURRENT; + } + + private static Version distributionVersion(String key) { + final String val = System.getProperty(key); + return val != null ? Version.fromString(val) : Version.CURRENT; } } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java index 21307c5362417..55500aa1c9537 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java @@ -10,12 +10,14 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.apache.http.HttpHost; +import org.elasticsearch.Version; import org.elasticsearch.client.RestClient; import org.elasticsearch.core.IOUtils; import org.elasticsearch.test.TestClustersThreadFilter; import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.xpack.esql.qa.rest.EsqlRestValidationTestCase; import org.junit.AfterClass; +import org.junit.Before; import org.junit.ClassRule; import org.junit.rules.RuleChain; import org.junit.rules.TestRule; @@ -78,4 +80,9 @@ private RestClient remoteClusterClient() throws IOException { } return remoteClient; } + + @Before + public void skipTestOnOldVersions() { + assumeTrue("skip on old versions", Clusters.localClusterVersion().equals(Version.V_8_16_0)); + } } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index 19b29764559d1..e658d169cbce8 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -12,6 +12,7 @@ import org.apache.http.HttpEntity; import org.apache.http.HttpHost; +import org.elasticsearch.Version; import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; import org.elasticsearch.client.RestClient; @@ -118,10 +119,8 @@ protected void shouldSkipTest(String testName) throws IOException { // Do not run tests including "METADATA _index" unless marked with metadata_fields_remote_test, // because they may produce inconsistent results with 
multiple clusters. assumeFalse("can't test with _index metadata", (remoteMetadata == false) && hasIndexMetadata(testCase.query)); - assumeTrue( - "Test " + testName + " is skipped on " + Clusters.oldVersion(), - isEnabled(testName, instructions, Clusters.oldVersion()) - ); + Version oldVersion = Version.min(Clusters.localClusterVersion(), Clusters.remoteClusterVersion()); + assumeTrue("Test " + testName + " is skipped on " + oldVersion, isEnabled(testName, instructions, oldVersion)); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName())); diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java index dbeaed1596eff..452f40baa34a8 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java @@ -10,6 +10,7 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.apache.http.HttpHost; +import org.elasticsearch.Version; import org.elasticsearch.client.Request; import org.elasticsearch.client.RestClient; import org.elasticsearch.common.Strings; @@ -29,7 +30,6 @@ import java.io.IOException; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -127,10 +127,12 @@ void indexDocs(RestClient client, String index, List docs) throws IOExcepti } private Map run(String query, boolean includeCCSMetadata) throws IOException { - Map resp = runEsql( - new RestEsqlTestCase.RequestObjectBuilder().query(query).includeCCSMetadata(includeCCSMetadata).build() - ); - logger.info("--> query {} response {}", query, resp); + var queryBuilder = new RestEsqlTestCase.RequestObjectBuilder().query(query); + if (includeCCSMetadata) { + queryBuilder.includeCCSMetadata(true); + } + Map resp = runEsql(queryBuilder.build()); + logger.info("--> query {} response {}", queryBuilder, resp); return resp; } @@ -156,7 +158,7 @@ private Map runEsql(RestEsqlTestCase.RequestObjectBuilder reques public void testCount() throws Exception { { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM test-local-index,*:test-remote-index | STATS c = COUNT(*)", includeCCSMetadata); var columns = List.of(Map.of("name", "c", "type", "long")); var values = List.of(List.of(localDocs.size() + remoteDocs.size())); @@ -165,13 +167,16 @@ public void testCount() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, false); } } { - 
boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM *:test-remote-index | STATS c = COUNT(*)", includeCCSMetadata); var columns = List.of(Map.of("name", "c", "type", "long")); var values = List.of(List.of(remoteDocs.size())); @@ -180,7 +185,10 @@ public void testCount() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, true); } @@ -189,7 +197,7 @@ public void testCount() throws Exception { public void testUngroupedAggs() throws Exception { { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM test-local-index,*:test-remote-index | STATS total = SUM(data)", includeCCSMetadata); var columns = List.of(Map.of("name", "total", "type", "long")); long sum = Stream.concat(localDocs.stream(), remoteDocs.stream()).mapToLong(d -> d.data).sum(); @@ -200,13 +208,16 @@ public void testUngroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, false); } } { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM *:test-remote-index | STATS total = SUM(data)", includeCCSMetadata); var columns = List.of(Map.of("name", "total", "type", "long")); long sum = remoteDocs.stream().mapToLong(d -> d.data).sum(); @@ -216,12 +227,16 @@ public void testUngroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, true); } } { + assumeTrue("requires ccs metadata", ccsMetadataAvailable()); Map result = runWithColumnarAndIncludeCCSMetadata("FROM *:test-remote-index | STATS total = SUM(data)"); var columns = List.of(Map.of("name", "total", "type", "long")); long sum = remoteDocs.stream().mapToLong(d -> d.data).sum(); @@ -293,7 +308,7 @@ private void assertClusterDetailsMap(Map result, boolean remoteO public void testGroupedAggs() throws Exception { { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run( "FROM test-local-index,*:test-remote-index | STATS total = SUM(data) BY color | SORT color", includeCCSMetadata @@ -311,13 +326,16 @@ public void testGroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, 
mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, false); } } { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run( "FROM *:test-remote-index | STATS total = SUM(data) by color | SORT color", includeCCSMetadata @@ -336,29 +354,57 @@ public void testGroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, true); } } } + public void testIndexPattern() throws Exception { + { + String indexPattern = randomFrom( + "test-local-index,*:test-remote-index", + "test-local-index,*:test-remote-*", + "test-local-index,*:test-*", + "test-*,*:test-remote-index" + ); + Map result = run("FROM " + indexPattern + " | STATS c = COUNT(*)", false); + var columns = List.of(Map.of("name", "c", "type", "long")); + var values = List.of(List.of(localDocs.size() + remoteDocs.size())); + MapMatcher mapMatcher = matchesMap(); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); + } + { + String indexPattern = randomFrom("*:test-remote-index", "*:test-remote-*", "*:test-*"); + Map result = run("FROM " + indexPattern + " | STATS c = COUNT(*)", false); + var columns = List.of(Map.of("name", "c", "type", "long")); + var values = List.of(List.of(remoteDocs.size())); + + MapMatcher mapMatcher = matchesMap(); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); + } + } + private RestClient remoteClusterClient() throws IOException { var clusterHosts = parseClusterHosts(remoteCluster.getHttpAddresses()); return buildClient(restClientSettings(), clusterHosts.toArray(new HttpHost[0])); } - private TestFeatureService remoteFeaturesService() throws IOException { - if (remoteFeaturesService == null) { - try (RestClient remoteClient = remoteClusterClient()) { - var remoteNodeVersions = readVersionsFromNodesInfo(remoteClient); - var semanticNodeVersions = remoteNodeVersions.stream() - .map(ESRestTestCase::parseLegacyVersion) - .flatMap(Optional::stream) - .collect(Collectors.toSet()); - remoteFeaturesService = createTestFeatureService(getClusterStateFeatures(remoteClient), semanticNodeVersions); - } - } - return remoteFeaturesService; + private static boolean ccsMetadataAvailable() { + return Clusters.localClusterVersion().onOrAfter(Version.V_8_16_0); + } + + private static boolean includeCCSMetadata() { + return ccsMetadataAvailable() && randomBoolean(); } } diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java 
b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java index 9a184b9a620fd..050259bbb5b5c 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java @@ -76,7 +76,6 @@ public void testBasicEsql() throws IOException { indexTimestampData(1); RequestObjectBuilder builder = requestObjectBuilder().query(fromIndex() + " | stats avg(value)"); - requestObjectBuilder().includeCCSMetadata(randomBoolean()); if (Build.current().isSnapshot()) { builder.pragmas(Settings.builder().put("data_partitioning", "shard").build()); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java index 8564e4b3afde1..031bfd7139a84 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java @@ -9,12 +9,14 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.action.OriginalIndices; -import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import java.io.IOException; +import java.util.Arrays; +import java.util.Objects; record RemoteClusterPlan(PhysicalPlan plan, String[] targetIndices, OriginalIndices originalIndices) { static RemoteClusterPlan from(PlanStreamInput planIn) throws IOException { @@ -24,7 +26,8 @@ static RemoteClusterPlan from(PlanStreamInput planIn) throws IOException { if (planIn.getTransportVersion().onOrAfter(TransportVersions.ESQL_ORIGINAL_INDICES)) { originalIndices = OriginalIndices.readOriginalIndices(planIn); } else { - originalIndices = new OriginalIndices(planIn.readStringArray(), IndicesOptions.strictSingleIndexNoExpandForbidClosed()); + // fallback to the previous behavior + originalIndices = new OriginalIndices(planIn.readStringArray(), SearchRequest.DEFAULT_INDICES_OPTIONS); } return new RemoteClusterPlan(plan, targetIndices, originalIndices); } @@ -38,4 +41,18 @@ public void writeTo(PlanStreamOutput out) throws IOException { out.writeStringArray(originalIndices.indices()); } } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; + RemoteClusterPlan that = (RemoteClusterPlan) o; + return Objects.equals(plan, that.plan) + && Objects.deepEquals(targetIndices, that.targetIndices) + && Objects.equals(originalIndices, that.originalIndices); + } + + @Override + public int hashCode() { + return Objects.hash(plan, Arrays.hashCode(targetIndices), originalIndices); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java new file mode 100644 index 0000000000000..07ca112e8c527 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java @@ -0,0 +1,206 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.TransportVersions; +import org.elasticsearch.action.OriginalIndices; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.search.SearchModule; +import org.elasticsearch.test.AbstractWireSerializingTestCase; +import org.elasticsearch.test.TransportVersionUtils; +import org.elasticsearch.xpack.esql.ConfigurationTestUtils; +import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.analysis.Analyzer; +import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; +import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.index.IndexResolution; +import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; +import org.elasticsearch.xpack.esql.parser.EsqlParser; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.esql.ConfigurationTestUtils.randomConfiguration; +import static org.elasticsearch.xpack.esql.ConfigurationTestUtils.randomTables; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_CFG; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.emptyPolicyResolution; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; +import static org.hamcrest.Matchers.equalTo; + +public class ClusterRequestTests extends AbstractWireSerializingTestCase<ClusterComputeRequest> { + + @Override + protected Writeable.Reader<ClusterComputeRequest> instanceReader() { + return ClusterComputeRequest::new; + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + List<NamedWriteableRegistry.Entry> writeables = new ArrayList<>(); + writeables.addAll(new SearchModule(Settings.EMPTY, List.of()).getNamedWriteables()); + writeables.addAll(new EsqlPlugin().getNamedWriteables()); + return new NamedWriteableRegistry(writeables); + } + + @Override + protected ClusterComputeRequest createTestInstance() { + var sessionId = randomAlphaOfLength(10); + String query = randomQuery(); + PhysicalPlan physicalPlan = DataNodeRequestTests.mapAndMaybeOptimize(parse(query)); + OriginalIndices originalIndices = new OriginalIndices( + generateRandomStringArray(10, 10, false, false), + IndicesOptions.fromOptions(randomBoolean(), randomBoolean(), randomBoolean(), randomBoolean()) + ); + String[] targetIndices = generateRandomStringArray(10, 10, false, false); + ClusterComputeRequest request = new ClusterComputeRequest( + randomAlphaOfLength(10), + sessionId, + randomConfiguration(query, randomTables()), + new RemoteClusterPlan(physicalPlan, targetIndices,
originalIndices) + ); + request.setParentTask(randomAlphaOfLength(10), randomNonNegativeLong()); + return request; + } + + @Override + protected ClusterComputeRequest mutateInstance(ClusterComputeRequest in) throws IOException { + return switch (between(0, 4)) { + case 0 -> { + var request = new ClusterComputeRequest( + randomValueOtherThan(in.clusterAlias(), () -> randomAlphaOfLength(10)), + in.sessionId(), + in.configuration(), + in.remoteClusterPlan() + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 1 -> { + var request = new ClusterComputeRequest( + in.clusterAlias(), + randomValueOtherThan(in.sessionId(), () -> randomAlphaOfLength(10)), + in.configuration(), + in.remoteClusterPlan() + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 2 -> { + var request = new ClusterComputeRequest( + in.clusterAlias(), + in.sessionId(), + randomValueOtherThan(in.configuration(), ConfigurationTestUtils::randomConfiguration), + in.remoteClusterPlan() + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 3 -> { + RemoteClusterPlan plan = in.remoteClusterPlan(); + var request = new ClusterComputeRequest( + in.clusterAlias(), + in.sessionId(), + in.configuration(), + new RemoteClusterPlan( + plan.plan(), + randomValueOtherThan(plan.targetIndices(), () -> generateRandomStringArray(10, 10, false, false)), + plan.originalIndices() + ) + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 4 -> { + RemoteClusterPlan plan = in.remoteClusterPlan(); + var request = new ClusterComputeRequest( + in.clusterAlias(), + in.sessionId(), + in.configuration(), + new RemoteClusterPlan( + plan.plan(), + plan.targetIndices(), + new OriginalIndices( + plan.originalIndices().indices(), + randomValueOtherThan( + plan.originalIndices().indicesOptions(), + () -> IndicesOptions.fromOptions(randomBoolean(), randomBoolean(), randomBoolean(), randomBoolean()) + ) + ) + ) + ); + request.setParentTask(in.getParentTask()); + yield request; + } + default -> throw new AssertionError("invalid value"); + }; + } + + public void testFallbackIndicesOptions() throws Exception { + ClusterComputeRequest request = createTestInstance(); + var version = TransportVersionUtils.randomVersionBetween( + random(), + TransportVersions.V_8_14_0, + TransportVersions.ESQL_ORIGINAL_INDICES + ); + ClusterComputeRequest cloned = copyInstance(request, version); + assertThat(cloned.clusterAlias(), equalTo(request.clusterAlias())); + assertThat(cloned.sessionId(), equalTo(request.sessionId())); + assertThat(cloned.configuration(), equalTo(request.configuration())); + RemoteClusterPlan plan = cloned.remoteClusterPlan(); + assertThat(plan.plan(), equalTo(request.remoteClusterPlan().plan())); + assertThat(plan.targetIndices(), equalTo(request.remoteClusterPlan().targetIndices())); + OriginalIndices originalIndices = plan.originalIndices(); + assertThat(originalIndices.indices(), equalTo(request.remoteClusterPlan().originalIndices().indices())); + assertThat(originalIndices.indicesOptions(), equalTo(SearchRequest.DEFAULT_INDICES_OPTIONS)); + } + + private static String randomQuery() { + return randomFrom(""" + from test + | where round(emp_no) > 10 + | limit 10 + """, """ + from test + | sort last_name + | limit 10 + | where round(emp_no) > 10 + | eval c = first_name + """); + } + + static LogicalPlan parse(String query) { + Map mapping = loadMapping("mapping-basic.json"); + EsIndex test = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD)); + 
IndexResolution getIndexResult = IndexResolution.valid(test); + var logicalOptimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(TEST_CFG)); + var analyzer = new Analyzer( + new AnalyzerContext(EsqlTestUtils.TEST_CFG, new EsqlFunctionRegistry(), getIndexResult, emptyPolicyResolution()), + TEST_VERIFIER + ); + return logicalOptimizer.optimize(analyzer.analyze(new EsqlParser().createStatement(query))); + } + + @Override + protected List<String> filteredWarnings() { + return withDefaultLimitWarning(super.filteredWarnings()); + } +} From 00a1222f10a6bc605f67aee67d4053c5ba0557e8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 4 Dec 2024 02:32:41 +1100 Subject: [PATCH 23/28] Mute org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilderTests testToQuery #117904 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 3652173327e84..857266a5a47cd 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -238,6 +238,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117862 - class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/117893 +- class: org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilderTests + method: testToQuery + issue: https://github.com/elastic/elasticsearch/issues/117904 # Examples: # From c1a9d44ed4ac980130deb730991f10cce127c583 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Tue, 3 Dec 2024 08:42:49 -0800 Subject: [PATCH 24/28] Guard against missing file in CI upload (#117889) Somehow files can be lost before the build ends up uploading them, presumably from temporary file deletion after tests complete. This commit guards against this case so that the build will not completely fail, but instead log a warning.
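For illustration, a minimal self-contained sketch of the guard this commit describes; the class and method names here are hypothetical, while the real change is made inline in ElasticsearchBuildCompletePlugin, as the hunk below shows.

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

class CiArchiveGuardSketch {
    // Keep only files that still exist when the archive is assembled: a file that
    // disappeared after tests completed is logged and skipped instead of failing
    // the whole build, while a non-regular file is still treated as a hard error.
    static List<Path> keepArchivable(List<Path> candidates) {
        List<Path> archivable = new ArrayList<>();
        for (Path path : candidates) {
            if (Files.exists(path) == false) {
                System.err.println("File disappeared before it could be added to CI archive: " + path);
                continue;
            }
            if (Files.isRegularFile(path) == false) {
                throw new UncheckedIOException(new IOException("Only regular files are supported: " + path));
            }
            archivable.add(path);
        }
        return archivable;
    }
}

The actual plugin logs through SLF4J and performs the existence check while writing tar entries, rather than pre-filtering the list.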
--- .../internal/ElasticsearchBuildCompletePlugin.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java index 14baa55794c95..b1207a2f5161d 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java @@ -29,6 +29,8 @@ import org.gradle.api.provider.Property; import org.gradle.api.tasks.Input; import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; @@ -47,6 +49,8 @@ public abstract class ElasticsearchBuildCompletePlugin implements Plugin<Project> { + private static final Logger log = LoggerFactory.getLogger(ElasticsearchBuildCompletePlugin.class); + @Inject protected abstract FlowScope getFlowScope(); @@ -241,8 +245,11 @@ private static void createBuildArchiveTar(List<File> files, File projectDir, Fil tOut.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU); tOut.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_STAR); for (Path path : files.stream().map(File::toPath).toList()) { - if (!Files.isRegularFile(path)) { - throw new IOException("Support only file!"); + if (Files.exists(path) == false) { + log.warn("File disappeared before it could be added to CI archive: " + path); + continue; + } else if (!Files.isRegularFile(path)) { + throw new IOException("Support only file!: " + path); } long entrySize = Files.size(path); From 0a208279ea869fafe7ee9b4c4ac60d4b9816bd25 Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Tue, 3 Dec 2024 17:53:10 +0100 Subject: [PATCH 25/28] ES|QL fix telemetry tests (usage stats) after promoting CATEGORIZE (#117878) --- muted-tests.yml | 3 --- .../resources/rest-api-spec/test/esql/60_usage.yml | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 857266a5a47cd..7bd06a6605028 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -233,9 +233,6 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117815 - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT issue: https://github.com/elastic/elasticsearch/issues/111319 -- class: org.elasticsearch.xpack.test.rest.XPackRestIT - method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} - issue: https://github.com/elastic/elasticsearch/issues/117862 - class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/117893 - class: org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilderTests method: testToQuery issue: https://github.com/elastic/elasticsearch/issues/117904 diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index f7dd979540afa..c23b44c00bd14 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -163,4 +163,4 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 118} # check the "sister" test above for a likely update to the
same esql.functions length check + - length: {esql.functions: 119} # check the "sister" test above for a likely update to the same esql.functions length check From 22f4a799377ea8710076ff10b74fbb48724a0c09 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Tue, 3 Dec 2024 20:08:05 +0200 Subject: [PATCH 26/28] Smarter field caps with subscribable listener (#116755) --- docs/changelog/116755.yaml | 5 + .../multi_node/RequestIndexFilteringIT.java | 27 ++ .../single_node/RequestIndexFilteringIT.java | 27 ++ .../rest/RequestIndexFilteringTestCase.java | 284 ++++++++++++++++ .../esql/qa/rest/RestEnrichTestCase.java | 176 +++++++++- .../esql/enrich/EnrichPolicyResolver.java | 2 +- .../xpack/esql/session/EsqlSession.java | 315 ++++++++++++------ .../xpack/esql/session/IndexResolver.java | 13 +- 8 files changed, 741 insertions(+), 108 deletions(-) create mode 100644 docs/changelog/116755.yaml create mode 100644 x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java create mode 100644 x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java create mode 100644 x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java diff --git a/docs/changelog/116755.yaml b/docs/changelog/116755.yaml new file mode 100644 index 0000000000000..3aa5ec8580b59 --- /dev/null +++ b/docs/changelog/116755.yaml @@ -0,0 +1,5 @@ +pr: 116755 +summary: Smarter field caps with subscribable listener +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java new file mode 100644 index 0000000000000..c2ba502b92554 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.qa.multi_node; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.xpack.esql.qa.rest.RequestIndexFilteringTestCase; +import org.junit.ClassRule; + +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class RequestIndexFilteringIT extends RequestIndexFilteringTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = Clusters.testCluster(ignored -> {}); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java new file mode 100644 index 0000000000000..f13bcd618f0a8 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.qa.single_node; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.xpack.esql.qa.rest.RequestIndexFilteringTestCase; +import org.junit.ClassRule; + +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class RequestIndexFilteringIT extends RequestIndexFilteringTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = Clusters.testCluster(); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java new file mode 100644 index 0000000000000..3314430d63eaa --- /dev/null +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java @@ -0,0 +1,284 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.qa.rest; + +import org.apache.http.util.EntityUtils; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.client.ResponseException; +import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.esql.AssertWarnings; +import org.junit.After; +import org.junit.Assert; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import static org.elasticsearch.test.ListMatcher.matchesList; +import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.elasticsearch.test.MapMatcher.matchesMap; +import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.entityToMap; +import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.requestObjectBuilder; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.nullValue; + +public abstract class RequestIndexFilteringTestCase extends ESRestTestCase { + + @After + public void wipeTestData() throws IOException { + try { + var response = client().performRequest(new Request("DELETE", "/test*")); + assertEquals(200, response.getStatusLine().getStatusCode()); + } catch (ResponseException re) { + assertEquals(404, re.getResponse().getStatusLine().getStatusCode()); + } + } + + public void testTimestampFilterFromQuery() throws IOException { + int docsTest1 = 50; + int docsTest2 = 30; + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // filter includes both indices in the result (all columns, all rows) + RestEsqlTestCase.RequestObjectBuilder builder = timestampFilter("gte", "2023-01-01").query("FROM test*"); + Map result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "id2").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1 + docsTest2))).entry("took", greaterThanOrEqualTo(0)) + ); + + // filter includes only test1. Columns from test2 are filtered out, as well (not only rows)! + builder = timestampFilter("gte", "2024-01-01").query("FROM test*"); + assertMap( + runEsql(builder), + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + + // filter excludes both indices (no rows); the first analysis step fails because there are no columns, a second attempt succeeds + // after eliminating the index filter. All columns are returned. 
+ builder = timestampFilter("gte", "2025-01-01").query("FROM test*"); + assertMap( + runEsql(builder), + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "id2").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(0))).entry("took", greaterThanOrEqualTo(0)) + ); + } + + public void testFieldExistsFilter_KeepWildcard() throws IOException { + int docsTest1 = randomIntBetween(0, 10); + int docsTest2 = randomIntBetween(0, 10); + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // filter includes only test1. Columns and rows of test2 are filtered out + RestEsqlTestCase.RequestObjectBuilder builder = existsFilter("id1").query("FROM test*"); + Map<String, Object> result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + + // filter includes only test1. Columns from test2 are filtered out, as well (not only rows)! + builder = existsFilter("id1").query("FROM test* METADATA _index | KEEP _index, id*"); + result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "_index").entry("type", "keyword")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + @SuppressWarnings("unchecked") + var values = (List<List<Object>>) result.get("values"); + for (List<Object> row : values) { + assertThat(row.get(0), equalTo("test1")); + assertThat(row.get(1), instanceOf(Integer.class)); + } + } + + public void testFieldExistsFilter_With_ExplicitUseOfDiscardedIndexFields() throws IOException { + int docsTest1 = randomIntBetween(1, 5); + int docsTest2 = randomIntBetween(0, 5); + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // test2 is explicitly used in a query with "SORT id2" even if the index filter should discard test2 + RestEsqlTestCase.RequestObjectBuilder builder = existsFilter("id1").query( + "FROM test* METADATA _index | SORT id2 | KEEP _index, id*" + ); + Map<String, Object> result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "_index").entry("type", "keyword")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "id2").entry("type", "integer")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + @SuppressWarnings("unchecked") + var values = (List<List<Object>>) result.get("values"); + for (List<Object> row : values) { + assertThat(row.get(0), equalTo("test1")); + assertThat(row.get(1), instanceOf(Integer.class)); + assertThat(row.get(2), nullValue()); + } + } + + public void testFieldNameTypo() throws IOException { + int docsTest1 = randomIntBetween(0, 5); + int docsTest2 = randomIntBetween(0, 5); +
indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // idx field name is explicitly used, though it doesn't exist in any of the indices. First test - without filter + ResponseException e = expectThrows( + ResponseException.class, + () -> runEsql(requestObjectBuilder().query("FROM test* | WHERE idx == 123")) + ); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + + e = expectThrows(ResponseException.class, () -> runEsql(requestObjectBuilder().query("FROM test1 | WHERE idx == 123"))); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + + e = expectThrows( + ResponseException.class, + () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM test* | WHERE idx == 123")) + ); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + + e = expectThrows( + ResponseException.class, + () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM test2 | WHERE idx == 123")) + ); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + } + + public void testIndicesDontExist() throws IOException { + int docsTest1 = 0; // we are interested only in the created index, not necessarily that it has data + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + + ResponseException e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo"))); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Unknown index [foo]")); + + e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo*"))); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Unknown index [foo*]")); + + e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo,test1"))); + assertEquals(404, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("index_not_found_exception")); + assertThat(e.getMessage(), containsString("no such index [foo]")); + } + + private static RestEsqlTestCase.RequestObjectBuilder timestampFilter(String op, String date) throws IOException { + return requestObjectBuilder().filter(b -> { + b.startObject("range"); + { + b.startObject("@timestamp").field(op, date).endObject(); + } + b.endObject(); + }); + } + + private static RestEsqlTestCase.RequestObjectBuilder existsFilter(String field) throws IOException { + return requestObjectBuilder().filter(b -> b.startObject("exists").field("field", 
field).endObject()); + } + + public Map runEsql(RestEsqlTestCase.RequestObjectBuilder requestObject) throws IOException { + return RestEsqlTestCase.runEsql(requestObject, new AssertWarnings.NoWarnings(), RestEsqlTestCase.Mode.SYNC); + } + + protected void indexTimestampData(int docs, String indexName, String date, String differentiatorFieldName) throws IOException { + Request createIndex = new Request("PUT", indexName); + createIndex.setJsonEntity(""" + { + "settings": { + "index": { + "number_of_shards": 3 + } + }, + "mappings": { + "properties": { + "@timestamp": { + "type": "date" + }, + "%differentiator_field_name%": { + "type": "integer" + } + } + } + }""".replace("%differentiator_field_name%", differentiatorFieldName)); + Response response = client().performRequest(createIndex); + assertThat( + entityToMap(response.getEntity(), XContentType.JSON), + matchesMap().entry("shards_acknowledged", true).entry("index", indexName).entry("acknowledged", true) + ); + + if (docs > 0) { + StringBuilder b = new StringBuilder(); + for (int i = 0; i < docs; i++) { + b.append(String.format(Locale.ROOT, """ + {"create":{"_index":"%s"}} + {"@timestamp":"%s","value":%d,"%s":%d} + """, indexName, date, i, differentiatorFieldName, i)); + } + Request bulk = new Request("POST", "/_bulk"); + bulk.addParameter("refresh", "true"); + bulk.addParameter("filter_path", "errors"); + bulk.setJsonEntity(b.toString()); + response = client().performRequest(bulk); + Assert.assertEquals("{\"errors\":false}", EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8)); + } + } +} diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java index def6491fb920f..bf4a4400e13cf 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java @@ -12,7 +12,9 @@ import org.apache.http.util.EntityUtils; import org.elasticsearch.client.Request; import org.elasticsearch.client.ResponseException; +import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xcontent.XContentBuilder; import org.junit.After; import org.junit.Before; @@ -29,7 +31,6 @@ public abstract class RestEnrichTestCase extends ESRestTestCase { private static final String sourceIndexName = "countries"; - private static final String testIndexName = "test"; private static final String policyName = "countries"; public enum Mode { @@ -56,7 +57,7 @@ public void assertRequestBreakerEmpty() throws Exception { @Before public void loadTestData() throws IOException { - Request request = new Request("PUT", "/" + testIndexName); + Request request = new Request("PUT", "/test1"); request.setJsonEntity(""" { "mappings": { @@ -72,7 +73,7 @@ public void loadTestData() throws IOException { }"""); assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); - request = new Request("POST", "/" + testIndexName + "/_bulk"); + request = new Request("POST", "/test1/_bulk"); request.addParameter("refresh", "true"); request.setJsonEntity(""" { "index": {"_id": 1} } @@ -84,6 +85,34 @@ public void loadTestData() throws IOException { """); assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + request = new Request("PUT", "/test2"); + 
request.setJsonEntity(""" + { + "mappings": { + "properties": { + "geo.dest": { + "type": "keyword" + }, + "country_number": { + "type": "long" + } + } + } + }"""); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + + request = new Request("POST", "/test2/_bulk"); + request.addParameter("refresh", "true"); + request.setJsonEntity(""" + { "index": {"_id": 1} } + { "geo.dest": "IN", "country_number": 2 } + { "index": {"_id": 2} } + { "geo.dest": "IN", "country_number": 2 } + { "index": {"_id": 3} } + { "geo.dest": "US", "country_number": 3 } + """); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + request = new Request("PUT", "/" + sourceIndexName); request.setJsonEntity(""" { @@ -131,7 +160,7 @@ public void loadTestData() throws IOException { @After public void wipeTestData() throws IOException { try { - var response = client().performRequest(new Request("DELETE", "/" + testIndexName)); + var response = client().performRequest(new Request("DELETE", "/test1,test2")); assertEquals(200, response.getStatusLine().getStatusCode()); response = client().performRequest(new Request("DELETE", "/" + sourceIndexName)); assertEquals(200, response.getStatusLine().getStatusCode()); @@ -143,7 +172,7 @@ public void wipeTestData() throws IOException { } public void testNonExistentEnrichPolicy() throws IOException { - ResponseException re = expectThrows(ResponseException.class, () -> runEsql("from test | enrich countris", Mode.SYNC)); + ResponseException re = expectThrows(ResponseException.class, () -> runEsql("from test1 | enrich countris", null, Mode.SYNC)); assertThat( EntityUtils.toString(re.getResponse().getEntity()), containsString("cannot find enrich policy [countris], did you mean [countries]?") @@ -151,7 +180,10 @@ public void testNonExistentEnrichPolicy() throws IOException { } public void testNonExistentEnrichPolicy_KeepField() throws IOException { - ResponseException re = expectThrows(ResponseException.class, () -> runEsql("from test | enrich countris | keep number", Mode.SYNC)); + ResponseException re = expectThrows( + ResponseException.class, + () -> runEsql("from test1 | enrich countris | keep number", null, Mode.SYNC) + ); assertThat( EntityUtils.toString(re.getResponse().getEntity()), containsString("cannot find enrich policy [countris], did you mean [countries]?") @@ -159,25 +191,147 @@ public void testNonExistentEnrichPolicy_KeepField() throws IOException { } public void testMatchField_ImplicitFieldsList() throws IOException { - Map result = runEsql("from test | enrich countries | keep number | sort number"); + Map result = runEsql("from test1 | enrich countries | keep number | sort number"); var columns = List.of(Map.of("name", "number", "type", "long")); var values = List.of(List.of(1000), List.of(1000), List.of(5000)); assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); } public void testMatchField_ImplicitFieldsList_WithStats() throws IOException { - Map result = runEsql("from test | enrich countries | stats s = sum(number) by country_name"); + Map result = runEsql("from test1 | enrich countries | stats s = sum(number) by country_name"); var columns = List.of(Map.of("name", "s", "type", "long"), Map.of("name", "country_name", "type", "keyword")); var values = List.of(List.of(2000, "United States of America"), List.of(5000, "China")); assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", 
greaterThanOrEqualTo(0))); } + public void testSimpleIndexFilteringWithEnrich() throws IOException { + // no filter + Map result = runEsql(""" + from test* metadata _index + | enrich countries + | keep *number, geo.dest, _index + | sort geo.dest, _index + """); + var columns = List.of( + Map.of("name", "country_number", "type", "long"), + Map.of("name", "number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + var values = List.of( + Arrays.asList(null, 5000, "CN", "test1"), + Arrays.asList(2, null, "IN", "test2"), + Arrays.asList(2, null, "IN", "test2"), + Arrays.asList(null, 1000, "US", "test1"), + Arrays.asList(null, 1000, "US", "test1"), + Arrays.asList(3, null, "US", "test2") + ); + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + + // filter something that won't affect the columns + result = runEsql(""" + from test* metadata _index + | enrich countries + | keep *number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "foobar").endObject()); + assertMap(result, matchesMap().entry("columns", columns).entry("values", List.of()).entry("took", greaterThanOrEqualTo(0))); + } + + public void testIndexFilteringWithEnrich_RemoveOneIndex() throws IOException { + // filter out test2 but specifically use one of its fields in the query (country_number) + Map result = runEsql(""" + from test* metadata _index + | enrich countries + | keep country_number, number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "number").endObject()); + + var columns = List.of( + Map.of("name", "country_number", "type", "long"), + Map.of("name", "number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + var values = List.of( + Arrays.asList(null, 5000, "CN", "test1"), + Arrays.asList(null, 1000, "US", "test1"), + Arrays.asList(null, 1000, "US", "test1") + ); + + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + + // filter out test2 and use a wildcarded field name in the "keep" command + result = runEsql(""" + from test* metadata _index + | enrich countries + | keep *number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "number").endObject()); + + columns = List.of( + Map.of("name", "number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + values = List.of(Arrays.asList(5000, "CN", "test1"), Arrays.asList(1000, "US", "test1"), Arrays.asList(1000, "US", "test1")); + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + } + + public void testIndexFilteringWithEnrich_ExpectException() throws IOException { + // no filter, just a simple query with "enrich" that should throw a valid VerificationException + ResponseException e = expectThrows(ResponseException.class, () -> runEsql(""" + from test* metadata _index + | enrich countries + | where foobar == 123 + """)); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 3:13: Unknown column [foobar]")); + + // same query, but with a filter this time + e = 
expectThrows(ResponseException.class, () -> runEsql(""" + from test* metadata _index + | enrich countries + | where foobar == 123 + """, b -> b.startObject("exists").field("field", "number").endObject())); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 3:13: Unknown column [foobar]")); + } + + public void testIndexFilteringWithEnrich_FilterUnusedIndexFields() throws IOException { + // filter out "test1". The field that is specific to "test1" ("number") is not actually used in the query + Map<String, Object> result = runEsql(""" + from test* metadata _index + | enrich countries + | keep country_number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "country_number").endObject()); + + var columns = List.of( + Map.of("name", "country_number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + var values = List.of(Arrays.asList(2, "IN", "test2"), Arrays.asList(2, "IN", "test2"), Arrays.asList(3, "US", "test2")); + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + } + private Map<String, Object> runEsql(String query) throws IOException { - return runEsql(query, mode); + return runEsql(query, null, mode); } - private Map<String, Object> runEsql(String query, Mode mode) throws IOException { - var requestObject = new RestEsqlTestCase.RequestObjectBuilder().query(query); + private Map<String, Object> runEsql(String query, CheckedConsumer<XContentBuilder, IOException> filter) throws IOException { + return runEsql(query, filter, mode); + } + + private Map<String, Object> runEsql(String query, CheckedConsumer<XContentBuilder, IOException> filter, Mode mode) throws IOException { + var requestObject = new RestEsqlTestCase.RequestObjectBuilder(); + if (filter != null) { + requestObject.filter(filter); + } + requestObject.query(query); if (mode == Mode.ASYNC) { return RestEsqlTestCase.runEsqlAsync(requestObject); } else { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java index c8a7a6bcc4e98..c8e993b7dbf0b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java @@ -411,7 +411,7 @@ public void messageReceived(LookupRequest request, TransportChannel channel, Tas } try (ThreadContext.StoredContext ignored = threadContext.stashWithOrigin(ClientHelper.ENRICH_ORIGIN)) { String indexName = EnrichPolicy.getBaseName(policyName); - indexResolver.resolveAsMergedMapping(indexName, IndexResolver.ALL_FIELDS, refs.acquire(indexResult -> { + indexResolver.resolveAsMergedMapping(indexName, IndexResolver.ALL_FIELDS, null, refs.acquire(indexResult -> { if (indexResult.isValid() && indexResult.get().concreteIndices().size() == 1) { EsIndex esIndex = indexResult.get(); var concreteIndices = Map.of(request.clusterAlias, Iterables.get(esIndex.concreteIndices(), 0)); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 3d1ed8f70eae0..71fba5683644d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.OriginalIndices; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.common.Strings; import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.collect.Iterators; @@ -25,6 +26,7 @@ import org.elasticsearch.indices.IndicesExpressionGrouper; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; +import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.action.EsqlExecutionInfo; import org.elasticsearch.xpack.esql.action.EsqlQueryRequest; import org.elasticsearch.xpack.esql.analysis.Analyzer; @@ -151,6 +153,7 @@ public void execute(EsqlQueryRequest request, EsqlExecutionInfo executionInfo, P analyzedPlan( parse(request.query(), request.params()), executionInfo, + request.filter(), new EsqlSessionCCSUtils.CssPartialErrorsActionListener(executionInfo, listener) { @Override public void onResponse(LogicalPlan analyzedPlan) { @@ -268,31 +271,28 @@ private LogicalPlan parse(String query, QueryParams params) { return parsed; } - public void analyzedPlan(LogicalPlan parsed, EsqlExecutionInfo executionInfo, ActionListener<LogicalPlan> listener) { + public void analyzedPlan( + LogicalPlan parsed, + EsqlExecutionInfo executionInfo, + QueryBuilder requestFilter, + ActionListener<LogicalPlan> logicalPlanListener + ) { if (parsed.analyzed()) { - listener.onResponse(parsed); + logicalPlanListener.onResponse(parsed); return; } - preAnalyze(parsed, executionInfo, (indices, lookupIndices, policies) -> { + TriFunction<IndexResolution, IndexResolution, EnrichResolution, LogicalPlan> analyzeAction = (indices, lookupIndices, policies) -> { planningMetrics.gatherPreAnalysisMetrics(parsed); Analyzer analyzer = new Analyzer( new AnalyzerContext(configuration, functionRegistry, indices, lookupIndices, policies), verifier ); - var plan = analyzer.analyze(parsed); + LogicalPlan plan = analyzer.analyze(parsed); plan.setAnalyzed(); - LOGGER.debug("Analyzed plan:\n{}", plan); return plan; - }, listener); - } + }; - private <T> void preAnalyze( - LogicalPlan parsed, - EsqlExecutionInfo executionInfo, - TriFunction<IndexResolution, IndexResolution, EnrichResolution, T> action, - ActionListener<T> listener - ) { PreAnalyzer.PreAnalysis preAnalysis = preAnalyzer.preAnalyze(parsed); var unresolvedPolicies = preAnalysis.enriches.stream() .map(e -> new EnrichPolicyResolver.UnresolvedPolicy((String) e.policyName().fold(), e.mode())) @@ -302,81 +302,113 @@ private void preAnalyze( final Set<String> targetClusters = enrichPolicyResolver.groupIndicesPerCluster( indices.stream().flatMap(t -> Arrays.stream(Strings.commaDelimitedListToStringArray(t.id().index()))).toArray(String[]::new) ).keySet(); - enrichPolicyResolver.resolvePolicies(targetClusters, unresolvedPolicies, listener.delegateFailureAndWrap((l, enrichResolution) -> { - // first we need the match_fields names from enrich policies and THEN, with an updated list of fields, we call field_caps API - var enrichMatchFields = enrichResolution.resolvedEnrichPolicies() - .stream() - .map(ResolvedEnrichPolicy::matchField) - .collect(Collectors.toSet()); - // get the field names from the parsed plan combined with the ENRICH match fields from the ENRICH policy - var fieldNames = fieldNames(parsed, enrichMatchFields); - // First resolve the lookup indices, then the main indices preAnalyzeLookupIndices( preAnalysis.lookupIndices, + + SubscribableListener.<EnrichResolution>newForked(l ->
enrichPolicyResolver.resolvePolicies(targetClusters, unresolvedPolicies, l)) + .<ListenerResult>andThen((l, enrichResolution) -> { + // we need the match_fields names from enrich policies and THEN, with an updated list of fields, we call field_caps API + var enrichMatchFields = enrichResolution.resolvedEnrichPolicies() + .stream() + .map(ResolvedEnrichPolicy::matchField) + .collect(Collectors.toSet()); + // get the field names from the parsed plan combined with the ENRICH match fields from the ENRICH policy + var fieldNames = fieldNames(parsed, enrichMatchFields); + ListenerResult listenerResult = new ListenerResult(null, null, enrichResolution, fieldNames); + + // first resolve the lookup indices, then the main indices + preAnalyzeLookupIndices(preAnalysis.lookupIndices, listenerResult, l); + }) + .<ListenerResult>andThen((l, listenerResult) -> { + // resolve the main indices + preAnalyzeIndices(preAnalysis.indices, executionInfo, listenerResult, requestFilter, l); + }) + .<ListenerResult>andThen((l, listenerResult) -> { + // TODO in follow-PR (for skip_unavailable handling of missing concrete indexes) add some tests for + // invalid index resolution to updateExecutionInfo + if (listenerResult.indices.isValid()) { + // CCS indices and skip_unavailable cluster values can stop the analysis right here + if (analyzeCCSIndices(executionInfo, targetClusters, unresolvedPolicies, listenerResult, logicalPlanListener, l)) return; + } + // whatever tuple we have here (from CCS-special handling or from the original pre-analysis), pass it on to the next step + l.onResponse(listenerResult); + }) + .<ListenerResult>andThen((l, listenerResult) -> { + // first attempt (maybe the only one) at analyzing the plan + analyzeAndMaybeRetry(analyzeAction, requestFilter, listenerResult, logicalPlanListener, l); + }) + .<ListenerResult>andThen((l, listenerResult) -> { + assert requestFilter != null : "The second pre-analysis shouldn't take place when there is no index filter in the request"; + + // "reset" execution information for all ccs or non-ccs (local) clusters, since we are performing the indices + // resolving one more time (the first attempt failed and the query had a filter) + for (String clusterAlias : executionInfo.clusterAliases()) { + executionInfo.swapCluster(clusterAlias, (k, v) -> null); + } + + // here the requestFilter is set to null, performing the pre-analysis after the first step failed + preAnalyzeIndices(preAnalysis.indices, executionInfo, listenerResult, null, l); + }) + .<LogicalPlan>andThen((l, listenerResult) -> { + assert requestFilter != null : "The second analysis shouldn't take place when there is no index filter in the request"; + LOGGER.debug("Analyzing the plan (second attempt, without filter)"); + LogicalPlan plan; + try { + plan = analyzeAction.apply(listenerResult.indices, listenerResult.lookupIndices, listenerResult.enrichResolution); + } catch (Exception e) { + l.onFailure(e); + return; + } + LOGGER.debug("Analyzed plan (second attempt, without filter):\n{}", plan); + l.onResponse(plan); + }) + .addListener(logicalPlanListener); + } + + private void preAnalyzeLookupIndices(List<TableInfo> indices, ListenerResult listenerResult, ActionListener<ListenerResult> listener) { + if (indices.size() > 1) { + // Note: JOINs on more than one index are not yet supported + listener.onFailure(new MappingException("More than one LOOKUP JOIN is not supported")); + } else if (indices.size() == 1) { + TableInfo tableInfo = indices.get(0); + TableIdentifier table = tableInfo.id(); + // call the EsqlResolveFieldsAction (field-caps) to resolve indices and get field types + indexResolver.resolveAsMergedMapping(
table.index(), Set.of("*"), // Current LOOKUP JOIN syntax does not allow for field selection - l.delegateFailureAndWrap( - (lx, lookupIndexResolution) -> preAnalyzeIndices( - indices, - executionInfo, - enrichResolution.getUnavailableClusters(), - fieldNames, - lx.delegateFailureAndWrap((ll, indexResolution) -> { - // TODO in follow-PR (for skip_unavailble handling of missing concrete indexes) add some tests for invalid - // index resolution to updateExecutionInfo - if (indexResolution.isValid()) { - EsqlSessionCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, indexResolution); - EsqlSessionCCSUtils.updateExecutionInfoWithUnavailableClusters( - executionInfo, - indexResolution.unavailableClusters() - ); - if (executionInfo.isCrossClusterSearch() - && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) == 0) { - // for a CCS, if all clusters have been marked as SKIPPED, nothing to search so send a sentinel - // Exception to let the LogicalPlanActionListener decide how to proceed - ll.onFailure(new NoClustersToSearchException()); - return; - } - - Set<String> newClusters = enrichPolicyResolver.groupIndicesPerCluster( - indexResolution.get().concreteIndices().toArray(String[]::new) - ).keySet(); - // If new clusters appear when resolving the main indices, we need to resolve the enrich policies again - // or exclude main concrete indices. Since this is rare, it's simpler to resolve the enrich policies - // again. - // TODO: add a test for this - if (targetClusters.containsAll(newClusters) == false - // do not bother with a re-resolution if only remotes were requested and all were offline - && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) > 0) { - enrichPolicyResolver.resolvePolicies( - newClusters, - unresolvedPolicies, - ll.map( - newEnrichResolution -> action.apply(indexResolution, lookupIndexResolution, newEnrichResolution) - ) - ); - return; - } - } - ll.onResponse(action.apply(indexResolution, lookupIndexResolution, enrichResolution)); - }) - ) - ) + null, + listener.map(indexResolution -> listenerResult.withLookupIndexResolution(indexResolution)) ); - })); + } else { + try { + // No lookup indices specified + listener.onResponse( + new ListenerResult( + listenerResult.indices, + IndexResolution.invalid("[none specified]"), + listenerResult.enrichResolution, + listenerResult.fieldNames + ) + ); + } catch (Exception ex) { + listener.onFailure(ex); + } + } } private void preAnalyzeIndices( List<TableInfo> indices, EsqlExecutionInfo executionInfo, - Map<String, FieldCapabilitiesFailure> unavailableClusters, // known to be unavailable from the enrich policy API call - Set<String> fieldNames, - ActionListener<IndexResolution> listener + ListenerResult listenerResult, + QueryBuilder requestFilter, + ActionListener<ListenerResult> listener ) { // TODO we plan to support joins in the future when possible, but for now we'll just fail early if we see one if (indices.size() > 1) { // Note: JOINs are not supported but we detect them when listener.onFailure(new MappingException("Queries with multiple indices are not supported")); } else if (indices.size() == 1) { + // known to be unavailable from the enrich policy API call + Map<String, FieldCapabilitiesFailure> unavailableClusters = listenerResult.enrichResolution.getUnavailableClusters(); TableInfo tableInfo = indices.get(0); TableIdentifier table = tableInfo.id(); @@ -409,38 +441,116 @@ private void preAnalyzeIndices( String indexExpressionToResolve = EsqlSessionCCSUtils.createIndexExpressionFromAvailableClusters(executionInfo); if (indexExpressionToResolve.isEmpty()) { // if this was a pure
remote CCS request (no local indices) and all remotes are offline, return an empty IndexResolution - listener.onResponse(IndexResolution.valid(new EsIndex(table.index(), Map.of(), Map.of()))); + listener.onResponse( + new ListenerResult( + IndexResolution.valid(new EsIndex(table.index(), Map.of(), Map.of())), + listenerResult.lookupIndices, + listenerResult.enrichResolution, + listenerResult.fieldNames + ) + ); } else { // call the EsqlResolveFieldsAction (field-caps) to resolve indices and get field types - indexResolver.resolveAsMergedMapping(indexExpressionToResolve, fieldNames, listener); + indexResolver.resolveAsMergedMapping( + indexExpressionToResolve, + listenerResult.fieldNames, + requestFilter, + listener.map(indexResolution -> listenerResult.withIndexResolution(indexResolution)) + ); } } else { try { // occurs when dealing with local relations (row a = 1) - listener.onResponse(IndexResolution.invalid("[none specified]")); + listener.onResponse( + new ListenerResult( + IndexResolution.invalid("[none specified]"), + listenerResult.lookupIndices, + listenerResult.enrichResolution, + listenerResult.fieldNames + ) + ); } catch (Exception ex) { listener.onFailure(ex); } } } - private void preAnalyzeLookupIndices(List<TableInfo> indices, Set<String> fieldNames, ActionListener<IndexResolution> listener) { - if (indices.size() > 1) { - // Note: JOINs on more than one index are not yet supported - listener.onFailure(new MappingException("More than one LOOKUP JOIN is not supported")); - } else if (indices.size() == 1) { - TableInfo tableInfo = indices.get(0); - TableIdentifier table = tableInfo.id(); - // call the EsqlResolveFieldsAction (field-caps) to resolve indices and get field types - indexResolver.resolveAsMergedMapping(table.index(), fieldNames, listener); - } else { - try { - // No lookup indices specified - listener.onResponse(IndexResolution.invalid("[none specified]")); - } catch (Exception ex) { - listener.onFailure(ex); + private boolean analyzeCCSIndices( + EsqlExecutionInfo executionInfo, + Set<String> targetClusters, + Set<EnrichPolicyResolver.UnresolvedPolicy> unresolvedPolicies, + ListenerResult listenerResult, + ActionListener<LogicalPlan> logicalPlanListener, + ActionListener<ListenerResult> l + ) { + IndexResolution indexResolution = listenerResult.indices; + EsqlSessionCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, indexResolution); + EsqlSessionCCSUtils.updateExecutionInfoWithUnavailableClusters(executionInfo, indexResolution.unavailableClusters()); + if (executionInfo.isCrossClusterSearch() && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) == 0) { + // for a CCS, if all clusters have been marked as SKIPPED, nothing to search so send a sentinel Exception + // to let the LogicalPlanActionListener decide how to proceed + logicalPlanListener.onFailure(new NoClustersToSearchException()); + return true; + } + + Set<String> newClusters = enrichPolicyResolver.groupIndicesPerCluster( + indexResolution.get().concreteIndices().toArray(String[]::new) + ).keySet(); + // If new clusters appear when resolving the main indices, we need to resolve the enrich policies again + // or exclude main concrete indices. Since this is rare, it's simpler to resolve the enrich policies again.
+ // TODO: add a test for this + if (targetClusters.containsAll(newClusters) == false + // do not bother with a re-resolution if only remotes were requested and all were offline + && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) > 0) { + enrichPolicyResolver.resolvePolicies( + newClusters, + unresolvedPolicies, + l.map(enrichResolution -> listenerResult.withEnrichResolution(enrichResolution)) + ); + return true; + } + return false; + } + + private static void analyzeAndMaybeRetry( + TriFunction<IndexResolution, IndexResolution, EnrichResolution, LogicalPlan> analyzeAction, + QueryBuilder requestFilter, + ListenerResult listenerResult, + ActionListener<LogicalPlan> logicalPlanListener, + ActionListener<ListenerResult> l + ) { + LogicalPlan plan = null; + var filterPresentMessage = requestFilter == null ? "without" : "with"; + var attemptMessage = requestFilter == null ? "the only" : "first"; + LOGGER.debug("Analyzing the plan ({} attempt, {} filter)", attemptMessage, filterPresentMessage); + + try { + plan = analyzeAction.apply(listenerResult.indices, listenerResult.lookupIndices, listenerResult.enrichResolution); + } catch (Exception e) { + if (e instanceof VerificationException ve) { + LOGGER.debug( + "Analyzing the plan ({} attempt, {} filter) failed with {}", + attemptMessage, + filterPresentMessage, + ve.getDetailedMessage() + ); + if (requestFilter == null) { + // if the initial request didn't have a filter, then just pass the exception back to the user + logicalPlanListener.onFailure(ve); + } else { + // interested only in a VerificationException, but this time we are taking out the index filter + // to try and make the index resolution work without any index filtering. In the next step... to be continued + l.onResponse(listenerResult); + } + } else { + // if the query failed with any other type of exception, then just pass the exception back to the user + logicalPlanListener.onFailure(e); } + return; } + LOGGER.debug("Analyzed plan ({} attempt, {} filter):\n{}", attemptMessage, filterPresentMessage, plan); + // the analysis succeeded from the first attempt, irrespective if it had a filter or not, just continue with the planning + logicalPlanListener.onResponse(plan); } static Set<String> fieldNames(LogicalPlan parsed, Set<String> enrichPolicyMatchFields) { @@ -591,4 +701,23 @@ public PhysicalPlan optimizedPhysicalPlan(LogicalPlan optimizedPlan) { LOGGER.debug("Optimized physical plan:\n{}", plan); return plan; } + + private record ListenerResult( + IndexResolution indices, + IndexResolution lookupIndices, + EnrichResolution enrichResolution, + Set<String> fieldNames + ) { + ListenerResult withEnrichResolution(EnrichResolution newEnrichResolution) { + return new ListenerResult(indices(), lookupIndices(), newEnrichResolution, fieldNames()); + } + + ListenerResult withIndexResolution(IndexResolution newIndexResolution) { + return new ListenerResult(newIndexResolution, lookupIndices(), enrichResolution(), fieldNames()); + } + + ListenerResult withLookupIndexResolution(IndexResolution newIndexResolution) { + return new ListenerResult(indices(), newIndexResolution, enrichResolution(), fieldNames()); + } + }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java index f61be4b59830e..d000b2765e2b1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java @@ -18,6 +18,7 @@ import
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
index f61be4b59830e..d000b2765e2b1 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
@@ -18,6 +18,7 @@ import org.elasticsearch.common.util.Maps;
 import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.mapper.TimeSeriesParams;
+import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.xpack.esql.action.EsqlResolveFieldsAction;
 import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -76,10 +77,15 @@ public IndexResolver(Client client) {
     /**
      * Resolves a pattern to one (potentially compound, meaning it spans multiple indices) mapping.
      */
-    public void resolveAsMergedMapping(String indexWildcard, Set<String> fieldNames, ActionListener<IndexResolution> listener) {
+    public void resolveAsMergedMapping(
+        String indexWildcard,
+        Set<String> fieldNames,
+        QueryBuilder requestFilter,
+        ActionListener<IndexResolution> listener
+    ) {
         client.execute(
             EsqlResolveFieldsAction.TYPE,
-            createFieldCapsRequest(indexWildcard, fieldNames),
+            createFieldCapsRequest(indexWildcard, fieldNames, requestFilter),
             listener.delegateFailureAndWrap((l, response) -> l.onResponse(mergedMappings(indexWildcard, response)))
         );
     }
@@ -252,10 +258,11 @@ private EsField conflictingMetricTypes(String name, String fullName, FieldCapabi
         return new InvalidMappedField(name, "mapped as different metric types in indices: " + indices);
     }
 
-    private static FieldCapabilitiesRequest createFieldCapsRequest(String index, Set<String> fieldNames) {
+    private static FieldCapabilitiesRequest createFieldCapsRequest(String index, Set<String> fieldNames, QueryBuilder requestFilter) {
         FieldCapabilitiesRequest req = new FieldCapabilitiesRequest().indices(Strings.commaDelimitedListToStringArray(index));
         req.fields(fieldNames.toArray(String[]::new));
         req.includeUnmapped(true);
+        req.indexFilter(requestFilter);
         // lenient because we throw our own errors looking at the response e.g. if something was not resolved
         // also because this way security doesn't throw authorization exceptions but rather honors ignore_unavailable
         req.indicesOptions(FIELD_CAPS_INDICES_OPTIONS);
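For reference, here is a rough sketch of the request that the new createFieldCapsRequest variant builds; the index pattern, field names, and filter below are invented for illustration:

    import org.elasticsearch.action.fieldcaps.FieldCapabilitiesRequest;
    import org.elasticsearch.index.query.QueryBuilder;
    import org.elasticsearch.index.query.QueryBuilders;

    final class FieldCapsFilterSketch {
        static FieldCapabilitiesRequest example() {
            // the ES|QL request filter, forwarded to field-caps as an index filter
            QueryBuilder requestFilter = QueryBuilders.rangeQuery("@timestamp").gte("now-1d");
            FieldCapabilitiesRequest req = new FieldCapabilitiesRequest().indices("logs-*");
            req.fields("@timestamp", "message");
            req.includeUnmapped(true);
            // indices that cannot match the filter drop out of the resolution,
            // which is what makes the filtered first attempt worthwhile
            req.indexFilter(requestFilter);
            return req;
        }
    }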
From f2addbc69a2aa7fb512c1d5ca9a839f5fc7f5134 Mon Sep 17 00:00:00 2001
From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com>
Date: Tue, 3 Dec 2024 20:10:30 +0200
Subject: [PATCH 27/28] Parse the contents of dynamic objects for
 [subobjects:false] (#117762)

* Parse the contents of dynamic objects for [subobjects:false]

* Update docs/changelog/117762.yaml

* add tests

* tests

* test dynamic field

* test dynamic field

* fix tests
---
 docs/changelog/117762.yaml                 |   6 +
 .../test/search/330_fetch_fields.yml       | 118 ++++++++++++++++++
 .../index/mapper/DocumentParser.java       |   7 +-
 .../index/mapper/MapperFeatures.java       |   1 +
 .../index/mapper/DocumentParserTests.java  |  63 ++++++++++
 5 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 docs/changelog/117762.yaml

diff --git a/docs/changelog/117762.yaml b/docs/changelog/117762.yaml
new file mode 100644
index 0000000000000..123432e0f0507
--- /dev/null
+++ b/docs/changelog/117762.yaml
@@ -0,0 +1,6 @@
+pr: 117762
+summary: "Parse the contents of dynamic objects for [subobjects:false]"
+area: Mapping
+type: bug
+issues:
+ - 117544
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml
index 8a8dffda69e20..44d966b76f34e 100644
--- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml
+++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml
@@ -1177,3 +1177,121 @@ fetch geo_point:
   - is_false: hits.hits.0.fields.message
   - match: { hits.hits.0._source.message.foo: 10 }
   - match: { hits.hits.0._source.message.foo\.bar: 20 }
+
+---
+root with subobjects false and dynamic false:
+  - requires:
+      cluster_features: mapper.fix_parsing_subobjects_false_dynamic_false
+      reason: bug fix
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            subobjects: false
+            dynamic: false
+            properties:
+              id:
+                type: integer
+              my.keyword.field:
+                type: keyword
+
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 1, "my": { "keyword.field": "abc" } }'
+  - match: { errors: false }
+
+  # indexing a dynamically-mapped field is still silently ignored (the unmapped field is dropped, no error is returned)
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 2, "my": { "random.field": "abc" } }'
+  - match: { errors: false }
+
+  - do:
+      search:
+        index: test
+        body:
+          sort: id
+          fields: [ "*" ]
+
+  - match: { hits.hits.0.fields: { my.keyword.field: [ abc ], id: [ 1 ] } }
+  - match: { hits.hits.1.fields: { id: [ 2 ] } }
+
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            match:
+              my.keyword.field: abc
+
+  - match: { hits.total.value: 1 }
+
+---
+object with subobjects false and dynamic false:
+  - requires:
+      cluster_features: mapper.fix_parsing_subobjects_false_dynamic_false
+      reason: bug fix
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              my:
+                subobjects: false
+                dynamic: false
+                properties:
+                  id:
+                    type: integer
+                  nested.keyword.field:
+                    type: keyword
+
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 1, "my": { "nested": { "keyword.field": "abc" } } }'
+  - match: { errors: false }
+
+  # indexing a dynamically-mapped field is still silently ignored (the unmapped field is dropped, no error is returned)
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 2, "my": { "nested": { "random.field": "abc" } } }'
+  - match: { errors: false }
+
+  - do:
+      search:
+        index: test
+        body:
+          sort: id
+          fields: [ "*" ]
+
+  - match: { hits.hits.0.fields: { my.nested.keyword.field: [ abc ], id: [ 1 ] } }
+  - match: { hits.hits.1.fields: { id: [ 2 ] } }
+
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            match:
+              my.nested.keyword.field: abc
+
+  - match: { hits.total.value: 1 }
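A toy model of what these tests exercise: under subobjects:false, mapped leaves keep their dotted names, so an object in the document is only a path prefix and the parser has to keep walking through it even when dynamic is false. The class and map below are purely illustrative:

    import java.util.Map;

    final class FlattenedPathSketch {
        // leaves as they exist under subobjects:false -- dotted names, no nested mappers
        static final Map<String, String> LEAVES = Map.of("id", "integer", "my.keyword.field", "keyword");

        public static void main(String[] args) {
            // { "my": { "keyword.field": "abc" } } flattens to a mapped leaf
            System.out.println(LEAVES.containsKey("my.keyword.field")); // true  -> must be parsed
            // { "my": { "random.field": "abc" } } reaches no mapped leaf
            System.out.println(LEAVES.containsKey("my.random.field"));  // false -> silently ignored
        }
    }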
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
index 82004356ceb57..e00e7b2320000 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
@@ -16,6 +16,7 @@
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.core.Nullable;
+import org.elasticsearch.features.NodeFeature;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -53,6 +54,9 @@ public final class DocumentParser {
     public static final IndexVersion DYNAMICALLY_MAP_DENSE_VECTORS_INDEX_VERSION = IndexVersions.FIRST_DETACHED_INDEX_VERSION;
+    static final NodeFeature FIX_PARSING_SUBOBJECTS_FALSE_DYNAMIC_FALSE = new NodeFeature(
+        "mapper.fix_parsing_subobjects_false_dynamic_false"
+    );
 
     private final XContentParserConfiguration parserConfiguration;
     private final MappingParserContext mappingParserContext;
@@ -531,7 +535,8 @@ private static void doParseObject(DocumentParserContext context, String currentF
     private static void parseObjectDynamic(DocumentParserContext context, String currentFieldName) throws IOException {
         ensureNotStrict(context, currentFieldName);
-        if (context.dynamic() == ObjectMapper.Dynamic.FALSE) {
+        // For [subobjects:false], intermediate objects get flattened, so we can't skip parsing their children.
+        if (context.dynamic() == ObjectMapper.Dynamic.FALSE && context.parent().subobjects() != ObjectMapper.Subobjects.DISABLED) {
             failIfMatchesRoutingPath(context, currentFieldName);
             if (context.canAddIgnoredField()) {
                 context.addIgnoredField(
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java
index bf6c729f95653..ffb38d229078e 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java
@@ -73,6 +73,7 @@ public Set<NodeFeature> getTestFeatures() {
         IgnoredSourceFieldMapper.IGNORED_SOURCE_AS_TOP_LEVEL_METADATA_ARRAY_FIELD,
         IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS,
         MapperService.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT,
+        DocumentParser.FIX_PARSING_SUBOBJECTS_FALSE_DYNAMIC_FALSE,
         CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX,
         META_FETCH_FIELDS_ERROR_CODE_CHANGED
     );
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
index 09d57d0e34c3c..d128b25038a59 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
@@ -2053,6 +2053,38 @@ public void testSubobjectsFalseWithInnerDottedObject() throws Exception {
         assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots.max"));
     }
 
+    public void testSubobjectsFalseWithInnerDottedObjectDynamicFalse() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(mapping(b -> {
+            b.startObject("metrics").field("type", "object").field("subobjects", false).field("dynamic", randomFrom("false", "runtime"));
+            b.startObject("properties").startObject("service.test.with.dots").field("type", "keyword").endObject().endObject();
+            b.endObject();
+        }));
+
+        ParsedDocument doc = mapper.parse(source("""
+            { "metrics": { "service": { "test.with.dots": "foo" } } }"""));
+        assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service.test": { "with.dots": "foo" } } }"""));
+        assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service": { "test": { "with.dots": "foo" } } } }"""));
+        assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service": { "test.other.dots": "foo" } } }"""));
+        assertNull(doc.rootDoc().getField("metrics.service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service.test": { "other.dots": "foo" } } }"""));
+        assertNull(doc.rootDoc().getField("metrics.service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service": { "test": { "other.dots": "foo" } } } }"""));
+        assertNull(doc.rootDoc().getField("metrics.service.test.other.dots"));
+    }
+
     public void testSubobjectsFalseRoot() throws Exception {
         DocumentMapper mapper = createDocumentMapper(mappingNoSubobjects(xContentBuilder -> {}));
         ParsedDocument doc = mapper.parse(source("""
@@ -2074,6 +2106,37 @@ public void testSubobjectsFalseRoot() throws Exception {
         assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
     }
 
+    public void testSubobjectsFalseRootWithInnerDottedObjectDynamicFalse() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(topMapping(b -> {
+            b.field("subobjects", false).field("dynamic", randomFrom("false", "runtime"));
+            b.startObject("properties").startObject("service.test.with.dots").field("type", "keyword").endObject().endObject();
+        }));
+
+        ParsedDocument doc = mapper.parse(source("""
+            { "service": { "test.with.dots": "foo" } }"""));
+        assertNotNull(doc.rootDoc().getField("service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "service.test": { "with.dots": "foo" } }"""));
+        assertNotNull(doc.rootDoc().getField("service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "service": { "test": { "with.dots": "foo" } } }"""));
+        assertNotNull(doc.rootDoc().getField("service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "service": { "test.other.dots": "foo" } }"""));
+        assertNull(doc.rootDoc().getField("service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "service.test": { "other.dots": "foo" } }"""));
+        assertNull(doc.rootDoc().getField("service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "service": { "test": { "other.dots": "foo" } } }"""));
+        assertNull(doc.rootDoc().getField("service.test.other.dots"));
+    }
+
     public void testSubobjectsFalseStructuredPath() throws Exception {
         DocumentMapper mapper = createDocumentMapper(
             mapping(b -> b.startObject("metrics.service").field("type", "object").field("subobjects", false).endObject())
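Condensed, the fix in parseObjectDynamic comes down to the guard below; the enums are simplified stand-ins for the ObjectMapper ones, and the real code additionally handles routing paths and ignored-field tracking:

    final class DynamicGuardSketch {
        enum Dynamic { TRUE, FALSE, RUNTIME }
        enum Subobjects { ENABLED, DISABLED }

        // Before the fix, children were skipped whenever dynamic == FALSE. Now a
        // subobjects:false parent keeps parsing, because its children are dotted
        // leaf names rather than real sub-objects.
        static boolean skipChildren(Dynamic dynamic, Subobjects parentSubobjects) {
            return dynamic == Dynamic.FALSE && parentSubobjects != Subobjects.DISABLED;
        }
    }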
From f5ff9c6bbb84805647c03a280343a531610f2138 Mon Sep 17 00:00:00 2001
From: Luca Cavanna
Date: Tue, 3 Dec 2024 20:20:56 +0100
Subject: [PATCH 28/28] [TEST] Adjust exception type in
 SearchServiceTests#testWaitOnRefreshTimeout (#117884)

This test has been failing due to #114526, which changed the exception
type to SearchTimeoutException.

Closes #115935
---
 muted-tests.yml                                            | 3 ---
 .../java/org/elasticsearch/search/SearchServiceTests.java | 4 ++--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/muted-tests.yml b/muted-tests.yml
index 7bd06a6605028..7e5e7f15700f3 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -106,9 +106,6 @@ tests:
 - class: org.elasticsearch.search.StressSearchServiceReaperIT
   method: testStressReaper
   issue: https://github.com/elastic/elasticsearch/issues/115816
-- class: org.elasticsearch.search.SearchServiceTests
-  method: testWaitOnRefreshTimeout
-  issue: https://github.com/elastic/elasticsearch/issues/115935
 - class: org.elasticsearch.search.SearchServiceTests
   method: testParseSourceValidation
   issue: https://github.com/elastic/elasticsearch/issues/115936
diff --git a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java
index 5dc07a41b3f8c..d1ccfcbe78732 100644
--- a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java
+++ b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java
@@ -20,7 +20,6 @@
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.util.SetOnce;
 import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.ElasticsearchTimeoutException;
 import org.elasticsearch.TransportVersion;
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.action.ActionListener;
@@ -111,6 +110,7 @@
 import org.elasticsearch.search.query.NonCountingTermQuery;
 import org.elasticsearch.search.query.QuerySearchRequest;
 import org.elasticsearch.search.query.QuerySearchResult;
+import org.elasticsearch.search.query.SearchTimeoutException;
 import org.elasticsearch.search.rank.RankBuilder;
 import org.elasticsearch.search.rank.RankDoc;
 import org.elasticsearch.search.rank.RankShardResult;
@@ -2616,7 +2616,7 @@ public void testWaitOnRefreshTimeout() {
         );
         service.executeQueryPhase(request, task, future);
 
-        ElasticsearchTimeoutException ex = expectThrows(ElasticsearchTimeoutException.class, future::actionGet);
+        SearchTimeoutException ex = expectThrows(SearchTimeoutException.class, future::actionGet);
         assertThat(ex.getMessage(), containsString("Wait for seq_no [0] refreshed timed out ["));
     }