From 6c2f6071b20633fafc383212331f79146613011b Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Mon, 2 Dec 2024 16:04:31 -0500 Subject: [PATCH 01/28] Refactor/bbq format (#117847) * Refactor bbq format to be contained in a package * fixing license headers * fixing module * fix style --- server/src/main/java/module-info.java | 4 ++-- .../{ => es816}/BinarizedByteVectorValues.java | 3 ++- .../codec/vectors/{ => es816}/BinaryQuantizer.java | 4 +++- .../{ => es816}/ES816BinaryFlatVectorsScorer.java | 14 ++++++++------ .../ES816BinaryQuantizedVectorsFormat.java | 2 +- .../ES816BinaryQuantizedVectorsReader.java | 7 ++++--- .../ES816BinaryQuantizedVectorsWriter.java | 10 ++++++---- .../ES816HnswBinaryQuantizedVectorsFormat.java | 2 +- .../{ => es816}/OffHeapBinarizedVectorValues.java | 9 +++++---- .../mapper/vectors/DenseVectorFieldMapper.java | 4 ++-- .../org.apache.lucene.codecs.KnnVectorsFormat | 4 ++-- .../{ => es816}/BinaryQuantizationTests.java | 4 +++- .../ES816BinaryFlatVectorsScorerTests.java | 4 +++- .../ES816BinaryQuantizedVectorsFormatTests.java | 3 ++- ...ES816HnswBinaryQuantizedVectorsFormatTests.java | 2 +- 15 files changed, 45 insertions(+), 31 deletions(-) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/BinarizedByteVectorValues.java (96%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/BinaryQuantizer.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryFlatVectorsScorer.java (95%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsFormat.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsReader.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsWriter.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816HnswBinaryQuantizedVectorsFormat.java (99%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/OffHeapBinarizedVectorValues.java (97%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/BinaryQuantizationTests.java (99%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryFlatVectorsScorerTests.java (99%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsFormatTests.java (98%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816HnswBinaryQuantizedVectorsFormatTests.java (99%) diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index d572d3b90fec8..5acc202ebb294 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -457,8 +457,8 @@ org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat, org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat, - org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat, - org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat; + org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat, + org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat; provides org.apache.lucene.codecs.Codec with diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinarizedByteVectorValues.java 
b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinarizedByteVectorValues.java similarity index 96% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/BinarizedByteVectorValues.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinarizedByteVectorValues.java index cf69ab0862949..d5f968af3e738 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinarizedByteVectorValues.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinarizedByteVectorValues.java @@ -17,11 +17,12 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinaryQuantizer.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizer.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/BinaryQuantizer.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizer.java index aa72904fe1341..768c6d526e468 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinaryQuantizer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizer.java @@ -17,11 +17,13 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorer.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java similarity index 95% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorer.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java index 72c5da4880e75..445bdadab2354 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.index.KnnVectorValues; @@ -26,6 +26,8 @@ import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import org.elasticsearch.simdvec.ESVectorUtil; import java.io.IOException; @@ -35,10 +37,10 @@ import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; /** Vector scorer over binarized vector values */ -public class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer { +class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer { private final FlatVectorsScorer nonQuantizedDelegate; - public ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { + ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { this.nonQuantizedDelegate = nonQuantizedDelegate; } @@ -144,10 +146,10 @@ public RandomVectorScorerSupplier copy() throws IOException { } /** A binarized query representing its quantized form along with factors */ - public record BinaryQueryVector(byte[] vector, BinaryQuantizer.QueryFactors factors) {} + record BinaryQueryVector(byte[] vector, BinaryQuantizer.QueryFactors factors) {} /** Vector scorer over binarized vector values */ - public static class BinarizedRandomVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer { + static class BinarizedRandomVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer { private final BinaryQueryVector queryVector; private final BinarizedByteVectorValues targetVectors; private final VectorSimilarityFunction similarityFunction; @@ -155,7 +157,7 @@ public static class BinarizedRandomVectorScorer extends RandomVectorScorer.Abstr private final float sqrtDimensions; private final float maxX1; - public BinarizedRandomVectorScorer( + BinarizedRandomVectorScorer( BinaryQueryVector queryVectors, BinarizedByteVectorValues targetVectors, VectorSimilarityFunction similarityFunction diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormat.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java index e32aea0fb04ae..d864ec5dee8c5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsReader.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java index 21c4a5c449387..fc20809ea7eed 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; @@ -43,6 +43,7 @@ import org.apache.lucene.util.SuppressForbidden; import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import org.apache.lucene.util.hnsw.RandomVectorScorer; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; import java.util.HashMap; @@ -55,7 +56,7 @@ * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ @SuppressForbidden(reason = "Lucene classes") -public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { +class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES816BinaryQuantizedVectorsReader.class); @@ -64,7 +65,7 @@ public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { private final FlatVectorsReader rawVectorsReader; private final ES816BinaryFlatVectorsScorer vectorScorer; - public ES816BinaryQuantizedVectorsReader( + ES816BinaryQuantizedVectorsReader( SegmentReadState state, FlatVectorsReader rawVectorsReader, ES816BinaryFlatVectorsScorer vectorsScorer diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsWriter.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsWriter.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsWriter.java index a7774b850b64c..31ae977e81118 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsWriter.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; @@ -48,6 +48,8 @@ import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.Closeable; import java.io.IOException; @@ -61,14 +63,14 @@ import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.apache.lucene.util.RamUsageEstimator.shallowSizeOfInstance; -import static org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat.BINARIZED_VECTOR_COMPONENT; -import static org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; +import static org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat.BINARIZED_VECTOR_COMPONENT; +import static org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ @SuppressForbidden(reason = "Lucene classes") -public class ES816BinaryQuantizedVectorsWriter extends FlatVectorsWriter { +class ES816BinaryQuantizedVectorsWriter extends FlatVectorsWriter { private static final long SHALLOW_RAM_BYTES_USED = shallowSizeOfInstance(ES816BinaryQuantizedVectorsWriter.class); private final SegmentWriteState segmentWriteState; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java similarity index 99% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormat.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java index 097cdffff6ae4..52f9f14b7bf97 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/OffHeapBinarizedVectorValues.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/OffHeapBinarizedVectorValues.java similarity index 97% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/OffHeapBinarizedVectorValues.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/OffHeapBinarizedVectorValues.java index e7d818bb752d6..12bf962d314bd 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/OffHeapBinarizedVectorValues.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/OffHeapBinarizedVectorValues.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene90.IndexedDISI; @@ -29,6 +29,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.packed.DirectMonotonicReader; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; import java.nio.ByteBuffer; @@ -37,7 +38,7 @@ import static org.elasticsearch.index.codec.vectors.BQVectorUtils.constSqrt; /** Binarized vector values loaded from off-heap */ -public abstract class OffHeapBinarizedVectorValues extends BinarizedByteVectorValues { +abstract class OffHeapBinarizedVectorValues extends BinarizedByteVectorValues { protected final int dimension; protected final int size; @@ -251,8 +252,8 @@ public static OffHeapBinarizedVectorValues load( } /** Dense off-heap binarized vector values */ - public static class DenseOffHeapVectorValues extends OffHeapBinarizedVectorValues { - public DenseOffHeapVectorValues( + static class DenseOffHeapVectorValues extends OffHeapBinarizedVectorValues { + DenseOffHeapVectorValues( int dimension, int size, float[] centroid, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index dea9368a9377e..0a6a24f727572 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -46,8 +46,8 @@ import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat; import org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat; -import org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat; -import org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat; +import org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat; +import org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.mapper.ArraySourceValueFetcher; diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index c2201f5b1c319..389555e60b43b 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -3,5 +3,5 @@ org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat -org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat -org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat diff --git 
a/server/src/test/java/org/elasticsearch/index/codec/vectors/BinaryQuantizationTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizationTests.java similarity index 99% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/BinaryQuantizationTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizationTests.java index 32d717bd76f91..205cbb4119dd6 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/BinaryQuantizationTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizationTests.java @@ -17,11 +17,13 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.util.Random; diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorerTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorerTests.java similarity index 99% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorerTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorerTests.java index cef5e5358f3d5..a75b9bc6064d1 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorerTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorerTests.java @@ -17,13 +17,15 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.util.VectorUtil; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java similarity index 98% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormatTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java index 42f2fbb383ac9..681f615653d40 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; @@ -41,6 +41,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; import java.util.Locale; diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java similarity index 99% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormatTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java index ca96e093b7b28..a25fa2836ee34 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; From 12be8203d3efd1ed62a838aaa1b379c592a7aaec Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 3 Dec 2024 09:31:51 +1100 Subject: [PATCH 02/28] Mute org.elasticsearch.xpack.test.rest.XPackRestIT test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} #117862 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 73d9a29e275b3..57db22feba059 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -236,6 +236,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117815 - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT issue: https://github.com/elastic/elasticsearch/issues/111319 +- class: org.elasticsearch.xpack.test.rest.XPackRestIT + method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} + issue: https://github.com/elastic/elasticsearch/issues/117862 # Examples: # From af7d3f911fbacaa1f4b1be68398cc59cbfdc89e2 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 2 Dec 2024 17:57:02 -0800 Subject: [PATCH 03/28] Add cluster level reduction (#117731) This change introduces cluster-level reduction. Unlike data-node-level reduction, it does not require pragmas because the network latency and throughput across clusters differ significantly from those within a cluster. As a result, the benefits of this reduction should outweigh the risks. 
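To make the mechanism concrete, here is a minimal sketch (plain Java, not the actual ES|QL operator code; all names are hypothetical) of what cluster-level reduction buys for a query like `FROM *:test | STATS total=sum(const)`: each remote data node emits a partial aggregation state, and the remote coordinator folds those partials into a single intermediate page before anything crosses the cluster boundary, so the querying cluster receives one page per remote cluster instead of one per data node.

```java
// Hypothetical sketch, not Elasticsearch code: partial-to-partial reduction
// at the remote coordinator for a query like `STATS total = sum(const)`.
import java.util.List;

public class ClusterLevelReductionSketch {

    /** Intermediate aggregation state for a sum: just a running total. */
    record PartialSum(long sum) {
        PartialSum combine(PartialSum other) {
            return new PartialSum(sum + other.sum);
        }
    }

    /** Remote coordinator: fold all data-node pages into one intermediate page. */
    static PartialSum reduceOnRemoteCluster(List<PartialSum> dataNodePages) {
        return dataNodePages.stream().reduce(new PartialSum(0), PartialSum::combine);
    }

    public static void main(String[] args) {
        // Three data nodes in the remote cluster each emit a partial sum...
        List<PartialSum> remotePages = List.of(new PartialSum(10), new PartialSum(32), new PartialSum(7));
        // ...but only one reduced page crosses the cluster boundary.
        PartialSum crossClusterPage = reduceOnRemoteCluster(remotePages);
        // The querying cluster performs the final reduction/finishing step.
        System.out.println("total = " + crossClusterPage.sum()); // total = 49
    }
}
```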
--- docs/changelog/117731.yaml | 5 ++ .../action/CrossClustersCancellationIT.java | 37 ++++++++++++ .../xpack/esql/planner/PlannerUtils.java | 47 ++++++---------- .../xpack/esql/plugin/ComputeService.java | 56 +++++++------------ 4 files changed, 78 insertions(+), 67 deletions(-) create mode 100644 docs/changelog/117731.yaml diff --git a/docs/changelog/117731.yaml b/docs/changelog/117731.yaml new file mode 100644 index 0000000000000..f69cd5bf31100 --- /dev/null +++ b/docs/changelog/117731.yaml @@ -0,0 +1,5 @@ +pr: 117731 +summary: Add cluster level reduction +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java index 5ffc92636b272..f29f79976dc0d 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java @@ -238,4 +238,41 @@ public void testSameRemoteClusters() throws Exception { } } } + + public void testTasks() throws Exception { + createRemoteIndex(between(10, 100)); + EsqlQueryRequest request = EsqlQueryRequest.syncEsqlQueryRequest(); + request.query("FROM *:test | STATS total=sum(const) | LIMIT 1"); + request.pragmas(randomPragmas()); + ActionFuture requestFuture = client().execute(EsqlQueryAction.INSTANCE, request); + assertTrue(PauseFieldPlugin.startEmitting.await(30, TimeUnit.SECONDS)); + try { + assertBusy(() -> { + List clusterTasks = client(REMOTE_CLUSTER).admin() + .cluster() + .prepareListTasks() + .setActions(ComputeService.CLUSTER_ACTION_NAME) + .get() + .getTasks(); + assertThat(clusterTasks.size(), equalTo(1)); + List drivers = client(REMOTE_CLUSTER).admin() + .cluster() + .prepareListTasks() + .setTargetParentTaskId(clusterTasks.getFirst().taskId()) + .setActions(DriverTaskRunner.ACTION_NAME) + .setDetailed(true) + .get() + .getTasks(); + assertThat(drivers.size(), equalTo(1)); + TaskInfo driver = drivers.getFirst(); + assertThat(driver.description(), equalTo(""" + \\_ExchangeSourceOperator[] + \\_AggregationOperator[mode = INTERMEDIATE, aggs = sum of longs] + \\_ExchangeSinkOperator""")); + }); + } finally { + PauseFieldPlugin.allowEmitting.countDown(); + } + requestFuture.actionGet(30, TimeUnit.SECONDS).close(); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index c998af2215169..f4ada1442efe5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -29,14 +29,8 @@ import org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalPlanOptimizer; -import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.Filter; -import org.elasticsearch.xpack.esql.plan.logical.Limit; -import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; -import org.elasticsearch.xpack.esql.plan.logical.OrderBy; -import 
org.elasticsearch.xpack.esql.plan.logical.TopN; -import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.EsSourceExec; import org.elasticsearch.xpack.esql.plan.physical.EstimatesRowSize; @@ -44,10 +38,7 @@ import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec; import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; -import org.elasticsearch.xpack.esql.plan.physical.LimitExec; -import org.elasticsearch.xpack.esql.plan.physical.OrderExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; -import org.elasticsearch.xpack.esql.plan.physical.TopNExec; import org.elasticsearch.xpack.esql.planner.mapper.LocalMapper; import org.elasticsearch.xpack.esql.planner.mapper.Mapper; import org.elasticsearch.xpack.esql.session.Configuration; @@ -83,29 +74,25 @@ public static Tuple breakPlanBetweenCoordinatorAndDa return new Tuple<>(coordinatorPlan, dataNodePlan.get()); } - public static PhysicalPlan dataNodeReductionPlan(LogicalPlan plan, PhysicalPlan unused) { - var pipelineBreakers = plan.collectFirstChildren(Mapper::isPipelineBreaker); + public static PhysicalPlan reductionPlan(PhysicalPlan plan) { + // find the logical fragment + var fragments = plan.collectFirstChildren(p -> p instanceof FragmentExec); + if (fragments.isEmpty()) { + return null; + } + final FragmentExec fragment = (FragmentExec) fragments.getFirst(); - if (pipelineBreakers.isEmpty() == false) { - UnaryPlan pipelineBreaker = (UnaryPlan) pipelineBreakers.get(0); - if (pipelineBreaker instanceof TopN) { - LocalMapper mapper = new LocalMapper(); - var physicalPlan = EstimatesRowSize.estimateRowSize(0, mapper.map(plan)); - return physicalPlan.collectFirstChildren(TopNExec.class::isInstance).get(0); - } else if (pipelineBreaker instanceof Limit limit) { - return new LimitExec(limit.source(), unused, limit.limit()); - } else if (pipelineBreaker instanceof OrderBy order) { - return new OrderExec(order.source(), unused, order.order()); - } else if (pipelineBreaker instanceof Aggregate) { - LocalMapper mapper = new LocalMapper(); - var physicalPlan = EstimatesRowSize.estimateRowSize(0, mapper.map(plan)); - var aggregate = (AggregateExec) physicalPlan.collectFirstChildren(AggregateExec.class::isInstance).get(0); - return aggregate.withMode(AggregatorMode.INITIAL); - } else { - throw new EsqlIllegalArgumentException("unsupported unary physical plan node [" + pipelineBreaker.nodeName() + "]"); - } + final var pipelineBreakers = fragment.fragment().collectFirstChildren(Mapper::isPipelineBreaker); + if (pipelineBreakers.isEmpty()) { + return null; + } + final var pipelineBreaker = pipelineBreakers.getFirst(); + final LocalMapper mapper = new LocalMapper(); + PhysicalPlan reducePlan = mapper.map(pipelineBreaker); + if (reducePlan instanceof AggregateExec agg) { + reducePlan = agg.withMode(AggregatorMode.INITIAL); // force to emit intermediate outputs } - return null; + return EstimatesRowSize.estimateRowSize(fragment.estimatedRowSize(), reducePlan); } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java index b06dd3cdb64d3..9aea1577a4137 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java @@ -60,12 +60,10 @@ import org.elasticsearch.xpack.esql.action.EsqlQueryAction; import org.elasticsearch.xpack.esql.action.EsqlSearchShardsAction; import org.elasticsearch.xpack.esql.core.expression.Attribute; -import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; import org.elasticsearch.xpack.esql.enrich.LookupFromIndexService; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec; -import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; import org.elasticsearch.xpack.esql.plan.physical.OutputExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; @@ -780,35 +778,24 @@ private void runComputeOnDataNode( } } + private static PhysicalPlan reductionPlan(ExchangeSinkExec plan, boolean enable) { + PhysicalPlan reducePlan = new ExchangeSourceExec(plan.source(), plan.output(), plan.isIntermediateAgg()); + if (enable) { + PhysicalPlan p = PlannerUtils.reductionPlan(plan); + if (p != null) { + reducePlan = p.replaceChildren(List.of(reducePlan)); + } + } + return new ExchangeSinkExec(plan.source(), plan.output(), plan.isIntermediateAgg(), reducePlan); + } + private class DataNodeRequestHandler implements TransportRequestHandler { @Override public void messageReceived(DataNodeRequest request, TransportChannel channel, Task task) { final ActionListener listener = new ChannelActionListener<>(channel); - final ExchangeSinkExec reducePlan; + final PhysicalPlan reductionPlan; if (request.plan() instanceof ExchangeSinkExec plan) { - var fragments = plan.collectFirstChildren(FragmentExec.class::isInstance); - if (fragments.isEmpty()) { - listener.onFailure(new IllegalStateException("expected a fragment plan for a remote compute; got " + request.plan())); - return; - } - var localExchangeSource = new ExchangeSourceExec(plan.source(), plan.output(), plan.isIntermediateAgg()); - Holder reducePlanHolder = new Holder<>(); - if (request.pragmas().nodeLevelReduction()) { - PhysicalPlan dataNodePlan = request.plan(); - request.plan() - .forEachUp( - FragmentExec.class, - f -> { reducePlanHolder.set(PlannerUtils.dataNodeReductionPlan(f.fragment(), dataNodePlan)); } - ); - } - reducePlan = new ExchangeSinkExec( - plan.source(), - plan.output(), - plan.isIntermediateAgg(), - reducePlanHolder.get() != null - ? reducePlanHolder.get().replaceChildren(List.of(localExchangeSource)) - : localExchangeSource - ); + reductionPlan = reductionPlan(plan, request.pragmas().nodeLevelReduction()); } else { listener.onFailure(new IllegalStateException("expected exchange sink for a remote compute; got " + request.plan())); return; @@ -825,7 +812,7 @@ public void messageReceived(DataNodeRequest request, TransportChannel channel, T request.indicesOptions() ); try (var computeListener = ComputeListener.create(transportService, (CancellableTask) task, listener)) { - runComputeOnDataNode((CancellableTask) task, sessionId, reducePlan, request, computeListener); + runComputeOnDataNode((CancellableTask) task, sessionId, reductionPlan, request, computeListener); } } } @@ -871,10 +858,10 @@ public void messageReceived(ClusterComputeRequest request, TransportChannel chan * Performs a compute on a remote cluster. The output pages are placed in an exchange sink specified by * {@code globalSessionId}. 
The coordinator on the main cluster will poll pages from there. * <p>
- * Currently, the coordinator on the remote cluster simply collects pages from data nodes in the remote cluster - * and places them in the exchange sink. We can achieve this by using a single exchange buffer to minimize overhead. - * However, here we use two exchange buffers so that we can run an actual plan on this coordinator to perform partial - * reduce operations, such as limit, topN, and partial-to-partial aggregation in the future. + * Currently, the coordinator on the remote cluster polls pages from data nodes within the remote cluster + * and performs cluster-level reduction before sending pages to the querying cluster. This reduction aims + * to minimize data transfers across clusters but may require additional CPU resources for operations like + * aggregations. */ void runComputeOnRemoteCluster( String clusterAlias, @@ -892,6 +879,7 @@ void runComputeOnRemoteCluster( () -> exchangeService.finishSinkHandler(globalSessionId, new TaskCancelledException(parentTask.getReasonCancelled())) ); final String localSessionId = clusterAlias + ":" + globalSessionId; + final PhysicalPlan coordinatorPlan = reductionPlan(plan, true); var exchangeSource = new ExchangeSourceHandler( configuration.pragmas().exchangeBufferSize(), transportService.getThreadPool().executor(ThreadPool.Names.SEARCH), @@ -899,12 +887,6 @@ void runComputeOnRemoteCluster( ); try (Releasable ignored = exchangeSource.addEmptySink()) { exchangeSink.addCompletionListener(computeListener.acquireAvoid()); - PhysicalPlan coordinatorPlan = new ExchangeSinkExec( - plan.source(), - plan.output(), - plan.isIntermediateAgg(), - new ExchangeSourceExec(plan.source(), plan.output(), plan.isIntermediateAgg()) - ); runCompute( parentTask, new ComputeContext(localSessionId, clusterAlias, List.of(), configuration, exchangeSource, exchangeSink), From 4a9f632fab7571e198f5030dd30acc80c436c58b Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 2 Dec 2024 19:53:08 -0800 Subject: [PATCH 04/28] Bypass cancellation when closing sinks (#117797) > **java.lang.AssertionError: Leftover exchanges ExchangeService{sinks=[veZSyrPATq2Sg83dtgK3Jg:700/3]} on node node_s4** I looked into the test failure described in https://github.com/elastic/elasticsearch/issues/117253. The reason we don't clean up the exchange sink quickly is that, once a failure occurs, we cancel the request along with all its child requests. These exchange sinks will be cleaned up only after they become inactive, which by default takes 5 minutes. We could override the `esql.exchange.sink_inactive_interval` setting in the test to remove these exchange sinks faster. However, I think we should allow exchange requests that close exchange sinks to bypass cancellation, enabling quicker resource cleanup than the default inactive interval.
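The core of the fix is small; below is a minimal sketch of the pattern (assuming the elasticsearch server classes on the classpath; `CloseSinkRequest` is a hypothetical name, the real change lives in `ExchangeRequest`): a request whose only purpose is to drain and close the remote sink reports no parent task, so cancelling the query cannot cancel the cleanup round-trip.

```java
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.transport.TransportRequest;

// Hypothetical illustration of the parent-task masking pattern.
class CloseSinkRequest extends TransportRequest {
    private final boolean sourcesFinished;

    CloseSinkRequest(boolean sourcesFinished) {
        this.sourcesFinished = sourcesFinished;
    }

    @Override
    public TaskId getParentTask() {
        // sourcesFinished=true means "complete the remote sink and return":
        // masking the parent lets this request bypass task cancellation.
        return sourcesFinished ? TaskId.EMPTY_TASK_ID : super.getParentTask();
    }
}
```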
Closes #117253 --- .../operator/exchange/ExchangeRequest.java | 17 ++++++-- .../operator/exchange/ExchangeService.java | 43 ++++++++++--------- .../exchange/ExchangeSourceHandler.java | 8 ++-- .../compute/operator/exchange/RemoteSink.java | 10 +++++ .../exchange/ExchangeRequestTests.java | 27 ++++++++++++ .../exchange/ExchangeServiceTests.java | 6 ++- 6 files changed, 82 insertions(+), 29 deletions(-) create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java index 6ed2cc7e587be..1e8700bcd4030 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java @@ -40,6 +40,17 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(sourcesFinished); } + @Override + public TaskId getParentTask() { + // Exchange requests with `sourcesFinished=true` complete the remote sink and return without blocking. + // Masking the parent task allows these requests to bypass task cancellation, ensuring cleanup of the remote sink. + // TODO: Maybe add a separate action/request for closing exchange sinks? + if (sourcesFinished) { + return TaskId.EMPTY_TASK_ID; + } + return super.getParentTask(); + } + /** * True if the {@link ExchangeSourceHandler} has enough input. * The corresponding {@link ExchangeSinkHandler} can drain pages and finish itself. @@ -70,9 +81,9 @@ public int hashCode() { @Override public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { - if (parentTaskId.isSet() == false) { - assert false : "ExchangeRequest must have a parent task"; - throw new IllegalStateException("ExchangeRequest must have a parent task"); + if (sourcesFinished == false && parentTaskId.isSet() == false) { + assert false : "ExchangeRequest with sourcesFinished=false must have a parent task"; + throw new IllegalStateException("ExchangeRequest with sourcesFinished=false must have a parent task"); } return new CancellableTask(id, type, action, "", parentTaskId, headers) { @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java index a943a90d02e87..00c68c4f48e86 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java @@ -314,28 +314,20 @@ static final class TransportRemoteSink implements RemoteSink { @Override public void fetchPageAsync(boolean allSourcesFinished, ActionListener listener) { if (allSourcesFinished) { - if (finished.compareAndSet(false, true)) { - doFetchPageAsync(true, listener); - } else { - // already finished or promised - listener.onResponse(new ExchangeResponse(blockFactory, null, true)); - } - } else { - // already finished - if (finished.get()) { - listener.onResponse(new ExchangeResponse(blockFactory, null, true)); - return; - } - doFetchPageAsync(false, ActionListener.wrap(r -> { - if (r.finished()) { - finished.set(true); - } - 
listener.onResponse(r); - }, e -> { - finished.set(true); - listener.onFailure(e); - })); + close(listener.map(unused -> new ExchangeResponse(blockFactory, null, true))); + return; + } + // already finished + if (finished.get()) { + listener.onResponse(new ExchangeResponse(blockFactory, null, true)); + return; } + doFetchPageAsync(false, ActionListener.wrap(r -> { + if (r.finished()) { + finished.set(true); + } + listener.onResponse(r); + }, e -> close(ActionListener.running(() -> listener.onFailure(e))))); } private void doFetchPageAsync(boolean allSourcesFinished, ActionListener listener) { @@ -361,6 +353,15 @@ private void doFetchPageAsync(boolean allSourcesFinished, ActionListener listener) { + if (finished.compareAndSet(false, true)) { + doFetchPageAsync(true, listener.delegateFailure((l, unused) -> l.onResponse(null))); + } else { + listener.onResponse(null); + } + } } // For testing diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java index 61b3386ce0274..375016a5d51d5 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java @@ -224,8 +224,10 @@ void onSinkFailed(Exception e) { buffer.waitForReading().listener().onResponse(null); // resume the Driver if it is being blocked on reading if (finished == false) { finished = true; - outstandingSinks.finishInstance(); - completionListener.onFailure(e); + remoteSink.close(ActionListener.running(() -> { + outstandingSinks.finishInstance(); + completionListener.onFailure(e); + })); } } @@ -262,7 +264,7 @@ public void onFailure(Exception e) { failure.unwrapAndCollect(e); } buffer.waitForReading().listener().onResponse(null); // resume the Driver if it is being blocked on reading - sinkListener.onFailure(e); + remoteSink.close(ActionListener.running(() -> sinkListener.onFailure(e))); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java index 7d81cd3f66600..aaa937ef17c0e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java @@ -12,4 +12,14 @@ public interface RemoteSink { void fetchPageAsync(boolean allSourcesFinished, ActionListener listener); + + default void close(ActionListener listener) { + fetchPageAsync(true, listener.delegateFailure((l, r) -> { + try { + r.close(); + } finally { + l.onResponse(null); + } + })); + } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java new file mode 100644 index 0000000000000..8a0891651a497 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator.exchange; + +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class ExchangeRequestTests extends ESTestCase { + + public void testParentTask() { + ExchangeRequest r1 = new ExchangeRequest("1", true); + r1.setParentTask(new TaskId("node-1", 1)); + assertSame(TaskId.EMPTY_TASK_ID, r1.getParentTask()); + + ExchangeRequest r2 = new ExchangeRequest("1", false); + r2.setParentTask(new TaskId("node-2", 2)); + assertTrue(r2.getParentTask().isSet()); + assertThat(r2.getParentTask(), equalTo((new TaskId("node-2", 2)))); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java index 4178f02898d79..fc6c850ba187b 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java @@ -491,7 +491,7 @@ public void testConcurrentWithTransportActions() { } } - public void testFailToRespondPage() { + public void testFailToRespondPage() throws Exception { Settings settings = Settings.builder().build(); MockTransportService node0 = newTransportService(); ExchangeService exchange0 = new ExchangeService(settings, threadPool, ESQL_TEST_EXECUTOR, blockFactory()); @@ -558,7 +558,9 @@ public void sendResponse(TransportResponse transportResponse) { Throwable cause = ExceptionsHelper.unwrap(err, IOException.class); assertNotNull(cause); assertThat(cause.getMessage(), equalTo("page is too large")); - sinkHandler.onFailure(new RuntimeException(cause)); + PlainActionFuture<Void> sinkCompletionFuture = new PlainActionFuture<>(); + sinkHandler.addCompletionListener(sinkCompletionFuture); + assertBusy(() -> assertTrue(sinkCompletionFuture.isDone())); expectThrows(Exception.class, () -> sourceCompletionFuture.actionGet(10, TimeUnit.SECONDS)); } } From af9a57ec66770530cf45aefd842e86a810b13947 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Tue, 3 Dec 2024 07:18:44 +0100 Subject: [PATCH 05/28] Remove supersetSize and subsetSize from InternalSignificantTerms.Bucket (#117574) Those fields are only used to update the score and are not serialized in the bucket, so they can be removed.
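The shape of the change, in a simplified sketch (a hypothetical `Bucket` stripped down to the relevant members): the subset/superset sizes are constants of the whole aggregation, so `updateScore` now receives them as parameters instead of reading per-bucket copies.

```java
// Simplified sketch of the refactor; the real classes live in
// org.elasticsearch.search.aggregations.bucket.terms.
class Bucket {
    long subsetDf;
    long supersetDf;
    double score;

    // Before: `long subsetSize; long supersetSize;` were fields on every
    // bucket, used only here. After: callers pass the aggregation-level sizes.
    void updateScore(SignificanceHeuristic heuristic, long subsetSize, long supersetSize) {
        score = heuristic.getScore(subsetDf, subsetSize, supersetDf, supersetSize);
    }

    // Minimal stand-in for the real SignificanceHeuristic contract.
    interface SignificanceHeuristic {
        double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize);
    }
}
```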
--- .../SignificantTermsSignificanceScoreIT.java | 2 +- .../GlobalOrdinalsStringTermsAggregator.java | 9 +-- .../terms/InternalMappedSignificantTerms.java | 6 +- .../terms/InternalSignificantTerms.java | 50 ++------------ .../terms/MapStringTermsAggregator.java | 64 +++++++++-------- .../bucket/terms/NumericTermsAggregator.java | 69 ++++++++++--------- .../bucket/terms/SignificantLongTerms.java | 39 ++--------- .../bucket/terms/SignificantStringTerms.java | 30 ++------ .../bucket/terms/SignificantTerms.java | 24 +++---- .../terms/UnmappedSignificantTerms.java | 25 ++----- .../InternalSignificantTermsTestCase.java | 2 - .../terms/SignificantLongTermsTests.java | 15 +--- .../terms/SignificantStringTermsTests.java | 15 +--- ...AbstractSignificanceHeuristicTestCase.java | 39 +++-------- 14 files changed, 127 insertions(+), 262 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java index bf11c1d69bcc6..671f60e2b9d5e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java @@ -495,7 +495,7 @@ public void testScriptScore() throws ExecutionException, InterruptedException, I for (SignificantTerms.Bucket bucket : sigTerms.getBuckets()) { assertThat( bucket.getSignificanceScore(), - is((double) bucket.getSubsetDf() + bucket.getSubsetSize() + bucket.getSupersetDf() + bucket.getSupersetSize()) + is((double) bucket.getSubsetDf() + sigTerms.getSubsetSize() + bucket.getSupersetDf() + sigTerms.getSupersetSize()) ); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 5a79155d1d4f5..4cf710232c7a0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -989,7 +989,7 @@ SignificantStringTerms.Bucket[] buildBuckets(int size) { @Override SignificantStringTerms.Bucket buildEmptyTemporaryBucket() { - return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format, 0); + return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, null, format, 0); } private long subsetSize(long owningBucketOrd) { @@ -998,22 +998,19 @@ private long subsetSize(long owningBucketOrd) { } @Override - BucketUpdater bucketUpdater(long owningBucketOrd, GlobalOrdLookupFunction lookupGlobalOrd) - throws IOException { + BucketUpdater bucketUpdater(long owningBucketOrd, GlobalOrdLookupFunction lookupGlobalOrd) { long subsetSize = subsetSize(owningBucketOrd); return (spare, globalOrd, bucketOrd, docCount) -> { spare.bucketOrd = bucketOrd; oversizedCopy(lookupGlobalOrd.apply(globalOrd), spare.termBytes); spare.subsetDf = docCount; - spare.subsetSize = subsetSize; spare.supersetDf = backgroundFrequencies.freq(spare.termBytes); - spare.supersetSize = supersetSize; /* * During shard-local down-selection we use subset/superset stats * that are for this shard only. 
Back at the central reducer these * properties will be updated with global stats. */ - spare.updateScore(significanceHeuristic); + spare.updateScore(significanceHeuristic, subsetSize, supersetSize); }; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java index 3f75a27306ab4..8c6d21cc74119 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java @@ -59,7 +59,7 @@ protected InternalMappedSignificantTerms(StreamInput in, Bucket.Reader bucket subsetSize = in.readVLong(); supersetSize = in.readVLong(); significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class); - buckets = in.readCollectionAsList(stream -> bucketReader.read(stream, subsetSize, supersetSize, format)); + buckets = in.readCollectionAsList(stream -> bucketReader.read(stream, format)); } @Override @@ -91,12 +91,12 @@ public B getBucketByKey(String term) { } @Override - protected long getSubsetSize() { + public long getSubsetSize() { return subsetSize; } @Override - protected long getSupersetSize() { + public long getSupersetSize() { return supersetSize; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java index 6c0eb465d1f80..78ae2481f5d99 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java @@ -53,13 +53,11 @@ public abstract static class Bucket> extends InternalMultiBu */ @FunctionalInterface public interface Reader> { - B read(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException; + B read(StreamInput in, DocValueFormat format) throws IOException; } long subsetDf; - long subsetSize; long supersetDf; - long supersetSize; /** * Ordinal of the bucket while it is being built. Not used after it is * returned from {@link Aggregator#buildAggregations(org.elasticsearch.common.util.LongArray)} and not @@ -70,16 +68,7 @@ public interface Reader> { protected InternalAggregations aggregations; final transient DocValueFormat format; - protected Bucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - DocValueFormat format - ) { - this.subsetSize = subsetSize; - this.supersetSize = supersetSize; + protected Bucket(long subsetDf, long supersetDf, InternalAggregations aggregations, DocValueFormat format) { this.subsetDf = subsetDf; this.supersetDf = supersetDf; this.aggregations = aggregations; @@ -89,9 +78,7 @@ protected Bucket( /** * Read from a stream. 
*/ - protected Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) { - this.subsetSize = subsetSize; - this.supersetSize = supersetSize; + protected Bucket(StreamInput in, DocValueFormat format) { this.format = format; } @@ -105,20 +92,10 @@ public long getSupersetDf() { return supersetDf; } - @Override - public long getSupersetSize() { - return supersetSize; - } - - @Override - public long getSubsetSize() { - return subsetSize; - } - // TODO we should refactor to remove this, since buckets should be immutable after they are generated. // This can lead to confusing bugs if the bucket is re-created (via createBucket() or similar) without // the score - void updateScore(SignificanceHeuristic significanceHeuristic) { + void updateScore(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize) { score = significanceHeuristic.getScore(subsetDf, subsetSize, supersetDf, supersetSize); } @@ -262,13 +239,11 @@ public InternalAggregation get() { buckets.forEach(entry -> { final B b = createBucket( entry.value.subsetDf[0], - globalSubsetSize, entry.value.supersetDf[0], - globalSupersetSize, entry.value.reducer.getAggregations(), entry.value.reducer.getProto() ); - b.updateScore(heuristic); + b.updateScore(heuristic, globalSubsetSize, globalSupersetSize); if (((b.score > 0) && (b.subsetDf >= minDocCount)) || reduceContext.isFinalReduce() == false) { final B removed = ordered.insertWithOverflow(b); if (removed == null) { @@ -317,9 +292,7 @@ public InternalAggregation finalizeSampling(SamplingContext samplingContext) { .map( b -> createBucket( samplingContext.scaleUp(b.subsetDf), - subsetSize, samplingContext.scaleUp(b.supersetDf), - supersetSize, InternalAggregations.finalizeSampling(b.aggregations, samplingContext), b ) @@ -328,14 +301,7 @@ public InternalAggregation finalizeSampling(SamplingContext samplingContext) { ); } - abstract B createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - B prototype - ); + abstract B createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, B prototype); protected abstract A create(long subsetSize, long supersetSize, List buckets); @@ -344,10 +310,6 @@ abstract B createBucket( */ protected abstract B[] createBucketsArray(int size); - protected abstract long getSubsetSize(); - - protected abstract long getSupersetSize(); - protected abstract SignificanceHeuristic getSignificanceHeuristic(); @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java index 6ae47d5975479..b96c495d37489 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java @@ -47,7 +47,6 @@ import java.util.function.BiConsumer; import java.util.function.Function; import java.util.function.LongConsumer; -import java.util.function.Supplier; import static org.elasticsearch.search.aggregations.InternalOrder.isKeyOrder; @@ -296,7 +295,7 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro try (ObjectArrayPriorityQueue ordered = buildPriorityQueue(size)) { B spare = null; BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningOrd); - Supplier emptyBucketBuilder = 
emptyBucketBuilder(owningOrd); + BucketUpdater bucketUpdater = bucketUpdater(owningOrd); while (ordsEnum.next()) { long docCount = bucketDocCount(ordsEnum.ord()); otherDocCounts.increment(ordIdx, docCount); @@ -305,9 +304,9 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro } if (spare == null) { checkRealMemoryCBForInternalBucket(); - spare = emptyBucketBuilder.get(); + spare = buildEmptyBucket(); } - updateBucket(spare, ordsEnum, docCount); + bucketUpdater.updateBucket(spare, ordsEnum, docCount); spare = ordered.insertWithOverflow(spare); } @@ -348,9 +347,9 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedDocs) throws IOException; /** - * Build an empty temporary bucket. + * Build an empty bucket. */ - abstract Supplier emptyBucketBuilder(long owningBucketOrd); + abstract B buildEmptyBucket(); /** * Build a {@link PriorityQueue} to sort the buckets. After we've @@ -362,7 +361,7 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro * Update fields in {@code spare} to reflect information collected for * this bucket ordinal. */ - abstract void updateBucket(B spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) throws IOException; + abstract BucketUpdater bucketUpdater(long owningBucketOrd); /** * Build an array to hold the "top" buckets for each ordinal. @@ -399,6 +398,10 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract R buildEmptyResult(); } + interface BucketUpdater { + void updateBucket(B spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) throws IOException; + } + /** * Builds results for the standard {@code terms} aggregation. 
*/ @@ -490,8 +493,8 @@ private void collectZeroDocEntries(BinaryDocValues values, Bits liveDocs, int ma } @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - return () -> new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format); + StringTerms.Bucket buildEmptyBucket() { + return new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format); } @Override @@ -500,10 +503,12 @@ ObjectArrayPriorityQueue buildPriorityQueue(int size) { } @Override - void updateBucket(StringTerms.Bucket spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) throws IOException { - ordsEnum.readValue(spare.termBytes); - spare.docCount = docCount; - spare.bucketOrd = ordsEnum.ord(); + BucketUpdater bucketUpdater(long owningBucketOrd) { + return (spare, ordsEnum, docCount) -> { + ordsEnum.readValue(spare.termBytes); + spare.docCount = docCount; + spare.bucketOrd = ordsEnum.ord(); + }; } @Override @@ -615,9 +620,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException { void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedDocs) throws IOException {} @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - long subsetSize = subsetSizes.get(owningBucketOrd); - return () -> new SignificantStringTerms.Bucket(new BytesRef(), 0, subsetSize, 0, 0, null, format, 0); + SignificantStringTerms.Bucket buildEmptyBucket() { + return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, null, format, 0); } @Override @@ -626,20 +630,20 @@ ObjectArrayPriorityQueue buildPriorityQueue(int s } @Override - void updateBucket(SignificantStringTerms.Bucket spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) - throws IOException { - - ordsEnum.readValue(spare.termBytes); - spare.bucketOrd = ordsEnum.ord(); - spare.subsetDf = docCount; - spare.supersetDf = backgroundFrequencies.freq(spare.termBytes); - spare.supersetSize = supersetSize; - /* - * During shard-local down-selection we use subset/superset stats - * that are for this shard only. Back at the central reducer these - * properties will be updated with global stats. - */ - spare.updateScore(significanceHeuristic); + BucketUpdater bucketUpdater(long owningBucketOrd) { + long subsetSize = subsetSizes.get(owningBucketOrd); + return (spare, ordsEnum, docCount) -> { + ordsEnum.readValue(spare.termBytes); + spare.bucketOrd = ordsEnum.ord(); + spare.subsetDf = docCount; + spare.supersetDf = backgroundFrequencies.freq(spare.termBytes); + /* + * During shard-local down-selection we use subset/superset stats + * that are for this shard only. Back at the central reducer these + * properties will be updated with global stats. 
+ */ + spare.updateScore(significanceHeuristic, subsetSize, supersetSize); + }; } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java index ce89b95b76a05..5d4c15d8a3b80 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java @@ -43,7 +43,6 @@ import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; -import java.util.function.Supplier; import static java.util.Collections.emptyList; import static org.elasticsearch.search.aggregations.InternalOrder.isKeyOrder; @@ -177,7 +176,7 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro try (ObjectArrayPriorityQueue ordered = buildPriorityQueue(size)) { B spare = null; BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrd); - Supplier emptyBucketBuilder = emptyBucketBuilder(owningBucketOrd); + BucketUpdater bucketUpdater = bucketUpdater(owningBucketOrd); while (ordsEnum.next()) { long docCount = bucketDocCount(ordsEnum.ord()); otherDocCounts.increment(ordIdx, docCount); @@ -186,9 +185,9 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro } if (spare == null) { checkRealMemoryCBForInternalBucket(); - spare = emptyBucketBuilder.get(); + spare = buildEmptyBucket(); } - updateBucket(spare, ordsEnum, docCount); + bucketUpdater.updateBucket(spare, ordsEnum, docCount); spare = ordered.insertWithOverflow(spare); } @@ -240,17 +239,16 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract B[] buildBuckets(int size); /** - * Build a {@linkplain Supplier} that can be used to build "empty" - * buckets. Those buckets will then be {@link #updateBucket updated} + * Build an empty bucket. Those buckets will then be {@link #bucketUpdater(long)} updated} * for each collected bucket. */ - abstract Supplier emptyBucketBuilder(long owningBucketOrd); + abstract B buildEmptyBucket(); /** * Update fields in {@code spare} to reflect information collected for * this bucket ordinal. */ - abstract void updateBucket(B spare, BucketOrdsEnum ordsEnum, long docCount) throws IOException; + abstract BucketUpdater bucketUpdater(long owningBucketOrd); /** * Build a {@link ObjectArrayPriorityQueue} to sort the buckets. 
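// Design note on bucketUpdater(owningBucketOrd) above (inferred from this
// refactor, not stated in the source): per-owning-bucket state, such as
// subsetSizes.get(owningBucketOrd) in the significant-terms strategies, is now
// fetched once and captured by the returned lambda instead of being stored on
// every spare bucket. A sketch of the collection loop the two new pieces plug
// into (shape follows buildAggregations above; B stands for the bucket type):
//
//     B spare = buildEmptyBucket();
//     BucketUpdater<B> updater = bucketUpdater(owningBucketOrd);
//     while (ordsEnum.next()) {
//         updater.updateBucket(spare, ordsEnum, bucketDocCount(ordsEnum.ord()));
//     }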
After we've @@ -282,6 +280,10 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract R buildEmptyResult(); } + interface BucketUpdater { + void updateBucket(B spare, BucketOrdsEnum ordsEnum, long docCount) throws IOException; + } + abstract class StandardTermsResultStrategy, B extends InternalTerms.Bucket> extends ResultStrategy { protected final boolean showTermDocCountError; @@ -305,13 +307,6 @@ final void buildSubAggs(ObjectArray topBucketsPerOrd) throws IOException { buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); } - @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - return this::buildEmptyBucket; - } - - abstract B buildEmptyBucket(); - @Override final void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedDocs) throws IOException { if (bucketCountThresholds.getMinDocCount() != 0) { @@ -375,10 +370,12 @@ LongTerms.Bucket buildEmptyBucket() { } @Override - void updateBucket(LongTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) { - spare.term = ordsEnum.value(); - spare.docCount = docCount; - spare.bucketOrd = ordsEnum.ord(); + BucketUpdater bucketUpdater(long owningBucketOrd) { + return (LongTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) -> { + spare.term = ordsEnum.value(); + spare.docCount = docCount; + spare.bucketOrd = ordsEnum.ord(); + }; } @Override @@ -457,10 +454,12 @@ DoubleTerms.Bucket buildEmptyBucket() { } @Override - void updateBucket(DoubleTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) { - spare.term = NumericUtils.sortableLongToDouble(ordsEnum.value()); - spare.docCount = docCount; - spare.bucketOrd = ordsEnum.ord(); + BucketUpdater bucketUpdater(long owningBucketOrd) { + return (DoubleTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) -> { + spare.term = NumericUtils.sortableLongToDouble(ordsEnum.value()); + spare.docCount = docCount; + spare.bucketOrd = ordsEnum.ord(); + }; } @Override @@ -565,20 +564,22 @@ SignificantLongTerms.Bucket[] buildBuckets(int size) { } @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - long subsetSize = subsetSizes.get(owningBucketOrd); - return () -> new SignificantLongTerms.Bucket(0, subsetSize, 0, supersetSize, 0, null, format, 0); + SignificantLongTerms.Bucket buildEmptyBucket() { + return new SignificantLongTerms.Bucket(0, 0, 0, null, format, 0); } @Override - void updateBucket(SignificantLongTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) throws IOException { - spare.term = ordsEnum.value(); - spare.subsetDf = docCount; - spare.supersetDf = backgroundFrequencies.freq(spare.term); - spare.bucketOrd = ordsEnum.ord(); - // During shard-local down-selection we use subset/superset stats that are for this shard only - // Back at the central reducer these properties will be updated with global stats - spare.updateScore(significanceHeuristic); + BucketUpdater bucketUpdater(long owningBucketOrd) { + long subsetSize = subsetSizes.get(owningBucketOrd); + return (spare, ordsEnum, docCount) -> { + spare.term = ordsEnum.value(); + spare.subsetDf = docCount; + spare.supersetDf = backgroundFrequencies.freq(spare.term); + spare.bucketOrd = ordsEnum.ord(); + // During shard-local down-selection we use subset/superset stats that are for this shard only + // Back at the central reducer these properties will be updated with global stats + spare.updateScore(significanceHeuristic, subsetSize, supersetSize); + }; } @Override diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java index 2aace2a714a26..17ea290b7aaaf 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java @@ -30,23 +30,14 @@ public static class Bucket extends InternalSignificantTerms.Bucket { long term; - public Bucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - long term, - InternalAggregations aggregations, - DocValueFormat format, - double score - ) { - super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format); + public Bucket(long subsetDf, long supersetDf, long term, InternalAggregations aggregations, DocValueFormat format, double score) { + super(subsetDf, supersetDf, aggregations, format); this.term = term; this.score = score; } - Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException { - super(in, subsetSize, supersetSize, format); + Bucket(StreamInput in, DocValueFormat format) throws IOException { + super(in, format); subsetDf = in.readVLong(); supersetDf = in.readVLong(); term = in.readLong(); @@ -136,16 +127,7 @@ public SignificantLongTerms create(List buckets) { @Override public Bucket createBucket(InternalAggregations aggregations, SignificantLongTerms.Bucket prototype) { - return new Bucket( - prototype.subsetDf, - prototype.subsetSize, - prototype.supersetDf, - prototype.supersetSize, - prototype.term, - aggregations, - prototype.format, - prototype.score - ); + return new Bucket(prototype.subsetDf, prototype.supersetDf, prototype.term, aggregations, prototype.format, prototype.score); } @Override @@ -169,14 +151,7 @@ protected Bucket[] createBucketsArray(int size) { } @Override - Bucket createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - SignificantLongTerms.Bucket prototype - ) { - return new Bucket(subsetDf, subsetSize, supersetDf, supersetSize, prototype.term, aggregations, format, prototype.score); + Bucket createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, SignificantLongTerms.Bucket prototype) { + return new Bucket(subsetDf, supersetDf, prototype.term, aggregations, format, prototype.score); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java index 791c09d3cbd99..b255f17d2843b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java @@ -34,14 +34,12 @@ public static class Bucket extends InternalSignificantTerms.Bucket { public Bucket( BytesRef term, long subsetDf, - long subsetSize, long supersetDf, - long supersetSize, InternalAggregations aggregations, DocValueFormat format, double score ) { - super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format); + super(subsetDf, supersetDf, aggregations, format); this.termBytes = term; this.score = score; } @@ -49,8 +47,8 @@ public Bucket( /** * Read from a stream. 
*/ - public Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException { - super(in, subsetSize, supersetSize, format); + public Bucket(StreamInput in, DocValueFormat format) throws IOException { + super(in, format); termBytes = in.readBytesRef(); subsetDf = in.readVLong(); supersetDf = in.readVLong(); @@ -140,16 +138,7 @@ public SignificantStringTerms create(List buckets @Override public Bucket createBucket(InternalAggregations aggregations, SignificantStringTerms.Bucket prototype) { - return new Bucket( - prototype.termBytes, - prototype.subsetDf, - prototype.subsetSize, - prototype.supersetDf, - prototype.supersetSize, - aggregations, - prototype.format, - prototype.score - ); + return new Bucket(prototype.termBytes, prototype.subsetDf, prototype.supersetDf, aggregations, prototype.format, prototype.score); } @Override @@ -173,14 +162,7 @@ protected Bucket[] createBucketsArray(int size) { } @Override - Bucket createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - SignificantStringTerms.Bucket prototype - ) { - return new Bucket(prototype.termBytes, subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format, prototype.score); + Bucket createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, SignificantStringTerms.Bucket prototype) { + return new Bucket(prototype.termBytes, subsetDf, supersetDf, aggregations, format, prototype.score); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java index f02b5338eea74..e8f160193bc71 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java @@ -17,6 +17,18 @@ */ public interface SignificantTerms extends MultiBucketsAggregation, Iterable { + /** + * @return The numbers of docs in the subset (also known as "foreground set"). + * This number is equal to the document count of the containing aggregation. + */ + long getSubsetSize(); + + /** + * @return The numbers of docs in the superset (ordinarily the background count + * of the containing aggregation). + */ + long getSupersetSize(); + interface Bucket extends MultiBucketsAggregation.Bucket { /** @@ -30,24 +42,12 @@ interface Bucket extends MultiBucketsAggregation.Bucket { */ long getSubsetDf(); - /** - * @return The numbers of docs in the subset (also known as "foreground set"). - * This number is equal to the document count of the containing aggregation. - */ - long getSubsetSize(); - /** * @return The number of docs in the superset containing a particular term (also * known as the "background count" of the bucket) */ long getSupersetDf(); - /** - * @return The numbers of docs in the superset (ordinarily the background count - * of the containing aggregation). 
- */ - long getSupersetSize(); - } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java index 8bd14a46bff96..6d1370f147f36 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java @@ -40,16 +40,8 @@ public class UnmappedSignificantTerms extends InternalSignificantTerms { - private Bucket( - BytesRef term, - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - DocValueFormat format - ) { - super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format); + private Bucket(BytesRef term, long subsetDf, long supersetDf, InternalAggregations aggregations, DocValueFormat format) { + super(subsetDf, supersetDf, aggregations, format); } } @@ -95,14 +87,7 @@ protected UnmappedSignificantTerms create(long subsetSize, long supersetSize, Li } @Override - Bucket createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - Bucket prototype - ) { + Bucket createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, Bucket prototype) { throw new UnsupportedOperationException("not supported for UnmappedSignificantTerms"); } @@ -153,12 +138,12 @@ protected SignificanceHeuristic getSignificanceHeuristic() { } @Override - protected long getSubsetSize() { + public long getSubsetSize() { return 0; } @Override - protected long getSupersetSize() { + public long getSupersetSize() { return 0; } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java index 6d49d6855caca..7e5d19977fe9f 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java @@ -59,8 +59,6 @@ protected void assertSampled( InternalSignificantTerms.Bucket sampledBucket = sampledIt.next(); assertEquals(sampledBucket.subsetDf, samplingContext.scaleUp(reducedBucket.subsetDf)); assertEquals(sampledBucket.supersetDf, samplingContext.scaleUp(reducedBucket.supersetDf)); - assertEquals(sampledBucket.subsetSize, samplingContext.scaleUp(reducedBucket.subsetSize)); - assertEquals(sampledBucket.supersetSize, samplingContext.scaleUp(reducedBucket.supersetSize)); assertThat(sampledBucket.score, closeTo(reducedBucket.score, 1e-14)); } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java index a303199338783..92bfa2f6f89f4 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java @@ -49,17 +49,8 @@ public void setUp() throws Exception { Set terms = new HashSet<>(); for (int i = 0; i < numBuckets; ++i) { long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong); - 
SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket( - subsetDfs[i], - subsetSize, - supersetDfs[i], - supersetSize, - term, - aggs, - format, - 0 - ); - bucket.updateScore(significanceHeuristic); + SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket(subsetDfs[i], supersetDfs[i], term, aggs, format, 0); + bucket.updateScore(significanceHeuristic, subsetSize, supersetSize); buckets.add(bucket); } return new SignificantLongTerms(name, requiredSize, 1L, metadata, format, subsetSize, supersetSize, significanceHeuristic, buckets); @@ -90,8 +81,6 @@ public void setUp() throws Exception { randomLong(), randomNonNegativeLong(), randomNonNegativeLong(), - randomNonNegativeLong(), - randomNonNegativeLong(), InternalAggregations.EMPTY, format, 0 diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java index a91566c615eaf..7499831f371aa 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java @@ -42,17 +42,8 @@ public class SignificantStringTermsTests extends InternalSignificantTermsTestCas Set terms = new HashSet<>(); for (int i = 0; i < numBuckets; ++i) { BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAlphaOfLength(10))); - SignificantStringTerms.Bucket bucket = new SignificantStringTerms.Bucket( - term, - subsetDfs[i], - subsetSize, - supersetDfs[i], - supersetSize, - aggs, - format, - 0 - ); - bucket.updateScore(significanceHeuristic); + SignificantStringTerms.Bucket bucket = new SignificantStringTerms.Bucket(term, subsetDfs[i], supersetDfs[i], aggs, format, 0); + bucket.updateScore(significanceHeuristic, subsetSize, supersetSize); buckets.add(bucket); } return new SignificantStringTerms( @@ -93,8 +84,6 @@ public class SignificantStringTermsTests extends InternalSignificantTermsTestCas new BytesRef(randomAlphaOfLengthBetween(1, 10)), randomNonNegativeLong(), randomNonNegativeLong(), - randomNonNegativeLong(), - randomNonNegativeLong(), InternalAggregations.EMPTY, format, 0 diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java index ae5083c245538..a3c03526c9b93 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java @@ -95,22 +95,20 @@ public void testStreamResponse() throws Exception { InternalMappedSignificantTerms read = (InternalMappedSignificantTerms) in.readNamedWriteable(InternalAggregation.class); assertEquals(sigTerms.getSignificanceHeuristic(), read.getSignificanceHeuristic()); + assertThat(read.getSubsetSize(), equalTo(10L)); + assertThat(read.getSupersetSize(), equalTo(20L)); SignificantTerms.Bucket originalBucket = sigTerms.getBuckets().get(0); SignificantTerms.Bucket streamedBucket = read.getBuckets().get(0); assertThat(originalBucket.getKeyAsString(), equalTo(streamedBucket.getKeyAsString())); assertThat(originalBucket.getSupersetDf(), equalTo(streamedBucket.getSupersetDf())); 
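// For intuition (an illustrative sketch, not one of the heuristics shipped
// with Elasticsearch): the four counts that updateScore(significanceHeuristic,
// subsetSize, supersetSize) forwards to SignificanceHeuristic#getScore compare
// a term's foreground rate against its background rate, along these lines:
//
//     static double frequencyLift(long subsetDf, long subsetSize, long supersetDf, long supersetSize) {
//         double foreground = subsetSize == 0 ? 0.0 : (double) subsetDf / subsetSize;
//         double background = supersetSize == 0 ? 0.0 : (double) supersetDf / supersetSize;
//         return background == 0.0 ? 0.0 : foreground / background;
//     }
//
// Each shard scores with shard-local sizes; the central reducer re-scores with
// the globally summed sizes, which is why the sizes are now passed as arguments
// rather than kept as per-bucket fields.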
assertThat(originalBucket.getSubsetDf(), equalTo(streamedBucket.getSubsetDf())); - assertThat(streamedBucket.getSubsetSize(), equalTo(10L)); - assertThat(streamedBucket.getSupersetSize(), equalTo(20L)); } InternalMappedSignificantTerms getRandomSignificantTerms(SignificanceHeuristic heuristic) { if (randomBoolean()) { SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket( 1, - 2, 3, - 4, 123, InternalAggregations.EMPTY, DocValueFormat.RAW, @@ -121,9 +119,7 @@ public void testStreamResponse() throws Exception { SignificantStringTerms.Bucket bucket = new SignificantStringTerms.Bucket( new BytesRef("someterm"), 1, - 2, 3, - 4, InternalAggregations.EMPTY, DocValueFormat.RAW, randomDoubleBetween(0, 100, true) @@ -136,15 +132,13 @@ public void testReduce() { List aggs = createInternalAggregations(); AggregationReduceContext context = InternalAggregationTestCase.emptyReduceContextBuilder().forFinalReduction(); SignificantTerms reducedAgg = (SignificantTerms) InternalAggregationTestCase.reduce(aggs, context); + assertThat(reducedAgg.getSubsetSize(), equalTo(16L)); + assertThat(reducedAgg.getSupersetSize(), equalTo(30L)); assertThat(reducedAgg.getBuckets().size(), equalTo(2)); assertThat(reducedAgg.getBuckets().get(0).getSubsetDf(), equalTo(8L)); - assertThat(reducedAgg.getBuckets().get(0).getSubsetSize(), equalTo(16L)); assertThat(reducedAgg.getBuckets().get(0).getSupersetDf(), equalTo(10L)); - assertThat(reducedAgg.getBuckets().get(0).getSupersetSize(), equalTo(30L)); assertThat(reducedAgg.getBuckets().get(1).getSubsetDf(), equalTo(8L)); - assertThat(reducedAgg.getBuckets().get(1).getSubsetSize(), equalTo(16L)); assertThat(reducedAgg.getBuckets().get(1).getSupersetDf(), equalTo(10L)); - assertThat(reducedAgg.getBuckets().get(1).getSupersetSize(), equalTo(30L)); } public void testBasicScoreProperties() { @@ -234,9 +228,9 @@ private List createInternalAggregations() { : new AbstractSignificanceHeuristicTestCase.LongTestAggFactory(); List aggs = new ArrayList<>(); - aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 0))); - aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 1))); - aggs.add(factory.createAggregation(significanceHeuristic, 8, 10, 2, (f, i) -> f.createBucket(4, 4, 5, 10, i))); + aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 5, 0))); + aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 5, 1))); + aggs.add(factory.createAggregation(significanceHeuristic, 8, 10, 2, (f, i) -> f.createBucket(4, 5, i))); return aggs; } @@ -254,7 +248,7 @@ final A createAggregation( abstract A createAggregation(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize, List buckets); - abstract B createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label); + abstract B createBucket(long subsetDF, long supersetDF, long label); } private class StringTestAggFactory extends TestAggFactory { @@ -279,13 +273,11 @@ SignificantStringTerms createAggregation( } @Override - SignificantStringTerms.Bucket createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) { + SignificantStringTerms.Bucket createBucket(long subsetDF, long supersetDF, long label) { return new SignificantStringTerms.Bucket( new BytesRef(Long.toString(label).getBytes(StandardCharsets.UTF_8)), subsetDF, - subsetSize, supersetDF, - supersetSize, 
InternalAggregations.EMPTY, DocValueFormat.RAW, 0 @@ -315,17 +307,8 @@ SignificantLongTerms createAggregation( } @Override - SignificantLongTerms.Bucket createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) { - return new SignificantLongTerms.Bucket( - subsetDF, - subsetSize, - supersetDF, - supersetSize, - label, - InternalAggregations.EMPTY, - DocValueFormat.RAW, - 0 - ); + SignificantLongTerms.Bucket createBucket(long subsetDF, long supersetDF, long label) { + return new SignificantLongTerms.Bucket(subsetDF, supersetDF, label, InternalAggregations.EMPTY, DocValueFormat.RAW, 0); } } From fc266e5ea9e9c83c16f006f8d53ad481530273aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 3 Dec 2024 07:50:18 +0100 Subject: [PATCH 06/28] [Profiling] Switch to 19Hz sampling frequency (#117757) * [Profiling] Switch to 19Hz sampling frequency * Fix internalClusterTest --- .../xpack/profiling/action/GetStackTracesActionIT.java | 8 ++++---- .../xpack/profiling/action/CO2Calculator.java | 2 +- .../xpack/profiling/action/CostCalculator.java | 2 +- .../xpack/profiling/action/CO2CalculatorTests.java | 4 ++-- .../xpack/profiling/action/CostCalculatorTests.java | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java b/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java index 6463cda554e5b..4b3a4fb0108f7 100644 --- a/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java +++ b/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java @@ -46,8 +46,8 @@ public void testGetStackTracesUnfiltered() throws Exception { assertEquals(18, stackTrace.fileIds.length); assertEquals(18, stackTrace.frameIds.length); assertEquals(18, stackTrace.typeIds.length); - assertEquals(0.0000048475146d, stackTrace.annualCO2Tons, 0.0000000001d); - assertEquals(0.18834d, stackTrace.annualCostsUSD, 0.00001d); + assertEquals(0.0000051026469d, stackTrace.annualCO2Tons, 0.0000000001d); + assertEquals(0.19825d, stackTrace.annualCostsUSD, 0.00001d); // not determined by default assertNull(stackTrace.subGroups); @@ -91,8 +91,8 @@ public void testGetStackTracesGroupedByServiceName() throws Exception { assertEquals(18, stackTrace.fileIds.length); assertEquals(18, stackTrace.frameIds.length); assertEquals(18, stackTrace.typeIds.length); - assertEquals(0.0000048475146d, stackTrace.annualCO2Tons, 0.0000000001d); - assertEquals(0.18834d, stackTrace.annualCostsUSD, 0.00001d); + assertEquals(0.0000051026469d, stackTrace.annualCO2Tons, 0.0000000001d); + assertEquals(0.19825d, stackTrace.annualCostsUSD, 0.00001d); assertEquals(Long.valueOf(2L), stackTrace.subGroups.getCount("basket")); assertNotNull(response.getStackFrames()); diff --git a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java index fbd5f7a9b5328..0a05fc5930942 100644 --- a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java +++ b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java @@ -12,7 +12,7 @@ import java.util.Map; final class CO2Calculator { - private static 
final double DEFAULT_SAMPLING_FREQUENCY = 20.0d; + private static final double DEFAULT_SAMPLING_FREQUENCY = 19.0d; private static final double DEFAULT_CO2_TONS_PER_KWH = 0.000379069d; // unit: metric tons / kWh private static final double DEFAULT_KILOWATTS_PER_CORE_X86 = 7.0d / 1000.0d; // unit: watt / core private static final double DEFAULT_KILOWATTS_PER_CORE_ARM64 = 2.8d / 1000.0d; // unit: watt / core diff --git a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java index b8ee54f5f29e8..05b51adb6a52f 100644 --- a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java +++ b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java @@ -10,7 +10,7 @@ import java.util.Map; final class CostCalculator { - private static final double DEFAULT_SAMPLING_FREQUENCY = 20.0d; + private static final double DEFAULT_SAMPLING_FREQUENCY = 19.0d; private static final double SECONDS_PER_HOUR = 60 * 60; private static final double SECONDS_PER_YEAR = SECONDS_PER_HOUR * 24 * 365.0d; // unit: seconds public static final double DEFAULT_COST_USD_PER_CORE_HOUR = 0.0425d; // unit: USD / (core * hour) diff --git a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java index ff698465a56c5..9be98fbe4f46b 100644 --- a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java +++ b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java @@ -73,7 +73,7 @@ public void testCreateFromRegularSource() { double samplingDurationInSeconds = 1_800.0d; // 30 minutes long samples = 100_000L; // 100k samples - double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 20.0d); + double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 19.0d); CO2Calculator co2Calculator = new CO2Calculator(hostsTable, samplingDurationInSeconds, null, null, null, null); checkCO2Calculation(co2Calculator.getAnnualCO2Tons(HOST_ID_A, samples), annualCoreHours, 1.135d, 0.0002786d, 7.0d); @@ -110,7 +110,7 @@ public void testCreateFromMalformedSource() { double samplingDurationInSeconds = 1_800.0d; // 30 minutes long samples = 100_000L; // 100k samples - double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 20.0d); + double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 19.0d); CO2Calculator co2Calculator = new CO2Calculator(hostsTable, samplingDurationInSeconds, null, null, null, null); checkCO2Calculation(co2Calculator.getAnnualCO2Tons(HOST_ID_A, samples), annualCoreHours, 1.135d, 0.0002786d, 7.0d); diff --git a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java index eaf6cf618eddb..1c719c97164dc 100644 --- a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java +++ b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java @@ -63,7 +63,7 @@ public void testCreateFromRegularSource() { double 
samplingDurationInSeconds = 1_800.0d; // 30 minutes long samples = 100_000L; // 100k samples - double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 20.0d); + double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 19.0d); CostCalculator costCalculator = new CostCalculator(hostsTable, samplingDurationInSeconds, null, null, null); // Checks whether the cost calculation is based on the lookup data. From 564e13e2ba49ac78c8c142f9b29481e56c498c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 3 Dec 2024 08:56:20 +0100 Subject: [PATCH 07/28] [Profiling] Add field profiling.agent.config.sampling_frequency to profiling-hosts (#117752) --- .../profiling/component-template/profiling-hosts.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json b/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json index e58a3cbd39f97..50f3ab6bf9a08 100644 --- a/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json +++ b/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json @@ -135,6 +135,9 @@ }, "config.present_cpu_cores": { "type": "integer" + }, + "config.sampling_frequency": { + "type": "integer" + } } }, From cbb08babdbd7d8f42426df7984caa2d587b26ff7 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 3 Dec 2024 10:52:14 +0200 Subject: [PATCH 08/28] Remove RestApiVersion#V_7 references for 9.0.0 (#117572) Address and remove references to org.elasticsearch.core.RestApiVersion#V_7 from the search-related code.
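At the API level this means the multi search body must strictly alternate one metadata line and one search-body line, each terminated by a newline. The V7-only leniencies removed below (an empty line before the first metadata object, and "type"/"types" keys in the metadata section) are no longer tolerated, and an unknown metadata key is rejected with "key [...] is not supported in the metadata section". A minimal sketch of a well-formed body, borrowing the text-block style of the tests changed below (indices and queries are illustrative, not taken from the reference docs):

    // Two searches in one msearch request; every odd line must be metadata.
    String msearchBody = """
        {"index": "test"}
        {"query": {"match_all": {}}}
        {"index": "test", "ignore_unavailable": true}
        {"query": {"match_all": {}}}
        """;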
--- docs/changelog/117572.yaml | 5 + .../action/search/MultiSearchRequest.java | 27 +---- .../termvectors/TermVectorsRequest.java | 6 - .../index/query/CommonTermsQueryBuilder.java | 72 ------------ .../index/query/TypeQueryV7Builder.java | 108 ------------------ .../document/RestTermVectorsAction.java | 1 - .../action/search/RestMultiSearchAction.java | 3 - .../elasticsearch/search/SearchModule.java | 26 ----- .../MovAvgPipelineAggregationBuilder.java | 85 -------------- .../search/sort/FieldSortBuilder.java | 8 -- .../search/sort/ScriptSortBuilder.java | 9 -- .../search/sort/SortBuilder.java | 8 -- .../search/MultiSearchRequestTests.java | 30 ++--- 13 files changed, 20 insertions(+), 368 deletions(-) create mode 100644 docs/changelog/117572.yaml delete mode 100644 server/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java delete mode 100644 server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java delete mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java diff --git a/docs/changelog/117572.yaml b/docs/changelog/117572.yaml new file mode 100644 index 0000000000000..a4a2ef6c06f5d --- /dev/null +++ b/docs/changelog/117572.yaml @@ -0,0 +1,5 @@ +pr: 117572 +summary: Address and remove any references of RestApiVersion version 7 +area: Search +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java b/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java index 8467ee6fd86f3..2022180475529 100644 --- a/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java +++ b/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java @@ -18,11 +18,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.xcontent.XContentHelper; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.rest.action.search.RestMultiSearchAction; -import org.elasticsearch.rest.action.search.RestSearchAction; import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; @@ -51,10 +47,6 @@ * A multi search API request. 
*/ public class MultiSearchRequest extends ActionRequest implements CompositeIndicesRequest { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class); - public static final String FIRST_LINE_EMPTY_DEPRECATION_MESSAGE = - "support for empty first line before any action metadata in msearch API is deprecated " - + "and will be removed in the next major version"; public static final int MAX_CONCURRENT_SEARCH_REQUESTS_DEFAULT = 0; private int maxConcurrentSearchRequests = 0; @@ -213,12 +205,6 @@ public static void readMultiLineFormat( if (nextMarker == -1) { break; } - // support first line with \n - if (parserConfig.restApiVersion() == RestApiVersion.V_7 && nextMarker == 0) { - deprecationLogger.compatibleCritical("msearch_first_line_empty", FIRST_LINE_EMPTY_DEPRECATION_MESSAGE); - from = nextMarker + 1; - continue; - } SearchRequest searchRequest = new SearchRequest(); if (indices != null) { @@ -281,14 +267,11 @@ public static void readMultiLineFormat( allowNoIndices = value; } else if ("ignore_throttled".equals(entry.getKey()) || "ignoreThrottled".equals(entry.getKey())) { ignoreThrottled = value; - } else if (parserConfig.restApiVersion() == RestApiVersion.V_7 - && ("type".equals(entry.getKey()) || "types".equals(entry.getKey()))) { - deprecationLogger.compatibleCritical("msearch_with_types", RestMultiSearchAction.TYPES_DEPRECATION_MESSAGE); - } else if (extraParamParser.apply(entry.getKey(), value, searchRequest)) { - // Skip, the parser handled the key/value - } else { - throw new IllegalArgumentException("key [" + entry.getKey() + "] is not supported in the metadata section"); - } + } else if (extraParamParser.apply(entry.getKey(), value, searchRequest)) { + // Skip, the parser handled the key/value + } else { + throw new IllegalArgumentException("key [" + entry.getKey() + "] is not supported in the metadata section"); + } } defaultOptions = IndicesOptions.fromParameters( expandWildcards, diff --git a/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java b/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java index a36158d11b5b3..7a7b2afab75d1 100644 --- a/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java @@ -20,13 +20,11 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.VersionType; -import org.elasticsearch.rest.action.document.RestTermVectorsAction; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; @@ -52,7 +50,6 @@ // It's not possible to suppress teh warning at #realtime(boolean) at a method-level. 
@SuppressWarnings("unchecked") public final class TermVectorsRequest extends SingleShardRequest implements RealtimeRequest { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(TermVectorsRequest.class); private static final ParseField INDEX = new ParseField("_index"); private static final ParseField ID = new ParseField("_id"); @@ -66,7 +63,6 @@ public final class TermVectorsRequest extends SingleShardRequest { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonTermsQueryBuilder.class); - public static final String COMMON_TERMS_QUERY_DEPRECATION_MSG = "Common Terms Query usage is not supported. " - + "Use [match] query which can efficiently skip blocks of documents if the total number of hits is not tracked."; - - @UpdateForV9(owner = UpdateForV9.Owner.SEARCH_RELEVANCE) // v7 REST API no longer exists: eliminate ref to RestApiVersion.V_7 - public static ParseField NAME_V7 = new ParseField("common").withAllDeprecated(COMMON_TERMS_QUERY_DEPRECATION_MSG) - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - - @Override - protected void doWriteTo(StreamOutput out) throws IOException { - throw new UnsupportedOperationException("common_term_query is not meant to be serialized."); - } - - @Override - protected void doXContent(XContentBuilder builder, Params params) throws IOException {} - - @Override - protected Query doToQuery(SearchExecutionContext context) throws IOException { - return null; - } - - @Override - protected boolean doEquals(CommonTermsQueryBuilder other) { - return false; - } - - @Override - protected int doHashCode() { - return 0; - } - - @Override - public String getWriteableName() { - return null; - } - - public static CommonTermsQueryBuilder fromXContent(XContentParser parser) throws IOException { - deprecationLogger.compatibleCritical("common_term_query", COMMON_TERMS_QUERY_DEPRECATION_MSG); - throw new ParsingException(parser.getTokenLocation(), COMMON_TERMS_QUERY_DEPRECATION_MSG); - } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.ZERO; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java b/server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java deleted file mode 100644 index c9aae0195acf7..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.index.query; - -import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.Query; -import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.UpdateForV9; -import org.elasticsearch.index.mapper.MapperService; -import org.elasticsearch.xcontent.ObjectParser; -import org.elasticsearch.xcontent.ParseField; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; - -import java.io.IOException; - -@UpdateForV9(owner = UpdateForV9.Owner.SEARCH_RELEVANCE) // v7 REST API no longer exists: eliminate ref to RestApiVersion.V_7 -public class TypeQueryV7Builder extends AbstractQueryBuilder { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(TypeQueryV7Builder.class); - public static final String TYPES_DEPRECATION_MESSAGE = "[types removal] Type queries are deprecated, " - + "prefer to filter on a field instead."; - - private static final String NAME = "type"; - public static final ParseField NAME_V7 = new ParseField(NAME).forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - private static final ParseField VALUE_FIELD = new ParseField("value"); - private static final ObjectParser PARSER = new ObjectParser<>(NAME, TypeQueryV7Builder::new); - - static { - PARSER.declareString( - QueryBuilder::queryName, - AbstractQueryBuilder.NAME_FIELD.forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)) - ); - PARSER.declareFloat( - QueryBuilder::boost, - AbstractQueryBuilder.BOOST_FIELD.forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)) - ); - PARSER.declareString(TypeQueryV7Builder::setValue, VALUE_FIELD.forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7))); - } - - private String value; - - public TypeQueryV7Builder() {} - - /** - * Read from a stream. 
- */ - public TypeQueryV7Builder(StreamInput in) throws IOException { - super(in); - } - - @Override - protected void doWriteTo(StreamOutput out) throws IOException {} - - @Override - protected void doXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(NAME); - builder.field(VALUE_FIELD.getPreferredName(), MapperService.SINGLE_MAPPING_NAME); - printBoostAndQueryName(builder); - builder.endObject(); - } - - @Override - protected Query doToQuery(SearchExecutionContext context) throws IOException { - return new MatchNoDocsQuery(); - } - - @Override - protected boolean doEquals(TypeQueryV7Builder other) { - return true; - } - - @Override - protected int doHashCode() { - return 0; - } - - public static TypeQueryV7Builder fromXContent(XContentParser parser) throws IOException { - deprecationLogger.compatibleCritical("type_query", TYPES_DEPRECATION_MESSAGE); - throw new ParsingException(parser.getTokenLocation(), TYPES_DEPRECATION_MESSAGE); - } - - @Override - public String getWriteableName() { - return NAME; - } - - public void setValue(String value) { - this.value = value; - } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.ZERO; - } -} diff --git a/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java b/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java index 8e41e1cd09674..d2b09af8e1f3d 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java @@ -35,7 +35,6 @@ */ @ServerlessScope(Scope.PUBLIC) public class RestTermVectorsAction extends BaseRestHandler { - public static final String TYPES_DEPRECATION_MESSAGE = "[types removal] Specifying types in term vector requests is deprecated."; @Override public List routes() { diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java b/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java index 89775b4ca8e15..24fab92ced392 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java @@ -43,9 +43,6 @@ @ServerlessScope(Scope.PUBLIC) public class RestMultiSearchAction extends BaseRestHandler { - public static final String TYPES_DEPRECATION_MESSAGE = "[types removal]" - + " Specifying types in multi search template requests is deprecated."; - private static final Set RESPONSE_PARAMS = Set.of(RestSearchAction.TYPED_KEYS_PARAM, RestSearchAction.TOTAL_HITS_AS_INT_PARAM); private final boolean allowExplicitIndex; diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 09e25350ad4fd..d282ba425b126 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -20,12 +20,10 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.BoostingQueryBuilder; import org.elasticsearch.index.query.CombinedFieldsQueryBuilder; -import 
org.elasticsearch.index.query.CommonTermsQueryBuilder; import org.elasticsearch.index.query.ConstantScoreQueryBuilder; import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.DistanceFeatureQueryBuilder; @@ -68,7 +66,6 @@ import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.TermsQueryBuilder; import org.elasticsearch.index.query.TermsSetQueryBuilder; -import org.elasticsearch.index.query.TypeQueryV7Builder; import org.elasticsearch.index.query.WildcardQueryBuilder; import org.elasticsearch.index.query.WrapperQueryBuilder; import org.elasticsearch.index.query.functionscore.ExponentialDecayFunctionBuilder; @@ -204,7 +201,6 @@ import org.elasticsearch.search.aggregations.pipeline.InternalStatsBucket; import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.SerialDiffPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregationBuilder; @@ -686,15 +682,6 @@ private ValuesSourceRegistry registerAggregations(List plugins) { .setAggregatorRegistrar(CompositeAggregationBuilder::registerAggregators), builder ); - if (RestApiVersion.minimumSupported() == RestApiVersion.V_7) { - registerQuery( - new QuerySpec<>( - CommonTermsQueryBuilder.NAME_V7, - (streamInput) -> new CommonTermsQueryBuilder(), - CommonTermsQueryBuilder::fromXContent - ) - ); - } registerFromPlugin(plugins, SearchPlugin::getAggregations, (agg) -> this.registerAggregation(agg, builder)); @@ -815,15 +802,6 @@ private void registerPipelineAggregations(List plugins) { SerialDiffPipelineAggregationBuilder::parse ) ); - if (RestApiVersion.minimumSupported() == RestApiVersion.V_7) { - registerPipelineAggregation( - new PipelineAggregationSpec( - MovAvgPipelineAggregationBuilder.NAME_V7, - MovAvgPipelineAggregationBuilder::new, - MovAvgPipelineAggregationBuilder.PARSER - ) - ); - } registerFromPlugin(plugins, SearchPlugin::getPipelineAggregations, this::registerPipelineAggregation); } @@ -1203,10 +1181,6 @@ private void registerQueryParsers(List plugins) { })); registerFromPlugin(plugins, SearchPlugin::getQueries, this::registerQuery); - - if (RestApiVersion.minimumSupported() == RestApiVersion.V_7) { - registerQuery(new QuerySpec<>(TypeQueryV7Builder.NAME_V7, TypeQueryV7Builder::new, TypeQueryV7Builder::fromXContent)); - } } private void registerIntervalsSourceProviders() { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java deleted file mode 100644 index 068487317dfe5..0000000000000 --- a/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.search.aggregations.pipeline; - -import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.UpdateForV9; -import org.elasticsearch.index.query.CommonTermsQueryBuilder; -import org.elasticsearch.xcontent.ContextParser; -import org.elasticsearch.xcontent.ParseField; -import org.elasticsearch.xcontent.XContentBuilder; - -import java.io.IOException; -import java.util.Map; - -/** - * The actual moving_avg aggregation was removed as a breaking change in 8.0. This class exists to provide a friendlier error message - * if somebody attempts to use the moving_avg aggregation via the compatible-with=7 mechanism. - * - * We can remove this class entirely when v7 rest api compatibility is dropped. - * - * @deprecated Only for 7.x rest compat - */ -@UpdateForV9(owner = UpdateForV9.Owner.SEARCH_ANALYTICS) // remove this since it's only for 7.x compat and 7.x compat will be removed in 9.0 -@Deprecated -public class MovAvgPipelineAggregationBuilder extends AbstractPipelineAggregationBuilder { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonTermsQueryBuilder.class); - public static final String MOVING_AVG_AGG_DEPRECATION_MSG = "Moving Average aggregation usage is not supported. 
" - + "Use the [moving_fn] aggregation instead."; - - public static final ParseField NAME_V7 = new ParseField("moving_avg").withAllDeprecated(MOVING_AVG_AGG_DEPRECATION_MSG) - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - - public static final ContextParser PARSER = (parser, name) -> { - deprecationLogger.compatibleCritical("moving_avg_aggregation", MOVING_AVG_AGG_DEPRECATION_MSG); - throw new ParsingException(parser.getTokenLocation(), MOVING_AVG_AGG_DEPRECATION_MSG); - }; - - public MovAvgPipelineAggregationBuilder(StreamInput in) throws IOException { - super(in, NAME_V7.getPreferredName()); - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected void doWriteTo(StreamOutput out) throws IOException { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected PipelineAggregator createInternal(Map metadata) { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected void validate(ValidationContext context) { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - public final String getWriteableName() { - return null; - } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.ZERO; - } -} diff --git a/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java index cd597f3328c0f..5691435c83ecb 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java @@ -18,7 +18,6 @@ import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.time.DateMathParser; @@ -729,13 +728,6 @@ public static FieldSortBuilder fromXContent(XContentParser parser, String fieldN PARSER.declareObject(FieldSortBuilder::setNestedSort, (p, c) -> NestedSortBuilder.fromXContent(p), NESTED_FIELD); PARSER.declareString(FieldSortBuilder::setNumericType, NUMERIC_TYPE); PARSER.declareString(FieldSortBuilder::setFormat, FORMAT); - PARSER.declareField((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_path] has been removed in favour of the [nested] parameter", c); - }, NESTED_PATH_FIELD, ValueType.STRING); - - PARSER.declareObject((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_filter] has been removed in favour of the [nested] parameter", c); - }, NESTED_FILTER_FIELD); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java index 48773eec8371b..445c55dc546bc 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java @@ -17,7 +17,6 @@ import org.apache.lucene.util.BytesRefBuilder; import 
org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -220,14 +219,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params builderParams) PARSER.declareString((b, v) -> b.order(SortOrder.fromString(v)), ORDER_FIELD); PARSER.declareString((b, v) -> b.sortMode(SortMode.fromString(v)), SORTMODE_FIELD); PARSER.declareObject(ScriptSortBuilder::setNestedSort, (p, c) -> NestedSortBuilder.fromXContent(p), NESTED_FIELD); - - PARSER.declareObject((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_path] has been removed in favour of the [nested] parameter", c); - }, NESTED_PATH_FIELD); - - PARSER.declareObject((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_filter] has been removed in favour of the [nested] parameter", c); - }, NESTED_FILTER_FIELD); } /** diff --git a/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java index 5832b93b9462f..4a8cdbcdffa55 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java @@ -18,8 +18,6 @@ import org.elasticsearch.common.io.stream.VersionedNamedWriteable; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.elasticsearch.index.mapper.NestedObjectMapper; import org.elasticsearch.index.query.QueryBuilder; @@ -52,12 +50,6 @@ public abstract class SortBuilder> // parse fields common to more than one SortBuilder public static final ParseField ORDER_FIELD = new ParseField("order"); - @UpdateForV9(owner = UpdateForV9.Owner.SEARCH_FOUNDATIONS) // v7 REST API no longer exists: eliminate ref to RestApiVersion.V_7 - public static final ParseField NESTED_FILTER_FIELD = new ParseField("nested_filter").withAllDeprecated() - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - public static final ParseField NESTED_PATH_FIELD = new ParseField("nested_path").withAllDeprecated() - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - private static final Map> PARSERS = Map.of( ScriptSortBuilder.NAME, ScriptSortBuilder::fromXContent, diff --git a/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java b/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java index f2bc561792991..9f81b999c9d98 100644 --- a/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java @@ -16,7 +16,6 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.CheckedRunnable; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.rest.RestRequest; @@ -268,12 +267,12 @@ public void testMsearchTerminatedByNewline() throws Exception { assertEquals(3, 
msearchRequest.requests().size());
     }

-    private MultiSearchRequest parseMultiSearchRequestFromString(String request, RestApiVersion restApiVersion) throws IOException {
-        return parseMultiSearchRequest(createRestRequest(request.getBytes(StandardCharsets.UTF_8), restApiVersion));
+    private MultiSearchRequest parseMultiSearchRequestFromString(String request) throws IOException {
+        return parseMultiSearchRequest(createRestRequest(request.getBytes(StandardCharsets.UTF_8)));
     }

     private MultiSearchRequest parseMultiSearchRequest(String sample) throws IOException {
-        return parseMultiSearchRequest(createRestRequest(sample, null));
+        return parseMultiSearchRequest(createRestRequest(sample));
     }

     private MultiSearchRequest parseMultiSearchRequest(RestRequest restRequest) throws IOException {
@@ -288,22 +287,13 @@ private MultiSearchRequest parseMultiSearchRequest(RestRequest restRequest) thro
         return request;
     }

-    private RestRequest createRestRequest(String sample, RestApiVersion restApiVersion) throws IOException {
+    private RestRequest createRestRequest(String sample) throws IOException {
         byte[] data = StreamsUtils.copyToBytesFromClasspath(sample);
-        return createRestRequest(data, restApiVersion);
+        return createRestRequest(data);
     }

-    private FakeRestRequest createRestRequest(byte[] data, RestApiVersion restApiVersion) {
-        if (restApiVersion != null) {
-            final List contentTypeHeader = Collections.singletonList(
-                compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7)
-            );
-            return new FakeRestRequest.Builder(xContentRegistry()).withHeaders(
-                Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader)
-            ).withContent(new BytesArray(data), null).build();
-        } else {
-            return new FakeRestRequest.Builder(xContentRegistry()).withContent(new BytesArray(data), XContentType.JSON).build();
-        }
+    private FakeRestRequest createRestRequest(byte[] data) {
+        return new FakeRestRequest.Builder(xContentRegistry()).withContent(new BytesArray(data), XContentType.JSON).build();
     }

     @Override
@@ -517,7 +507,7 @@ public void testFailOnExtraCharacters() throws IOException {
             parseMultiSearchRequestFromString("""
                 {"index": "test"}{{{{{extra chars that shouldn't be here
                 { "query": {"match_all": {}}}
-                """, null);
+                """);
             fail("should have caught first line; extra open brackets");
         } catch (XContentParseException e) {
             assertEquals("[1:18] Unexpected token after end of object", e.getMessage());
@@ -526,7 +516,7 @@ public void testFailOnExtraCharacters() throws IOException {
             parseMultiSearchRequestFromString("""
                 {"index": "test"}
                 { "query": {"match_all": {}}}{{{{even more chars
-                """, null);
+                """);
             fail("should have caught second line");
         } catch (XContentParseException e) {
             assertEquals("[1:30] Unexpected token after end of object", e.getMessage());
@@ -535,7 +525,7 @@ public void testFailOnExtraCharacters() throws IOException {
             parseMultiSearchRequestFromString("""
                 {}
                 { "query": {"match_all": {}}}}}}different error message
-                """, null);
+                """);
             fail("should have caught second line; extra closing brackets");
         } catch (XContentParseException e) {
             assertThat(

From a514aad3c2da305b0b63d8545cab75bb2c2d3032 Mon Sep 17 00:00:00 2001
From: Dimitris Rempapis
Date: Tue, 3 Dec 2024 10:58:20 +0200
Subject: [PATCH 09/28] Fix/meta fields bad request (#117229)

A 400 rather than a 5xx error is returned when _source / _seq_no / _feature /
_nested_path / _field_names is requested via fields

---
 docs/changelog/117229.yaml | 6 ++
 .../extras/RankFeatureMetaFieldMapper.java | 2 +-
 rest-api-spec/build.gradle | 1 +
 .../test/search/520_fetch_fields.yml | 80 +++++++++++++++++--
 .../index/mapper/FieldNamesFieldMapper.java | 2 +-
 .../index/mapper/MapperFeatures.java | 5 +-
 .../index/mapper/NestedPathFieldMapper.java | 2 +-
 .../index/mapper/SeqNoFieldMapper.java | 2 +-
 .../index/mapper/SourceFieldMapper.java | 2 +-
 .../fetch/subphase/FieldFetcherTests.java | 2 +-
 10 files changed, 92 insertions(+), 12 deletions(-)
 create mode 100644 docs/changelog/117229.yaml

diff --git a/docs/changelog/117229.yaml b/docs/changelog/117229.yaml
new file mode 100644
index 0000000000000..f1b859c03e4fa
--- /dev/null
+++ b/docs/changelog/117229.yaml
@@ -0,0 +1,6 @@
+pr: 117229
+summary: "A 400 error is returned rather than a 5xx when _source / _seq_no / _feature /\
+  \ _nested_path / _field_names is requested via fields"
+area: Search
+type: bug
+issues: []
diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java
index 15398b1f178ee..ed1cc57b84863 100644
--- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java
+++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java
@@ -48,7 +48,7 @@ public String typeName() {

     @Override
     public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
-        throw new UnsupportedOperationException("Cannot fetch values for internal field [" + typeName() + "].");
+        throw new IllegalArgumentException("Cannot fetch values for internal field [" + typeName() + "].");
     }

     @Override
diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle
index 650d17e41de7f..e2af894eb0939 100644
--- a/rest-api-spec/build.gradle
+++ b/rest-api-spec/build.gradle
@@ -66,4 +66,5 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
   task.skipTest("logsdb/20_source_mapping/stored _source mode is supported", "no longer serialize source_mode")
   task.skipTest("logsdb/20_source_mapping/include/exclude is supported with stored _source", "no longer serialize source_mode")
   task.skipTest("logsdb/20_source_mapping/synthetic _source is default", "no longer serialize source_mode")
+  task.skipTest("search/520_fetch_fields/fetch _seq_no via fields", "error code is changed from 5xx to 400 in 9.0")
 })
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml
index 2b309f502f0c2..9a43199755d75 100644
--- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml
+++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml
@@ -128,18 +128,88 @@ fetch _seq_no via stored_fields:

---
fetch _seq_no via fields:
+  - requires:
+      cluster_features: ["meta_fetch_fields_error_code_changed"]
+      reason: The fields_api returns a 400 instead of a 5xx when _seq_no is requested via fields
   - do:
-      catch: "request"
+      catch: bad_request
       search:
         index: test
         body:
           fields: [ _seq_no ]

-  # This should be `unauthorized` (401) or `forbidden` (403) or at least `bad request` (400)
-  # while instead it is mapped to an `internal_server_error (500)`
-  - match: { status: 500 }
-  - match: { error.root_cause.0.type: unsupported_operation_exception }
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: illegal_argument_exception }
+  - match: {
error.root_cause.0.reason: "error fetching [_seq_no]: Cannot fetch values for internal field [_seq_no]." }
+
+---
+fetch _source via fields:
+  - requires:
+      cluster_features: ["meta_fetch_fields_error_code_changed"]
+      reason: The fields_api returns a 400 instead of a 5xx when _source is requested via fields
+
+  - do:
+      catch: bad_request
+      search:
+        index: test
+        body:
+          fields: [ _source ]
+
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: illegal_argument_exception }
+  - match: { error.root_cause.0.reason: "error fetching [_source]: Cannot fetch values for internal field [_source]." }
+
+---
+fetch _feature via fields:
+  - requires:
+      cluster_features: ["meta_fetch_fields_error_code_changed"]
+      reason: The fields_api returns a 400 instead of a 5xx when _feature is requested via fields
+
+  - do:
+      catch: bad_request
+      search:
+        index: test
+        body:
+          fields: [ _feature ]
+
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: illegal_argument_exception }
+  - match: { error.root_cause.0.reason: "error fetching [_feature]: Cannot fetch values for internal field [_feature]." }
+
+---
+fetch _nested_path via fields:
+  - requires:
+      cluster_features: ["meta_fetch_fields_error_code_changed"]
+      reason: The fields_api returns a 400 instead of a 5xx when _nested_path is requested via fields
+
+  - do:
+      catch: bad_request
+      search:
+        index: test
+        body:
+          fields: [ _nested_path ]
+
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: illegal_argument_exception }
+  - match: { error.root_cause.0.reason: "error fetching [_nested_path]: Cannot fetch values for internal field [_nested_path]." }
+
+---
+fetch _field_names via fields:
+  - requires:
+      cluster_features: ["meta_fetch_fields_error_code_changed"]
+      reason: The fields_api returns a 400 instead of a 5xx when _field_names is requested via fields
+
+  - do:
+      catch: bad_request
+      search:
+        index: test
+        body:
+          fields: [ _field_names ]
+
+  - match: { status: 400 }
+  - match: { error.root_cause.0.type: illegal_argument_exception }
+  - match: { error.root_cause.0.reason: "error fetching [_field_names]: Cannot fetch values for internal field [_field_names]."
} --- fetch fields with none stored_fields: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java index 565b1ff28a39f..425e3c664c262 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java @@ -135,7 +135,7 @@ public boolean isEnabled() { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "]."); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 333c37381c587..bf6c729f95653 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -61,6 +61,8 @@ public Set getFeatures() { "mapper.constant_keyword.synthetic_source_write_fix" ); + public static final NodeFeature META_FETCH_FIELDS_ERROR_CODE_CHANGED = new NodeFeature("meta_fetch_fields_error_code_changed"); + @Override public Set getTestFeatures() { return Set.of( @@ -71,7 +73,8 @@ public Set getTestFeatures() { IgnoredSourceFieldMapper.IGNORED_SOURCE_AS_TOP_LEVEL_METADATA_ARRAY_FIELD, IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS, MapperService.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT, - CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX + CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX, + META_FETCH_FIELDS_ERROR_CODE_CHANGED ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java index b22c3a12fcda3..1cd752dc34403 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java @@ -67,7 +67,7 @@ public Query existsQuery(SearchExecutionContext context) { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "]."); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java index e126102b0f3c2..66ee42dfc56f9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java @@ -168,7 +168,7 @@ public boolean mayExistInIndex(SearchExecutionContext context) { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "]."); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index b97e04fcddb5d..1cea8154aad43 100644 --- 
a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java
@@ -325,7 +325,7 @@ public String typeName() {

     @Override
     public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
-        throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "].");
+        throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "].");
     }

     @Override
diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java
index f01f760ed71c3..c5f1efe561c22 100644
--- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java
+++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java
@@ -271,7 +271,7 @@ public void testMetadataFields() throws IOException {
             FieldNamesFieldMapper.NAME,
             NestedPathFieldMapper.name(IndexVersion.current())
         )) {
-            expectThrows(UnsupportedOperationException.class, () -> fetchFields(mapperService, source, fieldname));
+            expectThrows(IllegalArgumentException.class, () -> fetchFields(mapperService, source, fieldname));
         }
     }

From b1412f65b90893c3d29756c921c32d39f3172a65 Mon Sep 17 00:00:00 2001
From: Luca Cavanna
Date: Tue, 3 Dec 2024 10:57:05 +0100
Subject: [PATCH 10/28] Clean up search timeout handling code (#116678)

TimeExceededException was made public to be able to catch it outside of the
search.internal package. That is rather dangerous, because we really need it
to be created only from `ContextIndexSearcher#throwTimeExceededException`.
This commit makes its constructor private to prevent it from being created
outside of ContextIndexSearcher. It also adds javadocs around that.
I took the chance to also share the timeout handling code that is now
copy-pasted in different places.
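To illustrate the consolidated pattern, here is a minimal, self-contained sketch of the
shared throw-or-flag decision. `handleTimeout` mirrors the helper added in the diff below,
while the tiny `QuerySearchResult` and shard-target stand-ins are assumptions made only so
the example compiles and runs on its own:

```java
// Hedged sketch only: the real classes live in org.elasticsearch.search.query and
// org.elasticsearch.search; everything here is simplified for illustration.
final class TimeoutHandlingSketch {

    // Stand-in for QuerySearchResult, which records whether the search timed out.
    static final class QuerySearchResult {
        private boolean searchTimedOut;

        void searchTimedOut(boolean timedOut) {
            this.searchTimedOut = timedOut;
        }

        boolean searchTimedOut() {
            return searchTimedOut;
        }
    }

    // Stand-in for SearchTimeoutException; the real one is a full ElasticsearchException
    // that maps to HTTP 504 (GATEWAY_TIMEOUT).
    static final class SearchTimeoutException extends RuntimeException {
        SearchTimeoutException(String shardTarget, String message) {
            super(shardTarget + ": " + message);
        }
    }

    // The shared logic: throw when partial results are disallowed, otherwise flag the
    // result and let execution continue with whatever was collected so far.
    static void handleTimeout(boolean allowPartialSearchResults, String shardTarget, QuerySearchResult result) {
        if (allowPartialSearchResults == false) {
            throw new SearchTimeoutException(shardTarget, "Time exceeded");
        }
        result.searchTimedOut(true);
    }

    public static void main(String[] args) {
        QuerySearchResult result = new QuerySearchResult();
        handleTimeout(true, "[shard-0]", result); // allowed: just flags the result
        System.out.println("timed out, partial results returned: " + result.searchTimedOut());
        try {
            handleTimeout(false, "[shard-0]", result); // disallowed: propagates
        } catch (SearchTimeoutException e) {
            System.out.println("propagated: " + e.getMessage());
        }
    }
}
```

The query, rescore, and fetch phases below all funnel through this single decision point
instead of duplicating the throw-or-flag logic.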
--- .../search/fetch/FetchPhase.java | 7 +--- .../search/fetch/FetchPhaseDocsIterator.java | 41 +++++++++---------- .../search/internal/ContextIndexSearcher.java | 18 +++++--- .../search/query/QueryPhase.java | 9 ++-- .../search/query/SearchTimeoutException.java | 13 ++++++ .../search/rescore/RescorePhase.java | 9 ++-- .../fetch/FetchPhaseDocsIteratorTests.java | 8 +++- 7 files changed, 63 insertions(+), 42 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index 546586a9ff3c3..2fbe3c1fc1532 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -195,13 +195,10 @@ protected SearchHit nextDoc(int doc) throws IOException { context.shardTarget(), context.searcher().getIndexReader(), docIdsToLoad, - context.request().allowPartialSearchResults() + context.request().allowPartialSearchResults(), + context.queryResult() ); - if (docsIterator.isTimedOut()) { - context.queryResult().searchTimedOut(true); - } - if (context.isCancelled()) { for (SearchHit hit : hits) { // release all hits that would otherwise become owned and eventually released by SearchHits below diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java index df4e7649ffd3b..4a242f70e8d02 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java @@ -16,6 +16,7 @@ import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.internal.ContextIndexSearcher; +import org.elasticsearch.search.query.QuerySearchResult; import org.elasticsearch.search.query.SearchTimeoutException; import java.io.IOException; @@ -30,12 +31,6 @@ */ abstract class FetchPhaseDocsIterator { - private boolean timedOut = false; - - public boolean isTimedOut() { - return timedOut; - } - /** * Called when a new leaf reader is reached * @param ctx the leaf reader for this set of doc ids @@ -53,7 +48,13 @@ public boolean isTimedOut() { /** * Iterate over a set of docsIds within a particular shard and index reader */ - public final SearchHit[] iterate(SearchShardTarget shardTarget, IndexReader indexReader, int[] docIds, boolean allowPartialResults) { + public final SearchHit[] iterate( + SearchShardTarget shardTarget, + IndexReader indexReader, + int[] docIds, + boolean allowPartialResults, + QuerySearchResult querySearchResult + ) { SearchHit[] searchHits = new SearchHit[docIds.length]; DocIdToIndex[] docs = new DocIdToIndex[docIds.length]; for (int index = 0; index < docIds.length; index++) { @@ -69,12 +70,10 @@ public final SearchHit[] iterate(SearchShardTarget shardTarget, IndexReader inde int[] docsInLeaf = docIdsInLeaf(0, endReaderIdx, docs, ctx.docBase); try { setNextReader(ctx, docsInLeaf); - } catch (ContextIndexSearcher.TimeExceededException timeExceededException) { - if (allowPartialResults) { - timedOut = true; - return SearchHits.EMPTY; - } - throw new SearchTimeoutException(shardTarget, "Time exceeded"); + } catch (ContextIndexSearcher.TimeExceededException e) { + SearchTimeoutException.handleTimeout(allowPartialResults, shardTarget, querySearchResult); + assert allowPartialResults; + return SearchHits.EMPTY; } for (int i = 0; i < docs.length; i++) 
{ try { @@ -88,15 +87,15 @@ public final SearchHit[] iterate(SearchShardTarget shardTarget, IndexReader inde currentDoc = docs[i].docId; assert searchHits[docs[i].index] == null; searchHits[docs[i].index] = nextDoc(docs[i].docId); - } catch (ContextIndexSearcher.TimeExceededException timeExceededException) { - if (allowPartialResults) { - timedOut = true; - SearchHit[] partialSearchHits = new SearchHit[i]; - System.arraycopy(searchHits, 0, partialSearchHits, 0, i); - return partialSearchHits; + } catch (ContextIndexSearcher.TimeExceededException e) { + if (allowPartialResults == false) { + purgeSearchHits(searchHits); } - purgeSearchHits(searchHits); - throw new SearchTimeoutException(shardTarget, "Time exceeded"); + SearchTimeoutException.handleTimeout(allowPartialResults, shardTarget, querySearchResult); + assert allowPartialResults; + SearchHit[] partialSearchHits = new SearchHit[i]; + System.arraycopy(searchHits, 0, partialSearchHits, 0, i); + return partialSearchHits; } } } catch (SearchTimeoutException e) { diff --git a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java index 78d90377cdc3f..9f990fbd97cdf 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java @@ -169,8 +169,8 @@ public void setProfiler(QueryProfiler profiler) { * Add a {@link Runnable} that will be run on a regular basis while accessing documents in the * DirectoryReader but also while collecting them and check for query cancellation or timeout. */ - public Runnable addQueryCancellation(Runnable action) { - return this.cancellable.add(action); + public void addQueryCancellation(Runnable action) { + this.cancellable.add(action); } /** @@ -425,8 +425,16 @@ public void throwTimeExceededException() { } } - public static class TimeExceededException extends RuntimeException { + /** + * Exception thrown whenever a search timeout occurs. May be thrown by {@link ContextIndexSearcher} or {@link ExitableDirectoryReader}. 
+     */
+    public static final class TimeExceededException extends RuntimeException {
         // This exception should never be re-thrown, but we fill in the stacktrace to be able to trace where it does not get properly caught
+
+        /**
+         * Created via {@link #throwTimeExceededException()}
+         */
+        private TimeExceededException() {}
     }

     @Override
@@ -570,14 +578,12 @@ public DirectoryReader getDirectoryReader() {
     }

     private static class MutableQueryTimeout implements ExitableDirectoryReader.QueryCancellation {
-
         private final List runnables = new ArrayList<>();

-        private Runnable add(Runnable action) {
+        private void add(Runnable action) {
             Objects.requireNonNull(action, "cancellation runnable should not be null");
             assert runnables.contains(action) == false : "Cancellation runnable already added";
             runnables.add(action);
-            return action;
         }

         private void remove(Runnable action) {
diff --git a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java
index af65c30b49dcf..3036a295d459a 100644
--- a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java
+++ b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java
@@ -217,10 +217,11 @@ static void addCollectorsAndSearch(SearchContext searchContext) throws QueryPhas
         queryResult.topDocs(queryPhaseResult.topDocsAndMaxScore(), queryPhaseResult.sortValueFormats());
         if (searcher.timeExceeded()) {
             assert timeoutRunnable != null : "TimeExceededException thrown even though timeout wasn't set";
-            if (searchContext.request().allowPartialSearchResults() == false) {
-                throw new SearchTimeoutException(searchContext.shardTarget(), "Time exceeded");
-            }
-            queryResult.searchTimedOut(true);
+            SearchTimeoutException.handleTimeout(
+                searchContext.request().allowPartialSearchResults(),
+                searchContext.shardTarget(),
+                searchContext.queryResult()
+            );
         }
         if (searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER) {
             queryResult.terminatedEarly(queryPhaseResult.terminatedAfter());
diff --git a/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java b/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java
index 0ed64811fee28..e006f176ff91a 100644
--- a/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java
+++ b/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java
@@ -33,4 +33,17 @@ public SearchTimeoutException(StreamInput in) throws IOException {
     public RestStatus status() {
         return RestStatus.GATEWAY_TIMEOUT;
     }
+
+    /**
+     * Propagate a timeout according to whether partial search results are allowed or not.
+     * In case partial results are allowed, a flag will be set on the provided {@link QuerySearchResult} to indicate that there was a
+     * timeout, but the execution will continue and partial results will be returned to the user.
+     * When partial results are disallowed, a {@link SearchTimeoutException} will be thrown and returned to the user.
+ */ + public static void handleTimeout(boolean allowPartialSearchResults, SearchShardTarget target, QuerySearchResult querySearchResult) { + if (allowPartialSearchResults == false) { + throw new SearchTimeoutException(target, "Time exceeded"); + } + querySearchResult.searchTimedOut(true); + } } diff --git a/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java b/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java index 1227db5d8e1db..7e3646e7689cc 100644 --- a/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java +++ b/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java @@ -73,10 +73,11 @@ public static void execute(SearchContext context) { } catch (IOException e) { throw new ElasticsearchException("Rescore Phase Failed", e); } catch (ContextIndexSearcher.TimeExceededException e) { - if (context.request().allowPartialSearchResults() == false) { - throw new SearchTimeoutException(context.shardTarget(), "Time exceeded"); - } - context.queryResult().searchTimedOut(true); + SearchTimeoutException.handleTimeout( + context.request().allowPartialSearchResults(), + context.shardTarget(), + context.queryResult() + ); } } diff --git a/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java b/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java index d5e930321db95..c8d1b6721c64b 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java @@ -17,6 +17,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.query.QuerySearchResult; import org.elasticsearch.test.ESTestCase; import java.io.IOException; @@ -77,7 +78,7 @@ protected SearchHit nextDoc(int doc) { } }; - SearchHit[] hits = it.iterate(null, reader, docs, randomBoolean()); + SearchHit[] hits = it.iterate(null, reader, docs, randomBoolean(), new QuerySearchResult()); assertThat(hits.length, equalTo(docs.length)); for (int i = 0; i < hits.length; i++) { @@ -125,7 +126,10 @@ protected SearchHit nextDoc(int doc) { } }; - Exception e = expectThrows(FetchPhaseExecutionException.class, () -> it.iterate(null, reader, docs, randomBoolean())); + Exception e = expectThrows( + FetchPhaseExecutionException.class, + () -> it.iterate(null, reader, docs, randomBoolean(), new QuerySearchResult()) + ); assertThat(e.getMessage(), containsString("Error running fetch phase for doc [" + badDoc + "]")); assertThat(e.getCause(), instanceOf(IllegalArgumentException.class)); From 76a382a78d728d90cc84fa3fbcfe61ba1c1e8db2 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 3 Dec 2024 12:24:55 +0100 Subject: [PATCH 11/28] ESQL: Enable CATEGORIZE tests on non-snapshot builds (#117881) --- .../org/elasticsearch/xpack/esql/action/EsqlCapabilities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index a93590d7a5bc2..646c4f8240c3e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -407,7 +407,7 @@ public enum Cap { /** * Supported the 
text categorization function "CATEGORIZE".
      */
-    CATEGORIZE_V4(Build.current().isSnapshot()),
+    CATEGORIZE_V4,

     /**
      * QSTR function

From cf9687f56de49bf5f07152b70b388d3f971aa9a5 Mon Sep 17 00:00:00 2001
From: Alexander Spies
Date: Tue, 3 Dec 2024 13:08:02 +0100
Subject: [PATCH 12/28] ESQL: Fix layout when aggregating with aliases (#117837)

Forward-port of #117832
Only really relevant for bwc with 8.11/8.12; port for consistency with 8.x

---
 .../planner/AbstractPhysicalOperationProviders.java | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java
index 69e2d1c45aa3c..35aba7665ec87 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java
@@ -120,10 +120,14 @@ public final PhysicalOperation groupingPhysicalOperation(
              * - before stats (keep x = a | stats by x) which requires the partial input to use a's channel
              * - after stats (stats by a | keep x = a) which causes the output layout to refer to the follow-up alias
              */
+            // TODO: This is likely required only for pre-8.14 node compatibility; confirm and remove if possible.
+            // Since https://github.com/elastic/elasticsearch/pull/104958, it shouldn't be possible to have aliases in the aggregates
+            // which the groupings refer to. Except for `BY CATEGORIZE(field)`, which remains as alias in the grouping, all aliases
+            // should've become EVALs before or after the STATS.
             for (NamedExpression agg : aggregates) {
                 if (agg instanceof Alias a) {
                     if (a.child() instanceof Attribute attr) {
-                        if (groupAttribute.id().equals(attr.id())) {
+                        if (sourceGroupAttribute.id().equals(attr.id())) {
                             groupAttributeLayout.nameIds().add(a.id());
                             // TODO: investigate whether a break could be used since it shouldn't be possible to have multiple
                             // attributes pointing to the same attribute
@@ -133,8 +137,8 @@ public final PhysicalOperation groupingPhysicalOperation(
                         // is in the output form
                         // if the group points to an alias declared in the aggregate, use the alias child as source
                         else if (aggregatorMode.isOutputPartial()) {
-                            if (groupAttribute.semanticEquals(a.toAttribute())) {
-                                groupAttribute = attr;
+                            if (sourceGroupAttribute.semanticEquals(a.toAttribute())) {
+                                sourceGroupAttribute = attr;
                                 break;
                             }
                         }

From 2a9a3a44dc8bcf71659df5893ef23df535967eea Mon Sep 17 00:00:00 2001
From: Yang Wang
Date: Wed, 4 Dec 2024 00:13:04 +1100
Subject: [PATCH 13/28] Add a not-master state for desired balance (#116904)

The new state prevents a long-running desired balance computation from
setting its result after the node stands down as master.
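As a concrete illustration of the concurrency guard, here is a minimal, self-contained
sketch of the compare-and-set publication pattern this change builds around the new
sentinel. The `NOT_MASTER` and `BECOME_MASTER_INITIAL` names mirror the diff below, while
the `Balance` record and the method names are simplified stand-ins, not the real
implementation:

```java
import java.util.concurrent.atomic.AtomicReference;

final class NotMasterGuardSketch {

    // Stand-in for DesiredBalance; only the sentinel identity matters here.
    record Balance(long lastConvergedIndex) {}

    static final Balance NOT_MASTER = new Balance(-2);            // node is not the master
    static final Balance BECOME_MASTER_INITIAL = new Balance(-1); // starting value on election

    private final AtomicReference<Balance> current = new AtomicReference<>(NOT_MASTER);

    // Runs on the elected master only: flip the sentinel exactly once.
    void onAllocate() {
        current.compareAndSet(NOT_MASTER, BECOME_MASTER_INITIAL);
    }

    // An async computation tries to publish its result; it must lose the race
    // (and discard the result) if the node stood down as master in the meantime.
    boolean publish(Balance computed) {
        while (true) {
            Balance previous = current.get();
            if (previous == NOT_MASTER) {
                return false; // never overwrite the sentinel
            }
            if (current.compareAndSet(previous, computed)) {
                return true;
            }
            // benign race with a concurrent update: reload and retry
        }
    }

    // Runs when the node stands down as master.
    void onNoLongerMaster() {
        current.set(NOT_MASTER);
    }

    public static void main(String[] args) {
        NotMasterGuardSketch guard = new NotMasterGuardSketch();
        guard.onAllocate();
        System.out.println(guard.publish(new Balance(42))); // true: still master
        guard.onNoLongerMaster();
        System.out.println(guard.publish(new Balance(43))); // false: stale result discarded
    }
}
```

Because the reference is only ever advanced via compareAndSet, a computation that finishes
after the node stood down observes the NOT_MASTER sentinel, discards its result, and can
never resurrect a stale balance.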
--- docs/changelog/116904.yaml | 5 ++ .../allocation/allocator/DesiredBalance.java | 9 ++- .../DesiredBalanceShardsAllocator.java | 71 ++++++++++++++----- ...nsportDeleteDesiredBalanceActionTests.java | 2 +- .../DesiredBalanceComputerTests.java | 51 +++++++++---- .../DesiredBalanceShardsAllocatorTests.java | 13 ++-- 6 files changed, 112 insertions(+), 39 deletions(-) create mode 100644 docs/changelog/116904.yaml diff --git a/docs/changelog/116904.yaml b/docs/changelog/116904.yaml new file mode 100644 index 0000000000000..46fa445f36154 --- /dev/null +++ b/docs/changelog/116904.yaml @@ -0,0 +1,5 @@ +pr: 116904 +summary: Add a not-master state for desired balance +area: Allocation +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java index 6ad44fdf3a9c0..406ca72868a40 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java @@ -40,7 +40,14 @@ public DesiredBalance(long lastConvergedIndex, Map ass this(lastConvergedIndex, assignments, Map.of(), ComputationFinishReason.CONVERGED); } - public static final DesiredBalance INITIAL = new DesiredBalance(-1, Map.of()); + /** + * The placeholder value for {@link DesiredBalance} when the node stands down as master. + */ + public static final DesiredBalance NOT_MASTER = new DesiredBalance(-2, Map.of()); + /** + * The starting value for {@link DesiredBalance} when the node becomes the master. + */ + public static final DesiredBalance BECOME_MASTER_INITIAL = new DesiredBalance(-1, Map.of()); public ShardAssignment getAssignment(ShardId shardId) { return assignments.get(shardId); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java index 72261df658ca1..8408386b8da58 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java @@ -29,6 +29,7 @@ import org.elasticsearch.cluster.service.MasterService; import org.elasticsearch.cluster.service.MasterServiceTaskQueue; import org.elasticsearch.common.Priority; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.metrics.CounterMetric; import org.elasticsearch.common.metrics.MeanMetric; import org.elasticsearch.common.settings.ClusterSettings; @@ -43,6 +44,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; /** * A {@link ShardsAllocator} which asynchronously refreshes the desired balance held by the {@link DesiredBalanceComputer} and then takes @@ -62,7 +64,7 @@ public class DesiredBalanceShardsAllocator implements ShardsAllocator { private final AtomicLong indexGenerator = new AtomicLong(-1); private final ConcurrentLinkedQueue> pendingDesiredBalanceMoves = new ConcurrentLinkedQueue<>(); private final MasterServiceTaskQueue masterServiceTaskQueue; - private volatile DesiredBalance currentDesiredBalance = DesiredBalance.INITIAL; + private final AtomicReference 
currentDesiredBalanceRef = new AtomicReference<>(DesiredBalance.NOT_MASTER); private volatile boolean resetCurrentDesiredBalance = false; private final Set processedNodeShutdowns = new HashSet<>(); private final DesiredBalanceMetrics desiredBalanceMetrics; @@ -129,6 +131,12 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { long index = desiredBalanceInput.index(); logger.debug("Starting desired balance computation for [{}]", index); + final DesiredBalance initialDesiredBalance = getInitialDesiredBalance(); + if (initialDesiredBalance == DesiredBalance.NOT_MASTER) { + logger.debug("Abort desired balance computation because node is no longer master"); + return; + } + recordTime( cumulativeComputationTime, // We set currentDesiredBalance back to INITIAL when the node stands down as master in onNoLongerMaster. @@ -137,7 +145,7 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { // lead to unexpected behaviours for tests. See also https://github.com/elastic/elasticsearch/pull/116904 () -> setCurrentDesiredBalance( desiredBalanceComputer.compute( - getInitialDesiredBalance(), + initialDesiredBalance, desiredBalanceInput, pendingDesiredBalanceMoves, this::isFresh @@ -146,7 +154,17 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { ); computationsExecuted.inc(); - if (currentDesiredBalance.finishReason() == DesiredBalance.ComputationFinishReason.STOP_EARLY) { + final DesiredBalance currentDesiredBalance = currentDesiredBalanceRef.get(); + if (currentDesiredBalance == DesiredBalance.NOT_MASTER || currentDesiredBalance == DesiredBalance.BECOME_MASTER_INITIAL) { + logger.debug( + () -> Strings.format( + "Desired balance computation for [%s] is discarded since master has concurrently changed. " + + "Current desiredBalance=[%s]", + index, + currentDesiredBalance + ) + ); + } else if (currentDesiredBalance.finishReason() == DesiredBalance.ComputationFinishReason.STOP_EARLY) { logger.debug( "Desired balance computation for [{}] terminated early with partial result, scheduling reconciliation", index @@ -164,10 +182,13 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { } private DesiredBalance getInitialDesiredBalance() { + final DesiredBalance currentDesiredBalance = currentDesiredBalanceRef.get(); if (resetCurrentDesiredBalance) { logger.info("Resetting current desired balance"); resetCurrentDesiredBalance = false; - return new DesiredBalance(currentDesiredBalance.lastConvergedIndex(), Map.of()); + return currentDesiredBalance == DesiredBalance.NOT_MASTER + ? DesiredBalance.NOT_MASTER + : new DesiredBalance(currentDesiredBalance.lastConvergedIndex(), Map.of()); } else { return currentDesiredBalance; } @@ -215,6 +236,10 @@ public void allocate(RoutingAllocation allocation, ActionListener listener var index = indexGenerator.incrementAndGet(); logger.debug("Executing allocate for [{}]", index); queue.add(index, listener); + // This can only run on master, so unset not-master if exists + if (currentDesiredBalanceRef.compareAndSet(DesiredBalance.NOT_MASTER, DesiredBalance.BECOME_MASTER_INITIAL)) { + logger.debug("initialized desired balance for becoming master"); + } desiredBalanceComputation.onNewInput(DesiredBalanceInput.create(index, allocation)); if (allocation.routingTable().indicesRouting().isEmpty()) { @@ -224,7 +249,7 @@ public void allocate(RoutingAllocation allocation, ActionListener listener // Starts reconciliation towards desired balance that might have not been updated with a recent calculation yet. 
// This is fine as balance should have incremental rather than radical changes. // This should speed up achieving the desired balance in cases current state is still different from it (due to THROTTLING). - reconcile(currentDesiredBalance, allocation); + reconcile(currentDesiredBalanceRef.get(), allocation); } private void processNodeShutdowns(ClusterState clusterState) { @@ -267,16 +292,26 @@ private static List getMoveCommands(AllocationCommands co } private void setCurrentDesiredBalance(DesiredBalance newDesiredBalance) { - if (logger.isTraceEnabled()) { - var diff = DesiredBalance.hasChanges(currentDesiredBalance, newDesiredBalance) - ? "Diff: " + DesiredBalance.humanReadableDiff(currentDesiredBalance, newDesiredBalance) - : "No changes"; - logger.trace("Desired balance updated: {}. {}", newDesiredBalance, diff); - } else { - logger.debug("Desired balance updated for [{}]", newDesiredBalance.lastConvergedIndex()); + while (true) { + final var oldDesiredBalance = currentDesiredBalanceRef.get(); + if (oldDesiredBalance == DesiredBalance.NOT_MASTER) { + logger.debug("discard desired balance for [{}] since node is no longer master", newDesiredBalance.lastConvergedIndex()); + return; + } + + if (currentDesiredBalanceRef.compareAndSet(oldDesiredBalance, newDesiredBalance)) { + if (logger.isTraceEnabled()) { + var diff = DesiredBalance.hasChanges(oldDesiredBalance, newDesiredBalance) + ? "Diff: " + DesiredBalance.humanReadableDiff(oldDesiredBalance, newDesiredBalance) + : "No changes"; + logger.trace("Desired balance updated: {}. {}", newDesiredBalance, diff); + } else { + logger.debug("Desired balance updated for [{}]", newDesiredBalance.lastConvergedIndex()); + } + computedShardMovements.inc(DesiredBalance.shardMovements(oldDesiredBalance, newDesiredBalance)); + break; + } } - computedShardMovements.inc(DesiredBalance.shardMovements(currentDesiredBalance, newDesiredBalance)); - currentDesiredBalance = newDesiredBalance; } protected void submitReconcileTask(DesiredBalance desiredBalance) { @@ -316,7 +351,7 @@ public void execute(RoutingAllocation allocation) { } public DesiredBalance getDesiredBalance() { - return currentDesiredBalance; + return currentDesiredBalanceRef.get(); } public void resetDesiredBalance() { @@ -325,7 +360,7 @@ public void resetDesiredBalance() { public DesiredBalanceStats getStats() { return new DesiredBalanceStats( - Math.max(currentDesiredBalance.lastConvergedIndex(), 0L), + Math.max(currentDesiredBalanceRef.get().lastConvergedIndex(), 0L), desiredBalanceComputation.isActive(), computationsSubmitted.count(), computationsExecuted.count(), @@ -342,7 +377,7 @@ public DesiredBalanceStats getStats() { private void onNoLongerMaster() { if (indexGenerator.getAndSet(-1) != -1) { - currentDesiredBalance = DesiredBalance.INITIAL; + currentDesiredBalanceRef.set(DesiredBalance.NOT_MASTER); queue.completeAllAsNotMaster(); pendingDesiredBalanceMoves.clear(); desiredBalanceReconciler.clear(); @@ -412,7 +447,7 @@ private static void discardSupersededTasks( // only for tests - in production, this happens after reconciliation protected final void completeToLastConvergedIndex() { - queue.complete(currentDesiredBalance.lastConvergedIndex()); + queue.complete(currentDesiredBalanceRef.get().lastConvergedIndex()); } private void recordTime(CounterMetric metric, Runnable action) { diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java 
b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java index 3dafc8f000f3f..385ac600666db 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java @@ -136,7 +136,7 @@ public DesiredBalance compute( safeAwait((ActionListener listener) -> allocationService.reroute(clusterState, "inital-allocate", listener)); var balanceBeforeReset = allocator.getDesiredBalance(); - assertThat(balanceBeforeReset.lastConvergedIndex(), greaterThan(DesiredBalance.INITIAL.lastConvergedIndex())); + assertThat(balanceBeforeReset.lastConvergedIndex(), greaterThan(DesiredBalance.BECOME_MASTER_INITIAL.lastConvergedIndex())); assertThat(balanceBeforeReset.assignments(), not(anEmptyMap())); var listener = new PlainActionFuture(); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java index 7b77947792bd4..679d04224aefe 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java @@ -96,7 +96,12 @@ public void testComputeBalance() { var clusterState = createInitialClusterState(3); var index = clusterState.metadata().index(TEST_INDEX).getIndex(); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance, @@ -115,7 +120,7 @@ public void testStopsComputingWhenStale() { var index = clusterState.metadata().index(TEST_INDEX).getIndex(); // if the isFresh flag is false then we only do one iteration, allocating the primaries but not the replicas - var desiredBalance0 = DesiredBalance.INITIAL; + var desiredBalance0 = DesiredBalance.BECOME_MASTER_INITIAL; var desiredBalance1 = desiredBalanceComputer.compute(desiredBalance0, createInput(clusterState), queue(), input -> false); assertDesiredAssignments( desiredBalance1, @@ -147,7 +152,7 @@ public void testIgnoresOutOfScopePrimaries() { var primaryShard = mutateAllocationStatus(clusterState.routingTable().index(TEST_INDEX).shard(0).primaryShard()); var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState, primaryShard), queue(), input -> true @@ -184,7 +189,7 @@ public void testIgnoresOutOfScopeReplicas() { var replicaShard = mutateAllocationStatus(originalReplicaShard); var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState, replicaShard), queue(), input -> true @@ -241,7 +246,7 @@ public void testAssignShardsToTheirPreviousLocationIfAvailable() { : new ShardRouting[] { clusterState.routingTable().index(TEST_INDEX).shard(0).primaryShard() }; var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState, ignored), queue(), input -> true @@ -284,7 +289,12 @@ public 
void testRespectsAssignmentOfUnknownPrimaries() { } clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(routingNodes)).build(); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance, @@ -331,7 +341,12 @@ public void testRespectsAssignmentOfUnknownReplicas() { } clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(routingNodes)).build(); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance, @@ -367,7 +382,7 @@ public void testRespectsAssignmentByGatewayAllocators() { } var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, DesiredBalanceInput.create(randomNonNegativeLong(), routingAllocation), queue(), input -> true @@ -427,7 +442,12 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing } clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(desiredRoutingNodes)).build(); - var desiredBalance1 = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance1 = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance1, Map.of( @@ -513,7 +533,12 @@ public void testNoDataNodes() { var desiredBalanceComputer = createDesiredBalanceComputer(); var clusterState = createInitialClusterState(0); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments(desiredBalance, Map.of()); } @@ -532,7 +557,7 @@ public void testAppliesMoveCommands() { clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(routingNodes)).build(); var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState), queue( new MoveAllocationCommand(index.getName(), 0, "node-1", "node-2"), @@ -662,7 +687,7 @@ public void testDesiredBalanceShouldConvergeInABigCluster() { var input = new DesiredBalanceInput(randomInt(), routingAllocationWithDecidersOf(clusterState, clusterInfo, settings), List.of()); var desiredBalance = createDesiredBalanceComputer(new BalancedShardsAllocator(settings)).compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, input, queue(), ignored -> iteration.incrementAndGet() < 1000 @@ -1243,7 +1268,7 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing assertThatLogger(() -> { var iteration = new AtomicInteger(0); desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(createInitialClusterState(3)), queue(), input -> iteration.incrementAndGet() < iterations diff --git 
a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java index 9d33b697e31ca..9caf89d4d7613 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java @@ -698,6 +698,7 @@ public void onFailure(Exception e) { try { assertTrue(listenersCalled.await(10, TimeUnit.SECONDS)); + assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), sameInstance(DesiredBalance.NOT_MASTER)); } finally { clusterService.close(); terminate(threadPool); @@ -753,7 +754,7 @@ public DesiredBalance compute( try { // initial computation is based on DesiredBalance.INITIAL rerouteAndWait(service, clusterState, "initial-allocation"); - assertThat(desiredBalanceComputer.lastComputationInput.get(), equalTo(DesiredBalance.INITIAL)); + assertThat(desiredBalanceComputer.lastComputationInput.get(), equalTo(DesiredBalance.BECOME_MASTER_INITIAL)); // any next computation is based on current desired balance var current = desiredBalanceShardsAllocator.getDesiredBalance(); @@ -806,7 +807,7 @@ public void testResetDesiredBalanceOnNoLongerMaster() { try { rerouteAndWait(service, clusterState, "initial-allocation"); - assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.INITIAL))); + assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.BECOME_MASTER_INITIAL))); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.getNodes()).localNodeId(node1.getId()).masterNodeId(node2.getId())) @@ -816,7 +817,7 @@ public void testResetDesiredBalanceOnNoLongerMaster() { assertThat( "desired balance should be resetted on no longer master", desiredBalanceShardsAllocator.getDesiredBalance(), - equalTo(DesiredBalance.INITIAL) + equalTo(DesiredBalance.NOT_MASTER) ); } finally { clusterService.close(); @@ -862,7 +863,7 @@ public void resetDesiredBalance() { try { rerouteAndWait(service, clusterState, "initial-allocation"); - assertThat(desiredBalanceAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.INITIAL))); + assertThat(desiredBalanceAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.BECOME_MASTER_INITIAL))); final var shutdownType = randomFrom(Type.SIGTERM, Type.REMOVE, Type.REPLACE); final var singleShutdownMetadataBuilder = SingleNodeShutdownMetadata.builder() @@ -938,7 +939,7 @@ public DesiredBalance compute( Queue> pendingDesiredBalanceMoves, Predicate isFresh ) { - assertThat(previousDesiredBalance, sameInstance(DesiredBalance.INITIAL)); + assertThat(previousDesiredBalance, sameInstance(DesiredBalance.BECOME_MASTER_INITIAL)); return new DesiredBalance(desiredBalanceInput.index(), Map.of()); } }, @@ -967,7 +968,7 @@ protected void submitReconcileTask(DesiredBalance desiredBalance) { lastListener.onResponse(null); } }; - assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), sameInstance(DesiredBalance.INITIAL)); + assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), sameInstance(DesiredBalance.NOT_MASTER)); try { final PlainActionFuture future = new PlainActionFuture<>(); desiredBalanceShardsAllocator.allocate( From cab6dc5d56a7fcdbbd2fe355bc6d1277094f1400 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine 
<58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 4 Dec 2024 00:26:23 +1100 Subject: [PATCH 14/28] Mute org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT #117893 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 57db22feba059..cf39eae210f88 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -239,6 +239,8 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} issue: https://github.com/elastic/elasticsearch/issues/117862 +- class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT + issue: https://github.com/elastic/elasticsearch/issues/117893 # Examples: # From cca7051e73ff089b26f3d1825e4b4e15b81e04aa Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 3 Dec 2024 14:28:07 +0100 Subject: [PATCH 15/28] ESQL: Simplify CombineProjections (#117882) Make combineUpperGroupingsAndLowerProjections a bit simpler. Also slightly improve a test and add comments to provide more context. --- .../rules/logical/CombineProjections.java | 40 ++++++++++--------- .../optimizer/LogicalPlanOptimizerTests.java | 2 +- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java index be7096538fb9a..957db4a7273e5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java @@ -22,6 +22,7 @@ import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; import java.util.ArrayList; +import java.util.LinkedHashSet; import java.util.List; public final class CombineProjections extends OptimizerRules.OptimizerRule { @@ -144,30 +145,31 @@ private static List combineUpperGroupingsAndLowerProjections( List upperGroupings, List lowerProjections ) { + assert upperGroupings.size() <= 1 + || upperGroupings.stream().anyMatch(group -> group.anyMatch(expr -> expr instanceof Categorize)) == false + : "CombineProjections only tested with a single CATEGORIZE with no additional groups"; // Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..) - AttributeMap aliases = new AttributeMap<>(); + AttributeMap aliases = new AttributeMap<>(); for (NamedExpression ne : lowerProjections) { - // record the alias - aliases.put(ne.toAttribute(), Alias.unwrap(ne)); + // Record the aliases. + // Projections are just aliases for attributes, so casting is safe. + aliases.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne)); } - // Replace any matching attribute directly with the aliased attribute from the projection. - AttributeSet seen = new AttributeSet(); - List replaced = new ArrayList<>(); + + // Propagate any renames from the lower projection into the upper groupings. + // This can lead to duplicates: e.g. + // | EVAL x = y | STATS ... BY x, y + // All substitutions happen before; groupings must be attributes at this point except for CATEGORIZE which will be an alias like + // `c = CATEGORIZE(attribute)`. + // Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet). 
+        // TODO: The deduplication based on simple equality will be insufficient in case of multiple CATEGORIZEs, e.g. for
+        // `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead.
+        LinkedHashSet<NamedExpression> resolvedGroupings = new LinkedHashSet<>();
         for (NamedExpression ne : upperGroupings) {
-            // Duplicated attributes are ignored.
-            if (ne instanceof Attribute attribute) {
-                var newExpression = aliases.resolve(attribute, attribute);
-                if (newExpression instanceof Attribute newAttribute && seen.add(newAttribute) == false) {
-                    // Already seen, skip
-                    continue;
-                }
-                replaced.add(newExpression);
-            } else {
-                // For grouping functions, this will replace nested properties too
-                replaced.add(ne.transformUp(Attribute.class, a -> aliases.resolve(a, a)));
-            }
+            NamedExpression transformed = (NamedExpression) ne.transformUp(Attribute.class, a -> aliases.resolve(a, a));
+            resolvedGroupings.add(transformed);
         }
-        return replaced;
+        return new ArrayList<>(resolvedGroupings);
     }

     /**
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java
index 57d0c7432f97b..a74efca3b3d99 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java
@@ -1217,7 +1217,7 @@ public void testCombineProjectionWithCategorizeGrouping() {
         var plan = plan("""
             from test
             | eval k = first_name, k1 = k
-            | stats s = sum(salary) by cat = CATEGORIZE(k)
+            | stats s = sum(salary) by cat = CATEGORIZE(k1)
             | keep s, cat
             """);

From 03a71d2deee7bb2788fc40b8d21d90cc75b787e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?=
Date: Tue, 3 Dec 2024 14:47:40 +0100
Subject: [PATCH 16/28] ESQL: Make Categorize usable in aggs when identical to a grouping (#117835)

Cases like `STATS MV_APPEND(cat, CATEGORIZE(x)) BY cat=CATEGORIZE(x)` should work, as they're moved to an EVAL by a rule.
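As an illustrative sketch (this is not output of the actual planner; names and plan shapes are approximations), the rule conceptually replaces the grouping-identical CATEGORIZE in the aggregate expression with a reference to the grouping key and moves the surrounding expression into an EVAL:

```
FROM sample_data
| STATS m = MV_APPEND(cat, CATEGORIZE(message)) BY cat = CATEGORIZE(message)

-- is planned roughly as --

FROM sample_data
| STATS BY cat = CATEGORIZE(message)
| EVAL m = MV_APPEND(cat, cat)
| KEEP m, cat
```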
Also, these cases were discarded, as they fail because of other verifications (which also fail for BUCKET):
```
STATS x = category BY category=CATEGORIZE(message)
STATS x = CATEGORIZE(message) BY CATEGORIZE(message)
STATS x = CATEGORIZE(message) BY category=CATEGORIZE(message)
```
---
 .../src/main/resources/bucket.csv-spec | 21 +++
 .../src/main/resources/categorize.csv-spec | 121 ++++++++++++------
 .../src/main/resources/docs.csv-spec | 2 +-
 .../xpack/esql/action/EsqlCapabilities.java | 2 +-
 .../xpack/esql/analysis/Verifier.java | 39 +++---
 ...ReplaceAggregateAggExpressionWithEval.java | 16 +++
 ...laceAggregateNestedExpressionWithEval.java | 6 +-
 .../xpack/esql/analysis/VerifierTests.java | 34 +++--
 .../optimizer/LogicalPlanOptimizerTests.java | 4 +-
 9 files changed, 167 insertions(+), 78 deletions(-)

diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
index 7bbf011176693..b29c489910f65 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
@@ -503,6 +503,27 @@ FROM employees
 //end::reuseGroupingFunctionWithExpression-result[]
 ;
+reuseGroupingFunctionImplicitAliasWithExpression#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| STATS s1 = `BUCKET(salary / 100 + 99, 50.)` + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)
+| SORT `BUCKET(salary / 100 + 99, 50.)`, b2
+| KEEP s1, `BUCKET(salary / 100 + 99, 50.)`, s2, b2
+;
+
+ s1:double | BUCKET(salary / 100 + 99, 50.):double | s2:double | b2:double
+351.0 |350.0 |1002.0 |1000.0
+401.0 |400.0 |1002.0 |1000.0
+451.0 |450.0 |1002.0 |1000.0
+501.0 |500.0 |1002.0 |1000.0
+551.0 |550.0 |1002.0 |1000.0
+601.0 |600.0 |1002.0 |1000.0
+601.0 |600.0 |1052.0 |1050.0
+651.0 |650.0 |1052.0 |1050.0
+701.0 |700.0 |1052.0 |1050.0
+751.0 |750.0 |1052.0 |1050.0
+801.0 |800.0 |1052.0 |1050.0
+;
+
 reuseGroupingFunctionWithinAggs#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
 FROM employees
 | STATS sum = 1 + MAX(1 + BUCKET(salary, 1000.)) BY BUCKET(salary, 1000.)
+ 1 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec index e45b10d1aa122..804c1c56a1eb5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec @@ -1,5 +1,5 @@ standard aggs -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS count=COUNT(), @@ -17,7 +17,7 @@ count:long | sum:long | avg:double | count_distinct:long | category:keyw ; values aggs -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS values=MV_SORT(VALUES(message)), @@ -33,7 +33,7 @@ values:keyword | top ; mv -required_capability: categorize_v4 +required_capability: categorize_v5 FROM mv_sample_data | STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(message) @@ -48,7 +48,7 @@ COUNT():long | SUM(event_duration):long | category:keyword ; row mv -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = ["connected to a", "connected to b", "disconnected"], str = ["a", "b", "c"] | STATS COUNT(), VALUES(str) BY category=CATEGORIZE(message) @@ -61,7 +61,7 @@ COUNT():long | VALUES(str):keyword | category:keyword ; skips stopwords -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = ["Mon Tue connected to a", "Jul Aug connected to b September ", "UTC connected GMT to c UTC"] | STATS COUNT() BY category=CATEGORIZE(message) @@ -73,7 +73,7 @@ COUNT():long | category:keyword ; with multiple indices -required_capability: categorize_v4 +required_capability: categorize_v5 required_capability: union_types FROM sample_data* @@ -88,7 +88,7 @@ COUNT():long | category:keyword ; mv with many values -required_capability: categorize_v4 +required_capability: categorize_v5 FROM employees | STATS COUNT() BY category=CATEGORIZE(job_positions) @@ -105,7 +105,7 @@ COUNT():long | category:keyword ; mv with many values and SUM -required_capability: categorize_v4 +required_capability: categorize_v5 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) @@ -120,7 +120,7 @@ SUM(languages):long | category:keyword ; mv with many values and nulls and SUM -required_capability: categorize_v4 +required_capability: categorize_v5 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) @@ -134,7 +134,7 @@ SUM(languages):long | category:keyword ; mv via eval -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL message = MV_APPEND(message, "Banana") @@ -150,7 +150,7 @@ COUNT():long | category:keyword ; mv via eval const -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL message = ["Banana", "Bread"] @@ -164,7 +164,7 @@ COUNT():long | category:keyword ; mv via eval const without aliases -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL message = ["Banana", "Bread"] @@ -178,7 +178,7 @@ COUNT():long | CATEGORIZE(message):keyword ; mv const in parameter -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) @@ -191,7 +191,7 @@ COUNT():long | c:keyword ; agg alias shadowing -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS c = COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) @@ -206,7 +206,7 @@ 
c:keyword ; chained aggregations using categorize -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -221,7 +221,7 @@ COUNT():long | category:keyword ; stats without aggs -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS BY category=CATEGORIZE(message) @@ -235,7 +235,7 @@ category:keyword ; text field -required_capability: categorize_v4 +required_capability: categorize_v5 FROM hosts | STATS COUNT() BY category=CATEGORIZE(host_group) @@ -253,7 +253,7 @@ COUNT():long | category:keyword ; on TO_UPPER -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(TO_UPPER(message)) @@ -267,7 +267,7 @@ COUNT():long | category:keyword ; on CONCAT -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " banana")) @@ -281,7 +281,7 @@ COUNT():long | category:keyword ; on CONCAT with unicode -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " 👍🏽😊")) @@ -295,7 +295,7 @@ COUNT():long | category:keyword ; on REVERSE(CONCAT()) -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(REVERSE(CONCAT(message, " 👍🏽😊"))) @@ -309,7 +309,7 @@ COUNT():long | category:keyword ; and then TO_LOWER -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -324,7 +324,7 @@ COUNT():long | category:keyword ; on const empty string -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE("") @@ -336,7 +336,7 @@ COUNT():long | category:keyword ; on const empty string from eval -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = "" @@ -349,7 +349,7 @@ COUNT():long | category:keyword ; on null -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = null @@ -362,7 +362,7 @@ COUNT():long | SUM(event_duration):long | category:keyword ; on null string -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = null::string @@ -375,7 +375,7 @@ COUNT():long | category:keyword ; filtering out all data -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | WHERE @timestamp < "2023-10-23T00:00:00Z" @@ -387,7 +387,7 @@ COUNT():long | category:keyword ; filtering out all data with constant -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -398,7 +398,7 @@ COUNT():long | category:keyword ; drop output columns -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message) @@ -413,7 +413,7 @@ x:integer ; category value processing -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = ["connected to a", "connected to b", "disconnected"] | STATS COUNT() BY category=CATEGORIZE(message) @@ -427,7 +427,7 @@ COUNT():long | category:keyword ; row aliases -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to xyz" | EVAL x = message @@ -441,7 
+441,7 @@ COUNT():long | category:keyword | y:keyword ; from aliases -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = message @@ -457,7 +457,7 @@ COUNT():long | category:keyword | y:keyword ; row aliases with keep -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to xyz" | EVAL x = message @@ -473,7 +473,7 @@ COUNT():long | y:keyword ; from aliases with keep -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = message @@ -491,7 +491,7 @@ COUNT():long | y:keyword ; row rename -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to xyz" | RENAME message as x @@ -505,7 +505,7 @@ COUNT():long | y:keyword ; from rename -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | RENAME message as x @@ -521,7 +521,7 @@ COUNT():long | y:keyword ; row drop -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to a" | STATS c = COUNT() BY category=CATEGORIZE(message) @@ -534,7 +534,7 @@ c:long ; from drop -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS c = COUNT() BY category=CATEGORIZE(message) @@ -547,3 +547,48 @@ c:long 3 3 ; + +categorize in aggs inside function +required_capability: categorize_v5 + +FROM sample_data + | STATS COUNT(), x = MV_APPEND(category, category) BY category=CATEGORIZE(message) + | SORT x + | KEEP `COUNT()`, x +; + +COUNT():long | x:keyword + 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] + 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] + 1 | [.*?Disconnected.*?,.*?Disconnected.*?] +; + +categorize in aggs same as grouping inside function +required_capability: categorize_v5 + +FROM sample_data + | STATS COUNT(), x = MV_APPEND(CATEGORIZE(message), `CATEGORIZE(message)`) BY CATEGORIZE(message) + | SORT x + | KEEP `COUNT()`, x +; + +COUNT():long | x:keyword + 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] + 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] + 1 | [.*?Disconnected.*?,.*?Disconnected.*?] +; + +categorize in aggs same as grouping inside function with explicit alias +required_capability: categorize_v5 + +FROM sample_data + | STATS COUNT(), x = MV_APPEND(CATEGORIZE(message), category) BY category=CATEGORIZE(message) + | SORT x + | KEEP `COUNT()`, x +; + +COUNT():long | x:keyword + 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] + 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] + 1 | [.*?Disconnected.*?,.*?Disconnected.*?] 
+; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index 24baf1263d06a..aa89c775da4cf 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -678,7 +678,7 @@ Bangalore | 9 | 72 ; docsCategorize -required_capability: categorize_v4 +required_capability: categorize_v5 // tag::docsCategorize[] FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 646c4f8240c3e..b5d6dd8584e8c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -407,7 +407,7 @@ public enum Cap { /** * Supported the text categorization function "CATEGORIZE". */ - CATEGORIZE_V4, + CATEGORIZE_V5, /** * QSTR function diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 5f8c011cff53a..49d8a5ee8caad 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -20,7 +20,6 @@ import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; -import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; import org.elasticsearch.xpack.esql.core.expression.function.Function; @@ -63,12 +62,10 @@ import java.util.ArrayList; import java.util.BitSet; import java.util.Collection; -import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Consumer; @@ -364,35 +361,35 @@ private static void checkCategorizeGrouping(Aggregate agg, Set failures ); }); - // Forbid CATEGORIZE being used in the aggregations - agg.aggregates().forEach(a -> { - a.forEachDown( - Categorize.class, - categorize -> failures.add( - fail(categorize, "cannot use CATEGORIZE grouping function [{}] within the aggregations", categorize.sourceText()) + // Forbid CATEGORIZE being used in the aggregations, unless it appears as a grouping + agg.aggregates() + .forEach( + a -> a.forEachDown( + AggregateFunction.class, + aggregateFunction -> aggregateFunction.forEachDown( + Categorize.class, + categorize -> failures.add( + fail(categorize, "cannot use CATEGORIZE grouping function [{}] within an aggregation", categorize.sourceText()) + ) + ) ) ); - }); - // Forbid CATEGORIZE being referenced in the aggregation functions - Map categorizeByAliasId = new HashMap<>(); + // Forbid CATEGORIZE being referenced as a child of an aggregation function + AttributeMap categorizeByAttribute = new AttributeMap<>(); agg.groupings().forEach(g -> { g.forEachDown(Alias.class, alias -> { if (alias.child() instanceof Categorize 
categorize) { - categorizeByAliasId.put(alias.id(), categorize); + categorizeByAttribute.put(alias.toAttribute(), categorize); } }); }); agg.aggregates() .forEach(a -> a.forEachDown(AggregateFunction.class, aggregate -> aggregate.forEachDown(Attribute.class, attribute -> { - var categorize = categorizeByAliasId.get(attribute.id()); + var categorize = categorizeByAttribute.get(attribute); if (categorize != null) { failures.add( - fail( - attribute, - "cannot reference CATEGORIZE grouping function [{}] within the aggregations", - attribute.sourceText() - ) + fail(attribute, "cannot reference CATEGORIZE grouping function [{}] within an aggregation", attribute.sourceText()) ); } }))); @@ -449,7 +446,7 @@ private static void checkInvalidNamedExpressionUsage( // check the bucketing function against the group else if (c instanceof GroupingFunction gf) { if (Expressions.anyMatch(groups, ex -> ex instanceof Alias a && a.child().semanticEquals(gf)) == false) { - failures.add(fail(gf, "can only use grouping function [{}] part of the BY clause", gf.sourceText())); + failures.add(fail(gf, "can only use grouping function [{}] as part of the BY clause", gf.sourceText())); } } }); @@ -466,7 +463,7 @@ else if (c instanceof GroupingFunction gf) { // optimizer will later unroll expressions with aggs and non-aggs with a grouping function into an EVAL, but that will no longer // be verified (by check above in checkAggregate()), so do it explicitly here if (Expressions.anyMatch(groups, ex -> ex instanceof Alias a && a.child().semanticEquals(gf)) == false) { - failures.add(fail(gf, "can only use grouping function [{}] part of the BY clause", gf.sourceText())); + failures.add(fail(gf, "can only use grouping function [{}] as part of the BY clause", gf.sourceText())); } else if (level == 0) { addFailureOnGroupingUsedNakedInAggs(failures, gf, "function"); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java index 2361b46b2be6f..c36d4caf7f599 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java @@ -9,18 +9,21 @@ import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.Project; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -51,6 +54,16 @@ protected LogicalPlan rule(Aggregate aggregate) { AttributeMap aliases = new AttributeMap<>(); 
aggregate.forEachExpressionUp(Alias.class, a -> aliases.put(a.toAttribute(), a.child())); + // Build Categorize grouping functions map. + // Functions like BUCKET() shouldn't reach this point, + // as they are moved to an early EVAL by ReplaceAggregateNestedExpressionWithEval + Map groupingAttributes = new HashMap<>(); + aggregate.forEachExpressionUp(Alias.class, a -> { + if (a.child() instanceof Categorize groupingFunction) { + groupingAttributes.put(groupingFunction, a.toAttribute()); + } + }); + // break down each aggregate into AggregateFunction and/or grouping key // preserve the projection at the end List aggs = aggregate.aggregates(); @@ -109,6 +122,9 @@ protected LogicalPlan rule(Aggregate aggregate) { return alias.toAttribute(); }); + // replace grouping functions with their references + aggExpression = aggExpression.transformUp(Categorize.class, groupingAttributes::get); + Alias alias = as.replaceChild(aggExpression); newEvals.add(alias); newProjections.add(alias.toAttribute()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java index 985e68252a1f9..4dbc43454a023 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java @@ -51,6 +51,7 @@ protected LogicalPlan rule(Aggregate aggregate) { // Exception: Categorize is internal to the aggregation and remains in the groupings. We move its child expression into an eval. if (g instanceof Alias as) { if (as.child() instanceof Categorize cat) { + // For Categorize grouping function, we only move the child expression into an eval if (cat.field() instanceof Attribute == false) { groupingChanged = true; var fieldAs = new Alias(as.source(), as.name(), cat.field(), null, true); @@ -59,7 +60,6 @@ protected LogicalPlan rule(Aggregate aggregate) { evalNames.put(fieldAs.name(), fieldAttr); Categorize replacement = cat.replaceChildren(List.of(fieldAttr)); newGroupings.set(i, as.replaceChild(replacement)); - groupingAttributes.put(cat, fieldAttr); } } else { groupingChanged = true; @@ -135,6 +135,10 @@ protected LogicalPlan rule(Aggregate aggregate) { }); // replace any grouping functions with their references pointing to the added synthetic eval replaced = replaced.transformDown(GroupingFunction.class, gf -> { + // Categorize in aggs depends on the grouping result, not on an early eval + if (gf instanceof Categorize) { + return gf; + } aggsChanged.set(true); // should never return null, as it's verified. // but even if broken, the transform will fail safely; otoh, returning `gf` will fail later due to incorrect plan. 
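In summary (the concrete cases below are taken from the VerifierTests changes that follow), CATEGORIZE may now appear among the aggregates when it is identical to a grouping, while nesting it inside an aggregation function, or using a CATEGORIZE that matches no grouping, remains an error:

```
// accepted: the aggregate references or repeats the grouping
FROM test | STATS MV_COUNT(cat), COUNT(*) BY cat = CATEGORIZE(first_name)
FROM test | STATS MV_COUNT(CATEGORIZE(first_name)), COUNT(*) BY CATEGORIZE(first_name)

// rejected: CATEGORIZE nested inside an aggregation function
FROM test | STATS COUNT(CATEGORIZE(first_name)) BY CATEGORIZE(first_name)

// rejected: CATEGORIZE that matches no grouping
FROM test | STATS MV_COUNT(CATEGORIZE(last_name)) BY CATEGORIZE(first_name)
```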
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index d02e78202e0c2..74e2de1141728 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -407,12 +407,12 @@ public void testAggFilterOnBucketingOrAggFunctions() { // but fails if it's different assertEquals( - "1:32: can only use grouping function [bucket(a, 3)] part of the BY clause", + "1:32: can only use grouping function [bucket(a, 3)] as part of the BY clause", error("row a = 1 | stats sum(a) where bucket(a, 3) > -1 by bucket(a,2)") ); assertEquals( - "1:40: can only use grouping function [bucket(salary, 10)] part of the BY clause", + "1:40: can only use grouping function [bucket(salary, 10)] as part of the BY clause", error("from test | stats max(languages) WHERE bucket(salary, 10) > 1 by emp_no") ); @@ -444,19 +444,19 @@ public void testAggWithNonBooleanFilter() { public void testGroupingInsideAggsAsAgg() { assertEquals( - "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:18: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats bucket(emp_no, 5.) by emp_no") ); assertEquals( - "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:18: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats bucket(emp_no, 5.)") ); assertEquals( - "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:18: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats bucket(emp_no, 5.) by bucket(emp_no, 6.)") ); assertEquals( - "1:22: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:22: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats 3 + bucket(emp_no, 5.) 
by bucket(emp_no, 6.)") ); } @@ -1846,7 +1846,7 @@ public void testIntervalAsString() { } public void testCategorizeSingleGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(first_name)"); query("from test | STATS COUNT(*) BY cat = CATEGORIZE(first_name)"); @@ -1875,7 +1875,7 @@ public void testCategorizeSingleGrouping() { } public void testCategorizeNestedGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(LENGTH(first_name)::string)"); @@ -1890,27 +1890,33 @@ public void testCategorizeNestedGrouping() { } public void testCategorizeWithinAggregations() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); query("from test | STATS MV_COUNT(cat), COUNT(*) BY cat = CATEGORIZE(first_name)"); + query("from test | STATS MV_COUNT(CATEGORIZE(first_name)), COUNT(*) BY cat = CATEGORIZE(first_name)"); + query("from test | STATS MV_COUNT(CATEGORIZE(first_name)), COUNT(*) BY CATEGORIZE(first_name)"); assertEquals( - "1:25: cannot use CATEGORIZE grouping function [CATEGORIZE(first_name)] within the aggregations", + "1:25: cannot use CATEGORIZE grouping function [CATEGORIZE(first_name)] within an aggregation", error("FROM test | STATS COUNT(CATEGORIZE(first_name)) BY CATEGORIZE(first_name)") ); - assertEquals( - "1:25: cannot reference CATEGORIZE grouping function [cat] within the aggregations", + "1:25: cannot reference CATEGORIZE grouping function [cat] within an aggregation", error("FROM test | STATS COUNT(cat) BY cat = CATEGORIZE(first_name)") ); assertEquals( - "1:30: cannot reference CATEGORIZE grouping function [cat] within the aggregations", + "1:30: cannot reference CATEGORIZE grouping function [cat] within an aggregation", error("FROM test | STATS SUM(LENGTH(cat::keyword) + LENGTH(last_name)) BY cat = CATEGORIZE(first_name)") ); assertEquals( - "1:25: cannot reference CATEGORIZE grouping function [`CATEGORIZE(first_name)`] within the aggregations", + "1:25: cannot reference CATEGORIZE grouping function [`CATEGORIZE(first_name)`] within an aggregation", error("FROM test | STATS COUNT(`CATEGORIZE(first_name)`) BY CATEGORIZE(first_name)") ); + + assertEquals( + "1:28: can only use grouping function [CATEGORIZE(last_name)] as part of the BY clause", + error("FROM test | STATS MV_COUNT(CATEGORIZE(last_name)) BY CATEGORIZE(first_name)") + ); } public void testSortByAggregate() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index a74efca3b3d99..b76781f76f4af 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -1212,7 +1212,7 @@ public void testCombineProjectionWithAggregationFirstAndAliasedGroupingUsedInAgg * \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..] 
*/ public void testCombineProjectionWithCategorizeGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); var plan = plan(""" from test @@ -3949,7 +3949,7 @@ public void testNestedExpressionsInGroups() { * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] */ public void testNestedExpressionsInGroupsWithCategorize() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); var plan = optimizedPlan(""" from test From ed1e3664ad6c50d2af24b09db51448072764f663 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 3 Dec 2024 13:51:07 +0000 Subject: [PATCH 17/28] Move SparseVectorQueryBuilder and TextExpansionQueryBuilder to x-pack core (#117857) This commit moves the SparseVectorQueryBuilder and TextExpansionQueryBuilder classes to the x-pack core module, enabling other modules to utilize these query builders. Additionally, it introduces a SparseVectorQueryWrapper to extract sparse vector queries from standard Lucene queries. This is needed for supporting semantic highlighting with sparse vector fields as follow up. --- .../xpack/core/XPackClientPlugin.java | 10 +++ .../ml/search}/SparseVectorQueryBuilder.java | 7 +- .../ml/search/SparseVectorQueryWrapper.java | 77 +++++++++++++++++++ .../ml/search}/TextExpansionQueryBuilder.java | 4 +- .../ml/search/WeightedTokensQueryBuilder.java | 2 +- .../core/ml/search/WeightedTokensUtils.java | 11 ++- .../SparseVectorQueryBuilderTests.java | 21 ++--- .../TextExpansionQueryBuilderTests.java | 14 ++-- .../WeightedTokensQueryBuilderTests.java | 13 +++- .../xpack/ml/MachineLearning.java | 19 ----- 10 files changed, 125 insertions(+), 53 deletions(-) rename x-pack/plugin/{ml/src/main/java/org/elasticsearch/xpack/ml/queries => core/src/main/java/org/elasticsearch/xpack/core/ml/search}/SparseVectorQueryBuilder.java (97%) create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java rename x-pack/plugin/{ml/src/main/java/org/elasticsearch/xpack/ml/queries => core/src/main/java/org/elasticsearch/xpack/core/ml/search}/TextExpansionQueryBuilder.java (98%) rename x-pack/plugin/{ml/src/test/java/org/elasticsearch/xpack/ml/queries => core/src/test/java/org/elasticsearch/xpack/core/ml/search}/SparseVectorQueryBuilderTests.java (94%) rename x-pack/plugin/{ml/src/test/java/org/elasticsearch/xpack/ml/queries => core/src/test/java/org/elasticsearch/xpack/core/ml/search}/TextExpansionQueryBuilderTests.java (96%) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index e2435c3396fa8..f5923a4942634 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -71,6 +71,8 @@ import org.elasticsearch.xpack.core.ml.job.config.JobTaskState; import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskParams; import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskState; +import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder; +import org.elasticsearch.xpack.core.ml.search.TextExpansionQueryBuilder; import 
org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage; import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage; @@ -398,6 +400,14 @@ public List getNamedXContent() { @Override public List> getQueries() { return List.of( + new QuerySpec<>(SparseVectorQueryBuilder.NAME, SparseVectorQueryBuilder::new, SparseVectorQueryBuilder::fromXContent), + new QuerySpec( + TextExpansionQueryBuilder.NAME, + TextExpansionQueryBuilder::new, + TextExpansionQueryBuilder::fromXContent + ), + // TODO: The WeightedTokensBuilder is slated for removal after the SparseVectorQueryBuilder is available. + // The logic to create a Boolean query based on weighted tokens will remain and/or be moved to server. new SearchPlugin.QuerySpec( WeightedTokensQueryBuilder.NAME, WeightedTokensQueryBuilder::new, diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java similarity index 97% rename from x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java index 5a63ad8e85e9b..e9e4e90421adc 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; @@ -33,9 +33,6 @@ import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensUtils; import java.io.IOException; import java.util.ArrayList; @@ -210,7 +207,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { return (shouldPruneTokens) ? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, queryVectors, ft, context) - : WeightedTokensUtils.queryBuilderWithAllTokens(queryVectors, ft, context); + : WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, ft, context); } @Override diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java new file mode 100644 index 0000000000000..234560f620d95 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java @@ -0,0 +1,77 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.core.ml.search; + +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; +import org.elasticsearch.index.query.SearchExecutionContext; + +import java.io.IOException; +import java.util.Objects; + +/** + * A wrapper class for the Lucene query generated by {@link SparseVectorQueryBuilder#toQuery(SearchExecutionContext)}. + * This wrapper facilitates the extraction of the complete sparse vector query using a {@link QueryVisitor}. + */ +public class SparseVectorQueryWrapper extends Query { + private final String fieldName; + private final Query termsQuery; + + public SparseVectorQueryWrapper(String fieldName, Query termsQuery) { + this.fieldName = fieldName; + this.termsQuery = termsQuery; + } + + public Query getTermsQuery() { + return termsQuery; + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + var rewrite = termsQuery.rewrite(indexSearcher); + if (rewrite != termsQuery) { + return new SparseVectorQueryWrapper(fieldName, rewrite); + } + return this; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return termsQuery.createWeight(searcher, scoreMode, boost); + } + + @Override + public String toString(String field) { + return termsQuery.toString(field); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(fieldName)) { + termsQuery.visit(visitor.getSubVisitor(BooleanClause.Occur.MUST, this)); + } + } + + @Override + public boolean equals(Object obj) { + if (sameClassAs(obj) == false) { + return false; + } + SparseVectorQueryWrapper that = (SparseVectorQueryWrapper) obj; + return fieldName.equals(that.fieldName) && termsQuery.equals(that.termsQuery); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), fieldName, termsQuery); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilder.java similarity index 98% rename from x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilder.java index 6d972bcf5863a..81758ec5f9342 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilder.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.search.Query; import org.apache.lucene.util.SetOnce; @@ -32,8 +32,6 @@ import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; import java.io.IOException; import java.util.List; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java index 256c90c3eaa62..f41fcd77ce627 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java @@ -125,7 +125,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { } return (this.tokenPruningConfig == null) - ? WeightedTokensUtils.queryBuilderWithAllTokens(tokens, ft, context) + ? WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, tokens, ft, context) : WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, tokens, ft, context); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java index 133920416d227..1c2ac23151e6e 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java @@ -24,13 +24,18 @@ public final class WeightedTokensUtils { private WeightedTokensUtils() {} - public static Query queryBuilderWithAllTokens(List tokens, MappedFieldType ft, SearchExecutionContext context) { + public static Query queryBuilderWithAllTokens( + String fieldName, + List tokens, + MappedFieldType ft, + SearchExecutionContext context + ) { var qb = new BooleanQuery.Builder(); for (var token : tokens) { qb.add(new BoostQuery(ft.termQuery(token.token(), context), token.weight()), BooleanClause.Occur.SHOULD); } - return qb.setMinimumNumberShouldMatch(1).build(); + return new SparseVectorQueryWrapper(fieldName, qb.setMinimumNumberShouldMatch(1).build()); } public static Query queryBuilderWithPrunedTokens( @@ -64,7 +69,7 @@ public static Query queryBuilderWithPrunedTokens( } } - return qb.setMinimumNumberShouldMatch(1).build(); + return new SparseVectorQueryWrapper(fieldName, qb.setMinimumNumberShouldMatch(1).build()); } /** diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java similarity index 94% rename from x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java index 13cf6d87728a8..9872d95de024a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java +++ 
b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.document.Document; import org.apache.lucene.document.FeatureField; @@ -40,9 +40,6 @@ import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.ml.MachineLearning; import java.io.IOException; import java.lang.reflect.Method; @@ -50,7 +47,7 @@ import java.util.Collection; import java.util.List; -import static org.elasticsearch.xpack.ml.queries.SparseVectorQueryBuilder.QUERY_VECTOR_FIELD; +import static org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder.QUERY_VECTOR_FIELD; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.hasSize; @@ -102,7 +99,7 @@ private SparseVectorQueryBuilder createTestQueryBuilder(TokenPruningConfig token @Override protected Collection> getPlugins() { - return List.of(MachineLearning.class, MapperExtrasPlugin.class, XPackClientPlugin.class); + return List.of(MapperExtrasPlugin.class, XPackClientPlugin.class); } @Override @@ -156,8 +153,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws @Override protected void doAssertLuceneQuery(SparseVectorQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { - assertThat(query, instanceOf(BooleanQuery.class)); - BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1); assertThat(booleanQuery.clauses(), hasSize(NUM_TOKENS)); @@ -233,11 +232,13 @@ public void testToQuery() throws IOException { private void testDoToQuery(SparseVectorQueryBuilder queryBuilder, SearchExecutionContext context) throws IOException { Query query = queryBuilder.doToQuery(context); + assertTrue(query instanceof SparseVectorQueryWrapper); + var sparseQuery = (SparseVectorQueryWrapper) query; if (queryBuilder.shouldPruneTokens()) { // It's possible that all documents were pruned for aggressive pruning configurations - assertTrue(query instanceof BooleanQuery || query instanceof MatchNoDocsQuery); + assertTrue(sparseQuery.getTermsQuery() instanceof BooleanQuery || sparseQuery.getTermsQuery() instanceof MatchNoDocsQuery); } else { - assertTrue(query instanceof BooleanQuery); + assertTrue(sparseQuery.getTermsQuery() instanceof BooleanQuery); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilderTests.java similarity index 96% rename from x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilderTests.java index 
00d50e0d0d7bb..a0263003b72db 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilderTests.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.document.Document; import org.apache.lucene.document.FeatureField; @@ -35,10 +35,6 @@ import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; -import org.elasticsearch.xpack.ml.MachineLearning; import java.io.IOException; import java.lang.reflect.Method; @@ -77,7 +73,7 @@ protected TextExpansionQueryBuilder doCreateTestQueryBuilder() { @Override protected Collection> getPlugins() { - return List.of(MachineLearning.class, MapperExtrasPlugin.class, XPackClientPlugin.class); + return List.of(MapperExtrasPlugin.class, XPackClientPlugin.class); } @Override @@ -129,8 +125,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws @Override protected void doAssertLuceneQuery(TextExpansionQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { - assertThat(query, instanceOf(BooleanQuery.class)); - BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1); assertThat(booleanQuery.clauses(), hasSize(NUM_TOKENS)); diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java index 114ad90354c61..cded9b8dce5e2 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java @@ -271,8 +271,11 @@ public void testPruningIsAppliedCorrectly() throws IOException { } private void assertCorrectLuceneQuery(String name, Query query, List expectedFeatureFields) { - assertTrue(query instanceof BooleanQuery); - List booleanClauses = ((BooleanQuery) query).clauses(); + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); + List booleanClauses = booleanQuery.clauses(); assertEquals( name + " had " + booleanClauses.size() + " clauses, expected " + expectedFeatureFields.size(), expectedFeatureFields.size(), @@ -343,8 +346,10 @@ public void testMustRewrite() throws IOException { @Override protected void doAssertLuceneQuery(WeightedTokensQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { - assertThat(query, instanceOf(BooleanQuery.class)); 
- BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1); assertThat(booleanQuery.clauses(), hasSize(NUM_TOKENS)); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 8363e0f5c19a1..c76e43790a259 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -48,7 +48,6 @@ import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; -import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.indices.AssociatedIndexDescriptor; import org.elasticsearch.indices.SystemIndexDescriptor; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; @@ -376,8 +375,6 @@ import org.elasticsearch.xpack.ml.process.MlMemoryTracker; import org.elasticsearch.xpack.ml.process.NativeController; import org.elasticsearch.xpack.ml.process.NativeStorageProvider; -import org.elasticsearch.xpack.ml.queries.SparseVectorQueryBuilder; -import org.elasticsearch.xpack.ml.queries.TextExpansionQueryBuilder; import org.elasticsearch.xpack.ml.rest.RestDeleteExpiredDataAction; import org.elasticsearch.xpack.ml.rest.RestMlInfoAction; import org.elasticsearch.xpack.ml.rest.RestMlMemoryAction; @@ -1764,22 +1761,6 @@ public List> getQueryVectorBuilders() { ); } - @Override - public List> getQueries() { - return List.of( - new QuerySpec( - TextExpansionQueryBuilder.NAME, - TextExpansionQueryBuilder::new, - TextExpansionQueryBuilder::fromXContent - ), - new QuerySpec( - SparseVectorQueryBuilder.NAME, - SparseVectorQueryBuilder::new, - SparseVectorQueryBuilder::fromXContent - ) - ); - } - private ContextParser checkAggLicense(ContextParser realParser, LicensedFeature.Momentary feature) { return (parser, name) -> { if (feature.check(getLicenseState()) == false) { From 5c1b3c7197603414614d72487c7327662d622420 Mon Sep 17 00:00:00 2001 From: mmahacek Date: Tue, 3 Dec 2024 06:10:02 -0800 Subject: [PATCH 18/28] Update email.asciidoc (#117867) Fix error in documentation. --- docs/reference/watcher/actions/email.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/watcher/actions/email.asciidoc b/docs/reference/watcher/actions/email.asciidoc index 16b9cc4be0628..efad500e0226b 100644 --- a/docs/reference/watcher/actions/email.asciidoc +++ b/docs/reference/watcher/actions/email.asciidoc @@ -129,7 +129,7 @@ killed by firewalls or load balancers in-between. | Name | Description | `format` | Attaches the watch data, equivalent to specifying `attach_data` in the watch configuration. Possible values are `json` or `yaml`. - Defaults to `json` if not specified. + Defaults to `yaml` if not specified. 
|====== From d3f0ae04e2b5e107686b9a19ffbe5312bacec753 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Tue, 3 Dec 2024 15:10:57 +0100 Subject: [PATCH 19/28] Enhance LOOKUP JOIN csv-spec tests to cover more cases and fix several bugs found (#117843) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds several more tests to lookup-join.csv-spec, and fixes the following bugs: * FieldCaps on right hand side should ignore fieldNames method and just use "*" because currently the fieldNames search cannot handle lookup fields with aliases (should be fixed in a followup PR). * Stop using the lookup index in the ComputeService (so we don’t get both indices data coming in from the left, and other weird behaviour). * Ignore failing SearchStats checks on fields from the right hand side in the logical planner (so it does not plan EVAL field = null for all right hand fields). This should be fixed properly with the correct updates to TransportSearchShardsAction (or rather to making multiple use of that for each branch of the execution model). --- .../xpack/esql/ccq/MultiClusterSpecIT.java | 4 +- .../xpack/esql/CsvTestsDataLoader.java | 8 + .../resources/clientips_lookup-settings.json | 5 + .../src/main/resources/languages.csv | 2 +- .../src/main/resources/lookup-join.csv-spec | 224 +++++++++++++++++- .../src/main/resources/mapping-clientips.json | 16 +- .../src/main/resources/mapping-languages.json | 2 +- .../main/resources/mapping-message_types.json | 10 + .../src/main/resources/message_types.csv | 6 + .../message_types_lookup-settings.json | 5 + .../xpack/esql/action/EsqlCapabilities.java | 2 +- .../esql/enrich/LookupFromIndexService.java | 11 + .../local/ReplaceMissingFieldWithNull.java | 13 +- .../physical/local/InsertFieldExtraction.java | 15 +- .../esql/plan/physical/LookupJoinExec.java | 2 +- .../esql/planner/LocalExecutionPlanner.java | 1 + .../xpack/esql/planner/PlannerUtils.java | 11 +- .../xpack/esql/plugin/ComputeService.java | 54 ++++- .../xpack/esql/session/EsqlSession.java | 4 +- .../elasticsearch/xpack/esql/CsvTests.java | 2 +- 20 files changed, 355 insertions(+), 42 deletions(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index af5eadc7358a2..19b29764559d1 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -47,7 +47,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V2; -import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V3; +import static 
org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V4; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST; import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.SYNC; @@ -125,7 +125,7 @@ protected void shouldSkipTest(String testName) throws IOException { assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName())); - assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V3.capabilityName())); + assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V4.capabilityName())); } private TestFeatureService remoteFeaturesService() throws IOException { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 9c987a02aca2d..f9d8cf00695c1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -72,6 +72,11 @@ public class CsvTestsDataLoader { .withTypeMapping(Map.of("@timestamp", "date_nanos")); private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset("missing_ip_sample_data"); private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips"); + private static final TestsDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup") + .withSetting("clientips_lookup-settings.json"); + private static final TestsDataset MESSAGE_TYPES = new TestsDataset("message_types"); + private static final TestsDataset MESSAGE_TYPES_LOOKUP = MESSAGE_TYPES.withIndex("message_types_lookup") + .withSetting("message_types_lookup-settings.json"); private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr"); private static final TestsDataset AGES = new TestsDataset("ages"); private static final TestsDataset HEIGHTS = new TestsDataset("heights"); @@ -112,6 +117,9 @@ public class CsvTestsDataLoader { Map.entry(SAMPLE_DATA_TS_NANOS.indexName, SAMPLE_DATA_TS_NANOS), Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), + Map.entry(CLIENT_IPS_LOOKUP.indexName, CLIENT_IPS_LOOKUP), + Map.entry(MESSAGE_TYPES.indexName, MESSAGE_TYPES), + Map.entry(MESSAGE_TYPES_LOOKUP.indexName, MESSAGE_TYPES_LOOKUP), Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR), Map.entry(AGES.indexName, AGES), Map.entry(HEIGHTS.indexName, HEIGHTS), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json new file mode 100644 index 0000000000000..b73d1f9accf92 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json @@ -0,0 +1,5 @@ +{ + "index": { + "mode": "lookup" + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv index 3ee60b79970ba..1c1a9776df6cc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv @@ -1,4 +1,4 @@ -language_code:keyword,language_name:keyword +language_code:integer,language_name:keyword 1,English 2,French 3,Spanish diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 5de353978b307..f2800456ceb33 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -4,8 +4,8 @@ // //TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) -basicOnTheDataNode-Ignore -required_capability: join_lookup_v3 +basicOnTheDataNode +required_capability: join_lookup_v4 FROM employees | EVAL language_code = languages @@ -21,19 +21,19 @@ emp_no:integer | language_code:integer | language_name:keyword 10093 | 3 | Spanish ; -basicRow-Ignore -required_capability: join_lookup_v3 +basicRow +required_capability: join_lookup_v4 ROW language_code = 1 | LOOKUP JOIN languages_lookup ON language_code ; -language_code:keyword | language_name:keyword +language_code:integer | language_name:keyword 1 | English ; basicOnTheCoordinator -required_capability: join_lookup_v3 +required_capability: join_lookup_v4 FROM employees | SORT emp_no @@ -49,9 +49,8 @@ emp_no:integer | language_code:integer | language_name:keyword 10003 | 4 | German ; -//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) -subsequentEvalOnTheDataNode-Ignore -required_capability: join_lookup_v3 +subsequentEvalOnTheDataNode +required_capability: join_lookup_v4 FROM employees | EVAL language_code = languages @@ -69,7 +68,7 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x ; subsequentEvalOnTheCoordinator -required_capability: join_lookup_v3 +required_capability: join_lookup_v4 FROM employees | SORT emp_no @@ -85,3 +84,208 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x 10002 | 5 | null | 10 10003 | 4 | german | 8 ; + +lookupIPFromRow +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", right = "right" +| LOOKUP JOIN clientips_lookup ON client_ip +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromRowWithShadowing +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" +| LOOKUP JOIN clientips_lookup ON client_ip +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromRowWithShadowingKeep +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP left, client_ip, right, env +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromIndex +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +; + +@timestamp:date | event_duration:long | message:keyword | 
client_ip:keyword | env:keyword +2023-10-23T13:55:01.543Z | 1756467 | Connected to 10.1.0.1 | 172.21.3.15 | Production +2023-10-23T13:53:55.832Z | 5033755 | Connection error | 172.21.3.15 | Production +2023-10-23T13:52:55.015Z | 8268153 | Connection error | 172.21.3.15 | Production +2023-10-23T13:51:54.732Z | 725448 | Connection error | 172.21.3.15 | Production +2023-10-23T13:33:34.937Z | 1232382 | Disconnected | 172.21.0.5 | Development +2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 | 172.21.2.113 | QA +2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 | 172.21.2.162 | QA +; + +lookupIPFromIndexKeep +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP @timestamp, client_ip, event_duration, message, env +; + +@timestamp:date | client_ip:keyword | event_duration:long | message:keyword | env:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Production +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Production +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Production +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Production +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Development +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | QA +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | QA +; + +lookupIPFromIndexStats +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| STATS count = count(client_ip) BY env +| SORT count DESC, env ASC +; + +count:long | env:keyword +4 | Production +2 | QA +1 | Development +; + +lookupIPFromIndexStatsKeep +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP client_ip, env +| STATS count = count(client_ip) BY env +| SORT count DESC, env ASC +; + +count:long | env:keyword +4 | Production +2 | QA +1 | Development +; + +lookupMessageFromRow +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", right = "right" +| LOOKUP JOIN message_types_lookup ON message +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromRowWithShadowing +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right" +| LOOKUP JOIN message_types_lookup ON message +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromRowWithShadowingKeep +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right" +| LOOKUP JOIN message_types_lookup ON message +| KEEP left, message, right, type +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromIndex +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +; + +@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error 
+2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success +; + +lookupMessageFromIndexKeep +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| KEEP @timestamp, client_ip, event_duration, message, type +; + +@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success +; + +lookupMessageFromIndexStats +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| STATS count = count(message) BY type +| SORT count DESC, type ASC +; + +count:long | type:keyword +3 | Error +3 | Success +1 | Disconnected +; + +lookupMessageFromIndexStatsKeep +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| KEEP message, type +| STATS count = count(message) BY type +| SORT count DESC, type ASC +; + +count:long | type:keyword +3 | Error +3 | Success +1 | Disconnected +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json index 39bd37ce26c7f..d491810f9134e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json @@ -1,10 +1,10 @@ { - "properties": { - "client_ip": { - "type": "keyword" - }, - "env": { - "type": "keyword" - } + "properties": { + "client_ip": { + "type": "keyword" + }, + "env": { + "type": "keyword" } - } \ No newline at end of file + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json index 0cec0caf17304..327b692369242 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json @@ -1,7 +1,7 @@ { "properties" : { "language_code" : { - "type" : "keyword" + "type" : "integer" }, "language_name" : { "type" : "keyword" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json new file mode 100644 index 0000000000000..af545b48da3d2 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json @@ -0,0 +1,10 @@ +{ + "properties": { + "message": { + "type": "keyword" + }, + "type": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv new file mode 100644 index 0000000000000..8e00485771445 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv @@ -0,0 +1,6 @@ +message:keyword,type:keyword +Connection error,Error +Disconnected,Disconnected +Connected to 10.1.0.1,Success +Connected to 10.1.0.2,Success +Connected to 10.1.0.3,Success diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json new file mode 100644 index 0000000000000..b73d1f9accf92 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json @@ -0,0 +1,5 @@ +{ + "index": { + "mode": "lookup" + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index b5d6dd8584e8c..4845c7061949b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -525,7 +525,7 @@ public enum Cap { /** * LOOKUP JOIN */ - JOIN_LOOKUP_V3(Build.current().isSnapshot()), + JOIN_LOOKUP_V4(Build.current().isSnapshot()), /** * Fix for https://github.com/elastic/elasticsearch/issues/117054 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java index 849e8e890e248..4f429c46b9123 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java @@ -24,6 +24,7 @@ import org.elasticsearch.tasks.TaskId; import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.security.authz.privilege.ClusterPrivilegeResolver; +import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.action.EsqlQueryAction; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -78,9 +79,19 @@ protected TransportRequest transportRequest(LookupFromIndexService.Request reque @Override protected QueryList queryList(TransportRequest request, SearchExecutionContext context, Block inputBlock, DataType inputDataType) { MappedFieldType fieldType = context.getFieldType(request.matchField); + validateTypes(request.inputDataType, fieldType); return termQueryList(fieldType, context, inputBlock, inputDataType); } + private static void validateTypes(DataType inputDataType, MappedFieldType fieldType) { + // TODO: consider supporting implicit type conversion as done in ENRICH for some types + if (fieldType.typeName().equals(inputDataType.typeName()) == false) { + throw new EsqlIllegalArgumentException( + "LOOKUP JOIN match and input types are incompatible: match[" + fieldType.typeName() + "], input[" + inputDataType + "]" + ); + } + } + public static class Request extends AbstractLookupService.Request { private final String matchField; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index 0fa6d61a0ca9b..096f72f7694e1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -9,6 +9,7 @@ import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; @@ -23,6 +24,7 @@ import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; import org.elasticsearch.xpack.esql.plan.logical.TopN; +import org.elasticsearch.xpack.esql.plan.logical.join.Join; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.stats.SearchStats; @@ -56,10 +58,13 @@ else if (plan instanceof Project project) { var projections = project.projections(); List newProjections = new ArrayList<>(projections.size()); Map nullLiteral = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); + AttributeSet joinAttributes = joinAttributes(project); for (NamedExpression projection : projections) { // Do not use the attribute name, this can deviate from the field name for union types. - if (projection instanceof FieldAttribute f && stats.exists(f.fieldName()) == false) { + if (projection instanceof FieldAttribute f && stats.exists(f.fieldName()) == false && joinAttributes.contains(f) == false) { + // TODO: Should do a searchStats lookup for join attributes instead of just ignoring them here + // See TransportSearchShardsAction DataType dt = f.dataType(); Alias nullAlias = nullLiteral.get(f.dataType()); // save the first field as null (per datatype) @@ -96,4 +101,10 @@ else if (plan instanceof Project project) { return plan; } + + private AttributeSet joinAttributes(Project project) { + var attributes = new AttributeSet(); + project.forEachDown(Join.class, j -> j.right().forEachDown(EsRelation.class, p -> attributes.addAll(p.output()))); + return attributes; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java index cafe3726f92ac..dc32a4ad3c282 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java @@ -23,14 +23,12 @@ import org.elasticsearch.xpack.esql.rule.Rule; import java.util.ArrayList; -import java.util.Collections; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; /** - * * Materialize the concrete fields that need to be extracted from the storage until the last possible moment. * Expects the local plan to already have a projection containing the fields needed upstream. *

@@ -102,15 +100,18 @@ public PhysicalPlan apply(PhysicalPlan plan) { private static Set missingAttributes(PhysicalPlan p) { var missing = new LinkedHashSet(); - var inputSet = p.inputSet(); + var input = p.inputSet(); - // TODO: We need to extract whatever fields are missing from the left hand side. - // skip the lookup join since the right side is always materialized and a projection + // For LOOKUP JOIN we only need field-extraction on left fields used to match, since the right side is always materialized if (p instanceof LookupJoinExec join) { - return Collections.emptySet(); + join.leftFields().forEach(f -> { + if (input.contains(f) == false) { + missing.add(f); + } + }); + return missing; } - var input = inputSet; // collect field attributes used inside expressions // TODO: Rather than going over all expressions manually, this should just call .references() p.forEachExpression(TypedAttribute.class, f -> { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java index 2d3caa27da4cd..8b1cc047309e7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java @@ -102,7 +102,7 @@ public List output() { @Override public PhysicalPlan estimateRowSize(State state) { - state.add(false, output()); + state.add(false, addedFields); return this; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index a8afaa4d8119b..8c0488afdd42a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -565,6 +565,7 @@ private PhysicalOperation planHashJoin(HashJoinExec join, LocalExecutionPlannerC private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlannerContext context) { PhysicalOperation source = plan(join.left(), context); + // TODO: The source builder includes incoming fields including the ones we're going to drop Layout.Builder layoutBuilder = source.layout.builder(); for (Attribute f : join.addedFields()) { layoutBuilder.append(f); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index f4ada1442efe5..37f89891860d8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -14,6 +14,7 @@ import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.SearchExecutionContext; @@ -25,6 +26,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.core.util.Queries; +import org.elasticsearch.xpack.esql.index.EsIndex; import 
org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext; import org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; @@ -117,12 +119,17 @@ public static String[] planOriginalIndices(PhysicalPlan plan) { var indices = new LinkedHashSet(); plan.forEachUp( FragmentExec.class, - f -> f.fragment() - .forEachUp(EsRelation.class, r -> indices.addAll(asList(Strings.commaDelimitedListToStringArray(r.index().name())))) + f -> f.fragment().forEachUp(EsRelation.class, r -> addOriginalIndexIfNotLookup(indices, r.index())) ); return indices.toArray(String[]::new); } + private static void addOriginalIndexIfNotLookup(Set indices, EsIndex index) { + if (index.indexNameWithModes().get(index.name()) != IndexMode.LOOKUP) { + indices.addAll(asList(Strings.commaDelimitedListToStringArray(index.name()))); + } + } + public static PhysicalPlan localPlan(List searchContexts, Configuration configuration, PhysicalPlan plan) { return localPlan(configuration, plan, SearchContextStats.from(searchContexts)); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java index 9aea1577a4137..c9c8635a60f57 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java @@ -62,8 +62,12 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; import org.elasticsearch.xpack.esql.enrich.LookupFromIndexService; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.join.Join; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec; +import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; +import org.elasticsearch.xpack.esql.plan.physical.LookupJoinExec; import org.elasticsearch.xpack.esql.plan.physical.OutputExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; @@ -76,6 +80,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -160,9 +165,11 @@ public void execute( Map clusterToConcreteIndices = transportService.getRemoteClusterService() .groupIndices(SearchRequest.DEFAULT_INDICES_OPTIONS, PlannerUtils.planConcreteIndices(physicalPlan).toArray(String[]::new)); QueryPragmas queryPragmas = configuration.pragmas(); + Set lookupIndexNames = findLookupIndexNames(physicalPlan); + Set concreteIndexNames = selectConcreteIndices(clusterToConcreteIndices, lookupIndexNames); if (dataNodePlan == null) { - if (clusterToConcreteIndices.values().stream().allMatch(v -> v.indices().length == 0) == false) { - String error = "expected no concrete indices without data node plan; got " + clusterToConcreteIndices; + if (concreteIndexNames.isEmpty() == false) { + String error = "expected no concrete indices without data node plan; got " + concreteIndexNames; assert false : error; listener.onFailure(new IllegalStateException(error)); return; @@ -185,7 +192,7 @@ public void execute( return; } } else { - if (clusterToConcreteIndices.values().stream().allMatch(v -> 
v.indices().length == 0)) { + if (concreteIndexNames.isEmpty()) { var error = "expected concrete indices with data node plan but got empty; data node plan " + dataNodePlan; assert false : error; listener.onFailure(new IllegalStateException(error)); @@ -259,6 +266,42 @@ public void execute( } } + private Set selectConcreteIndices(Map clusterToConcreteIndices, Set indexesToIgnore) { + Set concreteIndexNames = new HashSet<>(); + clusterToConcreteIndices.forEach((clusterAlias, concreteIndices) -> { + for (String index : concreteIndices.indices()) { + if (indexesToIgnore.contains(index) == false) { + concreteIndexNames.add(index); + } + } + }); + return concreteIndexNames; + } + + private Set findLookupIndexNames(PhysicalPlan physicalPlan) { + Set lookupIndexNames = new HashSet<>(); + // When planning JOIN on the coordinator node: "LookupJoinExec.lookup()->FragmentExec.fragment()->EsRelation.index()" + physicalPlan.forEachDown( + LookupJoinExec.class, + lookupJoinExec -> lookupJoinExec.lookup() + .forEachDown( + FragmentExec.class, + frag -> frag.fragment().forEachDown(EsRelation.class, esRelation -> lookupIndexNames.add(esRelation.index().name())) + ) + ); + // When planning JOIN on the data node: "FragmentExec.fragment()->Join.right()->EsRelation.index()" + // TODO this only works for LEFT join, so we still need to support RIGHT join + physicalPlan.forEachDown( + FragmentExec.class, + fragmentExec -> fragmentExec.fragment() + .forEachDown( + Join.class, + join -> join.right().forEachDown(EsRelation.class, esRelation -> lookupIndexNames.add(esRelation.index().name())) + ) + ); + return lookupIndexNames; + } + // For queries like: FROM logs* | LIMIT 0 (including cross-cluster LIMIT 0 queries) private static void updateShardCountForCoordinatorOnlyQuery(EsqlExecutionInfo execInfo) { if (execInfo.isCrossClusterSearch()) { @@ -562,8 +605,9 @@ record DataNode(Transport.Connection connection, List shardIds, Map dataNodes, int totalShards, int skippedShards) {} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 3b0f9ab578df9..3d1ed8f70eae0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -178,7 +178,7 @@ public void executeOptimizedPlan( executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener); } - private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}; + private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {} private void executeSubPlans( PhysicalPlan physicalPlan, @@ -313,7 +313,7 @@ private void preAnalyze( // First resolve the lookup indices, then the main indices preAnalyzeLookupIndices( preAnalysis.lookupIndices, - fieldNames, + Set.of("*"), // Current LOOKUP JOIN syntax does not allow for field selection l.delegateFailureAndWrap( (lx, lookupIndexResolution) -> preAnalyzeIndices( indices, diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index df974a88a4c57..2e8b856cf82a6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -263,7 +263,7 @@ public final void test() throws Throwable { ); assumeFalse( "lookup join disabled for 
csv tests", - testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V3.capabilityName()) + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V4.capabilityName()) ); if (Build.current().isSnapshot()) { assertThat( From ae1b1320996f8fb636f8f377bc9fa7b7743230a6 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Tue, 3 Dec 2024 06:34:13 -0800 Subject: [PATCH 20/28] Only check non-negative stats for active, current and queue (#117834) In SimpleThreadPoolIT, stats are gathered for each threadpool being checked, then measurements are collected. Some stats may go up or down depending on other background tasks outside the test. This commit adjusts the check for those stats to only check collecting non-negative values. closes #108320 --- muted-tests.yml | 3 --- .../org/elasticsearch/threadpool/SimpleThreadPoolIT.java | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index cf39eae210f88..3652173327e84 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -141,9 +141,6 @@ tests: - class: org.elasticsearch.xpack.shutdown.NodeShutdownIT method: testAllocationPreventedForRemoval issue: https://github.com/elastic/elasticsearch/issues/116363 -- class: org.elasticsearch.threadpool.SimpleThreadPoolIT - method: testThreadPoolMetrics - issue: https://github.com/elastic/elasticsearch/issues/108320 - class: org.elasticsearch.xpack.downsample.ILMDownsampleDisruptionIT method: testILMDownsampleRollingRestart issue: https://github.com/elastic/elasticsearch/issues/114233 diff --git a/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java b/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java index be875421e036f..d2e021a8d7436 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java @@ -167,10 +167,10 @@ public void testThreadPoolMetrics() throws Exception { tps[0].forEach(stats -> { Map threadPoolStats = List.of( Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_COMPLETED, stats.completed()), - Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_ACTIVE, (long) stats.active()), - Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_CURRENT, (long) stats.threads()), + Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_ACTIVE, 0L), + Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_CURRENT, 0L), Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_LARGEST, (long) stats.largest()), - Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_QUEUE, (long) stats.queue()) + Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_QUEUE, 0L) ).stream().collect(toUnmodifiableSortedMap(e -> stats.name() + e.getKey(), Entry::getValue)); Function> measurementExtractor = name -> { From 5ed106a79b278052842865d2e63c4817230af7ab Mon Sep 17 00:00:00 2001 From: Artem Prigoda Date: Tue, 3 Dec 2024 16:16:03 +0100 Subject: [PATCH 21/28] [test] Remove synchronization from InternalTestCluster#getInstance (#117780) The map of nodes is volatile and immutable and can be ready without synchronization. Getting a class's instance from the node's injector is also thread safe. Doing so prevents deadlocks if we restart the node and have a disruption scheme that internally calls `getInstance` from another thread. 
```
2> "elasticsearch[StatelessClusterIntegrityStressIT][server][T#1]" ID=3490 BLOCKED on org.elasticsearch.test.InternalTestCluster@18a6d098 owned by "elasticsearch[StatelessClusterIntegrityStressIT][server][T#2]" ID=3492
2> at app//org.elasticsearch.test.InternalTestCluster.getInstance(InternalTestCluster.java:1653)
2> - blocked on org.elasticsearch.test.InternalTestCluster@18a6d098
2> at app//org.elasticsearch.test.InternalTestCluster.getInstance(InternalTestCluster.java:1620)
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.transport(NetworkDisruption.java:172)
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.applyToNodes(NetworkDisruption.java:157)
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.startDisrupting(NetworkDisruption.java:133)
2> "elasticsearch[StatelessClusterIntegrityStressIT][server][T#2]" ID=3492 BLOCKED on org.elasticsearch.test.disruption.NetworkDisruption@60fd3a1e owned by "elasticsearch[StatelessClusterIntegrityStressIT][server][T#1]" ID=3490
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.applyToNode(NetworkDisruption.java:116)
2> - blocked on org.elasticsearch.test.disruption.NetworkDisruption@60fd3a1e
2> at app//org.elasticsearch.test.InternalTestCluster.applyDisruptionSchemeToNode(InternalTestCluster.java:2307)
2> at app//org.elasticsearch.test.InternalTestCluster.publishNode(InternalTestCluster.java:2258)
2> - locked org.elasticsearch.test.InternalTestCluster@18a6d098
2> at app//org.elasticsearch.test.InternalTestCluster.restartNode(InternalTestCluster.java:1901)
2> at app//org.elasticsearch.test.InternalTestCluster.restartNode(InternalTestCluster.java:1863)
2> - locked org.elasticsearch.test.InternalTestCluster@18a6d098
```
---
 .../main/java/org/elasticsearch/test/InternalTestCluster.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index 7a04384298933..6d46605e201f9 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -1649,7 +1649,7 @@ public T getAnyMasterNodeInstance(Class clazz) { return getInstance(clazz, MASTER_NODE_PREDICATE); } - private synchronized T getInstance(Class clazz, Predicate predicate) { + private T getInstance(Class clazz, Predicate predicate) { NodeAndClient randomNodeAndClient = getRandomNodeAndClient(predicate); if (randomNodeAndClient == null) { throw new AssertionError("no node matches [" + predicate + "]");

From 267dc1a41d49b11c6470ae1f83091debfc49e95f Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Tue, 3 Dec 2024 07:27:44 -0800
Subject: [PATCH 22/28] Fix BWC for ES|QL cluster request (#117865)

We identified a BWC bug in the cluster compute request. Specifically, the indices options were not properly selected for requests from an older querying cluster. This caused the search_shards API on the remote cluster to use restricted indices options, leading to failures when resolving wildcard index patterns. Our tests didn't catch this issue because the current BWC tests for cross-cluster queries only cover one direction: the querying cluster on the current version and the remote cluster on a compatible version.
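Condensed to its essence, the fix is a version-gated read with a more permissive fallback. The sketch below mirrors the `RemoteClusterPlan.from` change in the diff further down; the wrapper class is added here only to keep the snippet self-contained:

```java
import org.elasticsearch.TransportVersions;
import org.elasticsearch.action.OriginalIndices;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;

import java.io.IOException;

final class OriginalIndicesBwcSketch {
    static OriginalIndices readOriginalIndices(PlanStreamInput planIn) throws IOException {
        if (planIn.getTransportVersion().onOrAfter(TransportVersions.ESQL_ORIGINAL_INDICES)) {
            // Both clusters are new enough: the wire format carries the indices options.
            return OriginalIndices.readOriginalIndices(planIn);
        }
        // Older querying cluster: only the index names are on the wire, so the
        // remote must pick options itself. The permissive search defaults keep
        // wildcard index patterns resolvable by the search_shards API.
        return new OriginalIndices(planIn.readStringArray(), SearchRequest.DEFAULT_INDICES_OPTIONS);
    }
}
```

The key design choice is the fallback value: `SearchRequest.DEFAULT_INDICES_OPTIONS` matches what an older querying cluster would have applied to a search request, whereas the previously used `IndicesOptions.strictSingleIndexNoExpandForbidClosed()` forbade wildcard expansion and caused the resolution failures described above.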
This PR fixes the issue and expands BWC tests to support both directions: the querying cluster on the current version with the remote cluster on a compatible version, and vice versa. --- docs/changelog/117865.yaml | 5 + .../qa/server/multi-clusters/build.gradle | 17 +- .../xpack/esql/ccq/Clusters.java | 19 +- .../xpack/esql/ccq/EsqlRestValidationIT.java | 7 + .../xpack/esql/ccq/MultiClusterSpecIT.java | 7 +- .../xpack/esql/ccq/MultiClustersIT.java | 104 ++++++--- .../xpack/esql/qa/single_node/RestEsqlIT.java | 1 - .../xpack/esql/plugin/RemoteClusterPlan.java | 21 +- .../esql/plugin/ClusterRequestTests.java | 206 ++++++++++++++++++ 9 files changed, 345 insertions(+), 42 deletions(-) create mode 100644 docs/changelog/117865.yaml create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java diff --git a/docs/changelog/117865.yaml b/docs/changelog/117865.yaml new file mode 100644 index 0000000000000..33dc497725f92 --- /dev/null +++ b/docs/changelog/117865.yaml @@ -0,0 +1,5 @@ +pr: 117865 +summary: Fix BWC for ES|QL cluster request +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle b/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle index 7f3859e2229ef..d80cb764ca433 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle +++ b/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle @@ -23,9 +23,22 @@ def supportedVersion = bwcVersion -> { } buildParams.bwcVersions.withWireCompatible(supportedVersion) { bwcVersion, baseName -> - tasks.register(bwcTaskName(bwcVersion), StandaloneRestIntegTestTask) { + tasks.register("${baseName}#newToOld", StandaloneRestIntegTestTask) { + usesBwcDistribution(bwcVersion) + systemProperty("tests.version.remote_cluster", bwcVersion) + maxParallelForks = 1 + } + + tasks.register("${baseName}#oldToNew", StandaloneRestIntegTestTask) { usesBwcDistribution(bwcVersion) - systemProperty("tests.old_cluster_version", bwcVersion) + systemProperty("tests.version.local_cluster", bwcVersion) + maxParallelForks = 1 + } + + // TODO: avoid running tests twice with the current version + tasks.register(bwcTaskName(bwcVersion), StandaloneRestIntegTestTask) { + dependsOn tasks.named("${baseName}#oldToNew") + dependsOn tasks.named("${baseName}#newToOld") maxParallelForks = 1 } } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java index fa8cb49c59aed..5f3f135810322 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java @@ -20,7 +20,7 @@ public static ElasticsearchCluster remoteCluster() { return ElasticsearchCluster.local() .name(REMOTE_CLUSTER_NAME) .distribution(DistributionType.DEFAULT) - .version(Version.fromString(System.getProperty("tests.old_cluster_version"))) + .version(distributionVersion("tests.version.remote_cluster")) .nodes(2) .setting("node.roles", "[data,ingest,master]") .setting("xpack.security.enabled", "false") @@ -34,7 +34,7 @@ public static ElasticsearchCluster localCluster(ElasticsearchCluster remoteClust return ElasticsearchCluster.local() .name(LOCAL_CLUSTER_NAME) .distribution(DistributionType.DEFAULT) - .version(Version.CURRENT) + 
.version(distributionVersion("tests.version.local_cluster")) .nodes(2) .setting("xpack.security.enabled", "false") .setting("xpack.license.self_generated.type", "trial") @@ -46,7 +46,18 @@ public static ElasticsearchCluster localCluster(ElasticsearchCluster remoteClust .build(); } - public static org.elasticsearch.Version oldVersion() { - return org.elasticsearch.Version.fromString(System.getProperty("tests.old_cluster_version")); + public static org.elasticsearch.Version localClusterVersion() { + String prop = System.getProperty("tests.version.local_cluster"); + return prop != null ? org.elasticsearch.Version.fromString(prop) : org.elasticsearch.Version.CURRENT; + } + + public static org.elasticsearch.Version remoteClusterVersion() { + String prop = System.getProperty("tests.version.remote_cluster"); + return prop != null ? org.elasticsearch.Version.fromString(prop) : org.elasticsearch.Version.CURRENT; + } + + private static Version distributionVersion(String key) { + final String val = System.getProperty(key); + return val != null ? Version.fromString(val) : Version.CURRENT; } } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java index 21307c5362417..55500aa1c9537 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java @@ -10,12 +10,14 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.apache.http.HttpHost; +import org.elasticsearch.Version; import org.elasticsearch.client.RestClient; import org.elasticsearch.core.IOUtils; import org.elasticsearch.test.TestClustersThreadFilter; import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.xpack.esql.qa.rest.EsqlRestValidationTestCase; import org.junit.AfterClass; +import org.junit.Before; import org.junit.ClassRule; import org.junit.rules.RuleChain; import org.junit.rules.TestRule; @@ -78,4 +80,9 @@ private RestClient remoteClusterClient() throws IOException { } return remoteClient; } + + @Before + public void skipTestOnOldVersions() { + assumeTrue("skip on old versions", Clusters.localClusterVersion().equals(Version.V_8_16_0)); + } } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index 19b29764559d1..e658d169cbce8 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -12,6 +12,7 @@ import org.apache.http.HttpEntity; import org.apache.http.HttpHost; +import org.elasticsearch.Version; import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; import org.elasticsearch.client.RestClient; @@ -118,10 +119,8 @@ protected void shouldSkipTest(String testName) throws IOException { // Do not run tests including "METADATA _index" unless marked with metadata_fields_remote_test, // because they may produce inconsistent results with 
multiple clusters. assumeFalse("can't test with _index metadata", (remoteMetadata == false) && hasIndexMetadata(testCase.query)); - assumeTrue( - "Test " + testName + " is skipped on " + Clusters.oldVersion(), - isEnabled(testName, instructions, Clusters.oldVersion()) - ); + Version oldVersion = Version.min(Clusters.localClusterVersion(), Clusters.remoteClusterVersion()); + assumeTrue("Test " + testName + " is skipped on " + oldVersion, isEnabled(testName, instructions, oldVersion)); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName())); diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java index dbeaed1596eff..452f40baa34a8 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java @@ -10,6 +10,7 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.apache.http.HttpHost; +import org.elasticsearch.Version; import org.elasticsearch.client.Request; import org.elasticsearch.client.RestClient; import org.elasticsearch.common.Strings; @@ -29,7 +30,6 @@ import java.io.IOException; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -127,10 +127,12 @@ void indexDocs(RestClient client, String index, List docs) throws IOExcepti } private Map run(String query, boolean includeCCSMetadata) throws IOException { - Map resp = runEsql( - new RestEsqlTestCase.RequestObjectBuilder().query(query).includeCCSMetadata(includeCCSMetadata).build() - ); - logger.info("--> query {} response {}", query, resp); + var queryBuilder = new RestEsqlTestCase.RequestObjectBuilder().query(query); + if (includeCCSMetadata) { + queryBuilder.includeCCSMetadata(true); + } + Map resp = runEsql(queryBuilder.build()); + logger.info("--> query {} response {}", queryBuilder, resp); return resp; } @@ -156,7 +158,7 @@ private Map runEsql(RestEsqlTestCase.RequestObjectBuilder reques public void testCount() throws Exception { { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM test-local-index,*:test-remote-index | STATS c = COUNT(*)", includeCCSMetadata); var columns = List.of(Map.of("name", "c", "type", "long")); var values = List.of(List.of(localDocs.size() + remoteDocs.size())); @@ -165,13 +167,16 @@ public void testCount() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, false); } } { - 
boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM *:test-remote-index | STATS c = COUNT(*)", includeCCSMetadata); var columns = List.of(Map.of("name", "c", "type", "long")); var values = List.of(List.of(remoteDocs.size())); @@ -180,7 +185,10 @@ public void testCount() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, true); } @@ -189,7 +197,7 @@ public void testCount() throws Exception { public void testUngroupedAggs() throws Exception { { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM test-local-index,*:test-remote-index | STATS total = SUM(data)", includeCCSMetadata); var columns = List.of(Map.of("name", "total", "type", "long")); long sum = Stream.concat(localDocs.stream(), remoteDocs.stream()).mapToLong(d -> d.data).sum(); @@ -200,13 +208,16 @@ public void testUngroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, false); } } { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM *:test-remote-index | STATS total = SUM(data)", includeCCSMetadata); var columns = List.of(Map.of("name", "total", "type", "long")); long sum = remoteDocs.stream().mapToLong(d -> d.data).sum(); @@ -216,12 +227,16 @@ public void testUngroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, true); } } { + assumeTrue("requires ccs metadata", ccsMetadataAvailable()); Map result = runWithColumnarAndIncludeCCSMetadata("FROM *:test-remote-index | STATS total = SUM(data)"); var columns = List.of(Map.of("name", "total", "type", "long")); long sum = remoteDocs.stream().mapToLong(d -> d.data).sum(); @@ -293,7 +308,7 @@ private void assertClusterDetailsMap(Map result, boolean remoteO public void testGroupedAggs() throws Exception { { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run( "FROM test-local-index,*:test-remote-index | STATS total = SUM(data) BY color | SORT color", includeCCSMetadata @@ -311,13 +326,16 @@ public void testGroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, 
mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, false); } } { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run( "FROM *:test-remote-index | STATS total = SUM(data) by color | SORT color", includeCCSMetadata @@ -336,29 +354,57 @@ public void testGroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, true); } } } + public void testIndexPattern() throws Exception { + { + String indexPattern = randomFrom( + "test-local-index,*:test-remote-index", + "test-local-index,*:test-remote-*", + "test-local-index,*:test-*", + "test-*,*:test-remote-index" + ); + Map result = run("FROM " + indexPattern + " | STATS c = COUNT(*)", false); + var columns = List.of(Map.of("name", "c", "type", "long")); + var values = List.of(List.of(localDocs.size() + remoteDocs.size())); + MapMatcher mapMatcher = matchesMap(); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); + } + { + String indexPattern = randomFrom("*:test-remote-index", "*:test-remote-*", "*:test-*"); + Map result = run("FROM " + indexPattern + " | STATS c = COUNT(*)", false); + var columns = List.of(Map.of("name", "c", "type", "long")); + var values = List.of(List.of(remoteDocs.size())); + + MapMatcher mapMatcher = matchesMap(); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); + } + } + private RestClient remoteClusterClient() throws IOException { var clusterHosts = parseClusterHosts(remoteCluster.getHttpAddresses()); return buildClient(restClientSettings(), clusterHosts.toArray(new HttpHost[0])); } - private TestFeatureService remoteFeaturesService() throws IOException { - if (remoteFeaturesService == null) { - try (RestClient remoteClient = remoteClusterClient()) { - var remoteNodeVersions = readVersionsFromNodesInfo(remoteClient); - var semanticNodeVersions = remoteNodeVersions.stream() - .map(ESRestTestCase::parseLegacyVersion) - .flatMap(Optional::stream) - .collect(Collectors.toSet()); - remoteFeaturesService = createTestFeatureService(getClusterStateFeatures(remoteClient), semanticNodeVersions); - } - } - return remoteFeaturesService; + private static boolean ccsMetadataAvailable() { + return Clusters.localClusterVersion().onOrAfter(Version.V_8_16_0); + } + + private static boolean includeCCSMetadata() { + return ccsMetadataAvailable() && randomBoolean(); } } diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java 
b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java index 9a184b9a620fd..050259bbb5b5c 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java @@ -76,7 +76,6 @@ public void testBasicEsql() throws IOException { indexTimestampData(1); RequestObjectBuilder builder = requestObjectBuilder().query(fromIndex() + " | stats avg(value)"); - requestObjectBuilder().includeCCSMetadata(randomBoolean()); if (Build.current().isSnapshot()) { builder.pragmas(Settings.builder().put("data_partitioning", "shard").build()); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java index 8564e4b3afde1..031bfd7139a84 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java @@ -9,12 +9,14 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.action.OriginalIndices; -import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import java.io.IOException; +import java.util.Arrays; +import java.util.Objects; record RemoteClusterPlan(PhysicalPlan plan, String[] targetIndices, OriginalIndices originalIndices) { static RemoteClusterPlan from(PlanStreamInput planIn) throws IOException { @@ -24,7 +26,8 @@ static RemoteClusterPlan from(PlanStreamInput planIn) throws IOException { if (planIn.getTransportVersion().onOrAfter(TransportVersions.ESQL_ORIGINAL_INDICES)) { originalIndices = OriginalIndices.readOriginalIndices(planIn); } else { - originalIndices = new OriginalIndices(planIn.readStringArray(), IndicesOptions.strictSingleIndexNoExpandForbidClosed()); + // fallback to the previous behavior + originalIndices = new OriginalIndices(planIn.readStringArray(), SearchRequest.DEFAULT_INDICES_OPTIONS); } return new RemoteClusterPlan(plan, targetIndices, originalIndices); } @@ -38,4 +41,18 @@ public void writeTo(PlanStreamOutput out) throws IOException { out.writeStringArray(originalIndices.indices()); } } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; + RemoteClusterPlan that = (RemoteClusterPlan) o; + return Objects.equals(plan, that.plan) + && Objects.deepEquals(targetIndices, that.targetIndices) + && Objects.equals(originalIndices, that.originalIndices); + } + + @Override + public int hashCode() { + return Objects.hash(plan, Arrays.hashCode(targetIndices), originalIndices); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java new file mode 100644 index 0000000000000..07ca112e8c527 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java @@ -0,0 +1,206 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.TransportVersions; +import org.elasticsearch.action.OriginalIndices; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.search.SearchModule; +import org.elasticsearch.test.AbstractWireSerializingTestCase; +import org.elasticsearch.test.TransportVersionUtils; +import org.elasticsearch.xpack.esql.ConfigurationTestUtils; +import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.analysis.Analyzer; +import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; +import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.index.IndexResolution; +import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; +import org.elasticsearch.xpack.esql.parser.EsqlParser; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.esql.ConfigurationTestUtils.randomConfiguration; +import static org.elasticsearch.xpack.esql.ConfigurationTestUtils.randomTables; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_CFG; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.emptyPolicyResolution; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; +import static org.hamcrest.Matchers.equalTo; + +public class ClusterRequestTests extends AbstractWireSerializingTestCase<ClusterComputeRequest> { + + @Override + protected Writeable.Reader<ClusterComputeRequest> instanceReader() { + return ClusterComputeRequest::new; + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + List<NamedWriteableRegistry.Entry> writeables = new ArrayList<>(); + writeables.addAll(new SearchModule(Settings.EMPTY, List.of()).getNamedWriteables()); + writeables.addAll(new EsqlPlugin().getNamedWriteables()); + return new NamedWriteableRegistry(writeables); + } + + @Override + protected ClusterComputeRequest createTestInstance() { + var sessionId = randomAlphaOfLength(10); + String query = randomQuery(); + PhysicalPlan physicalPlan = DataNodeRequestTests.mapAndMaybeOptimize(parse(query)); + OriginalIndices originalIndices = new OriginalIndices( + generateRandomStringArray(10, 10, false, false), + IndicesOptions.fromOptions(randomBoolean(), randomBoolean(), randomBoolean(), randomBoolean()) + ); + String[] targetIndices = generateRandomStringArray(10, 10, false, false); + ClusterComputeRequest request = new ClusterComputeRequest( + randomAlphaOfLength(10), + sessionId, + randomConfiguration(query, randomTables()), + new RemoteClusterPlan(physicalPlan, targetIndices,
originalIndices) + ); + request.setParentTask(randomAlphaOfLength(10), randomNonNegativeLong()); + return request; + } + + @Override + protected ClusterComputeRequest mutateInstance(ClusterComputeRequest in) throws IOException { + return switch (between(0, 4)) { + case 0 -> { + var request = new ClusterComputeRequest( + randomValueOtherThan(in.clusterAlias(), () -> randomAlphaOfLength(10)), + in.sessionId(), + in.configuration(), + in.remoteClusterPlan() + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 1 -> { + var request = new ClusterComputeRequest( + in.clusterAlias(), + randomValueOtherThan(in.sessionId(), () -> randomAlphaOfLength(10)), + in.configuration(), + in.remoteClusterPlan() + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 2 -> { + var request = new ClusterComputeRequest( + in.clusterAlias(), + in.sessionId(), + randomValueOtherThan(in.configuration(), ConfigurationTestUtils::randomConfiguration), + in.remoteClusterPlan() + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 3 -> { + RemoteClusterPlan plan = in.remoteClusterPlan(); + var request = new ClusterComputeRequest( + in.clusterAlias(), + in.sessionId(), + in.configuration(), + new RemoteClusterPlan( + plan.plan(), + randomValueOtherThan(plan.targetIndices(), () -> generateRandomStringArray(10, 10, false, false)), + plan.originalIndices() + ) + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 4 -> { + RemoteClusterPlan plan = in.remoteClusterPlan(); + var request = new ClusterComputeRequest( + in.clusterAlias(), + in.sessionId(), + in.configuration(), + new RemoteClusterPlan( + plan.plan(), + plan.targetIndices(), + new OriginalIndices( + plan.originalIndices().indices(), + randomValueOtherThan( + plan.originalIndices().indicesOptions(), + () -> IndicesOptions.fromOptions(randomBoolean(), randomBoolean(), randomBoolean(), randomBoolean()) + ) + ) + ) + ); + request.setParentTask(in.getParentTask()); + yield request; + } + default -> throw new AssertionError("invalid value"); + }; + } + + public void testFallbackIndicesOptions() throws Exception { + ClusterComputeRequest request = createTestInstance(); + var version = TransportVersionUtils.randomVersionBetween( + random(), + TransportVersions.V_8_14_0, + TransportVersions.ESQL_ORIGINAL_INDICES + ); + ClusterComputeRequest cloned = copyInstance(request, version); + assertThat(cloned.clusterAlias(), equalTo(request.clusterAlias())); + assertThat(cloned.sessionId(), equalTo(request.sessionId())); + assertThat(cloned.configuration(), equalTo(request.configuration())); + RemoteClusterPlan plan = cloned.remoteClusterPlan(); + assertThat(plan.plan(), equalTo(request.remoteClusterPlan().plan())); + assertThat(plan.targetIndices(), equalTo(request.remoteClusterPlan().targetIndices())); + OriginalIndices originalIndices = plan.originalIndices(); + assertThat(originalIndices.indices(), equalTo(request.remoteClusterPlan().originalIndices().indices())); + assertThat(originalIndices.indicesOptions(), equalTo(SearchRequest.DEFAULT_INDICES_OPTIONS)); + } + + private static String randomQuery() { + return randomFrom(""" + from test + | where round(emp_no) > 10 + | limit 10 + """, """ + from test + | sort last_name + | limit 10 + | where round(emp_no) > 10 + | eval c = first_name + """); + } + + static LogicalPlan parse(String query) { + Map mapping = loadMapping("mapping-basic.json"); + EsIndex test = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD)); + 
IndexResolution getIndexResult = IndexResolution.valid(test); + var logicalOptimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(TEST_CFG)); + var analyzer = new Analyzer( + new AnalyzerContext(EsqlTestUtils.TEST_CFG, new EsqlFunctionRegistry(), getIndexResult, emptyPolicyResolution()), + TEST_VERIFIER + ); + return logicalOptimizer.optimize(analyzer.analyze(new EsqlParser().createStatement(query))); + } + + @Override + protected List<String> filteredWarnings() { + return withDefaultLimitWarning(super.filteredWarnings()); + } +} From 00a1222f10a6bc605f67aee67d4053c5ba0557e8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 4 Dec 2024 02:32:41 +1100 Subject: [PATCH 23/28] Mute org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilderTests testToQuery #117904 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 3652173327e84..857266a5a47cd 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -238,6 +238,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117862 - class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/117893 +- class: org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilderTests + method: testToQuery + issue: https://github.com/elastic/elasticsearch/issues/117904 # Examples: # From c1a9d44ed4ac980130deb730991f10cce127c583 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Tue, 3 Dec 2024 08:42:49 -0800 Subject: [PATCH 24/28] Guard against missing file in CI upload (#117889) Somehow files can be lost before the build ends up uploading them, presumably from temporary file deletion after tests complete. This commit guards against this case so that the build will not completely fail, but instead log a warning.
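For illustration, a minimal self-contained sketch of the guard this commit describes; the class and method names here are hypothetical, while the real change is made inline in ElasticsearchBuildCompletePlugin, as the hunk below shows.

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

class CiArchiveGuardSketch {
    // Keep only files that still exist when the archive is assembled: a file that
    // disappeared after tests completed is logged and skipped instead of failing
    // the whole build, while a non-regular file is still treated as a hard error.
    static List<Path> keepArchivable(List<Path> candidates) {
        List<Path> archivable = new ArrayList<>();
        for (Path path : candidates) {
            if (Files.exists(path) == false) {
                System.err.println("File disappeared before it could be added to CI archive: " + path);
                continue;
            }
            if (Files.isRegularFile(path) == false) {
                throw new UncheckedIOException(new IOException("Only regular files are supported: " + path));
            }
            archivable.add(path);
        }
        return archivable;
    }
}

The actual plugin logs through SLF4J and performs the existence check while writing tar entries, rather than pre-filtering the list.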
--- .../internal/ElasticsearchBuildCompletePlugin.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java index 14baa55794c95..b1207a2f5161d 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java @@ -29,6 +29,8 @@ import org.gradle.api.provider.Property; import org.gradle.api.tasks.Input; import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; @@ -47,6 +49,8 @@ public abstract class ElasticsearchBuildCompletePlugin implements Plugin<Project> { + private static final Logger log = LoggerFactory.getLogger(ElasticsearchBuildCompletePlugin.class); + @Inject protected abstract FlowScope getFlowScope(); @@ -241,8 +245,11 @@ private static void createBuildArchiveTar(List<File> files, File projectDir, Fil tOut.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU); tOut.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_STAR); for (Path path : files.stream().map(File::toPath).toList()) { - if (!Files.isRegularFile(path)) { - throw new IOException("Support only file!"); + if (Files.exists(path) == false) { + log.warn("File disappeared before it could be added to CI archive: " + path); + continue; + } else if (!Files.isRegularFile(path)) { + throw new IOException("Support only file!: " + path); } long entrySize = Files.size(path); From 0a208279ea869fafe7ee9b4c4ac60d4b9816bd25 Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Tue, 3 Dec 2024 17:53:10 +0100 Subject: [PATCH 25/28] ES|QL fix telemetry tests (usage stats) after promoting CATEGORIZE (#117878) --- muted-tests.yml | 3 --- .../resources/rest-api-spec/test/esql/60_usage.yml | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 857266a5a47cd..7bd06a6605028 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -233,9 +233,6 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117815 - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT issue: https://github.com/elastic/elasticsearch/issues/111319 -- class: org.elasticsearch.xpack.test.rest.XPackRestIT - method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} - issue: https://github.com/elastic/elasticsearch/issues/117862 - class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/117893 - class: org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilderTests method: testToQuery issue: https://github.com/elastic/elasticsearch/issues/117904 diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index f7dd979540afa..c23b44c00bd14 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -163,4 +163,4 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 118} # check the "sister" test above for a likely update to the
same esql.functions length check + - length: {esql.functions: 119} # check the "sister" test above for a likely update to the same esql.functions length check From 22f4a799377ea8710076ff10b74fbb48724a0c09 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Tue, 3 Dec 2024 20:08:05 +0200 Subject: [PATCH 26/28] Smarter field caps with subscribable listener (#116755) --- docs/changelog/116755.yaml | 5 + .../multi_node/RequestIndexFilteringIT.java | 27 ++ .../single_node/RequestIndexFilteringIT.java | 27 ++ .../rest/RequestIndexFilteringTestCase.java | 284 ++++++++++++++++ .../esql/qa/rest/RestEnrichTestCase.java | 176 +++++++++- .../esql/enrich/EnrichPolicyResolver.java | 2 +- .../xpack/esql/session/EsqlSession.java | 315 ++++++++++++------ .../xpack/esql/session/IndexResolver.java | 13 +- 8 files changed, 741 insertions(+), 108 deletions(-) create mode 100644 docs/changelog/116755.yaml create mode 100644 x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java create mode 100644 x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java create mode 100644 x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java diff --git a/docs/changelog/116755.yaml b/docs/changelog/116755.yaml new file mode 100644 index 0000000000000..3aa5ec8580b59 --- /dev/null +++ b/docs/changelog/116755.yaml @@ -0,0 +1,5 @@ +pr: 116755 +summary: Smarter field caps with subscribable listener +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java new file mode 100644 index 0000000000000..c2ba502b92554 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.qa.multi_node; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.xpack.esql.qa.rest.RequestIndexFilteringTestCase; +import org.junit.ClassRule; + +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class RequestIndexFilteringIT extends RequestIndexFilteringTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = Clusters.testCluster(ignored -> {}); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java new file mode 100644 index 0000000000000..f13bcd618f0a8 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.qa.single_node; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.xpack.esql.qa.rest.RequestIndexFilteringTestCase; +import org.junit.ClassRule; + +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class RequestIndexFilteringIT extends RequestIndexFilteringTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = Clusters.testCluster(); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java new file mode 100644 index 0000000000000..3314430d63eaa --- /dev/null +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java @@ -0,0 +1,284 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.qa.rest; + +import org.apache.http.util.EntityUtils; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.client.ResponseException; +import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.esql.AssertWarnings; +import org.junit.After; +import org.junit.Assert; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import static org.elasticsearch.test.ListMatcher.matchesList; +import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.elasticsearch.test.MapMatcher.matchesMap; +import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.entityToMap; +import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.requestObjectBuilder; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.nullValue; + +public abstract class RequestIndexFilteringTestCase extends ESRestTestCase { + + @After + public void wipeTestData() throws IOException { + try { + var response = client().performRequest(new Request("DELETE", "/test*")); + assertEquals(200, response.getStatusLine().getStatusCode()); + } catch (ResponseException re) { + assertEquals(404, re.getResponse().getStatusLine().getStatusCode()); + } + } + + public void testTimestampFilterFromQuery() throws IOException { + int docsTest1 = 50; + int docsTest2 = 30; + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // filter includes both indices in the result (all columns, all rows) + RestEsqlTestCase.RequestObjectBuilder builder = timestampFilter("gte", "2023-01-01").query("FROM test*"); + Map result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "id2").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1 + docsTest2))).entry("took", greaterThanOrEqualTo(0)) + ); + + // filter includes only test1. Columns from test2 are filtered out, as well (not only rows)! + builder = timestampFilter("gte", "2024-01-01").query("FROM test*"); + assertMap( + runEsql(builder), + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + + // filter excludes both indices (no rows); the first analysis step fails because there are no columns, a second attempt succeeds + // after eliminating the index filter. All columns are returned. 
+ builder = timestampFilter("gte", "2025-01-01").query("FROM test*"); + assertMap( + runEsql(builder), + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "id2").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(0))).entry("took", greaterThanOrEqualTo(0)) + ); + } + + public void testFieldExistsFilter_KeepWildcard() throws IOException { + int docsTest1 = randomIntBetween(0, 10); + int docsTest2 = randomIntBetween(0, 10); + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // filter includes only test1. Columns and rows of test2 are filtered out + RestEsqlTestCase.RequestObjectBuilder builder = existsFilter("id1").query("FROM test*"); + Map<String, Object> result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + + // filter includes only test1. Columns from test2 are filtered out, as well (not only rows)! + builder = existsFilter("id1").query("FROM test* METADATA _index | KEEP _index, id*"); + result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "_index").entry("type", "keyword")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + @SuppressWarnings("unchecked") + var values = (List<List<Object>>) result.get("values"); + for (List<Object> row : values) { + assertThat(row.get(0), equalTo("test1")); + assertThat(row.get(1), instanceOf(Integer.class)); + } + } + + public void testFieldExistsFilter_With_ExplicitUseOfDiscardedIndexFields() throws IOException { + int docsTest1 = randomIntBetween(1, 5); + int docsTest2 = randomIntBetween(0, 5); + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // test2 is explicitly used in a query with "SORT id2" even if the index filter should discard test2 + RestEsqlTestCase.RequestObjectBuilder builder = existsFilter("id1").query( + "FROM test* METADATA _index | SORT id2 | KEEP _index, id*" + ); + Map<String, Object> result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "_index").entry("type", "keyword")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "id2").entry("type", "integer")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + @SuppressWarnings("unchecked") + var values = (List<List<Object>>) result.get("values"); + for (List<Object> row : values) { + assertThat(row.get(0), equalTo("test1")); + assertThat(row.get(1), instanceOf(Integer.class)); + assertThat(row.get(2), nullValue()); + } + } + + public void testFieldNameTypo() throws IOException { + int docsTest1 = randomIntBetween(0, 5); + int docsTest2 = randomIntBetween(0, 5); +
indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // idx field name is explicitly used, though it doesn't exist in any of the indices. First test - without filter + ResponseException e = expectThrows( + ResponseException.class, + () -> runEsql(requestObjectBuilder().query("FROM test* | WHERE idx == 123")) + ); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + + e = expectThrows(ResponseException.class, () -> runEsql(requestObjectBuilder().query("FROM test1 | WHERE idx == 123"))); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + + e = expectThrows( + ResponseException.class, + () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM test* | WHERE idx == 123")) + ); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + + e = expectThrows( + ResponseException.class, + () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM test2 | WHERE idx == 123")) + ); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + } + + public void testIndicesDontExist() throws IOException { + int docsTest1 = 0; // we are interested only in the created index, not necessarily that it has data + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + + ResponseException e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo"))); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Unknown index [foo]")); + + e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo*"))); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Unknown index [foo*]")); + + e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo,test1"))); + assertEquals(404, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("index_not_found_exception")); + assertThat(e.getMessage(), containsString("no such index [foo]")); + } + + private static RestEsqlTestCase.RequestObjectBuilder timestampFilter(String op, String date) throws IOException { + return requestObjectBuilder().filter(b -> { + b.startObject("range"); + { + b.startObject("@timestamp").field(op, date).endObject(); + } + b.endObject(); + }); + } + + private static RestEsqlTestCase.RequestObjectBuilder existsFilter(String field) throws IOException { + return requestObjectBuilder().filter(b -> b.startObject("exists").field("field", 
field).endObject()); + } + + public Map runEsql(RestEsqlTestCase.RequestObjectBuilder requestObject) throws IOException { + return RestEsqlTestCase.runEsql(requestObject, new AssertWarnings.NoWarnings(), RestEsqlTestCase.Mode.SYNC); + } + + protected void indexTimestampData(int docs, String indexName, String date, String differentiatorFieldName) throws IOException { + Request createIndex = new Request("PUT", indexName); + createIndex.setJsonEntity(""" + { + "settings": { + "index": { + "number_of_shards": 3 + } + }, + "mappings": { + "properties": { + "@timestamp": { + "type": "date" + }, + "%differentiator_field_name%": { + "type": "integer" + } + } + } + }""".replace("%differentiator_field_name%", differentiatorFieldName)); + Response response = client().performRequest(createIndex); + assertThat( + entityToMap(response.getEntity(), XContentType.JSON), + matchesMap().entry("shards_acknowledged", true).entry("index", indexName).entry("acknowledged", true) + ); + + if (docs > 0) { + StringBuilder b = new StringBuilder(); + for (int i = 0; i < docs; i++) { + b.append(String.format(Locale.ROOT, """ + {"create":{"_index":"%s"}} + {"@timestamp":"%s","value":%d,"%s":%d} + """, indexName, date, i, differentiatorFieldName, i)); + } + Request bulk = new Request("POST", "/_bulk"); + bulk.addParameter("refresh", "true"); + bulk.addParameter("filter_path", "errors"); + bulk.setJsonEntity(b.toString()); + response = client().performRequest(bulk); + Assert.assertEquals("{\"errors\":false}", EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8)); + } + } +} diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java index def6491fb920f..bf4a4400e13cf 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java @@ -12,7 +12,9 @@ import org.apache.http.util.EntityUtils; import org.elasticsearch.client.Request; import org.elasticsearch.client.ResponseException; +import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xcontent.XContentBuilder; import org.junit.After; import org.junit.Before; @@ -29,7 +31,6 @@ public abstract class RestEnrichTestCase extends ESRestTestCase { private static final String sourceIndexName = "countries"; - private static final String testIndexName = "test"; private static final String policyName = "countries"; public enum Mode { @@ -56,7 +57,7 @@ public void assertRequestBreakerEmpty() throws Exception { @Before public void loadTestData() throws IOException { - Request request = new Request("PUT", "/" + testIndexName); + Request request = new Request("PUT", "/test1"); request.setJsonEntity(""" { "mappings": { @@ -72,7 +73,7 @@ public void loadTestData() throws IOException { }"""); assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); - request = new Request("POST", "/" + testIndexName + "/_bulk"); + request = new Request("POST", "/test1/_bulk"); request.addParameter("refresh", "true"); request.setJsonEntity(""" { "index": {"_id": 1} } @@ -84,6 +85,34 @@ public void loadTestData() throws IOException { """); assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + request = new Request("PUT", "/test2"); + 
request.setJsonEntity(""" + { + "mappings": { + "properties": { + "geo.dest": { + "type": "keyword" + }, + "country_number": { + "type": "long" + } + } + } + }"""); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + + request = new Request("POST", "/test2/_bulk"); + request.addParameter("refresh", "true"); + request.setJsonEntity(""" + { "index": {"_id": 1} } + { "geo.dest": "IN", "country_number": 2 } + { "index": {"_id": 2} } + { "geo.dest": "IN", "country_number": 2 } + { "index": {"_id": 3} } + { "geo.dest": "US", "country_number": 3 } + """); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + request = new Request("PUT", "/" + sourceIndexName); request.setJsonEntity(""" { @@ -131,7 +160,7 @@ public void loadTestData() throws IOException { @After public void wipeTestData() throws IOException { try { - var response = client().performRequest(new Request("DELETE", "/" + testIndexName)); + var response = client().performRequest(new Request("DELETE", "/test1,test2")); assertEquals(200, response.getStatusLine().getStatusCode()); response = client().performRequest(new Request("DELETE", "/" + sourceIndexName)); assertEquals(200, response.getStatusLine().getStatusCode()); @@ -143,7 +172,7 @@ public void wipeTestData() throws IOException { } public void testNonExistentEnrichPolicy() throws IOException { - ResponseException re = expectThrows(ResponseException.class, () -> runEsql("from test | enrich countris", Mode.SYNC)); + ResponseException re = expectThrows(ResponseException.class, () -> runEsql("from test1 | enrich countris", null, Mode.SYNC)); assertThat( EntityUtils.toString(re.getResponse().getEntity()), containsString("cannot find enrich policy [countris], did you mean [countries]?") @@ -151,7 +180,10 @@ public void testNonExistentEnrichPolicy() throws IOException { } public void testNonExistentEnrichPolicy_KeepField() throws IOException { - ResponseException re = expectThrows(ResponseException.class, () -> runEsql("from test | enrich countris | keep number", Mode.SYNC)); + ResponseException re = expectThrows( + ResponseException.class, + () -> runEsql("from test1 | enrich countris | keep number", null, Mode.SYNC) + ); assertThat( EntityUtils.toString(re.getResponse().getEntity()), containsString("cannot find enrich policy [countris], did you mean [countries]?") @@ -159,25 +191,147 @@ public void testNonExistentEnrichPolicy_KeepField() throws IOException { } public void testMatchField_ImplicitFieldsList() throws IOException { - Map result = runEsql("from test | enrich countries | keep number | sort number"); + Map result = runEsql("from test1 | enrich countries | keep number | sort number"); var columns = List.of(Map.of("name", "number", "type", "long")); var values = List.of(List.of(1000), List.of(1000), List.of(5000)); assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); } public void testMatchField_ImplicitFieldsList_WithStats() throws IOException { - Map result = runEsql("from test | enrich countries | stats s = sum(number) by country_name"); + Map result = runEsql("from test1 | enrich countries | stats s = sum(number) by country_name"); var columns = List.of(Map.of("name", "s", "type", "long"), Map.of("name", "country_name", "type", "keyword")); var values = List.of(List.of(2000, "United States of America"), List.of(5000, "China")); assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", 
greaterThanOrEqualTo(0))); } + public void testSimpleIndexFilteringWithEnrich() throws IOException { + // no filter + Map result = runEsql(""" + from test* metadata _index + | enrich countries + | keep *number, geo.dest, _index + | sort geo.dest, _index + """); + var columns = List.of( + Map.of("name", "country_number", "type", "long"), + Map.of("name", "number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + var values = List.of( + Arrays.asList(null, 5000, "CN", "test1"), + Arrays.asList(2, null, "IN", "test2"), + Arrays.asList(2, null, "IN", "test2"), + Arrays.asList(null, 1000, "US", "test1"), + Arrays.asList(null, 1000, "US", "test1"), + Arrays.asList(3, null, "US", "test2") + ); + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + + // filter something that won't affect the columns + result = runEsql(""" + from test* metadata _index + | enrich countries + | keep *number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "foobar").endObject()); + assertMap(result, matchesMap().entry("columns", columns).entry("values", List.of()).entry("took", greaterThanOrEqualTo(0))); + } + + public void testIndexFilteringWithEnrich_RemoveOneIndex() throws IOException { + // filter out test2 but specifically use one of its fields in the query (country_number) + Map result = runEsql(""" + from test* metadata _index + | enrich countries + | keep country_number, number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "number").endObject()); + + var columns = List.of( + Map.of("name", "country_number", "type", "long"), + Map.of("name", "number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + var values = List.of( + Arrays.asList(null, 5000, "CN", "test1"), + Arrays.asList(null, 1000, "US", "test1"), + Arrays.asList(null, 1000, "US", "test1") + ); + + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + + // filter out test2 and use a wildcarded field name in the "keep" command + result = runEsql(""" + from test* metadata _index + | enrich countries + | keep *number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "number").endObject()); + + columns = List.of( + Map.of("name", "number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + values = List.of(Arrays.asList(5000, "CN", "test1"), Arrays.asList(1000, "US", "test1"), Arrays.asList(1000, "US", "test1")); + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + } + + public void testIndexFilteringWithEnrich_ExpectException() throws IOException { + // no filter, just a simple query with "enrich" that should throw a valid VerificationException + ResponseException e = expectThrows(ResponseException.class, () -> runEsql(""" + from test* metadata _index + | enrich countries + | where foobar == 123 + """)); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 3:13: Unknown column [foobar]")); + + // same query, but with a filter this time + e = 
expectThrows(ResponseException.class, () -> runEsql(""" + from test* metadata _index + | enrich countries + | where foobar == 123 + """, b -> b.startObject("exists").field("field", "number").endObject())); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 3:13: Unknown column [foobar]")); + } + + public void testIndexFilteringWithEnrich_FilterUnusedIndexFields() throws IOException { + // filter out "test1". The field that is specific to "test1" ("number") is not actually used in the query + Map<String, Object> result = runEsql(""" + from test* metadata _index + | enrich countries + | keep country_number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "country_number").endObject()); + + var columns = List.of( + Map.of("name", "country_number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + var values = List.of(Arrays.asList(2, "IN", "test2"), Arrays.asList(2, "IN", "test2"), Arrays.asList(3, "US", "test2")); + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + } + private Map<String, Object> runEsql(String query) throws IOException { - return runEsql(query, mode); + return runEsql(query, null, mode); } - private Map<String, Object> runEsql(String query, Mode mode) throws IOException { - var requestObject = new RestEsqlTestCase.RequestObjectBuilder().query(query); + private Map<String, Object> runEsql(String query, CheckedConsumer<XContentBuilder, IOException> filter) throws IOException { + return runEsql(query, filter, mode); + } + + private Map<String, Object> runEsql(String query, CheckedConsumer<XContentBuilder, IOException> filter, Mode mode) throws IOException { + var requestObject = new RestEsqlTestCase.RequestObjectBuilder(); + if (filter != null) { + requestObject.filter(filter); + } + requestObject.query(query); if (mode == Mode.ASYNC) { return RestEsqlTestCase.runEsqlAsync(requestObject); } else { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java index c8a7a6bcc4e98..c8e993b7dbf0b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java @@ -411,7 +411,7 @@ public void messageReceived(LookupRequest request, TransportChannel channel, Tas } try (ThreadContext.StoredContext ignored = threadContext.stashWithOrigin(ClientHelper.ENRICH_ORIGIN)) { String indexName = EnrichPolicy.getBaseName(policyName); - indexResolver.resolveAsMergedMapping(indexName, IndexResolver.ALL_FIELDS, refs.acquire(indexResult -> { + indexResolver.resolveAsMergedMapping(indexName, IndexResolver.ALL_FIELDS, null, refs.acquire(indexResult -> { if (indexResult.isValid() && indexResult.get().concreteIndices().size() == 1) { EsIndex esIndex = indexResult.get(); var concreteIndices = Map.of(request.clusterAlias, Iterables.get(esIndex.concreteIndices(), 0)); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 3d1ed8f70eae0..71fba5683644d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.OriginalIndices; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.common.Strings; import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.collect.Iterators; @@ -25,6 +26,7 @@ import org.elasticsearch.indices.IndicesExpressionGrouper; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; +import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.action.EsqlExecutionInfo; import org.elasticsearch.xpack.esql.action.EsqlQueryRequest; import org.elasticsearch.xpack.esql.analysis.Analyzer; @@ -151,6 +153,7 @@ public void execute(EsqlQueryRequest request, EsqlExecutionInfo executionInfo, P analyzedPlan( parse(request.query(), request.params()), executionInfo, + request.filter(), new EsqlSessionCCSUtils.CssPartialErrorsActionListener(executionInfo, listener) { @Override public void onResponse(LogicalPlan analyzedPlan) { @@ -268,31 +271,28 @@ private LogicalPlan parse(String query, QueryParams params) { return parsed; } - public void analyzedPlan(LogicalPlan parsed, EsqlExecutionInfo executionInfo, ActionListener<LogicalPlan> listener) { + public void analyzedPlan( + LogicalPlan parsed, + EsqlExecutionInfo executionInfo, + QueryBuilder requestFilter, + ActionListener<LogicalPlan> logicalPlanListener + ) { if (parsed.analyzed()) { - listener.onResponse(parsed); + logicalPlanListener.onResponse(parsed); return; } - preAnalyze(parsed, executionInfo, (indices, lookupIndices, policies) -> { + TriFunction<IndexResolution, IndexResolution, EnrichResolution, LogicalPlan> analyzeAction = (indices, lookupIndices, policies) -> { planningMetrics.gatherPreAnalysisMetrics(parsed); Analyzer analyzer = new Analyzer( new AnalyzerContext(configuration, functionRegistry, indices, lookupIndices, policies), verifier ); - var plan = analyzer.analyze(parsed); + LogicalPlan plan = analyzer.analyze(parsed); plan.setAnalyzed(); - LOGGER.debug("Analyzed plan:\n{}", plan); return plan; - }, listener); - } + }; - private <T> void preAnalyze( - LogicalPlan parsed, - EsqlExecutionInfo executionInfo, - TriFunction<IndexResolution, IndexResolution, EnrichResolution, T> action, - ActionListener<T> listener - ) { PreAnalyzer.PreAnalysis preAnalysis = preAnalyzer.preAnalyze(parsed); var unresolvedPolicies = preAnalysis.enriches.stream() .map(e -> new EnrichPolicyResolver.UnresolvedPolicy((String) e.policyName().fold(), e.mode())) @@ -302,81 +302,113 @@ private void preAnalyze( final Set<String> targetClusters = enrichPolicyResolver.groupIndicesPerCluster( indices.stream().flatMap(t -> Arrays.stream(Strings.commaDelimitedListToStringArray(t.id().index()))).toArray(String[]::new) ).keySet(); - enrichPolicyResolver.resolvePolicies(targetClusters, unresolvedPolicies, listener.delegateFailureAndWrap((l, enrichResolution) -> { - // first we need the match_fields names from enrich policies and THEN, with an updated list of fields, we call field_caps API - var enrichMatchFields = enrichResolution.resolvedEnrichPolicies() - .stream() - .map(ResolvedEnrichPolicy::matchField) - .collect(Collectors.toSet()); - // get the field names from the parsed plan combined with the ENRICH match fields from the ENRICH policy - var fieldNames = fieldNames(parsed, enrichMatchFields); - // First resolve the lookup indices, then the main indices preAnalyzeLookupIndices( preAnalysis.lookupIndices, + + SubscribableListener.<EnrichResolution>newForked(l ->
enrichPolicyResolver.resolvePolicies(targetClusters, unresolvedPolicies, l)) + .<ListenerResult>andThen((l, enrichResolution) -> { + // we need the match_fields names from enrich policies and THEN, with an updated list of fields, we call field_caps API + var enrichMatchFields = enrichResolution.resolvedEnrichPolicies() + .stream() + .map(ResolvedEnrichPolicy::matchField) + .collect(Collectors.toSet()); + // get the field names from the parsed plan combined with the ENRICH match fields from the ENRICH policy + var fieldNames = fieldNames(parsed, enrichMatchFields); + ListenerResult listenerResult = new ListenerResult(null, null, enrichResolution, fieldNames); + + // first resolve the lookup indices, then the main indices + preAnalyzeLookupIndices(preAnalysis.lookupIndices, listenerResult, l); + }) + .<ListenerResult>andThen((l, listenerResult) -> { + // resolve the main indices + preAnalyzeIndices(preAnalysis.indices, executionInfo, listenerResult, requestFilter, l); + }) + .<ListenerResult>andThen((l, listenerResult) -> { + // TODO in follow-PR (for skip_unavailable handling of missing concrete indexes) add some tests for + // invalid index resolution to updateExecutionInfo + if (listenerResult.indices.isValid()) { + // CCS indices and skip_unavailable cluster values can stop the analysis right here + if (analyzeCCSIndices(executionInfo, targetClusters, unresolvedPolicies, listenerResult, logicalPlanListener, l)) return; + } + // whatever tuple we have here (from CCS-special handling or from the original pre-analysis), pass it on to the next step + l.onResponse(listenerResult); + }) + .<ListenerResult>andThen((l, listenerResult) -> { + // first attempt (maybe the only one) at analyzing the plan + analyzeAndMaybeRetry(analyzeAction, requestFilter, listenerResult, logicalPlanListener, l); + }) + .<ListenerResult>andThen((l, listenerResult) -> { + assert requestFilter != null : "The second pre-analysis shouldn't take place when there is no index filter in the request"; + + // "reset" execution information for all ccs or non-ccs (local) clusters, since we are performing the indices + // resolving one more time (the first attempt failed and the query had a filter) + for (String clusterAlias : executionInfo.clusterAliases()) { + executionInfo.swapCluster(clusterAlias, (k, v) -> null); + } + + // here the requestFilter is set to null, performing the pre-analysis after the first step failed + preAnalyzeIndices(preAnalysis.indices, executionInfo, listenerResult, null, l); + }) + .<LogicalPlan>andThen((l, listenerResult) -> { + assert requestFilter != null : "The second analysis shouldn't take place when there is no index filter in the request"; + LOGGER.debug("Analyzing the plan (second attempt, without filter)"); + LogicalPlan plan; + try { + plan = analyzeAction.apply(listenerResult.indices, listenerResult.lookupIndices, listenerResult.enrichResolution); + } catch (Exception e) { + l.onFailure(e); + return; + } + LOGGER.debug("Analyzed plan (second attempt, without filter):\n{}", plan); + l.onResponse(plan); + }) + .addListener(logicalPlanListener); + } + + private void preAnalyzeLookupIndices(List<TableInfo> indices, ListenerResult listenerResult, ActionListener<ListenerResult> listener) { + if (indices.size() > 1) { + // Note: JOINs on more than one index are not yet supported + listener.onFailure(new MappingException("More than one LOOKUP JOIN is not supported")); + } else if (indices.size() == 1) { + TableInfo tableInfo = indices.get(0); + TableIdentifier table = tableInfo.id(); + // call the EsqlResolveFieldsAction (field-caps) to resolve indices and get field types + indexResolver.resolveAsMergedMapping(
table.index(), Set.of("*"), // Current LOOKUP JOIN syntax does not allow for field selection - l.delegateFailureAndWrap( - (lx, lookupIndexResolution) -> preAnalyzeIndices( - indices, - executionInfo, - enrichResolution.getUnavailableClusters(), - fieldNames, - lx.delegateFailureAndWrap((ll, indexResolution) -> { - // TODO in follow-PR (for skip_unavailble handling of missing concrete indexes) add some tests for invalid - // index resolution to updateExecutionInfo - if (indexResolution.isValid()) { - EsqlSessionCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, indexResolution); - EsqlSessionCCSUtils.updateExecutionInfoWithUnavailableClusters( - executionInfo, - indexResolution.unavailableClusters() - ); - if (executionInfo.isCrossClusterSearch() - && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) == 0) { - // for a CCS, if all clusters have been marked as SKIPPED, nothing to search so send a sentinel - // Exception to let the LogicalPlanActionListener decide how to proceed - ll.onFailure(new NoClustersToSearchException()); - return; - } - - Set<String> newClusters = enrichPolicyResolver.groupIndicesPerCluster( - indexResolution.get().concreteIndices().toArray(String[]::new) - ).keySet(); - // If new clusters appear when resolving the main indices, we need to resolve the enrich policies again - // or exclude main concrete indices. Since this is rare, it's simpler to resolve the enrich policies - // again. - // TODO: add a test for this - if (targetClusters.containsAll(newClusters) == false - // do not bother with a re-resolution if only remotes were requested and all were offline - && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) > 0) { - enrichPolicyResolver.resolvePolicies( - newClusters, - unresolvedPolicies, - ll.map( - newEnrichResolution -> action.apply(indexResolution, lookupIndexResolution, newEnrichResolution) - ) - ); - return; - } - } - ll.onResponse(action.apply(indexResolution, lookupIndexResolution, enrichResolution)); - }) - ) - ) + null, + listener.map(indexResolution -> listenerResult.withLookupIndexResolution(indexResolution)) ); - })); + } else { + try { + // No lookup indices specified + listener.onResponse( + new ListenerResult( + listenerResult.indices, + IndexResolution.invalid("[none specified]"), + listenerResult.enrichResolution, + listenerResult.fieldNames + ) + ); + } catch (Exception ex) { + listener.onFailure(ex); + } + } } private void preAnalyzeIndices( List<TableInfo> indices, EsqlExecutionInfo executionInfo, - Map<String, FieldCapabilitiesFailure> unavailableClusters, // known to be unavailable from the enrich policy API call - Set<String> fieldNames, - ActionListener<IndexResolution> listener + ListenerResult listenerResult, + QueryBuilder requestFilter, + ActionListener<ListenerResult> listener ) { // TODO we plan to support joins in the future when possible, but for now we'll just fail early if we see one if (indices.size() > 1) { // Note: JOINs are not supported but we detect them when listener.onFailure(new MappingException("Queries with multiple indices are not supported")); } else if (indices.size() == 1) { + // known to be unavailable from the enrich policy API call + Map<String, FieldCapabilitiesFailure> unavailableClusters = listenerResult.enrichResolution.getUnavailableClusters(); TableInfo tableInfo = indices.get(0); TableIdentifier table = tableInfo.id(); @@ -409,38 +441,116 @@ private void preAnalyzeIndices( String indexExpressionToResolve = EsqlSessionCCSUtils.createIndexExpressionFromAvailableClusters(executionInfo); if (indexExpressionToResolve.isEmpty()) { // if this was a pure
remote CCS request (no local indices) and all remotes are offline, return an empty IndexResolution - listener.onResponse(IndexResolution.valid(new EsIndex(table.index(), Map.of(), Map.of()))); + listener.onResponse( + new ListenerResult( + IndexResolution.valid(new EsIndex(table.index(), Map.of(), Map.of())), + listenerResult.lookupIndices, + listenerResult.enrichResolution, + listenerResult.fieldNames + ) + ); } else { // call the EsqlResolveFieldsAction (field-caps) to resolve indices and get field types - indexResolver.resolveAsMergedMapping(indexExpressionToResolve, fieldNames, listener); + indexResolver.resolveAsMergedMapping( + indexExpressionToResolve, + listenerResult.fieldNames, + requestFilter, + listener.map(indexResolution -> listenerResult.withIndexResolution(indexResolution)) + ); } } else { try { // occurs when dealing with local relations (row a = 1) - listener.onResponse(IndexResolution.invalid("[none specified]")); + listener.onResponse( + new ListenerResult( + IndexResolution.invalid("[none specified]"), + listenerResult.lookupIndices, + listenerResult.enrichResolution, + listenerResult.fieldNames + ) + ); } catch (Exception ex) { listener.onFailure(ex); } } } - private void preAnalyzeLookupIndices(List<TableInfo> indices, Set<String> fieldNames, ActionListener<IndexResolution> listener) { - if (indices.size() > 1) { - // Note: JOINs on more than one index are not yet supported - listener.onFailure(new MappingException("More than one LOOKUP JOIN is not supported")); - } else if (indices.size() == 1) { - TableInfo tableInfo = indices.get(0); - TableIdentifier table = tableInfo.id(); - // call the EsqlResolveFieldsAction (field-caps) to resolve indices and get field types - indexResolver.resolveAsMergedMapping(table.index(), fieldNames, listener); - } else { - try { - // No lookup indices specified - listener.onResponse(IndexResolution.invalid("[none specified]")); - } catch (Exception ex) { - listener.onFailure(ex); + private boolean analyzeCCSIndices( + EsqlExecutionInfo executionInfo, + Set<String> targetClusters, + Set<EnrichPolicyResolver.UnresolvedPolicy> unresolvedPolicies, + ListenerResult listenerResult, + ActionListener<LogicalPlan> logicalPlanListener, + ActionListener<ListenerResult> l + ) { + IndexResolution indexResolution = listenerResult.indices; + EsqlSessionCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, indexResolution); + EsqlSessionCCSUtils.updateExecutionInfoWithUnavailableClusters(executionInfo, indexResolution.unavailableClusters()); + if (executionInfo.isCrossClusterSearch() && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) == 0) { + // for a CCS, if all clusters have been marked as SKIPPED, nothing to search so send a sentinel Exception + // to let the LogicalPlanActionListener decide how to proceed + logicalPlanListener.onFailure(new NoClustersToSearchException()); + return true; + } + + Set<String> newClusters = enrichPolicyResolver.groupIndicesPerCluster( + indexResolution.get().concreteIndices().toArray(String[]::new) + ).keySet(); + // If new clusters appear when resolving the main indices, we need to resolve the enrich policies again + // or exclude main concrete indices. Since this is rare, it's simpler to resolve the enrich policies again.
+ // TODO: add a test for this + if (targetClusters.containsAll(newClusters) == false + // do not bother with a re-resolution if only remotes were requested and all were offline + && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) > 0) { + enrichPolicyResolver.resolvePolicies( + newClusters, + unresolvedPolicies, + l.map(enrichResolution -> listenerResult.withEnrichResolution(enrichResolution)) + ); + return true; + } + return false; + } + + private static void analyzeAndMaybeRetry( + TriFunction<IndexResolution, IndexResolution, EnrichResolution, LogicalPlan> analyzeAction, + QueryBuilder requestFilter, + ListenerResult listenerResult, + ActionListener<LogicalPlan> logicalPlanListener, + ActionListener<ListenerResult> l + ) { + LogicalPlan plan = null; + var filterPresentMessage = requestFilter == null ? "without" : "with"; + var attemptMessage = requestFilter == null ? "the only" : "first"; + LOGGER.debug("Analyzing the plan ({} attempt, {} filter)", attemptMessage, filterPresentMessage); + + try { + plan = analyzeAction.apply(listenerResult.indices, listenerResult.lookupIndices, listenerResult.enrichResolution); + } catch (Exception e) { + if (e instanceof VerificationException ve) { + LOGGER.debug( + "Analyzing the plan ({} attempt, {} filter) failed with {}", + attemptMessage, + filterPresentMessage, + ve.getDetailedMessage() + ); + if (requestFilter == null) { + // if the initial request didn't have a filter, then just pass the exception back to the user + logicalPlanListener.onFailure(ve); + } else { + // interested only in a VerificationException, but this time we are taking out the index filter + // to try and make the index resolution work without any index filtering. In the next step... to be continued + l.onResponse(listenerResult); + } + } else { + // if the query failed with any other type of exception, then just pass the exception back to the user + logicalPlanListener.onFailure(e); } + return; } + LOGGER.debug("Analyzed plan ({} attempt, {} filter):\n{}", attemptMessage, filterPresentMessage, plan); + // the analysis succeeded from the first attempt, irrespective if it had a filter or not, just continue with the planning + logicalPlanListener.onResponse(plan); } static Set<String> fieldNames(LogicalPlan parsed, Set<String> enrichPolicyMatchFields) { @@ -591,4 +701,23 @@ public PhysicalPlan optimizedPhysicalPlan(LogicalPlan optimizedPlan) { LOGGER.debug("Optimized physical plan:\n{}", plan); return plan; } + + private record ListenerResult( + IndexResolution indices, + IndexResolution lookupIndices, + EnrichResolution enrichResolution, + Set<String> fieldNames + ) { + ListenerResult withEnrichResolution(EnrichResolution newEnrichResolution) { + return new ListenerResult(indices(), lookupIndices(), newEnrichResolution, fieldNames()); + } + + ListenerResult withIndexResolution(IndexResolution newIndexResolution) { + return new ListenerResult(newIndexResolution, lookupIndices(), enrichResolution(), fieldNames()); + } + + ListenerResult withLookupIndexResolution(IndexResolution newIndexResolution) { + return new ListenerResult(indices(), newIndexResolution, enrichResolution(), fieldNames()); + } + }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java index f61be4b59830e..d000b2765e2b1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java @@ -18,6 +18,7 @@ import
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
index f61be4b59830e..d000b2765e2b1 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
@@ -18,6 +18,7 @@ import org.elasticsearch.common.util.Maps;
 import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.mapper.TimeSeriesParams;
+import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.xpack.esql.action.EsqlResolveFieldsAction;
 import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -76,10 +77,15 @@ public IndexResolver(Client client) {
     /**
      * Resolves a pattern to one (potentially compound, meaning it spans multiple indices) mapping.
      */
-    public void resolveAsMergedMapping(String indexWildcard, Set<String> fieldNames, ActionListener<IndexResolution> listener) {
+    public void resolveAsMergedMapping(
+        String indexWildcard,
+        Set<String> fieldNames,
+        QueryBuilder requestFilter,
+        ActionListener<IndexResolution> listener
+    ) {
         client.execute(
             EsqlResolveFieldsAction.TYPE,
-            createFieldCapsRequest(indexWildcard, fieldNames),
+            createFieldCapsRequest(indexWildcard, fieldNames, requestFilter),
             listener.delegateFailureAndWrap((l, response) -> l.onResponse(mergedMappings(indexWildcard, response)))
         );
     }
@@ -252,10 +258,11 @@ private EsField conflictingMetricTypes(String name, String fullName, FieldCapabi
         return new InvalidMappedField(name, "mapped as different metric types in indices: " + indices);
     }
 
-    private static FieldCapabilitiesRequest createFieldCapsRequest(String index, Set<String> fieldNames) {
+    private static FieldCapabilitiesRequest createFieldCapsRequest(String index, Set<String> fieldNames, QueryBuilder requestFilter) {
         FieldCapabilitiesRequest req = new FieldCapabilitiesRequest().indices(Strings.commaDelimitedListToStringArray(index));
         req.fields(fieldNames.toArray(String[]::new));
         req.includeUnmapped(true);
+        req.indexFilter(requestFilter);
         // lenient because we throw our own errors looking at the response e.g. if something was not resolved
         // also because this way security doesn't throw authorization exceptions but rather honors ignore_unavailable
         req.indicesOptions(FIELD_CAPS_INDICES_OPTIONS);
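For reference, here is a rough sketch of the request that the new createFieldCapsRequest variant builds; the index pattern, field names, and filter below are invented for illustration:

    import org.elasticsearch.action.fieldcaps.FieldCapabilitiesRequest;
    import org.elasticsearch.index.query.QueryBuilder;
    import org.elasticsearch.index.query.QueryBuilders;

    final class FieldCapsFilterSketch {
        static FieldCapabilitiesRequest example() {
            // the ES|QL request filter, forwarded to field-caps as an index filter
            QueryBuilder requestFilter = QueryBuilders.rangeQuery("@timestamp").gte("now-1d");
            FieldCapabilitiesRequest req = new FieldCapabilitiesRequest().indices("logs-*");
            req.fields("@timestamp", "message");
            req.includeUnmapped(true);
            // indices that cannot match the filter drop out of the resolution,
            // which is what makes the filtered first attempt worthwhile
            req.indexFilter(requestFilter);
            return req;
        }
    }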
From f2addbc69a2aa7fb512c1d5ca9a839f5fc7f5134 Mon Sep 17 00:00:00 2001
From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com>
Date: Tue, 3 Dec 2024 20:10:30 +0200
Subject: [PATCH 27/28] Parse the contents of dynamic objects for
 [subobjects:false] (#117762)

* Parse the contents of dynamic objects for [subobjects:false]

* Update docs/changelog/117762.yaml

* add tests

* tests

* test dynamic field

* test dynamic field

* fix tests
---
 docs/changelog/117762.yaml                 |   6 +
 .../test/search/330_fetch_fields.yml       | 118 ++++++++++++++++++
 .../index/mapper/DocumentParser.java       |   7 +-
 .../index/mapper/MapperFeatures.java       |   1 +
 .../index/mapper/DocumentParserTests.java  |  63 ++++++++++
 5 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 docs/changelog/117762.yaml

diff --git a/docs/changelog/117762.yaml b/docs/changelog/117762.yaml
new file mode 100644
index 0000000000000..123432e0f0507
--- /dev/null
+++ b/docs/changelog/117762.yaml
@@ -0,0 +1,6 @@
+pr: 117762
+summary: "Parse the contents of dynamic objects for [subobjects:false]"
+area: Mapping
+type: bug
+issues:
+ - 117544
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml
index 8a8dffda69e20..44d966b76f34e 100644
--- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml
+++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml
@@ -1177,3 +1177,121 @@ fetch geo_point:
   - is_false: hits.hits.0.fields.message
   - match: { hits.hits.0._source.message.foo: 10 }
   - match: { hits.hits.0._source.message.foo\.bar: 20 }
+
+---
+root with subobjects false and dynamic false:
+  - requires:
+      cluster_features: mapper.fix_parsing_subobjects_false_dynamic_false
+      reason: bug fix
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            subobjects: false
+            dynamic: false
+            properties:
+              id:
+                type: integer
+              my.keyword.field:
+                type: keyword
+
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 1, "my": { "keyword.field": "abc" } }'
+  - match: { errors: false }
+
+  # indexing a dynamically-mapped field is still silently ignored (the unmapped field is dropped, no error is returned)
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 2, "my": { "random.field": "abc" } }'
+  - match: { errors: false }
+
+  - do:
+      search:
+        index: test
+        body:
+          sort: id
+          fields: [ "*" ]
+
+  - match: { hits.hits.0.fields: { my.keyword.field: [ abc ], id: [ 1 ] } }
+  - match: { hits.hits.1.fields: { id: [ 2 ] } }
+
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            match:
+              my.keyword.field: abc
+
+  - match: { hits.total.value: 1 }
+
+---
+object with subobjects false and dynamic false:
+  - requires:
+      cluster_features: mapper.fix_parsing_subobjects_false_dynamic_false
+      reason: bug fix
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              my:
+                subobjects: false
+                dynamic: false
+                properties:
+                  id:
+                    type: integer
+                  nested.keyword.field:
+                    type: keyword
+
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 1, "my": { "nested": { "keyword.field": "abc" } } }'
+  - match: { errors: false }
+
+  # indexing a dynamically-mapped field is still silently ignored (the unmapped field is dropped, no error is returned)
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 2, "my": { "nested": { "random.field": "abc" } } }'
+  - match: { errors: false }
+
+  - do:
+      search:
+        index: test
+        body:
+          sort: id
+          fields: [ "*" ]
+
+  - match: { hits.hits.0.fields: { my.nested.keyword.field: [ abc ], id: [ 1 ] } }
+  - match: { hits.hits.1.fields: { id: [ 2 ] } }
+
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            match:
+              my.nested.keyword.field: abc
+
+  - match: { hits.total.value: 1 }
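A toy model of what these tests exercise: under subobjects:false, mapped leaves keep their dotted names, so an object in the document is only a path prefix and the parser has to keep walking through it even when dynamic is false. The class and map below are purely illustrative:

    import java.util.Map;

    final class FlattenedPathSketch {
        // leaves as they exist under subobjects:false -- dotted names, no nested mappers
        static final Map<String, String> LEAVES = Map.of("id", "integer", "my.keyword.field", "keyword");

        public static void main(String[] args) {
            // { "my": { "keyword.field": "abc" } } flattens to a mapped leaf
            System.out.println(LEAVES.containsKey("my.keyword.field")); // true  -> must be parsed
            // { "my": { "random.field": "abc" } } reaches no mapped leaf
            System.out.println(LEAVES.containsKey("my.random.field"));  // false -> silently ignored
        }
    }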
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
index 82004356ceb57..e00e7b2320000 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
@@ -16,6 +16,7 @@
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.core.Nullable;
+import org.elasticsearch.features.NodeFeature;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -53,6 +54,9 @@ public final class DocumentParser {
     public static final IndexVersion DYNAMICALLY_MAP_DENSE_VECTORS_INDEX_VERSION = IndexVersions.FIRST_DETACHED_INDEX_VERSION;
+    static final NodeFeature FIX_PARSING_SUBOBJECTS_FALSE_DYNAMIC_FALSE = new NodeFeature(
+        "mapper.fix_parsing_subobjects_false_dynamic_false"
+    );
 
     private final XContentParserConfiguration parserConfiguration;
     private final MappingParserContext mappingParserContext;
@@ -531,7 +535,8 @@ private static void doParseObject(DocumentParserContext context, String currentF
     private static void parseObjectDynamic(DocumentParserContext context, String currentFieldName) throws IOException {
         ensureNotStrict(context, currentFieldName);
-        if (context.dynamic() == ObjectMapper.Dynamic.FALSE) {
+        // For [subobjects:false], intermediate objects get flattened, so we can't skip parsing their children.
+        if (context.dynamic() == ObjectMapper.Dynamic.FALSE && context.parent().subobjects() != ObjectMapper.Subobjects.DISABLED) {
             failIfMatchesRoutingPath(context, currentFieldName);
             if (context.canAddIgnoredField()) {
                 context.addIgnoredField(
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java
index bf6c729f95653..ffb38d229078e 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java
@@ -73,6 +73,7 @@ public Set<NodeFeature> getTestFeatures() {
         IgnoredSourceFieldMapper.IGNORED_SOURCE_AS_TOP_LEVEL_METADATA_ARRAY_FIELD,
         IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS,
         MapperService.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT,
+        DocumentParser.FIX_PARSING_SUBOBJECTS_FALSE_DYNAMIC_FALSE,
         CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX,
         META_FETCH_FIELDS_ERROR_CODE_CHANGED
     );
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
index 09d57d0e34c3c..d128b25038a59 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
@@ -2053,6 +2053,38 @@ public void testSubobjectsFalseWithInnerDottedObject() throws Exception {
         assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots.max"));
     }
 
+    public void testSubobjectsFalseWithInnerDottedObjectDynamicFalse() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(mapping(b -> {
+            b.startObject("metrics").field("type", "object").field("subobjects", false).field("dynamic", randomFrom("false", "runtime"));
+            b.startObject("properties").startObject("service.test.with.dots").field("type", "keyword").endObject().endObject();
+            b.endObject();
+        }));
+
+        ParsedDocument doc = mapper.parse(source("""
+            { "metrics": { "service": { "test.with.dots": "foo" } } }"""));
+        assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service.test": { "with.dots": "foo" } } }"""));
+        assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service": { "test": { "with.dots": "foo" } } } }"""));
+        assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service": { "test.other.dots": "foo" } } }"""));
+        assertNull(doc.rootDoc().getField("metrics.service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service.test": { "other.dots": "foo" } } }"""));
+        assertNull(doc.rootDoc().getField("metrics.service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service": { "test": { "other.dots": "foo" } } } }"""));
+        assertNull(doc.rootDoc().getField("metrics.service.test.other.dots"));
+    }
+
     public void testSubobjectsFalseRoot() throws Exception {
         DocumentMapper mapper = createDocumentMapper(mappingNoSubobjects(xContentBuilder -> {}));
         ParsedDocument doc = mapper.parse(source("""
@@ -2074,6 +2106,37 @@ public void testSubobjectsFalseRoot() throws Exception {
         assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
     }
 
+    public void testSubobjectsFalseRootWithInnerDottedObjectDynamicFalse() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(topMapping(b -> {
+            b.field("subobjects", false).field("dynamic", randomFrom("false", "runtime"));
+            b.startObject("properties").startObject("service.test.with.dots").field("type", "keyword").endObject().endObject();
+        }));
+
+        ParsedDocument doc = mapper.parse(source("""
+            { "service": { "test.with.dots": "foo" } }"""));
+        assertNotNull(doc.rootDoc().getField("service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "service.test": { "with.dots": "foo" } }"""));
+        assertNotNull(doc.rootDoc().getField("service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "service": { "test": { "with.dots": "foo" } } }"""));
+        assertNotNull(doc.rootDoc().getField("service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "service": { "test.other.dots": "foo" } }"""));
+        assertNull(doc.rootDoc().getField("service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "service.test": { "other.dots": "foo" } }"""));
+        assertNull(doc.rootDoc().getField("service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "service": { "test": { "other.dots": "foo" } } }"""));
+        assertNull(doc.rootDoc().getField("service.test.other.dots"));
+    }
+
     public void testSubobjectsFalseStructuredPath() throws Exception {
         DocumentMapper mapper = createDocumentMapper(
             mapping(b -> b.startObject("metrics.service").field("type", "object").field("subobjects", false).endObject())
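Condensed, the fix in parseObjectDynamic comes down to the guard below; the enums are simplified stand-ins for the ObjectMapper ones, and the real code additionally handles routing paths and ignored-field tracking:

    final class DynamicGuardSketch {
        enum Dynamic { TRUE, FALSE, RUNTIME }
        enum Subobjects { ENABLED, DISABLED }

        // Before the fix, children were skipped whenever dynamic == FALSE. Now a
        // subobjects:false parent keeps parsing, because its children are dotted
        // leaf names rather than real sub-objects.
        static boolean skipChildren(Dynamic dynamic, Subobjects parentSubobjects) {
            return dynamic == Dynamic.FALSE && parentSubobjects != Subobjects.DISABLED;
        }
    }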
From f5ff9c6bbb84805647c03a280343a531610f2138 Mon Sep 17 00:00:00 2001
From: Luca Cavanna
Date: Tue, 3 Dec 2024 20:20:56 +0100
Subject: [PATCH 28/28] [TEST] Adjust exception type in
 SearchServiceTests#testWaitOnRefreshTimeout (#117884)

This test has been failing due to #114526, which changed the exception
type to SearchTimeoutException.

Closes #115935
---
 muted-tests.yml                                            | 3 ---
 .../java/org/elasticsearch/search/SearchServiceTests.java | 4 ++--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/muted-tests.yml b/muted-tests.yml
index 7bd06a6605028..7e5e7f15700f3 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -106,9 +106,6 @@ tests:
 - class: org.elasticsearch.search.StressSearchServiceReaperIT
   method: testStressReaper
   issue: https://github.com/elastic/elasticsearch/issues/115816
-- class: org.elasticsearch.search.SearchServiceTests
-  method: testWaitOnRefreshTimeout
-  issue: https://github.com/elastic/elasticsearch/issues/115935
 - class: org.elasticsearch.search.SearchServiceTests
   method: testParseSourceValidation
   issue: https://github.com/elastic/elasticsearch/issues/115936
diff --git a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java
index 5dc07a41b3f8c..d1ccfcbe78732 100644
--- a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java
+++ b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java
@@ -20,7 +20,6 @@
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.util.SetOnce;
 import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.ElasticsearchTimeoutException;
 import org.elasticsearch.TransportVersion;
 import org.elasticsearch.TransportVersions;
 import org.elasticsearch.action.ActionListener;
@@ -111,6 +110,7 @@
 import org.elasticsearch.search.query.NonCountingTermQuery;
 import org.elasticsearch.search.query.QuerySearchRequest;
 import org.elasticsearch.search.query.QuerySearchResult;
+import org.elasticsearch.search.query.SearchTimeoutException;
 import org.elasticsearch.search.rank.RankBuilder;
 import org.elasticsearch.search.rank.RankDoc;
 import org.elasticsearch.search.rank.RankShardResult;
@@ -2616,7 +2616,7 @@ public void testWaitOnRefreshTimeout() {
         );
         service.executeQueryPhase(request, task, future);
 
-        ElasticsearchTimeoutException ex = expectThrows(ElasticsearchTimeoutException.class, future::actionGet);
+        SearchTimeoutException ex = expectThrows(SearchTimeoutException.class, future::actionGet);
         assertThat(ex.getMessage(), containsString("Wait for seq_no [0] refreshed timed out ["));
     }