diff --git a/docs/changelog/118804.yaml b/docs/changelog/118804.yaml
new file mode 100644
index 0000000000000..1548367a5485f
--- /dev/null
+++ b/docs/changelog/118804.yaml
@@ -0,0 +1,15 @@
+pr: 118804
+summary: Add new experimental `rank_vectors` mapping for late-interaction second order
+ ranking
+area: Vector Search
+type: feature
+issues: []
+highlight:
+ title: Add new experimental `rank_vectors` mapping for late-interaction second order
+ ranking
+ body:
+ Late-interaction models are powerful rerankers. While their size and overall
+ cost don't lend themselves to HNSW indexing, using them for second-order reranking
+ can provide excellent boosts in relevance. The new `rank_vectors` mapping allows for rescoring
+ with multi-vector late-interaction models like ColBERT or ColPali.
+ notable: true
diff --git a/docs/changelog/119601.yaml b/docs/changelog/119601.yaml
new file mode 100644
index 0000000000000..3570588a5c690
--- /dev/null
+++ b/docs/changelog/119601.yaml
@@ -0,0 +1,13 @@
+pr: 119601
+summary: "[8.x] Add new experimental `rank_vectors` mapping for late-interaction second\
+ \ order ranking"
+area: Vector Search
+type: feature
+issues: []
+highlight:
+ title: "[8.x] Add new experimental `rank_vectors` mapping for late-interaction second\
+ \ order ranking"
+ body: |-
+ Backports the following commits to 8.x: - Add new experimental
+ rank_vectors mapping for late-interaction second order ranking (#118804)
+ notable: true
diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc
index babe4f508b5f0..e5155b7d4ce5b 100644
--- a/docs/reference/mapping/types.asciidoc
+++ b/docs/reference/mapping/types.asciidoc
@@ -180,6 +180,8 @@ include::types/rank-feature.asciidoc[]
include::types/rank-features.asciidoc[]
+include::types/rank-vectors.asciidoc[]
+
include::types/search-as-you-type.asciidoc[]
include::types/semantic-text.asciidoc[]
diff --git a/docs/reference/mapping/types/dense-vector.asciidoc b/docs/reference/mapping/types/dense-vector.asciidoc
index e6e11d6dd539f..b56704b896706 100644
--- a/docs/reference/mapping/types/dense-vector.asciidoc
+++ b/docs/reference/mapping/types/dense-vector.asciidoc
@@ -1,4 +1,3 @@
-[role="xpack"]
[[dense-vector]]
=== Dense vector field type
++++
diff --git a/docs/reference/mapping/types/rank-vectors.asciidoc b/docs/reference/mapping/types/rank-vectors.asciidoc
new file mode 100644
index 0000000000000..a718a5e47ec85
--- /dev/null
+++ b/docs/reference/mapping/types/rank-vectors.asciidoc
@@ -0,0 +1,201 @@
+[role="xpack"]
+[[rank-vectors]]
+=== Rank Vectors
+++++
+ Rank Vectors
+++++
+experimental::[]
+
+The `rank_vectors` field type enables late-interaction dense vector scoring in Elasticsearch. The number of vectors
+per field can vary, but they must all share the same number of dimensions and element type.
+
+Vectors stored in this field are intended for second-order ranking of documents with max-sim similarity.
+
+Here is a simple example of using this field with `float` elements.
+
+[source,console]
+--------------------------------------------------
+PUT my-rank-vectors-float
+{
+ "mappings": {
+ "properties": {
+ "my_vector": {
+ "type": "rank_vectors"
+ }
+ }
+ }
+}
+
+PUT my-rank-vectors-float/_doc/1
+{
+ "my_vector" : [[0.5, 10, 6], [-0.5, 10, 10]]
+}
+
+--------------------------------------------------
+// TESTSETUP
+
+In addition to the `float` element type, `byte` and `bit` element types are also supported.
+
+Here is an example of using this field with `byte` elements.
+
+[source,console]
+--------------------------------------------------
+PUT my-rank-vectors-byte
+{
+ "mappings": {
+ "properties": {
+ "my_vector": {
+ "type": "rank_vectors",
+ "element_type": "byte"
+ }
+ }
+ }
+}
+
+PUT my-rank-vectors-byte/_doc/1
+{
+ "my_vector" : [[1, 2, 3], [4, 5, 6]]
+}
+--------------------------------------------------
+
+Here is an example of using this field with `bit` elements.
+
+[source,console]
+--------------------------------------------------
+PUT my-rank-vectors-bit
+{
+ "mappings": {
+ "properties": {
+ "my_vector": {
+ "type": "rank_vectors",
+ "element_type": "bit"
+ }
+ }
+ }
+}
+
+POST /my-rank-vectors-bit/_bulk?refresh
+{"index": {"_id" : "1"}}
+{"my_vector": [127, -127, 0, 1, 42]}
+{"index": {"_id" : "2"}}
+{"my_vector": "8100012a7f"}
+--------------------------------------------------
+
+[role="child_attributes"]
+[[rank-vectors-params]]
+==== Parameters for rank vectors fields
+
+The `rank_vectors` field type supports the following parameters:
+
+[[rank-vectors-element-type]]
+`element_type`::
+(Optional, string)
+The data type used to encode vectors. The supported data types are
+`float` (default), `byte`, and `bit`.
+
+.Valid values for `element_type`
+[%collapsible%open]
+====
+`float`:::
+indexes a 4-byte floating-point
+value per dimension. This is the default value.
+
+`byte`:::
+indexes a 1-byte integer value per dimension.
+
+`bit`:::
+indexes a single bit per dimension. Useful for very high-dimensional vectors or models that specifically support bit vectors.
+NOTE: when using `bit`, the number of dimensions must be a multiple of 8 and must represent the number of bits.
+
+====
+
+`dims`::
+(Optional, integer)
+Number of vector dimensions. Can't exceed `4096`. If `dims` is not specified,
+it will be set to the length of the first vector added to the field.
+
+[[rank-vectors-synthetic-source]]
+==== Synthetic `_source`
+
+IMPORTANT: Synthetic `_source` is Generally Available only for TSDB indices
+(indices that have `index.mode` set to `time_series`). For other indices
+synthetic `_source` is in technical preview. Features in technical preview may
+be changed or removed in a future release. Elastic will work to fix
+any issues, but features in technical preview are not subject to the support SLA
+of official GA features.
+
+`rank_vectors` fields support <<synthetic-source,synthetic `_source`>>.
+
+[[rank-vectors-scoring]]
+==== Scoring with rank vectors
+
+Rank vectors can be accessed and used in <<query-dsl-script-score-query,`script_score` queries>>.
+
+For example, the following query scores documents based on the maxSim similarity between the query vector and the vectors stored in the `my_vector` field:
+
+[source,console]
+--------------------------------------------------
+GET my-rank-vectors-float/_search
+{
+ "query": {
+ "script_score": {
+ "query": {
+ "match_all": {}
+ },
+ "script": {
+ "source": "maxSimDotProduct(params.query_vector, 'my_vector')",
+ "params": {
+ "query_vector": [[0.5, 10, 6], [-0.5, 10, 10]]
+ }
+ }
+ }
+ }
+}
+--------------------------------------------------
+
+Additionally, asymmetric similarity functions can be used to score against `bit` vectors. For example, the following query scores documents based on the maxSimDotProduct similarity between a floating point query vector and bit vectors stored in the `my_vector` field:
+
+[source,console]
+--------------------------------------------------
+PUT my-rank-vectors-bit
+{
+ "mappings": {
+ "properties": {
+ "my_vector": {
+ "type": "rank_vectors",
+ "element_type": "bit"
+ }
+ }
+ }
+}
+
+POST /my-rank-vectors-bit/_bulk?refresh
+{"index": {"_id" : "1"}}
+{"my_vector": [127, -127, 0, 1, 42]}
+{"index": {"_id" : "2"}}
+{"my_vector": "8100012a7f"}
+
+GET my-rank-vectors-bit/_search
+{
+ "query": {
+ "script_score": {
+ "query": {
+ "match_all": {}
+ },
+ "script": {
+ "source": "maxSimDotProduct(params.query_vector, 'my_vector')",
+ "params": {
+ "query_vector": [
+ [0.35, 0.77, 0.95, 0.15, 0.11, 0.08, 0.58, 0.06, 0.44, 0.52, 0.21,
+ 0.62, 0.65, 0.16, 0.64, 0.39, 0.93, 0.06, 0.93, 0.31, 0.92, 0.0,
+ 0.66, 0.86, 0.92, 0.03, 0.81, 0.31, 0.2 , 0.92, 0.95, 0.64, 0.19,
+ 0.26, 0.77, 0.64, 0.78, 0.32, 0.97, 0.84]
+ ] <1>
+ }
+ }
+ }
+ }
+}
+--------------------------------------------------
+<1> Note that the query vector has 40 elements, matching the number of bits in the bit vectors.
+
diff --git a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.score.txt b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.score.txt
index c08300f4351d5..fd8c81464b4a8 100644
--- a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.score.txt
+++ b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.score.txt
@@ -50,7 +50,5 @@ static_import {
double cosineSimilarity(org.elasticsearch.script.ScoreScript, Object, String) bound_to org.elasticsearch.script.VectorScoreScriptUtils$CosineSimilarity
double dotProduct(org.elasticsearch.script.ScoreScript, Object, String) bound_to org.elasticsearch.script.VectorScoreScriptUtils$DotProduct
double hamming(org.elasticsearch.script.ScoreScript, Object, String) bound_to org.elasticsearch.script.VectorScoreScriptUtils$Hamming
- double maxSimDotProduct(org.elasticsearch.script.ScoreScript, Object, String) bound_to org.elasticsearch.script.RankVectorsScoreScriptUtils$MaxSimDotProduct
- double maxSimInvHamming(org.elasticsearch.script.ScoreScript, Object, String) bound_to org.elasticsearch.script.RankVectorsScoreScriptUtils$MaxSimInvHamming
}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java
index b206c503a2739..4a22a53a64b41 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java
@@ -101,7 +101,7 @@
public class DenseVectorFieldMapper extends FieldMapper {
public static final String COSINE_MAGNITUDE_FIELD_SUFFIX = "._magnitude";
private static final float EPS = 1e-3f;
- static final int BBQ_MIN_DIMS = 64;
+ public static final int BBQ_MIN_DIMS = 64;
public static boolean isNotUnitVector(float magnitude) {
return Math.abs(magnitude - 1.0f) > EPS;
@@ -485,8 +485,12 @@ private VectorData parseHexEncodedVector(
}
@Override
- VectorData parseKnnVector(DocumentParserContext context, int dims, IntBooleanConsumer dimChecker, VectorSimilarity similarity)
- throws IOException {
+ public VectorData parseKnnVector(
+ DocumentParserContext context,
+ int dims,
+ IntBooleanConsumer dimChecker,
+ VectorSimilarity similarity
+ ) throws IOException {
XContentParser.Token token = context.parser().currentToken();
return switch (token) {
case START_ARRAY -> parseVectorArray(context, dims, dimChecker, similarity);
@@ -516,17 +520,17 @@ public void parseKnnVectorAndIndex(DocumentParserContext context, DenseVectorFie
}
@Override
- int getNumBytes(int dimensions) {
+ public int getNumBytes(int dimensions) {
return dimensions;
}
@Override
- ByteBuffer createByteBuffer(IndexVersion indexVersion, int numBytes) {
+ public ByteBuffer createByteBuffer(IndexVersion indexVersion, int numBytes) {
return ByteBuffer.wrap(new byte[numBytes]);
}
@Override
- int parseDimensionCount(DocumentParserContext context) throws IOException {
+ public int parseDimensionCount(DocumentParserContext context) throws IOException {
XContentParser.Token currentToken = context.parser().currentToken();
return switch (currentToken) {
case START_ARRAY -> {
@@ -690,8 +694,12 @@ && isNotUnitVector(squaredMagnitude)) {
}
@Override
- VectorData parseKnnVector(DocumentParserContext context, int dims, IntBooleanConsumer dimChecker, VectorSimilarity similarity)
- throws IOException {
+ public VectorData parseKnnVector(
+ DocumentParserContext context,
+ int dims,
+ IntBooleanConsumer dimChecker,
+ VectorSimilarity similarity
+ ) throws IOException {
int index = 0;
float squaredMagnitude = 0;
float[] vector = new float[dims];
@@ -710,12 +718,12 @@ VectorData parseKnnVector(DocumentParserContext context, int dims, IntBooleanCon
}
@Override
- int getNumBytes(int dimensions) {
+ public int getNumBytes(int dimensions) {
return dimensions * Float.BYTES;
}
@Override
- ByteBuffer createByteBuffer(IndexVersion indexVersion, int numBytes) {
+ public ByteBuffer createByteBuffer(IndexVersion indexVersion, int numBytes) {
return indexVersion.onOrAfter(LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION)
? ByteBuffer.wrap(new byte[numBytes]).order(ByteOrder.LITTLE_ENDIAN)
: ByteBuffer.wrap(new byte[numBytes]);
@@ -888,8 +896,12 @@ private VectorData parseHexEncodedVector(DocumentParserContext context, IntBoole
}
@Override
- VectorData parseKnnVector(DocumentParserContext context, int dims, IntBooleanConsumer dimChecker, VectorSimilarity similarity)
- throws IOException {
+ public VectorData parseKnnVector(
+ DocumentParserContext context,
+ int dims,
+ IntBooleanConsumer dimChecker,
+ VectorSimilarity similarity
+ ) throws IOException {
XContentParser.Token token = context.parser().currentToken();
return switch (token) {
case START_ARRAY -> parseVectorArray(context, dims, dimChecker, similarity);
@@ -919,18 +931,18 @@ public void parseKnnVectorAndIndex(DocumentParserContext context, DenseVectorFie
}
@Override
- int getNumBytes(int dimensions) {
+ public int getNumBytes(int dimensions) {
assert dimensions % Byte.SIZE == 0;
return dimensions / Byte.SIZE;
}
@Override
- ByteBuffer createByteBuffer(IndexVersion indexVersion, int numBytes) {
+ public ByteBuffer createByteBuffer(IndexVersion indexVersion, int numBytes) {
return ByteBuffer.wrap(new byte[numBytes]);
}
@Override
- int parseDimensionCount(DocumentParserContext context) throws IOException {
+ public int parseDimensionCount(DocumentParserContext context) throws IOException {
XContentParser.Token currentToken = context.parser().currentToken();
return switch (currentToken) {
case START_ARRAY -> {
@@ -973,16 +985,16 @@ public void checkDimensions(Integer dvDims, int qvDims) {
abstract void parseKnnVectorAndIndex(DocumentParserContext context, DenseVectorFieldMapper fieldMapper) throws IOException;
- abstract VectorData parseKnnVector(
+ public abstract VectorData parseKnnVector(
DocumentParserContext context,
int dims,
IntBooleanConsumer dimChecker,
VectorSimilarity similarity
) throws IOException;
- abstract int getNumBytes(int dimensions);
+ public abstract int getNumBytes(int dimensions);
- abstract ByteBuffer createByteBuffer(IndexVersion indexVersion, int numBytes);
+ public abstract ByteBuffer createByteBuffer(IndexVersion indexVersion, int numBytes);
public abstract void checkVectorBounds(float[] vector);
@@ -1000,7 +1012,7 @@ public void checkDimensions(Integer dvDims, int qvDims) {
}
}
- int parseDimensionCount(DocumentParserContext context) throws IOException {
+ public int parseDimensionCount(DocumentParserContext context) throws IOException {
int index = 0;
for (Token token = context.parser().nextToken(); token != Token.END_ARRAY; token = context.parser().nextToken()) {
index++;
@@ -1087,7 +1099,7 @@ public static ElementType fromString(String name) {
}
}
- static final Map namesToElementType = Map.of(
+ public static final Map namesToElementType = Map.of(
ElementType.BYTE.toString(),
ElementType.BYTE,
ElementType.FLOAT.toString(),
@@ -2494,9 +2506,10 @@ public String fieldName() {
}
/**
- * @FunctionalInterface for a function that takes a int and boolean
+ * Interface for a function that takes an int and a boolean
*/
- interface IntBooleanConsumer {
+ @FunctionalInterface
+ public interface IntBooleanConsumer {
void accept(int value, boolean isComplete);
}
}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/RankVectorsDVLeafFieldData.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/RankVectorsDVLeafFieldData.java
deleted file mode 100644
index 0125d0249ec2b..0000000000000
--- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/RankVectorsDVLeafFieldData.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the "Elastic License
- * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
- * Public License v 1"; you may not use this file except in compliance with, at
- * your election, the "Elastic License 2.0", the "GNU Affero General Public
- * License v3.0 only", or the "Server Side Public License, v 1".
- */
-
-package org.elasticsearch.index.mapper.vectors;
-
-import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.LeafReader;
-import org.elasticsearch.index.fielddata.LeafFieldData;
-import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
-import org.elasticsearch.script.field.DocValuesScriptFieldFactory;
-import org.elasticsearch.script.field.vectors.BitRankVectorsDocValuesField;
-import org.elasticsearch.script.field.vectors.ByteRankVectorsDocValuesField;
-import org.elasticsearch.script.field.vectors.FloatRankVectorsDocValuesField;
-
-import java.io.IOException;
-
-final class RankVectorsDVLeafFieldData implements LeafFieldData {
- private final LeafReader reader;
- private final String field;
- private final DenseVectorFieldMapper.ElementType elementType;
- private final int dims;
-
- RankVectorsDVLeafFieldData(LeafReader reader, String field, DenseVectorFieldMapper.ElementType elementType, int dims) {
- this.reader = reader;
- this.field = field;
- this.elementType = elementType;
- this.dims = dims;
- }
-
- @Override
- public DocValuesScriptFieldFactory getScriptFieldFactory(String name) {
- try {
- BinaryDocValues values = DocValues.getBinary(reader, field);
- BinaryDocValues magnitudeValues = DocValues.getBinary(reader, field + RankVectorsFieldMapper.VECTOR_MAGNITUDES_SUFFIX);
- return switch (elementType) {
- case BYTE -> new ByteRankVectorsDocValuesField(values, magnitudeValues, name, elementType, dims);
- case FLOAT -> new FloatRankVectorsDocValuesField(values, magnitudeValues, name, elementType, dims);
- case BIT -> new BitRankVectorsDocValuesField(values, magnitudeValues, name, elementType, dims);
- };
- } catch (IOException e) {
- throw new IllegalStateException("Cannot load doc values for multi-vector field!", e);
- }
- }
-
- @Override
- public SortedBinaryDocValues getBytesValues() {
- throw new UnsupportedOperationException("String representation of doc values for multi-vector fields is not supported");
- }
-
- @Override
- public long ramBytesUsed() {
- return 0;
- }
-}
diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java
index 3dc25b058b1d6..09be98630d5c4 100644
--- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java
+++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java
@@ -67,7 +67,6 @@
import org.elasticsearch.index.mapper.VersionFieldMapper;
import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
-import org.elasticsearch.index.mapper.vectors.RankVectorsFieldMapper;
import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
import org.elasticsearch.index.seqno.RetentionLeaseBackgroundSyncAction;
import org.elasticsearch.index.seqno.RetentionLeaseSyncAction;
@@ -211,9 +210,6 @@ public static Map getMappers(List mappe
mappers.put(DenseVectorFieldMapper.CONTENT_TYPE, DenseVectorFieldMapper.PARSER);
mappers.put(SparseVectorFieldMapper.CONTENT_TYPE, SparseVectorFieldMapper.PARSER);
- if (RankVectorsFieldMapper.FEATURE_FLAG.isEnabled()) {
- mappers.put(RankVectorsFieldMapper.CONTENT_TYPE, RankVectorsFieldMapper.PARSER);
- }
for (MapperPlugin mapperPlugin : mapperPlugins) {
for (Map.Entry entry : mapperPlugin.getMappers().entrySet()) {
diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java
index 7bcdd523fd3d3..3d32c1ff6cf27 100644
--- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java
+++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java
@@ -10,7 +10,6 @@
package org.elasticsearch.rest.action.search;
import org.elasticsearch.Build;
-import org.elasticsearch.index.mapper.vectors.RankVectorsFieldMapper;
import java.util.HashSet;
import java.util.Set;
@@ -32,14 +31,8 @@ private SearchCapabilities() {}
private static final String DENSE_VECTOR_DOCVALUE_FIELDS = "dense_vector_docvalue_fields";
/** Support kql query. */
private static final String KQL_QUERY_SUPPORTED = "kql_query";
- /** Support rank-vectors field mapper. */
- private static final String RANK_VECTORS_FIELD_MAPPER = "rank_vectors_field_mapper";
/** Support propagating nested retrievers' inner_hits to top-level compound retrievers . */
private static final String NESTED_RETRIEVER_INNER_HITS_SUPPORT = "nested_retriever_inner_hits_support";
- /** Support rank-vectors script field access. */
- private static final String RANK_VECTORS_SCRIPT_ACCESS = "rank_vectors_script_access";
- /** Initial support for rank-vectors maxSim functions access. */
- private static final String RANK_VECTORS_SCRIPT_MAX_SIM = "rank_vectors_script_max_sim_with_bugfix";
/** Fixed the math in {@code moving_fn}'s {@code linearWeightedAvg}. */
private static final String MOVING_FN_RIGHT_MATH = "moving_fn_right_math";
@@ -59,11 +52,6 @@ private SearchCapabilities() {}
capabilities.add(OPTIMIZED_SCALAR_QUANTIZATION_BBQ);
capabilities.add(KNN_QUANTIZED_VECTOR_RESCORE);
capabilities.add(MOVING_FN_RIGHT_MATH);
- if (RankVectorsFieldMapper.FEATURE_FLAG.isEnabled()) {
- capabilities.add(RANK_VECTORS_FIELD_MAPPER);
- capabilities.add(RANK_VECTORS_SCRIPT_ACCESS);
- capabilities.add(RANK_VECTORS_SCRIPT_MAX_SIM);
- }
if (Build.current().isSnapshot()) {
capabilities.add(KQL_QUERY_SUPPORTED);
}
diff --git a/server/src/main/java/org/elasticsearch/script/field/vectors/ByteRankVectorsDocValuesField.java b/server/src/main/java/org/elasticsearch/script/field/vectors/ByteRankVectorsDocValuesField.java
index db81bb6ebe1cb..1bff1b50fb5ac 100644
--- a/server/src/main/java/org/elasticsearch/script/field/vectors/ByteRankVectorsDocValuesField.java
+++ b/server/src/main/java/org/elasticsearch/script/field/vectors/ByteRankVectorsDocValuesField.java
@@ -111,13 +111,13 @@ public boolean isEmpty() {
return value == null;
}
- static class ByteVectorIterator implements VectorIterator {
+ public static class ByteVectorIterator implements VectorIterator {
private final byte[] buffer;
private final BytesRef vectorValues;
private final int size;
private int idx = 0;
- ByteVectorIterator(BytesRef vectorValues, byte[] buffer, int size) {
+ public ByteVectorIterator(BytesRef vectorValues, byte[] buffer, int size) {
assert vectorValues.length == (buffer.length * size);
this.vectorValues = vectorValues;
this.size = size;
diff --git a/server/src/main/java/org/elasticsearch/script/field/vectors/FloatRankVectorsDocValuesField.java b/server/src/main/java/org/elasticsearch/script/field/vectors/FloatRankVectorsDocValuesField.java
index 39bc1e621113b..d47795a3b2401 100644
--- a/server/src/main/java/org/elasticsearch/script/field/vectors/FloatRankVectorsDocValuesField.java
+++ b/server/src/main/java/org/elasticsearch/script/field/vectors/FloatRankVectorsDocValuesField.java
@@ -110,14 +110,14 @@ private void decodeVectorIfNecessary() {
}
}
- static class FloatVectorIterator implements VectorIterator {
+ public static class FloatVectorIterator implements VectorIterator {
private final float[] buffer;
private final FloatBuffer vectorValues;
private final BytesRef vectorValueBytesRef;
private final int size;
private int idx = 0;
- FloatVectorIterator(BytesRef vectorValues, float[] buffer, int size) {
+ public FloatVectorIterator(BytesRef vectorValues, float[] buffer, int size) {
assert vectorValues.length == (buffer.length * Float.BYTES * size);
this.vectorValueBytesRef = vectorValues;
this.vectorValues = ByteBuffer.wrap(vectorValues.bytes, vectorValues.offset, vectorValues.length)
diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
index 4e7c98a8cac97..366dbe05cf7f1 100644
--- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
@@ -110,7 +110,6 @@
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
-import org.elasticsearch.index.mapper.vectors.RankVectorsFieldMapper;
import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.shard.IndexShard;
@@ -203,7 +202,6 @@ public abstract class AggregatorTestCase extends ESTestCase {
private static final List TYPE_TEST_BLACKLIST = List.of(
ObjectMapper.CONTENT_TYPE, // Cannot aggregate objects
DenseVectorFieldMapper.CONTENT_TYPE, // Cannot aggregate dense vectors
- RankVectorsFieldMapper.CONTENT_TYPE, // Cannot aggregate dense vectors
SparseVectorFieldMapper.CONTENT_TYPE, // Sparse vectors are no longer supported
NestedObjectMapper.CONTENT_TYPE, // TODO support for nested
diff --git a/x-pack/plugin/rank-vectors/build.gradle b/x-pack/plugin/rank-vectors/build.gradle
new file mode 100644
index 0000000000000..53aabb8fdbf74
--- /dev/null
+++ b/x-pack/plugin/rank-vectors/build.gradle
@@ -0,0 +1,24 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+apply plugin: 'elasticsearch.internal-es-plugin'
+apply plugin: 'elasticsearch.internal-cluster-test'
+
+esplugin {
+ name = 'rank-vectors'
+ description = 'Rank vectors in search.'
+ classname = 'org.elasticsearch.xpack.rank.vectors.RankVectorsPlugin'
+ extendedPlugins = ['x-pack-core', 'lang-painless']
+}
+
+dependencies {
+ compileOnly project(path: xpackModule('core'))
+ compileOnly(project(':modules:lang-painless:spi'))
+
+ testImplementation(testArtifact(project(xpackModule('core'))))
+ testImplementation(testArtifact(project(':server')))
+}
diff --git a/x-pack/plugin/rank-vectors/src/main/java/module-info.java b/x-pack/plugin/rank-vectors/src/main/java/module-info.java
new file mode 100644
index 0000000000000..4af3c994edd38
--- /dev/null
+++ b/x-pack/plugin/rank-vectors/src/main/java/module-info.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+module org.elasticsearch.rank.vectors {
+ requires org.elasticsearch.xcore;
+ requires org.elasticsearch.painless.spi;
+ requires org.elasticsearch.server;
+ requires org.apache.lucene.core;
+ requires org.elasticsearch.xcontent;
+
+ exports org.elasticsearch.xpack.rank.vectors;
+ exports org.elasticsearch.xpack.rank.vectors.mapper;
+ exports org.elasticsearch.xpack.rank.vectors.script;
+
+ // whitelist resource access
+ opens org.elasticsearch.xpack.rank.vectors.script to org.elasticsearch.painless.spi;
+
+ provides org.elasticsearch.painless.spi.PainlessExtension with org.elasticsearch.xpack.rank.vectors.script.RankVectorsPainlessExtension;
+ provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.xpack.rank.vectors.RankVectorsFeatures;
+
+}
diff --git a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsFeatures.java b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsFeatures.java
new file mode 100644
index 0000000000000..44b1b7a068860
--- /dev/null
+++ b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsFeatures.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.rank.vectors;
+
+import org.elasticsearch.features.FeatureSpecification;
+import org.elasticsearch.features.NodeFeature;
+
+import java.util.Set;
+
+/**
+ * Feature specification for the rank-vectors plugin. It only contributes
+ * {@link #RANK_VECTORS_FEATURE}, and only as a test feature.
+ */
+public class RankVectorsFeatures implements FeatureSpecification {
+    /** Node feature with the id {@code rank_vectors}. */
+    public static final NodeFeature RANK_VECTORS_FEATURE = new NodeFeature("rank_vectors");
+
+    /**
+     * Returns the test features declared by this specification.
+     *
+     * @return an immutable set containing only {@link #RANK_VECTORS_FEATURE}
+     */
+    @Override
+    public Set<NodeFeature> getTestFeatures() {
+        return Set.of(RANK_VECTORS_FEATURE);
+    }
+}
diff --git a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java
new file mode 100644
index 0000000000000..35c87f1fc1847
--- /dev/null
+++ b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/RankVectorsPlugin.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.rank.vectors;
+
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.Mapper;
+import org.elasticsearch.license.License;
+import org.elasticsearch.license.LicenseUtils;
+import org.elasticsearch.license.LicensedFeature;
+import org.elasticsearch.license.XPackLicenseState;
+import org.elasticsearch.plugins.MapperPlugin;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.xpack.core.XPackPlugin;
+import org.elasticsearch.xpack.rank.vectors.mapper.RankVectorsFieldMapper;
+
+import java.util.Map;
+
+import static org.elasticsearch.index.mapper.FieldMapper.notInMultiFields;
+import static org.elasticsearch.xpack.rank.vectors.mapper.RankVectorsFieldMapper.CONTENT_TYPE;
+
+/**
+ * Plugin that registers the rank vectors field mapper and guards its use with a license check.
+ */
+public class RankVectorsPlugin extends Plugin implements MapperPlugin {
+    /** Licensed feature named {@code rank-vectors}, declared with the {@code ENTERPRISE} operation mode. */
+    public static final LicensedFeature.Momentary RANK_VECTORS_FEATURE = LicensedFeature.momentary(
+        null,
+        "rank-vectors",
+        License.OperationMode.ENTERPRISE
+    );
+
+    /**
+     * Registers the type parser for the rank vectors content type.
+     * <p>
+     * Whenever the parser is invoked it checks {@link #RANK_VECTORS_FEATURE} against the current license state and
+     * throws a license compliance exception for "Rank Vectors" if the check fails; otherwise it returns a mapper builder.
+     *
+     * @return a single-entry map from the rank vectors content type to its type parser
+     */
+    @Override
+    public Map<String, Mapper.TypeParser> getMappers() {
+        return Map.of(CONTENT_TYPE, new FieldMapper.TypeParser((n, c) -> {
+            if (RANK_VECTORS_FEATURE.check(getLicenseState()) == false) {
+                throw LicenseUtils.newComplianceException("Rank Vectors");
+            }
+            return new RankVectorsFieldMapper.Builder(n, c.indexVersionCreated(), getLicenseState());
+        }, notInMultiFields(CONTENT_TYPE)));
+    }
+
+    /**
+     * Supplies the license state used for the feature check and handed to the mapper builder.
+     * Protected so a subclass can substitute a different state (presumably for tests).
+     *
+     * @return the shared X-Pack license state
+     */
+    protected XPackLicenseState getLicenseState() {
+        return XPackPlugin.getSharedLicenseState();
+    }
+}
diff --git a/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsDVLeafFieldData.java b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsDVLeafFieldData.java
new file mode 100644
index 0000000000000..b858b935c1483
--- /dev/null
+++ b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsDVLeafFieldData.java
@@ -0,0 +1,158 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.rank.vectors.mapper;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.fielddata.FormattedDocValues;
+import org.elasticsearch.index.fielddata.LeafFieldData;
+import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
+import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
+import org.elasticsearch.script.field.DocValuesScriptFieldFactory;
+import org.elasticsearch.script.field.vectors.BitRankVectorsDocValuesField;
+import org.elasticsearch.script.field.vectors.ByteRankVectorsDocValuesField;
+import org.elasticsearch.script.field.vectors.FloatRankVectorsDocValuesField;
+import org.elasticsearch.script.field.vectors.VectorIterator;
+import org.elasticsearch.search.DocValueFormat;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Leaf-level field data for a rank vectors field. All vectors of a document are read from a single
+ * binary doc value and split into individual vectors of {@code dims} elements.
+ */
+final class RankVectorsDVLeafFieldData implements LeafFieldData {
+    private final LeafReader reader;
+    private final String field;
+    private final DenseVectorFieldMapper.ElementType elementType;
+    private final int dims;
+
+    /**
+     * @param reader      leaf reader the doc values are loaded from
+     * @param field       name of the doc values field holding the vectors
+     * @param elementType element type of the stored vectors
+     * @param dims        configured number of dimensions; for {@code BIT} this is divided by
+     *                    {@link Byte#SIZE} to obtain the per-vector byte length
+     */
+    RankVectorsDVLeafFieldData(LeafReader reader, String field, DenseVectorFieldMapper.ElementType elementType, int dims) {
+        this.reader = reader;
+        this.field = field;
+        this.elementType = elementType;
+        this.dims = dims;
+    }
+
+    /**
+     * Returns doc values that expose each document's vectors as one value: a list with one array per
+     * vector ({@code Byte[]} for byte and bit vectors, {@code float[]} for float vectors).
+     *
+     * @param format not consulted by this implementation
+     * @throws IllegalStateException if the binary doc values cannot be loaded
+     */
+    @Override
+    public FormattedDocValues getFormattedValues(DocValueFormat format) {
+        // Shadows the field on purpose: bit vectors are sized in bytes, not in bits.
+        int dims = elementType == DenseVectorFieldMapper.ElementType.BIT ? this.dims / Byte.SIZE : this.dims;
+        return switch (elementType) {
+            case BYTE, BIT -> new FormattedDocValues() {
+                // Scratch buffer handed to the iterator for decoding one vector at a time.
+                private final byte[] vector = new byte[dims];
+                private BytesRef ref = null;
+                private int numVecs = -1;
+                private final BinaryDocValues binary;
+                {
+                    try {
+                        binary = DocValues.getBinary(reader, field);
+                    } catch (IOException e) {
+                        throw new IllegalStateException("Cannot load doc values", e);
+                    }
+                }
+
+                @Override
+                public boolean advanceExact(int docId) throws IOException {
+                    if (binary == null || binary.advanceExact(docId) == false) {
+                        return false;
+                    }
+                    ref = binary.binaryValue();
+                    // One byte per element, so the value length must be a whole number of vectors.
+                    assert ref.length % dims == 0;
+                    numVecs = ref.length / dims;
+                    return true;
+                }
+
+                @Override
+                public int docValueCount() {
+                    // All vectors of a document are returned together as a single list value.
+                    return 1;
+                }
+
+                @Override
+                public Object nextValue() {
+                    // Boxed to keep from `byte[]` being transformed into a string
+                    List<Byte[]> vectors = new ArrayList<>(numVecs);
+                    VectorIterator<byte[]> iterator = new ByteRankVectorsDocValuesField.ByteVectorIterator(ref, vector, numVecs);
+                    while (iterator.hasNext()) {
+                        byte[] v = iterator.next();
+                        Byte[] vec = new Byte[dims];
+                        for (int i = 0; i < dims; i++) {
+                            vec[i] = v[i];
+                        }
+                        vectors.add(vec);
+                    }
+                    return vectors;
+                }
+            };
+            case FLOAT -> new FormattedDocValues() {
+                // Scratch buffer handed to the iterator for decoding one vector at a time.
+                private final float[] vector = new float[dims];
+                private BytesRef ref = null;
+                private int numVecs = -1;
+                private final BinaryDocValues binary;
+                {
+                    try {
+                        binary = DocValues.getBinary(reader, field);
+                    } catch (IOException e) {
+                        throw new IllegalStateException("Cannot load doc values", e);
+                    }
+                }
+
+                @Override
+                public boolean advanceExact(int docId) throws IOException {
+                    if (binary == null || binary.advanceExact(docId) == false) {
+                        return false;
+                    }
+                    ref = binary.binaryValue();
+                    // Each element takes Float.BYTES bytes, so a vector spans Float.BYTES * dims bytes.
+                    assert ref.length % (Float.BYTES * dims) == 0;
+                    numVecs = ref.length / (Float.BYTES * dims);
+                    return true;
+                }
+
+                @Override
+                public int docValueCount() {
+                    // All vectors of a document are returned together as a single list value.
+                    return 1;
+                }
+
+                @Override
+                public Object nextValue() {
+                    List<float[]> vectors = new ArrayList<>(numVecs);
+                    VectorIterator<float[]> iterator = new FloatRankVectorsDocValuesField.FloatVectorIterator(ref, vector, numVecs);
+                    while (iterator.hasNext()) {
+                        float[] v = iterator.next();
+                        // Copy each vector: the iterator was given the shared scratch buffer and
+                        // presumably returns it on every call.
+                        vectors.add(Arrays.copyOf(v, v.length));
+                    }
+                    return vectors;
+                }
+            };
+        };
+    }
+
+    /**
+     * Creates the script field for this leaf, backed by the vector doc values and by the magnitudes
+     * stored under the field name plus {@link RankVectorsFieldMapper#VECTOR_MAGNITUDES_SUFFIX}.
+     *
+     * @param name name passed through to the created script field
+     * @throws IllegalStateException if either doc values instance cannot be loaded
+     */
+    @Override
+    public DocValuesScriptFieldFactory getScriptFieldFactory(String name) {
+        try {
+            BinaryDocValues values = DocValues.getBinary(reader, field);
+            BinaryDocValues magnitudeValues = DocValues.getBinary(reader, field + RankVectorsFieldMapper.VECTOR_MAGNITUDES_SUFFIX);
+            return switch (elementType) {
+                case BYTE -> new ByteRankVectorsDocValuesField(values, magnitudeValues, name, elementType, dims);
+                case FLOAT -> new FloatRankVectorsDocValuesField(values, magnitudeValues, name, elementType, dims);
+                case BIT -> new BitRankVectorsDocValuesField(values, magnitudeValues, name, elementType, dims);
+            };
+        } catch (IOException e) {
+            throw new IllegalStateException("Cannot load doc values for multi-vector field!", e);
+        }
+    }
+
+    /**
+     * Not supported for this field type.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public SortedBinaryDocValues getBytesValues() {
+        throw new UnsupportedOperationException("String representation of doc values for multi-vector fields is not supported");
+    }
+
+    /** Always reports zero; this class does not account for any memory of its own. */
+    @Override
+    public long ramBytesUsed() {
+        return 0;
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/RankVectorsFieldMapper.java b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java
similarity index 88%
rename from server/src/main/java/org/elasticsearch/index/mapper/vectors/RankVectorsFieldMapper.java
rename to x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java
index d57dbf79b450c..873d67e76b04a 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/RankVectorsFieldMapper.java
+++ b/x-pack/plugin/rank-vectors/src/main/java/org/elasticsearch/xpack/rank/vectors/mapper/RankVectorsFieldMapper.java
@@ -1,13 +1,11 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the "Elastic License
- * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
- * Public License v 1"; you may not use this file except in compliance with, at
- * your election, the "Elastic License 2.0", the "GNU Affero General Public
- * License v3.0 only", or the "Server Side Public License, v 1".
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
*/
-package org.elasticsearch.index.mapper.vectors;
+package org.elasticsearch.xpack.rank.vectors.mapper;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.index.BinaryDocValues;
@@ -15,7 +13,6 @@
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -31,7 +28,10 @@
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.mapper.ValueFetcher;
+import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.index.query.SearchExecutionContext;
+import org.elasticsearch.license.LicenseUtils;
+import org.elasticsearch.license.XPackLicenseState;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.vectors.VectorData;
@@ -50,11 +50,11 @@
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT_BIT;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.namesToElementType;
+import static org.elasticsearch.xpack.rank.vectors.RankVectorsPlugin.RANK_VECTORS_FEATURE;
public class RankVectorsFieldMapper extends FieldMapper {
public static final String VECTOR_MAGNITUDES_SUFFIX = "._magnitude";
- public static final FeatureFlag FEATURE_FLAG = new FeatureFlag("rank_vectors");
public static final String CONTENT_TYPE = "rank_vectors";
private static RankVectorsFieldMapper toType(FieldMapper in) {
@@ -111,10 +111,12 @@ public static class Builder extends FieldMapper.Builder {
private final Parameter