From 8d0f859e97669f9521c79e015349a02f1fff7dff Mon Sep 17 00:00:00 2001 From: Luca Rossetto Date: Wed, 31 Jul 2024 09:42:47 +0200 Subject: [PATCH] Added support for fulltext and started work on knn queries --- settings.gradle | 1 + .../model/query/basics/ComparisonOperator.kt | 3 +- .../core/util/knn/FixedSizePriorityQueue.kt | 38 ++++++++++++++++++ .../database/jsonl/model/ValueContainer.kt | 35 ++++++++--------- .../jsonl/scalar/ScalarJsonlReader.kt | 9 ++++- .../jsonl/struct/StructJsonlReader.kt | 9 ++++- .../jsonl/vector/VectorJsonlReader.kt | 39 ++++++++++++++++++- 7 files changed, 112 insertions(+), 22 deletions(-) create mode 100644 vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/util/knn/FixedSizePriorityQueue.kt diff --git a/settings.gradle b/settings.gradle index 0ad7ee17..62164786 100644 --- a/settings.gradle +++ b/settings.gradle @@ -8,6 +8,7 @@ include 'vitrivr-engine-index' include 'vitrivr-engine-query' include 'vitrivr-engine-server' include 'vitrivr-engine-module-cottontaildb' +include 'vitrivr-engine-module-jsonl' include 'vitrivr-engine-module-pgvector' include 'vitrivr-engine-module-features' include 'vitrivr-engine-module-m3d' diff --git a/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/model/query/basics/ComparisonOperator.kt b/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/model/query/basics/ComparisonOperator.kt index 362236ae..498062bb 100644 --- a/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/model/query/basics/ComparisonOperator.kt +++ b/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/model/query/basics/ComparisonOperator.kt @@ -112,7 +112,8 @@ enum class ComparisonOperator(val value: String) { when (v1) { is Value.String, is Value.Text -> { - (v1.value as String).replace("\\", "\\\\").replace("*", "\\*").replace("%", "*").toRegex().matches(v2.value as String) + (v1.value as String).replace("\\", "\\\\").replace("[", "\\[").replace("]", "\\]") + .replace("*", "\\*").replace("%", "*").toRegex().matches(v2.value as String) } else -> false diff --git a/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/util/knn/FixedSizePriorityQueue.kt b/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/util/knn/FixedSizePriorityQueue.kt new file mode 100644 index 00000000..ccbd0896 --- /dev/null +++ b/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/util/knn/FixedSizePriorityQueue.kt @@ -0,0 +1,38 @@ +package org.vitrivr.engine.core.util.knn + +import java.util.* + +/** + * Ordered List of fixed size, used for KNN operations + */ + +class FixedSizePriorityQueue(private val maxSize: Int, comparator: Comparator) : TreeSet(comparator) { + + init { + require(maxSize > 0) { "Maximum size must be greater than zero." } + } + + private val elementsLeft: Int + get() = this.maxSize - this.size + + override fun add(element: T): Boolean { + if (elementsLeft > 0) { + // queue isn't full => add element and decrement elementsLeft + val added = super.add(element) + return added + } else { + // there is already 1 or more elements => compare to the least + val compared = super.comparator().compare(this.last(), element) + if (compared > 0) { + // new element is larger than the least in queue => pull the least and add new one to queue + pollLast() + super.add(element) + return true + } else { + // new element is less than the least in queue => return false + return false + } + } + } + +} \ No newline at end of file diff --git a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/model/ValueContainer.kt b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/model/ValueContainer.kt index 6e0c25d7..aa135690 100644 --- a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/model/ValueContainer.kt +++ b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/model/ValueContainer.kt @@ -3,12 +3,11 @@ package org.vitrivr.engine.database.jsonl.model import kotlinx.serialization.Serializable import org.vitrivr.engine.core.model.serializer.DateSerializer import org.vitrivr.engine.core.model.serializer.UUIDSerializer -import org.vitrivr.engine.core.model.types.Type import org.vitrivr.engine.core.model.types.Value import java.util.* @Serializable -sealed class ValueContainer(val innerType: Type) { //TODO explicitly use innerType for serialization +sealed class ValueContainer { companion object { fun fromValue(value: Value<*>): ValueContainer = when (value) { @@ -36,82 +35,82 @@ sealed class ValueContainer(val innerType: Type) { //TODO explicitly use innerTy } @Serializable -class BooleanValueContainer(private val value: Boolean) : ValueContainer(Type.Boolean) { +class BooleanValueContainer(private val value: Boolean) : ValueContainer() { override fun toValue(): Value = Value.Boolean(value) } @Serializable -class ByteValueContainer(private val value: Byte) : ValueContainer(Type.Byte) { +class ByteValueContainer(private val value: Byte) : ValueContainer() { override fun toValue(): Value = Value.Byte(value) } @Serializable class DateTimeValueContainer(@Serializable(DateSerializer::class) private val value: Date) : - ValueContainer(Type.Datetime) { + ValueContainer() { override fun toValue(): Value = Value.DateTime(value) } @Serializable -class DoubleValueContainer(private val value: Double) : ValueContainer(Type.Double) { +class DoubleValueContainer(private val value: Double) : ValueContainer() { override fun toValue(): Value = Value.Double(value) } @Serializable -class FloatValueContainer(private val value: Float) : ValueContainer(Type.Float) { +class FloatValueContainer(private val value: Float) : ValueContainer() { override fun toValue(): Value = Value.Float(value) } @Serializable -class IntValueContainer(private val value: Int) : ValueContainer(Type.Int) { +class IntValueContainer(private val value: Int) : ValueContainer() { override fun toValue(): Value = Value.Int(value) } @Serializable -class LongValueContainer(private val value: Long) : ValueContainer(Type.Long) { +class LongValueContainer(private val value: Long) : ValueContainer() { override fun toValue(): Value = Value.Long(value) } @Serializable -class ShortValueContainer(private val value: Short) : ValueContainer(Type.Short) { +class ShortValueContainer(private val value: Short) : ValueContainer() { override fun toValue(): Value = Value.Short(value) } @Serializable -class StringValueContainer(private val value: String) : ValueContainer(Type.String) { +class StringValueContainer(private val value: String) : ValueContainer() { override fun toValue(): Value = Value.String(value) } @Serializable -class TextValueContainer(private val value: String) : ValueContainer(Type.Text) { +class TextValueContainer(private val value: String) : ValueContainer() { override fun toValue(): Value = Value.Text(value) } @Serializable -class UuidValueContainer(@Serializable(UUIDSerializer::class) private val value: UUID) : ValueContainer(Type.UUID) { +class UuidValueContainer(@Serializable(UUIDSerializer::class) private val value: UUID) : ValueContainer() { override fun toValue(): Value = Value.UUIDValue(value) } @Serializable -class BooleanVectorValueContainer(private val value: BooleanArray) : ValueContainer(Type.BooleanVector(value.size)) { +class BooleanVectorValueContainer(private val value: BooleanArray) : ValueContainer() { override fun toValue(): Value = Value.BooleanVector(value) } @Serializable -class DoubleVectorValueContainer(private val value: DoubleArray) : ValueContainer(Type.DoubleVector(value.size)) { +class DoubleVectorValueContainer(private val value: DoubleArray) : ValueContainer() { override fun toValue(): Value = Value.DoubleVector(value) } @Serializable -class FloatVectorValueContainer(private val value: FloatArray) : ValueContainer(Type.FloatVector(value.size)) { +class FloatVectorValueContainer(private val value: FloatArray) : ValueContainer() { override fun toValue(): Value = Value.FloatVector(value) } @Serializable -class IntVectorValueContainer(private val value: IntArray) : ValueContainer(Type.IntVector(value.size)) { +class IntVectorValueContainer(private val value: IntArray) : ValueContainer() { override fun toValue(): Value = Value.IntVector(value) } @Serializable -class LongVectorValueContainer(private val value: LongArray) : ValueContainer(Type.LongVector(value.size)) { +class LongVectorValueContainer(private val value: LongArray) : ValueContainer() { override fun toValue(): Value = Value.LongVector(value) } \ No newline at end of file diff --git a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/scalar/ScalarJsonlReader.kt b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/scalar/ScalarJsonlReader.kt index 272096b1..35ee11bb 100644 --- a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/scalar/ScalarJsonlReader.kt +++ b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/scalar/ScalarJsonlReader.kt @@ -46,7 +46,14 @@ class ScalarJsonlReader( } private fun queryFulltext(fulltextQuery: SimpleFulltextQuery): Sequence> { - TODO() + + val queryString = fulltextQuery.value.value + val attributeName = fulltextQuery.attributeName ?: return emptySequence() + + return getAll().filter { descriptor -> + (descriptor.values()[attributeName]!! as Value.String).value.contains(queryString) + } + } private fun queryBoolean(query: SimpleBooleanQuery<*>): Sequence> = diff --git a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/struct/StructJsonlReader.kt b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/struct/StructJsonlReader.kt index 1876e5b8..9aaeba0f 100644 --- a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/struct/StructJsonlReader.kt +++ b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/struct/StructJsonlReader.kt @@ -50,7 +50,14 @@ class StructJsonlReader( } private fun queryFulltext(fulltextQuery: SimpleFulltextQuery): Sequence { - TODO() + + val queryString = fulltextQuery.value.value + val attributeName = fulltextQuery.attributeName ?: return emptySequence() + + return getAll().filter { descriptor -> + (descriptor.values()[attributeName]!! as Value.String).value.contains(queryString) + } + } private fun queryBoolean(query: SimpleBooleanQuery<*>): Sequence = getAll().filter { descriptor -> diff --git a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/vector/VectorJsonlReader.kt b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/vector/VectorJsonlReader.kt index f45ddc04..5f98c352 100644 --- a/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/vector/VectorJsonlReader.kt +++ b/vitrivr-engine-module-jsonl/src/main/kotlin/org/vitrivr/engine/database/jsonl/vector/VectorJsonlReader.kt @@ -4,7 +4,9 @@ import org.vitrivr.engine.core.model.descriptor.vector.* import org.vitrivr.engine.core.model.metamodel.Schema import org.vitrivr.engine.core.model.query.Query import org.vitrivr.engine.core.model.query.proximity.ProximityQuery +import org.vitrivr.engine.core.model.retrievable.Retrieved import org.vitrivr.engine.core.model.types.Value +import org.vitrivr.engine.core.util.knn.FixedSizePriorityQueue import org.vitrivr.engine.database.jsonl.AbstractJsonlReader import org.vitrivr.engine.database.jsonl.model.AttributeContainerList import org.vitrivr.engine.database.jsonl.JsonlConnection @@ -61,9 +63,44 @@ class VectorJsonlReader( else -> throw UnsupportedOperationException("Query of typ ${query::class} is not supported by this reader.") } - private fun queryProximity(query: ProximityQuery<*>): Sequence> { + + private fun queryAndJoinProximity(query: ProximityQuery<*>): Sequence { + + val queue = knn(query) + TODO() } + private fun queryProximity(query: ProximityQuery<*>): Sequence> = knn(query).asSequence().map { it.first } + + + private fun knn(query: ProximityQuery<*>): FixedSizePriorityQueue, Float>> { + + val queue = FixedSizePriorityQueue(query.k.toInt(), + Comparator, Float>> { p0, p1 -> + p0.second.compareTo(p1.second) //TODO consider direction + }) + + getAll().forEach { descriptor -> + val dist = distance(query, descriptor.vector) + queue.add(descriptor to dist) + } + + return queue + + } + + private fun distance(query: ProximityQuery<*>, vector: Value.Vector<*>): Float { + return when (query.value) { + is Value.FloatVector -> query.distance(query.value as Value.FloatVector, vector as Value.FloatVector) + is Value.DoubleVector -> query.distance( + query.value as Value.DoubleVector, + vector as Value.DoubleVector + ).toFloat() + + else -> error("Unsupported query type ${query.value::class.simpleName}") + } + } + } \ No newline at end of file