Skip to content

Commit

Permalink
Added support for fulltext and started work on knn queries
Browse files Browse the repository at this point in the history
  • Loading branch information
lucaro committed Jul 31, 2024
1 parent 539aa59 commit 8d0f859
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 22 deletions.
1 change: 1 addition & 0 deletions settings.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ include 'vitrivr-engine-index'
include 'vitrivr-engine-query'
include 'vitrivr-engine-server'
include 'vitrivr-engine-module-cottontaildb'
include 'vitrivr-engine-module-jsonl'
include 'vitrivr-engine-module-pgvector'
include 'vitrivr-engine-module-features'
include 'vitrivr-engine-module-m3d'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ enum class ComparisonOperator(val value: String) {
when (v1) {
is Value.String,
is Value.Text -> {
(v1.value as String).replace("\\", "\\\\").replace("*", "\\*").replace("%", "*").toRegex().matches(v2.value as String)
(v1.value as String).replace("\\", "\\\\").replace("[", "\\[").replace("]", "\\]")
.replace("*", "\\*").replace("%", "*").toRegex().matches(v2.value as String)
}

else -> false
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package org.vitrivr.engine.core.util.knn

import java.util.*

/**
 * A bounded, ordered set that keeps at most [maxSize] elements, namely the smallest ones according to
 * the supplied comparator. Used for KNN operations.
 *
 * Because this is a [TreeSet], two elements for which the comparator returns 0 are treated as the same
 * element: the one that was inserted first is kept and the later one is rejected.
 *
 * The size bound is enforced in [add]; no other insertion method is overridden by this class.
 *
 * @param maxSize Maximum number of elements retained; must be greater than zero.
 * @param comparator Ordering of the elements; the greatest element is the first one to be evicted.
 * @throws IllegalArgumentException if [maxSize] is not greater than zero.
 */
class FixedSizePriorityQueue<T>(private val maxSize: Int, comparator: Comparator<T>) : TreeSet<T>(comparator) {

    init {
        require(maxSize > 0) { "Maximum size must be greater than zero." }
    }

    /** Number of elements that can still be inserted before the queue is full. */
    private val elementsLeft: Int
        get() = this.maxSize - this.size

    /**
     * Offers [element] to the queue.
     *
     * @param element The candidate element.
     * @return `true` if the element was inserted (possibly evicting the current greatest element),
     * `false` if it was rejected because it is a comparator-duplicate of a retained element or because
     * the queue is full and the element does not rank before the current greatest element.
     */
    override fun add(element: T): Boolean {
        // Queue is not full yet => ordinary TreeSet insertion (false for comparator-duplicates).
        if (elementsLeft > 0) return super.add(element)

        // Queue is full => the candidate must rank strictly before the current greatest element.
        if (super.comparator().compare(this.last(), element) <= 0) return false

        // A comparator-duplicate of a retained element would be rejected by TreeSet.add; check this
        // BEFORE evicting, otherwise the queue would lose an element without gaining one.
        if (contains(element)) return false

        pollLast()
        return super.add(element)
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ package org.vitrivr.engine.database.jsonl.model
import kotlinx.serialization.Serializable
import org.vitrivr.engine.core.model.serializer.DateSerializer
import org.vitrivr.engine.core.model.serializer.UUIDSerializer
import org.vitrivr.engine.core.model.types.Type
import org.vitrivr.engine.core.model.types.Value
import java.util.*

@Serializable
sealed class ValueContainer(val innerType: Type) { //TODO explicitly use innerType for serialization
sealed class ValueContainer {

companion object {
fun fromValue(value: Value<*>): ValueContainer = when (value) {
Expand Down Expand Up @@ -36,82 +35,82 @@ sealed class ValueContainer(val innerType: Type) { //TODO explicitly use innerTy
}

/** Serializable [ValueContainer] holding a [Boolean]; [toValue] converts it to a [Value.Boolean]. */
@Serializable
class BooleanValueContainer(private val value: Boolean) : ValueContainer(Type.Boolean) {
class BooleanValueContainer(private val value: Boolean) : ValueContainer() {
override fun toValue(): Value<Boolean> = Value.Boolean(value)
}

/** Serializable [ValueContainer] holding a [Byte]; [toValue] converts it to a [Value.Byte]. */
@Serializable
class ByteValueContainer(private val value: Byte) : ValueContainer(Type.Byte) {
class ByteValueContainer(private val value: Byte) : ValueContainer() {
override fun toValue(): Value<Byte> = Value.Byte(value)
}

/**
 * Serializable [ValueContainer] holding a [Date], which is serialized through [DateSerializer];
 * [toValue] converts it to a [Value.DateTime].
 */
@Serializable
class DateTimeValueContainer(@Serializable(DateSerializer::class) private val value: Date) :
ValueContainer(Type.Datetime) {
ValueContainer() {
override fun toValue(): Value<Date> = Value.DateTime(value)
}

/** Serializable [ValueContainer] holding a [Double]; [toValue] converts it to a [Value.Double]. */
@Serializable
class DoubleValueContainer(private val value: Double) : ValueContainer(Type.Double) {
class DoubleValueContainer(private val value: Double) : ValueContainer() {
override fun toValue(): Value<Double> = Value.Double(value)
}

/** Serializable [ValueContainer] holding a [Float]; [toValue] converts it to a [Value.Float]. */
@Serializable
class FloatValueContainer(private val value: Float) : ValueContainer(Type.Float) {
class FloatValueContainer(private val value: Float) : ValueContainer() {
override fun toValue(): Value<Float> = Value.Float(value)
}

/** Serializable [ValueContainer] holding an [Int]; [toValue] converts it to a [Value.Int]. */
@Serializable
class IntValueContainer(private val value: Int) : ValueContainer(Type.Int) {
class IntValueContainer(private val value: Int) : ValueContainer() {
override fun toValue(): Value<Int> = Value.Int(value)
}

/** Serializable [ValueContainer] holding a [Long]; [toValue] converts it to a [Value.Long]. */
@Serializable
class LongValueContainer(private val value: Long) : ValueContainer(Type.Long) {
class LongValueContainer(private val value: Long) : ValueContainer() {
override fun toValue(): Value<Long> = Value.Long(value)
}

/** Serializable [ValueContainer] holding a [Short]; [toValue] converts it to a [Value.Short]. */
@Serializable
class ShortValueContainer(private val value: Short) : ValueContainer(Type.Short) {
class ShortValueContainer(private val value: Short) : ValueContainer() {
override fun toValue(): Value<Short> = Value.Short(value)
}

/** Serializable [ValueContainer] holding a [String]; [toValue] converts it to a [Value.String]. */
@Serializable
class StringValueContainer(private val value: String) : ValueContainer(Type.String) {
class StringValueContainer(private val value: String) : ValueContainer() {
override fun toValue(): Value<String> = Value.String(value)
}

/**
 * Serializable [ValueContainer] holding a [String]; unlike [StringValueContainer], [toValue] converts
 * it to a [Value.Text].
 */
@Serializable
class TextValueContainer(private val value: String) : ValueContainer(Type.Text) {
class TextValueContainer(private val value: String) : ValueContainer() {
override fun toValue(): Value<String> = Value.Text(value)
}

/**
 * Serializable [ValueContainer] holding a [UUID], which is serialized through [UUIDSerializer];
 * [toValue] converts it to a [Value.UUIDValue].
 */
@Serializable
class UuidValueContainer(@Serializable(UUIDSerializer::class) private val value: UUID) : ValueContainer(Type.UUID) {
class UuidValueContainer(@Serializable(UUIDSerializer::class) private val value: UUID) : ValueContainer() {
override fun toValue(): Value<UUID> = Value.UUIDValue(value)
}

/** Serializable [ValueContainer] holding a [BooleanArray]; [toValue] converts it to a [Value.BooleanVector]. */
@Serializable
class BooleanVectorValueContainer(private val value: BooleanArray) : ValueContainer(Type.BooleanVector(value.size)) {
class BooleanVectorValueContainer(private val value: BooleanArray) : ValueContainer() {
override fun toValue(): Value<BooleanArray> = Value.BooleanVector(value)
}

/** Serializable [ValueContainer] holding a [DoubleArray]; [toValue] converts it to a [Value.DoubleVector]. */
@Serializable
class DoubleVectorValueContainer(private val value: DoubleArray) : ValueContainer(Type.DoubleVector(value.size)) {
class DoubleVectorValueContainer(private val value: DoubleArray) : ValueContainer() {
override fun toValue(): Value<DoubleArray> = Value.DoubleVector(value)
}

/** Serializable [ValueContainer] holding a [FloatArray]; [toValue] converts it to a [Value.FloatVector]. */
@Serializable
class FloatVectorValueContainer(private val value: FloatArray) : ValueContainer(Type.FloatVector(value.size)) {
class FloatVectorValueContainer(private val value: FloatArray) : ValueContainer() {
override fun toValue(): Value<FloatArray> = Value.FloatVector(value)
}

/** Serializable [ValueContainer] holding an [IntArray]; [toValue] converts it to a [Value.IntVector]. */
@Serializable
class IntVectorValueContainer(private val value: IntArray) : ValueContainer(Type.IntVector(value.size)) {
class IntVectorValueContainer(private val value: IntArray) : ValueContainer() {
override fun toValue(): Value<IntArray> = Value.IntVector(value)
}

/** Serializable [ValueContainer] holding a [LongArray]; [toValue] converts it to a [Value.LongVector]. */
@Serializable
class LongVectorValueContainer(private val value: LongArray) : ValueContainer(Type.LongVector(value.size)) {
class LongVectorValueContainer(private val value: LongArray) : ValueContainer() {
override fun toValue(): Value<LongArray> = Value.LongVector(value)
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,14 @@ class ScalarJsonlReader(
}

/**
 * Executes a [SimpleFulltextQuery] by scanning all descriptors and keeping those whose queried
 * attribute contains the query string as a (case-sensitive) substring.
 *
 * Both [Value.String] and [Value.Text] attributes are matched. Descriptors that lack the attribute,
 * or hold a value of any other type for it, are skipped instead of aborting the whole scan.
 *
 * @param fulltextQuery The query to execute.
 * @return The matching descriptors; an empty [Sequence] if the query names no attribute.
 */
private fun queryFulltext(fulltextQuery: SimpleFulltextQuery): Sequence<ScalarDescriptor<*>> {
    val queryString = fulltextQuery.value.value
    val attributeName = fulltextQuery.attributeName ?: return emptySequence()

    return getAll().filter { descriptor ->
        // No `!!` / unconditional cast here: a missing attribute or a non-textual value is a non-match.
        when (val candidate = descriptor.values()[attributeName]) {
            is Value.String -> candidate.value.contains(queryString)
            is Value.Text -> candidate.value.contains(queryString)
            else -> false
        }
    }
}

private fun queryBoolean(query: SimpleBooleanQuery<*>): Sequence<ScalarDescriptor<*>> =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,14 @@ class StructJsonlReader(
}

/**
 * Executes a [SimpleFulltextQuery] by scanning all descriptors and keeping those whose queried
 * attribute contains the query string as a (case-sensitive) substring.
 *
 * Both [Value.String] and [Value.Text] attributes are matched. Descriptors that lack the attribute,
 * or hold a value of any other type for it, are skipped instead of aborting the whole scan.
 *
 * @param fulltextQuery The query to execute.
 * @return The matching descriptors; an empty [Sequence] if the query names no attribute.
 */
private fun queryFulltext(fulltextQuery: SimpleFulltextQuery): Sequence<StructDescriptor> {
    val queryString = fulltextQuery.value.value
    val attributeName = fulltextQuery.attributeName ?: return emptySequence()

    return getAll().filter { descriptor ->
        // No `!!` / unconditional cast here: a missing attribute or a non-textual value is a non-match.
        when (val candidate = descriptor.values()[attributeName]) {
            is Value.String -> candidate.value.contains(queryString)
            is Value.Text -> candidate.value.contains(queryString)
            else -> false
        }
    }
}

private fun queryBoolean(query: SimpleBooleanQuery<*>): Sequence<StructDescriptor> = getAll().filter { descriptor ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import org.vitrivr.engine.core.model.descriptor.vector.*
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.query.Query
import org.vitrivr.engine.core.model.query.proximity.ProximityQuery
import org.vitrivr.engine.core.model.retrievable.Retrieved
import org.vitrivr.engine.core.model.types.Value
import org.vitrivr.engine.core.util.knn.FixedSizePriorityQueue
import org.vitrivr.engine.database.jsonl.AbstractJsonlReader
import org.vitrivr.engine.database.jsonl.model.AttributeContainerList
import org.vitrivr.engine.database.jsonl.JsonlConnection
Expand Down Expand Up @@ -61,9 +63,44 @@ class VectorJsonlReader(
else -> throw UnsupportedOperationException("Query of typ ${query::class} is not supported by this reader.")
}

private fun queryProximity(query: ProximityQuery<*>): Sequence<VectorDescriptor<*>> {

/**
 * Unfinished stub: computes the nearest-neighbour queue via [knn] and then always throws
 * [NotImplementedError] through [TODO].
 *
 * NOTE(review): presumably meant to turn the KNN matches into [Retrieved] objects - confirm the
 * intended join before implementing.
 */
private fun queryAndJoinProximity(query: ProximityQuery<*>): Sequence<Retrieved> {

// Currently unused; the step that would consume it has not been written yet.
val queue = knn(query)

TODO()
}

/**
 * Executes a [ProximityQuery] and returns the descriptors retained by [knn], without their distances,
 * in the iteration order of the queue.
 */
private fun queryProximity(query: ProximityQuery<*>): Sequence<VectorDescriptor<*>> {
    val nearest = knn(query)
    return nearest.asSequence().map { (descriptor, _) -> descriptor }
}


/**
 * Scans all descriptors and collects, for the given [ProximityQuery], the `query.k` entries with the
 * smallest distance to the query vector.
 *
 * NOTE(review): entries are ordered by distance only, and [FixedSizePriorityQueue] is a TreeSet, so a
 * descriptor whose distance equals that of an already retained entry is treated as a duplicate and
 * is not added.
 *
 * @param query The proximity query to evaluate.
 * @return Queue of (descriptor, distance) pairs in ascending order of distance.
 */
private fun knn(query: ProximityQuery<*>): FixedSizePriorityQueue<Pair<VectorDescriptor<*>, Float>> {
    //TODO consider direction
    val byDistance = Comparator<Pair<VectorDescriptor<*>, Float>> { left, right ->
        left.second.compareTo(right.second)
    }
    val nearest = FixedSizePriorityQueue(query.k.toInt(), byDistance)

    for (descriptor in getAll()) {
        nearest.add(descriptor to distance(query, descriptor.vector))
    }

    return nearest
}

/**
 * Computes the distance between the query vector and [vector] using the distance function of [query].
 *
 * Only float and double query vectors are handled; a double result is narrowed to [Float].
 *
 * @param query The proximity query providing the query vector and the distance function.
 * @param vector The candidate vector; it is cast to the same vector type as the query vector.
 * @return The distance as a [Float].
 * @throws IllegalStateException if the query vector is neither a float nor a double vector.
 */
private fun distance(query: ProximityQuery<*>, vector: Value.Vector<*>): Float =
    when (val target = query.value) {
        is Value.FloatVector -> query.distance(target, vector as Value.FloatVector)
        is Value.DoubleVector -> {
            val wide = query.distance(target, vector as Value.DoubleVector)
            wide.toFloat()
        }

        else -> error("Unsupported query type ${query.value::class.simpleName}")
    }


}

0 comments on commit 8d0f859

Please sign in to comment.