Skip to content

Commit

Permalink
Merge pull request #50 from vitrivr/feature/boolean-retrieval
Browse files Browse the repository at this point in the history
Boolean Retrieval on Subfields
  • Loading branch information
sauterl authored Apr 9, 2024
2 parents 4a04c58 + 3b0b1ed commit dc6b5b2
Show file tree
Hide file tree
Showing 33 changed files with 1,249 additions and 116 deletions.
870 changes: 869 additions & 1 deletion README.md

Large diffs are not rendered by default.

20 changes: 1 addition & 19 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -107,27 +107,9 @@ subprojects {
}

/* Define required variables. */
def fullOAS = 'http://localhost:7070/swagger-docs'
def fullOAS = 'http://localhost:7070/openapi.json'
def oasFile = "${project.projectDir}/vitrivr-engine-server/doc/oas.json"

openApiGenerate {
generateApiTests = false
generateModelTests = false
validateSpec = false
skipValidateSpec = true

generatorName = 'typescript-angular'
inputSpec = oasFile
outputDir = file("${project.projectDir}/openapi/doc/ts").toString()
configOptions = [
npmName: '@vitrivr-engine/api',
ngVersion: '13.2.3',
snapshot: 'true',
enumPropertyNaming: 'original'
]
}


openApiGenerate {
generateApiTests = false
generateModelTests = false
Expand Down
13 changes: 13 additions & 0 deletions example-boolean-subfield-query.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"inputs":{
"booleanFileSize": {
"type":"NUMERIC",
"value":"15000",
"comparison":">"
}
},
"operations":{
"out":{"type":"RETRIEVER","field":"file.size","input":"booleanFileSize"}
},
"output":"out"
}
6 changes: 4 additions & 2 deletions gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ version_commonsmath3=3.6.1
version_cottontaildb=0.16.1
version_jackson_kotlin=2.15.2
version_javacv=1.5.9
version_javalin=5.6.3
version_javalin=6.1.3
version_javalinopenapi=6.1.3
version_javalinssl=6.1.3
version_jline=3.23.0
version_junit=5.10.1
version_junit_platform=1.10.1
Expand All @@ -19,4 +21,4 @@ version_protobuf=3.25.1
version_scrimage=4.1.1
version_slf4j=2.0.9
version_jogl=2.3.2
version_joml=1.9.25
version_joml=1.9.25
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.vitrivr.engine.core.features

import io.github.oshai.kotlinlogging.KLogger
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.onCompletion
Expand All @@ -20,9 +22,15 @@ import java.util.*
* @author Ralph Gasser
* @version 1.0.0
*/
abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor>(final override val input: Operator<Retrievable>, final override val field: Schema.Field<C, D>, final override val persisting: Boolean = true, private val bufferSize: Int = 100) :
abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor>(
final override val input: Operator<Retrievable>,
final override val field: Schema.Field<C, D>,
final override val persisting: Boolean = true,
private val bufferSize: Int = 100) :
Extractor<C, D> {

protected val logger: KLogger = KotlinLogging.logger {}

/**
* A default [Extractor] implementation. It executes the following steps:
*
Expand All @@ -33,25 +41,31 @@ abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor>(final ov
* @return [Flow] of [Retrievable]
*/
final override fun toFlow(scope: CoroutineScope): Flow<Retrievable> {
/* The [DescriptorWriter] used by this [AbstractExtractor]. */
logger.debug { "Initialising flow..." }

/** The [DescriptorWriter] used by this [AbstractExtractor]. */
val writer: DescriptorWriter<D> by lazy { this.field.getWriter() }

/* The buffer used for writing descriptors. */
/** The buffer used for writing descriptors. */
val buffer = LinkedList<D>()

/* Prepare and return flow. */
/** Prepare and return flow. */
return this.input.toFlow(scope).onEach { retrievable ->
logger.trace{"Retrievable $retrievable"}
if (this.matches(retrievable)) {
logger.debug{"Extraction for retrievable: $retrievable" }
/* Perform extraction. */
val descriptors = extract(retrievable)

logger.trace{"Extracted descriptors for retrievable ($retrievable): $descriptors"}
/* Append descriptor. */
for (d in descriptors) {
retrievable.addAttribute(DescriptorAttribute(d))
}

/* Persist descriptor. */
if (this.persisting) {
logger.debug{"Persisting descriptors for retrievable ($retrievable)"}
/* Add descriptors to buffer. */
for (d in descriptors) {
buffer.add(d)
Expand All @@ -65,6 +79,7 @@ abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor>(final ov
}
}
}.onCompletion {
logger.trace { "Completed extraction" }
/* Persist buffer if necessary. */
if (buffer.isNotEmpty()) {
writer.addAll(buffer)
Expand All @@ -88,4 +103,4 @@ abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor>(final ov
* @return List of resulting [Descriptor]s.
*/
protected abstract fun extract(retrievable: Retrievable): List<D>
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ import org.vitrivr.engine.core.operators.retrieve.Retriever
* @author Ralph Gasser
* @version 1.0.0
*/
class FileSourceMetadataRetriever(field: Schema.Field<ContentElement<*>, FileSourceMetadataDescriptor>, query: BooleanQuery, context: QueryContext) : AbstractRetriever<ContentElement<*>, FileSourceMetadataDescriptor>(field, query, context)
class FileSourceMetadataRetriever(field: Schema.Field<ContentElement<*>, FileSourceMetadataDescriptor>, query: BooleanQuery, context: QueryContext) : AbstractRetriever<ContentElement<*>, FileSourceMetadataDescriptor>(field, query, context)
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ class SchemaManager {
val schema = Schema(config.name, connection)
config.fields.map {
val analyser = loadServiceForName<Analyser<*,*>>(it.factory) ?: throw IllegalArgumentException("Failed to find a factory implementation for '${it.factory}'.")
if(it.name.contains(".")){
throw IllegalArgumentException("Field names must not have a dot (.) in their name.")
}
@Suppress("UNCHECKED_CAST")
schema.addField(it.name, analyser as Analyser<ContentElement<*>, Descriptor>, it.parameters)
}
Expand Down Expand Up @@ -121,4 +124,4 @@ class SchemaManager {
false
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,34 @@ import org.vitrivr.engine.core.model.query.bool.SimpleBooleanQuery
* Enumeration of comparison operators used by the [SimpleBooleanQuery].
*
* @author Ralph Gasser
* @version 1.0.0
* @author Loris Sauter
* @version 1.1.0
*/
enum class ComparisonOperator(val value: String) {
EQ("="),
EQ("=="),
NEQ("!="),
LE("<"),
GR(">"),
LEQ("<="),
GEQ(">=");
}

companion object {
    /**
     * Resolves a [ComparisonOperator] from its textual symbol.
     *
     * Leading and trailing whitespace of [str] is ignored. The symbol is matched against the
     * [ComparisonOperator.value] of every constant, so the accepted symbols are exactly the ones
     * declared on the enum and cannot drift from them.
     *
     * @param str The symbol to resolve, e.g. `"=="` or `">="`.
     * @return The [ComparisonOperator] whose [ComparisonOperator.value] equals the trimmed [str].
     * @throws IllegalArgumentException If the trimmed [str] matches none of the constants.
     */
    fun fromString(str: String): ComparisonOperator {
        val symbol = str.trim()
        /* Look the symbol up in the constants instead of repeating every literal here. */
        return values().firstOrNull { it.value == symbol }
            ?: throw IllegalArgumentException("Cannot parse '$str' as a comparison operator.")
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ data class SimpleBooleanQuery<T : Value<*>>(

/** The number of results that should be returned by this [SimpleBooleanQuery]. */
override val limit: Long = Long.MAX_VALUE
) : BooleanQuery
) : BooleanQuery {}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ package org.vitrivr.engine.core.model.types
* An enumeration of supported [Type]s
*
* @author Ralph Gasser
* @version 1.0.0
* @author Loris Sauter
* @version 1.1.0
*/
enum class Type {
STRING,
Expand All @@ -14,5 +15,6 @@ enum class Type {
INT,
LONG,
FLOAT,
DOUBLE
}
DOUBLE,
DATETIME
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.vitrivr.engine.core.model.types

import java.util.Date

/**
* A [Value] in vitrivr-engine maps primitive data types.
*
Expand Down Expand Up @@ -49,4 +51,7 @@ sealed interface Value<T> {

@JvmInline
value class Double(override val value: kotlin.Double) : Value<kotlin.Double>
}

/** A [Value] that wraps a [Date]. */
@JvmInline
value class DateTime(
    override val value: Date
) : Value<Date>
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.vitrivr.engine.core.operators.ingest

import io.github.oshai.kotlinlogging.KLogger
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.channels.BufferOverflow
import kotlinx.coroutines.channels.Channel.Factory.RENDEZVOUS
Expand Down Expand Up @@ -29,6 +31,8 @@ import java.util.concurrent.locks.StampedLock
*/
abstract class AbstractSegmenter(override val input: Operator<ContentElement<*>>, val context: IndexContext) : Segmenter {

protected val logger: KLogger = KotlinLogging.logger { }

/** The [SharedFlow] returned by this [AbstractSegmenter]'s [toFlow] method. Is created lazily. */
private var sharedFlow: SharedFlow<Retrievable>? = null

Expand All @@ -55,13 +59,16 @@ abstract class AbstractSegmenter(override val input: Operator<ContentElement<*>>
* @return A [SharedFlow]
*/
final override fun toFlow(scope: CoroutineScope): SharedFlow<Retrievable> {
logger.trace { "Initialising flow..." }
val stamp = this.lock.writeLock()
try {
if (this.sharedFlow != null) return this.sharedFlow!!
this.sharedFlow = channelFlow {
val input = this@AbstractSegmenter.input.toFlow(scope).onCompletion {
logger.trace { "Signalling end of pipeline" }
send(TerminalRetrievable)
}
logger.trace { "Segmenting $input" }
this@AbstractSegmenter.segment(input, this)
}.buffer(capacity = RENDEZVOUS, onBufferOverflow = BufferOverflow.SUSPEND).shareIn(CoroutineScope(scope.coroutineContext), SharingStarted.Lazily, 0)
return this.sharedFlow!!
Expand All @@ -85,4 +92,4 @@ abstract class AbstractSegmenter(override val input: Operator<ContentElement<*>>
* @param downstream The [ProducerScope] to hand [Retrievable] to the downstream pipeline.
*/
abstract suspend fun finish(downstream: ProducerScope<Retrievable>)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package org.vitrivr.engine.core.operators.retrieve

import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.descriptor.struct.StructDescriptor
import org.vitrivr.engine.core.model.metamodel.Schema

/**
 * Factory for [Retriever]s that work on [StructDescriptor]s.
 */
interface StructRetrieverFactory {

    /**
     * Creates a new [Retriever] typed to content of type [C] and to [StructDescriptor]s.
     *
     * @param C The [ContentElement] type the returned [Retriever] is parameterised with.
     * @param schema The [Schema] handed to the factory.
     * @param properties String key-value pairs handed to the factory.
     *        NOTE(review): the recognised keys are not visible here - confirm with the implementations.
     * @return A [Retriever] for [C] and [StructDescriptor].
     */
    fun <C : ContentElement<*>> newRetriever(
        schema: Schema,
        properties: Map<String, String>
    ): Retriever<C, StructDescriptor>
}
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,6 @@ class FixedDurationSegmenter : SegmenterFactory {
private var sourceWritten = false


/** [KLogger] instance. */
private val logger: KLogger = KotlinLogging.logger {}

override suspend fun segment(upstream: Flow<ContentElement<*>>, downstream: ProducerScope<Retrievable>) {
upstream.collect { content ->
this.mutex.lock()
Expand Down Expand Up @@ -183,4 +180,4 @@ class FixedDurationSegmenter : SegmenterFactory {
this.lastStartTime = nextStartTime
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package org.vitrivr.engine.plugin.cottontaildb

import org.vitrivr.cottontail.client.language.basics.predicate.Compare
import org.vitrivr.cottontail.core.types.Types
import org.vitrivr.cottontail.core.types.VectorValue
import org.vitrivr.cottontail.core.values.*
import org.vitrivr.engine.core.model.descriptor.FieldSchema
import org.vitrivr.engine.core.model.descriptor.scalar.*
Expand Down Expand Up @@ -88,6 +87,8 @@ internal fun FieldSchema.toCottontailType(): Types<*> {
} else {
Types.Double
}

Type.DATETIME -> Types.Date
}
}

Expand Down Expand Up @@ -119,6 +120,7 @@ internal fun Value<*>.toCottontailValue(): PublicValue = when (this) {
is Value.Long -> LongValue(this.value)
is Value.Short -> ShortValue(this.value)
is Value.String -> StringValue(this.value)
is Value.DateTime -> DateValue(this.value)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ class ScalarDescriptorReader(field: Schema.Field<*, ScalarDescriptor<*>>, connec
is StringDescriptor -> StringDescriptor(retrievableId, descriptorId, tuple.asString(DESCRIPTOR_COLUMN_NAME)?.toValue() ?: throw IllegalArgumentException("The provided tuple is missing the required field '$DESCRIPTOR_COLUMN_NAME'."))
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ class StringDescriptorReader(field: Schema.Field<*, StringDescriptor>, connectio
val value = tuple.asString(DESCRIPTOR_COLUMN_NAME)?.toValue() ?: throw IllegalArgumentException("The provided tuple is missing the required field '$DESCRIPTOR_COLUMN_NAME'.")
return StringDescriptor(descriptorId, retrievableId, value)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ class StructDescriptorInitializer(field: Schema.Field<*, StructDescriptor>, conn
} else {
Types.Double
}

Type.DATETIME -> Types.Date
}
create.column(Name.ColumnName.create(field.name), type, nullable = field.nullable, primaryKey = false, autoIncrement = false)
}
Expand All @@ -80,4 +82,4 @@ class StructDescriptorInitializer(field: Schema.Field<*, StructDescriptor>, conn
logger.error(e) { "Failed to initialize entity ${this.entityName} due to exception." }
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import org.vitrivr.engine.core.model.retrievable.attributes.ScoreAttribute
import org.vitrivr.engine.core.model.types.Value
import org.vitrivr.engine.plugin.cottontaildb.*
import org.vitrivr.engine.plugin.cottontaildb.descriptors.AbstractDescriptorReader
import java.io.File
import java.util.*
import kotlin.reflect.full.primaryConstructor

Expand Down Expand Up @@ -63,10 +64,13 @@ class StructDescriptorReader(field: Schema.Field<*, StructDescriptor>, connectio

/* Execute query. */
return this.connection.client.query(cottontailQuery).asSequence().map { tuple ->
val retrievableId = tuple.asUuidValue(RETRIEVABLE_ID_COLUMN_NAME)?.value ?: throw IllegalArgumentException("The provided tuple is missing the required field '${RETRIEVABLE_ID_COLUMN_NAME}'.")
val score = tuple.asDouble(SCORE_COLUMN_NAME) ?: 0.0
val retrievableId = tuple.asUuidValue(RETRIEVABLE_ID_COLUMN_NAME)?.value
?: throw IllegalArgumentException("The provided tuple is missing the required field '${RETRIEVABLE_ID_COLUMN_NAME}'.")
val retrieved = Retrieved(retrievableId, null, false)
retrieved.addAttribute(ScoreAttribute.Unbound(score.toFloat()))
if(query is SimpleFulltextQuery){
val score = tuple.asDouble(SCORE_COLUMN_NAME) ?: 0.0
retrieved.addAttribute(ScoreAttribute.Unbound(score.toFloat()))
}
retrieved
}
}
Expand Down
Loading

0 comments on commit dc6b5b2

Please sign in to comment.