Skip to content

Commit

Permalink
Merge pull request #103 from vitrivr/feature/content_authors_everywhere
Browse files Browse the repository at this point in the history
Looks good to me. Thanks for the effort.
  • Loading branch information
ppanopticon authored Aug 30, 2024
2 parents 7c40489 + c626138 commit 5e95c9c
Show file tree
Hide file tree
Showing 36 changed files with 257 additions and 80 deletions.
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ subprojects {
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: version_junit
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: version_junit
testImplementation group: 'org.junit.platform', name: 'junit-platform-commons', version: version_junit_platform
testImplementation(group: "org.jetbrains.kotlinx", "name": "kotlinx-coroutines-test", version: version_kotlinx_coroutines)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@ package org.vitrivr.engine.core.features
import io.github.oshai.kotlinlogging.KLogger
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.*
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.asFlow
import kotlinx.coroutines.flow.emitAll
import kotlinx.coroutines.flow.flow
import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.descriptor.Descriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.retrievable.Retrievable
import org.vitrivr.engine.core.model.retrievable.attributes.CONTENT_AUTHORS_KEY
import org.vitrivr.engine.core.model.retrievable.attributes.ContentAuthorAttribute
import org.vitrivr.engine.core.operators.Operator
import org.vitrivr.engine.core.operators.ingest.Extractor

Expand All @@ -19,15 +24,27 @@ import org.vitrivr.engine.core.operators.ingest.Extractor
* @author Ralph Gasser
* @version 1.0.0
*/
abstract class AbstractBatchedExtractor<C : ContentElement<*>, D : Descriptor<*>>(final override val input: Operator<Retrievable>, final override val analyser: Analyser<C, D>, final override val field: Schema.Field<C, D>?, private val bufferSize: Int = 100) : Extractor<C, D> {

private val logger: KLogger = KotlinLogging.logger {}
abstract class AbstractBatchedExtractor<C : ContentElement<*>, D : Descriptor<*>>(final override val input: Operator<Retrievable>, final override val analyser: Analyser<C, D>, final override val field: Schema.Field<C, D>?, protected val parameters: Map<String, String>) : Extractor<C, D> {

companion object {
/** Name of the parameter from which the buffer- and batch size of an [AbstractBatchedExtractor] is read. */
const val BATCH_SIZE_KEY = "batchSize"
}

/* Sanity check: if a field was provided, it must reference the same analyser this extractor was created with. */
init {
require(field == null || this.field.analyser == this.analyser) { "Field and analyser do not match! This is a programmer's error!" }
}

/** The [KLogger] instance used by this [AbstractBatchedExtractor]. */
protected val logger: KLogger = KotlinLogging.logger {}

/**
 * The names of the content sources to consider during processing, or `null` if the [CONTENT_AUTHORS_KEY] parameter is not set.
 *
 * The parameter value is interpreted as a comma-separated list. Whitespace around each entry is stripped and
 * blank entries are discarded, so that `"a, b"` and `"a,b"` yield the same set.
 */
protected val contentSources: Set<String>?
    get() = this.parameters[CONTENT_AUTHORS_KEY]
        ?.split(",")
        ?.map { it.trim() }
        ?.filter { it.isNotEmpty() }
        ?.toSet()

/**
 * The buffer- and batch size.
 *
 * Read from the [BATCH_SIZE_KEY] parameter; falls back to 1 if the parameter is missing or not a valid integer.
 */
private val bufferSize: Int
    get() {
        val configured = this.parameters[BATCH_SIZE_KEY]
        return configured?.toIntOrNull() ?: 1
    }

/**
* A default [Extractor] implementation for batched extraction. It executes the following steps:
*
Expand Down Expand Up @@ -101,4 +118,23 @@ abstract class AbstractBatchedExtractor<C : ContentElement<*>, D : Descriptor<*>
*/
protected abstract fun extract(retrievables: List<Retrievable>): List<List<D>>

/**
 * Selects the content elements of a [Retrievable] that should be processed by this extractor.
 *
 * An element is retained if it is an instance of one of the analyser's content classes. If, in addition, a set of
 * content IDs could be obtained from the retrievable's [ContentAuthorAttribute] for the configured [contentSources],
 * the element's ID must be contained in that set. If [contentSources] is `null` or the lookup yields `null`,
 * no ID-based filtering takes place.
 *
 * @param retrievable [Retrievable] to extract content from.
 * @return [List] of content elements that passed the filter.
 */
@Suppress("UNCHECKED_CAST")
protected fun filterContent(retrievable: Retrievable): List<C> {
    val sources = this.contentSources
    val permittedIds = if (sources == null) {
        null
    } else {
        retrievable.filteredAttribute(ContentAuthorAttribute::class.java)?.getContentIds(sources)
    }

    val selected = retrievable.content.filter { element ->
        val supported = this.analyser.contentClasses.any { it.isInstance(element) }
        supported && (permittedIds == null || permittedIds.contains(element.id))
    }
    return selected.map { it as C }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,30 @@ import org.vitrivr.engine.core.model.descriptor.Descriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.retrievable.Retrievable
import org.vitrivr.engine.core.model.retrievable.attributes.CONTENT_AUTHORS_KEY
import org.vitrivr.engine.core.model.retrievable.attributes.ContentAuthorAttribute
import org.vitrivr.engine.core.operators.Operator
import org.vitrivr.engine.core.operators.ingest.Extractor

/**
* An abstract [Extractor] implementation that is suitable for most default [Extractor] implementations.
*
* @author Ralph Gasser
* @version 1.2.0
* @version 1.3.0
*/
abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor<*>>(final override val input: Operator<Retrievable>, final override val analyser: Analyser<C, D>, final override val field: Schema.Field<C, D>? = null) : Extractor<C, D> {

protected val logger: KLogger = KotlinLogging.logger {}
abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor<*>>(final override val input: Operator<Retrievable>, final override val analyser: Analyser<C, D>, final override val field: Schema.Field<C, D>? = null, protected val parameters: Map<String, String>) : Extractor<C, D> {

/* Sanity check: if a field was provided, it must reference the same analyser this extractor was created with. */
init {
require(field == null || this.field.analyser == this.analyser) { "Field and analyser do not match! This is a programmer's error!" }
}

/** The [KLogger] instance used by this [AbstractExtractor]. */
protected val logger: KLogger = KotlinLogging.logger {}

/**
 * The names of the content sources to consider during processing, or `null` if the [CONTENT_AUTHORS_KEY] parameter is not set.
 *
 * The parameter value is interpreted as a comma-separated list. Whitespace around each entry is stripped and
 * blank entries are discarded, so that `"a, b"` and `"a,b"` yield the same set.
 */
protected val contentSources: Set<String>?
    get() = this.parameters[CONTENT_AUTHORS_KEY]
        ?.split(",")
        ?.map { it.trim() }
        ?.filter { it.isNotEmpty() }
        ?.toSet()

/**
* A default [Extractor] implementation. It executes the following steps:
*
Expand All @@ -36,7 +43,7 @@ abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor<*>>(final
*
* @return [Flow] of [Retrievable]
*/
final override fun toFlow(scope: CoroutineScope): Flow<Retrievable> = this.input.toFlow(scope).onEach { retrievable ->
override fun toFlow(scope: CoroutineScope): Flow<Retrievable> = this.input.toFlow(scope).onEach { retrievable ->
if (this.matches(retrievable)) {
/* Perform extraction. */
val descriptors = try {
Expand Down Expand Up @@ -74,4 +81,24 @@ abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor<*>>(final
* @return List of resulting [Descriptor]s.
*/
protected abstract fun extract(retrievable: Retrievable): List<D>

/**
 * Selects the content elements of a [Retrievable] that should be processed by this extractor.
 *
 * An element is retained if it is an instance of one of the analyser's content classes. If, in addition, a set of
 * content IDs could be obtained from the retrievable's [ContentAuthorAttribute] for the configured [contentSources],
 * the element's ID must be contained in that set. If [contentSources] is `null` or the lookup yields `null`,
 * no ID-based filtering takes place.
 *
 * @param retrievable [Retrievable] to extract content from.
 * @return [List] of content elements that passed the filter.
 */
@Suppress("UNCHECKED_CAST")
protected fun filterContent(retrievable: Retrievable): List<C> {
    val sources = this.contentSources
    val permittedIds = if (sources == null) {
        null
    } else {
        retrievable.filteredAttribute(ContentAuthorAttribute::class.java)?.getContentIds(sources)
    }

    val selected = retrievable.content.filter { element ->
        val supported = this.analyser.contentClasses.any { it.isInstance(element) }
        supported && (permittedIds == null || permittedIds.contains(element.id))
    }
    return selected.map { it as C }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import org.vitrivr.engine.core.model.content.Content
import org.vitrivr.engine.core.model.content.element.ImageContent
import org.vitrivr.engine.core.model.descriptor.vector.FloatVectorDescriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Analyser.Companion.merge
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.query.Query
import org.vitrivr.engine.core.model.query.proximity.ProximityQuery
Expand Down Expand Up @@ -51,7 +52,7 @@ class AverageColor : Analyser<ImageContent, FloatVectorDescriptor> {
* @return A new [Extractor] instance for this [Analyser]
* @throws [UnsupportedOperationException], if this [Analyser] does not support the creation of an [Extractor] instance.
*/
override fun newExtractor(field: Schema.Field<ImageContent, FloatVectorDescriptor>, input: Operator<Retrievable>, context: IndexContext) = AverageColorExtractor(input, this, field)
override fun newExtractor(field: Schema.Field<ImageContent, FloatVectorDescriptor>, input: Operator<Retrievable>, context: IndexContext) = AverageColorExtractor(input, this, field, merge(field, context))

/**
* Generates and returns a new [AverageColorExtractor] instance for this [AverageColor].
Expand All @@ -63,7 +64,7 @@ class AverageColor : Analyser<ImageContent, FloatVectorDescriptor> {
* @return A new [Extractor] instance for this [Analyser]
* @throws [UnsupportedOperationException], if this [Analyser] does not support the creation of an [Extractor] instance.
*/
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext): Extractor<ImageContent, FloatVectorDescriptor> = AverageColorExtractor(input, this, null)
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext): Extractor<ImageContent, FloatVectorDescriptor> = AverageColorExtractor(input, this, null, context.local[name] ?: emptyMap())

/**
* Generates and returns a new [DenseRetriever] instance for this [AverageColor].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import org.vitrivr.engine.core.model.descriptor.Descriptor
import org.vitrivr.engine.core.model.descriptor.vector.FloatVectorDescriptor
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.retrievable.Retrievable
import org.vitrivr.engine.core.model.retrievable.attributes.CONTENT_AUTHORS_KEY
import org.vitrivr.engine.core.operators.Operator
import org.vitrivr.engine.core.operators.ingest.Extractor
import org.vitrivr.engine.core.source.file.FileSource
Expand All @@ -20,7 +21,9 @@ import org.vitrivr.engine.core.source.file.FileSource
* @author Luca Rossetto
* @version 1.2.0
*/
class AverageColorExtractor(input: Operator<Retrievable>, analyser: AverageColor, field: Schema.Field<ImageContent, FloatVectorDescriptor>?) : AbstractExtractor<ImageContent, FloatVectorDescriptor>(input, analyser, field) {
class AverageColorExtractor(input: Operator<Retrievable>, analyser: AverageColor, field: Schema.Field<ImageContent, FloatVectorDescriptor>?, parameters : Map<String, String>) : AbstractExtractor<ImageContent, FloatVectorDescriptor>(input, analyser, field, parameters) {


/**
* Internal method to check, if [Retrievable] matches this [Extractor] and should thus be processed.
*
Expand All @@ -38,7 +41,7 @@ class AverageColorExtractor(input: Operator<Retrievable>, analyser: AverageColor
* @return List of resulting [Descriptor]s.
*/
override fun extract(retrievable: Retrievable): List<FloatVectorDescriptor> {
val content = retrievable.content.filterIsInstance<ImageContent>()
val content = this.filterContent(retrievable)
return content.map { (this.analyser as AverageColor).analyse(it).copy(retrievableId = retrievable.id, field = this.field) }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.descriptor.Attribute
import org.vitrivr.engine.core.model.descriptor.struct.AnyMapStructDescriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Analyser.Companion.merge
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.query.Query
import org.vitrivr.engine.core.model.query.bool.SimpleBooleanQuery
Expand Down Expand Up @@ -39,7 +40,7 @@ class ExifMetadata : Analyser<ContentElement<*>, AnyMapStructDescriptor> {
*
* @return A new [Extractor] instance for this [Analyser]
*/
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext) = ExifMetadataExtractor(input, this, null)
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext) = ExifMetadataExtractor(input, this, null, context.local[name] ?: emptyMap())

/**
* Generates and returns a new [ExifMetadataExtractor] instance for this [ExifMetadata].
Expand All @@ -50,7 +51,7 @@ class ExifMetadata : Analyser<ContentElement<*>, AnyMapStructDescriptor> {
*
* @return A new [Extractor] instance for this [Analyser]
*/
override fun newExtractor(field: Schema.Field<ContentElement<*>, AnyMapStructDescriptor>, input: Operator<Retrievable>, context: IndexContext) = ExifMetadataExtractor(input, this, field)
override fun newExtractor(field: Schema.Field<ContentElement<*>, AnyMapStructDescriptor>, input: Operator<Retrievable>, context: IndexContext) = ExifMetadataExtractor(input, this, field, merge(field, context))

/**
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ private fun JsonObject.convertType(type: Type): Value<*>? {
}
}

class ExifMetadataExtractor(input: Operator<Retrievable>, analyser: ExifMetadata, field: Schema.Field<ContentElement<*>, AnyMapStructDescriptor>?) : AbstractExtractor<ContentElement<*>, AnyMapStructDescriptor>(input, analyser, field) {
class ExifMetadataExtractor(input: Operator<Retrievable>, analyser: ExifMetadata, field: Schema.Field<ContentElement<*>, AnyMapStructDescriptor>?, parameters: Map<String,String>) : AbstractExtractor<ContentElement<*>, AnyMapStructDescriptor>(input, analyser, field, parameters) {


override fun matches(retrievable: Retrievable): Boolean =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import org.vitrivr.engine.core.context.QueryContext
import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.descriptor.struct.metadata.source.FileSourceMetadataDescriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Analyser.Companion.merge
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.query.Query
import org.vitrivr.engine.core.model.query.bool.SimpleBooleanQuery
Expand Down Expand Up @@ -39,7 +40,7 @@ class FileSourceMetadata : Analyser<ContentElement<*>, FileSourceMetadataDescrip
*
* @return [FileSourceMetadataExtractor]
*/
override fun newExtractor(field: Schema.Field<ContentElement<*>, FileSourceMetadataDescriptor>, input: Operator<Retrievable>, context: IndexContext) = FileSourceMetadataExtractor(input, this, field)
override fun newExtractor(field: Schema.Field<ContentElement<*>, FileSourceMetadataDescriptor>, input: Operator<Retrievable>, context: IndexContext) = FileSourceMetadataExtractor(input, this, field, merge(field, context))

/**
* Generates and returns a new [FileSourceMetadataExtractor] for the provided [Schema.Field].
Expand All @@ -50,7 +51,7 @@ class FileSourceMetadata : Analyser<ContentElement<*>, FileSourceMetadataDescrip
*
* @return [FileSourceMetadataExtractor]
*/
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext) = FileSourceMetadataExtractor(input, this, null)
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext) = FileSourceMetadataExtractor(input, this, null, context.local[name] ?: emptyMap())

/**
* Generates and returns a new [FileSourceMetadataRetriever] for the provided [Schema.Field].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import kotlin.io.path.absolutePathString
* @author Ralph Gasser
* @version 1.2.0
*/
class FileSourceMetadataExtractor(input: Operator<Retrievable>, analyser: FileSourceMetadata, field: Schema.Field<ContentElement<*>, FileSourceMetadataDescriptor>?) :
AbstractExtractor<ContentElement<*>, FileSourceMetadataDescriptor>(input, analyser, field) {
class FileSourceMetadataExtractor(input: Operator<Retrievable>, analyser: FileSourceMetadata, field: Schema.Field<ContentElement<*>, FileSourceMetadataDescriptor>?, parameters: Map<String,String>) :
AbstractExtractor<ContentElement<*>, FileSourceMetadataDescriptor>(input, analyser, field, parameters) {
/**
* Internal method to check, if [Retrievable] matches this [Extractor] and should thus be processed.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.descriptor.struct.metadata.source.FileSourceMetadataDescriptor
import org.vitrivr.engine.core.model.descriptor.struct.metadata.source.VideoSourceMetadataDescriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Analyser.Companion.merge
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.query.Query
import org.vitrivr.engine.core.model.query.bool.BooleanQuery
Expand Down Expand Up @@ -43,7 +44,7 @@ class VideoSourceMetadata : Analyser<ContentElement<*>, VideoSourceMetadataDescr
*
* @return [VideoSourceMetadataExtractor]
*/
override fun newExtractor(field: Schema.Field<ContentElement<*>, VideoSourceMetadataDescriptor>, input: Operator<Retrievable>, context: IndexContext) = VideoSourceMetadataExtractor(input, this, field)
override fun newExtractor(field: Schema.Field<ContentElement<*>, VideoSourceMetadataDescriptor>, input: Operator<Retrievable>, context: IndexContext) = VideoSourceMetadataExtractor(input, this, field, merge(field, context))

/**
* Generates and returns a new [VideoSourceMetadataExtractor] for the provided [Schema.Field].
Expand All @@ -54,7 +55,7 @@ class VideoSourceMetadata : Analyser<ContentElement<*>, VideoSourceMetadataDescr
*
* @return [VideoSourceMetadataExtractor]
*/
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext): Extractor<ContentElement<*>, VideoSourceMetadataDescriptor> = VideoSourceMetadataExtractor(input, this, null)
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext): Extractor<ContentElement<*>, VideoSourceMetadataDescriptor> = VideoSourceMetadataExtractor(input, this, null, context.local[name] ?: emptyMap())

/**
* Generates and returns a new [VideoSourceMetadataRetriever] for the provided [Schema.Field].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import java.util.*
* @author Ralph Gasser
* @version 1.1.0
*/
class VideoSourceMetadataExtractor(input: Operator<Retrievable>, analyser: VideoSourceMetadata, field: Schema.Field<ContentElement<*>, VideoSourceMetadataDescriptor>?) :
AbstractExtractor<ContentElement<*>, VideoSourceMetadataDescriptor>(input, analyser, field) {
class VideoSourceMetadataExtractor(input: Operator<Retrievable>, analyser: VideoSourceMetadata, field: Schema.Field<ContentElement<*>, VideoSourceMetadataDescriptor>?, parameters: Map<String,String>) :
AbstractExtractor<ContentElement<*>, VideoSourceMetadataDescriptor>(input, analyser, field, parameters) {
/**
* Internal method to check, if [Retrievable] matches this [Extractor] and should thus be processed.
*
Expand Down
Loading

0 comments on commit 5e95c9c

Please sign in to comment.