Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bugfix: added content authors to all extractors #103

Merged
merged 9 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ subprojects {
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: version_junit
testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: version_junit
testImplementation group: 'org.junit.platform', name: 'junit-platform-commons', version: version_junit_platform
testImplementation(group: "org.jetbrains.kotlinx", "name": "kotlinx-coroutines-test", version: version_kotlinx_coroutines)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@ package org.vitrivr.engine.core.features
import io.github.oshai.kotlinlogging.KLogger
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.*
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.asFlow
import kotlinx.coroutines.flow.emitAll
import kotlinx.coroutines.flow.flow
import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.descriptor.Descriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.retrievable.Retrievable
import org.vitrivr.engine.core.model.retrievable.attributes.CONTENT_AUTHORS_KEY
import org.vitrivr.engine.core.model.retrievable.attributes.ContentAuthorAttribute
import org.vitrivr.engine.core.operators.Operator
import org.vitrivr.engine.core.operators.ingest.Extractor

Expand All @@ -19,15 +24,27 @@ import org.vitrivr.engine.core.operators.ingest.Extractor
* @author Ralph Gasser
* @version 1.0.0
*/
abstract class AbstractBatchedExtractor<C : ContentElement<*>, D : Descriptor<*>>(final override val input: Operator<Retrievable>, final override val analyser: Analyser<C, D>, final override val field: Schema.Field<C, D>?, private val bufferSize: Int = 100) : Extractor<C, D> {

private val logger: KLogger = KotlinLogging.logger {}
abstract class AbstractBatchedExtractor<C : ContentElement<*>, D : Descriptor<*>>(final override val input: Operator<Retrievable>, final override val analyser: Analyser<C, D>, final override val field: Schema.Field<C, D>?, protected val parameters: Map<String, String>) : Extractor<C, D> {

companion object {
/** Name of the entry in the extractor's parameter map from which the buffer- and batch size is read. */
const val BATCH_SIZE_KEY = "batchSize"
}

init {
/* Fail fast at construction time: if a field is given, its analyser must be the very analyser handed to this extractor. */
require(field == null || this.field.analyser == this.analyser) { "Field and analyser do not match! This is a programmer's error!" }
}

/** The [KLogger] instance used by this [AbstractBatchedExtractor]. */
protected val logger: KLogger = KotlinLogging.logger {}

/**
 * The names of the content sources to consider during processing.
 *
 * Read from the [CONTENT_AUTHORS_KEY] entry of [parameters], which is split on commas.
 * The value is `null` when that entry is absent.
 */
protected val contentSources: Set<String>?
    get() {
        val raw = this.parameters[CONTENT_AUTHORS_KEY] ?: return null
        return raw.split(",").toSet()
    }

/**
 * The buffer- and batch size.
 *
 * Read from the [BATCH_SIZE_KEY] entry of [parameters]. Falls back to 1 when the entry is missing or
 * cannot be parsed as an integer. Values below 1 are raised to 1, because a zero or negative batch
 * size carries no meaning.
 */
private val bufferSize: Int
    get() = (this.parameters[BATCH_SIZE_KEY]?.toIntOrNull() ?: 1).coerceAtLeast(1)

/**
* A default [Extractor] implementation for batched extraction. It executes the following steps:
*
Expand Down Expand Up @@ -101,4 +118,23 @@ abstract class AbstractBatchedExtractor<C : ContentElement<*>, D : Descriptor<*>
*/
protected abstract fun extract(retrievables: List<Retrievable>): List<List<D>>

/**
 * Selects the content elements of a [Retrievable] that this extractor should process.
 *
 * An element is kept only if it is an instance of one of the analyser's content classes. If [contentSources]
 * is set and the [Retrievable] carries a [ContentAuthorAttribute], the element's ID must additionally be
 * among the content IDs that attribute returns for [contentSources]. If either of the two is missing,
 * no ID-based restriction is applied.
 *
 * @param retrievable [Retrievable] to extract content from.
 * @return The matching content elements, cast to [C].
 */
@Suppress("UNCHECKED_CAST")
protected fun filterContent(retrievable: Retrievable): List<C> {
    val sources = this.contentSources
    val permittedIds = if (sources != null) {
        retrievable.filteredAttribute(ContentAuthorAttribute::class.java)?.getContentIds(sources)
    } else {
        null
    }
    return retrievable.content.filter { element ->
        val supported = this.analyser.contentClasses.any { it.isInstance(element) }
        supported && (permittedIds == null || permittedIds.contains(element.id))
    }.map { it as C }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,30 @@ import org.vitrivr.engine.core.model.descriptor.Descriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.retrievable.Retrievable
import org.vitrivr.engine.core.model.retrievable.attributes.CONTENT_AUTHORS_KEY
import org.vitrivr.engine.core.model.retrievable.attributes.ContentAuthorAttribute
import org.vitrivr.engine.core.operators.Operator
import org.vitrivr.engine.core.operators.ingest.Extractor

/**
* An abstract [Extractor] implementation that is suitable for most default [Extractor] implementations.
*
* @author Ralph Gasser
* @version 1.2.0
* @version 1.3.0
*/
abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor<*>>(final override val input: Operator<Retrievable>, final override val analyser: Analyser<C, D>, final override val field: Schema.Field<C, D>? = null) : Extractor<C, D> {

protected val logger: KLogger = KotlinLogging.logger {}
abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor<*>>(final override val input: Operator<Retrievable>, final override val analyser: Analyser<C, D>, final override val field: Schema.Field<C, D>? = null, protected val parameters: Map<String, String>) : Extractor<C, D> {

init {
/* Fail fast at construction time: if a field is given, its analyser must be the very analyser handed to this extractor. */
require(field == null || this.field.analyser == this.analyser) { "Field and analyser do not match! This is a programmer's error!" }
}

/** The [KLogger] instance used by this [AbstractExtractor]; it is `protected`, so subclasses can log through it as well. */
protected val logger: KLogger = KotlinLogging.logger {}

/**
 * The names of the content sources to consider during processing.
 *
 * Read from the [CONTENT_AUTHORS_KEY] entry of [parameters], which is split on commas.
 * The value is `null` when that entry is absent.
 */
protected val contentSources: Set<String>?
    get() {
        val raw = this.parameters[CONTENT_AUTHORS_KEY] ?: return null
        return raw.split(",").toSet()
    }

/**
* A default [Extractor] implementation. It executes the following steps:
*
Expand All @@ -36,7 +43,7 @@ abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor<*>>(final
*
* @return [Flow] of [Retrievable]
*/
final override fun toFlow(scope: CoroutineScope): Flow<Retrievable> = this.input.toFlow(scope).onEach { retrievable ->
override fun toFlow(scope: CoroutineScope): Flow<Retrievable> = this.input.toFlow(scope).onEach { retrievable ->
if (this.matches(retrievable)) {
/* Perform extraction. */
val descriptors = try {
Expand Down Expand Up @@ -74,4 +81,24 @@ abstract class AbstractExtractor<C : ContentElement<*>, D : Descriptor<*>>(final
* @return List of resulting [Descriptor]s.
*/
protected abstract fun extract(retrievable: Retrievable): List<D>

/**
 * Selects the content elements of a [Retrievable] that this extractor should process.
 *
 * An element is kept only if it is an instance of one of the analyser's content classes. If [contentSources]
 * is set and the [Retrievable] carries a [ContentAuthorAttribute], the element's ID must additionally be
 * among the content IDs that attribute returns for [contentSources]. If either of the two is missing,
 * no ID-based restriction is applied.
 *
 * @param retrievable [Retrievable] to extract content from.
 * @return The matching content elements, cast to [C].
 */
@Suppress("UNCHECKED_CAST")
protected fun filterContent(retrievable: Retrievable): List<C> {
    val sources = this.contentSources
    val permittedIds = if (sources != null) {
        retrievable.filteredAttribute(ContentAuthorAttribute::class.java)?.getContentIds(sources)
    } else {
        null
    }
    return retrievable.content.filter { element ->
        val supported = this.analyser.contentClasses.any { it.isInstance(element) }
        supported && (permittedIds == null || permittedIds.contains(element.id))
    }.map { it as C }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import org.vitrivr.engine.core.model.content.Content
import org.vitrivr.engine.core.model.content.element.ImageContent
import org.vitrivr.engine.core.model.descriptor.vector.FloatVectorDescriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Analyser.Companion.merge
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.query.Query
import org.vitrivr.engine.core.model.query.proximity.ProximityQuery
Expand Down Expand Up @@ -51,7 +52,7 @@ class AverageColor : Analyser<ImageContent, FloatVectorDescriptor> {
* @return A new [Extractor] instance for this [Analyser]
* @throws [UnsupportedOperationException], if this [Analyser] does not support the creation of an [Extractor] instance.
*/
override fun newExtractor(field: Schema.Field<ImageContent, FloatVectorDescriptor>, input: Operator<Retrievable>, context: IndexContext) = AverageColorExtractor(input, this, field)
override fun newExtractor(field: Schema.Field<ImageContent, FloatVectorDescriptor>, input: Operator<Retrievable>, context: IndexContext) = AverageColorExtractor(input, this, field, merge(field, context))

/**
* Generates and returns a new [AverageColorExtractor] instance for this [AverageColor].
Expand All @@ -63,7 +64,7 @@ class AverageColor : Analyser<ImageContent, FloatVectorDescriptor> {
* @return A new [Extractor] instance for this [Analyser]
* @throws [UnsupportedOperationException], if this [Analyser] does not support the creation of an [Extractor] instance.
*/
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext): Extractor<ImageContent, FloatVectorDescriptor> = AverageColorExtractor(input, this, null)
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext): Extractor<ImageContent, FloatVectorDescriptor> = AverageColorExtractor(input, this, null, context.local[name] ?: emptyMap())

/**
* Generates and returns a new [DenseRetriever] instance for this [AverageColor].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import org.vitrivr.engine.core.model.descriptor.Descriptor
import org.vitrivr.engine.core.model.descriptor.vector.FloatVectorDescriptor
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.retrievable.Retrievable
import org.vitrivr.engine.core.model.retrievable.attributes.CONTENT_AUTHORS_KEY
import org.vitrivr.engine.core.operators.Operator
import org.vitrivr.engine.core.operators.ingest.Extractor
import org.vitrivr.engine.core.source.file.FileSource
Expand All @@ -20,7 +21,9 @@ import org.vitrivr.engine.core.source.file.FileSource
* @author Luca Rossetto
* @version 1.2.0
*/
class AverageColorExtractor(input: Operator<Retrievable>, analyser: AverageColor, field: Schema.Field<ImageContent, FloatVectorDescriptor>?) : AbstractExtractor<ImageContent, FloatVectorDescriptor>(input, analyser, field) {
class AverageColorExtractor(input: Operator<Retrievable>, analyser: AverageColor, field: Schema.Field<ImageContent, FloatVectorDescriptor>?, parameters : Map<String, String>) : AbstractExtractor<ImageContent, FloatVectorDescriptor>(input, analyser, field, parameters) {


/**
* Internal method to check, if [Retrievable] matches this [Extractor] and should thus be processed.
*
Expand All @@ -38,7 +41,7 @@ class AverageColorExtractor(input: Operator<Retrievable>, analyser: AverageColor
* @return List of resulting [Descriptor]s.
*/
override fun extract(retrievable: Retrievable): List<FloatVectorDescriptor> {
val content = retrievable.content.filterIsInstance<ImageContent>()
val content = this.filterContent(retrievable)
return content.map { (this.analyser as AverageColor).analyse(it).copy(retrievableId = retrievable.id, field = this.field) }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.descriptor.Attribute
import org.vitrivr.engine.core.model.descriptor.struct.AnyMapStructDescriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Analyser.Companion.merge
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.query.Query
import org.vitrivr.engine.core.model.query.bool.SimpleBooleanQuery
Expand Down Expand Up @@ -39,7 +40,7 @@ class ExifMetadata : Analyser<ContentElement<*>, AnyMapStructDescriptor> {
*
* @return A new [Extractor] instance for this [Analyser]
*/
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext) = ExifMetadataExtractor(input, this, null)
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext) = ExifMetadataExtractor(input, this, null, context.local[name] ?: emptyMap())

/**
* Generates and returns a new [ExifMetadataExtractor] instance for this [ExifMetadata].
Expand All @@ -50,7 +51,7 @@ class ExifMetadata : Analyser<ContentElement<*>, AnyMapStructDescriptor> {
*
* @return A new [Extractor] instance for this [Analyser]
*/
override fun newExtractor(field: Schema.Field<ContentElement<*>, AnyMapStructDescriptor>, input: Operator<Retrievable>, context: IndexContext) = ExifMetadataExtractor(input, this, field)
override fun newExtractor(field: Schema.Field<ContentElement<*>, AnyMapStructDescriptor>, input: Operator<Retrievable>, context: IndexContext) = ExifMetadataExtractor(input, this, field, merge(field, context))

/**
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ private fun JsonObject.convertType(type: Type): Value<*>? {
}
}

class ExifMetadataExtractor(input: Operator<Retrievable>, analyser: ExifMetadata, field: Schema.Field<ContentElement<*>, AnyMapStructDescriptor>?) : AbstractExtractor<ContentElement<*>, AnyMapStructDescriptor>(input, analyser, field) {
class ExifMetadataExtractor(input: Operator<Retrievable>, analyser: ExifMetadata, field: Schema.Field<ContentElement<*>, AnyMapStructDescriptor>?, parameters: Map<String,String>) : AbstractExtractor<ContentElement<*>, AnyMapStructDescriptor>(input, analyser, field, parameters) {


override fun matches(retrievable: Retrievable): Boolean =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import org.vitrivr.engine.core.context.QueryContext
import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.descriptor.struct.metadata.source.FileSourceMetadataDescriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Analyser.Companion.merge
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.query.Query
import org.vitrivr.engine.core.model.query.bool.SimpleBooleanQuery
Expand Down Expand Up @@ -39,7 +40,7 @@ class FileSourceMetadata : Analyser<ContentElement<*>, FileSourceMetadataDescrip
*
* @return [FileSourceMetadataExtractor]
*/
override fun newExtractor(field: Schema.Field<ContentElement<*>, FileSourceMetadataDescriptor>, input: Operator<Retrievable>, context: IndexContext) = FileSourceMetadataExtractor(input, this, field)
override fun newExtractor(field: Schema.Field<ContentElement<*>, FileSourceMetadataDescriptor>, input: Operator<Retrievable>, context: IndexContext) = FileSourceMetadataExtractor(input, this, field, merge(field, context))

/**
* Generates and returns a new [FileSourceMetadataExtractor] for the provided [Schema.Field].
Expand All @@ -50,7 +51,7 @@ class FileSourceMetadata : Analyser<ContentElement<*>, FileSourceMetadataDescrip
*
* @return [FileSourceMetadataExtractor]
*/
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext) = FileSourceMetadataExtractor(input, this, null)
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext) = FileSourceMetadataExtractor(input, this, null, context.local[name] ?: emptyMap())

/**
* Generates and returns a new [FileSourceMetadataRetriever] for the provided [Schema.Field].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import kotlin.io.path.absolutePathString
* @author Ralph Gasser
* @version 1.2.0
*/
class FileSourceMetadataExtractor(input: Operator<Retrievable>, analyser: FileSourceMetadata, field: Schema.Field<ContentElement<*>, FileSourceMetadataDescriptor>?) :
AbstractExtractor<ContentElement<*>, FileSourceMetadataDescriptor>(input, analyser, field) {
class FileSourceMetadataExtractor(input: Operator<Retrievable>, analyser: FileSourceMetadata, field: Schema.Field<ContentElement<*>, FileSourceMetadataDescriptor>?, parameters: Map<String,String>) :
AbstractExtractor<ContentElement<*>, FileSourceMetadataDescriptor>(input, analyser, field, parameters) {
/**
* Internal method to check, if [Retrievable] matches this [Extractor] and should thus be processed.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.descriptor.struct.metadata.source.FileSourceMetadataDescriptor
import org.vitrivr.engine.core.model.descriptor.struct.metadata.source.VideoSourceMetadataDescriptor
import org.vitrivr.engine.core.model.metamodel.Analyser
import org.vitrivr.engine.core.model.metamodel.Analyser.Companion.merge
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.query.Query
import org.vitrivr.engine.core.model.query.bool.BooleanQuery
Expand Down Expand Up @@ -43,7 +44,7 @@ class VideoSourceMetadata : Analyser<ContentElement<*>, VideoSourceMetadataDescr
*
* @return [VideoSourceMetadataExtractor]
*/
override fun newExtractor(field: Schema.Field<ContentElement<*>, VideoSourceMetadataDescriptor>, input: Operator<Retrievable>, context: IndexContext) = VideoSourceMetadataExtractor(input, this, field)
override fun newExtractor(field: Schema.Field<ContentElement<*>, VideoSourceMetadataDescriptor>, input: Operator<Retrievable>, context: IndexContext) = VideoSourceMetadataExtractor(input, this, field, merge(field, context))

/**
* Generates and returns a new [VideoSourceMetadataExtractor] for the provided [Schema.Field].
Expand All @@ -54,7 +55,7 @@ class VideoSourceMetadata : Analyser<ContentElement<*>, VideoSourceMetadataDescr
*
* @return [VideoSourceMetadataExtractor]
*/
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext): Extractor<ContentElement<*>, VideoSourceMetadataDescriptor> = VideoSourceMetadataExtractor(input, this, null)
override fun newExtractor(name: String, input: Operator<Retrievable>, context: IndexContext): Extractor<ContentElement<*>, VideoSourceMetadataDescriptor> = VideoSourceMetadataExtractor(input, this, null, context.local[name] ?: emptyMap())

/**
* Generates and returns a new [VideoSourceMetadataRetriever] for the provided [Schema.Field].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import java.util.*
* @author Ralph Gasser
* @version 1.1.0
*/
class VideoSourceMetadataExtractor(input: Operator<Retrievable>, analyser: VideoSourceMetadata, field: Schema.Field<ContentElement<*>, VideoSourceMetadataDescriptor>?) :
AbstractExtractor<ContentElement<*>, VideoSourceMetadataDescriptor>(input, analyser, field) {
class VideoSourceMetadataExtractor(input: Operator<Retrievable>, analyser: VideoSourceMetadata, field: Schema.Field<ContentElement<*>, VideoSourceMetadataDescriptor>?, parameters: Map<String,String>) :
AbstractExtractor<ContentElement<*>, VideoSourceMetadataDescriptor>(input, analyser, field, parameters) {
/**
* Internal method to check, if [Retrievable] matches this [Extractor] and should thus be processed.
*
Expand Down
Loading