Skip to content

Commit

Permalink
adds change due immutable retrievable
Browse files Browse the repository at this point in the history
  • Loading branch information
net-cscience-raphael committed Aug 19, 2024
1 parent 5fecf89 commit 3f0076f
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ import org.vitrivr.engine.core.operators.ingest.Extractor
* @author Ralph Gasser
* @version 1.0.0
*/
abstract class AbstractBatchedExtractor<C : ContentElement<*>, D : Descriptor<*>>(final override val input: Operator<Retrievable>, final override val field: Schema.Field<C, D>?, private val bufferSize: Int = 100) : Extractor<C, D> {
abstract class AbstractBatchedExtractor<C : ContentElement<*>, D : Descriptor<*>>(final override val input: Operator<Retrievable>, final override val analyser: Analyser<C, D>, final override val field: Schema.Field<C, D>?, private val bufferSize: Int = 100) : Extractor<C, D> {

private val logger: KLogger = KotlinLogging.logger {}


init {
require(field == null || this.field.analyser == this.analyser) { "Field and analyser do not match! This is a programmer's error!" }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ data class TextDescriptor(
companion object {
private val SCHEMA = listOf(Attribute(VALUE_ATTRIBUTE_NAME, Type.Text))
}


/**
* Returns the [List] of [Attribute]s of this [TextDescriptor].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ abstract class FesExtractor<C : ContentElement<*>, D : Descriptor<*>>(
field: Schema.Field<C, D>?,
analyser: ExternalFesAnalyser<C, D>,
protected val parameters: Map<String, String>,
) : AbstractExtractor<C, D>(input, analyser, field) {
/** Host of the FES API. */
) : AbstractBatchedExtractor<C, D>(input, analyser, field, parameters["batchSize"]?.toIntOrNull() ?: 1) {

private val contentSources = parameters[CONTENT_AUTHORS_KEY]?.split(",")?.toSet()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class ASRExtractor(

return retrievables.map { retrievable ->
this.filterContent(retrievable).map {
flatResults[index++].also { it.retrievableId = retrievable.id }
flatResults[index++].also { TextDescriptor(it.id, retrievable.id, it.value, it.field) }
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class ImageCaptionExtractor(
if (it !is ImageContent) {
null
} else{
flatResults[index++].also { it.retrievableId = retrievable.id }
flatResults[index++].also { TextDescriptor(it.id, retrievable.id, it.value, it.field) }
}
}.filterNotNull()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.content.element.ImageContent
import org.vitrivr.engine.core.model.content.element.TextContent
import org.vitrivr.engine.core.model.descriptor.Descriptor
import org.vitrivr.engine.core.model.descriptor.scalar.TextDescriptor
import org.vitrivr.engine.core.model.descriptor.vector.FloatVectorDescriptor
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.retrievable.Retrievable
Expand Down Expand Up @@ -65,12 +66,11 @@ class DenseEmbeddingExtractor(
return retrievables.indices.map { index ->
val descriptors = mutableListOf<FloatVectorDescriptor>()
textResultMap[index]?.let {
it.retrievableId = retrievables[index].id
descriptors.add(it)

descriptors.add(FloatVectorDescriptor(it.id, retrievables[index].id, it.vector, it.field))
}
imageResultMap[index]?.let {
it.retrievableId = retrievables[index].id
descriptors.add(it)
descriptors.add(FloatVectorDescriptor(it.id, retrievables[index].id, it.vector, it.field))
}
descriptors
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class OCRExtractor(

return retrievables.map { retrievable ->
this.filterContent(retrievable).map {
flatResults[index++].also { it.retrievableId = retrievable.id }
flatResults[index++].also { TextDescriptor(it.id, retrievable.id, it.value, it.field) }
}
}
}
Expand Down

0 comments on commit 3f0076f

Please sign in to comment.