Skip to content

Commit

Permalink
Merge pull request #57 from vitrivr/fes-module
Browse files Browse the repository at this point in the history
Added FES integration including whisper
  • Loading branch information
faberf authored May 17, 2024
2 parents f5a14ad + 01cd236 commit 7201d34
Show file tree
Hide file tree
Showing 26 changed files with 1,592 additions and 170 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ To use it, we specifically craft a corresponding _field config_:
There are no additional parameters, unlike, for instance, an [`ExternalAnalyser`](/vitrivr-engine-plugin-features/src/main/kotlin/org/vitrivr/engine/base/features/external/ExternalAnalyser.kt),
which requires the parameter `host` with an endpoint as value.

For analysers that require a running [Feature Extraction Server](https://github.com/faberf/feature-extraction-server) (FES), the `host` parameter is required. Additionally, the `model` parameter may be used to specify a non-default model which should execute the task. Of course, this requires that the FES has the necessary plugins installed. See the [FES documentation](https://github.com/faberf/feature-extraction-server) for more information.

- For [`ASR`] the analyser will perform automatic speech recognition on the audio content.
- For [`OCR`] the analyser will perform optical character recognition on the image content.
- For [`DenseEmbedding`] the analyser will embed text / images as float vectors. Additionally, the `length` parameter is required to specify the length of the embedding.
- For [`ImageCaption`] the analyser will generate a caption for the image content. Optionally, a `prompt` parameter can be used to specify a prompt for the caption generation. For example, the prompt could have the form `"Question: What is in the image? Answer:"`.
- For [`ImageClassification`] the analyser will classify the image content. Additionally, the `classes` parameter is required, which should contain the classes to classify the image into, separated by commas. Optionally, the `top_k` and `threshold` parameters can be used to specify the number of top classes to return and the threshold for the classification.


Other fields are for (technical) metadata such as the [`FileSourceMetadata`](/vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/features/metadata/source/file/FileSourceMetadata.kt),
which additionally stores the file's path and size.

Expand Down
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ plugins {
id 'java-library'
id 'idea'
id 'org.jetbrains.kotlin.plugin.serialization' version "$version_kotlin"
id 'org.openapi.generator' version '5.2.0'
id 'org.openapi.generator' version '7.4.0'
id "de.undercouch.download" version "5.4.0"
}

Expand Down
2 changes: 1 addition & 1 deletion settings.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ include 'vitrivr-engine-server'
include 'vitrivr-engine-module-cottontaildb'
include 'vitrivr-engine-module-features'
include 'vitrivr-engine-module-m3d'

include 'vitrivr-engine-module-fes'
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package org.vitrivr.engine.core.features

import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.onCompletion
import kotlinx.coroutines.flow.onEach
import org.vitrivr.engine.core.model.content.element.ContentElement
import org.vitrivr.engine.core.model.descriptor.Descriptor
import org.vitrivr.engine.core.model.metamodel.Schema
import org.vitrivr.engine.core.model.retrievable.Retrievable
import org.vitrivr.engine.core.operators.Operator
import org.vitrivr.engine.core.operators.ingest.Extractor
import java.util.*

/**
* An abstract [Extractor] implementation that is suitable for [Extractor] implementations which extract descriptors in batches of multiple retrievables.
*
* @author Fynn Faber
* @author Ralph Gasser
* @version 1.0.0
*/
abstract class AbstractBatchedExtractor<C : ContentElement<*>, D : Descriptor>(final override val input: Operator<Retrievable>, final override val field: Schema.Field<C, D>?, private val bufferSize: Int = 100) :
Extractor<C, D> {

/**
* A default [Extractor] implementation for batched extraction. It executes the following steps:
*
* - It checks if the [Retrievable] matches the [Extractor] by calling [matches].
* - If the [Retrievable] matches, it is added to a buffer.
* - If the buffer reaches a certain size, the [Extractor] is called to extract descriptors from the buffer.
* - The descriptors are then added to the [Retrievable].
*
* @return [Flow] of [Retrievable]
*/
final override fun toFlow(scope: CoroutineScope): Flow<Retrievable> {

val batch = mutableListOf<Retrievable>()

/* Prepare and return flow. */
return this.input.toFlow(scope).onEach { retrievable ->
if (this.matches(retrievable)) {
batch.add(retrievable)
}
if (batch.size >= bufferSize) {
val descriptors = extract(batch)
// zip descriptors and batch
for (i in batch.indices) {
val r = batch[i]
for (d in descriptors[i]) {
r.addDescriptor(d)
}
}
batch.clear()
}
}.onCompletion {
/* Persist buffer if necessary. */
if (batch.isNotEmpty()) {
val descriptors = extract(batch)
// zip descriptors and batch
for (i in batch.indices) {
val r = batch[i]
for (d in descriptors[i]) {
r.addDescriptor(d)
}
}
batch.clear()
}
}
}

/**
* Internal method to check, if [Retrievable] matches this [Extractor] and should thus be processed.
*
* @param retrievable The [Retrievable] to check.
* @return True on match, false otherwise,
*/
protected abstract fun matches(retrievable: Retrievable): Boolean

/**
* Internal method to perform extraction on batch of [Retrievable].
**
* @param retrievables The list of [Retrievable] to process.
* @return List of lists of resulting [Descriptor]s, one list for each [Retrievable].
*/
protected abstract fun extract(retrievables: List<Retrievable>): List<List<D>>

}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import org.vitrivr.engine.core.model.types.Value
*/
data class LabelDescriptor(
override val id: DescriptorId,
override val retrievableId: RetrievableId,
override val retrievableId: RetrievableId?,
val label: Value.String,
val confidence: Value.Float,
override val field: Schema.Field<*, LabelDescriptor>? = null
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.vitrivr.engine.core.operators.persistence

import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
Expand All @@ -21,6 +22,9 @@ import org.vitrivr.engine.core.operators.Operator
*/
class PersistingSink(override val input: Operator<Retrievable>, val context: IndexContext) : Operator.Sink<Retrievable> {

/** Logger instance. */
private val logger = KotlinLogging.logger {}

/** The [RetrievableWriter] instance used by this [PersistingSink]. */
private val writer: RetrievableWriter by lazy {
this.context.schema.connection.getRetrievableWriter()
Expand Down Expand Up @@ -91,6 +95,8 @@ class PersistingSink(override val input: Operator<Retrievable>, val context: Ind
val field = descriptor.field
if (field != null) {
into.third.compute(field) { _, v -> (v ?: mutableSetOf()).apply { add(descriptor) } }
}else{
logger.debug { "Descriptor $descriptor has no field and will not be persisted." }
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package org.vitrivr.engine.core.util.extension

import org.vitrivr.engine.core.model.content.element.AudioContent
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.util.*

private fun writeWaveHeader(buffer: ByteBuffer, samplingRate: Float, channels: Short, length: Int) {
val subChunk2Length = length * channels * (16 / 8) // Assuming 16 bits per sample

// RIFF Chunk
buffer.put("RIFF".toByteArray())
buffer.putInt(36 + subChunk2Length)
buffer.put("WAVE".toByteArray())

// fmt chunk
buffer.put("fmt ".toByteArray())
buffer.putInt(16) // PCM header size
buffer.putShort(1) // Audio format 1 = PCM
buffer.putShort(channels)
buffer.putInt(samplingRate.toInt())
buffer.putInt((samplingRate * channels * (16 / 8)).toInt()) // Byte rate
buffer.putShort((channels * (16 / 8)).toShort()) // Block align
buffer.putShort(16) // Bits per sample

// data chunk
buffer.put("data".toByteArray())
buffer.putInt(subChunk2Length)
}

/**
* Converts the audio content to a data URL.
*
* @return Data URL
*/
fun AudioContent.toDataURL(): String {
val data = this.content
val buffer = ByteBuffer.allocate(44 + data.remaining() * 2).order(ByteOrder.LITTLE_ENDIAN)

// Write WAV header
writeWaveHeader(buffer, this.samplingRate.toFloat(), this.channels, data.remaining())

while (data.hasRemaining()) {
val sample = data.get()
buffer.putShort(sample)
}

val base64 = Base64.getEncoder().encodeToString(buffer.array())
return "data:audio/wav;base64,$base64"
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,9 @@ import java.nio.ShortBuffer
import java.util.*
import java.util.concurrent.TimeUnit


/**
* A [Decoder] that can decode [ImageContent] and [AudioContent] from a [Source] of [MediaType.VIDEO].
*
* @author Fynn Firouz Faber
* @author Ralph Gasser
* @version 2.0.0
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.vitrivr.engine.base.features.external.implementations.clip
package org.vitrivr.engine.base.features.external.common

import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.flow
Expand All @@ -11,7 +11,7 @@ import org.vitrivr.engine.core.model.query.proximity.ProximityQuery
import org.vitrivr.engine.core.util.math.ScoringFunctions

/**
* [CLIPRetriever] implementation for external CLIP image feature retrieval.
* [DenseRetriever] implementation for proximity-based retrieval on float vector embeddings.
*
* @param field Schema field for which the retriever operates.
* @param query The query vector for proximity-based retrieval.
Expand All @@ -20,12 +20,12 @@ import org.vitrivr.engine.core.util.math.ScoringFunctions
* @see [AbstractRetriever]
* @see [ProximityQuery]
*
* @author Rahel Arnold
* @author Rahel Arnold, Fynn Faber
* @version 1.0.0
*/
class CLIPRetriever(field: Schema.Field<ContentElement<*>, FloatVectorDescriptor>, query: ProximityQuery<*>, context: QueryContext) : AbstractRetriever<ContentElement<*>, FloatVectorDescriptor>(field, query, context) {
class DenseRetriever<C : ContentElement<*>>(field: Schema.Field<C, FloatVectorDescriptor>, query: ProximityQuery<*>, context: QueryContext) : AbstractRetriever<C, FloatVectorDescriptor>(field, query, context) {
override fun toFlow(scope: CoroutineScope) = flow {
this@CLIPRetriever.reader.getAll(this@CLIPRetriever.query).forEach {
this@DenseRetriever.reader.getAll(this@DenseRetriever.query).forEach {
it.addAttribute(ScoringFunctions.max(it))
emit(it)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.vitrivr.engine.base.features.external.implementations.clip

import org.vitrivr.engine.base.features.external.ExternalAnalyser
import org.vitrivr.engine.base.features.external.implementations.dino.DINORetriever
import org.vitrivr.engine.base.features.external.common.DenseRetriever
import org.vitrivr.engine.core.context.IndexContext
import org.vitrivr.engine.core.context.QueryContext
import org.vitrivr.engine.core.model.content.element.ContentElement
Expand Down Expand Up @@ -94,11 +94,11 @@ class CLIP : ExternalAnalyser<ContentElement<*>,FloatVectorDescriptor>() {
* @return A new [Retriever] instance for this [CLIP]
* @throws [UnsupportedOperationException], if this [CLIP] does not support the creation of an [Retriever] instance.
*/
override fun newRetrieverForQuery(field: Schema.Field<ContentElement<*>, FloatVectorDescriptor>, query: Query, context: QueryContext): CLIPRetriever {
override fun newRetrieverForQuery(field: Schema.Field<ContentElement<*>, FloatVectorDescriptor>, query: Query, context: QueryContext): DenseRetriever<ContentElement<*>> {
require(field.analyser == this) { "The field '${field.fieldName}' analyser does not correspond with this analyser. This is a programmer's error!" }
require(query is ProximityQuery<*> && query.value.first() is Value.Float) { "The query is not a ProximityQuery<Value.Float>." }
@Suppress("UNCHECKED_CAST")
return CLIPRetriever(field, query as ProximityQuery<Value.Float>, context)
return DenseRetriever(field, query as ProximityQuery<Value.Float>, context)
}

/**
Expand All @@ -111,7 +111,7 @@ class CLIP : ExternalAnalyser<ContentElement<*>,FloatVectorDescriptor>() {
* @return A new [Retriever] instance for this [Analyser]
* @throws [UnsupportedOperationException], if this [Analyser] does not support the creation of an [Retriever] instance.
*/
override fun newRetrieverForDescriptors(field: Schema.Field<ContentElement<*>, FloatVectorDescriptor>, descriptors: Collection<FloatVectorDescriptor>, context: QueryContext): CLIPRetriever {
override fun newRetrieverForDescriptors(field: Schema.Field<ContentElement<*>, FloatVectorDescriptor>, descriptors: Collection<FloatVectorDescriptor>, context: QueryContext): DenseRetriever<ContentElement<*>> {
/* Prepare query parameters. */
val k = context.getProperty(field.fieldName, "limit")?.toLongOrNull() ?: 1000L
val fetchVector = context.getProperty(field.fieldName, "returnDescriptor")?.toBooleanStrictOrNull() ?: false
Expand All @@ -130,7 +130,7 @@ class CLIP : ExternalAnalyser<ContentElement<*>,FloatVectorDescriptor>() {
* @return A new [Retriever] instance for this [CLIP]
* @throws [UnsupportedOperationException], if this [CLIP] does not support the creation of an [Retriever] instance.
*/
override fun newRetrieverForContent(field: Schema.Field<ContentElement<*>, FloatVectorDescriptor>, content: Collection<ContentElement<*>>, context: QueryContext): Retriever<ContentElement<*>, FloatVectorDescriptor> {
override fun newRetrieverForContent(field: Schema.Field<ContentElement<*>, FloatVectorDescriptor>, content: Collection<ContentElement<*>>, context: QueryContext): DenseRetriever<ContentElement<*>> {
val host = field.parameters[HOST_PARAMETER_NAME] ?: HOST_PARAMETER_DEFAULT

/* Extract vectors from content. */
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.vitrivr.engine.base.features.external.implementations.dino

import org.vitrivr.engine.base.features.external.ExternalAnalyser
import org.vitrivr.engine.base.features.external.common.DenseRetriever
import org.vitrivr.engine.core.context.IndexContext
import org.vitrivr.engine.core.context.QueryContext
import org.vitrivr.engine.core.model.content.Content
Expand Down Expand Up @@ -88,11 +89,11 @@ class DINO : ExternalAnalyser<ImageContent,FloatVectorDescriptor>() {
* @return A new [Retriever] instance for this [Analyser]
* @throws [UnsupportedOperationException], if this [Analyser] does not support the creation of an [Retriever] instance.
*/
override fun newRetrieverForQuery(field: Schema.Field<ImageContent, FloatVectorDescriptor>, query: Query, context: QueryContext): DINORetriever {
override fun newRetrieverForQuery(field: Schema.Field<ImageContent, FloatVectorDescriptor>, query: Query, context: QueryContext): DenseRetriever<ImageContent> {
require(field.analyser == this) { "The field '${field.fieldName}' analyser does not correspond with this analyser. This is a programmer's error!" }
require(query is ProximityQuery<*> && query.value.first() is Value.Float) { "The query is not a ProximityQuery<Value.Float>." }
@Suppress("UNCHECKED_CAST")
return DINORetriever(field, query as ProximityQuery<Value.Float>, context)
return DenseRetriever(field, query as ProximityQuery<Value.Float>, context)
}

/**
Expand All @@ -105,7 +106,7 @@ class DINO : ExternalAnalyser<ImageContent,FloatVectorDescriptor>() {
* @return A new [Retriever] instance for this [Analyser]
* @throws [UnsupportedOperationException], if this [Analyser] does not support the creation of an [Retriever] instance.
*/
override fun newRetrieverForDescriptors(field: Schema.Field<ImageContent, FloatVectorDescriptor>, descriptors: Collection<FloatVectorDescriptor>, context: QueryContext): DINORetriever {
override fun newRetrieverForDescriptors(field: Schema.Field<ImageContent, FloatVectorDescriptor>, descriptors: Collection<FloatVectorDescriptor>, context: QueryContext): DenseRetriever<ImageContent> {
/* Prepare query parameters. */
val k = context.getProperty(field.fieldName, "limit")?.toLongOrNull() ?: 1000L
val fetchVector = context.getProperty(field.fieldName, "returnDescriptor")?.toBooleanStrictOrNull() ?: false
Expand All @@ -124,7 +125,7 @@ class DINO : ExternalAnalyser<ImageContent,FloatVectorDescriptor>() {
* @return A new [Retriever] instance for this [Analyser]
* @throws [UnsupportedOperationException], if this [Analyser] does not support the creation of an [Retriever] instance.
*/
override fun newRetrieverForContent(field: Schema.Field<ImageContent, FloatVectorDescriptor>, content: Collection<ImageContent>, context: QueryContext): DINORetriever {
override fun newRetrieverForContent(field: Schema.Field<ImageContent, FloatVectorDescriptor>, content: Collection<ImageContent>, context: QueryContext): DenseRetriever<ImageContent> {
require(field.analyser == this) { "The field '${field.fieldName}' analyser does not correspond with this analyser. This is a programmer's error!" }
val host = field.parameters[HOST_PARAMETER_NAME] ?: HOST_PARAMETER_DEFAULT

Expand Down

This file was deleted.

Loading

0 comments on commit 7201d34

Please sign in to comment.