-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #57 from vitrivr/fes-module
Added FES integration including whisper
- Loading branch information
Showing
26 changed files
with
1,592 additions
and
170 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
87 changes: 87 additions & 0 deletions
87
...-engine-core/src/main/kotlin/org/vitrivr/engine/core/features/AbstractBatchedExtractor.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
package org.vitrivr.engine.core.features | ||
|
||
import kotlinx.coroutines.CoroutineScope | ||
import kotlinx.coroutines.flow.Flow | ||
import kotlinx.coroutines.flow.onCompletion | ||
import kotlinx.coroutines.flow.onEach | ||
import org.vitrivr.engine.core.model.content.element.ContentElement | ||
import org.vitrivr.engine.core.model.descriptor.Descriptor | ||
import org.vitrivr.engine.core.model.metamodel.Schema | ||
import org.vitrivr.engine.core.model.retrievable.Retrievable | ||
import org.vitrivr.engine.core.operators.Operator | ||
import org.vitrivr.engine.core.operators.ingest.Extractor | ||
import java.util.* | ||
|
||
/**
 * An abstract [Extractor] implementation that is suitable for [Extractor] implementations which extract descriptors in batches of multiple retrievables.
 *
 * @param input The [Operator] that provides the [Retrievable]s to process.
 * @param field The [Schema.Field] this [Extractor] belongs to (may be null).
 * @param bufferSize The number of matching [Retrievable]s that are collected before [extract] is invoked. Defaults to 100.
 *
 * @author Fynn Faber
 * @author Ralph Gasser
 * @version 1.0.0
 */
abstract class AbstractBatchedExtractor<C : ContentElement<*>, D : Descriptor>(final override val input: Operator<Retrievable>, final override val field: Schema.Field<C, D>?, private val bufferSize: Int = 100) :
    Extractor<C, D> {

    /**
     * A default [Extractor] implementation for batched extraction. It executes the following steps:
     *
     * - It checks if the [Retrievable] matches the [Extractor] by calling [matches].
     * - If the [Retrievable] matches, it is added to a buffer.
     * - If the buffer reaches [bufferSize] entries, [extract] is called to extract descriptors from the buffer.
     * - The descriptors are then added to the respective [Retrievable].
     * - When the upstream flow completes, whatever is left in the buffer is processed the same way.
     *
     * NOTE(review): every [Retrievable] is passed downstream as soon as it has been inspected, i.e., for all but
     * the last element of a batch the descriptors are attached AFTER the element has been emitted. Downstream
     * operators that read descriptors eagerly should be checked against this.
     *
     * @param scope The [CoroutineScope] handed to the input [Operator].
     * @return [Flow] of [Retrievable]
     */
    final override fun toFlow(scope: CoroutineScope): Flow<Retrievable> {
        val batch = mutableListOf<Retrievable>()

        /* Prepare and return flow. */
        return this.input.toFlow(scope).onEach { retrievable ->
            if (matches(retrievable)) {
                batch.add(retrievable)

                /* The buffer can only grow here, so this is the only place where it needs to be checked. */
                if (batch.size >= bufferSize) {
                    flush(batch)
                }
            }
        }.onCompletion {
            /* Process the remainder of the buffer, if necessary. */
            if (batch.isNotEmpty()) {
                flush(batch)
            }
        }
    }

    /**
     * Extracts the descriptors for all buffered [Retrievable]s, attaches them and empties the buffer.
     *
     * The buffer is cleared even if [extract] fails, so that a failed batch is not extracted a second time
     * when the flow completes.
     *
     * @param batch The buffer of [Retrievable]s to process. Empty when this method returns.
     * @throws IllegalStateException If [extract] does not return exactly one list of descriptors per [Retrievable].
     */
    private fun flush(batch: MutableList<Retrievable>) {
        try {
            val descriptors = extract(batch)
            check(descriptors.size == batch.size) {
                "Batched extraction returned ${descriptors.size} descriptor lists for ${batch.size} retrievables."
            }

            /* Pair every retrievable with the descriptors that were extracted for it. */
            batch.zip(descriptors).forEach { (retrievable, extracted) ->
                extracted.forEach { retrievable.addDescriptor(it) }
            }
        } finally {
            batch.clear()
        }
    }

    /**
     * Internal method to check, if [Retrievable] matches this [Extractor] and should thus be processed.
     *
     * @param retrievable The [Retrievable] to check.
     * @return True on match, false otherwise.
     */
    protected abstract fun matches(retrievable: Retrievable): Boolean

    /**
     * Internal method to perform extraction on batch of [Retrievable].
     *
     * @param retrievables The list of [Retrievable] to process.
     * @return List of lists of resulting [Descriptor]s, one list for each [Retrievable] (in the same order).
     */
    protected abstract fun extract(retrievables: List<Retrievable>): List<List<D>>

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
...ine-core/src/main/kotlin/org/vitrivr/engine/core/util/extension/AudioContentExtensions.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package org.vitrivr.engine.core.util.extension | ||
|
||
import org.vitrivr.engine.core.model.content.element.AudioContent | ||
import java.nio.ByteBuffer | ||
import java.nio.ByteOrder | ||
import java.util.* | ||
|
||
/**
 * Writes the 44 byte header of a 16 bit PCM WAVE file to the current position of the given [ByteBuffer].
 *
 * Multi-byte values are written in the byte order the buffer is configured with; the caller is expected
 * to hand in a little-endian buffer.
 *
 * @param buffer The [ByteBuffer] to write to.
 * @param samplingRate The sampling rate; the fractional part is dropped for the sample rate field.
 * @param channels The number of channels.
 * @param length The number of samples per channel; the data size is derived as length * channels * 2 bytes.
 */
private fun writeWaveHeader(buffer: ByteBuffer, samplingRate: Float, channels: Short, length: Int) {
    val bitsPerSample = 16
    val bytesPerSample = bitsPerSample / 8
    val dataSize = length * channels * bytesPerSample

    with(buffer) {
        /* RIFF chunk descriptor. */
        put("RIFF".toByteArray())
        putInt(36 + dataSize)
        put("WAVE".toByteArray())

        /* Format sub-chunk. */
        put("fmt ".toByteArray())
        putInt(16) /* Size of the PCM format block. */
        putShort(1) /* Audio format: 1 = PCM. */
        putShort(channels)
        putInt(samplingRate.toInt())
        putInt((samplingRate * channels * bytesPerSample).toInt()) /* Byte rate. */
        putShort((channels * bytesPerSample).toShort()) /* Block align. */
        putShort(bitsPerSample.toShort())

        /* Data sub-chunk header. */
        put("data".toByteArray())
        putInt(dataSize)
    }
}
|
||
/**
 * Converts the audio content to a data URL that holds a Base64 encoded, 16 bit PCM WAVE file.
 *
 * The samples are read through a duplicate of the content buffer, so the position of the content buffer
 * itself is not advanced and the conversion can be repeated.
 *
 * @return Data URL of the form `data:audio/wav;base64,...`
 * @throws IllegalArgumentException If the content reports a channel count that is not positive.
 */
fun AudioContent.toDataURL(): String {
    val channels = this.channels
    require(channels > 0) { "Cannot encode audio with $channels channels as WAVE." }

    /* Work on a duplicate: reading from the content buffer directly would leave it exhausted. */
    val samples = this.content.duplicate()
    val sampleCount = samples.remaining()
    val buffer = ByteBuffer.allocate(44 + sampleCount * 2).order(ByteOrder.LITTLE_ENDIAN)

    /*
     * Write WAV header. The header writer multiplies the length by the number of channels, hence it must
     * receive the number of samples per channel. Otherwise, the declared data size exceeds the
     * sampleCount * 2 bytes that are actually written below whenever there is more than one channel.
     */
    writeWaveHeader(buffer, this.samplingRate.toFloat(), channels, sampleCount / channels)

    /* Append the samples as 16 bit little-endian values. */
    while (samples.hasRemaining()) {
        buffer.putShort(samples.get())
    }

    val base64 = Base64.getEncoder().encodeToString(buffer.array())
    return "data:audio/wav;base64,$base64"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
32 changes: 0 additions & 32 deletions
32
...in/kotlin/org/vitrivr/engine/base/features/external/implementations/dino/DINORetriever.kt
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.