Skip to content

Commit

Permalink
Merge branch 'dev' into feature/dynamic-aggregation
Browse files: browse the repository at this point in the history
# Conflicts:
#	vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/features/AbstractBatchedExtractor.kt
#	vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/features/AbstractExtractor.kt
#	vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/features/metadata/source/exif/ExifMetadataExtractor.kt
#	vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/model/descriptor/scalar/TextDescriptor.kt
#	vitrivr-engine-core/src/main/kotlin/org/vitrivr/engine/core/model/descriptor/struct/metadata/source/VideoSourceMetadataDescriptor.kt
#	vitrivr-engine-module-fes/src/main/kotlin/org/vitrivr/engine/base/features/external/common/FesExtractor.kt
#	vitrivr-engine-module-fes/src/main/kotlin/org/vitrivr/engine/base/features/external/implementations/caption/ImageCaptionExtractor.kt
  • Loading branch information
lucaro committed Aug 21, 2024
2 parents 42f0c45 + ddb33d3 commit 9a57907
Show file tree
Hide file tree
Showing 222 changed files with 2,059 additions and 3,306 deletions.
50 changes: 20 additions & 30 deletions example-configs/ingestion/example/image.json
Original file line number Diff line number Diff line change
@@ -1,66 +1,57 @@
{
"schemas": [
{
"name": "example",
"schemas": {
"example": {
"connection": {
"database": "CottontailConnectionProvider",
"parameters": {
"Host": "127.0.0.1",
"port": "1865"
}
},
"fields": [
{
"name": "averagecolor",
"fields": {
"averagecolor": {
"factory": "AverageColor"
},
{
"name": "file",
"file": {
"factory": "FileSourceMetadata"
},
{
"name": "clip",
"clip": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://127.0.0.1:8888",
"model": "open-clip-vit-b32",
"length":"512"
"length": "512"
}
},
{
"name": "dino",
"dino": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://127.0.0.1:8888/",
"model": "dino-v2-vits14",
"length":"384"
"length": "384"
}
},
{
"name": "whisper",
"whisper": {
"factory": "ASR",
"parameters": {
"host": "http://127.0.0.1:8888/",
"model": "whisper"
}
},
{
"name": "ocr",
"ocr": {
"factory": "OCR",
"parameters": {
"host": "http://127.0.0.1:8888/",
"model": "tesseract"
}
},
{
"name": "time",
"time": {
"factory": "TemporalMetadata"
},
{
"name": "video",
"video": {
"factory": "VideoSourceMetadata"
},
],
}
},
"resolvers": {
"disk": {
"factory": "DiskResolver",
Expand All @@ -69,18 +60,17 @@
}
}
},
"exporters": [
{
"name": "thumbnail",
"exporters": {
"thumbnail": {
"factory": "ThumbnailExporter",
"resolverName": "disk",
"parameters": {
"maxSideResolution": "300",
"mimeType": "JPG"
}
}
],
"extractionPipelines": []
},
"extractionPipelines": {}
}
]
}
}
78 changes: 31 additions & 47 deletions example-configs/schema/dense.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"schemas": [
{
"name": "dense",
"schemas": {
"dense": {
"connection": {
"database": "PgVectorConnectionProvider",
"parameters": {
Expand All @@ -11,113 +10,100 @@
"password": "admin"
}
},
"fields": [
{
"name": "averagecolor",
"fields": {
"averagecolor": {
"factory": "AverageColor"
},
{
"name": "file",
"file": {
"factory": "FileSourceMetadata"
},
{
"name": "time",
"time": {
"factory": "TemporalMetadata"
},
{
"name": "video",
"video": {
"factory": "VideoSourceMetadata"
},
{
"name": "asrSparse",
"asrSparse": {
"factory": "ASR",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "whisper",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
},
{
"name": "asrDense",
"asrDense": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "e5mistral7b-instruct",
"length": "4096",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
},
{
"name": "clip",
"clip": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "open-clip-vit-b32",
"length": "512",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
},

{
"name": "documentType",
"documentType": {
"factory": "ImageClassification",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "open-clip-vit-b32",
"timeoutSeconds": "100",
"retries":"1000",
"retries": "1000",
"classes": "text document,photograph",
"threshold": "0.1",
"top_k": "1"
}
},
{
"name": "captionSparse",
"captionSparse": {
"factory": "ImageCaption",
"parameters": {
"host": "http://10.34.64.84:8888/",
"timeoutSeconds": "100",
"retries":"1000",
"retries": "1000",
"model": "gpt4o",
"prompt": "Describe the contents of this image to help document the image and to aid archivists in searching for the image. Use information from the internet to enhance the description. The image is part of the PTT Archive which preserves the history (1848-1997) of Swiss Post, Telegraphy and Telephony (PTT)."
}
},
{
"name": "captionDense",
"captionDense": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "e5mistral7b-instruct",
"length": "4096",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
},
{
"name": "ocrSparse",
"ocrSparse": {
"factory": "OCR",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "tesseract",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
},
{
"name": "ocrDense",
"ocrDense": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "e5mistral7b-instruct",
"length": "4096",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
}
],
},
"resolvers": {
"disk": {
"factory": "DiskResolver",
Expand All @@ -126,23 +112,21 @@
}
}
},
"exporters": [
{
"name": "thumbnail",
"exporters": {
"thumbnail": {
"factory": "ThumbnailExporter",
"resolverName": "disk",
"parameters": {
"maxSideResolution": "400",
"mimeType": "JPG"
}
}
],
"extractionPipelines": [
{
"name": "wit1",
},
"extractionPipelines": {
"wit1": {
"path": "./pipelines/wit1.json"
}
]
}
}
]
}
}
Loading

0 comments on commit 9a57907

Please sign in to comment.