From bc9c24d352eab0fc869f74621facb33601f0e6dd Mon Sep 17 00:00:00 2001 From: Laura Rettig Date: Tue, 20 Aug 2024 13:38:36 +0200 Subject: [PATCH] Update example-configs to new schema structure, closes #80 --- example-configs/ingestion/example/image.json | 50 +++++-------- example-configs/schema/dense.json | 78 ++++++++------------ example-configs/schema/example.json | 58 ++++++--------- example-configs/schema/metadata.json | 34 ++++----- 4 files changed, 89 insertions(+), 131 deletions(-) diff --git a/example-configs/ingestion/example/image.json b/example-configs/ingestion/example/image.json index 326a939b7..c100364b3 100644 --- a/example-configs/ingestion/example/image.json +++ b/example-configs/ingestion/example/image.json @@ -1,7 +1,6 @@ { - "schemas": [ - { - "name": "example", + "schemas": { + "example": { "connection": { "database": "CottontailConnectionProvider", "parameters": { @@ -9,58 +8,50 @@ "port": "1865" } }, - "fields": [ - { - "name": "averagecolor", + "fields": { + "averagecolor": { "factory": "AverageColor" }, - { - "name": "file", + "file": { "factory": "FileSourceMetadata" }, - { - "name": "clip", + "clip": { "factory": "DenseEmbedding", "parameters": { "host": "http://127.0.0.1:8888", "model": "open-clip-vit-b32", - "length":"512" + "length": "512" } }, - { - "name": "dino", + "dino": { "factory": "DenseEmbedding", "parameters": { "host": "http://127.0.0.1:8888/", "model": "dino-v2-vits14", - "length":"384" + "length": "384" } }, - { - "name": "whisper", + "whisper": { "factory": "ASR", "parameters": { "host": "http://127.0.0.1:8888/", "model": "whisper" } }, - { - "name": "ocr", + "ocr": { "factory": "OCR", "parameters": { "host": "http://127.0.0.1:8888/", "model": "tesseract" } }, - { - "name": "time", + "time": { "factory": "TemporalMetadata" }, - { - "name": "video", + "video": { "factory": "VideoSourceMetadata" - }, - ], + } + }, "resolvers": { "disk": { "factory": "DiskResolver", @@ -69,9 +60,8 @@ } } }, - "exporters": [ - { - "name": "thumbnail", + "exporters": { + "thumbnail": { "factory": "ThumbnailExporter", "resolverName": "disk", "parameters": { @@ -79,8 +69,8 @@ "mimeType": "JPG" } } - ], - "extractionPipelines": [] + }, + "extractionPipelines": {} } - ] + } } diff --git a/example-configs/schema/dense.json b/example-configs/schema/dense.json index d77bc4dac..1acc27c67 100644 --- a/example-configs/schema/dense.json +++ b/example-configs/schema/dense.json @@ -1,7 +1,6 @@ { - "schemas": [ - { - "name": "dense", + "schemas": { + "dense": { "connection": { "database": "PgVectorConnectionProvider", "parameters": { @@ -11,113 +10,100 @@ "password": "admin" } }, - "fields": [ - { - "name": "averagecolor", + "fields": { + "averagecolor": { "factory": "AverageColor" }, - { - "name": "file", + "file": { "factory": "FileSourceMetadata" }, - { - "name": "time", + "time": { "factory": "TemporalMetadata" }, - { - "name": "video", + "video": { "factory": "VideoSourceMetadata" }, - { - "name": "asrSparse", + "asrSparse": { "factory": "ASR", "parameters": { "host": "http://10.34.64.84:8888/", "model": "whisper", "timeoutSeconds": "100", - "retries":"1000" + "retries": "1000" } }, - { - "name": "asrDense", + "asrDense": { "factory": "DenseEmbedding", "parameters": { "host": "http://10.34.64.84:8888/", "model": "e5mistral7b-instruct", "length": "4096", "timeoutSeconds": "100", - "retries":"1000" + "retries": "1000" } }, - { - "name": "clip", + "clip": { "factory": "DenseEmbedding", "parameters": { "host": "http://10.34.64.84:8888/", "model": "open-clip-vit-b32", "length": "512", "timeoutSeconds": "100", - "retries":"1000" + "retries": "1000" } }, - - { - "name": "documentType", + "documentType": { "factory": "ImageClassification", "parameters": { "host": "http://10.34.64.84:8888/", "model": "open-clip-vit-b32", "timeoutSeconds": "100", - "retries":"1000", + "retries": "1000", "classes": "text document,photograph", "threshold": "0.1", "top_k": "1" } }, - { - "name": "captionSparse", + "captionSparse": { "factory": "ImageCaption", "parameters": { "host": "http://10.34.64.84:8888/", "timeoutSeconds": "100", - "retries":"1000", + "retries": "1000", "model": "gpt4o", "prompt": "Describe the contents of this image to help document the image and to aid archivists in searching for the image. Use information from the internet to enhance the description. The image is part of the PTT Archive which preserves the history (1848-1997) of Swiss Post, Telegraphy and Telephony (PTT)." } }, - { - "name": "captionDense", + "captionDense": { "factory": "DenseEmbedding", "parameters": { "host": "http://10.34.64.84:8888/", "model": "e5mistral7b-instruct", "length": "4096", "timeoutSeconds": "100", - "retries":"1000" + "retries": "1000" } }, - { - "name": "ocrSparse", + "ocrSparse": { "factory": "OCR", "parameters": { "host": "http://10.34.64.84:8888/", "model": "tesseract", "timeoutSeconds": "100", - "retries":"1000" + "retries": "1000" } }, - { - "name": "ocrDense", + "ocrDense": { "factory": "DenseEmbedding", "parameters": { "host": "http://10.34.64.84:8888/", "model": "e5mistral7b-instruct", "length": "4096", "timeoutSeconds": "100", - "retries":"1000" + "retries": "1000" } } - ], + }, "resolvers": { "disk": { "factory": "DiskResolver", @@ -126,9 +112,8 @@ } } }, - "exporters": [ - { - "name": "thumbnail", + "exporters": { + "thumbnail": { "factory": "ThumbnailExporter", "resolverName": "disk", "parameters": { @@ -136,13 +121,12 @@ "mimeType": "JPG" } } - ], - "extractionPipelines": [ - { - "name": "wit1", + }, + "extractionPipelines": { + "wit1": { "path": "./pipelines/wit1.json" } - ] + } } - ] + } } diff --git a/example-configs/schema/example.json b/example-configs/schema/example.json index bc1ab23ae..c23fff8cc 100644 --- a/example-configs/schema/example.json +++ b/example-configs/schema/example.json @@ -1,7 +1,6 @@ { - "schemas": [ - { - "name": "example", + "schemas": { + "example": { "connection": { "database": "CottontailConnectionProvider", "parameters": { @@ -9,58 +8,50 @@ "port": "1865" } }, - "fields": [ - { - "name": "averagecolor", + "fields": { + "averagecolor": { "factory": "AverageColor" }, - { - "name": "file", + "file": { "factory": "FileSourceMetadata" }, - { - "name": "clip", + "clip": { "factory": "DenseEmbedding", "parameters": { "host": "http://127.0.0.1:8888", "model": "open-clip-vit-b32", - "length":"512" + "length": "512" } }, - { - "name": "dino", + "dino": { "factory": "DenseEmbedding", "parameters": { "host": "http://127.0.0.1:8888/", "model": "dino-v2-vits14", - "length":"384" + "length": "384" } }, - { - "name": "whisper", + "whisper": { "factory": "ASR", "parameters": { "host": "http://127.0.0.1:8888/", "model": "whisper" } }, - { - "name": "ocr", + "ocr": { "factory": "OCR", "parameters": { "host": "http://127.0.0.1:8888/", "model": "tesseract" } }, - { - "name": "time", + "time": { "factory": "TemporalMetadata" }, - { - "name": "video", + "video": { "factory": "VideoSourceMetadata" - }, - ], + } + }, "resolvers": { "disk": { "factory": "DiskResolver", @@ -69,9 +60,8 @@ } } }, - "exporters": [ - { - "name": "thumbnail", + "exporters": { + "thumbnail": { "factory": "ThumbnailExporter", "resolverName": "disk", "parameters": { @@ -79,17 +69,15 @@ "mimeType": "JPG" } } - ], - "extractionPipelines": [ - { - "name": "video", + }, + "extractionPipelines": { + "video": { "path": "./example/video.json" }, - { - "name": "image", + "image": { "path": "./example/image.json" } - ] + } } - ] + } } diff --git a/example-configs/schema/metadata.json b/example-configs/schema/metadata.json index 2bfc0a67a..1fd27f2d6 100644 --- a/example-configs/schema/metadata.json +++ b/example-configs/schema/metadata.json @@ -1,7 +1,6 @@ { - "schemas": [ - { - "name": "metadata", + "schemas": { + "metadata": { "connection": { "database": "CottontailConnectionProvider", "parameters": { @@ -9,17 +8,14 @@ "port": "1865" } }, - "fields": [ - { - "name": "file", + "fields": { + "file": { "factory": "FileSourceMetadata" }, - { - "name": "time", + "time": { "factory": "TemporalMetadata" }, - { - "name": "exif", + "exif": { "factory": "ExifMetadata", "parameters": { "ExifSubIFD_FocalLength": "INT", @@ -29,15 +25,15 @@ "time_zone": "STRING" } } - ], - "resolvers": { - "disk": { - "factory": "DiskResolver", - "parameters": { - "location": "./thumbnails/vitrivr" - } - } + }, + "resolvers": { + "disk": { + "factory": "DiskResolver", + "parameters": { + "location": "./thumbnails/vitrivr" + } } + } } - ] + } } \ No newline at end of file