From a0958ac70503874e2e75a96c037d141d05a99016 Mon Sep 17 00:00:00 2001 From: Raphael Date: Wed, 29 May 2024 17:28:33 +0200 Subject: [PATCH] ptt-configs Added ptt configs for benchmark --- .../ptt-benchmark/pipelines/image-tiny.json | 72 +++++++++++ config/ptt-benchmark/pipelines/image.json | 72 +++++++++++ .../ptt-benchmark/pipelines/video-tiny.json | 76 ++++++++++++ config/ptt-benchmark/pipelines/video.json | 76 ++++++++++++ .../queries/Insomnia-export.json | 1 + config/ptt-benchmark/schema.json | 112 ++++++++++++++++++ .../feature/external/common/DenseRetriever.kt | 2 +- 7 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 config/ptt-benchmark/pipelines/image-tiny.json create mode 100644 config/ptt-benchmark/pipelines/image.json create mode 100644 config/ptt-benchmark/pipelines/video-tiny.json create mode 100644 config/ptt-benchmark/pipelines/video.json create mode 100644 config/ptt-benchmark/queries/Insomnia-export.json create mode 100644 config/ptt-benchmark/schema.json diff --git a/config/ptt-benchmark/pipelines/image-tiny.json b/config/ptt-benchmark/pipelines/image-tiny.json new file mode 100644 index 00000000..87e87377 --- /dev/null +++ b/config/ptt-benchmark/pipelines/image-tiny.json @@ -0,0 +1,72 @@ +{ + "schema": "ptt", + "context": { + "contentFactory": "InMemoryContentFactory", + "resolverName": "disk", + "local": { + "content": { + "path": "../cache" + }, + "enumerator": { + "path": "../benchmark/media_objects_tiny", + "depth": "5" + }, + "filter": { + "type": "SOURCE:IMAGE" + } + } + }, + "operators": { + "enumerator": { + "type": "ENUMERATOR", + "factory": "FileSystemEnumerator", + "mediaTypes": ["IMAGE"] + }, + "decoder": { + "type": "DECODER", + "factory": "ImageDecoder" + }, + "file_metadata":{ + "type": "EXTRACTOR", + "fieldName": "file" + }, + "clip": { + "type": "EXTRACTOR", + "fieldName": "clip" + }, + "ocr": { + "type": "EXTRACTOR", + "fieldName": "ocr" + }, + "caption": { + "type": "EXTRACTOR", + "fieldName": "caption" + }, + "document_type": { + "type": "EXTRACTOR", + "fieldName": "document_type" + }, + "filter": { + "type": "TRANSFORMER", + "factory": "TypeFilterTransformer" + } + }, + "operations": { + "enumerator-stage": {"operator": "enumerator"}, + "decoder-stage": {"operator": "decoder", "inputs": ["enumerator-stage"]}, + "filter-stage": {"operator": "filter", "inputs": ["decoder-stage"]}, + "clip-stage": {"operator": "clip", "inputs": ["filter-stage"]}, + "ocr-stage": {"operator": "ocr", "inputs": ["filter-stage"]}, + "caption-stage": {"operator": "caption", "inputs": ["filter-stage"]}, + "document-type-stage": {"operator": "document_type", "inputs": ["filter-stage"]}, + "file-metadata-stage": {"operator": "file_metadata", "inputs": ["filter-stage"]} + }, + "output": [ + "caption-stage", + "ocr-stage", + "clip-stage", + "document-type-stage", + "file-metadata-stage" + ], + "mergeType": "COMBINE" +} diff --git a/config/ptt-benchmark/pipelines/image.json b/config/ptt-benchmark/pipelines/image.json new file mode 100644 index 00000000..30c93f0b --- /dev/null +++ b/config/ptt-benchmark/pipelines/image.json @@ -0,0 +1,72 @@ +{ + "schema": "ptt", + "context": { + "contentFactory": "InMemoryContentFactory", + "resolverName": "disk", + "local": { + "content": { + "path": "../cache" + }, + "enumerator": { + "path": "../benchmark/media_objects", + "depth": "5" + }, + "filter": { + "type": "SOURCE:IMAGE" + } + } + }, + "operators": { + "enumerator": { + "type": "ENUMERATOR", + "factory": "FileSystemEnumerator", + "mediaTypes": ["IMAGE"] + }, + "decoder": { + "type": "DECODER", + "factory": "ImageDecoder" + }, + "file_metadata":{ + "type": "EXTRACTOR", + "fieldName": "file" + }, + "clip": { + "type": "EXTRACTOR", + "fieldName": "clip" + }, + "ocr": { + "type": "EXTRACTOR", + "fieldName": "ocr" + }, + "caption": { + "type": "EXTRACTOR", + "fieldName": "caption" + }, + "document_type": { + "type": "EXTRACTOR", + "fieldName": "document_type" + }, + "filter": { + "type": "TRANSFORMER", + "factory": "TypeFilterTransformer" + } + }, + "operations": { + "enumerator-stage": {"operator": "enumerator"}, + "decoder-stage": {"operator": "decoder", "inputs": ["enumerator-stage"]}, + "filter-stage": {"operator": "filter", "inputs": ["decoder-stage"]}, + "clip-stage": {"operator": "clip", "inputs": ["filter-stage"]}, + "ocr-stage": {"operator": "ocr", "inputs": ["filter-stage"]}, + "caption-stage": {"operator": "caption", "inputs": ["filter-stage"]}, + "document-type-stage": {"operator": "document_type", "inputs": ["filter-stage"]}, + "file-metadata-stage": {"operator": "file_metadata", "inputs": ["filter-stage"]} + }, + "output": [ + "caption-stage", + "ocr-stage", + "clip-stage", + "document-type-stage", + "file-metadata-stage" + ], + "mergeType": "COMBINE" +} diff --git a/config/ptt-benchmark/pipelines/video-tiny.json b/config/ptt-benchmark/pipelines/video-tiny.json new file mode 100644 index 00000000..9b74ae15 --- /dev/null +++ b/config/ptt-benchmark/pipelines/video-tiny.json @@ -0,0 +1,76 @@ +{ + "schema": "ptt", + "context": { + "contentFactory": "InMemoryContentFactory", + "resolverName": "disk", + "local": { + "content": { + "path": "../cache" + }, + "enumerator": { + "path": "../benchmark/media_objects_tiny", + "depth": "5" + }, + "filter": { + "type": "SOURCE:VIDEO" + }, + "decoder": { + "timeWindowMs": "2000" + } + } + }, + "operators": { + "enumerator": { + "type": "ENUMERATOR", + "factory": "FileSystemEnumerator", + "mediaTypes": [ + "VIDEO" + ] + }, + "decoder": { + "type": "DECODER", + "factory": "VideoDecoder" + }, + "selector": { + "type": "TRANSFORMER", + "factory": "LastContentAggregator" + }, + "clip": { + "type": "EXTRACTOR", + "fieldName": "clip" + }, + "asr": { + "type": "EXTRACTOR", + "fieldName": "asr" + }, + "caption": { + "type": "EXTRACTOR", + "fieldName": "caption" + }, + "ocr": { + "type": "EXTRACTOR", + "fieldName": "ocr" + }, + "filter": { + "type": "TRANSFORMER", + "factory": "TypeFilterTransformer" + } + }, + "operations": { + "enumerator-stage": {"operator": "enumerator"}, + "filter-stage": {"operator": "filter","inputs": ["enumerator-stage"]}, + "decoder-stage": {"operator": "decoder","inputs": ["filter-stage"]}, + "selector-stage": {"operator": "selector","inputs": ["filter-stage"]}, + "asr-stage": {"operator": "asr", "inputs": ["selector-stage"]}, + "caption-stage": {"operator": "caption", "inputs": ["selector-stage"]}, + "clip-stage": {"operator": "clip", "inputs": ["selector-stage"]}, + "ocr-stage": {"operator": "ocr", "inputs": ["selector-stage"]} +}, + "output": [ + "asr-stage", + "caption-stage", + "clip-stage", + "ocr-stage" + ], + "mergeType": "COMBINE" +} diff --git a/config/ptt-benchmark/pipelines/video.json b/config/ptt-benchmark/pipelines/video.json new file mode 100644 index 00000000..22f55f09 --- /dev/null +++ b/config/ptt-benchmark/pipelines/video.json @@ -0,0 +1,76 @@ +{ + "schema": "ptt", + "context": { + "contentFactory": "CachedContentFactory", + "resolverName": "disk", + "local": { + "content": { + "path": "../cache" + }, + "enumerator": { + "path": "../benchmark/media_objects", + "depth": "5" + }, + "filter": { + "type": "SOURCE:VIDEO" + }, + "decoder": { + "timeWindowMs": "2000" + } + } + }, + "operators": { + "enumerator": { + "type": "ENUMERATOR", + "factory": "FileSystemEnumerator", + "mediaTypes": [ + "VIDEO" + ] + }, + "decoder": { + "type": "DECODER", + "factory": "VideoDecoder" + }, + "selector": { + "type": "TRANSFORMER", + "factory": "LastContentAggregator" + }, + "clip": { + "type": "EXTRACTOR", + "fieldName": "clip" + }, + "asr": { + "type": "EXTRACTOR", + "fieldName": "asr" + }, + "caption": { + "type": "EXTRACTOR", + "fieldName": "caption" + }, + "ocr": { + "type": "EXTRACTOR", + "fieldName": "ocr" + }, + "filter": { + "type": "TRANSFORMER", + "factory": "TypeFilterTransformer" + } + }, + "operations": { + "enumerator-stage": {"operator": "enumerator"}, + "filter-stage": {"operator": "filter","inputs": ["enumerator-stage"]}, + "decoder-stage": {"operator": "decoder","inputs": ["filter-stage"]}, + "selector-stage": {"operator": "selector","inputs": ["filter-stage"]}, + "asr-stage": {"operator": "asr", "inputs": ["selector-stage"]}, + "caption-stage": {"operator": "caption", "inputs": ["selector-stage"]}, + "clip-stage": {"operator": "clip", "inputs": ["selector-stage"]}, + "ocr-stage": {"operator": "ocr", "inputs": ["selector-stage"]} +}, + "output": [ + "asr-stage", + "caption-stage", + "clip-stage", + "ocr-stage" + ], + "mergeType": "COMBINE" +} diff --git a/config/ptt-benchmark/queries/Insomnia-export.json b/config/ptt-benchmark/queries/Insomnia-export.json new file mode 100644 index 00000000..1a42e98f --- /dev/null +++ b/config/ptt-benchmark/queries/Insomnia-export.json @@ -0,0 +1 @@ +{"_type":"export","__export_format":4,"__export_date":"2024-05-29T15:26:50.453Z","__export_source":"insomnia.desktop.app:v2023.5.7","resources":[{"_id":"req_710ce9bafc974f5d968fb304ec8f0b0d","parentId":"fld_f1ea5552140b44f7aba5c10dda977029","modified":1716991471648,"created":1716988833884,"url":"localhost:7070/api/ptt/query","name":"Query: Clip(Text) -> ","description":"","method":"POST","body":{"mimeType":"application/json","text":"{\n\t\t\"inputs\": {\n\t\t\t\t\"mytext\": {\"type\": \"TEXT\", \"data\": \"orange starfish on the seafloor\"}\n\t\t},\n\t\t\"operations\": {\n\t\t\t\"clip\" : {\"type\": \"RETRIEVER\", \"field\": \"clip\", \"input\": \"mytext\"},\n\t\t\t\"relations\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"RelationExpander\", \"input\": \"clip\"},\n\t\t\t\"lookup\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"FieldLookup\", \"input\": \"relations\"},\n\t\t\t\"aggregator\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"ScoreAggregator\", \"input\": \"lookup\"},\n\t\t\t\"filelookup\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"FieldLookup\", \"input\": \"aggregator\"}\n\t\t},\n\t\t\"context\": {\n\t\t\t\"global\": {\n\t\t\t\t\"limit\": \"1000\"\n\t\t\t},\n\t\t\t\"local\" : {\n\t\t\t\t\"lookup\":{\"field\": \"time\", \"keys\": \"start, end\"},\n\t\t\t\t\"relations\" :{\"outgoing\": \"partOf\"},\t\t\t\t\n\t\t\t\t\"filelookup\": {\"field\": \"file\", \"keys\": \"path\"}\n\t\t\t}\n\t\t},\n\t\t\"output\": \"filelookup\"\n}"},"parameters":[],"headers":[{"name":"Content-Type","value":"application/json"},{"name":"User-Agent","value":"Insomnia/2023.5.7"}],"authentication":{},"metaSortKey":-1716989618754,"isPrivate":false,"settingStoreCookies":true,"settingSendCookies":true,"settingDisableRenderRequestBody":false,"settingEncodeUrl":true,"settingRebuildPath":true,"settingFollowRedirects":"global","_type":"request"},{"_id":"fld_f1ea5552140b44f7aba5c10dda977029","parentId":"wrk_1a7fd1d31e4f426ea30d155a375fc77f","modified":1716988828474,"created":1716988828474,"name":"PTT-Benchmark","description":"","environment":{},"environmentPropertyOrder":null,"metaSortKey":-1716988828474,"_type":"request_group"},{"_id":"wrk_1a7fd1d31e4f426ea30d155a375fc77f","parentId":null,"modified":1699640716232,"created":1699640716232,"name":"vitrivr-engine","description":"","scope":"collection","_type":"workspace"},{"_id":"req_37d3973e5cd84d6fb37d188e2f047dd1","parentId":"fld_f1ea5552140b44f7aba5c10dda977029","modified":1716994118302,"created":1716991936483,"url":"localhost:7070/api/ptt/query","name":"Query: Asr(Text) -> ","description":"","method":"POST","body":{"mimeType":"application/json","text":"{\n\t\t\"inputs\": {\n\t\t\t\t\"text\": {\"type\": \"TEXT\", \"data\": \"Zeit ist dann\"}\n\t\t},\n\t\t\"operations\": {\n\t\t\t\"feature\" : {\"type\": \"RETRIEVER\", \"field\": \"asr\", \"input\": \"text\"},\n\t\t\t\"relations\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"RelationExpander\", \"input\": \"feature\"},\n\t\t\t\"lookup\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"FieldLookup\", \"input\": \"relations\"},\n\t\t\t\"aggregator\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"ScoreAggregator\", \"input\": \"lookup\"},\n\t\t\t\"filelookup\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"FieldLookup\", \"input\": \"aggregator\"}\n\t\t},\n\t\t\"context\": {\n\t\t\t\"global\": {\n\t\t\t\t\"limit\": \"1000\"\n\t\t\t},\n\t\t\t\"local\" : {\n\t\t\t\t\"lookup\": {\"field\": \"time\", \"keys\": \"start, end\"},\n\t\t\t\t\"relations\" : {\"outgoing\": \"partOf\"},\t\t\t\t\n\t\t\t\t\"filelookup\": {\"field\": \"file\", \"keys\": \"path\"}\n\t\t\t}\n\t\t},\n\t\t\"output\": \"filelookup\"\n}"},"parameters":[],"headers":[{"name":"Content-Type","value":"application/json"},{"name":"User-Agent","value":"Insomnia/2023.5.7"}],"authentication":{},"metaSortKey":-1713676664026,"isPrivate":false,"settingStoreCookies":true,"settingSendCookies":true,"settingDisableRenderRequestBody":false,"settingEncodeUrl":true,"settingRebuildPath":true,"settingFollowRedirects":"global","_type":"request"},{"_id":"req_0da1ff6d80284cc68aec36e1f00de66e","parentId":"fld_f1ea5552140b44f7aba5c10dda977029","modified":1716994280722,"created":1716994127457,"url":"localhost:7070/api/ptt/query","name":"Query: Caption(Text) -> ","description":"","method":"POST","body":{"mimeType":"application/json","text":"{\n\t\t\"inputs\": {\n\t\t\t\t\"text\": {\"type\": \"TEXT\", \"data\": \"A page\"}\n\t\t},\n\t\t\"operations\": {\n\t\t\t\"feature\" : {\"type\": \"RETRIEVER\", \"field\": \"caption\", \"input\": \"text\"},\n\t\t\t\"relations\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"RelationExpander\", \"input\": \"feature\"},\n\t\t\t\"lookup\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"FieldLookup\", \"input\": \"relations\"},\n\t\t\t\"aggregator\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"ScoreAggregator\", \"input\": \"lookup\"},\n\t\t\t\"filelookup\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"FieldLookup\", \"input\": \"aggregator\"}\n\t\t},\n\t\t\"context\": {\n\t\t\t\"global\": {\n\t\t\t\t\"limit\": \"1000\"\n\t\t\t},\n\t\t\t\"local\" : {\n\t\t\t\t\"lookup\": {\"field\": \"time\", \"keys\": \"start, end\"},\n\t\t\t\t\"relations\" : {\"outgoing\": \"partOf\"},\t\t\t\t\n\t\t\t\t\"filelookup\": {\"field\": \"file\", \"keys\": \"path\"}\n\t\t\t}\n\t\t},\n\t\t\"output\": \"filelookup\"\n}"},"parameters":[],"headers":[{"name":"Content-Type","value":"application/json"},{"name":"User-Agent","value":"Insomnia/2023.5.7"}],"authentication":{},"metaSortKey":-1712020186662,"isPrivate":false,"settingStoreCookies":true,"settingSendCookies":true,"settingDisableRenderRequestBody":false,"settingEncodeUrl":true,"settingRebuildPath":true,"settingFollowRedirects":"global","_type":"request"},{"_id":"req_e14c8167bd0d4297a4e9234f0cb84cf8","parentId":"fld_f1ea5552140b44f7aba5c10dda977029","modified":1716994559844,"created":1716994554076,"url":"localhost:7070/api/ptt/query","name":"Query: Ocr(Text) -> ","description":"","method":"POST","body":{"mimeType":"application/json","text":"{\n\t\t\"inputs\": {\n\t\t\t\t\"text\": {\"type\": \"TEXT\", \"data\": \"A page\"}\n\t\t},\n\t\t\"operations\": {\n\t\t\t\"feature\" : {\"type\": \"RETRIEVER\", \"field\": \"ocr\", \"input\": \"text\"},\n\t\t\t\"relations\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"RelationExpander\", \"input\": \"feature\"},\n\t\t\t\"lookup\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"FieldLookup\", \"input\": \"relations\"},\n\t\t\t\"aggregator\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"ScoreAggregator\", \"input\": \"lookup\"},\n\t\t\t\"filelookup\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"FieldLookup\", \"input\": \"aggregator\"}\n\t\t},\n\t\t\"context\": {\n\t\t\t\"global\": {\n\t\t\t\t\"limit\": \"1000\"\n\t\t\t},\n\t\t\t\"local\" : {\n\t\t\t\t\"lookup\": {\"field\": \"time\", \"keys\": \"start, end\"},\n\t\t\t\t\"relations\" : {\"outgoing\": \"partOf\"},\t\t\t\t\n\t\t\t\t\"filelookup\": {\"field\": \"file\", \"keys\": \"path\"}\n\t\t\t}\n\t\t},\n\t\t\"output\": \"filelookup\"\n}"},"parameters":[],"headers":[{"name":"Content-Type","value":"application/json"},{"name":"User-Agent","value":"Insomnia/2023.5.7"}],"authentication":{},"metaSortKey":-1711191947980,"isPrivate":false,"settingStoreCookies":true,"settingSendCookies":true,"settingDisableRenderRequestBody":false,"settingEncodeUrl":true,"settingRebuildPath":true,"settingFollowRedirects":"global","_type":"request"},{"_id":"req_2ba307fb82ab41c3833096e150cc527b","parentId":"fld_f1ea5552140b44f7aba5c10dda977029","modified":1716995325367,"created":1716994618839,"url":"localhost:7070/api/ptt/query","name":"Query: All(Text) -> ","description":"","method":"POST","body":{"mimeType":"application/json","text":"{\n\t\t\"inputs\": {\n\t\t\t\t\"clip\": {\"type\": \"TEXT\", \"data\": \"A page\"},\n\t\t\t\t\"ocr\": {\"type\": \"TEXT\", \"data\": \"A page\"},\n\t\t\t\t\"asr\": {\"type\": \"TEXT\", \"data\": \"A page\"},\n\t\t\t\t\"caption\": {\"type\": \"TEXT\", \"data\": \"A page\"}\n\t\t},\n\t\t\"operations\": {\n\t\t\t\"feature1\" : {\"type\": \"RETRIEVER\", \"field\": \"clip\", \"input\": \"clip\"},\n\t\t\t\"feature2\" : {\"type\": \"RETRIEVER\", \"field\": \"ocr\", \"input\": \"ocr\"},\n\t\t\t\"feature3\" : {\"type\": \"RETRIEVER\", \"field\": \"asr\", \"input\": \"asr\"},\n\t\t\t\"feature4\" : {\"type\": \"RETRIEVER\", \"field\": \"caption\", \"input\": \"caption\"},\n\t\t\t\"score\" : {\"type\": \"AGGREGATOR\", \"aggregatorName\": \"WeightedScoreFusion\", \"inputs\": [\"feature1\", \"feature2\",\"feature3\", \"feature4\"]},\n\t\t\t\"relations\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"RelationExpander\", \"input\": \"score\"},\n\t\t\t\"lookup\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"FieldLookup\", \"input\": \"relations\"},\n\t\t\t\"aggregator\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"ScoreAggregator\", \"input\": \"lookup\"},\n\t\t\t\"filelookup\" : {\"type\": \"TRANSFORMER\", \"transformerName\": \"FieldLookup\", \"input\": \"aggregator\"}\n\t\t},\n\t\t\"context\": {\n\t\t\t\"global\": {\n\t\t\t\t\"limit\": \"1000\"\n\t\t\t},\n\t\t\t\"local\" : {\n\t\t\t\t\"lookup\": {\"field\": \"time\", \"keys\": \"start, end\"},\n\t\t\t\t\"relations\" : {\"outgoing\": \"partOf\"},\t\t\t\t\n\t\t\t\t\"filelookup\": {\"field\": \"file\", \"keys\": \"path\"},\n\t\t\t\t\"score\": {\"weights\": \"0.5,0.2,0.2,0.1\"}\n\t\t\t}\n\t\t},\n\t\t\"output\": \"filelookup\"\n}"},"parameters":[],"headers":[{"name":"Content-Type","value":"application/json"},{"name":"User-Agent","value":"Insomnia/2023.5.7"}],"authentication":{},"metaSortKey":-1710777828639,"isPrivate":false,"settingStoreCookies":true,"settingSendCookies":true,"settingDisableRenderRequestBody":false,"settingEncodeUrl":true,"settingRebuildPath":true,"settingFollowRedirects":"global","_type":"request"},{"_id":"env_39fe09c413c6930fe51c72e599bb686d1182901b","parentId":"wrk_1a7fd1d31e4f426ea30d155a375fc77f","modified":1699640716236,"created":1699640716236,"name":"Base Environment","data":{},"dataPropertyOrder":null,"color":null,"isPrivate":false,"metaSortKey":1699640716236,"_type":"environment"},{"_id":"jar_39fe09c413c6930fe51c72e599bb686d1182901b","parentId":"wrk_1a7fd1d31e4f426ea30d155a375fc77f","modified":1699640716237,"created":1699640716237,"name":"Default Jar","cookies":[],"_type":"cookie_jar"}]} \ No newline at end of file diff --git a/config/ptt-benchmark/schema.json b/config/ptt-benchmark/schema.json new file mode 100644 index 00000000..b7d5b0b3 --- /dev/null +++ b/config/ptt-benchmark/schema.json @@ -0,0 +1,112 @@ +{ + "schemas": [ + { + "name": "ptt", + "connection": { + "database": "CottontailConnectionProvider", + "parameters": { + "Host": "127.0.0.1", + "port": "1865" + } + }, + "fields": [ + { + "name": "averagecolor", + "factory": "AverageColor" + }, + { + "name": "file", + "factory": "FileSourceMetadata" + }, + { + "name": "time", + "factory": "TemporalMetadata" + }, + { + "name": "video", + "factory": "VideoSourceMetadata" + }, + { + "name": "asr", + "factory": "ASR", + "parameters": { + "host": "http://10.34.58.85:8888/", + "model": "whisper" + } + }, + { + "name": "clip", + "factory": "DenseEmbedding", + "parameters": { + "host": "http://10.34.58.85:8888/", + "model": "open-clip-vit-b32", + "length": "512" + } + }, + { + "name": "document_type", + "factory": "ImageClassification", + "parameters": { + "host": "http://10.34.58.85:8888/", + "model": "open-clip-vit-b32", + "classes": "text document,photograph,artwork", + "threshold": "0.1", + "top_k": "1" + } + }, + { + "name": "caption", + "factory": "ImageCaption", + "parameters": { + "host": "http://10.34.58.85:8888/" + } + }, + { + "name": "ocr", + "factory": "OCR", + "parameters": { + "host": "http://10.34.58.85:8888/", + "model": "tesseract" + } + } + ], + "resolvers": { + "disk": { + "factory": "DiskResolver", + "parameters": { + "location": "../thumbnails" + } + } + }, + "exporters": [ + { + "name": "thumbnail", + "factory": "ThumbnailExporter", + "resolverName": "disk", + "parameters": { + "maxSideResolution": "400", + "mimeType": "JPG" + } + } + ], + "extractionPipelines": [ + { + "name": "image_tiny", + "path": "./config/ptt-benchmark/pipelines/image-tiny.json" + }, + { + "name": "image", + "path": "./config/ptt-benchmark/pipelines/image.json" + }, + { + "name": "video_tiny", + "path": "./config/ptt-benchmark/pipelines/video-tiny.json" + }, + { + "name": "video", + "path": "./config/ptt-benchmark/pipelines/video.json" + } + ] + } + ] +} \ No newline at end of file diff --git a/vitrivr-engine-module-features/src/main/kotlin/org/vitrivr/engine/module/features/feature/external/common/DenseRetriever.kt b/vitrivr-engine-module-features/src/main/kotlin/org/vitrivr/engine/module/features/feature/external/common/DenseRetriever.kt index c218bcc6..fedb32f6 100644 --- a/vitrivr-engine-module-features/src/main/kotlin/org/vitrivr/engine/module/features/feature/external/common/DenseRetriever.kt +++ b/vitrivr-engine-module-features/src/main/kotlin/org/vitrivr/engine/module/features/feature/external/common/DenseRetriever.kt @@ -26,7 +26,7 @@ import org.vitrivr.engine.core.util.math.ScoringFunctions class DenseRetriever>(field: Schema.Field, query: ProximityQuery<*>, context: QueryContext) : AbstractRetriever(field, query, context) { override fun toFlow(scope: CoroutineScope) = flow { this@DenseRetriever.reader.getAll(this@DenseRetriever.query).forEach { - it.addAttribute(ScoringFunctions.max(it)) + it.addAttribute(ScoringFunctions.max(it, 2.0f)) emit(it) } }