Skip to content

Commit

Permalink
Update example-configs to new schema structure, closes #80
Browse files (browse the repository at this point in the history)
  • Loading branch information
v0idness committed Aug 20, 2024
1 parent 43965cb commit bc9c24d
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 131 deletions.
50 changes: 20 additions & 30 deletions example-configs/ingestion/example/image.json
Original file line number Diff line number Diff line change
@@ -1,66 +1,57 @@
{
"schemas": [
{
"name": "example",
"schemas": {
"example": {
"connection": {
"database": "CottontailConnectionProvider",
"parameters": {
"Host": "127.0.0.1",
"port": "1865"
}
},
"fields": [
{
"name": "averagecolor",
"fields": {
"averagecolor": {
"factory": "AverageColor"
},
{
"name": "file",
"file": {
"factory": "FileSourceMetadata"
},
{
"name": "clip",
"clip": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://127.0.0.1:8888",
"model": "open-clip-vit-b32",
"length":"512"
"length": "512"
}
},
{
"name": "dino",
"dino": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://127.0.0.1:8888/",
"model": "dino-v2-vits14",
"length":"384"
"length": "384"
}
},
{
"name": "whisper",
"whisper": {
"factory": "ASR",
"parameters": {
"host": "http://127.0.0.1:8888/",
"model": "whisper"
}
},
{
"name": "ocr",
"ocr": {
"factory": "OCR",
"parameters": {
"host": "http://127.0.0.1:8888/",
"model": "tesseract"
}
},
{
"name": "time",
"time": {
"factory": "TemporalMetadata"
},
{
"name": "video",
"video": {
"factory": "VideoSourceMetadata"
},
],
}
},
"resolvers": {
"disk": {
"factory": "DiskResolver",
Expand All @@ -69,18 +60,17 @@
}
}
},
"exporters": [
{
"name": "thumbnail",
"exporters": {
"thumbnail": {
"factory": "ThumbnailExporter",
"resolverName": "disk",
"parameters": {
"maxSideResolution": "300",
"mimeType": "JPG"
}
}
],
"extractionPipelines": []
},
"extractionPipelines": {}
}
]
}
}
78 changes: 31 additions & 47 deletions example-configs/schema/dense.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"schemas": [
{
"name": "dense",
"schemas": {
"dense": {
"connection": {
"database": "PgVectorConnectionProvider",
"parameters": {
Expand All @@ -11,113 +10,100 @@
"password": "admin"
}
},
"fields": [
{
"name": "averagecolor",
"fields": {
"averagecolor": {
"factory": "AverageColor"
},
{
"name": "file",
"file": {
"factory": "FileSourceMetadata"
},
{
"name": "time",
"time": {
"factory": "TemporalMetadata"
},
{
"name": "video",
"video": {
"factory": "VideoSourceMetadata"
},
{
"name": "asrSparse",
"asrSparse": {
"factory": "ASR",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "whisper",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
},
{
"name": "asrDense",
"asrDense": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "e5mistral7b-instruct",
"length": "4096",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
},
{
"name": "clip",
"clip": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "open-clip-vit-b32",
"length": "512",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
},

{
"name": "documentType",
"documentType": {
"factory": "ImageClassification",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "open-clip-vit-b32",
"timeoutSeconds": "100",
"retries":"1000",
"retries": "1000",
"classes": "text document,photograph",
"threshold": "0.1",
"top_k": "1"
}
},
{
"name": "captionSparse",
"captionSparse": {
"factory": "ImageCaption",
"parameters": {
"host": "http://10.34.64.84:8888/",
"timeoutSeconds": "100",
"retries":"1000",
"retries": "1000",
"model": "gpt4o",
"prompt": "Describe the contents of this image to help document the image and to aid archivists in searching for the image. Use information from the internet to enhance the description. The image is part of the PTT Archive which preserves the history (1848-1997) of Swiss Post, Telegraphy and Telephony (PTT)."
}
},
{
"name": "captionDense",
"captionDense": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "e5mistral7b-instruct",
"length": "4096",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
},
{
"name": "ocrSparse",
"ocrSparse": {
"factory": "OCR",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "tesseract",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
},
{
"name": "ocrDense",
"ocrDense": {
"factory": "DenseEmbedding",
"parameters": {
"host": "http://10.34.64.84:8888/",
"model": "e5mistral7b-instruct",
"length": "4096",
"timeoutSeconds": "100",
"retries":"1000"
"retries": "1000"
}
}
],
},
"resolvers": {
"disk": {
"factory": "DiskResolver",
Expand All @@ -126,23 +112,21 @@
}
}
},
"exporters": [
{
"name": "thumbnail",
"exporters": {
"thumbnail": {
"factory": "ThumbnailExporter",
"resolverName": "disk",
"parameters": {
"maxSideResolution": "400",
"mimeType": "JPG"
}
}
],
"extractionPipelines": [
{
"name": "wit1",
},
"extractionPipelines": {
"wit1": {
"path": "./pipelines/wit1.json"
}
]
}
}
]
}
}
Loading

0 comments on commit bc9c24d

Please sign in to comment.