From 25a46cf384dbd63cee48debd990cebfb7a77ce0a Mon Sep 17 00:00:00 2001 From: William Ryder Date: Fri, 16 Feb 2024 16:02:45 +0100 Subject: [PATCH 1/2] feat: add elasticsearch --- package-lock.json | 122 +++++++++++++++++++++++++++++++++++ package.json | 1 + src/config/elasticsearch.ts | 4 ++ src/elastic.ts | 65 +++++++++++++++++++ src/index.ts | 3 + src/workers/discordReview.ts | 15 +++-- src/workers/downloadPDF.ts | 5 ++ 7 files changed, 210 insertions(+), 5 deletions(-) create mode 100644 src/config/elasticsearch.ts create mode 100644 src/elastic.ts diff --git a/package-lock.json b/package-lock.json index 60059c7c..59c7c8a5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@bull-board/api": "^5.12.0", "@bull-board/express": "^5.12.0", + "@elastic/elasticsearch": "^8.12.1", "@google/generative-ai": "^0.1.3", "bullmq": "^5.1.1", "chromadb": "^1.7.3", @@ -830,6 +831,60 @@ "node": ">=18" } }, + "node_modules/@elastic/elasticsearch": { + "version": "8.12.1", + "resolved": "https://registry.npmjs.org/@elastic/elasticsearch/-/elasticsearch-8.12.1.tgz", + "integrity": "sha512-/dJtxtvoN2vRXip6xUrEyzthhzVUOKL8L9YNq25HpMwqiqrJTK70/dOp6GM8oTVQ87UPyJBiiCxQY2+cvg2XWw==", + "dependencies": { + "@elastic/transport": "^8.4.0", + "tslib": "^2.4.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@elastic/transport": { + "version": "8.4.0", + "resolved": "https://registry.npmjs.org/@elastic/transport/-/transport-8.4.0.tgz", + "integrity": "sha512-Yb3fDa7yGD0ca3uMbL64M3vM1cE5h5uHmBcTjkdB4VpCasRNKSd09iDpwqX8zX1tbBtxcaKYLceKthWvPeIxTw==", + "dependencies": { + "debug": "^4.3.4", + "hpagent": "^1.0.0", + "ms": "^2.1.3", + "secure-json-parse": "^2.4.0", + "tslib": "^2.4.0", + "undici": "^5.22.1" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/@elastic/transport/node_modules/debug": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/@elastic/transport/node_modules/debug/node_modules/ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" + }, + "node_modules/@elastic/transport/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + }, "node_modules/@fastify/busboy": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.1.0.tgz", @@ -2856,6 +2911,14 @@ "node": ">= 0.4" } }, + "node_modules/hpagent": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz", + "integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==", + "engines": { + "node": ">=14" + } + }, "node_modules/html-escaper": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", @@ -4774,6 +4837,11 @@ "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, + "node_modules/secure-json-parse": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz", + "integrity": "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==" + }, "node_modules/semver": { "version": "7.5.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz", @@ -6111,6 +6179,50 @@ } } }, + "@elastic/elasticsearch": { + "version": "8.12.1", + "resolved": "https://registry.npmjs.org/@elastic/elasticsearch/-/elasticsearch-8.12.1.tgz", + "integrity": "sha512-/dJtxtvoN2vRXip6xUrEyzthhzVUOKL8L9YNq25HpMwqiqrJTK70/dOp6GM8oTVQ87UPyJBiiCxQY2+cvg2XWw==", + "requires": { + "@elastic/transport": "^8.4.0", + "tslib": "^2.4.0" + } + }, + "@elastic/transport": { + "version": "8.4.0", + "resolved": "https://registry.npmjs.org/@elastic/transport/-/transport-8.4.0.tgz", + "integrity": "sha512-Yb3fDa7yGD0ca3uMbL64M3vM1cE5h5uHmBcTjkdB4VpCasRNKSd09iDpwqX8zX1tbBtxcaKYLceKthWvPeIxTw==", + "requires": { + "debug": "^4.3.4", + "hpagent": "^1.0.0", + "ms": "^2.1.3", + "secure-json-parse": "^2.4.0", + "tslib": "^2.4.0", + "undici": "^5.22.1" + }, + "dependencies": { + "debug": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "requires": { + "ms": "2.1.2" + }, + "dependencies": { + "ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" + } + } + }, + "ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + } + } + }, "@fastify/busboy": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.1.0.tgz", @@ -7612,6 +7724,11 @@ "function-bind": "^1.1.2" } }, + "hpagent": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz", + "integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==" + }, "html-escaper": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", @@ -9021,6 +9138,11 @@ "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, + "secure-json-parse": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz", + "integrity": "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==" + }, "semver": { "version": "7.5.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz", diff --git a/package.json b/package.json index 58ffcc7c..5c93b235 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,7 @@ "dependencies": { "@bull-board/api": "^5.12.0", "@bull-board/express": "^5.12.0", + "@elastic/elasticsearch": "^8.12.1", "@google/generative-ai": "^0.1.3", "bullmq": "^5.1.1", "chromadb": "^1.7.3", diff --git a/src/config/elasticsearch.ts b/src/config/elasticsearch.ts new file mode 100644 index 00000000..436a4686 --- /dev/null +++ b/src/config/elasticsearch.ts @@ -0,0 +1,4 @@ +export default { + node: process.env.ELASTIC_NODE_URL, + indexName: process.env.ELASTIC_INDEX_NAME, +} \ No newline at end of file diff --git a/src/elastic.ts b/src/elastic.ts new file mode 100644 index 00000000..5a213cac --- /dev/null +++ b/src/elastic.ts @@ -0,0 +1,65 @@ +import config from './config/elasticsearch'; +import { Client } from '@elastic/elasticsearch'; + +class Elastic { + client: Client; + indexName: string; + + constructor({ node, indexName }) { + this.client = new Client({ node }); + this.indexName = indexName; + } + + async setupIndex() { + const indexExists = await this.client.indices.exists({ index: this.indexName }); + if (!indexExists) { + await this.client.indices.create({ + index: this.indexName, + body: { + mappings: { + properties: { + pdf: { type: 'binary' }, + report: { type: 'object' }, + state: { type: 'keyword' } + } + } + } + }); + console.log(`Index ${this.indexName} created.`); + } else { + console.log(`Index ${this.indexName} already exists.`); + } + } + + async indexDocument(documentId: string, pdfContent: string, reportData: object = null) { + const docBody = { + pdf: pdfContent, + report: reportData, + state: 'pending' + }; + + await this.client.index({ + index: this.indexName, + id: documentId, + body: docBody + }); + + console.log(`Document ${documentId} indexed.`); + } + + async updateDocumentState(documentId: string, newState: string) { + await this.client.update({ + index: this.indexName, + id: documentId, + body: { + doc: { + state: newState + } + } + }); + + console.log(`Document ${documentId} state updated to ${newState}.`); + } +} + +export default new Elastic(config) diff --git a/src/index.ts b/src/index.ts index e0925abd..69d49932 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,6 +7,7 @@ import { BullMQAdapter } from '@bull-board/api/bullMQAdapter' import { ExpressAdapter } from '@bull-board/express' import discord from './discord' +import elastic from './elastic' // keep this line, otherwise the workers won't be started import * as workers from './workers' @@ -56,6 +57,8 @@ createBullBoard({ }) const app = express() + +elastic.setupIndex() discord.login() app.use('/admin/queues', serverAdapter.getRouter()) diff --git a/src/workers/discordReview.ts b/src/workers/discordReview.ts index c1f53fd1..e2ad9f4d 100644 --- a/src/workers/discordReview.ts +++ b/src/workers/discordReview.ts @@ -10,14 +10,19 @@ import { ModalActionRowComponentBuilder, TextInputStyle, } from 'discord.js' +import { Client } from '@elastic/elasticsearch'; + +const esClient = new Client({ + node: 'http://elasticsearch.data-pipeline.svc.cluster.local:9200' +}); class JobData extends Job { data: { - url: string - json: string - channelId: string - messageId: string - } + url: string; + json: string; + channelId: string; + messageId: string; + }; } const worker = new Worker( diff --git a/src/workers/downloadPDF.ts b/src/workers/downloadPDF.ts index 89f78719..7dc41e69 100644 --- a/src/workers/downloadPDF.ts +++ b/src/workers/downloadPDF.ts @@ -4,6 +4,11 @@ import pdf from 'pdf-parse' import { splitText } from '../queues' import discord from '../discord' import { TextChannel } from 'discord.js' +import { Client } from '@elastic/elasticsearch'; + +const esClient = new Client({ + node: 'http://elasticsearch.data-pipeline.svc.cluster.local:9200' +}); class JobData extends Job { data: { From 65c50dbb7ddddfe373d39b8b55a4ebc250cef3ad Mon Sep 17 00:00:00 2001 From: William Ryder Date: Sat, 17 Feb 2024 00:10:42 +0100 Subject: [PATCH 2/2] fix: add url to es index --- src/elastic.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/elastic.ts b/src/elastic.ts index 5a213cac..fa8857aa 100644 --- a/src/elastic.ts +++ b/src/elastic.ts @@ -18,6 +18,7 @@ class Elastic { body: { mappings: { properties: { + url: { type: 'keyword' }, pdf: { type: 'binary' }, report: { type: 'object' }, state: { type: 'keyword' }