forked from gmpetrov/databerry
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdatastore-remove-orphans.ts
85 lines (69 loc) · 1.88 KB
/
datastore-remove-orphans.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import axios from 'axios';
import bulkDeleteDatasources from './utils/bulk-delete-datasources';
import { DatastoreManager } from './utils/datastores';
import prisma from './utils/prisma-client';
/**
 * Collects the distinct `datasource_id` payload values of `points` that are
 * absent from `knownIds`. Points without a usable string id are ignored.
 */
const findOrphanDatasourceIds = (
  points: ReadonlyArray<{ payload?: { datasource_id?: unknown } | null }>,
  knownIds: ReadonlySet<string>
): string[] => {
  const orphans = new Set<string>();

  for (const point of points) {
    const datasourceId = point?.payload?.datasource_id;

    if (
      typeof datasourceId === 'string' &&
      datasourceId.length > 0 &&
      !knownIds.has(datasourceId)
    ) {
      orphans.add(datasourceId);
    }
  }

  return Array.from(orphans);
};

/**
 * Removes "zombie" datasources from a datastore: datasource ids that are still
 * referenced by points in the Qdrant collection but no longer have a matching
 * `appDatasource` row in the database.
 *
 * The Qdrant collection is scrolled page by page (filtered on `datastore_id`),
 * and every page's unknown datasource ids are handed to
 * `bulkDeleteDatasources`.
 *
 * @param datastoreId - Id of the datastore whose points are inspected.
 * @throws Error when no datastore with `datastoreId` exists; nothing is
 *   deleted in that case.
 */
const datastoreRemoveOrphans = async (datastoreId: string): Promise<void> => {
  const client = axios.create({
    baseURL: process.env.QDRANT_API_URL,
    headers: {
      'api-key': process.env.QDRANT_API_KEY,
    },
  });

  const datastore = await prisma.datastore.findUnique({
    where: {
      id: datastoreId,
    },
  });

  // Fail fast, before any destructive call, instead of relying on a non-null
  // assertion: with no datastore row every point would look like an orphan.
  if (!datastore) {
    throw new Error(`Datastore ${datastoreId} not found`);
  }

  const datasources = await prisma.appDatasource.findMany({
    where: {
      datastoreId,
    },
    select: {
      id: true,
    },
  });

  // Set lookup keeps the per-point membership test O(1).
  const knownIds = new Set<string>(datasources.map((each) => each.id));

  console.log(`${datasources.length} Datasources found in DB`);

  // Qdrant point ids (and therefore scroll offsets) may be strings or numbers;
  // the last page reports a null `next_page_offset`.
  let offset: string | number | null | undefined;

  do {
    const { data } = await client.post(
      'collections/text-embedding-ada-002/points/scroll',
      {
        ...(offset != null ? { offset } : {}),
        limit: 1000,
        filter: {
          must: [
            {
              key: 'datastore_id',
              match: {
                value: datastoreId,
              },
            },
          ],
        },
        with_payload: true,
        with_vector: false,
      }
    );

    // A response without `result.points` is treated as an empty page rather
    // than crashing the iteration.
    const points = data?.result?.points ?? [];
    const zombies = findOrphanDatasourceIds(points, knownIds);

    if (zombies.length > 0) {
      console.log('zombies', zombies);
      await bulkDeleteDatasources({
        datastoreId,
        datasourceIds: zombies,
      });
    }

    offset = data?.result?.next_page_offset;
  } while (offset != null);
};
export default datastoreRemoveOrphans;