-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathembeddings_cc_index.py
110 lines (93 loc) · 4.26 KB
/
embeddings_cc_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# embeddings.cc index API
# https://github.com/dice-group/embeddings.cc
#
# Installation of required packages:
# https://anaconda.org/conda-forge/httpx
# https://anaconda.org/jmcmurray/json
# conda install -c conda-forge httpx
# conda install -c jmcmurray json
# Note: the json module imported below is part of Python's standard library.
import httpx
import json
class EmbeddingsCcIndex():
    """
    Client for the embeddings.cc index webservice.

    Every method except ping() returns the httpx response of the
    webservice call unchanged, so callers inspect the status code and
    body themselves. Requests that modify or inspect the Elasticsearch
    setup are sent via POST and carry a password.
    """

    # Base URL used when no other URL is passed to the constructor.
    WEBSERVICE_URL = 'http://embeddings.cs.uni-paderborn.de:8008'

    def __init__(self, webservice_url=None):
        """
        Args:
            webservice_url: Base URL of the webservice (no trailing slash,
                endpoint paths like '/ping' are appended to it).
                None selects WEBSERVICE_URL.
        """
        if webservice_url is None:
            self.webservice_url = self.WEBSERVICE_URL
        else:
            self.webservice_url = webservice_url
        # Headers for requests that carry a JSON body (see add()).
        self.headers_json = {'Content-Type': 'application/json'}

    # ----------| GET requests without password |-----------------------------------------------------------------------

    def ping(self, seconds=1):
        """
        Checks whether the webservice is reachable.

        Args:
            seconds: Timeout passed to the HTTP request.

        Returns:
            The HTTP status code of the response, e.g.
            200: Success
            502: Webservice unavailable (also if not in UPB network, check VPN)
            503: Elasticsearch service unavailable
        """
        try:
            return httpx.get(self.webservice_url + '/ping', timeout=seconds).status_code
        except httpx.RequestError:
            # No response was received at all (connection error, timeout, ...);
            # this is reported to the caller as 502 instead of raising.
            return 502

    def count(self, index):
        """
        Requests the number of documents in an Elasticsearch index.

        Args:
            index: Name of the index.

        Returns:
            The webservice response.
        """
        return httpx.get(self.webservice_url + '/count', params={'index': index})

    def get_embeddings(self, index, entity):
        """
        Searches for an entity in Elasticsearch and requests related embeddings.

        Args:
            index: Name of the index to search in.
            entity: Entity to search for.

        Returns:
            The webservice response.
        """
        return httpx.get(self.webservice_url + '/get_embeddings',
                         params={'index': index, 'entity': entity})

    # ----------| POST requests with password |-------------------------------------------------------------------------

    def get_max_cpu_usage(self, password):
        """
        Requests the maximum CPU usage of ES nodes.

        Args:
            password: Webservice password.

        Returns:
            The webservice response.
        """
        return httpx.post(self.webservice_url + '/get_max_cpu_usage',
                          params={'password': password})

    def get_indexes(self, password):
        """
        Requests the existing Elasticsearch indexes.

        Args:
            password: Webservice password.

        Returns:
            The webservice response.
        """
        return httpx.post(self.webservice_url + '/get_indexes',
                          params={'password': password})

    def create_index(self, password, index, dimensions, shards=5):
        """
        Creates an Elasticsearch index.

        Args:
            password: Webservice password.
            index: Name of the index to create.
            dimensions: Number of dimensions of the embeddings.
            shards: Number of shards of the index.

        Returns:
            The webservice response.
        """
        return httpx.post(self.webservice_url + '/create_index',
                          params={'password': password, 'index': index,
                                  'dimensions': dimensions, 'shards': shards})

    def create_index_usagelog(self, password):
        """
        Creates the Elasticsearch index 'usagelog'.

        Args:
            password: Webservice password.

        Returns:
            The webservice response.
        """
        return httpx.post(self.webservice_url + '/create_index_usagelog',
                          params={'password': password})

    def delete_index(self, password, index):
        """
        Deletes an Elasticsearch index.

        Args:
            password: Webservice password.
            index: Name of the index to delete.

        Returns:
            The webservice response.
        """
        return httpx.post(self.webservice_url + '/delete_index',
                          params={'password': password, 'index': index})

    def add(self, password, index, docs, timeout=500):
        """
        Adds embeddings.

        Data is transformed to JSON, so tuples and lists are handled equally.
        Important: Split your data into multiple requests and wait for a response
        before adding additional data. A request can take max 50,000 items.
        Also important: Ensure the embeddings are in numeric format (not string).

        Args:
            password: Webservice password.
            index: Name of the index to add the documents to.
            docs: Documents to add (at most 50,000 per call).
            timeout: Timeout passed to the HTTP request.

        Returns:
            The webservice response.

        Raises:
            IndexError: If docs contains more than 50,000 items. The check
                runs before anything is sent.
        """
        if len(docs) > 50000:
            raise IndexError('Too many records')
        payload = json.dumps({'password': password, 'index': index, 'docs': docs})
        # The body is already serialized, so it is passed as raw content;
        # httpx reserves data= for form fields.
        return httpx.post(self.webservice_url + '/add', content=payload,
                          headers=self.headers_json, timeout=timeout)

    def alias_put(self, password, index, alias):
        """
        Adds an alias for an index.

        Args:
            password: Webservice password.
            index: Name of the index.
            alias: Alias to add.

        Returns:
            The webservice response.
        """
        return httpx.post(self.webservice_url + '/alias_put',
                          params={'password': password, 'index': index, 'alias': alias})

    def alias_delete(self, password, index, alias):
        """
        Deletes an alias of an index.

        Args:
            password: Webservice password.
            index: Name of the index.
            alias: Alias to delete.

        Returns:
            The webservice response.
        """
        return httpx.post(self.webservice_url + '/alias_delete',
                          params={'password': password, 'index': index, 'alias': alias})