Skip to content

Commit

Permalink
#342 first try at adding specified layer loading
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitryKey committed Oct 10, 2021
1 parent ac43973 commit b467cdc
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 4 deletions.
4 changes: 4 additions & 0 deletions python_bindings/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,10 @@ class Index {
return ids;
}

/**
 * Returns the link list stored for the index's entry element at `layer`.
 *
 * Forwards `appr_alg->entry` and `layer` to
 * `appr_alg->get_linklist_at_level` and returns the result as a py::list.
 *
 * NOTE(review): despite the "FirstLayer" name, any layer value is passed
 * through unchanged — confirm the intended naming.
 * NOTE(review): confirm that `entry` is the right member for the entry
 * point and that the value returned by get_linklist_at_level converts to
 * py::list; neither declaration is visible from here.
 */
py::list getFirstLayer(int layer) {
    auto &entry_point = appr_alg->entry;
    return appr_alg->get_linklist_at_level(entry_point, layer);
}


py::dict getAnnData() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */

Expand Down
43 changes: 43 additions & 0 deletions python_bindings/tests/bindings_test_getdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,46 @@ def testGettingItems(self):
# After adding them, all labels should be retrievable
returned_items = p.get_items(labels)
self.assertSequenceEqual(data.tolist(), returned_items)


def testGettingItemsByLayer(self):
    """Smoke-test reading the link list of a layer after populating an index.

    Renamed from ``testGettingItems``: this file already defines a test with
    that name directly above, and a second ``def`` with the same name in the
    same class body replaces the first, so the earlier test would silently
    stop running.
    """
    print("\n**** Getting the data by layer ****\n")

    dim = 16
    num_elements = 10000

    # Generating sample data
    data = np.float32(np.random.random((num_elements, dim)))
    labels = np.arange(0, num_elements)

    # Declaring index
    p = hnswlib.Index(space='l2', dim=dim)  # possible options are l2, cosine or ip

    # Initiating index
    # max_elements - the maximum number of elements, should be known beforehand
    #     (probably will be made optional in the future)
    #
    # ef_construction - controls index search speed/build speed tradeoff
    # M - is tightly connected with internal dimensionality of the data
    #     strongly affects the memory consumption
    p.init_index(max_elements=num_elements, ef_construction=100, M=16)

    # Controlling the recall by setting ef:
    # higher ef leads to better accuracy, but slower search
    p.set_ef(100)

    p.set_num_threads(4)  # by default using all available cores

    # Before adding anything, getting any labels should fail
    self.assertRaises(Exception, lambda: p.get_items(labels))

    print("Adding all elements (%d)" % (len(data)))
    p.add_items(data, labels)

    # After adding them, all labels should be retrievable
    returned_items = p.get_items(labels)
    self.assertSequenceEqual(data.tolist(), returned_items)

    # Kept in its own variable so the generated input vectors in `data`
    # are not overwritten by the layer query result.
    # NOTE(review): nothing is asserted about the result yet; it is only
    # printed. Confirm the expected shape/content and add an assertion.
    layer_links = p.getFirstLayer(layer=0)
    print(layer_links)
8 changes: 4 additions & 4 deletions sift_1b.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,22 +231,22 @@ inline bool exists_test(const std::string &name) {
void sift_test1B() {


int subset_size_milllions = 200;
int subset_size_millions = 200;
int efConstruction = 40;
int M = 16;


size_t vecsize = subset_size_milllions * 1000000;
size_t vecsize = subset_size_millions * 1000000;

size_t qsize = 10000;
size_t vecdim = 128;
char path_index[1024];
char path_gt[1024];
char *path_q = "../bigann/bigann_query.bvecs";
char *path_data = "../bigann/bigann_base.bvecs";
sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_milllions, efConstruction, M);
sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_millions, efConstruction, M);

sprintf(path_gt, "../bigann/gnd/idx_%dM.ivecs", subset_size_milllions);
sprintf(path_gt, "../bigann/gnd/idx_%dM.ivecs", subset_size_millions);


unsigned char *massb = new unsigned char[vecdim];
Expand Down

0 comments on commit b467cdc

Please sign in to comment.