Skip to content

Commit b467cdc

Browse files
committed
#342 first try at adding specified layer loading
1 parent ac43973 commit b467cdc

File tree

3 files changed

+51
-4
lines changed

3 files changed

+51
-4
lines changed

python_bindings/bindings.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,10 @@ class Index {
288288
return ids;
289289
}
290290

291+
// Forwards to appr_alg->get_linklist_at_level for the element appr_alg->entry at
// the requested `layer`, and returns that call's result as a py::list.
//
// NOTE(review): `layer` is passed through unchanged, so despite the name this is
// not restricted to the first layer -- confirm the intended naming.
// NOTE(review): neither the type returned by get_linklist_at_level nor the `entry`
// member is visible in this hunk; confirm that the result actually converts to
// py::list and that `entry` is the right entry-point field of the algorithm object.
// NOTE(review): no null check on appr_alg is visible here -- confirm callers can
// only reach this after the index has been initialised.
py::list getFirstLayer(int layer) {
292+
return appr_alg->get_linklist_at_level(appr_alg->entry, layer);
293+
}
294+
291295

292296
py::dict getAnnData() const { /* WARNING: Index::getAnnData is not thread-safe with Index::addItems */
293297

python_bindings/tests/bindings_test_getdata.py

+43
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,46 @@ def testGettingItems(self):
4444
# After adding them, all labels should be retrievable
4545
returned_items = p.get_items(labels)
4646
self.assertSequenceEqual(data.tolist(), returned_items)
47+
48+
49+
def testGettingItemsByLayer(self):
    """Build an L2 index, verify item round-tripping, then fetch a layer's link list.

    This test is deliberately named differently from ``testGettingItems``:
    two methods with the same name in one class body would leave only the
    later definition, so the earlier test would silently stop running.
    """
    print("\n**** Getting the data by layer ****\n")

    dim = 16
    num_elements = 10000

    # Generating sample data
    data = np.float32(np.random.random((num_elements, dim)))
    labels = np.arange(0, num_elements)

    # Declaring index
    p = hnswlib.Index(space='l2', dim=dim)  # possible options are l2, cosine or ip

    # Initiating index
    # max_elements - the maximum number of elements, should be known beforehand
    #     (probably will be made optional in the future)
    #
    # ef_construction - controls index search speed/build speed tradeoff
    # M - is tightly connected with internal dimensionality of the data
    #     strongly affects the memory consumption
    p.init_index(max_elements=num_elements, ef_construction=100, M=16)

    # Controlling the recall by setting ef:
    # higher ef leads to better accuracy, but slower search
    p.set_ef(100)

    p.set_num_threads(4)  # by default using all available cores

    # Before adding anything, getting any labels should fail
    self.assertRaises(Exception, lambda: p.get_items(labels))

    print("Adding all elements (%d)" % (len(data)))
    p.add_items(data, labels)

    # After adding them, all labels should be retrievable
    returned_items = p.get_items(labels)
    self.assertSequenceEqual(data.tolist(), returned_items)

    # Keep the layer result in its own name instead of overwriting the sample data.
    first_layer = p.getFirstLayer(layer=0)
    print(first_layer)

    # The binding is declared to return py::list, so the Python side should see a list.
    self.assertIsInstance(first_layer, list)

sift_1b.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -231,22 +231,22 @@ inline bool exists_test(const std::string &name) {
231231
void sift_test1B() {
232232

233233

234-
int subset_size_milllions = 200;
234+
int subset_size_millions = 200;
235235
int efConstruction = 40;
236236
int M = 16;
237237

238238

239-
size_t vecsize = subset_size_milllions * 1000000;
239+
size_t vecsize = subset_size_millions * 1000000;
240240

241241
size_t qsize = 10000;
242242
size_t vecdim = 128;
243243
char path_index[1024];
244244
char path_gt[1024];
245245
char *path_q = "../bigann/bigann_query.bvecs";
246246
char *path_data = "../bigann/bigann_base.bvecs";
247-
sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_milllions, efConstruction, M);
247+
sprintf(path_index, "sift1b_%dm_ef_%d_M_%d.bin", subset_size_millions, efConstruction, M);
248248

249-
sprintf(path_gt, "../bigann/gnd/idx_%dM.ivecs", subset_size_milllions);
249+
sprintf(path_gt, "../bigann/gnd/idx_%dM.ivecs", subset_size_millions);
250250

251251

252252
unsigned char *massb = new unsigned char[vecdim];

0 commit comments

Comments
 (0)