Skip to content

Commit 7e4e0b8

Browse files
committed
rename "metadata" to "config" to match benchmark wording, add chromadb volume, fix CHROMA_WORKERS, allow more chromadb versions in pyproject.toml
1 parent 8dfad2f commit 7e4e0b8

File tree

8 files changed

+27
-29
lines changed

8 files changed

+27
-29
lines changed

benchmark/convert.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def main():
6060
data = json.load(open(search_file))
6161
experiment_name = data["params"]["experiment"]
6262
dataset_name = data["params"]["dataset"]
63-
engine_params = data["params"].get("config", {})
63+
engine_params = data["params"]["config"]
6464
parallel = data["params"]["parallel"]
6565
engine_name = data["params"]["engine"]
6666

engine/clients/chroma/configure.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def recreate(self, dataset: Dataset, collection_params):
3737
params = self.collection_params
3838
params["metadata"] = dict(
3939
{"hnsw:space": self.DISTANCE_MAPPING.get(dataset.config.distance)},
40-
**params.get("metadata", {}),
40+
**params.pop("config", {}),
4141
)
4242
self.client.create_collection(
4343
name=CHROMA_COLLECTION_NAME,

engine/clients/chroma/parser.py

-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def build_condition(
2727
condition = {**condition, **or_subfilters[0]}
2828

2929
return condition
30-
# return {k: v for d in [flt for xs in [and_subfilters, or_subfilters] for flt in xs] for k, v in d.items()}
3130

3231
def build_exact_match_filter(self, field_name: str, value: FieldValue) -> Where:
3332
return {field_name: value}

engine/clients/chroma/search.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,5 @@ def search_one(cls, query: Query, top: int) -> List[Tuple[int, float]]:
4040
def setup_search(self):
4141
metadata = self.collection.metadata.copy()
4242
metadata.pop("hnsw:space", None) # Not allowed in the collection.modify method
43-
metadata.update(self.search_params.get("metadata", {}))
43+
metadata.update(self.search_params.get("config", {}))
4444
self.collection.modify(metadata=metadata)

engine/clients/chroma/upload.py

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def init_client(cls, host, distance, connection_params, upload_params):
2222

2323
@classmethod
2424
def upload_batch(cls, batch: List[Record]):
25+
# assert len(batch) <= cls.client.get_max_batch_size() # commented out for performance reasons
2526
ids, vectors, payloads = [], [], []
2627
for point in batch:
2728
ids.append(str(point.id))

engine/servers/chroma-single-node/docker-compose.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
services:
22
chromadb_bench:
33
image: ${CONTAINER_REGISTRY:-docker.io}/chromadb/chroma:0.5.7
4-
#volumes:
5-
# - ./chromadb:/chroma/chroma
4+
volumes:
5+
- ./chromadb:/chroma/chroma
66
ports:
77
- "8000:8000"
88
logging:
@@ -13,7 +13,7 @@ services:
1313
environment:
1414
IS_PERSISTENT: TRUE
1515
ANONYMIZED_TELEMETRY: False
16-
CHROMA_WORKERS: 2
16+
CHROMA_WORKERS: 1
1717
deploy:
1818
resources:
1919
limits:

experiments/configurations/chroma-single-node.json

+19-21
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@
55
"connection_params": {},
66
"collection_params": {},
77
"search_params": [
8-
{
9-
"parallel": 8
10-
}
8+
{ "parallel": 8, "config": {} }
119
],
1210
"upload_params": {
1311
"parallel": 16,
@@ -19,14 +17,14 @@
1917
"engine": "chroma",
2018
"connection_params": {},
2119
"collection_params": {
22-
"metadata": {
20+
"config": {
2321
"hnsw:M": 16,
2422
"hnsw:construction_ef": 128
2523
}
2624
},
2725
"search_params": [
28-
{ "parallel": 1, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 512 } },
29-
{ "parallel": 100, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 512 } }
26+
{ "parallel": 1, "config": {"hnsw:search_ef": 128 } }, { "parallel": 1, "config": {"hnsw:search_ef": 256 } }, { "parallel": 1, "config": {"hnsw:search_ef": 512 } },
27+
{ "parallel": 100, "config": {"hnsw:search_ef": 128 } }, { "parallel": 100, "config": {"hnsw:search_ef": 256 } }, { "parallel": 100, "config": {"hnsw:search_ef": 512 } }
3028
],
3129
"upload_params": { "parallel": 16 }
3230
},
@@ -35,14 +33,14 @@
3533
"engine": "chroma",
3634
"connection_params": {},
3735
"collection_params": {
38-
"metadata": {
36+
"config": {
3937
"hnsw:M": 32,
4038
"hnsw:construction_ef": 128
4139
}
4240
},
4341
"search_params": [
44-
{ "parallel": 1, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 512 } },
45-
{ "parallel": 100, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 512 } }
42+
{ "parallel": 1, "config": {"hnsw:search_ef": 128 } }, { "parallel": 1, "config": {"hnsw:search_ef": 256 } }, { "parallel": 1, "config": {"hnsw:search_ef": 512 } },
43+
{ "parallel": 100, "config": {"hnsw:search_ef": 128 } }, { "parallel": 100, "config": {"hnsw:search_ef": 256 } }, { "parallel": 100, "config": {"hnsw:search_ef": 512 } }
4644
],
4745
"upload_params": { "parallel": 16 }
4846
},
@@ -51,14 +49,14 @@
5149
"engine": "chroma",
5250
"connection_params": {},
5351
"collection_params": {
54-
"metadata": {
52+
"config": {
5553
"hnsw:M": 32,
5654
"hnsw:construction_ef": 256
5755
}
5856
},
5957
"search_params": [
60-
{ "parallel": 1, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 512 } },
61-
{ "parallel": 100, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 512 } }
58+
{ "parallel": 1, "config": {"hnsw:search_ef": 128 } }, { "parallel": 1, "config": {"hnsw:search_ef": 256 } }, { "parallel": 1, "config": {"hnsw:search_ef": 512 } },
59+
{ "parallel": 100, "config": {"hnsw:search_ef": 128 } }, { "parallel": 100, "config": {"hnsw:search_ef": 256 } }, { "parallel": 100, "config": {"hnsw:search_ef": 512 } }
6260
],
6361
"upload_params": { "parallel": 16 }
6462
},
@@ -67,14 +65,14 @@
6765
"engine": "chroma",
6866
"connection_params": {},
6967
"collection_params": {
70-
"metadata": {
68+
"config": {
7169
"hnsw:M": 32,
7270
"hnsw:construction_ef": 512
7371
}
7472
},
7573
"search_params": [
76-
{ "parallel": 1, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 512 } },
77-
{ "parallel": 100, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 512 } }
74+
{ "parallel": 1, "config": {"hnsw:search_ef": 128 } }, { "parallel": 1, "config": {"hnsw:search_ef": 256 } }, { "parallel": 1, "config": {"hnsw:search_ef": 512 } },
75+
{ "parallel": 100, "config": {"hnsw:search_ef": 128 } }, { "parallel": 100, "config": {"hnsw:search_ef": 256 } }, { "parallel": 100, "config": {"hnsw:search_ef": 512 } }
7876
],
7977
"upload_params": { "parallel": 16 }
8078
},
@@ -83,14 +81,14 @@
8381
"engine": "chroma",
8482
"connection_params": {},
8583
"collection_params": {
86-
"metadata": {
84+
"config": {
8785
"hnsw:M": 64,
8886
"hnsw:construction_ef": 256
8987
}
9088
},
9189
"search_params": [
92-
{ "parallel": 1, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 512 } },
93-
{ "parallel": 100, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 512 } }
90+
{ "parallel": 1, "config": {"hnsw:search_ef": 128 } }, { "parallel": 1, "config": {"hnsw:search_ef": 256 } }, { "parallel": 1, "config": {"hnsw:search_ef": 512 } },
91+
{ "parallel": 100, "config": {"hnsw:search_ef": 128 } }, { "parallel": 100, "config": {"hnsw:search_ef": 256 } }, { "parallel": 100, "config": {"hnsw:search_ef": 512 } }
9492
],
9593
"upload_params": { "parallel": 16 }
9694
},
@@ -99,14 +97,14 @@
9997
"engine": "chroma",
10098
"connection_params": {},
10199
"collection_params": {
102-
"metadata": {
100+
"config": {
103101
"hnsw:M": 64,
104102
"hnsw:construction_ef": 512
105103
}
106104
},
107105
"search_params": [
108-
{ "parallel": 1, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 1, "metadata": {"hnsw:search_ef": 512 } },
109-
{ "parallel": 100, "metadata": {"hnsw:search_ef": 128 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 256 } }, { "parallel": 100, "metadata": {"hnsw:search_ef": 512 } }
106+
{ "parallel": 1, "config": {"hnsw:search_ef": 128 } }, { "parallel": 1, "config": {"hnsw:search_ef": 256 } }, { "parallel": 1, "config": {"hnsw:search_ef": 512 } },
107+
{ "parallel": 100, "config": {"hnsw:search_ef": 128 } }, { "parallel": 100, "config": {"hnsw:search_ef": 256 } }, { "parallel": 100, "config": {"hnsw:search_ef": 512 } }
110108
],
111109
"upload_params": { "parallel": 16 }
112110
}

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ opensearch-py = "^2.3.2"
2020
tqdm = "^4.66.1"
2121
psycopg = {extras = ["binary"], version = "^3.1.17"}
2222
pgvector = "^0.2.4"
23-
chromadb = "0.5.7"
23+
chromadb = { version = ">=0.5.5,<0.6.0" }
2424

2525
[tool.poetry.dev-dependencies]
2626
pre-commit = "^2.20.0"

0 commit comments

Comments
 (0)