Skip to content

Commit a3f956a

Browse files
authored
Use milvus gpu with GPU_IVF_FLAT flag (NVIDIA#243)
* Use GPU flag for document ingestion as default
* Port milvus changes to release branch
1 parent fc80791 commit a3f956a

File tree

3 files changed

+15
-4
lines changed

3 files changed

+15
-4
lines changed

RAG/examples/local_deploy/docker-compose-vectordb.yaml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ services:
5454

5555
milvus:
5656
container_name: milvus-standalone
57-
image: milvusdb/milvus:v2.4.12
57+
image: milvusdb/milvus:v2.4.15-gpu
5858
command: ["milvus", "run", "standalone"]
5959
environment:
6060
ETCD_ENDPOINTS: etcd:2379
@@ -74,6 +74,13 @@ services:
7474
depends_on:
7575
- "etcd"
7676
- "minio"
77+
deploy:
78+
resources:
79+
reservations:
80+
devices:
81+
- driver: nvidia
82+
capabilities: ["gpu"]
83+
device_ids: ['${VECTORSTORE_GPU_DEVICE_ID:-0}']
7784
profiles: ["nemo-retriever", "milvus", ""]
7885

7986
elasticsearch:

RAG/src/chain_server/configuration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class VectorStoreConfig(ConfigWizard):
4040
"nprobe", default=16, help_txt="Number of units to query", # IVF Flat milvus
4141
)
4242
index_type: str = configfield(
43-
"index_type", default="IVF_FLAT", help_txt="Index of the vector db", # IVF Flat for milvus
43+
"index_type", default="GPU_IVF_FLAT", help_txt="Index of the vector db", # IVF Flat for milvus
4444
)
4545

4646

RAG/src/chain_server/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,13 +314,17 @@ def create_vectorstore_langchain(document_embedder: "Embeddings", collection_nam
314314
)
315315
elif config.vector_store.name == "milvus":
316316
logger.info(f"Using milvus collection: {collection_name}")
317-
# vectorstore url can be updated using environment variable APP_VECTORSTORE_URL, it should be in http://ip:port format
317+
if not collection_name:
318+
collection_name = os.getenv('COLLECTION_NAME', "vector_db")
319+
logger.info(f"Using milvus collection: {collection_name}")
318320
url = urlparse(config.vector_store.url)
319321
vectorstore = Milvus(
320322
document_embedder,
321323
connection_args={"host": url.hostname, "port": url.port},
322324
collection_name=collection_name,
323-
auto_id=True,
325+
index_params={"index_type": config.vector_store.index_type, "metric_type": "L2", "nlist": config.vector_store.nlist},
326+
search_params={"nprobe": config.vector_store.nprobe},
327+
auto_id = True
324328
)
325329
else:
326330
raise ValueError(f"{config.vector_store.name} vector database is not supported")

0 commit comments

Comments
 (0)