From 6bb405266430a146aaa6f9f2f49605a0a19a8712 Mon Sep 17 00:00:00 2001 From: Anthony Mahanna Date: Wed, 27 Nov 2024 09:09:43 -0500 Subject: [PATCH 1/4] initial commit --- comps/vectorstores/arango/README.md | 57 ++++++++++++++++++++ comps/vectorstores/arango/__init__.py | 2 + comps/vectorstores/arango/docker-compose.yml | 11 ++++ 3 files changed, 70 insertions(+) create mode 100644 comps/vectorstores/arango/README.md create mode 100644 comps/vectorstores/arango/__init__.py create mode 100644 comps/vectorstores/arango/docker-compose.yml diff --git a/comps/vectorstores/arango/README.md b/comps/vectorstores/arango/README.md new file mode 100644 index 0000000000..ff0bf26ebf --- /dev/null +++ b/comps/vectorstores/arango/README.md @@ -0,0 +1,57 @@ +# Start ArangoDB server + +**Additional Links**: +- https://arangodb.com/2024/11/vector-search-in-arangodb-practical-insights-and-hands-on-examples/ + +## 1. Start ArangoDB via Docker + +```bash +docker run -d --name arango -p 8529:8529 -e ARANGO_ROOT_PASSWORD=test arangodb/arangodb:3.13 +``` + +## 2. Create a Vector Index + +**Using `arangosh`**: + +```bash +127.0.0.1:8529@_system > db.myCollection.ensureIndex( +{ + name: "my-vector-index", + type: "vector", + fields: ["embeddings"] + params: { metric: "cosine", dimension: 128, nLists: 100 } +} +``` + +**Using the `python-arango` driver**: + +```python +from arango import ArangoClient + +db = ArangoClient().db('_system', username='root', password='test') + +db.collection("myCollection").add_index( + { + "name": "my-vector-index", + "type": "vector", + "fields": ["embeddings"], + "params": { + "metric": "cosine", + "dimensions": 128, + "nLists": 100, + }, + } +) +``` + +## 3. 
Use the Vector Index via the Arango Query Language (AQL) + +```bash +LET query_embedding = [0.1, 0.3, 0.5, …] + +FOR doc IN myCollection + LET score = APPROX_NEAR_COSINE(doc.embeddings, query_embedding) + SORT score DESC + LIMIT 5 + RETURN {doc, score} +``` \ No newline at end of file diff --git a/comps/vectorstores/arango/__init__.py b/comps/vectorstores/arango/__init__.py new file mode 100644 index 0000000000..916f3a44b2 --- /dev/null +++ b/comps/vectorstores/arango/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/vectorstores/arango/docker-compose.yml b/comps/vectorstores/arango/docker-compose.yml new file mode 100644 index 0000000000..271ee1c1c4 --- /dev/null +++ b/comps/vectorstores/arango/docker-compose.yml @@ -0,0 +1,11 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + arango: + image: arangodb/arangodb:3.13 + container_name: arangodb + ports: + - 8529:8529 + environment: + ARANGO_ROOT_PASSWORD: ${ARANGO_ROOT_PASSWORD} \ No newline at end of file From 10f4e0a96ab42109b43679faaa1e99cd66ca266a Mon Sep 17 00:00:00 2001 From: Anthony Mahanna Date: Wed, 27 Nov 2024 09:38:37 -0500 Subject: [PATCH 2/4] fix: `3.12` --- comps/vectorstores/arango/README.md | 2 +- comps/vectorstores/arango/docker-compose.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comps/vectorstores/arango/README.md b/comps/vectorstores/arango/README.md index ff0bf26ebf..fc123f3cad 100644 --- a/comps/vectorstores/arango/README.md +++ b/comps/vectorstores/arango/README.md @@ -6,7 +6,7 @@ ## 1. Start ArangoDB via Docker ```bash -docker run -d --name arango -p 8529:8529 -e ARANGO_ROOT_PASSWORD=test arangodb/arangodb:3.13 +docker run -d --name arango -p 8529:8529 -e ARANGO_ROOT_PASSWORD=test arangodb/arangodb:3.12 ``` ## 2. 
Create a Vector Index diff --git a/comps/vectorstores/arango/docker-compose.yml b/comps/vectorstores/arango/docker-compose.yml index 271ee1c1c4..4032df63f5 100644 --- a/comps/vectorstores/arango/docker-compose.yml +++ b/comps/vectorstores/arango/docker-compose.yml @@ -3,7 +3,7 @@ services: arango: - image: arangodb/arangodb:3.13 + image: arangodb/arangodb:3.12 container_name: arangodb ports: - 8529:8529 From 88bf906870d6bd4c33f36e3f0ec74a95cc702915 Mon Sep 17 00:00:00 2001 From: Anthony Mahanna Date: Mon, 30 Dec 2024 15:53:57 -0500 Subject: [PATCH 3/4] cleanup --- comps/vectorstores/arango/README.md | 21 ++++++++++---------- comps/vectorstores/arango/docker-compose.yml | 12 ++++++++--- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/comps/vectorstores/arango/README.md b/comps/vectorstores/arango/README.md index fc123f3cad..8ee595ef62 100644 --- a/comps/vectorstores/arango/README.md +++ b/comps/vectorstores/arango/README.md @@ -6,7 +6,7 @@ ## 1. Start ArangoDB via Docker ```bash -docker run -d --name arango -p 8529:8529 -e ARANGO_ROOT_PASSWORD=test arangodb/arangodb:3.12 +docker run -d --name arangodb -p 8529:8529 -e ARANGO_ROOT_PASSWORD=openSesame arangodb/arangodb:3.12 ``` ## 2. 
Create a Vector Index @@ -15,12 +15,13 @@ docker run -d --name arango -p 8529:8529 -e ARANGO_ROOT_PASSWORD=test arangodb/a ```bash 127.0.0.1:8529@_system > db.myCollection.ensureIndex( -{ - name: "my-vector-index", - type: "vector", - fields: ["embeddings"] - params: { metric: "cosine", dimension: 128, nLists: 100 } -} + { + name: "my-vector-index", + type: "vector", + fields: ["embedding"], + params: { metric: "cosine", dimension: 1024, nLists: 100 } + } +) ``` **Using the `python-arango` driver**: @@ -28,7 +29,7 @@ docker run -d --name arango -p 8529:8529 -e ARANGO_ROOT_PASSWORD=test arangodb/a ```python from arango import ArangoClient -db = ArangoClient().db('_system', username='root', password='test') +db = ArangoClient(hosts="http://localhost:8529").db('_system', username='root', password='openSesame') db.collection("myCollection").add_index( { @@ -37,7 +38,7 @@ db.collection("myCollection").add_index( "fields": ["embeddings"], "params": { "metric": "cosine", - "dimensions": 128, + "dimension": 1024, "nLists": 100, }, } @@ -50,7 +51,7 @@ db.collection("myCollection").add_index( LET query_embedding = [0.1, 0.3, 0.5, …] FOR doc IN myCollection - LET score = APPROX_NEAR_COSINE(doc.embeddings, query_embedding) + LET score = APPROX_NEAR_COSINE(doc.embedding, query_embedding) SORT score DESC LIMIT 5 RETURN {doc, score} diff --git a/comps/vectorstores/arango/docker-compose.yml b/comps/vectorstores/arango/docker-compose.yml index 4032df63f5..824698f185 100644 --- a/comps/vectorstores/arango/docker-compose.yml +++ b/comps/vectorstores/arango/docker-compose.yml @@ -2,10 +2,16 @@ # SPDX-License-Identifier: Apache-2.0 services: - arango: + arangodb: image: arangodb/arangodb:3.12 container_name: arangodb ports: - - 8529:8529 + - "8529:8529" environment: - ARANGO_ROOT_PASSWORD: ${ARANGO_ROOT_PASSWORD} \ No newline at end of file + - ARANGO_ROOT_PASSWORD=openSesame + volumes: + - arango_data:/var/lib/arangodb3 + +volumes: + arango_data: + driver: local \ No newline at end of
file From 04152c3e9c1e9c7c6c0b90ec05bc5f91c3a598eb Mon Sep 17 00:00:00 2001 From: Anthony Mahanna Date: Wed, 29 Jan 2025 15:13:34 -0500 Subject: [PATCH 4/4] fix: `--experimental-vector-index true` --- comps/vectorstores/arango/README.md | 2 +- comps/vectorstores/arango/docker-compose.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/comps/vectorstores/arango/README.md b/comps/vectorstores/arango/README.md index 8ee595ef62..dfcbf16f7d 100644 --- a/comps/vectorstores/arango/README.md +++ b/comps/vectorstores/arango/README.md @@ -6,7 +6,7 @@ ## 1. Start ArangoDB via Docker ```bash -docker run -d --name arangodb -p 8529:8529 -e ARANGO_ROOT_PASSWORD=openSesame arangodb/arangodb:3.12 +docker run -d --name arangodb -p 8529:8529 -e ARANGO_ROOT_PASSWORD=openSesame arangodb/arangodb:3.12 --experimental-vector-index true ``` ## 2. Create a Vector Index diff --git a/comps/vectorstores/arango/docker-compose.yml b/comps/vectorstores/arango/docker-compose.yml index 824698f185..74b4d65006 100644 --- a/comps/vectorstores/arango/docker-compose.yml +++ b/comps/vectorstores/arango/docker-compose.yml @@ -11,6 +11,7 @@ services: - ARANGO_ROOT_PASSWORD=openSesame volumes: - arango_data:/var/lib/arangodb3 + command: ["--experimental-vector-index=true"] volumes: arango_data: