Skip to content

Commit

Permalink
Merge pull request #12 from radekdymacz/singlestore
Browse files Browse the repository at this point in the history
  • Loading branch information
kyegomez authored Dec 10, 2024
2 parents 58f5eee + 589bda7 commit 3193cd0
Show file tree
Hide file tree
Showing 9 changed files with 933 additions and 6 deletions.
19 changes: 18 additions & 1 deletion .env.examples
Original file line number Diff line number Diff line change
@@ -1,2 +1,19 @@
# Pinecone Configuration
PINECONE_API_KEYS="your_pinecone_api_key"
BASE_SWARMS_MEMORY_URL="http:"

# Base URL Configuration
BASE_SWARMS_MEMORY_URL="http:"

# SingleStore Configuration
# Host can be localhost for local development or your SingleStore deployment URL
SINGLESTORE_HOST="your_singlestore_host" # e.g., "localhost" or "svc-123-xyz.aws.singlestore.com"

# Default port is 3306, but might be different for your deployment
SINGLESTORE_PORT="3306"

# Your SingleStore user credentials
SINGLESTORE_USER="your_singlestore_username" # e.g., "admin"
SINGLESTORE_PASSWORD="your_singlestore_password"

# Database name where vector tables will be created
SINGLESTORE_DATABASE="your_database_name" # e.g., "vector_store"
76 changes: 72 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

<div align="center">
<a href="https://swarms.world">
<h1>Swarms Memory</h1>
Expand Down Expand Up @@ -39,8 +38,9 @@ Here's a more detailed and larger table with descriptions and website links for
| **ChromaDB** | Available | A high-performance, distributed database optimized for handling large-scale AI tasks. | [ChromaDB Documentation](swarms_memory/memory/chromadb.md) | [ChromaDB](https://chromadb.com) |
| **Pinecone** | Available | A fully managed vector database that makes it easy to add vector search to your applications. | [Pinecone Documentation](swarms_memory/memory/pinecone.md) | [Pinecone](https://pinecone.io) |
| **Redis** | Coming Soon | An open-source, in-memory data structure store, used as a database, cache, and message broker. | [Redis Documentation](swarms_memory/memory/redis.md) | [Redis](https://redis.io) |
| **Faiss** | Coming Soon | A library for efficient similarity search and clustering of dense vectors, developed by Facebook AI. | [Faiss Documentation](swarms_memory/memory/faiss.md) | [Faiss](https://faiss.ai) |
| **HNSW** | Coming Soon | A graph-based algorithm for approximate nearest neighbor search, known for its speed and accuracy. | [HNSW Documentation](swarms_memory/memory/hnsw.md) | [HNSW](https://github.com/nmslib/hnswlib) |
| **Faiss** | Available | A library for efficient similarity search and clustering of dense vectors, developed by Facebook AI. | [Faiss Documentation](swarms_memory/memory/faiss.md) | [Faiss](https://faiss.ai) |
| **SingleStore**| Available | A distributed SQL database that provides high-performance vector similarity search. | [SingleStore Documentation](swarms_memory/memory/singlestore.md) | [SingleStore](https://www.singlestore.com) |
| **HNSW** | Coming Soon | A graph-based algorithm for approximate nearest neighbor search. | [HNSW Documentation](swarms_memory/memory/hnsw.md) | [HNSW](https://github.com/nmslib/hnswlib) |

This table includes a brief description of each system, their current status, links to their documentation, and their respective websites for further information.

Expand Down Expand Up @@ -259,6 +259,75 @@ for result in results:
```


### SingleStore
```python
from swarms_memory.vector_dbs.singlestore_wrapper import SingleStoreDB

# Initialize SingleStore with explicit connection settings (replace the placeholders with your own values)
db = SingleStoreDB(
host="your_host",
port=3306,
user="your_user",
password="your_password",
database="your_database",
table_name="example_vectors",
dimension=768, # Must match your embedding model's output size (note: all-MiniLM-L6-v2 produces 384-dimensional vectors)
namespace="example"
)

# Custom embedding function example (optional)
def custom_embedding_function(text: str) -> List[float]:
# Your custom embedding logic here
return embeddings

# Initialize with custom functions
db = SingleStoreDB(
host="your_host",
port=3306,
user="your_user",
password="your_password",
database="your_database",
table_name="example_vectors",
dimension=768,
namespace="example",
embedding_function=custom_embedding_function,
preprocess_function=lambda x: x.lower(), # Simple preprocessing
postprocess_function=lambda x: sorted(x, key=lambda k: k['similarity'], reverse=True) # Sort by similarity
)

# Add documents with metadata
doc_id = db.add(
document="SingleStore is a distributed SQL database that combines horizontal scalability with ACID guarantees.",
metadata={"source": "docs", "category": "database"}
)

# Query similar documents
results = db.query(
query="How does SingleStore scale?",
top_k=3,
metadata_filter={"source": "docs"}
)

# Process results
for result in results:
print(f"Document: {result['document']}")
print(f"Similarity: {result['similarity']:.4f}")
print(f"Metadata: {result['metadata']}\n")

# Delete a document
db.delete(doc_id)

# Key features:
# - Built on SingleStore's native vector similarity search
# - Supports custom embedding models and functions
# - Automatic table creation with optimized vector indexing
# - Metadata filtering for refined searches
# - Document preprocessing and postprocessing
# - Namespace support for document organization
# - SSL support for secure connections

# For more examples, see examples/singlestore_wrapper_example.py
```
# License
MIT

Expand All @@ -275,4 +344,3 @@ Please cite Swarms in your paper or your project if you found it beneficial in a
note = {Accessed: Date}
}
```

60 changes: 60 additions & 0 deletions examples/singlestore_wrapper_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import os
from dotenv import load_dotenv
from swarms_memory.vector_dbs.singlestore_wrapper import SingleStoreDB

# Load environment variables
load_dotenv()

def main():
    """Run an end-to-end SingleStoreDB demo: add, query, then delete documents.

    Connection settings are read from the ``SINGLESTORE_HOST``,
    ``SINGLESTORE_PORT`` (falls back to ``"3306"``), ``SINGLESTORE_USER``,
    ``SINGLESTORE_PASSWORD`` and ``SINGLESTORE_DATABASE`` environment
    variables. Every document added by this run is deleted again before the
    function returns, even if adding or querying raises.
    """
    # Initialize SingleStore with environment variables
    db = SingleStoreDB(
        host=os.getenv("SINGLESTORE_HOST"),
        port=int(os.getenv("SINGLESTORE_PORT", "3306")),
        user=os.getenv("SINGLESTORE_USER"),
        password=os.getenv("SINGLESTORE_PASSWORD"),
        database=os.getenv("SINGLESTORE_DATABASE"),
        table_name="example_vectors",
        # NOTE(review): all-MiniLM-L6-v2 produces 384-dimensional embeddings,
        # not 768 -- confirm this value matches the embedding model the
        # wrapper actually uses.
        dimension=768,
        namespace="example",
    )

    # Example documents
    documents = [
        "SingleStore is a distributed SQL database that combines the horizontal scalability of NoSQL systems with the ACID guarantees of traditional RDBMSs.",
        "Vector similarity search in SingleStore uses DOT_PRODUCT distance type for efficient nearest neighbor queries.",
        "SingleStore supports both row and column store formats, making it suitable for both transactional and analytical workloads.",
    ]

    # IDs are recorded as soon as each insert succeeds so that the cleanup
    # below can remove exactly what this run created.
    doc_ids = []
    try:
        # Add documents to the database
        for doc in documents:
            doc_id = db.add(
                document=doc,
                metadata={"source": "example", "type": "documentation"},
            )
            doc_ids.append(doc_id)
            print(f"Added document with ID: {doc_id}")

        # Query similar documents
        query = "How does SingleStore handle vector similarity search?"
        results = db.query(
            query=query,
            top_k=2,
            metadata_filter={"source": "example"},
        )

        print("\nQuery:", query)
        print("\nResults:")
        for result in results:
            print(f"\nDocument: {result['document']}")
            print(f"Similarity: {result['similarity']:.4f}")
            print(f"Metadata: {result['metadata']}")
    finally:
        # Clean up - delete documents. Runs on failure too, so a broken
        # add/query does not leave example rows behind in the table.
        print("\nCleaning up...")
        for doc_id in doc_ids:
            db.delete(doc_id)
            print(f"Deleted document with ID: {doc_id}")

# Run the example only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pinecone = "*"
faiss-cpu = "*"
pydantic = "*"
sqlalchemy = "*"
singlestoredb = "*"



Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ pinecone
faiss-cpu
torch
pydantic
sqlalchemy
sqlalchemy
singlestoredb
Loading

0 comments on commit 3193cd0

Please sign in to comment.