Skip to content

Commit

Permalink
[FEAT][Faiss]
Browse files Browse the repository at this point in the history
  • Loading branch information
Kye Gomez authored and Kye Gomez committed Jul 8, 2024
1 parent 5ce2102 commit 8eb3908
Show file tree
Hide file tree
Showing 9 changed files with 430 additions and 3 deletions.
84 changes: 84 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,90 @@ print(result)
```


### Faiss

```python
from typing import List, Dict, Any
from swarms_memory.faiss_wrapper import FAISSDB


from transformers import AutoTokenizer, AutoModel
import torch


# Cache for the (tokenizer, model) pair so the expensive pretrained-weight
# load happens once per process instead of on every embedding call.
_EMBEDDING_RESOURCES: dict = {}


# Custom embedding function using a HuggingFace model
def custom_embedding_function(text: str) -> List[float]:
    """Embed ``text`` with bert-base-uncased, mean-pooled over tokens.

    Returns a 768-dimensional list of floats, matching the
    ``dimension=768`` the FAISSDB wrapper below is configured with.
    """
    if "pair" not in _EMBEDDING_RESOURCES:
        # Lazy, one-time load. ``.eval()`` disables dropout so repeated
        # embeddings of the same text are deterministic.
        _EMBEDDING_RESOURCES["pair"] = (
            AutoTokenizer.from_pretrained("bert-base-uncased"),
            AutoModel.from_pretrained("bert-base-uncased").eval(),
        )
    tokenizer, model = _EMBEDDING_RESOURCES["pair"]
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,  # BERT's maximum sequence length
    )
    with torch.no_grad():  # inference only; no gradient bookkeeping
        outputs = model(**inputs)
    # Mean-pool the final hidden states over the sequence dimension.
    embeddings = (
        outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
    )
    return embeddings


# Custom preprocessing function
def custom_preprocess(text: str) -> str:
    """Normalize a document before embedding: trim whitespace, lowercase."""
    trimmed = text.strip()
    return trimmed.lower()


# Custom postprocessing function
def custom_postprocess(
    results: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Annotate each query hit in place with a doubled score, then return the list."""
    for hit in results:
        hit["custom_score"] = hit["score"] * 2  # Example modification
    return results


# Initialize the wrapper with custom functions.
# dimension must match the embedding function's output size (768 for
# bert-base-uncased); index_type/metric are passed through to FAISS —
# presumably "Flat" selects an exact-search index, verify against the
# FAISSDB implementation.
wrapper = FAISSDB(
    dimension=768,
    index_type="Flat",
    embedding_function=custom_embedding_function,
    preprocess_function=custom_preprocess,
    postprocess_function=custom_postprocess,
    metric="cosine",
    # Loguru-style handler config: one file sink with rotation, one
    # callable sink that echoes records to stdout.
    logger_config={
        "handlers": [
            {
                "sink": "custom_faiss_rag_wrapper.log",
                "rotation": "1 GB",
            },
            {"sink": lambda msg: print(f"Custom log: {msg}", end="")},
        ],
    },
)

# Adding documents: text plus a metadata dict stored alongside the vector.
wrapper.add(
    "This is a sample document about artificial intelligence.",
    {"category": "AI"},
)
wrapper.add(
    "Python is a popular programming language for data science.",
    {"category": "Programming"},
)

# Querying: results carry "score" plus the "custom_score" added by
# custom_postprocess; "metadata"["text"] appears to hold the original
# document text — confirm against FAISSDB.query.
results = wrapper.query("What is AI?")
for result in results:
    print(
        f"Score: {result['score']}, Custom Score: {result['custom_score']}, Text: {result['metadata']['text']}"
    )
```


# License
MIT

Expand Down
File renamed without changes.
78 changes: 78 additions & 0 deletions examples/faiss_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from typing import List, Dict, Any
from swarms_memory.faiss_wrapper import FAISSDB


from transformers import AutoTokenizer, AutoModel
import torch


# Cache for the (tokenizer, model) pair so the expensive pretrained-weight
# load happens once per process instead of on every embedding call.
_EMBEDDING_RESOURCES: dict = {}


# Custom embedding function using a HuggingFace model
def custom_embedding_function(text: str) -> List[float]:
    """Embed ``text`` with bert-base-uncased, mean-pooled over tokens.

    Returns a 768-dimensional list of floats, matching the
    ``dimension=768`` the FAISSDB wrapper below is configured with.
    """
    if "pair" not in _EMBEDDING_RESOURCES:
        # Lazy, one-time load. ``.eval()`` disables dropout so repeated
        # embeddings of the same text are deterministic.
        _EMBEDDING_RESOURCES["pair"] = (
            AutoTokenizer.from_pretrained("bert-base-uncased"),
            AutoModel.from_pretrained("bert-base-uncased").eval(),
        )
    tokenizer, model = _EMBEDDING_RESOURCES["pair"]
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,  # BERT's maximum sequence length
    )
    with torch.no_grad():  # inference only; no gradient bookkeeping
        outputs = model(**inputs)
    # Mean-pool the final hidden states over the sequence dimension.
    embeddings = (
        outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
    )
    return embeddings


# Custom preprocessing function
def custom_preprocess(text: str) -> str:
    """Normalize a document before embedding: trim whitespace, lowercase."""
    trimmed = text.strip()
    return trimmed.lower()


# Custom postprocessing function
def custom_postprocess(
    results: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Annotate each query hit in place with a doubled score, then return the list."""
    for hit in results:
        hit["custom_score"] = hit["score"] * 2  # Example modification
    return results


# Initialize the wrapper with custom functions.
# dimension must match the embedding function's output size (768 for
# bert-base-uncased); index_type/metric are passed through to FAISS —
# presumably "Flat" selects an exact-search index, verify against the
# FAISSDB implementation.
wrapper = FAISSDB(
    dimension=768,
    index_type="Flat",
    embedding_function=custom_embedding_function,
    preprocess_function=custom_preprocess,
    postprocess_function=custom_postprocess,
    metric="cosine",
    # Loguru-style handler config: one file sink with rotation, one
    # callable sink that echoes records to stdout.
    logger_config={
        "handlers": [
            {
                "sink": "custom_faiss_rag_wrapper.log",
                "rotation": "1 GB",
            },
            {"sink": lambda msg: print(f"Custom log: {msg}", end="")},
        ],
    },
)

# Adding documents: text plus a metadata dict stored alongside the vector.
wrapper.add(
    "This is a sample document about artificial intelligence.",
    {"category": "AI"},
)
wrapper.add(
    "Python is a popular programming language for data science.",
    {"category": "Programming"},
)

# Querying: results carry "score" plus the "custom_score" added by
# custom_postprocess; "metadata"["text"] appears to hold the original
# document text — confirm against FAISSDB.query.
results = wrapper.query("What is AI?")
for result in results:
    print(
        f"Score: {result['score']}, Custom Score: {result['custom_score']}, Text: {result['metadata']['text']}"
    )
File renamed without changes.
3 changes: 2 additions & 1 deletion swarms_memory/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from swarms_memory.chroma_db_wrapper import ChromaDB
from swarms_memory.pinecone_wrapper import PineconeMemory
from swarms_memory.faiss_wrapper import FAISSDB

__all__ = ["ChromaDB", "PineconeMemory"]
__all__ = ["ChromaDB", "PineconeMemory", "FAISSDB"]
1 change: 1 addition & 0 deletions swarms_memory/chroma_db_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def __init__(
*args,
**kwargs,
):
super().__init__(*args, **kwargs)
self.metric = metric
self.output_dir = output_dir
self.limit_tokens = limit_tokens
Expand Down
Loading

0 comments on commit 8eb3908

Please sign in to comment.