Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

community: Add support for Upstash Vector #17012

Closed
wants to merge 38 commits into from
Closed
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
2a70916
Add support for Upstash Vector
ytkimirti Feb 2, 2024
8b1a907
Add integration tests
ytkimirti Feb 4, 2024
1e77f54
Add info api
ytkimirti Feb 4, 2024
82e38c5
Add name to indexing.ipynb
ytkimirti Feb 4, 2024
d319370
Merge remote-tracking branch 'upstream/master'
ytkimirti Feb 4, 2024
763c59b
Fix formatting
ytkimirti Feb 4, 2024
f63414d
Fix formatting
ytkimirti Feb 5, 2024
4b7ed74
Add example env vars
ytkimirti Feb 5, 2024
0d42775
Fix iteration
ytkimirti Feb 5, 2024
894d9a1
Add env vars to scheduled tests workflow file
ytkimirti Feb 5, 2024
2e50779
Formatting
ytkimirti Feb 5, 2024
6a8ed63
Remove skip
ytkimirti Feb 5, 2024
6518ae7
Add async implementations of functions
ytkimirti Feb 7, 2024
4d7dfef
Remove upstash keys from yaml file
ytkimirti Feb 8, 2024
4e3da15
Merge branch 'master-up'
ytkimirti Feb 8, 2024
3586def
Fixes, finalize integration tests
ytkimirti Feb 9, 2024
fb2277e
Merge branch 'master-up'
ytkimirti Feb 9, 2024
7d32e02
Remove optional from text_key in constructor
ytkimirti Feb 13, 2024
13bc7f2
Merge branch 'master-up'
ytkimirti Feb 13, 2024
7be8f62
Remove forgotten print
ytkimirti Feb 16, 2024
9d94e98
Add upstash docs notebook
ytkimirti Feb 16, 2024
1226916
Add async version of add_texts
ytkimirti Feb 18, 2024
1d5c172
Small cleanup
ytkimirti Feb 18, 2024
16a73e0
Update docs
ytkimirti Feb 18, 2024
699d24d
Merge branch 'master-up'
ytkimirti Feb 18, 2024
ea654ad
Cleanup
ytkimirti Feb 18, 2024
81ea626
Better description
ytkimirti Feb 18, 2024
9ab3ce3
Fix naming
ytkimirti Feb 18, 2024
2fd9651
Add support for with relevance scores functions
ytkimirti Feb 18, 2024
4dc962a
Add missing ids
ytkimirti Feb 18, 2024
073f754
Improve tests
ytkimirti Feb 18, 2024
1eb0a95
Add integration tests for async methods
ytkimirti Feb 18, 2024
19b441e
Improve formatting for the notebook
ytkimirti Feb 18, 2024
f20aade
Remove output in the notebook
ytkimirti Feb 18, 2024
7a00146
Fix formatting
ytkimirti Feb 18, 2024
8f9a06e
Fix formatting
ytkimirti Feb 18, 2024
f72a3e6
Fix formatting
ytkimirti Feb 20, 2024
660419e
Merge branch 'master-up'
ytkimirti Feb 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix iteration
  • Loading branch information
ytkimirti committed Feb 5, 2024
commit 0d42775b49b8a6b766e08a636693bfa70ac16e53
24 changes: 6 additions & 18 deletions libs/community/langchain_community/vectorstores/upstash.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,27 +159,17 @@ def add_texts(
for metadata, text in zip(metadatas, texts):
metadata[self._text_key] = text

# For loops to avoid memory issues and optimize when using HTTP-based embeddings.
# The first loop runs the embeddings; it benefits when using OpenAI embeddings.
# The second loop runs the Upstash upsert asynchronously.
for i in range(0, len(texts), embedding_chunk_size):
chunk_texts = texts[i: i + embedding_chunk_size]
chunk_ids = ids[i: i + embedding_chunk_size]
chunk_metadatas = metadatas[i: i + embedding_chunk_size]
embeddings = self._embed_documents(chunk_texts)

async_res = [
self._index.upsert(
vectors=batch,
**kwargs,
)
async def upsert_all():
for batch in batch_iterate(
batch_size, zip(chunk_ids, embeddings, chunk_metadatas)
)
]

async def upsert_all():
return await asyncio.gather(*async_res)
):
await self._index.upsert(vectors=batch)

asyncio.run(upsert_all())

Expand Down Expand Up @@ -293,9 +283,8 @@ def max_marginal_relevance_search_by_vector(
)
selected = [results[i].metadata for i in mmr_selected]
return [
# type: ignore since include_metadata=True
Document(page_content=metadata.pop(
(self._text_key)), metadata=metadata)
(self._text_key)), metadata=metadata) # type: ignore since include_metadata=True
for metadata in selected
]

Expand Down Expand Up @@ -389,9 +378,8 @@ def delete(
if delete_all:
self._index.reset()
elif ids is not None:
for i in range(0, len(ids), batch_size):
chunk = ids[i: i + batch_size]
self._index.delete(ids=chunk)
for batch in batch_iterate(batch_size, ids):
self._index.delete(ids=batch)
else:
raise ValueError("Either ids or delete_all should be provided")

Expand Down