Skip to content

Commit

Permalink
Removed kwargs from add_texts (langchain-ai#7595)
Browse files Browse the repository at this point in the history
Removing the **kwargs argument from the add_texts method in the DeepLake
vectorstore, as it confuses users and doesn't fail when the user passes
incorrect parameters.

Also added a small test to ensure the change is applied correctly.

Could you please take a look, @rlancemartin, @eyurtsev? This is a small
PR.

Thx so much!
  • Loading branch information
adolkhan authored Jul 19, 2023
1 parent 4d8b48b commit 7bb8434
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 559 deletions.
20 changes: 14 additions & 6 deletions langchain/vectorstores/deeplake.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,20 @@ def add_texts(
texts (Iterable[str]): Texts to add to the vectorstore.
metadatas (Optional[List[dict]], optional): Optional list of metadatas.
ids (Optional[List[str]], optional): Optional list of IDs.
**kwargs: other optional keyword arguments.
embedding_function (Optional[Embeddings], optional): Embedding function
to use to convert the text into embeddings.
**kwargs (Any): Any additional keyword arguments passed is not supported
by this method.
Returns:
List[str]: List of IDs of the added texts.
"""
if kwargs:
unsupported_items = "`, `".join(set(kwargs.keys()))
raise TypeError(
f"`{unsupported_items}` is/are not a valid argument to add_text method"
)

kwargs = {}
if ids:
if self._id_tensor_name == "ids": # for backwards compatibility
Expand All @@ -199,8 +208,7 @@ def add_texts(
metadata=metadatas,
embedding_data=texts,
embedding_tensor="embedding",
embedding_function=kwargs.get("embedding_function")
or self._embedding_function.embed_documents, # type: ignore
embedding_function=self._embedding_function.embed_documents, # type: ignore
return_ids=True,
**kwargs,
)
Expand Down Expand Up @@ -707,6 +715,7 @@ def from_texts(
metadatas: Optional[List[dict]] = None,
ids: Optional[List[str]] = None,
dataset_path: str = _LANGCHAIN_DEFAULT_DEEPLAKE_PATH,
embedding_function: Optional[Embeddings] = None,
**kwargs: Any,
) -> DeepLake:
"""Create a Deep Lake dataset from a raw documents.
Expand Down Expand Up @@ -753,20 +762,19 @@ def from_texts(
ValueError: If 'embedding' is provided in kwargs. This is deprecated,
please use `embedding_function` instead.
"""
if kwargs.get("embedding"):
if embedding:
raise ValueError(
"using embedding as embedidng_functions is deprecated. "
"Please use `embedding_function` instead."
)

deeplake_dataset = cls(
dataset_path=dataset_path, embedding_function=embedding, **kwargs
dataset_path=dataset_path, embedding_function=embedding_function, **kwargs
)
deeplake_dataset.add_texts(
texts=texts,
metadatas=metadatas,
ids=ids,
embedding_function=embedding.embed_documents, # type: ignore
)
return deeplake_dataset

Expand Down
Loading

0 comments on commit 7bb8434

Please sign in to comment.