Skip to content
This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Commit

Permalink
Fix comments
Browse files Browse the repository at this point in the history
  • Loading branch information
izellevy committed Dec 7, 2023
1 parent 550c6bd commit b1ef462
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 29 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Canopy

<p align="center">
<a href="https://pypi.org/project/canopy-sdk" target="_blank">
<a href="https://pypi.org/project/fastapi" target="_blank">
<img src="https://img.shields.io/pypi/pyversions/canopy-sdk" alt="Supported Python versions">
</a>
<a href="https://pypi.org/project/canopy-sdk" target="_blank">
<a href="https://pypi.org/project/fastapi" target="_blank">
<img src="https://img.shields.io/pypi/v/canopy-sdk?label=pypi%20package" alt="Package version">
</a>
</p>
Expand Down
4 changes: 0 additions & 4 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,6 @@ chat_engine:
knowledge_base:
params:
default_top_k: 5 # The default number of document chunks to retrieve for each query
# index_params: # Optional - index creation parameters for `create_canopy_index()` or `canopy new`
# metric: cosine
# pod_type: p1


# --------------------------------------------------------------------------
# Configuration for the Chunker subcomponent of the knowledge base.
Expand Down
33 changes: 12 additions & 21 deletions src/canopy/knowledge_base/knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
RESERVED_METADATA_KEYS = {"document_id", "text", "source"}

DELETE_STARTER_BATCH_SIZE = 30

DELETE_STARTER_CHUNKS_PER_DOC = 32


Expand Down Expand Up @@ -190,14 +189,13 @@ def __init__(self,
else:
self._pinecone_client = _get_global_client()

# Normally, index creation params are passed directly to the `.create_canopy_index()` method. # noqa: E501
# However, when KnowledgeBase is initialized from a config file, these params
# would be set by the `KnowledgeBase.from_config()` constructor.
self._index_params: Dict[str, Any] = {}

# The index object is initialized lazily, when the user calls `connect()` or
# `create_canopy_index()`
self._index: Optional[Index] = None
self._default_spec = ServerlessSpec(
cloud='aws',
region='us-west-2'
)

def _connect_index(self) -> None:
if self.index_name not in list_canopy_indexes(self._pinecone_client):
Expand Down Expand Up @@ -262,12 +260,9 @@ def verify_index_connection(self) -> None:
) from e

def create_canopy_index(self,
spec: Union[Dict, ServerlessSpec, PodSpec] = ServerlessSpec(
cloud='aws',
region='us-west-2'
),
spec: Union[Dict, ServerlessSpec, PodSpec] = None,
dimension: Optional[int] = None,
index_params: Optional[dict] = None
metric: Optional[str] = "cosine"
):
"""
Creates the underlying Pinecone index that will be used by the KnowledgeBase.
Expand All @@ -292,12 +287,14 @@ def create_canopy_index(self,
dimension: The dimension of the vectors to index.
If `dimension` isn't explicitly provided,
Canopy will try to infer the embedding's dimension from the configured `Encoder`.
index_params: A dictionary of parameters to pass to the index creation API.
For example, you can set the index's number of replicas by passing {"replicas": 2}.
see https://docs.pinecone.io/docs/python-client#create_index
metric: The distance metric to be used for similarity search: 'euclidean', 'cosine', or 'dotproduct'. The
default is 'cosine'.
""" # noqa: E501

if spec is None:
spec = self._default_spec

if dimension is None:
try:
encoder_dimension = self._encoder.dimension
Expand All @@ -318,15 +315,13 @@ def create_canopy_index(self,
"If you wish to delete it, use `delete_index()`. "
)

# create index
index_params = index_params or self._index_params
try:
self._pinecone_client.create_index(
name=self.index_name,
dimension=dimension,
spec=spec,
timeout=TIMEOUT_INDEX_CREATE,
**index_params)
metric=metric)
except (Exception, PineconeApiException) as e:
raise RuntimeError(
f"Failed to create index {self.index_name} due to error: "
Expand Down Expand Up @@ -633,11 +628,7 @@ def from_config(cls,
)
config['params']['index_name'] = index_name

# If the config includes an 'index_params' key, they need to be saved until
# the index is created, and then passed to the index creation method.
index_params = config['params'].pop('index_params', {})
kb = cls._from_config(config)
kb._index_params = index_params
return kb

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def assert_vector_ids_not_exist(vector_ids: List[str],

@retry(reraise=True, stop=stop_after_attempt(5), wait=wait_random(min=10, max=20))
def try_create_canopy_index(kb: KnowledgeBase):
kb.create_canopy_index(index_params={"metric": "dotproduct"})
kb.create_canopy_index(metric="dotproduct")


@pytest.fixture(scope="module")
Expand Down
2 changes: 1 addition & 1 deletion tests/system/knowledge_base/test_knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def encoder():

@retry(reraise=True, stop=stop_after_attempt(5), wait=wait_random(min=10, max=20))
def try_create_canopy_index(kb: KnowledgeBase):
kb.create_canopy_index(index_params={"metric": "dotproduct"})
kb.create_canopy_index(metric="dotproduct")


@pytest.fixture(scope="module", autouse=True)
Expand Down

0 comments on commit b1ef462

Please sign in to comment.