Skip to content

Commit

Permalink
Merge pull request #61 from uc-cdis/fix/vectorstore
Browse files Browse the repository at this point in the history
fix(vectorstore): fix issue where no documents were loaded and only a…
  • Loading branch information
Avantol13 authored Apr 30, 2024
2 parents 4704c0e + e21513e commit a49906b
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 46 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: git@github.com:Yelp/detect-secrets
rev: v0.13.1
rev: v1.4.0
hooks:
- id: detect-secrets
args: ['--baseline', '.secrets.baseline']
Expand Down
9 changes: 5 additions & 4 deletions gen3discoveryai/topic_chains/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,13 @@ def insert_documents_into_vectorstore(self, documents: list[Document]) -> None:
Args:
documents (list[langchain.schema.document.Document]): IDs to Documents to store in the knowledge store
"""
- if not self.vectorstore:
- logging.warning(
+ if self.vectorstore is None:
+ msg = (
f"Attempted to insert documents into a TopicChain {self.name} "
- f"for topic {self.topic} that doesn't have a configured vectorstore"
+ f"for topic '{self.topic}' which doesn't have a configured vectorstore"
)
- return
+ logging.error(msg)
+ raise Exception(msg)

logging.info(
f"Recreating knowledge store collection for {self.topic} from documents..."
Expand Down
8 changes: 5 additions & 3 deletions gen3discoveryai/topic_chains/question_answer_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,17 @@ def __init__(self, topic: str, metadata: Dict[str, Any] = None) -> None:
client_settings=settings,
)

- logging.debug("chroma vectorstore initialized")
+ logging.debug(
+ f"chroma vectorstore initialized from ./knowledge/{topic} with docs"
+ )

retriever_cfg = {
"k": num_similar_docs_to_find,
"score_threshold": similarity_score_threshold,
}
logging.debug(f"retreiver search_kwargs: {retriever_cfg}")

- retreival_qa_chain = RetrievalQA.from_chain_type(
+ retrieval_qa_chain = RetrievalQA.from_chain_type(
self.llm,
retriever=vectorstore.as_retriever(
search_type="similarity_score_threshold",
Expand All @@ -153,7 +155,7 @@ def __init__(self, topic: str, metadata: Dict[str, Any] = None) -> None:
super().__init__(
name=self.NAME,
topic=topic,
- chain=retreival_qa_chain,
+ chain=retrieval_qa_chain,
vectorstore=vectorstore,
)

Expand Down
59 changes: 30 additions & 29 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 3 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "gen3discoveryai"
- version = "0.1.0"
+ version = "1.0.0"
description = "Gen3 Discovery AI Service"
authors = ["CTDS UChicago <[email protected]>"]
license = "Apache-2.0"
Expand All @@ -10,12 +10,6 @@ packages = [{include = "gen3discoveryai"}]
[tool.poetry.dependencies]
python = ">=3.9,<3.10.dev0"

- # TODO: REMOVE WHEN onnxruntime bug is resolved https://github.com/microsoft/onnxruntime/issues/18065
- onnxruntime = "==1.16.1"
-
- # TODO: Unpin when requests updates to include a version later than this
- certifi = ">=2023.7.22"
-
aiohttp = ">=3.8.4"
aiosignal = ">=1.3.1"
async = ">=0.6.2"
Expand Down Expand Up @@ -57,7 +51,7 @@ selenium = ">=4.18.1"


[tool.poetry.group.dev.dependencies]
- # <8.0.0 is temporary, try removing. It was causing issues because the
+ # <8.0.0 is temporary, try removing. It was causing issues because the
# underlying pytest-* libraries hadn't updated yet to fix some breaking changes
pytest = ">=7.3.2,<8.0.0"
uvicorn = ">=0.22.0"
Expand All @@ -72,7 +66,7 @@ pytest-profiling = "^1.7.0"
[tool.pytest.ini_options]
# Better default `pytest` command which adds coverage
#
- # WARNING: overriding default `pytest` command to include all this coverage
+ # WARNING: overriding default `pytest` command to include all this coverage
# may interfere with some debuggers (like PyCharm's), so it may not stop
# on breakpoints. If you run into this issue, you can comment
# the addopts out below and then run the pytest command with all these options
Expand Down

0 comments on commit a49906b

Please sign in to comment.