Skip to content

Commit

Permalink
Merge pull request #3271 from QuivrHQ/feat/url-add-knowledge
Browse files Browse the repository at this point in the history
feat: url add knowledge
  • Loading branch information
AmineDiro authored Oct 14, 2024
2 parents 87a7116 + fcf2b34 commit 2fe24d2
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,15 @@ async def create_knowledge(
"knowledge_id": knowledge_db.id,
},
)
return knowledge_db
else:
knowledge_db = await self.repository.update_knowledge(
knowledge_db,
KnowledgeUpdate(status=KnowledgeStatus.UPLOADED),
autocommit=autocommit,
)
return knowledge_db

return knowledge_db
except Exception as e:
logger.exception(
f"Error uploading knowledge {knowledgedb.id} to storage : {e}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,30 @@ async def test_create_knowledge_file(session: AsyncSession, user: User):
storage.knowledge_exists(km)


@pytest.mark.asyncio(loop_scope="session")
async def test_create_knowledge_web(session: AsyncSession, user: User):
    """Create a WEB-sourced knowledge from a URL, without any uploaded file.

    Checks that the returned knowledge has an id, keeps the requested URL,
    is marked ``UPLOADED`` and is not a folder.
    """
    assert user.id

    # Wire the service against the FakeStorage test double.
    fake_storage = FakeStorage()
    knowledge_service = KnowledgeService(KnowledgeRepository(session), fake_storage)

    web_request = AddKnowledge(
        url="http://quivr.app",
        source=KnowledgeSource.WEB,
        is_folder=False,
        parent_id=None,
    )

    # No file accompanies a URL-based knowledge, hence upload_file=None.
    created = await knowledge_service.create_knowledge(
        user_id=user.id,
        knowledge_to_add=web_request,
        upload_file=None,
    )

    assert created.id
    assert created.url == web_request.url
    assert created.status == KnowledgeStatus.UPLOADED
    assert not created.is_folder


@pytest.mark.asyncio(loop_scope="session")
async def test_create_knowledge_folder(session: AsyncSession, user: User):
assert user.id
Expand Down Expand Up @@ -555,7 +579,7 @@ async def test_create_knowledge_folder(session: AsyncSession, user: User):


@pytest.mark.asyncio(loop_scope="session")
async def test_create_knowledge_file_in_folder(
async def test_create_knowledge_file_in_folder_in_brain(
monkeypatch, session: AsyncSession, user: User, folder_km_brain: KnowledgeDB
):
tasks = {}
Expand Down Expand Up @@ -948,10 +972,6 @@ async def test_list_knowledge_root(session: AsyncSession, user: User):
assert len(root_kms) == 2
assert {k.id for k in root_kms} == {root_folder.id, root_file.id}

# check order
assert root_kms[0].file_name == "folder"
assert root_kms[1].file_name == "file_1"


@pytest.mark.asyncio(loop_scope="session")
async def test_list_knowledge(session: AsyncSession, user: User):
Expand Down
13 changes: 11 additions & 2 deletions backend/worker/quivr_worker/process/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,17 @@ If an exception occurs during the parsing loop, the following steps are taken:
- This operation should be rolled back if an error occurs afterward. Otherwise, the knowledge could remain in `Processing` or `ERROR` status with associated vectors.
- Reprocessing the knowledge would result in reinserting the vectors into the database, leading to duplicate vectors for the same knowledge.

2. Set the knowledge status to `ERROR`.
3. Continue processing.
**Transaction Safety for Each Operation:**

- **Creating knowledge and linking to brains**: These operations can be retried safely. Knowledge is only recreated if it does not already exist in the database, allowing for safe retry.
- **Downloading sync files**: This operation is idempotent and safe to retry. If a change has occurred, we download the latest version of the file.
- **Linking knowledge to brains**: Only links the brain if it is not already associated with the knowledge. Safe for retry.
- **Creating vectors**:
- This operation should be rolled back if an error occurs afterward. Otherwise, the knowledge could remain in `Processing` or `ERROR` status with associated vectors.
- Reprocessing the knowledge would result in reinserting the vectors into the database, leading to duplicate vectors for the same knowledge.

1. Set the knowledge status to `ERROR`.
2. Continue processing.

| Note: This means that some knowledges will remain in an errored state. Currently, they are not automatically rescheduled for processing.

Expand Down

0 comments on commit 2fe24d2

Please sign in to comment.