Skip to content

Commit

Permalink
pre-commits
Browse files — browse the repository at this point in the history
  • Loading branch information
shreyaspimpalgaonkar committed Oct 20, 2024
1 parent 1161b47 commit 5cb14ab
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions py/core/main/services/ingestion_service.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -362,7 +362,7 @@ async def _get_enriched_chunk_text(
chunk_enrichment_settings: ChunkEnrichmentSettings,
document_chunks: list[dict],
document_chunks_dict: dict,
) -> str:
) -> VectorEntry:
# get chunks in context

context_chunk_ids = []
Expand Down Expand Up @@ -394,7 +394,7 @@ async def _get_enriched_chunk_text(
for neighbor in semantic_neighbors:
context_chunk_ids.append(neighbor["extraction_id"])

context_chunk_ids = set(context_chunk_ids)
context_chunk_ids = list(set(context_chunk_ids))

context_chunk_texts = []
for context_chunk_id in context_chunk_ids:
Expand Down Expand Up @@ -437,7 +437,7 @@ async def _get_enriched_chunk_text(
chunk["metadata"]["chunk_enrichment_status"] = "success"

data = await self.providers.embedding.async_get_embedding(
updated_chunk_text
updated_chunk_text or chunk["text"]
)

chunk["metadata"]["original_text"] = chunk["text"]
Expand All @@ -450,19 +450,19 @@ async def _get_enriched_chunk_text(
document_id=document_id,
user_id=chunk["user_id"],
collection_ids=chunk["collection_ids"],
text=updated_chunk_text,
text=updated_chunk_text or chunk["text"],
metadata=chunk["metadata"],
)

return vector_entry_new

async def chunk_enrichment(self, document_id: UUID) -> None:
async def chunk_enrichment(self, document_id: UUID) -> int:
# just call the pipe on every chunk of the document

# TODO: Why is the config not recognized as an ingestionconfig but as a providerconfig?
chunk_enrichment_settings = (
self.providers.ingestion.config.chunk_enrichment_settings
self.providers.ingestion.config.chunk_enrichment_settings # type: ignore
)

# get all document_chunks
document_chunks = self.providers.database.vector.get_document_chunks(
document_id=document_id,
Expand Down

0 comments on commit 5cb14ab

Please sign in to comment.