Skip to content

Commit

Permalink
Fixes after review
Browse files Browse the repository at this point in the history
  • Loading branch information
akotyla committed Oct 15, 2024
1 parent fb38633 commit f1094b8
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ragbits.core.vector_store import VectorStore
from ragbits.document_search.documents.document import Document, DocumentMeta
from ragbits.document_search.documents.element import Element
from ragbits.document_search.documents.sources import Source
from ragbits.document_search.documents.sources import GCSSource, LocalFileSource, Source
from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter
from ragbits.document_search.ingestion.providers.base import BaseProvider
from ragbits.document_search.retrieval.rephrasers.base import QueryRephraser
Expand Down Expand Up @@ -80,7 +80,9 @@ async def search(self, query: str, search_config: SearchConfig = SearchConfig())
return self.reranker.rerank(elements)

async def ingest_document(
self, document: Union[DocumentMeta, Document, Source], document_processor: Optional[BaseProvider] = None
self,
document: Union[DocumentMeta, Document, Union[LocalFileSource, GCSSource]],
document_processor: Optional[BaseProvider] = None,
) -> None:
"""
Ingest a document.
Expand All @@ -92,8 +94,7 @@ async def ingest_document(
"""

if isinstance(document, Source):
local_path = await document.fetch()
document_meta = DocumentMeta.from_local_path(local_path)
document_meta = await DocumentMeta.from_source(document)
elif isinstance(document, DocumentMeta):
document_meta = document
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,24 @@ def from_local_path(cls, local_path: Path) -> "DocumentMeta":
source=LocalFileSource(path=local_path),
)

@classmethod
async def from_source(cls, source: Union[LocalFileSource, GCSSource]) -> "DocumentMeta":
    """
    Build document metadata for a document held by the given source.

    The source is fetched first; the document type is then derived from the
    suffix of the path returned by the fetch.

    Args:
        source: The source from which the document is fetched.

    Returns:
        The document metadata, pointing at the original ``source``.
    """
    fetched_path = await source.fetch()

    # Strip the first character of the suffix (the leading dot for a
    # pathlib-style path) to get the bare extension.
    extension = fetched_path.suffix[1:]
    document_type = DocumentType(extension)

    return cls(document_type=document_type, source=source)


class Document(BaseModel):
"""
Expand Down

0 comments on commit f1094b8

Please sign in to comment.