Skip to content

Commit

Permalink
feat(document-search) add location metadata to document (#122)
Browse files Browse the repository at this point in the history
  • Loading branch information
kdziedzic68 authored Oct 30, 2024
1 parent c1c019f commit b245164
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,23 @@
from ragbits.document_search.documents.document import DocumentMeta


class ElementLocation(BaseModel):
    """
    Describes where a chunk sits inside its source document.

    Both fields are optional and default to ``None``.
    """

    # Page number associated with the chunk, when one is available.
    page_number: int | None = None
    # Coordinate information for the chunk, kept as a plain dictionary.
    coordinates: dict | None = None


class Element(BaseModel, ABC):
"""
An object representing an element in a document.
"""

element_type: str
document_meta: DocumentMeta
location: ElementLocation | None = None

_elements_registry: ClassVar[dict[str, type["Element"]]] = {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ragbits.core.llms.base import LLM
from ragbits.core.prompt.base import BasePrompt
from ragbits.document_search.documents.document import DocumentMeta
from ragbits.document_search.documents.element import TextElement
from ragbits.document_search.documents.element import ElementLocation, TextElement


def to_text_element(element: UnstructuredElement, document_meta: DocumentMeta) -> TextElement:
    """
    Builds a text element out of an element produced by unstructured.

    Args:
        element: element from unstructured
        document_meta: document metadata attached to the resulting element

    Returns:
        text element
    """
    return TextElement(
        location=to_element_location(element),
        document_meta=document_meta,
        content=element.text,
    )


def to_element_location(element: UnstructuredElement) -> ElementLocation:
    """
    Extracts location information from an unstructured element.

    Args:
        element: element from unstructured

    Returns:
        element location populated from the ``page_number`` and ``coordinates``
        entries of the element metadata; an absent entry is passed as ``None``
    """
    element_metadata = element.metadata.to_dict()
    location_fields = {key: element_metadata.get(key) for key in ("page_number", "coordinates")}
    return ElementLocation(**location_fields)


Expand Down

0 comments on commit b245164

Please sign in to comment.