diff --git a/examples/apps/documents_chat.py b/examples/apps/documents_chat.py index 8f7f24a4..576d6e37 100644 --- a/examples/apps/documents_chat.py +++ b/examples/apps/documents_chat.py @@ -125,7 +125,9 @@ async def _handle_message( if not self._documents_ingested: yield self.NO_DOCUMENTS_INGESTED_MESSAGE results = await self.document_search.search(message[-1]) - prompt = RAGPrompt(QueryWithContext(query=message, context=[i.text_representation for i in results])) + prompt = RAGPrompt( + QueryWithContext(query=message, context=[i.text_representation for i in results if i.text_representation]) + ) response = await self._llm.generate(prompt) yield response.answer diff --git a/packages/ragbits-core/src/ragbits/core/embeddings/base.py b/packages/ragbits-core/src/ragbits/core/embeddings/base.py index 2ef2b9b4..83cbbfe6 100644 --- a/packages/ragbits-core/src/ragbits/core/embeddings/base.py +++ b/packages/ragbits-core/src/ragbits/core/embeddings/base.py @@ -6,6 +6,7 @@ class EmbeddingTypes(Enum): """ Enum for listing supported embedding types """ + TEXT: str = "text" IMAGE: str = "image" diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 4dec6e14..45510c0e 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -152,7 +152,7 @@ async def insert_elements(self, elements: list[Element]) -> None: """ elements_with_text = [element for element in elements if element.key] images_with_text = [element for element in elements_with_text if isinstance(element, ImageElement)] - vectors = await self.embedder.embed_text([element.key for element in elements_with_text]) + vectors = await self.embedder.embed_text([element.key for element in elements_with_text if element.key]) image_elements = [element for element in elements if isinstance(element, ImageElement)] @@ -160,7 
+160,7 @@ async def insert_elements(self, elements: list[Element]) -> None: if num_images_with_no_textual_repr > 0: warnings.warn( f"{len(image_elements) - len(images_with_text)} of {len(image_elements)}" - f"Have no textual representation" + " Have no textual representation and have not been text embedded" ) entries = [ diff --git a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py index a6856868..16070439 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/documents/element.py @@ -62,7 +62,7 @@ def key(self) -> str | None: @computed_field # type: ignore[prop-decorator] @property @abstractmethod - def text_representation(self) -> str: + def text_representation(self) -> str | None: """ Get the text representation of the element. @@ -116,7 +116,7 @@ def to_vector_db_entry(self, vector: list[float], embedding_type: EmbeddingTypes metadata["embedding_type"] = str(embedding_type) return VectorStoreEntry( id=vector_store_entry_id, - key=self.key, + key=self.key or "null", vector=vector, metadata=metadata, )