Skip to content

Commit

Permalink
Add methods to convert from Document to Weaviate data object and vice…
Browse files Browse the repository at this point in the history
…versa
  • Loading branch information
silvanocerza committed Jan 25, 2024
1 parent 7e21bd8 commit ef9c0f8
Showing 1 changed file with 24 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0
import base64
from dataclasses import asdict
from typing import Any, Dict, List, Optional, Tuple, Union

Expand All @@ -12,6 +13,7 @@
from weaviate.auth import AuthCredentials
from weaviate.config import Config, ConnectionConfig
from weaviate.embedded import EmbeddedOptions
from weaviate.util import generate_uuid5

Number = Union[int, float]
TimeoutType = Union[Tuple[Number, Number], Number]
Expand Down Expand Up @@ -195,6 +197,28 @@ def count_documents(self) -> int:
def filter_documents(self, filters: Optional[Dict[str, Any]] = None) -> List[Document]:  # noqa: ARG002
    """
    Return the Documents matching the given filters.

    Filtering is not implemented here yet: `filters` is accepted but not read,
    and the result is always a new empty list.

    :param filters: Filters to apply; currently unused.
    :returns: An empty list.
    """
    matching_documents: List[Document] = []
    return matching_documents

def _to_data_object(self, document: Document) -> Dict[str, Any]:
    """
    Convert a Document to a Weaviate data object ready to be saved.

    The returned dictionary starts from `document.to_dict(flatten=False)` and is changed as follows:

    - `id` is moved to `_original_id`
    - a `blob` that is not None is replaced by `blob_data` (base64 encoded string)
      and `blob_mime_type`; a None `blob` is simply dropped
    - `embedding` is removed
    - `meta` is removed when it is an empty dictionary

    :param document: The Document to convert.
    :returns: The dictionary of properties to store for this Document.
    """
    data = document.to_dict(flatten=False)
    # Weaviate forces a UUID as an id.
    # We don't know if the id of our Document is a UUID or not, so we save it on a different field
    # and let Weaviate generate a UUID that we're going to ignore completely.
    data["_original_id"] = data.pop("id")
    if (blob := data.pop("blob")) is not None:
        # Weaviate wants the blob data as a base64 encoded string
        # See the official docs for more information:
        # https://weaviate.io/developers/weaviate/config-refs/datatypes#datatype-blob
        data["blob_data"] = base64.b64encode(bytes(blob.pop("data"))).decode()
        data["blob_mime_type"] = blob.pop("mime_type")
    # The embedding vector is stored separately from the rest of the data
    del data["embedding"]

    # Weaviate doesn't like empty objects, let's delete meta if it's empty
    if data["meta"] == {}:
        del data["meta"]

    # Hand the converted object back to the caller; without this the method returned None.
    return data

def write_documents(
self,
documents: List[Document], # noqa: ARG002
Expand Down

0 comments on commit ef9c0f8

Please sign in to comment.