Skip to content

Commit

Permalink
Initial version of GCS source
Browse files Browse the repository at this point in the history
  • Loading branch information
akotyla committed Sep 18, 2024
1 parent 5a6ebe7 commit ddfda9a
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 0 deletions.
5 changes: 5 additions & 0 deletions packages/ragbits-document-search/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ dependencies = [
"ragbits"
]

[project.optional-dependencies]
google-cloud-storage = [
"gcloud-aio-storage~=9.3.0"
]

[tool.uv]
dev-dependencies = [
"pre-commit~=3.8.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@

from pydantic import BaseModel

try:
from gcloud.aio.storage import Storage

HAS_GCLOUD_AIO = True
except ImportError:
HAS_GCLOUD_AIO = False


class Source(BaseModel, ABC):
"""
Expand Down Expand Up @@ -54,3 +61,44 @@ async def fetch(self) -> Path:
The local path to the object fetched from the source.
"""
return self.path


class GoogleCloudStorageSource(Source):
    """
    An object representing a GCS file source.

    The remote object is addressed by ``bucket`` and ``object_name``; ``path`` is the
    local destination the object is downloaded to by ``fetch``.
    """

    source_type: Literal["google_cloud_storage_file"] = "google_cloud_storage_file"

    # Name of the GCS bucket that holds the object.
    bucket: str
    # Name (key) of the object inside the bucket.
    object_name: str

    # Local filesystem path the object is written to when fetched.
    path: Path

    def get_id(self) -> str:
        """
        Get unique identifier of the object in the source.

        The identifier is built from the remote location (bucket and object name) only,
        so it does not change with the local download destination.

        Returns:
            Unique identifier.
        """
        return f"bucket_name: {self.bucket}\nobject_name: {self.object_name}"

    async def fetch(self) -> Path:
        """
        Fetch the source.

        Downloads the object ``object_name`` from ``bucket`` and stores it under ``path``.

        Returns:
            The local path to the object fetched from the source.

        Raises:
            ImportError: If the optional 'gcloud-aio-storage' package is not installed.
        """
        # HAS_GCLOUD_AIO is set by the guarded import at module level; without the
        # package the name ``Storage`` is undefined, so fail early with a clear message.
        if not HAS_GCLOUD_AIO:
            raise ImportError(
                "You need to install the 'gcloud-aio-storage' package to use Google Cloud Storage"
            )

        async with Storage() as client:
            await client.download_to_filename(
                bucket=self.bucket,
                object_name=self.object_name,
                # Pass a plain string path, the type the client declares for ``filename``.
                filename=str(self.path),
            )

        return self.path

0 comments on commit ddfda9a

Please sign in to comment.