Skip to content

Commit

Permalink
refactor: simplify gdrive_download
Browse files Browse the repository at this point in the history
  • Loading branch information
milovate committed Dec 20, 2024
1 parent dcea38b commit 6b041e5
Showing 1 changed file with 10 additions and 48 deletions.
58 changes: 10 additions & 48 deletions daras_ai_v2/gdrive_downloader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import io
import mimetypes

from furl import furl
import requests

Expand Down Expand Up @@ -71,15 +71,17 @@ def gdrive_download(f: furl, mime_type: str, export_links: dict) -> tuple[bytes,
# get metadata
service = discovery.build("drive", "v3")

docs_export_mimetype = {
"application/vnd.google-apps.document": "text/plain",
"application/vnd.google-apps.spreadsheet": "text/csv",
"application/vnd.google-apps.presentation": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/vnd.google-apps.drawing": "application/pdf",
}

if f.host != "drive.google.com":
# export google docs to appropriate type
export_mime_type, _, is_google_workspace_doc = docs_export_mimetype(
f, mime_type
)

if is_google_workspace_doc and (
f_url_export := export_links.get(export_mime_type, None)
):
export_mime_type = docs_export_mimetype.get(mime_type, mime_type)
if f_url_export := export_links.get(export_mime_type, None):
r = requests.get(f_url_export)
file_bytes = r.content
raise_for_status(r, is_user_url=True)
Expand All @@ -101,46 +103,6 @@ def gdrive_download(f: furl, mime_type: str, export_links: dict) -> tuple[bytes,
return file_bytes, mime_type


def docs_export_mimetype(f: furl, mime_type) -> tuple[str, str, bool]:
    """
    Return the mimetype to export google docs - https://developers.google.com/drive/api/guides/ref-export-formats

    For a supported Google Workspace mime type, the export format is chosen
    from the kind of editor named in the URL path (document, spreadsheets,
    presentation, drawings). Any other mime type is passed through unchanged
    and its extension is guessed with ``mimetypes``.

    Args:
        f (furl): google docs link
        mime_type (str): mime type of the file
    Returns:
        tuple[str, str, bool]: (mime_type, extension, is_google_workspace_supported)
    Raises:
        ValueError: if the mime type is a supported Google Workspace type but
            the URL path names none of the known editors.
    """

    supported_mimetypes = {
        "application/vnd.google-apps.spreadsheet",
        "application/vnd.google-apps.presentation",
        "application/vnd.google-apps.drawing",
        "application/vnd.google-apps.document",
    }

    # URL path segment -> (export mime type, file extension).
    # Insertion order is the lookup priority.
    export_formats = {
        "document": ("text/plain", ".txt"),
        "spreadsheets": ("text/csv", ".csv"),
        "presentation": (
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            ".pptx",
        ),
        "drawings": ("application/pdf", ".pdf"),
    }

    is_google_workspace_supported = mime_type in supported_mimetypes

    if not is_google_workspace_supported:
        # guess_extension() already includes the leading dot and returns None
        # for unknown types, so do not prepend another "." and fall back to "".
        ext = mimetypes.guess_extension(mime_type) or ""
        return mime_type, ext, is_google_workspace_supported

    segments = f.path.segments
    for segment, (export_mime_type, ext) in export_formats.items():
        if segment in segments:
            return export_mime_type, ext, is_google_workspace_supported

    raise ValueError(f"Not sure how to export google docs url: {str(f)!r}")


def gdrive_metadata(file_id: str) -> dict:
from googleapiclient import discovery

Expand Down

0 comments on commit 6b041e5

Please sign in to comment.