Skip to content

Commit

Permalink
Community GoogleDriveLoader: Update drive.py to also parse google sli…
Browse files Browse the repository at this point in the history
…des (#633)

* Update drive.py to also parse Google Slides

Slides also support exporting their text via the same mechanism, so this change adds them to the types of docs that are searched and exported.

* formatting
  • Loading branch information
jeffbryner authored Dec 11, 2024
1 parent 5c453e0 commit 94f04cc
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions libs/community/langchain_google_community/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def validate_inputs(cls, values: Dict[str, Any]) -> Any:
"document": "application/vnd.google-apps.document",
"sheet": "application/vnd.google-apps.spreadsheet",
"pdf": "application/pdf",
"presentation": "application/vnd.google-apps.presentation",
}
allowed_types = list(type_mapping.keys()) + list(type_mapping.values())
short_names = ", ".join([f"'{x}'" for x in type_mapping.keys()])
Expand Down Expand Up @@ -376,7 +377,11 @@ def _load_document_from_id(self, id: str) -> Document:

file = (
service.files()
.get(fileId=id, supportsAllDrives=True, fields="modifiedTime,name")
.get(
fileId=id,
supportsAllDrives=True,
fields="modifiedTime,name,webViewLink",
)
.execute()
)
request = service.files().export_media(fileId=id, mimeType="text/plain")
Expand All @@ -395,7 +400,7 @@ def _load_document_from_id(self, id: str) -> Document:

text = fh.getvalue().decode("utf-8")
metadata = {
"source": f"https://docs.google.com/document/d/{id}/edit",
"source": f"{file.get('webViewLink')}",
"title": f"{file.get('name')}",
"when": f"{file.get('modifiedTime')}",
}
Expand Down Expand Up @@ -426,7 +431,10 @@ def _load_documents_from_folder(
for file in _files:
if file["trashed"] and not self.load_trashed_files:
continue
elif file["mimeType"] == "application/vnd.google-apps.document":
elif file["mimeType"] in [
"application/vnd.google-apps.document",
"application/vnd.google-apps.presentation",
]:
returns.append(self._load_document_from_id(file["id"])) # type: ignore
elif file["mimeType"] == "application/vnd.google-apps.spreadsheet":
returns.extend(self._load_sheet_from_id(file["id"])) # type: ignore
Expand Down

0 comments on commit 94f04cc

Please sign in to comment.