diff --git a/daras_ai_v2/gdrive_downloader.py b/daras_ai_v2/gdrive_downloader.py
index 2720b19a9..3f18f7e1d 100644
--- a/daras_ai_v2/gdrive_downloader.py
+++ b/daras_ai_v2/gdrive_downloader.py
@@ -1,9 +1,11 @@
 import io
 
 from furl import furl
+import requests
 
 from daras_ai_v2.exceptions import UserError
 from daras_ai_v2.functional import flatmap_parallel
+from daras_ai_v2.exceptions import raise_for_status
 
 
 def is_gdrive_url(f: furl) -> bool:
@@ -60,7 +62,7 @@ def gdrive_list_urls_of_files_in_folder(f: furl, max_depth: int = 4) -> list[str
     return filter(None, urls)
 
 
-def gdrive_download(f: furl, mime_type: str) -> tuple[bytes, str]:
+def gdrive_download(f: furl, mime_type: str, export_links: dict) -> tuple[bytes, str]:
     from googleapiclient import discovery
     from googleapiclient.http import MediaIoBaseDownload
 
@@ -68,19 +70,20 @@ def gdrive_download(f: furl, mime_type: str) -> tuple[bytes, str]:
     file_id = url_to_gdrive_file_id(f)
     # get metadata
     service = discovery.build("drive", "v3")
-    # get files in drive directly
-    if f.host == "drive.google.com":
-        request = service.files().get_media(
-            fileId=file_id,
-            supportsAllDrives=True,
-        )
-    # export google docs to appropriate type
-    else:
-        mime_type, _ = docs_export_mimetype(f)
-        request = service.files().export_media(
-            fileId=file_id,
-            mimeType=mime_type,
-        )
+
+    if f.host != "drive.google.com":
+        # export google docs to appropriate type
+        export_mime_type, _ = docs_export_mimetype(f)
+        if f_url_export := export_links.get(export_mime_type, None):
+            r = requests.get(f_url_export)
+            raise_for_status(r, is_user_url=True)
+            file_bytes = r.content
+            return file_bytes, export_mime_type
+
+    request = service.files().get_media(
+        fileId=file_id,
+        supportsAllDrives=True,
+    )
     # download
     file = io.BytesIO()
     downloader = MediaIoBaseDownload(file, request)
@@ -88,8 +91,9 @@ def gdrive_download(f: furl, mime_type: str) -> tuple[bytes, str]:
     while done is False:
         _, done = downloader.next_chunk()
         # print(f"Download {int(status.progress() * 100)}%")
-    f_bytes = file.getvalue()
-    return f_bytes, mime_type
+    file_bytes = file.getvalue()
+
+    return file_bytes, mime_type
 
 
 def docs_export_mimetype(f: furl) -> tuple[str, str]:
@@ -109,8 +113,10 @@ def docs_export_mimetype(f: furl) -> tuple[str, str]:
         mime_type = "text/csv"
         ext = ".csv"
     elif "presentation" in f.path.segments:
-        mime_type = "application/pdf"
-        ext = ".pdf"
+        mime_type = (
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation"
+        )
+        ext = ".pptx"
     elif "drawings" in f.path.segments:
         mime_type = "application/pdf"
         ext = ".pdf"
@@ -128,7 +134,7 @@ def gdrive_metadata(file_id: str) -> dict:
         .get(
             supportsAllDrives=True,
             fileId=file_id,
-            fields="name,md5Checksum,modifiedTime,mimeType,size",
+            fields="name,md5Checksum,modifiedTime,mimeType,size,exportLinks",
         )
         .execute()
     )
diff --git a/daras_ai_v2/glossary.py b/daras_ai_v2/glossary.py
index 87618b87c..77c252173 100644
--- a/daras_ai_v2/glossary.py
+++ b/daras_ai_v2/glossary.py
@@ -15,7 +15,7 @@ def validate_glossary_document(document: str):
     metadata = doc_url_to_file_metadata(document)
 
     f_bytes, mime_type = download_content_bytes(
-        f_url=document, mime_type=metadata.mime_type
+        f_url=document, mime_type=metadata.mime_type, export_links=metadata.export_links
     )
     df = tabular_bytes_to_str_df(
         f_name=metadata.name, f_bytes=f_bytes, mime_type=mime_type
diff --git a/daras_ai_v2/office_utils_pptx.py b/daras_ai_v2/office_utils_pptx.py
index e45843e9a..780841424 100644
--- a/daras_ai_v2/office_utils_pptx.py
+++ b/daras_ai_v2/office_utils_pptx.py
@@ -34,7 +34,11 @@ def pptx_to_text_pages(f: typing.BinaryIO, use_form_reco: bool = False) -> list[
             except Exception as e:
                 slide_content.append(f"  Error processing shape: {e}")
 
+        if slide.has_notes_slide:
+            slide_content.extend(handle_author_notes(slide))
+
         slides_text.append("\n".join(slide_content) + "\n")
+
     return slides_text
 
 
@@ -43,81 +47,55 @@ def handle_text_elements(shape) -> list[str]:
     Handles text elements within a shape, including lists.
     """
     text_elements = []
-    is_a_list = False
-    is_list_group_created = False
-    enum_list_item_value = 0
-    bullet_type = "None"
-    list_label = "LIST"
     namespaces = {"a": "http://schemas.openxmlformats.org/drawingml/2006/main"}
 
-    # Identify if shape contains lists
+    current_list_type = None
+    list_item_index = 0
+
     for paragraph in shape.text_frame.paragraphs:
         p = paragraph._element
+        paragraph_text = ""
+        is_list_item = False
+
+        # Determine list type
         if p.find(".//a:buChar", namespaces=namespaces) is not None:
-            bullet_type = "Bullet"
-            is_a_list = True
+            current_list_type = "Bullet"
+            is_list_item = True
         elif p.find(".//a:buAutoNum", namespaces=namespaces) is not None:
-            bullet_type = "Numbered"
-            is_a_list = True
+            current_list_type = "Numbered"
+            is_list_item = True
+        elif paragraph.level > 0:  # Indented text is also treated as a list
+            current_list_type = "Bullet"
+            is_list_item = True
         else:
-            is_a_list = False
-
-        if paragraph.level > 0:
-            is_a_list = True
-
-        if is_a_list:
-            if bullet_type == "Numbered":
-                list_label = "ORDERED_LIST"
-
-    # Iterate through paragraphs to build up text
-    for paragraph in shape.text_frame.paragraphs:
-        p = paragraph._element
-        enum_list_item_value += 1
-        inline_paragraph_text = ""
-        inline_list_item_text = ""
-        doc_label = "PARAGRAPH"
-
-        for e in p.iterfind(".//a:r", namespaces=namespaces):
-            if len(e.text.strip()) > 0:
-                e_is_a_list_item = False
-                is_numbered = False
-                if p.find(".//a:buChar", namespaces=namespaces) is not None:
-                    bullet_type = "Bullet"
-                    e_is_a_list_item = True
-                elif p.find(".//a:buAutoNum", namespaces=namespaces) is not None:
-                    bullet_type = "Numbered"
-                    is_numbered = True
-                    e_is_a_list_item = True
-                else:
-                    e_is_a_list_item = False
-
-                if e_is_a_list_item:
-                    if len(inline_paragraph_text) > 0:
-                        text_elements.append(inline_paragraph_text)
-                    inline_list_item_text += e.text
+            current_list_type = None
+            list_item_index = 0  # Reset numbering if no list
+
+        # Process paragraph text
+        for run in p.iterfind(".//a:r", namespaces=namespaces):
+            run_text = run.text.strip() if run.text else ""
+            if run_text:
+                paragraph_text += run_text
+
+        if is_list_item:
+            if current_list_type == "Numbered":
+                list_item_index += 1
+                list_prefix = f"{list_item_index}."
+            else:
+                list_prefix = "•"  # Default bullet symbol
+            text_elements.append(f"{list_prefix} {paragraph_text}")
+        else:
+            # Handle placeholders for titles or subtitles
+            if shape.is_placeholder:
+                placeholder_type = shape.placeholder_format.type
+                if placeholder_type == PP_PLACEHOLDER.TITLE:
+                    text_elements.append(f"TITLE: {paragraph_text}")
+                elif placeholder_type == PP_PLACEHOLDER.SUBTITLE:
+                    text_elements.append(f"SECTION_HEADER: {paragraph_text}")
                 else:
-                    if shape.is_placeholder:
-                        placeholder_type = shape.placeholder_format.type
-                        if placeholder_type in [
-                            PP_PLACEHOLDER.CENTER_TITLE,
-                            PP_PLACEHOLDER.TITLE,
-                        ]:
-                            doc_label = "TITLE"
-                        elif placeholder_type == PP_PLACEHOLDER.SUBTITLE:
-                            doc_label = "SECTION_HEADER"
-                    enum_list_item_value = 0
-                    inline_paragraph_text += e.text
-
-        if len(inline_paragraph_text) > 0:
-            text_elements.append(inline_paragraph_text)
-
-        if len(inline_list_item_text) > 0:
-            enum_marker = ""
-            if is_numbered:
-                enum_marker = str(enum_list_item_value) + "."
-            if not is_list_group_created:
-                is_list_group_created = True
-            text_elements.append(f"{enum_marker} {inline_list_item_text}")
+                    text_elements.append(paragraph_text)
+            else:
+                text_elements.append(paragraph_text)
 
     return text_elements
 
 
@@ -171,7 +149,7 @@ def handle_tables(shape) -> list[str]:
     for row in grid[1:]:
         line = "|" + "|".join(row) + "|"
         table_text.append(line)
-        print(line)
+        # print(line)
 
     return table_text
 
 
@@ -207,6 +185,17 @@ def handle_charts(shape) -> list[str]:
     return chart_text
 
 
+def handle_author_notes(slide) -> list[str]:
+    """Extract speaker notes attached to a slide, prefixed with a label."""
+    notes = []
+    if slide.notes_slide.notes_text_frame:
+        notes_text = slide.notes_slide.notes_text_frame.text.strip()
+        if notes_text:
+            notes.append("Speaker Notes:")
+            notes.append(notes_text)
+    return notes
+
+
 # TODO :azure form reco to extract text from images
 def handle_pictures(shape):
     pass
diff --git a/daras_ai_v2/vector_search.py b/daras_ai_v2/vector_search.py
index f78c39260..3d54d5383 100644
--- a/daras_ai_v2/vector_search.py
+++ b/daras_ai_v2/vector_search.py
@@ -310,6 +310,7 @@ def doc_url_to_file_metadata(f_url: str) -> FileMetadata:
         etag = meta.get("md5Checksum") or meta.get("modifiedTime")
         mime_type = meta["mimeType"]
         total_bytes = int(meta.get("size") or 0)
+        export_links = meta.get("exportLinks", {})
     else:
         try:
             if is_user_uploaded_url(f_url):
@@ -327,6 +328,7 @@ def doc_url_to_file_metadata(f_url: str) -> FileMetadata:
             mime_type = None
             etag = None
             total_bytes = 0
+            export_links = {}
         else:
             name = (
                 r.headers.get("content-disposition", "")
@@ -338,6 +340,7 @@ def doc_url_to_file_metadata(f_url: str) -> FileMetadata:
                 etag = etag.strip('"')
             mime_type = get_mimetype_from_response(r)
             total_bytes = int(r.headers.get("content-length") or 0)
+            export_links = {}
     # extract filename from url as a fallback
     if not name:
         if is_user_uploaded_url(f_url):
@@ -347,9 +350,12 @@ def doc_url_to_file_metadata(f_url: str) -> FileMetadata:
     # guess mimetype from name as a fallback
     if not mime_type:
         mime_type = mimetypes.guess_type(name)[0]
-    return FileMetadata(
+
+    file_metadata = FileMetadata(
         name=name, etag=etag, mime_type=mime_type or "", total_bytes=total_bytes
     )
+    file_metadata.export_links = export_links or {}
+    return file_metadata
 
 
 def yt_dlp_get_video_entries(url: str) -> list[dict]:
@@ -650,7 +656,10 @@ def doc_url_to_text_pages(
     Download document from url and convert to text pages.
""" f_bytes, mime_type = download_content_bytes( - f_url=f_url, mime_type=file_meta.mime_type, is_user_url=is_user_url + f_url=f_url, + mime_type=file_meta.mime_type, + is_user_url=is_user_url, + export_links=file_meta.export_links, ) if not f_bytes: return [] @@ -664,14 +673,18 @@ def doc_url_to_text_pages( def download_content_bytes( - *, f_url: str, mime_type: str, is_user_url: bool = True + *, + f_url: str, + mime_type: str, + is_user_url: bool = True, + export_links: dict[str, str] = {}, ) -> tuple[bytes, str]: if is_yt_dlp_able_url(f_url): return download_youtube_to_wav(f_url), "audio/wav" f = furl(f_url) if is_gdrive_url(f): # download from google drive - return gdrive_download(f, mime_type) + return gdrive_download(f, mime_type, export_links) try: # download from url if is_user_uploaded_url(f_url): diff --git a/files/models.py b/files/models.py index 12af91a7a..afb6504cc 100644 --- a/files/models.py +++ b/files/models.py @@ -8,6 +8,10 @@ class FileMetadata(models.Model): mime_type = models.CharField(max_length=255, default="", blank=True) total_bytes = models.PositiveIntegerField(default=0, blank=True) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.export_links = {} + def __str__(self): ret = f"{self.name or 'Unnamed'} - {self.mime_type}" if self.total_bytes: diff --git a/recipes/BulkRunner.py b/recipes/BulkRunner.py index 25c99a2dc..4eb7d654e 100644 --- a/recipes/BulkRunner.py +++ b/recipes/BulkRunner.py @@ -612,7 +612,7 @@ def get_columns(files: list[str]) -> list[str]: def read_df_any(f_url: str) -> "pd.DataFrame": file_meta = doc_url_to_file_metadata(f_url) f_bytes, mime_type = download_content_bytes( - f_url=f_url, mime_type=file_meta.mime_type + f_url=f_url, mime_type=file_meta.mime_type, export_links=file_meta.export_links ) df = tabular_bytes_to_any_df( f_name=file_meta.name, f_bytes=f_bytes, mime_type=mime_type diff --git a/recipes/DocExtract.py b/recipes/DocExtract.py index 23cf89bfe..0fa063379 100644 --- a/recipes/DocExtract.py +++ b/recipes/DocExtract.py @@ -475,7 +475,9 @@ def process_source( elif is_video: f = furl(webpage_url) if is_gdrive_url(f): - f_bytes, _ = gdrive_download(f, doc_meta.mime_type) + f_bytes, _ = gdrive_download( + f, doc_meta.mime_type, doc_meta.export_links + ) webpage_url = upload_file_from_bytes( doc_meta.name, f_bytes, content_type=doc_meta.mime_type )