From 2d68c20f7c81329e77e9be782a76f2a0b187e3b2 Mon Sep 17 00:00:00 2001
From: Facundo Rohr <rohrfacu@gmail.com>
Date: Sat, 5 Jan 2019 12:57:26 -0300
Subject: [PATCH 1/2] code refactoring, removing logs

---
 scribd_dl/scribd_dl.py | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/scribd_dl/scribd_dl.py b/scribd_dl/scribd_dl.py
index 81162e6..ae58647 100644
--- a/scribd_dl/scribd_dl.py
+++ b/scribd_dl/scribd_dl.py
@@ -23,6 +23,7 @@
     GreaterThanLastPageError,
     RestrictedDocumentError
 )
+from selenium.webdriver.support import expected_conditions as EC
 
 
 class ScribdDL(object):
@@ -91,7 +92,7 @@ def start_browser(self):
         chrome_options.add_argument('--disable-gpu')
         chrome_options.add_argument('--disable-infobars')
         chrome_options.add_argument("--window-size=1600,2020")
-
+        
         if self.DRIVER_PATH:  # search for chromedriver in assets
             self.driver = webdriver.Chrome(executable_path=self.DRIVER_PATH, options=chrome_options)
         else:  # search for chromedriver in PATH
@@ -158,8 +159,9 @@ def _process_url(self, url):
             except TimeoutException:
                 pass
             try:  # Refresh the page in case it could not retrieve the total_pages element
-                total_pages = self.driver.find_element_by_xpath("//span[@class='total_pages']/span[2]")
-                total_pages = total_pages.text.split()[1]
+                # Locate the total_pages element through its absolute XPath
+                total_pages = self.driver.find_element_by_xpath("/html[1]/body[1]/div[1]/div[1]/div[3]/div[1]/div[2]/section[1]/div[1]/div[1]/div[1]/main[1]/div[1]/div[1]/div[2]/div[1]/span[1]/span[2]")
+                total_pages = total_pages.text.split(' ')[1]
                 break
             except NoSuchElementException:  # total_pages element not available, try again
                 retries += 1
@@ -219,10 +221,31 @@ def _scroll_pages(self, first_page, last_page, total_pages):
             self.logger.debug('Processing page : %s of %s', counter, last_page, extra=self.extra)
 
             time.sleep(sleep_time)
+
+            # 
+            # Flatten any transparency in the screenshot onto a white background (RGBA -> RGB)
+            #
             img = Image.open(BytesIO(self.driver.get_screenshot_as_png()))  # Save screenshot in memory
+            newImg = img
+
+            bg_colour = (255, 255, 255)
+
+            #
+            # TODO: extract this transparency-flattening step into a helper function
+            #
+            if img.mode in ('RGBA', 'LA') or (img.mode == 'P' and 'transparency' in img.info):
+                # Need to convert to RGBA if LA format due to a bug in PIL (http://stackoverflow.com/a/1963146)
+                alpha = img.convert('RGBA').split()[-1]
+
+                # Create a new background image filled with our matte color.
+                bg = Image.new("RGB", img.size, bg_colour + (255,))
+                bg.paste(img, mask=alpha)
+                newImg = bg
+            else:
+                newImg = img
 
             # Crop the image to the speified size
-            img = img.crop((
+            newImg = newImg.crop((
                 page.location['x'],
                 page.location['y'],
                 page.location['x'] + page.size['width'],
@@ -230,7 +253,7 @@ def _scroll_pages(self, first_page, last_page, total_pages):
             ))
             # Append the byte array to List
             imgByteArr = BytesIO()
-            img.save(imgByteArr, format='PNG')
+            newImg.save(imgByteArr, format='PNG')
             Pages.append(imgByteArr.getvalue())
 
             if processed == to_process:  # If on the last page

From fe203916c63446e00bb1f723a874fb43d80de06d Mon Sep 17 00:00:00 2001
From: Facundo Rohr <rohrfacu@gmail.com>
Date: Sun, 6 Jan 2019 22:55:05 -0300
Subject: [PATCH 2/2] remove scribd page and search bar from doc

---
 scribd_dl/scribd_dl.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scribd_dl/scribd_dl.py b/scribd_dl/scribd_dl.py
index ae58647..b9ac499 100644
--- a/scribd_dl/scribd_dl.py
+++ b/scribd_dl/scribd_dl.py
@@ -197,9 +197,10 @@ def _process_url(self, url):
         self._scroll_pages(first_page, last_page, total_pages)
 
     def _scroll_pages(self, first_page, last_page, total_pages):
-        # Enter full screen mode
-        fullscreen_xpath = "//button[@aria-label='Fullscreen']"
-        self.driver.find_element_by_xpath(fullscreen_xpath).click()
+        # Fullscreen mode is disabled so the captured document does not include the pages bar
+        
+        # fullscreen_xpath = "//button[@aria-label='Fullscreen']"
+        # self.driver.find_element_by_xpath(fullscreen_xpath).click()
         Pages = []  # Holds the actual image bytes of each page
         Sizes = []  # Holds the size in bytes (an integer) of each page
         to_process = last_page - first_page + 1  # Total pages to process