Load data fix and table of contents generation #76

Open · wants to merge 1 commit into base: main
4 changes: 4 additions & 0 deletions README.md
@@ -12,6 +12,10 @@ Please watch the [demonstration video](https://youtu.be/X4msqCulOYk).

You'll need to find the *authToken*, *bookId* and *reCaptchaToken* by analyzing the browser/websocket traffic and replace the corresponding constants in downloader.py.
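
The constants live near the top of downloader.py. AUTH_TOKEN is the name the script actually uses in its websocket payloads; the other two names below are illustrative placeholders, so match whatever the script defines:

```python
# downloader.py: fill these in from the captured browser/websocket traffic.
AUTH_TOKEN = "<authToken from the captured traffic>"
BOOK_ID = "<bookId from the captured traffic>"                  # placeholder name
RECAPTCHA_TOKEN = "<reCaptchaToken from the captured traffic>"  # placeholder name
```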

UPDATE:

You'll also need to capture the "data" value from the "loadData" event in the browser/websocket traffic for every chapter and add each one to TOKEN_LIST in downloader.py, in reading order.
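
Each TOKEN_LIST entry is the raw "data" value copied verbatim from one captured loadData frame, one entry per chapter. A minimal sketch with placeholder values only (the real shape of each value depends on what your capture shows):

```python
# downloader.py: placeholders only; paste the exact "data" values from your own capture.
TOKEN_LIST = [
    "<data value captured for chapter 1>",
    "<data value captured for chapter 2>",
    # one entry per chapter, in reading order
]
```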

## Run!
>$ python3 downloader.py

70 changes: 62 additions & 8 deletions downloader.py
@@ -10,7 +10,7 @@
import ssl
import re
import os

import fitz  # PyMuPDF
import requests
import websocket
from pyppeteer import launch
@@ -22,6 +22,15 @@

PUPPETEER_THREADS = 50


TOKEN_LIST = [
    "CHAPTER_1_Token",
    "CHAPTER_2_Token",
    # Add the captured "data" payload for every chapter here, in reading order
]

tokenIndex = 1  # next TOKEN_LIST entry to send (entry 0 is sent first)

def init_book_delivery():
    while True:
        try:
@@ -110,7 +119,7 @@ def __init__(self):
else:
    raise Exception(f'unknown book format ({book_format})!')

ws.send(json.dumps({"action":"loadPage","data":{"authToken": AUTH_TOKEN, "pageId": list(chapters)[0], "bookType": book_format, "windowWidth":1792, "mergedChapterPartIndex":0}}))
ws.send(json.dumps({"action":"loadPage","data":TOKEN_LIST[0]}))


elif 'pageChunk' in data['event']:
@@ -153,7 +162,11 @@ def __init__(self):
merged_chapter_part_idx += 1
next_page = page_id

ws.send(json.dumps({"action":"loadPage","data":{"authToken": AUTH_TOKEN, "pageId": str(next_page), "bookType": book_format, "windowWidth":1792, "mergedChapterPartIndex":merged_chapter_part_idx}}))
ws.send(json.dumps({"action":"loadPage","data":TOKEN_LIST[tokenIndex+1]}))
tokenIndex = tokenIndex + 1
if (tokenIndex+1 == len(TOKEN_LIST)):
break


break

@@ -209,8 +222,10 @@ async def render_page(chapter_no, semaphore):

# remove useless img (mess up with pdf gen)
if book_format == 'EPUB':
match = re.search('<img id="trigger" data-chapterid="[0-9]*?" src="" onerror="LoadChapter\(\'[0-9]*?\'\)" />', content).group(0)
if match: content = content.replace(match, '')
match = re.search(r'<img id="trigger" data-chapterid="[0-9]*?" src="" onerror="LoadChapter\(\'[0-9]*?\'\)" />', content)
if match:
    content = content.replace(match.group(0), '')


# reveal hidden images
imgs = re.findall("<img.*?>", content, re.S)
@@ -234,13 +249,12 @@ async def render_page(chapter_no, semaphore):
options['width'] = width
options['height'] = height
elif book_format == 'EPUB':
options['margin'] = {'top': '20', 'bottom': '20', 'left': '20', 'right': '20'}
options['margin'] = {'top': '20', 'bottom': '20', 'left': '40', 'right': '40'}

# build pdf
await page.pdf(options)
await page.close()

print(f"{chapter_no}.pdf created")

sem = asyncio.Semaphore(PUPPETEER_THREADS)
await asyncio.gather(*[render_page(chapter_no, sem) for chapter_no in contents if not os.path.exists(f'{cache_dir}/{chapter_no}.pdf')])
@@ -256,8 +270,48 @@ async def render_page(chapter_no, semaphore):
print('merging pdf pages...')
merger = PdfMerger()

for chapter_no in sorted(contents):
    chapter_pdf_path = os.path.join(cache_dir, f'{chapter_no}.pdf')

    # Derive a bookmark title from the first non-empty page of the chapter,
    # skipping a leading page number and falling back to a generic label.
    chapter_title = f"Chapter {chapter_no}"
    pdf_document = fitz.open(chapter_pdf_path)
    for page in pdf_document:
        lines = [line.strip() for line in page.get_text("text").split('\n') if line.strip()]
        if not lines:
            continue  # blank page, check the next one
        if lines[0].isdigit() and len(lines) > 1:
            chapter_title = lines[1]  # first line is just a page number
        elif not lines[0].isdigit():
            chapter_title = lines[0]
        break
    pdf_document.close()

    merger.append(chapter_pdf_path, bookmark=chapter_title)

merger.write(f"{book_title}.pdf")
merger.close()
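
As an optional sanity check (not part of the downloader itself), the generated table of contents can be read back with PyMuPDF, which the script already imports as fitz. The filename below is a placeholder for whatever f"{book_title}.pdf" produced on your run:

```python
import fitz  # PyMuPDF

doc = fitz.open("YourBookTitle.pdf")  # placeholder: use the actual merged file name
for level, title, page in doc.get_toc():
    print(f"{'  ' * (level - 1)}{title} (page {page})")
doc.close()
```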
1 change: 1 addition & 0 deletions requirements.txt
@@ -3,3 +3,4 @@ websocket-client==1.4.0
pyppeteer==1.0.2
PyPDF2==2.10.5
Pillow==8.4.0
PyMuPDF==1.21.1  # imported as "fitz"; the PyPI package named "fitz" is unrelated