From 9be8e711301cce1953b19b2cb6b18c9991ae2d1a Mon Sep 17 00:00:00 2001 From: cactusoftheday Date: Tue, 22 Aug 2023 19:06:55 -0400 Subject: [PATCH 01/11] Move changes to master branch --- android/app/build.gradle | 15 ++ .../LNReader/MainApplication.java | 3 +- .../epubParser/EpubParser.java | 155 +++++++++++++++ .../EpubParserActivityEventListener.java | 25 +++ .../epubParser/EpubParserPackage.java | 26 +++ android/app/src/main/python/epubParser.py | 184 ++++++++++++++++++ android/app/src/requirements.txt | 1 + android/build.gradle | 3 +- src/native/epubParser.ts | 8 + .../settings/SettingsAdvancedScreen.js | 5 + 10 files changed, 423 insertions(+), 2 deletions(-) create mode 100644 android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java create mode 100644 android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParserActivityEventListener.java create mode 100644 android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParserPackage.java create mode 100644 android/app/src/main/python/epubParser.py create mode 100644 android/app/src/requirements.txt create mode 100644 src/native/epubParser.ts diff --git a/android/app/build.gradle b/android/app/build.gradle index 77aa008c8..6a4bfcacd 100644 --- a/android/app/build.gradle +++ b/android/app/build.gradle @@ -1,4 +1,5 @@ apply plugin: "com.android.application" +apply plugin: "com.chaquo.python" import com.android.build.OutputFile @@ -150,6 +151,20 @@ android { versionName "$versionMajor.$versionMinor.$versionPatch" buildConfigField "boolean", "IS_NEW_ARCHITECTURE_ENABLED", isNewArchitectureEnabled().toString() } + ndk { + abiFilters "armeabi-v7a", "arm64-v8a", "x86", "x86_64" + } + python{ + //buildPython "C:/Users/kijij/AppData/Local/Programs/Python/Python39/python.exe" //don't believe you need this + pip { + install "-r", "./requirements.txt" + } + } +} +sourceSets { + main { + python.srcDirs = ["./src/main/python"] + } splits { abi { reset() diff --git a/android/app/src/main/java/com/rajarsheechatterjee/LNReader/MainApplication.java b/android/app/src/main/java/com/rajarsheechatterjee/LNReader/MainApplication.java index a1d8b15a0..c3659470d 100644 --- a/android/app/src/main/java/com/rajarsheechatterjee/LNReader/MainApplication.java +++ b/android/app/src/main/java/com/rajarsheechatterjee/LNReader/MainApplication.java @@ -12,7 +12,7 @@ import com.facebook.react.ReactPackage; import com.facebook.soloader.SoLoader; import com.rajarsheechatterjee.VolumeButtonListener.VolumeButtonListenerPackage; - +import com.rajarsheechatterjee.EpubParser.EpubParserPackage; import expo.modules.ApplicationLifecycleDispatcher; import expo.modules.ReactNativeHostWrapper; @@ -40,6 +40,7 @@ protected List getPackages() { List packages = new PackageList(this).getPackages(); packages.add(new com.rajarsheechatterjee.LNReader.NavigationBarColorPackage()); packages.add(new VolumeButtonListenerPackage()); + packages.add(new EpubParserPackage()); return packages; } diff --git a/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java b/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java new file mode 100644 index 000000000..e793e8d70 --- /dev/null +++ b/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java @@ -0,0 +1,155 @@ +package com.rajarsheechatterjee.EpubParser; + +import com.chaquo.python.PyObject; +import com.chaquo.python.Python; +import com.chaquo.python.android.AndroidPlatform; + +import android.app.Activity; +import android.content.Intent; +import android.content.Context; +import android.content.ContentResolver; + +import android.database.Cursor; +import android.net.Uri; +import android.os.Bundle; +import android.provider.OpenableColumns; +import android.view.View; +import android.widget.Button; +import android.widget.TextView; +import android.widget.Toast; + +import com.facebook.react.bridge.ActivityEventListener; +import com.facebook.react.bridge.BaseActivityEventListener; +import com.facebook.react.bridge.Promise; +import com.facebook.react.bridge.ReactApplicationContext; +import com.facebook.react.bridge.ReactContextBaseJavaModule; +import com.facebook.react.bridge.ReactMethod; +import com.facebook.react.modules.core.DeviceEventManagerModule; +import java.io.File; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import android.util.Log; + +public class EpubParser extends ReactContextBaseJavaModule implements ActivityEventListener{ + private static final int REQUEST_DIRECTORY=100; //constant request code dunno if will use it + private static String epubPath; + private static Promise promise; + private final EpubParserActivityEventListener activityEventListener; + private static ReactApplicationContext appContext = null; + + public EpubParser(ReactApplicationContext context){ + super(context); + appContext = context; + activityEventListener = new EpubParserActivityEventListener(context, this); + context.addActivityEventListener(activityEventListener); + if (!Python.isStarted()) { + // Initialize Python with the Chaquopy SDK + Python.start(new AndroidPlatform(context)); + } + } + @Override + public void onActivityResult(Activity activity, int requestCode, int resultCode, Intent data) { + if (activity != null && requestCode == REQUEST_DIRECTORY && resultCode == Activity.RESULT_OK) { + Uri uri = data.getData(); + String folderPath = getFilePathFromUri(uri); + epubPath = folderPath; + + Python py = Python.getInstance(); + PyObject epubParser = py.getModule("epubParser"); + + if (epubPath != null && epubPath.toLowerCase().endsWith(".epub")) { + PyObject getContent = epubParser.callAttr("parseEpub", epubPath, "/data/data/com.rajarsheechatterjee.LNReader/files/"); + epubPath = null; + promise.resolve(null); + } else { + promise.reject("Invalid file format"); + } + } else { + promise.reject("Directory selection canceled"); + } + } + /* + private final ActivityEventListener activityEventListener = new BaseActivityEventListener() { + //@Override + public void onActivityResult(Activity activity, int requestCode, int resultCode, Intent data){ + if(requestCode == REQUEST_DIRECTORY && resultCode == Activity.RESULT_OK){ + Uri uri = data.getData(); + String folderPath = getFilePathFromUri(uri); + //Log.d("file name", folderPath); + epubPath = folderPath; + //Log.d("epub path", epubPath); + Python py = Python.getInstance(); + PyObject epubParser = py.getModule("epubParser"); + if(epubPath != null && epubPath.toLowerCase().endsWith(".epub")){ + //Log.d("file name", epubPath); + PyObject getContent = epubParser.callAttr("parseEpub",epubPath,"/data/data/com.rajarsheechatterjee.LNReader/files/"); //dunno if that's the right app ID + epubPath = null; + promise.resolve(null); + } else{ + promise.reject("Bad file"); + } + } + else{ + promise.reject("Directory selection canceled"); + } + } + };*/ + + @Override + public String getName(){ + return "EpubParser"; + } + + @ReactMethod + public void openDirectory(Promise promise){ + this.promise = promise; + try{ + Intent intent=new Intent(Intent.ACTION_OPEN_DOCUMENT); + intent.addCategory(Intent.CATEGORY_OPENABLE); + intent.setType("*/*"); //there's an extra backslash in there + final Activity activity = getCurrentActivity(); + activity.startActivityForResult(intent,REQUEST_DIRECTORY); + } catch (Exception e){ + promise.reject(e); + } + } + + private String getFilePathFromUri(Uri uri){ + String filePath=null; + if(uri!=null){ + ContentResolver resolver = getCurrentActivity().getContentResolver(); + Cursor cursor=resolver.query(uri,null,null,null,null); + if(cursor!=null&&cursor.moveToFirst()){ + int index=cursor.getColumnIndex(OpenableColumns.DISPLAY_NAME); + String fileName=cursor.getString(index); + cursor.close(); + + File file=new File(getReactApplicationContext().getCacheDir(),fileName); + filePath=file.getAbsolutePath(); + + try{ + InputStream inputStream=resolver.openInputStream(uri); + OutputStream outputStream=new FileOutputStream(file); + byte[]buffer=new byte[4096]; + int bytesRead; + while((bytesRead=inputStream.read(buffer))!=-1){ + outputStream.write(buffer,0,bytesRead); + } + outputStream.close(); + inputStream.close(); + }catch(IOException e){ + e.printStackTrace(); + } + } + } + return filePath; + } + @Override + public void onNewIntent(Intent intent) { + // Handle new intents if needed + } +} \ No newline at end of file diff --git a/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParserActivityEventListener.java b/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParserActivityEventListener.java new file mode 100644 index 000000000..ff4109c77 --- /dev/null +++ b/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParserActivityEventListener.java @@ -0,0 +1,25 @@ +package com.rajarsheechatterjee.EpubParser; + + +import android.app.Activity; +import android.content.Intent; +import com.facebook.react.bridge.ActivityEventListener; +import com.facebook.react.bridge.ReactApplicationContext; +import com.facebook.react.bridge.ReactContextBaseJavaModule; +import com.facebook.react.bridge.BaseActivityEventListener; +import com.facebook.react.bridge.ReactMethod; + +public class EpubParserActivityEventListener extends BaseActivityEventListener { + private final ReactApplicationContext reactContext; + private final EpubParser epubParser; + + public EpubParserActivityEventListener(ReactApplicationContext reactContext, EpubParser epubParser) { + this.reactContext = reactContext; + this.epubParser = epubParser; + } + + @Override + public void onActivityResult(Activity activity, int requestCode, int resultCode, Intent data) { + epubParser.onActivityResult(activity, requestCode, resultCode, data); + } +} \ No newline at end of file diff --git a/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParserPackage.java b/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParserPackage.java new file mode 100644 index 000000000..37f528337 --- /dev/null +++ b/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParserPackage.java @@ -0,0 +1,26 @@ +package com.rajarsheechatterjee.EpubParser; + +import com.facebook.react.ReactPackage; +import com.facebook.react.bridge.NativeModule; +import com.facebook.react.bridge.ReactApplicationContext; +import com.facebook.react.uimanager.ViewManager; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class EpubParserPackage implements ReactPackage { + @Override + public List createNativeModules(ReactApplicationContext reactContext) { + List modules = new ArrayList(); + + modules.add(new EpubParser(reactContext)); + + return modules; + } + + @Override + public List createViewManagers(ReactApplicationContext reactContext) { + return Collections.emptyList(); + } +} \ No newline at end of file diff --git a/android/app/src/main/python/epubParser.py b/android/app/src/main/python/epubParser.py new file mode 100644 index 000000000..c5a94a16e --- /dev/null +++ b/android/app/src/main/python/epubParser.py @@ -0,0 +1,184 @@ +import json +import os.path +import os +import re +import zipfile +from lxml import etree +#import shutil +#from PIL import Image #not necessary only for debugging + +'''export interface SourceNovel { + url: string; //must be absoulute + name: string; + cover?: string; + genres?: string; + summary?: string; + author?: string; + artist?: string; + status?: string; + chapters?: ChapterItem[]; +} +export interface ChapterItem { + name: string; + url: string; //must be absoulute + releaseTime?: string; +}''' + +class ChapterItem: + def __init__(self, name, url, releaseTime): + self.name = name + self.url = url + self.releaseTime = releaseTime #perhaps epub creation date? + +namespaces = { + "calibre":"http://calibre.kovidgoyal.net/2009/metadata", + "dc":"http://purl.org/dc/elements/1.1/", + "dcterms":"http://purl.org/dc/terms/", + "opf":"http://www.idpf.org/2007/opf", + "u":"urn:oasis:names:tc:opendocument:xmlns:container", + "xsi":"http://www.w3.org/2001/XMLSchema-instance", + "xhtml":"http://www.w3.org/1999/xhtml" +} + +def getContentOPF(epub_path): + with zipfile.ZipFile(epub_path) as z: + text = z.read("META-INF/container.xml") #get container xml to get content + tree = etree.fromstring(text) + #print(text) + rootfile_path = tree.xpath("/u:container/u:rootfiles/u:rootfile", + namespaces=namespaces)[0].get("full-path") + #print(rootfile_path) + tree = etree.fromstring(z.read(rootfile_path)) + #print(tree) + return tree + +def cleanTitle(dir_name): + #cleans a string so that it is valid for directories + pattern = r'[<>:"/\\|?*\x00-\x1F]' + # Remove any disallowed characters from the directory name string + clean_dir_name = re.sub(pattern, '', dir_name) + return clean_dir_name + +def getContent(epub_path, dest_dir=os.environ["HOME"]): + extensions = ['.html','.htm','.xhtml','.css','.png','.jpeg','.jpg','.gif','.opf'] # still need the content.opf file + with zipfile.ZipFile(epub_path) as z: + for i in range(0,len(z.namelist())): + if(z.namelist()[i].endswith('opf')): + contentOPF = z.namelist()[i] #search for opf file + tree = etree.XML(z.read(contentOPF)) + title = tree.find('.//dc:title', namespaces=namespaces).text + cleanedTitle = cleanTitle(title) + dir = dest_dir + "convertedEpubs/" + cleanedTitle # temporary directory for saving + for fileInfo in z.infolist(): + if any(fileInfo.filename.endswith(ext) for ext in extensions): + z.extract(fileInfo.filename, path = dir) + return dir + +def treeFindsAll(queryString, XMLtree): + list = XMLtree.findall(queryString, namespaces=namespaces) + query = '' + if len(list) == 0: + query = 'N/A' + return query + for item in list: + query += (item.text + ' ') + return query + +class SourceNovel: + def __init__(self, url, name, cover, genres, summary, author, artist, chapters): + self.url = url #file location? + self.name = name + self.cover = cover + self.genres = genres + self.summary = summary + self.author = author + self.artist = artist + self.status = 'local' + self.chapters = chapters + +def getMetadata(epub_path, save_path): + with zipfile.ZipFile(epub_path) as z: + for i in range(0, len(z.namelist())): + if (z.namelist()[i].endswith('opf')): + contentOPF = z.namelist()[i] # search for opf file + tree = etree.XML(z.read(contentOPF)) + title = tree.find('.//dc:title',namespaces=namespaces).text + print("title",title) + cover = getCover(epub_path) + authors = '' + for author in tree.findall('.//dc:creator', namespaces=namespaces): + authors += (author.text + ' ') + print("authors",authors) + genre = treeFindsAll('.//dc:subject', tree) + print("genre",genre) + summary = treeFindsAll('.//dc:description', tree) + print("summary",summary) + artist = '' #this will be included with author as epub files do not need to have an artist role and distinction is probably not necessary + chapters = tree.xpath("//opf:spine//opf:itemref", namespaces=namespaces) + for chapter in chapters: + print(chapter.get("idref")) + chapters = len(chapters) + print("chapters",chapters) + return {"url" : save_path, + "title" : title, + "cover" : cover, + "genre" : genre, + "summary" : summary, + "authors" : authors, + "artist" : artist, + "chapters" : chapters, + } + +def getCover(epub_path): + with zipfile.ZipFile(epub_path) as z: + for i in range(0, len(z.namelist())): + if (z.namelist()[i].endswith('opf')): + contentOPF = z.namelist()[i] # search for opf file + print(z.namelist()) + tree = etree.XML(z.read(contentOPF)) + coverHREF = None + try: + coverID = tree.xpath("//opf:metadata/opf:meta[@name='cover']", namespaces=namespaces)[0].get("content") + print("coverID 2", coverID) #now we know where the cover image is located + coverHREF = tree.xpath("//opf:manifest/opf:item[@id='"+coverID+"']",namespaces=namespaces)[0].get("href") + + except IndexError: #not an EPUB 2.0 + print("EPUB 2 failure") + pass + #print("coverHREF", coverHREF) + if not coverHREF: #try EPUB 3.0 + try: + coverHREF = tree.xpath("//opf:manifest/opf:item[@properties='cover-image']",namespaces=namespaces)[0].get('href') + except IndexError: + print("EPUB 3 failure") + pass + + elif not coverHREF: #some EPUBs don't explicitly declare cover images + try: + coverID = tree.xpath("//opf:spine/open:itemref[@idref='cover']",namespaces=namespaces)[0].get("idref") + temp = tree.xpath("//opf:manifest/opf:item[@id='"+coverID+"']", namespaces=namespaces)[0].get('href') + + tree = etree.fromstring(z.read(temp)) + coverHREF = tree.xpath("//xhtml:img", namespaces=namespaces)[0].get("src") + except IndexError: + print("Edge case failure") + + elif not coverHREF: + print("No cover found") + return None + + coverPath = os.path.join(os.path.dirname(contentOPF),coverHREF) + coverPath = coverPath.replace('\\','/') + print("coverPath", coverPath) + #return z.open(coverPath) + return coverPath + +def dumpMetaData(metaData, dest_dir): + dir = dest_dir + "convertedEpubs/" + cleanTitle(metaData.get("title")) + '/' + with open(dir + metaData.get("title") + ".json", 'w') as fileout: + json.dump(metaData, fileout) + +def parseEpub(epub_path, dest_dir): + save_path = getContent(epub_path, dest_dir) #simultaneously writes and saves content to a variable + metaData = getMetadata(epub_path, save_path) + dumpMetaData(metaData, dest_dir) \ No newline at end of file diff --git a/android/app/src/requirements.txt b/android/app/src/requirements.txt new file mode 100644 index 000000000..3ea5a2280 --- /dev/null +++ b/android/app/src/requirements.txt @@ -0,0 +1 @@ +lxml==4.6.3 \ No newline at end of file diff --git a/android/build.gradle b/android/build.gradle index 64e7c52ef..ae132e31a 100644 --- a/android/build.gradle +++ b/android/build.gradle @@ -20,12 +20,13 @@ buildscript { repositories { google() mavenCentral() + maven { url "https://chaquo.com/maven" } } dependencies { classpath("com.android.tools.build:gradle:7.1.2") classpath("com.facebook.react:react-native-gradle-plugin") classpath("de.undercouch:gradle-download-task:5.0.1") - + classpath ("com.chaquo.python:gradle:14.0.2") // NOTE: Do not place your application dependencies here; they belong // in the individual module build.gradle files } diff --git a/src/native/epubParser.ts b/src/native/epubParser.ts new file mode 100644 index 000000000..554f39c42 --- /dev/null +++ b/src/native/epubParser.ts @@ -0,0 +1,8 @@ +import { NativeModules } from 'react-native'; + +interface EpubParserInterface { + openDirectory(): void; +} + +const { EpubParser } = NativeModules; +export default EpubParser as EpubParserInterface; diff --git a/src/screens/settings/SettingsAdvancedScreen.js b/src/screens/settings/SettingsAdvancedScreen.js index cd8d950b4..e341a9e05 100644 --- a/src/screens/settings/SettingsAdvancedScreen.js +++ b/src/screens/settings/SettingsAdvancedScreen.js @@ -70,6 +70,11 @@ const AdvancedSettings = ({ navigation }) => { theme={theme} /> + EpubParser.openDirectory()} + theme={theme} + /> Date: Tue, 22 Aug 2023 19:37:33 -0400 Subject: [PATCH 02/11] Fixed errors stemming from improper move --- android/app/build.gradle | 28 +++++++++++++------------- android/app/{src => }/requirements.txt | 0 2 files changed, 14 insertions(+), 14 deletions(-) rename android/app/{src => }/requirements.txt (100%) diff --git a/android/app/build.gradle b/android/app/build.gradle index 0696c3369..e92800cba 100644 --- a/android/app/build.gradle +++ b/android/app/build.gradle @@ -147,23 +147,23 @@ android { minSdkVersion rootProject.ext.minSdkVersion targetSdkVersion rootProject.ext.targetSdkVersion // Generated version code. Supports versions up to 1024.1024.2048 - versionCode ((((versionMajor << 10) | versionMinor) << 11) | versionPatch) + versionCode((((versionMajor << 10) | versionMinor) << 11) | versionPatch) versionName "$versionMajor.$versionMinor.$versionPatch" buildConfigField "boolean", "IS_NEW_ARCHITECTURE_ENABLED", isNewArchitectureEnabled().toString() - } - ndk { - abiFilters "armeabi-v7a", "arm64-v8a", "x86", "x86_64" - } - python{ - //buildPython "C:/Users/kijij/AppData/Local/Programs/Python/Python39/python.exe" //don't believe you need this - pip { - install "-r", "./requirements.txt" + ndk { + abiFilters "armeabi-v7a", "arm64-v8a", "x86", "x86_64" + } + python { + //buildPython "C:/Users/kijij/AppData/Local/Programs/Python/Python39/python.exe" //don't believe you need this + pip { + install "-r", "./requirements.txt" + } + } + sourceSets { + main { + python.srcDirs = ["./src/main/python"] + } } - } -} -sourceSets { - main { - python.srcDirs = ["./src/main/python"] } splits { abi { diff --git a/android/app/src/requirements.txt b/android/app/requirements.txt similarity index 100% rename from android/app/src/requirements.txt rename to android/app/requirements.txt From 5c73f2897cffa542f2107e078468beced7f24def Mon Sep 17 00:00:00 2001 From: Danisty Date: Thu, 24 Aug 2023 00:34:28 +0200 Subject: [PATCH 03/11] Add EPub to library --- .../epubParser/EpubParser.java | 8 +- android/app/src/main/python/epubParser.py | 145 ++++++++++++------ src/native/epubParser.ts | 19 ++- .../reader/utils/sanitizeChapterText.ts | 4 + .../settings/SettingsAdvancedScreen.js | 3 +- src/sources/local/epubSource.ts | 108 +++++++++++++ src/sources/sourceManager.ts | 2 + 7 files changed, 229 insertions(+), 60 deletions(-) create mode 100644 src/sources/local/epubSource.ts diff --git a/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java b/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java index e793e8d70..2470347e8 100644 --- a/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java +++ b/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java @@ -55,16 +55,14 @@ public EpubParser(ReactApplicationContext context){ public void onActivityResult(Activity activity, int requestCode, int resultCode, Intent data) { if (activity != null && requestCode == REQUEST_DIRECTORY && resultCode == Activity.RESULT_OK) { Uri uri = data.getData(); - String folderPath = getFilePathFromUri(uri); - epubPath = folderPath; + String epubPath = getFilePathFromUri(uri); Python py = Python.getInstance(); PyObject epubParser = py.getModule("epubParser"); if (epubPath != null && epubPath.toLowerCase().endsWith(".epub")) { - PyObject getContent = epubParser.callAttr("parseEpub", epubPath, "/data/data/com.rajarsheechatterjee.LNReader/files/"); - epubPath = null; - promise.resolve(null); + PyObject savePath = epubParser.callAttr("parseEpub", epubPath, "/data/data/com.rajarsheechatterjee.LNReader/files/"); + promise.resolve(savePath.toString()); } else { promise.reject("Invalid file format"); } diff --git a/android/app/src/main/python/epubParser.py b/android/app/src/main/python/epubParser.py index c5a94a16e..557bd49e3 100644 --- a/android/app/src/main/python/epubParser.py +++ b/android/app/src/main/python/epubParser.py @@ -31,22 +31,23 @@ def __init__(self, name, url, releaseTime): self.releaseTime = releaseTime #perhaps epub creation date? namespaces = { - "calibre":"http://calibre.kovidgoyal.net/2009/metadata", - "dc":"http://purl.org/dc/elements/1.1/", - "dcterms":"http://purl.org/dc/terms/", - "opf":"http://www.idpf.org/2007/opf", - "u":"urn:oasis:names:tc:opendocument:xmlns:container", - "xsi":"http://www.w3.org/2001/XMLSchema-instance", - "xhtml":"http://www.w3.org/1999/xhtml" + 'calibre':'http://calibre.kovidgoyal.net/2009/metadata', + 'dc':'http://purl.org/dc/elements/1.1/', + 'dcterms':'http://purl.org/dc/terms/', + 'opf':'http://www.idpf.org/2007/opf', + 'ncx':'http://www.daisy.org/z3986/2005/ncx/', + 'u':'urn:oasis:names:tc:opendocument:xmlns:container', + 'xsi':'http://www.w3.org/2001/XMLSchema-instance', + 'xhtml':'http://www.w3.org/1999/xhtml' } def getContentOPF(epub_path): with zipfile.ZipFile(epub_path) as z: - text = z.read("META-INF/container.xml") #get container xml to get content + text = z.read('META-INF/container.xml') #get container xml to get content tree = etree.fromstring(text) #print(text) - rootfile_path = tree.xpath("/u:container/u:rootfiles/u:rootfile", - namespaces=namespaces)[0].get("full-path") + rootfile_path = tree.xpath('/u:container/u:rootfiles/u:rootfile', + namespaces=namespaces)[0].get('full-path') #print(rootfile_path) tree = etree.fromstring(z.read(rootfile_path)) #print(tree) @@ -59,7 +60,7 @@ def cleanTitle(dir_name): clean_dir_name = re.sub(pattern, '', dir_name) return clean_dir_name -def getContent(epub_path, dest_dir=os.environ["HOME"]): +def getContent(epub_path, dest_dir): extensions = ['.html','.htm','.xhtml','.css','.png','.jpeg','.jpg','.gif','.opf'] # still need the content.opf file with zipfile.ZipFile(epub_path) as z: for i in range(0,len(z.namelist())): @@ -68,7 +69,7 @@ def getContent(epub_path, dest_dir=os.environ["HOME"]): tree = etree.XML(z.read(contentOPF)) title = tree.find('.//dc:title', namespaces=namespaces).text cleanedTitle = cleanTitle(title) - dir = dest_dir + "convertedEpubs/" + cleanedTitle # temporary directory for saving + dir = dest_dir + 'convertedEpubs/' + cleanedTitle # temporary directory for saving for fileInfo in z.infolist(): if any(fileInfo.filename.endswith(ext) for ext in extensions): z.extract(fileInfo.filename, path = dir) @@ -96,89 +97,131 @@ def __init__(self, url, name, cover, genres, summary, author, artist, chapters): self.status = 'local' self.chapters = chapters +def getChapters(z: zipfile.ZipFile, opf_tree): + isEPUB2 = False + filename = None + chapters = [] + + for filename in z.namelist(): + if filename == 'toc.ncx': + isEPUB2 = True + chpts_data_filename = filename + break + + if isEPUB2: + name_by_content = {} + tree = etree.XML(z.read(chpts_data_filename)) + for nav_point in tree.xpath('//ncx:navMap/ncx:navPoint', namespaces=namespaces): + name = nav_point.find('.//ncx:text', namespaces=namespaces).text + content = nav_point.find('.//ncx:content', namespaces=namespaces).attrib['src'] + '#' + name_by_content[content[:content.index('#') or -1]] = name + + chapters_elements = opf_tree.xpath('//opf:spine//opf:itemref', namespaces=namespaces) + for i,chapter_el in enumerate(chapters_elements): + id = chapter_el.get('idref') + item = opf_tree.find(f".//opf:manifest/opf:item[@id='{id}']", namespaces=namespaces) + content = item.attrib['href'] + chapter_has_name = content in name_by_content + + if chapter_has_name: + chapter_name = name_by_content[content] + if chapter_name.isnumeric(): + chapterName = f"Chapter {chapter_name}" + else: + chapterName = chapter_name + else: + chapterName = f"Unnamed chapter {i+1}" + + chapters.append({ + "name":chapterName, + "path":content + }) + else: + pass + + return chapters + def getMetadata(epub_path, save_path): with zipfile.ZipFile(epub_path) as z: - for i in range(0, len(z.namelist())): - if (z.namelist()[i].endswith('opf')): - contentOPF = z.namelist()[i] # search for opf file + for filename in z.namelist(): + if (filename.endswith('opf')): + contentOPF = filename # search for opf file tree = etree.XML(z.read(contentOPF)) title = tree.find('.//dc:title',namespaces=namespaces).text - print("title",title) + print('title',title) cover = getCover(epub_path) authors = '' for author in tree.findall('.//dc:creator', namespaces=namespaces): authors += (author.text + ' ') - print("authors",authors) + print('authors',authors) genre = treeFindsAll('.//dc:subject', tree) - print("genre",genre) + print('genre',genre) summary = treeFindsAll('.//dc:description', tree) - print("summary",summary) + print('summary',summary) artist = '' #this will be included with author as epub files do not need to have an artist role and distinction is probably not necessary - chapters = tree.xpath("//opf:spine//opf:itemref", namespaces=namespaces) - for chapter in chapters: - print(chapter.get("idref")) - chapters = len(chapters) - print("chapters",chapters) - return {"url" : save_path, - "title" : title, - "cover" : cover, - "genre" : genre, - "summary" : summary, - "authors" : authors, - "artist" : artist, - "chapters" : chapters, - } + chapters = getChapters(z, tree) + print('chapters',chapters) + return { + 'url' : save_path, + 'title' : title, + 'cover' : cover, + 'genre' : genre, + 'summary' : summary, + 'authors' : authors.strip(), + 'artist' : artist, + 'chapters' : chapters, + } def getCover(epub_path): with zipfile.ZipFile(epub_path) as z: - for i in range(0, len(z.namelist())): - if (z.namelist()[i].endswith('opf')): - contentOPF = z.namelist()[i] # search for opf file - print(z.namelist()) + for filename in z.namelist(): + if (filename.endswith('opf')): + contentOPF = filename # search for opf file tree = etree.XML(z.read(contentOPF)) coverHREF = None try: - coverID = tree.xpath("//opf:metadata/opf:meta[@name='cover']", namespaces=namespaces)[0].get("content") - print("coverID 2", coverID) #now we know where the cover image is located - coverHREF = tree.xpath("//opf:manifest/opf:item[@id='"+coverID+"']",namespaces=namespaces)[0].get("href") + coverID = tree.xpath("//opf:metadata/opf:meta[@name='cover']", namespaces=namespaces)[0].get('content') + print('coverID 2', coverID) #now we know where the cover image is located + coverHREF = tree.xpath("//opf:manifest/opf:item[@id='"+coverID+"']",namespaces=namespaces)[0].get('href') except IndexError: #not an EPUB 2.0 - print("EPUB 2 failure") + print('EPUB 2 failure') pass - #print("coverHREF", coverHREF) + #print('coverHREF', coverHREF) if not coverHREF: #try EPUB 3.0 try: coverHREF = tree.xpath("//opf:manifest/opf:item[@properties='cover-image']",namespaces=namespaces)[0].get('href') except IndexError: - print("EPUB 3 failure") + print('EPUB 3 failure') pass elif not coverHREF: #some EPUBs don't explicitly declare cover images try: - coverID = tree.xpath("//opf:spine/open:itemref[@idref='cover']",namespaces=namespaces)[0].get("idref") + coverID = tree.xpath("//opf:spine/open:itemref[@idref='cover']",namespaces=namespaces)[0].get('idref') temp = tree.xpath("//opf:manifest/opf:item[@id='"+coverID+"']", namespaces=namespaces)[0].get('href') tree = etree.fromstring(z.read(temp)) - coverHREF = tree.xpath("//xhtml:img", namespaces=namespaces)[0].get("src") + coverHREF = tree.xpath('//xhtml:img', namespaces=namespaces)[0].get('src') except IndexError: - print("Edge case failure") + print('Edge case failure') elif not coverHREF: - print("No cover found") + print('No cover found') return None coverPath = os.path.join(os.path.dirname(contentOPF),coverHREF) coverPath = coverPath.replace('\\','/') - print("coverPath", coverPath) + print('coverPath', coverPath) #return z.open(coverPath) return coverPath def dumpMetaData(metaData, dest_dir): - dir = dest_dir + "convertedEpubs/" + cleanTitle(metaData.get("title")) + '/' - with open(dir + metaData.get("title") + ".json", 'w') as fileout: + dir = dest_dir + 'convertedEpubs/' + cleanTitle(metaData.get('title')) + '/' + with open(dir + cleanTitle(metaData.get('title')) + '.json', 'w') as fileout: json.dump(metaData, fileout) def parseEpub(epub_path, dest_dir): save_path = getContent(epub_path, dest_dir) #simultaneously writes and saves content to a variable metaData = getMetadata(epub_path, save_path) - dumpMetaData(metaData, dest_dir) \ No newline at end of file + dumpMetaData(metaData, dest_dir) + return save_path diff --git a/src/native/epubParser.ts b/src/native/epubParser.ts index 554f39c42..aab94d3f7 100644 --- a/src/native/epubParser.ts +++ b/src/native/epubParser.ts @@ -1,8 +1,21 @@ +import { getNovel } from '@database/queries/NovelQueries'; +import { insertNovelInLibrary } from '@database/queries/NovelQueriesV2'; import { NativeModules } from 'react-native'; +const { EpubParser } = NativeModules; +const epubSourceId = 0; + interface EpubParserInterface { - openDirectory(): void; + openDirectory(): Promise; } -const { EpubParser } = NativeModules; -export default EpubParser as EpubParserInterface; +export async function openDirectory() { + const ParserInterface = EpubParser as EpubParserInterface; + const epubPath = await ParserInterface.openDirectory(); + + let dbNovel = await getNovel(epubSourceId, epubPath); + if (dbNovel === undefined || dbNovel.followed === 0) { + // Insert novel if it doesn't exist in the database + await insertNovelInLibrary(epubSourceId, epubPath, false, 1); + } +} diff --git a/src/screens/reader/utils/sanitizeChapterText.ts b/src/screens/reader/utils/sanitizeChapterText.ts index 390e9eb21..cf0146034 100644 --- a/src/screens/reader/utils/sanitizeChapterText.ts +++ b/src/screens/reader/utils/sanitizeChapterText.ts @@ -16,6 +16,7 @@ export const sanitizeChapterText = ( html: string, options?: Options, ): string => { + const isEPubSource = options?.sourceId === 0; let text = sanitizeHtml(html, { allowedTags: sanitizeHtml.defaults.allowedTags.concat([ 'img', @@ -25,13 +26,16 @@ export const sanitizeChapterText = ( 'b', 'a', 'center', + ...(isEPubSource ? ['head', 'title', 'style'] : []), ]), allowedAttributes: { 'img': ['src', 'class'], 'a': ['href'], 'input': ['type', 'offline'], + ...(isEPubSource ? { '*': ['class'] } : {}), }, allowedSchemes: ['data', 'http', 'https', 'file'], + allowVulnerableTags: isEPubSource, }); if (text) { if (options?.removeExtraParagraphSpacing) { diff --git a/src/screens/settings/SettingsAdvancedScreen.js b/src/screens/settings/SettingsAdvancedScreen.js index e341a9e05..a01aaf58d 100644 --- a/src/screens/settings/SettingsAdvancedScreen.js +++ b/src/screens/settings/SettingsAdvancedScreen.js @@ -12,6 +12,7 @@ import { getString } from '@strings/translations'; import useBoolean from '@hooks/useBoolean'; import ConfirmationDialog from '@components/ConfirmationDialog/ConfirmationDialog'; import { deleteReadChaptersFromDb } from '../../database/queries/DownloadQueries'; +import { openDirectory } from '../../native/epubParser'; import { Appbar, Button, List } from '@components'; import useSourceStorage from '@hooks/useSourceStorage'; @@ -72,7 +73,7 @@ const AdvancedSettings = ({ navigation }) => { EpubParser.openDirectory()} + onPress={() => openDirectory()} theme={theme} /> diff --git a/src/sources/local/epubSource.ts b/src/sources/local/epubSource.ts new file mode 100644 index 000000000..db95c0c86 --- /dev/null +++ b/src/sources/local/epubSource.ts @@ -0,0 +1,108 @@ +import * as cheerio from 'cheerio'; +import RNFS from 'react-native-fs'; + +const sourceName = 'EPub'; +const sourceId = 0; + +const parseNovelAndChapters = async (epubPath: string) => { + const files = await RNFS.readDir(epubPath); + + const metadataFile = files.find(f => f.name.endsWith('.json'))!; + const metadata = JSON.parse(await RNFS.readFile(metadataFile.path)); + + let novel = { + sourceId, + sourceName, + url: epubPath, + novelUrl: epubPath, + + novelName: metadata.title, + novelCover: `file://${epubPath}/${metadata.cover}`, + artist: metadata.artist, + author: metadata.authors, + summary: metadata.summary, + status: 'Finished', + + chapters: metadata.chapters.map((chapter: any) => ({ + chapterName: chapter.name, + chapterUrl: `file://${epubPath}/${chapter.path}`, + })), + }; + + return novel; +}; + +const parseChapter = async (epubPath: string, chapterUrl: string) => { + const files = await RNFS.readDir(epubPath); + + const metadataFile = files.find(f => f.name.endsWith('.json'))!; + const metadata = JSON.parse(await RNFS.readFile(metadataFile.path)); + const chapterInfo = metadata.chapters.find((f: any) => + chapterUrl.includes(f.path), + ); + + let chapterText = await RNFS.readFile(chapterUrl); + + const loadedCheerio = cheerio.load(chapterText); + const promises: Promise[] = []; + + const imageReplace = function (this: cheerio.Element) { + const src = + this.attribs.src ?? + this.attributes.find( + attr => attr.name.includes('src') || attr.name.includes('href'), + )?.value; + + if (src === undefined) { + loadedCheerio(this).remove(); + return; + } + + const promise: Promise = new Promise(async r => { + const url = `${epubPath}/${src}`; + loadedCheerio(this).replaceWith(``); + r(0); + }); + + promises.push(promise); + }; + + // Specify images absolute path + loadedCheerio('img').each(imageReplace); + loadedCheerio('image').each(imageReplace); + + // Add styles + loadedCheerio('link').each(function () { + if (this.attribs.href === undefined) { + return; + } + + const promise: Promise = new Promise(async r => { + const style = await RNFS.readFile(`${epubPath}/${this.attribs.href}`); + loadedCheerio(this).replaceWith(``); + r(0); + }); + + promises.push(promise); + }); + + await Promise.all(promises); + chapterText = loadedCheerio.html().replace(/href=".+?"/g, ''); + + const chapter = { + sourceId, + novelUrl: epubPath, + chapterUrl, + chapterName: chapterInfo.name, + chapterText, + }; + + return chapter; +}; + +const EPubSource = { + parseNovelAndChapters, + parseChapter, +}; + +export default EPubSource; diff --git a/src/sources/sourceManager.ts b/src/sources/sourceManager.ts index b96a68822..e4e8e0ec9 100644 --- a/src/sources/sourceManager.ts +++ b/src/sources/sourceManager.ts @@ -1,3 +1,4 @@ +import EPubSource from './local/epubSource'; import ComradeMaoScraper from './en/comrademao'; import ReadLightNovelScraper from './en/readlightnovel'; import fastNovelScraper from './en/fastnovel'; @@ -185,6 +186,7 @@ interface Scraper { export const sourceManager = (sourceId: number): Scraper => { const scrapers: Record = { // @ts-ignore + 0: EPubSource, // @ts-ignore 1: BoxNovelScraper, // @ts-ignore 2: ReadLightNovelScraper, // @ts-ignore 3: fastNovelScraper, // @ts-ignore From cd820554fbba38899c942dc76c935ff4807630cf Mon Sep 17 00:00:00 2001 From: Danisty Date: Thu, 24 Aug 2023 19:48:49 +0200 Subject: [PATCH 04/11] Extract everything in root folder, don't create subfolders --- android/app/src/main/python/epubParser.py | 41 ++++++++++++++--------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/android/app/src/main/python/epubParser.py b/android/app/src/main/python/epubParser.py index 557bd49e3..b3e3d05fe 100644 --- a/android/app/src/main/python/epubParser.py +++ b/android/app/src/main/python/epubParser.py @@ -45,12 +45,9 @@ def getContentOPF(epub_path): with zipfile.ZipFile(epub_path) as z: text = z.read('META-INF/container.xml') #get container xml to get content tree = etree.fromstring(text) - #print(text) rootfile_path = tree.xpath('/u:container/u:rootfiles/u:rootfile', namespaces=namespaces)[0].get('full-path') - #print(rootfile_path) tree = etree.fromstring(z.read(rootfile_path)) - #print(tree) return tree def cleanTitle(dir_name): @@ -72,7 +69,8 @@ def getContent(epub_path, dest_dir): dir = dest_dir + 'convertedEpubs/' + cleanedTitle # temporary directory for saving for fileInfo in z.infolist(): if any(fileInfo.filename.endswith(ext) for ext in extensions): - z.extract(fileInfo.filename, path = dir) + fileInfo.filename = os.path.basename(fileInfo.filename) + z.extract(fileInfo, path = dir) return dir def treeFindsAll(queryString, XMLtree): @@ -103,11 +101,14 @@ def getChapters(z: zipfile.ZipFile, opf_tree): chapters = [] for filename in z.namelist(): - if filename == 'toc.ncx': + print(filename) + if filename.endswith('toc.ncx'): isEPUB2 = True chpts_data_filename = filename break + print(isEPUB2) + if isEPUB2: name_by_content = {} tree = etree.XML(z.read(chpts_data_filename)) @@ -146,21 +147,30 @@ def getMetadata(epub_path, save_path): for filename in z.namelist(): if (filename.endswith('opf')): contentOPF = filename # search for opf file + tree = etree.XML(z.read(contentOPF)) + title = tree.find('.//dc:title',namespaces=namespaces).text - print('title',title) + print('title', title) + cover = getCover(epub_path) + print('cover', cover) + authors = '' for author in tree.findall('.//dc:creator', namespaces=namespaces): authors += (author.text + ' ') - print('authors',authors) + print('authors', authors) + genre = treeFindsAll('.//dc:subject', tree) - print('genre',genre) + print('genre', genre) + summary = treeFindsAll('.//dc:description', tree) - print('summary',summary) + print('summary', summary) + artist = '' #this will be included with author as epub files do not need to have an artist role and distinction is probably not necessary chapters = getChapters(z, tree) - print('chapters',chapters) + print('chapters', chapters) + return { 'url' : save_path, 'title' : title, @@ -177,8 +187,10 @@ def getCover(epub_path): for filename in z.namelist(): if (filename.endswith('opf')): contentOPF = filename # search for opf file + tree = etree.XML(z.read(contentOPF)) coverHREF = None + try: coverID = tree.xpath("//opf:metadata/opf:meta[@name='cover']", namespaces=namespaces)[0].get('content') print('coverID 2', coverID) #now we know where the cover image is located @@ -187,14 +199,13 @@ def getCover(epub_path): except IndexError: #not an EPUB 2.0 print('EPUB 2 failure') pass - #print('coverHREF', coverHREF) + if not coverHREF: #try EPUB 3.0 try: coverHREF = tree.xpath("//opf:manifest/opf:item[@properties='cover-image']",namespaces=namespaces)[0].get('href') except IndexError: print('EPUB 3 failure') pass - elif not coverHREF: #some EPUBs don't explicitly declare cover images try: coverID = tree.xpath("//opf:spine/open:itemref[@idref='cover']",namespaces=namespaces)[0].get('idref') @@ -204,15 +215,13 @@ def getCover(epub_path): coverHREF = tree.xpath('//xhtml:img', namespaces=namespaces)[0].get('src') except IndexError: print('Edge case failure') - elif not coverHREF: print('No cover found') return None - coverPath = os.path.join(os.path.dirname(contentOPF),coverHREF) - coverPath = coverPath.replace('\\','/') + coverPath = coverHREF.replace('\\','/') print('coverPath', coverPath) - #return z.open(coverPath) + return coverPath def dumpMetaData(metaData, dest_dir): From 0a43d400e62cf6dfcb47d1b6dfeb68debc46da65 Mon Sep 17 00:00:00 2001 From: cactusoftheday Date: Tue, 29 Aug 2023 19:08:18 -0400 Subject: [PATCH 05/11] made changes to epubparser.py --- android/app/src/main/python/epubParser.py | 70 +++++++++++++++-------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/android/app/src/main/python/epubParser.py b/android/app/src/main/python/epubParser.py index b3e3d05fe..a5d9b49c7 100644 --- a/android/app/src/main/python/epubParser.py +++ b/android/app/src/main/python/epubParser.py @@ -69,7 +69,6 @@ def getContent(epub_path, dest_dir): dir = dest_dir + 'convertedEpubs/' + cleanedTitle # temporary directory for saving for fileInfo in z.infolist(): if any(fileInfo.filename.endswith(ext) for ext in extensions): - fileInfo.filename = os.path.basename(fileInfo.filename) z.extract(fileInfo, path = dir) return dir @@ -101,44 +100,69 @@ def getChapters(z: zipfile.ZipFile, opf_tree): chapters = [] for filename in z.namelist(): - print(filename) if filename.endswith('toc.ncx'): isEPUB2 = True chpts_data_filename = filename break - print(isEPUB2) - if isEPUB2: - name_by_content = {} + name_by_path = {} tree = etree.XML(z.read(chpts_data_filename)) + # lastNavHash = None + for nav_point in tree.xpath('//ncx:navMap/ncx:navPoint', namespaces=namespaces): name = nav_point.find('.//ncx:text', namespaces=namespaces).text - content = nav_point.find('.//ncx:content', namespaces=namespaces).attrib['src'] + '#' - name_by_content[content[:content.index('#') or -1]] = name + content = nav_point.find('.//ncx:content', namespaces=namespaces).attrib['src'] + + chapter_path = '#' in content and content[:content.index('#')] or content + # new_chapter_path = chapter_name + # i = 1 + + if name_by_path[chapter_path]: # Chapter has been referenced more than once, split into smaller chapters + continue + # new_chapter_path = f'{chapter_path}_lnreaderSplit{i}' + # lastNavHash = content[content.index('#')+1:] + # i += 1 + + name_by_path[chapter_path] = name + chapters.append({ + "name":name, + "path":chapter_path + }) + # chapters_elements = opf_tree.xpath('//opf:spine//opf:itemref', namespaces=namespaces) + # for i,chapter_el in enumerate(chapters_elements): + # id = chapter_el.get('idref') + # item = opf_tree.find(f".//opf:manifest/opf:item[@id='{id}']", namespaces=namespaces) + # path = item.attrib['href'] + # chapter_has_name = path in name_by_path + # + # if chapter_has_name: + # chapter_name = name_by_path[path] + # if chapter_name.isnumeric(): + # chapterName = f"Chapter {chapter_name}" + # else: + # chapterName = chapter_name + # else: + # chapterName = f"Unnamed chapter {i+1}" +# + # chapters.append({ + # "name":chapterName, + # "path":path + # }) + else: + # epub 3 uses nav.xhtml instead but I don't have an example rn + # Temporal workaround, no chapter names included chapters_elements = opf_tree.xpath('//opf:spine//opf:itemref', namespaces=namespaces) for i,chapter_el in enumerate(chapters_elements): id = chapter_el.get('idref') item = opf_tree.find(f".//opf:manifest/opf:item[@id='{id}']", namespaces=namespaces) - content = item.attrib['href'] - chapter_has_name = content in name_by_content - - if chapter_has_name: - chapter_name = name_by_content[content] - if chapter_name.isnumeric(): - chapterName = f"Chapter {chapter_name}" - else: - chapterName = chapter_name - else: - chapterName = f"Unnamed chapter {i+1}" + path = item.attrib['href'] chapters.append({ - "name":chapterName, - "path":content + "name":f"Chapter {i+1}", + "path":path }) - else: - pass return chapters @@ -170,7 +194,7 @@ def getMetadata(epub_path, save_path): artist = '' #this will be included with author as epub files do not need to have an artist role and distinction is probably not necessary chapters = getChapters(z, tree) print('chapters', chapters) - + return { 'url' : save_path, 'title' : title, From 91264771c150880d896178a44b4e7d75d49572ef Mon Sep 17 00:00:00 2001 From: cactusoftheday Date: Tue, 29 Aug 2023 19:51:54 -0400 Subject: [PATCH 06/11] changed json metadata file and added some safeguards for different epub formats --- android/app/src/main/python/epubParser.py | 193 +++++++++++----------- 1 file changed, 94 insertions(+), 99 deletions(-) diff --git a/android/app/src/main/python/epubParser.py b/android/app/src/main/python/epubParser.py index a5d9b49c7..eb8b50921 100644 --- a/android/app/src/main/python/epubParser.py +++ b/android/app/src/main/python/epubParser.py @@ -4,8 +4,9 @@ import re import zipfile from lxml import etree -#import shutil -#from PIL import Image #not necessary only for debugging + +# import shutil +# from PIL import Image #not necessary only for debugging '''export interface SourceNovel { url: string; //must be absoulute @@ -24,54 +25,61 @@ releaseTime?: string; }''' + class ChapterItem: def __init__(self, name, url, releaseTime): self.name = name self.url = url - self.releaseTime = releaseTime #perhaps epub creation date? + self.releaseTime = releaseTime # perhaps epub creation date? + namespaces = { - 'calibre':'http://calibre.kovidgoyal.net/2009/metadata', - 'dc':'http://purl.org/dc/elements/1.1/', - 'dcterms':'http://purl.org/dc/terms/', - 'opf':'http://www.idpf.org/2007/opf', - 'ncx':'http://www.daisy.org/z3986/2005/ncx/', - 'u':'urn:oasis:names:tc:opendocument:xmlns:container', - 'xsi':'http://www.w3.org/2001/XMLSchema-instance', - 'xhtml':'http://www.w3.org/1999/xhtml' + 'calibre': 'http://calibre.kovidgoyal.net/2009/metadata', + 'dc': 'http://purl.org/dc/elements/1.1/', + 'dcterms': 'http://purl.org/dc/terms/', + 'opf': 'http://www.idpf.org/2007/opf', + 'ncx': 'http://www.daisy.org/z3986/2005/ncx/', + 'u': 'urn:oasis:names:tc:opendocument:xmlns:container', + 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', + 'xhtml': 'http://www.w3.org/1999/xhtml' } + def getContentOPF(epub_path): with zipfile.ZipFile(epub_path) as z: - text = z.read('META-INF/container.xml') #get container xml to get content + text = z.read('META-INF/container.xml') # get container xml to get content tree = etree.fromstring(text) rootfile_path = tree.xpath('/u:container/u:rootfiles/u:rootfile', namespaces=namespaces)[0].get('full-path') tree = etree.fromstring(z.read(rootfile_path)) return tree + def cleanTitle(dir_name): - #cleans a string so that it is valid for directories + # cleans a string so that it is valid for directories pattern = r'[<>:"/\\|?*\x00-\x1F]' # Remove any disallowed characters from the directory name string clean_dir_name = re.sub(pattern, '', dir_name) return clean_dir_name + def getContent(epub_path, dest_dir): - extensions = ['.html','.htm','.xhtml','.css','.png','.jpeg','.jpg','.gif','.opf'] # still need the content.opf file + extensions = ['.html', '.htm', '.xhtml', '.css', '.png', '.jpeg', '.jpg', '.gif', + '.opf'] # still need the content.opf file with zipfile.ZipFile(epub_path) as z: - for i in range(0,len(z.namelist())): - if(z.namelist()[i].endswith('opf')): - contentOPF = z.namelist()[i] #search for opf file + for i in range(0, len(z.namelist())): + if (z.namelist()[i].endswith('opf')): + contentOPF = z.namelist()[i] # search for opf file tree = etree.XML(z.read(contentOPF)) title = tree.find('.//dc:title', namespaces=namespaces).text cleanedTitle = cleanTitle(title) dir = dest_dir + 'convertedEpubs/' + cleanedTitle # temporary directory for saving for fileInfo in z.infolist(): if any(fileInfo.filename.endswith(ext) for ext in extensions): - z.extract(fileInfo, path = dir) + z.extract(fileInfo, path=dir) return dir + def treeFindsAll(queryString, XMLtree): list = XMLtree.findall(queryString, namespaces=namespaces) query = '' @@ -82,9 +90,10 @@ def treeFindsAll(queryString, XMLtree): query += (item.text + ' ') return query + class SourceNovel: def __init__(self, url, name, cover, genres, summary, author, artist, chapters): - self.url = url #file location? + self.url = url # file location? self.name = name self.cover = cover self.genres = genres @@ -94,118 +103,100 @@ def __init__(self, url, name, cover, genres, summary, author, artist, chapters): self.status = 'local' self.chapters = chapters -def getChapters(z: zipfile.ZipFile, opf_tree): + +def getChapters(z: zipfile.ZipFile, opf_tree, path): isEPUB2 = False filename = None chapters = [] - for filename in z.namelist(): + #print(filename) if filename.endswith('toc.ncx'): isEPUB2 = True chpts_data_filename = filename break + #print(isEPUB2) + if isEPUB2: - name_by_path = {} + name_by_content = {} tree = etree.XML(z.read(chpts_data_filename)) # lastNavHash = None - for nav_point in tree.xpath('//ncx:navMap/ncx:navPoint', namespaces=namespaces): name = nav_point.find('.//ncx:text', namespaces=namespaces).text - content = nav_point.find('.//ncx:content', namespaces=namespaces).attrib['src'] + content = nav_point.find('.//ncx:content', namespaces=namespaces).attrib['src'] + '#' + name_by_content[content[:content.index('#') or -1]] = name + chapter_filename = content[:content.index('#') or -1] - chapter_path = '#' in content and content[:content.index('#')] or content - # new_chapter_path = chapter_name - # i = 1 + name_by_content[chapter_filename] = name - if name_by_path[chapter_path]: # Chapter has been referenced more than once, split into smaller chapters - continue - # new_chapter_path = f'{chapter_path}_lnreaderSplit{i}' - # lastNavHash = content[content.index('#')+1:] - # i += 1 - - name_by_path[chapter_path] = name - chapters.append({ - "name":name, - "path":chapter_path - }) - - # chapters_elements = opf_tree.xpath('//opf:spine//opf:itemref', namespaces=namespaces) - # for i,chapter_el in enumerate(chapters_elements): - # id = chapter_el.get('idref') - # item = opf_tree.find(f".//opf:manifest/opf:item[@id='{id}']", namespaces=namespaces) - # path = item.attrib['href'] - # chapter_has_name = path in name_by_path - # - # if chapter_has_name: - # chapter_name = name_by_path[path] - # if chapter_name.isnumeric(): - # chapterName = f"Chapter {chapter_name}" - # else: - # chapterName = chapter_name - # else: - # chapterName = f"Unnamed chapter {i+1}" -# - # chapters.append({ - # "name":chapterName, - # "path":path - # }) - else: - # epub 3 uses nav.xhtml instead but I don't have an example rn - # Temporal workaround, no chapter names included chapters_elements = opf_tree.xpath('//opf:spine//opf:itemref', namespaces=namespaces) - for i,chapter_el in enumerate(chapters_elements): + for i, chapter_el in enumerate(chapters_elements): id = chapter_el.get('idref') item = opf_tree.find(f".//opf:manifest/opf:item[@id='{id}']", namespaces=namespaces) - path = item.attrib['href'] + content = item.attrib['href'] + chapter_has_name = content in name_by_content + + if chapter_has_name: + chapter_name = name_by_content[content] + if chapter_name.isnumeric(): + chapterName = f"Chapter {chapter_name}" + else: + chapterName = chapter_name + else: + chapterName = f"Unnamed chapter {i + 1}" chapters.append({ - "name":f"Chapter {i+1}", - "path":path + "name": chapterName, + "path": path + '/' + content if len(path) > 0 else content, }) + else: + pass # epub 3 uses nav.xhtml instead but I don't have an example rn return chapters + def getMetadata(epub_path, save_path): with zipfile.ZipFile(epub_path) as z: for filename in z.namelist(): if (filename.endswith('opf')): contentOPF = filename # search for opf file - + break + print("content.opf", contentOPF) tree = etree.XML(z.read(contentOPF)) - title = tree.find('.//dc:title',namespaces=namespaces).text - print('title', title) + title = tree.find('.//dc:title', namespaces=namespaces).text + #print('title', title) cover = getCover(epub_path) - print('cover', cover) + #print('cover', cover) authors = '' for author in tree.findall('.//dc:creator', namespaces=namespaces): authors += (author.text + ' ') - print('authors', authors) + #print('authors', authors) genre = treeFindsAll('.//dc:subject', tree) - print('genre', genre) + #print('genre', genre) summary = treeFindsAll('.//dc:description', tree) - print('summary', summary) + #print('summary', summary) - artist = '' #this will be included with author as epub files do not need to have an artist role and distinction is probably not necessary - chapters = getChapters(z, tree) + artist = '' # this will be included with author as epub files do not need to have an artist role and distinction is probably not necessary + chapters = getChapters(z, tree, os.path.dirname(contentOPF)) print('chapters', chapters) - + return { - 'url' : save_path, - 'title' : title, - 'cover' : cover, - 'genre' : genre, - 'summary' : summary, - 'authors' : authors.strip(), - 'artist' : artist, - 'chapters' : chapters, + 'url': save_path, + 'title': title, + 'cover': cover, + 'genre': genre, + 'summary': summary, + 'authors': authors.strip(), + 'artist': artist, + 'chapters': chapters, } + def getCover(epub_path): with zipfile.ZipFile(epub_path) as z: for filename in z.namelist(): @@ -217,44 +208,48 @@ def getCover(epub_path): try: coverID = tree.xpath("//opf:metadata/opf:meta[@name='cover']", namespaces=namespaces)[0].get('content') - print('coverID 2', coverID) #now we know where the cover image is located - coverHREF = tree.xpath("//opf:manifest/opf:item[@id='"+coverID+"']",namespaces=namespaces)[0].get('href') + #print('coverID 2', coverID) # now we know where the cover image is located + coverHREF = tree.xpath("//opf:manifest/opf:item[@id='" + coverID + "']", namespaces=namespaces)[0].get( + 'href') - except IndexError: #not an EPUB 2.0 - print('EPUB 2 failure') + except IndexError: # not an EPUB 2.0 + #print('EPUB 2 failure') pass - if not coverHREF: #try EPUB 3.0 + if not coverHREF: # try EPUB 3.0 try: - coverHREF = tree.xpath("//opf:manifest/opf:item[@properties='cover-image']",namespaces=namespaces)[0].get('href') + coverHREF = tree.xpath("//opf:manifest/opf:item[@properties='cover-image']", namespaces=namespaces)[ + 0].get('href') except IndexError: - print('EPUB 3 failure') + #print('EPUB 3 failure') pass - elif not coverHREF: #some EPUBs don't explicitly declare cover images + elif not coverHREF: # some EPUBs don't explicitly declare cover images try: - coverID = tree.xpath("//opf:spine/open:itemref[@idref='cover']",namespaces=namespaces)[0].get('idref') - temp = tree.xpath("//opf:manifest/opf:item[@id='"+coverID+"']", namespaces=namespaces)[0].get('href') + coverID = tree.xpath("//opf:spine/open:itemref[@idref='cover']", namespaces=namespaces)[0].get('idref') + temp = tree.xpath("//opf:manifest/opf:item[@id='" + coverID + "']", namespaces=namespaces)[0].get( + 'href') tree = etree.fromstring(z.read(temp)) coverHREF = tree.xpath('//xhtml:img', namespaces=namespaces)[0].get('src') except IndexError: print('Edge case failure') elif not coverHREF: - print('No cover found') + #print('No cover found') return None - coverPath = coverHREF.replace('\\','/') - print('coverPath', coverPath) + coverPath = coverHREF.replace('\\', '/') + #print('coverPath', coverPath) return coverPath + def dumpMetaData(metaData, dest_dir): dir = dest_dir + 'convertedEpubs/' + cleanTitle(metaData.get('title')) + '/' - with open(dir + cleanTitle(metaData.get('title')) + '.json', 'w') as fileout: + with open(dir + 'metadata.json', 'w') as fileout: json.dump(metaData, fileout) def parseEpub(epub_path, dest_dir): - save_path = getContent(epub_path, dest_dir) #simultaneously writes and saves content to a variable + save_path = getContent(epub_path, dest_dir) # simultaneously writes and saves content to a variable metaData = getMetadata(epub_path, save_path) dumpMetaData(metaData, dest_dir) return save_path From 204806a4e6d70fc57425f41d1adb6dce733d3f9d Mon Sep 17 00:00:00 2001 From: Danisty Date: Wed, 30 Aug 2023 14:58:38 +0200 Subject: [PATCH 07/11] Correctly read metadata and convert relative paths --- android/app/src/main/python/epubParser.py | 58 +++++++++++++---------- src/sources/local/epubSource.ts | 38 ++++++++++----- 2 files changed, 59 insertions(+), 37 deletions(-) diff --git a/android/app/src/main/python/epubParser.py b/android/app/src/main/python/epubParser.py index eb8b50921..e41f0e69e 100644 --- a/android/app/src/main/python/epubParser.py +++ b/android/app/src/main/python/epubParser.py @@ -32,7 +32,6 @@ def __init__(self, name, url, releaseTime): self.url = url self.releaseTime = releaseTime # perhaps epub creation date? - namespaces = { 'calibre': 'http://calibre.kovidgoyal.net/2009/metadata', 'dc': 'http://purl.org/dc/elements/1.1/', @@ -67,16 +66,20 @@ def getContent(epub_path, dest_dir): extensions = ['.html', '.htm', '.xhtml', '.css', '.png', '.jpeg', '.jpg', '.gif', '.opf'] # still need the content.opf file with zipfile.ZipFile(epub_path) as z: - for i in range(0, len(z.namelist())): - if (z.namelist()[i].endswith('opf')): - contentOPF = z.namelist()[i] # search for opf file + for filename in z.namelist(): + if (filename.endswith('opf')): + contentOPF = filename # search for opf file + break + tree = etree.XML(z.read(contentOPF)) title = tree.find('.//dc:title', namespaces=namespaces).text cleanedTitle = cleanTitle(title) dir = dest_dir + 'convertedEpubs/' + cleanedTitle # temporary directory for saving + for fileInfo in z.infolist(): if any(fileInfo.filename.endswith(ext) for ext in extensions): z.extract(fileInfo, path=dir) + return dir @@ -104,53 +107,60 @@ def __init__(self, url, name, cover, genres, summary, author, artist, chapters): self.chapters = chapters -def getChapters(z: zipfile.ZipFile, opf_tree, path): +def getChapters(z: zipfile.ZipFile, opf_tree, dir_path): isEPUB2 = False - filename = None chapters = [] + for filename in z.namelist(): - #print(filename) if filename.endswith('toc.ncx'): isEPUB2 = True chpts_data_filename = filename break - #print(isEPUB2) - if isEPUB2: - name_by_content = {} + name_by_path = {} tree = etree.XML(z.read(chpts_data_filename)) - # lastNavHash = None + for nav_point in tree.xpath('//ncx:navMap/ncx:navPoint', namespaces=namespaces): name = nav_point.find('.//ncx:text', namespaces=namespaces).text - content = nav_point.find('.//ncx:content', namespaces=namespaces).attrib['src'] + '#' - name_by_content[content[:content.index('#') or -1]] = name - chapter_filename = content[:content.index('#') or -1] + path = nav_point.find('.//ncx:content', namespaces=namespaces).attrib['src'] - name_by_content[chapter_filename] = name + chapter_path = path[:path.index('#')] if '#' in path else path + name_by_path[chapter_path] = name chapters_elements = opf_tree.xpath('//opf:spine//opf:itemref', namespaces=namespaces) for i, chapter_el in enumerate(chapters_elements): id = chapter_el.get('idref') item = opf_tree.find(f".//opf:manifest/opf:item[@id='{id}']", namespaces=namespaces) - content = item.attrib['href'] - chapter_has_name = content in name_by_content + path = item.attrib['href'] + chapter_has_name = path in name_by_path if chapter_has_name: - chapter_name = name_by_content[content] + chapter_name = name_by_path[path] if chapter_name.isnumeric(): - chapterName = f"Chapter {chapter_name}" + chapterName = f'Chapter {chapter_name}' else: chapterName = chapter_name else: - chapterName = f"Unnamed chapter {i + 1}" + chapterName = f'Unnamed chapter {i + 1}' chapters.append({ - "name": chapterName, - "path": path + '/' + content if len(path) > 0 else content, + 'name': chapterName, + 'path': dir_path + '/' + path if len(dir_path) > 0 else path, }) else: - pass # epub 3 uses nav.xhtml instead but I don't have an example rn + # EPUB3 uses nav.xhtml instead but I don't have an example rn + # For now it will list all files without their corresponding chapter names + chapters_elements = opf_tree.xpath('//opf:spine//opf:itemref', namespaces=namespaces) + for i, chapter_el in enumerate(chapters_elements): + id = chapter_el.get('idref') + item = opf_tree.find(f".//opf:manifest/opf:item[@id='{id}']", namespaces=namespaces) + path = item.attrib['href'] + + chapters.append({ + 'name': f'EPUB3 - Chapter {i + 1}', + 'path': path + '/' + path if len(path) > 0 else path, + }) return chapters @@ -161,7 +171,7 @@ def getMetadata(epub_path, save_path): if (filename.endswith('opf')): contentOPF = filename # search for opf file break - print("content.opf", contentOPF) + print('content.opf', contentOPF) tree = etree.XML(z.read(contentOPF)) title = tree.find('.//dc:title', namespaces=namespaces).text diff --git a/src/sources/local/epubSource.ts b/src/sources/local/epubSource.ts index db95c0c86..79a5e0d4e 100644 --- a/src/sources/local/epubSource.ts +++ b/src/sources/local/epubSource.ts @@ -1,14 +1,27 @@ import * as cheerio from 'cheerio'; import RNFS from 'react-native-fs'; +import { htmlToText } from '../helpers/htmlToText'; const sourceName = 'EPub'; const sourceId = 0; -const parseNovelAndChapters = async (epubPath: string) => { - const files = await RNFS.readDir(epubPath); +const resolveRelativePath = (absPath: string, relPath: string): string => { + const absPathParts = absPath.split('/'); + const relPathParts = relPath.split('/'); + + let backNavigations = 0; + relPathParts.forEach(p => p === '..' && backNavigations++); - const metadataFile = files.find(f => f.name.endsWith('.json'))!; - const metadata = JSON.parse(await RNFS.readFile(metadataFile.path)); + const newAbsPath = absPathParts + .slice(0, absPathParts.length - backNavigations) + .join('/'); + const newRelPath = relPathParts.slice(backNavigations).join('/'); + + return `${newAbsPath}/${newRelPath}`; +}; + +const parseNovelAndChapters = async (epubPath: string) => { + const metadata = JSON.parse(await RNFS.readFile(`${epubPath}/metadata.json`)); let novel = { sourceId, @@ -20,8 +33,8 @@ const parseNovelAndChapters = async (epubPath: string) => { novelCover: `file://${epubPath}/${metadata.cover}`, artist: metadata.artist, author: metadata.authors, - summary: metadata.summary, - status: 'Finished', + summary: htmlToText(metadata.summary), + status: 'Completed', chapters: metadata.chapters.map((chapter: any) => ({ chapterName: chapter.name, @@ -33,14 +46,12 @@ const parseNovelAndChapters = async (epubPath: string) => { }; const parseChapter = async (epubPath: string, chapterUrl: string) => { - const files = await RNFS.readDir(epubPath); - - const metadataFile = files.find(f => f.name.endsWith('.json'))!; - const metadata = JSON.parse(await RNFS.readFile(metadataFile.path)); + const metadata = JSON.parse(await RNFS.readFile(`${epubPath}/metadata.json`)); const chapterInfo = metadata.chapters.find((f: any) => chapterUrl.includes(f.path), ); + const dirPath = chapterUrl.substring(0, chapterUrl.lastIndexOf('/')); let chapterText = await RNFS.readFile(chapterUrl); const loadedCheerio = cheerio.load(chapterText); @@ -59,8 +70,8 @@ const parseChapter = async (epubPath: string, chapterUrl: string) => { } const promise: Promise = new Promise(async r => { - const url = `${epubPath}/${src}`; - loadedCheerio(this).replaceWith(``); + const url = resolveRelativePath(dirPath, src); + loadedCheerio(this).replaceWith(``); r(0); }); @@ -78,7 +89,8 @@ const parseChapter = async (epubPath: string, chapterUrl: string) => { } const promise: Promise = new Promise(async r => { - const style = await RNFS.readFile(`${epubPath}/${this.attribs.href}`); + const path = resolveRelativePath(dirPath, this.attribs.href); + const style = await RNFS.readFile(path); loadedCheerio(this).replaceWith(``); r(0); }); From ffc53b4a66eb68feb33ba08b8c81c5cbbfc3b71e Mon Sep 17 00:00:00 2001 From: Danisty Date: Wed, 30 Aug 2023 15:44:19 +0200 Subject: [PATCH 08/11] Corrected path for EPUB3 --- android/app/src/main/python/epubParser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/android/app/src/main/python/epubParser.py b/android/app/src/main/python/epubParser.py index e41f0e69e..bd00d6826 100644 --- a/android/app/src/main/python/epubParser.py +++ b/android/app/src/main/python/epubParser.py @@ -159,7 +159,7 @@ def getChapters(z: zipfile.ZipFile, opf_tree, dir_path): chapters.append({ 'name': f'EPUB3 - Chapter {i + 1}', - 'path': path + '/' + path if len(path) > 0 else path, + 'path': dir_path + '/' + path if len(dir_path) > 0 else path, }) return chapters From 30d0bbdf53b6c795ea15fa3a6084bd4531281ca3 Mon Sep 17 00:00:00 2001 From: cactusoftheday Date: Wed, 30 Aug 2023 14:11:09 -0400 Subject: [PATCH 09/11] fixed cover not showing up --- android/app/src/main/python/epubParser.py | 34 ++++++++++++----------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/android/app/src/main/python/epubParser.py b/android/app/src/main/python/epubParser.py index bd00d6826..e5ee5cc50 100644 --- a/android/app/src/main/python/epubParser.py +++ b/android/app/src/main/python/epubParser.py @@ -32,6 +32,7 @@ def __init__(self, name, url, releaseTime): self.url = url self.releaseTime = releaseTime # perhaps epub creation date? + namespaces = { 'calibre': 'http://calibre.kovidgoyal.net/2009/metadata', 'dc': 'http://purl.org/dc/elements/1.1/', @@ -63,7 +64,7 @@ def cleanTitle(dir_name): def getContent(epub_path, dest_dir): - extensions = ['.html', '.htm', '.xhtml', '.css', '.png', '.jpeg', '.jpg', '.gif', + extensions = ['.html', '.htm', '.xhtml', '.css', '.png', '.jpeg', '.jpg', '.gif', '.ncx', '.opf'] # still need the content.opf file with zipfile.ZipFile(epub_path) as z: for filename in z.namelist(): @@ -79,7 +80,7 @@ def getContent(epub_path, dest_dir): for fileInfo in z.infolist(): if any(fileInfo.filename.endswith(ext) for ext in extensions): z.extract(fileInfo, path=dir) - + return dir @@ -134,7 +135,7 @@ def getChapters(z: zipfile.ZipFile, opf_tree, dir_path): item = opf_tree.find(f".//opf:manifest/opf:item[@id='{id}']", namespaces=namespaces) path = item.attrib['href'] chapter_has_name = path in name_by_path - + print(chapter_has_name) if chapter_has_name: chapter_name = name_by_path[path] if chapter_name.isnumeric(): @@ -142,7 +143,7 @@ def getChapters(z: zipfile.ZipFile, opf_tree, dir_path): else: chapterName = chapter_name else: - chapterName = f'Unnamed chapter {i + 1}' + chapterName = id.split('_')[0] chapters.append({ 'name': chapterName, @@ -175,21 +176,21 @@ def getMetadata(epub_path, save_path): tree = etree.XML(z.read(contentOPF)) title = tree.find('.//dc:title', namespaces=namespaces).text - #print('title', title) + # print('title', title) cover = getCover(epub_path) - #print('cover', cover) + # print('cover', cover) authors = '' for author in tree.findall('.//dc:creator', namespaces=namespaces): authors += (author.text + ' ') - #print('authors', authors) + # print('authors', authors) genre = treeFindsAll('.//dc:subject', tree) - #print('genre', genre) + # print('genre', genre) summary = treeFindsAll('.//dc:description', tree) - #print('summary', summary) + # print('summary', summary) artist = '' # this will be included with author as epub files do not need to have an artist role and distinction is probably not necessary chapters = getChapters(z, tree, os.path.dirname(contentOPF)) @@ -218,12 +219,12 @@ def getCover(epub_path): try: coverID = tree.xpath("//opf:metadata/opf:meta[@name='cover']", namespaces=namespaces)[0].get('content') - #print('coverID 2', coverID) # now we know where the cover image is located + # print('coverID 2', coverID) # now we know where the cover image is located coverHREF = tree.xpath("//opf:manifest/opf:item[@id='" + coverID + "']", namespaces=namespaces)[0].get( 'href') except IndexError: # not an EPUB 2.0 - #print('EPUB 2 failure') + # print('EPUB 2 failure') pass if not coverHREF: # try EPUB 3.0 @@ -231,7 +232,7 @@ def getCover(epub_path): coverHREF = tree.xpath("//opf:manifest/opf:item[@properties='cover-image']", namespaces=namespaces)[ 0].get('href') except IndexError: - #print('EPUB 3 failure') + # print('EPUB 3 failure') pass elif not coverHREF: # some EPUBs don't explicitly declare cover images try: @@ -244,13 +245,13 @@ def getCover(epub_path): except IndexError: print('Edge case failure') elif not coverHREF: - #print('No cover found') + # print('No cover found') return None coverPath = coverHREF.replace('\\', '/') - #print('coverPath', coverPath) + # print('coverPath', coverPath) - return coverPath + return coverPath if os.path.dirname(contentOPF) == 0 else os.path.dirname(contentOPF) + '/' + coverPath def dumpMetaData(metaData, dest_dir): @@ -258,8 +259,9 @@ def dumpMetaData(metaData, dest_dir): with open(dir + 'metadata.json', 'w') as fileout: json.dump(metaData, fileout) + def parseEpub(epub_path, dest_dir): save_path = getContent(epub_path, dest_dir) # simultaneously writes and saves content to a variable metaData = getMetadata(epub_path, save_path) dumpMetaData(metaData, dest_dir) - return save_path + return save_path \ No newline at end of file From 32ef6c43c4843b085816a9b2ffb093d7a75487d7 Mon Sep 17 00:00:00 2001 From: Danisty Date: Thu, 31 Aug 2023 11:45:33 +0200 Subject: [PATCH 10/11] Get missing chapters names --- android/app/src/main/python/epubParser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/android/app/src/main/python/epubParser.py b/android/app/src/main/python/epubParser.py index e5ee5cc50..1d071278d 100644 --- a/android/app/src/main/python/epubParser.py +++ b/android/app/src/main/python/epubParser.py @@ -122,12 +122,13 @@ def getChapters(z: zipfile.ZipFile, opf_tree, dir_path): name_by_path = {} tree = etree.XML(z.read(chpts_data_filename)) - for nav_point in tree.xpath('//ncx:navMap/ncx:navPoint', namespaces=namespaces): + for nav_point in tree.xpath('.//ncx:navPoint', namespaces=namespaces): name = nav_point.find('.//ncx:text', namespaces=namespaces).text path = nav_point.find('.//ncx:content', namespaces=namespaces).attrib['src'] chapter_path = path[:path.index('#')] if '#' in path else path - name_by_path[chapter_path] = name + if chapter_path not in name_by_path: + name_by_path[chapter_path] = name chapters_elements = opf_tree.xpath('//opf:spine//opf:itemref', namespaces=namespaces) for i, chapter_el in enumerate(chapters_elements): From 9c2731bb2b35a444d36f6e8ef223dc8cb3fa8a4e Mon Sep 17 00:00:00 2001 From: cactusoftheday Date: Sun, 8 Oct 2023 22:02:11 -0400 Subject: [PATCH 11/11] fixed conflict with export to epub --- .../epubParser/EpubParser.java | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java b/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java index 2470347e8..785eaf5f8 100644 --- a/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java +++ b/android/app/src/main/java/com/rajarsheechatterjee/epubParser/EpubParser.java @@ -63,39 +63,9 @@ public void onActivityResult(Activity activity, int requestCode, int resultCode, if (epubPath != null && epubPath.toLowerCase().endsWith(".epub")) { PyObject savePath = epubParser.callAttr("parseEpub", epubPath, "/data/data/com.rajarsheechatterjee.LNReader/files/"); promise.resolve(savePath.toString()); - } else { - promise.reject("Invalid file format"); } - } else { - promise.reject("Directory selection canceled"); } } - /* - private final ActivityEventListener activityEventListener = new BaseActivityEventListener() { - //@Override - public void onActivityResult(Activity activity, int requestCode, int resultCode, Intent data){ - if(requestCode == REQUEST_DIRECTORY && resultCode == Activity.RESULT_OK){ - Uri uri = data.getData(); - String folderPath = getFilePathFromUri(uri); - //Log.d("file name", folderPath); - epubPath = folderPath; - //Log.d("epub path", epubPath); - Python py = Python.getInstance(); - PyObject epubParser = py.getModule("epubParser"); - if(epubPath != null && epubPath.toLowerCase().endsWith(".epub")){ - //Log.d("file name", epubPath); - PyObject getContent = epubParser.callAttr("parseEpub",epubPath,"/data/data/com.rajarsheechatterjee.LNReader/files/"); //dunno if that's the right app ID - epubPath = null; - promise.resolve(null); - } else{ - promise.reject("Bad file"); - } - } - else{ - promise.reject("Directory selection canceled"); - } - } - };*/ @Override public String getName(){