Factory (#17)

* added NovelFactory
* fix test and add factory object interface

---------

Co-authored-by: Scott Iverson <[email protected]>
safirex · May 19, 2023 · 54d3f8f · 54d3f8f
1 parent caf5faa
commit 54d3f8f
Show file tree

Hide file tree

Showing 7 changed files with 369 additions and 265 deletions.
diff --git a/archive_updater.py b/archive_updater.py
@@ -3,7 +3,7 @@
 import argparse
 from argparse import RawDescriptionHelpFormatter
 import sys
-import os
+#import os
 sys.path.append('.\src')
 sys.path.append('..\src')
 
@@ -18,6 +18,8 @@
 statusInput='s'
 compressInput='c'
 
+# novels.registerObject(WuxiaWorldNovel.getSiteId(), WuxiaWorldNovel)
+#novels.registerObject(NovelPia.getSiteId(), NovelPia)
 
 def dev_tests():
     # x = Novel('n6912eh', 'My Skills Are Too Strong to Be a Heroine')
@@ -33,9 +35,9 @@ def dev_tests():
 
 def test_novelpia2():
     from selenium import webdriver
-    from selenium.webdriver.common.by import By
+    #from selenium.webdriver.common.by import By
     from selenium.webdriver.firefox.options import Options
-    from bs4 import BeautifulSoup
+    #from bs4 import BeautifulSoup
 
     gecko = os.path.normpath(os.path.join(os.path.dirname(__file__)+"/libs", 'geckodriver'))
     # binary = FirefoxBinary(r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe')
@@ -51,7 +53,7 @@ def test_novelpia():
     from selenium import webdriver
     from selenium.webdriver.common.by import By
     from selenium.webdriver.firefox.options import Options
-    from bs4 import BeautifulSoup
+    #from bs4 import BeautifulSoup
 
     gecko = os.path.normpath(os.path.join(os.path.dirname(__file__)+"/libs", 'geckodriver'))
     # binary = FirefoxBinary(r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe')
@@ -68,6 +70,8 @@ def test_novelpia():
     print(soup)
 
     driver.close()
+
+
 def check_env():
     try: 
         os.listdir('novel_list')
@@ -121,7 +125,7 @@ def parser():
     parser_update.set_defaults(func=option_update)
 
     parser_zip = subparsers.add_parser('zip', help='zip help')
-    parser_zip.add_argument('-o', type=str, help='output directory')
+    parser_zip.add_argument('-o', type=str, help='output directory', default='')
     parser_zip.add_argument('-r', type=str, help='set a regex filtering the novels', default='')
     parser_zip.set_defaults(func=option_zip)
 
@@ -132,6 +136,7 @@ def parser():
     parser_zip.set_defaults(func=option_status)
 
 
+
     args = parser.parse_args()
     print(args)
     if(hasattr(args,"func")):

diff --git a/gui.py b/gui.py
@@ -9,7 +9,7 @@
 from PyQt5 import uic
 from PyQt5.QtWidgets import QApplication, QWidget,  QListWidget, QVBoxLayout, QLabel, QPushButton, QListWidgetItem, \
     QHBoxLayout
-from regex import D
+#from regex import D
 from src.main_functions import *
 
 

diff --git a/requirements.txt b/requirements.txt
@@ -3,6 +3,9 @@ beautifulsoup4
 bs4
 cookiejar
 mechanize
+requests_html
+PyQt5
+selenium
 
 ###### Requirements with Version Specifiers ######`
 requests~=2.25      # Compatible release.
diff --git a/src/Chapters.py b/src/Chapters.py
@@ -4,21 +4,21 @@
 
 
 
-def checkFileName(str) -> str:
+def checkFileName(name) -> str:
     """ make sure the title is conform to windows url settings (260 char max)"""
-    str=str.replace('?','')
-    str=str.replace('!','')
-    str=str.replace(':','')
-    str=str.replace('"','')
-    str=str.replace('*','')
-    str=str.replace('/','')
-    str=str.replace('\\','')
-    str=str.replace('\t','')
-    str=str.replace('|','')
-    str=str.replace('<','')
-    str=str.replace('>','')
-    str=str[:250-len('./novel_list/')]
-    return str
+    name=name.replace('?','')
+    name=name.replace('!','')
+    name=name.replace(':','')
+    name=name.replace('"','')
+    name=name.replace('*','')
+    name=name.replace('/','')
+    name=name.replace('\\','')
+    name=name.replace('\t','')
+    name=name.replace('|','')
+    name=name.replace('<','')
+    name=name.replace('>','')
+    name=name[:250-len('./novel_list/')]
+    return name
 
 class Chapter():
     def __init__(self,num,url=''):
@@ -34,10 +34,11 @@ def setContent(self,content):
     def setTitle(self,Title):
         self.title=Title
 
-    def setUrl(self) -> str:
+    def setUrl(self):
         """"will define Url chapter"""
-        pass
-    def getUrl(self):
+        raise(Exception(self," doesn't have a proper setUrl function definition"))
+
+    def getUrl(self) -> str:
         return self.url
 
     def processChapter(self,headers):
@@ -50,11 +51,11 @@ def processChapter(self,headers):
 
     def parseTitle(self,html) -> str:
         """returns the title of the page"""
-        pass
+        raise(Exception(self," doesn't have a proper parseTitle function definition"))
 
     def parseContent(self,html):
         """returns the content of the page"""
-        pass
+        raise(Exception(self," doesn't have a proper parseContent function definition"))
 
 
     def validateTitle(self,title):
@@ -77,15 +78,13 @@ def cleanText(self,chapter_content):
         return chapter_content
 
 
-    def save(self,dir):
-        pass
 
-    def createFile(self,dir):
+    def createFile(self, path):
         chapter_title=checkFileName(self.title)
         print("titre"+chapter_title)
         print('saving '+str(self.num)+' '+chapter_title)
 
-        file = open('%s/%s_%s.txt'%(dir,self.num,chapter_title), 'w+', encoding='utf-8')
+        file = open('%s/%s_%s.txt'%(path,self.num,chapter_title), 'w+', encoding='utf-8')
         file.write(chapter_title+'\n')
         file.write(self.content)
         file.close()
@@ -97,7 +96,7 @@ class KakyomuChapter(Chapter):
     def __init__(self,num,url):
         super().__init__(num,url)
 
-    def setUrl(self) -> str:
+    def setUrl(self) :
         # self.url = 'https://kakuyomu.jp/works/%s/episodes/%s'%(self.novelNum,self.num)
         print("url = "+str(self.url))
         pass
@@ -165,11 +164,11 @@ def parseTitle(self, html) -> str:
         title = soup.find("p","novel_subtitle").text
         return title
 
-    def createFile(self,dir):
+    def createFile(self, path):
         chapter_title=checkFileName(self.title)
 
         print('saving '+str(self.num)+' '+chapter_title)
-        file = open('%s/%d_%s.txt'%(dir,self.num,chapter_title), 'w+', encoding='utf-8')
+        file = open('%s/%d_%s.txt'%(path,self.num,chapter_title), 'w+', encoding='utf-8')
         file.write(chapter_title+'\n')
         file.write(self.content)
         file.close()
@@ -185,7 +184,6 @@ def setUrl(self,url):
         self.url=url
 
     def getTitle(self,html):
-        from bs4 import BeautifulSoup
         soup = BeautifulSoup(html)
         title=''
         for h in soup.find_all('title'):
@@ -194,13 +192,12 @@ def getTitle(self,html):
         #title=re.findall('<h4 class="" (*<>) (.*?)</h4>',html)[0]
         replacething=re.findall('_u3000',title)
         for y in replacething:
-            chapter_title=chapter_title.replace(y,' ')
+            title=title.replace(y,' ')
         title=self.validateTitle(title)
         self.setTitle(title)
         return title
 
     def getContent(self,html):
-        from bs4 import BeautifulSoup
 
         #can be made better with soup.id["chapter-content"]
         soup = BeautifulSoup(html)