Skip to content

Commit

Permalink
Factory (#17)
Browse files Browse the repository at this point in the history
* added NovelFactory

* fix test and add factory object interface

---------

Co-authored-by: Scott Iverson <[email protected]>
  • Loading branch information
safirex and siverson101 authored May 19, 2023
1 parent caf5faa commit 54d3f8f
Show file tree
Hide file tree
Showing 7 changed files with 369 additions and 265 deletions.
15 changes: 10 additions & 5 deletions archive_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import argparse
from argparse import RawDescriptionHelpFormatter
import sys
import os
#import os
sys.path.append('.\src')
sys.path.append('..\src')

Expand All @@ -18,6 +18,8 @@
statusInput='s'
compressInput='c'

# novels.registerObject(WuxiaWorldNovel.getSiteId(), WuxiaWorldNovel)
#novels.registerObject(NovelPia.getSiteId(), NovelPia)

def dev_tests():
# x = Novel('n6912eh', 'My Skills Are Too Strong to Be a Heroine')
Expand All @@ -33,9 +35,9 @@ def dev_tests():

def test_novelpia2():
from selenium import webdriver
from selenium.webdriver.common.by import By
#from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
#from bs4 import BeautifulSoup

gecko = os.path.normpath(os.path.join(os.path.dirname(__file__)+"/libs", 'geckodriver'))
# binary = FirefoxBinary(r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe')
Expand All @@ -51,7 +53,7 @@ def test_novelpia():
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
#from bs4 import BeautifulSoup

gecko = os.path.normpath(os.path.join(os.path.dirname(__file__)+"/libs", 'geckodriver'))
# binary = FirefoxBinary(r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe')
Expand All @@ -68,6 +70,8 @@ def test_novelpia():
print(soup)

driver.close()


def check_env():
try:
os.listdir('novel_list')
Expand Down Expand Up @@ -121,7 +125,7 @@ def parser():
parser_update.set_defaults(func=option_update)

parser_zip = subparsers.add_parser('zip', help='zip help')
parser_zip.add_argument('-o', type=str, help='output directory')
parser_zip.add_argument('-o', type=str, help='output directory', default='')
parser_zip.add_argument('-r', type=str, help='set a regex filtering the novels', default='')
parser_zip.set_defaults(func=option_zip)

Expand All @@ -132,6 +136,7 @@ def parser():
parser_zip.set_defaults(func=option_status)



args = parser.parse_args()
print(args)
if(hasattr(args,"func")):
Expand Down
2 changes: 1 addition & 1 deletion gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from PyQt5 import uic
from PyQt5.QtWidgets import QApplication, QWidget, QListWidget, QVBoxLayout, QLabel, QPushButton, QListWidgetItem, \
QHBoxLayout
from regex import D
#from regex import D
from src.main_functions import *


Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ beautifulsoup4
bs4
cookiejar
mechanize
requests_html
PyQt5
selenium

###### Requirements with Version Specifiers ######
requests~=2.25 # Compatible release.
55 changes: 26 additions & 29 deletions src/Chapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,21 @@



def checkFileName(str) -> str:
def checkFileName(name) -> str:
""" make sure the title is conform to windows url settings (260 char max)"""
str=str.replace('?','')
str=str.replace('!','')
str=str.replace(':','')
str=str.replace('"','')
str=str.replace('*','')
str=str.replace('/','')
str=str.replace('\\','')
str=str.replace('\t','')
str=str.replace('|','')
str=str.replace('<','')
str=str.replace('>','')
str=str[:250-len('./novel_list/')]
return str
name=name.replace('?','')
name=name.replace('!','')
name=name.replace(':','')
name=name.replace('"','')
name=name.replace('*','')
name=name.replace('/','')
name=name.replace('\\','')
name=name.replace('\t','')
name=name.replace('|','')
name=name.replace('<','')
name=name.replace('>','')
name=name[:250-len('./novel_list/')]
return name

class Chapter():
def __init__(self,num,url=''):
Expand All @@ -34,10 +34,11 @@ def setContent(self,content):
def setTitle(self,Title):
self.title=Title

def setUrl(self) -> str:
def setUrl(self):
""""will define Url chapter"""
pass
def getUrl(self):
raise(Exception(self," doesn't have a proper setUrl function definition"))

def getUrl(self) -> str:
return self.url

def processChapter(self,headers):
Expand All @@ -50,11 +51,11 @@ def processChapter(self,headers):

def parseTitle(self,html) -> str:
"""returns the title of the page"""
pass
raise(Exception(self," doesn't have a proper parseTitle function definition"))

def parseContent(self,html):
"""returns the content of the page"""
pass
raise(Exception(self," doesn't have a proper parseContent function definition"))


def validateTitle(self,title):
Expand All @@ -77,15 +78,13 @@ def cleanText(self,chapter_content):
return chapter_content


def save(self,dir):
pass

def createFile(self,dir):
def createFile(self, path):
chapter_title=checkFileName(self.title)
print("titre"+chapter_title)
print('saving '+str(self.num)+' '+chapter_title)

file = open('%s/%s_%s.txt'%(dir,self.num,chapter_title), 'w+', encoding='utf-8')
file = open('%s/%s_%s.txt'%(path,self.num,chapter_title), 'w+', encoding='utf-8')
file.write(chapter_title+'\n')
file.write(self.content)
file.close()
Expand All @@ -97,7 +96,7 @@ class KakyomuChapter(Chapter):
def __init__(self,num,url):
super().__init__(num,url)

def setUrl(self) -> str:
def setUrl(self) :
# self.url = 'https://kakuyomu.jp/works/%s/episodes/%s'%(self.novelNum,self.num)
print("url = "+str(self.url))
pass
Expand Down Expand Up @@ -165,11 +164,11 @@ def parseTitle(self, html) -> str:
title = soup.find("p","novel_subtitle").text
return title

def createFile(self,dir):
def createFile(self, path):
chapter_title=checkFileName(self.title)

print('saving '+str(self.num)+' '+chapter_title)
file = open('%s/%d_%s.txt'%(dir,self.num,chapter_title), 'w+', encoding='utf-8')
file = open('%s/%d_%s.txt'%(path,self.num,chapter_title), 'w+', encoding='utf-8')
file.write(chapter_title+'\n')
file.write(self.content)
file.close()
Expand All @@ -185,7 +184,6 @@ def setUrl(self,url):
self.url=url

def getTitle(self,html):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html)
title=''
for h in soup.find_all('title'):
Expand All @@ -194,13 +192,12 @@ def getTitle(self,html):
#title=re.findall('<h4 class="" (*<>) (.*?)</h4>',html)[0]
replacething=re.findall('_u3000',title)
for y in replacething:
chapter_title=chapter_title.replace(y,' ')
title=title.replace(y,' ')
title=self.validateTitle(title)
self.setTitle(title)
return title

def getContent(self,html):
from bs4 import BeautifulSoup

# Could be simplified with soup.find(id="chapter-content")
soup = BeautifulSoup(html)
Expand Down
Loading

0 comments on commit 54d3f8f

Please sign in to comment.