diff --git a/archive_updater.py b/archive_updater.py index e00e18b..4c5bbc4 100644 --- a/archive_updater.py +++ b/archive_updater.py @@ -5,7 +5,7 @@ import main_functions as mf - +keep_text_format =False updateInput='u' fullupdateInput='fu' @@ -32,6 +32,8 @@ def parser(): type=str,default=argparse.SUPPRESS) parser.add_argument("-f", help="force",action='store_true' ,default=argparse.SUPPRESS) + parser.add_argument("-md", help="format",action='store_true' + ,default=argparse.SUPPRESS) args = parser.parse_args() print(args) @@ -40,12 +42,19 @@ def parser(): if(args.mode==downloadInput): print("downloading") - mf.download() + + if hasattr(args, 'md'): + keep_text_format=True + + mf.download(keep_text_format) elif(args.mode==updateInput): if hasattr(args, 'r'): regex=args.r - mf.archiveUpdate(mf.findNovel(regex)) + + if hasattr(args, 'md'): + keep_text_format=True + mf.archiveUpdate(mf.findNovel(regex),True) elif(args.mode==statusInput): mf.getFolderStatus() diff --git a/src/Downloaders.py b/src/Downloaders.py index 443ee92..4da7719 100644 --- a/src/Downloaders.py +++ b/src/Downloaders.py @@ -6,9 +6,12 @@ class Novel: - def __init__(self, codeNovel, titreNovel): + def __init__(self, codeNovel, titreNovel, keep_text_format=False): + self.code = codeNovel self.titre = titreNovel + self.keep_text_format = keep_text_format + def download(self) -> str: """download chapter from site.""" @@ -82,7 +85,7 @@ def __init__(self, Novel): self.site = 'https://ncode.syosetu.com/' self.headers = { "user-agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"} - super(SyosetuNovel, self).__init__(Novel.code, Novel.titre) + super(SyosetuNovel, self).__init__(Novel.code, Novel.titre, Novel.keep_text_format) def updatePerDate(self, html): from bs4 import BeautifulSoup @@ -243,7 +246,7 @@ def test(): class KakuyomuNovel(Novel): def __init__(self, Novel): - super().__init__(Novel.code, Novel.titre) + super().__init__(Novel.code, Novel.titre, Novel.keep_text_format) def getChapterTitle(self, str): chapter_title = re.findall( @@ -298,21 +301,33 @@ def processChapter(self, chapter_url): chapter_title = self.getChapterTitle(html) print(chapter_title) soup = BeautifulSoup(html, 'html.parser') - content = soup.find('div', 'widget-episodeBody') - content = content.getText() + soup = soup.find('div', 'widget-episodeBody') + content=[] + if (self.keep_text_format == False): + content = soup.getText() + else: + content=str(soup) + self.createFile(chapter_title, content, chapter_url) def createFile(self, chapter_title, chapter_content, chapter_url): + file_extension ='txt' + print(self.keep_text_format) + if(self.keep_text_format==True): + file_extension='md' + print("file extension is md") + chapter_title = checkTitle(chapter_title) - file = open('%s/%d_%s.txt' % (self.getDir(), - self.getLastChapter(), chapter_title), 'w+', encoding='utf-8') + file = open('%s/%d_%s.%s' % (self.getDir(), self.getLastChapter(), chapter_title, file_extension) + , 'w+', encoding='utf-8') file.write(chapter_url+'\n') file.write(chapter_title+'\n') for sentence in chapter_content: file.write(sentence) file.close() - + + def getNovelTitle(self): titlediv = '

' % self.code url = 'https://kakuyomu.jp/works/%s' % self.code diff --git a/src/main_functions.py b/src/main_functions.py index ad8a57d..71e3ef4 100644 --- a/src/main_functions.py +++ b/src/main_functions.py @@ -1,7 +1,8 @@ +from logging import fatal import Downloaders import os -def archiveUpdate(dirList=[]): +def archiveUpdate(dirList=[],keep_text_format=False): if not dirList: dirList=os.listdir('./novel_list') print("list=") @@ -11,7 +12,8 @@ def archiveUpdate(dirList=[]): print() novelInfo=getNovelInfoFromFolderName(novel_folder) #change the fetching process following the site it's hosted on - novel=Downloaders.Novel(novelInfo[1],novelInfo[0]) + + novel=Downloaders.Novel(novelInfo[1],novelInfo[0],keep_text_format) novel=novel.updateObject() if(novel==0): print(novel_folder+' couldnt be updated because the code doesnt match known formats') @@ -114,7 +116,7 @@ def getNovelInfoFromFolderName(folderName): -def download(): +def download(keep_text_format=False): if('novel_list' not in os.listdir('.')): os.mkdir('novel_list') novel_list=getInputFile() @@ -126,7 +128,8 @@ def download(): name=novel_info[1] #print('i '+name) - novel=Downloaders.Novel(code,name) + print(keep_text_format) + novel=Downloaders.Novel(code,name,keep_text_format) novel=novel.updateObject() if(novel==0): continue