Add Markdown format for Kakuyomu

safirex · Sep 15, 2021 · 3e3e6a7 · 3e3e6a7
1 parent e758569
commit 3e3e6a7
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 15 deletions.
diff --git a/archive_updater.py b/archive_updater.py
@@ -5,7 +5,7 @@
 
 import main_functions as mf
 
-
+keep_text_format =False
 
 updateInput='u'
 fullupdateInput='fu'
@@ -32,6 +32,8 @@ def parser():
         type=str,default=argparse.SUPPRESS)
     parser.add_argument("-f", help="force",action='store_true'
         ,default=argparse.SUPPRESS)
+    parser.add_argument("-md", help="format",action='store_true'
+        ,default=argparse.SUPPRESS)   
 
     args = parser.parse_args()
     print(args)
@@ -40,12 +42,19 @@ def parser():
 
         if(args.mode==downloadInput):
             print("downloading")
-            mf.download()
+
+            if hasattr(args, 'md'):
+                keep_text_format=True
+
+            mf.download(keep_text_format)
 
         elif(args.mode==updateInput):
             if hasattr(args, 'r'):
                 regex=args.r
-            mf.archiveUpdate(mf.findNovel(regex))
+
+            if hasattr(args, 'md'):
+                keep_text_format=True
+            mf.archiveUpdate(mf.findNovel(regex),True)
 
         elif(args.mode==statusInput):
             mf.getFolderStatus()

diff --git a/src/Downloaders.py b/src/Downloaders.py
@@ -6,9 +6,12 @@
 
 
 class Novel:
-    def __init__(self, codeNovel, titreNovel):
+    def __init__(self, codeNovel, titreNovel, keep_text_format=False):
+
         self.code = codeNovel
         self.titre = titreNovel
+        self.keep_text_format = keep_text_format
+
 
     def download(self) -> str:
         """download chapter from site."""
@@ -82,7 +85,7 @@ def __init__(self, Novel):
         self.site = 'https://ncode.syosetu.com/'
         self.headers = {
             "user-agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"}
-        super(SyosetuNovel, self).__init__(Novel.code, Novel.titre)
+        super(SyosetuNovel, self).__init__(Novel.code, Novel.titre, Novel.keep_text_format)
 
     def updatePerDate(self, html):
         from bs4 import BeautifulSoup
@@ -243,7 +246,7 @@ def test():
 
 class KakuyomuNovel(Novel):
     def __init__(self, Novel):
-        super().__init__(Novel.code, Novel.titre)
+        super().__init__(Novel.code, Novel.titre, Novel.keep_text_format)
 
     def getChapterTitle(self, str):
         chapter_title = re.findall(
@@ -298,21 +301,33 @@ def processChapter(self, chapter_url):
         chapter_title = self.getChapterTitle(html)
         print(chapter_title)
         soup = BeautifulSoup(html, 'html.parser')
-        content = soup.find('div', 'widget-episodeBody')
-        content = content.getText()
+        soup = soup.find('div', 'widget-episodeBody')
+        content=[]
 
+        if (self.keep_text_format == False):
+            content = soup.getText()
+        else:
+            content=str(soup)
+
         self.createFile(chapter_title, content, chapter_url)
 
     def createFile(self, chapter_title, chapter_content, chapter_url):
+        file_extension ='txt'
+        print(self.keep_text_format)
+        if(self.keep_text_format==True):
+            file_extension='md'
+            print("file extension is md")
+
         chapter_title = checkTitle(chapter_title)
-        file = open('%s/%d_%s.txt' % (self.getDir(),
-                                      self.getLastChapter(), chapter_title), 'w+', encoding='utf-8')
+        file = open('%s/%d_%s.%s' % (self.getDir(), self.getLastChapter(), chapter_title, file_extension)
+                    , 'w+', encoding='utf-8')
         file.write(chapter_url+'\n')
         file.write(chapter_title+'\n')
         for sentence in chapter_content:
             file.write(sentence)
         file.close()
-
+
+
     def getNovelTitle(self):
         titlediv = '<h1 id="workTitle"><a href="/works/%s">' % self.code
         url = 'https://kakuyomu.jp/works/%s' % self.code

diff --git a/src/main_functions.py b/src/main_functions.py
@@ -1,7 +1,8 @@
+from logging import fatal
 import Downloaders
 import os
 
-def archiveUpdate(dirList=[]):
+def archiveUpdate(dirList=[],keep_text_format=False):
     if not dirList:
         dirList=os.listdir('./novel_list')
     print("list=")
@@ -11,7 +12,8 @@ def archiveUpdate(dirList=[]):
         print()
         novelInfo=getNovelInfoFromFolderName(novel_folder)
         #change the fetching process following the site it's hosted on
-        novel=Downloaders.Novel(novelInfo[1],novelInfo[0])
+
+        novel=Downloaders.Novel(novelInfo[1],novelInfo[0],keep_text_format)
         novel=novel.updateObject()
         if(novel==0):
             print(novel_folder+' couldnt be updated because the code doesnt match known formats')
@@ -114,7 +116,7 @@ def getNovelInfoFromFolderName(folderName):
 
 
 
-def download():
+def download(keep_text_format=False):
     if('novel_list' not in os.listdir('.')):
         os.mkdir('novel_list')
     novel_list=getInputFile()
@@ -126,7 +128,8 @@ def download():
         name=novel_info[1]
         #print('i '+name)
 
-        novel=Downloaders.Novel(code,name)
+        print(keep_text_format)
+        novel=Downloaders.Novel(code,name,keep_text_format)
         novel=novel.updateObject()
         if(novel==0):
             continue