Skip to content

Commit

Permalink
add markdown format for kakuyomu
Browse files Browse the repository at this point in the history
  • Loading branch information
safirex committed Sep 15, 2021
1 parent e758569 commit 3e3e6a7
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 15 deletions.
15 changes: 12 additions & 3 deletions archive_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import main_functions as mf


keep_text_format =False

updateInput='u'
fullupdateInput='fu'
Expand All @@ -32,6 +32,8 @@ def parser():
type=str,default=argparse.SUPPRESS)
parser.add_argument("-f", help="force",action='store_true'
,default=argparse.SUPPRESS)
parser.add_argument("-md", help="format",action='store_true'
,default=argparse.SUPPRESS)

args = parser.parse_args()
print(args)
Expand All @@ -40,12 +42,19 @@ def parser():

if(args.mode==downloadInput):
print("downloading")
mf.download()

if hasattr(args, 'md'):
keep_text_format=True

mf.download(keep_text_format)

elif(args.mode==updateInput):
if hasattr(args, 'r'):
regex=args.r
mf.archiveUpdate(mf.findNovel(regex))

if hasattr(args, 'md'):
keep_text_format=True
mf.archiveUpdate(mf.findNovel(regex),True)

elif(args.mode==statusInput):
mf.getFolderStatus()
Expand Down
31 changes: 23 additions & 8 deletions src/Downloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@


class Novel:
def __init__(self, codeNovel, titreNovel):
def __init__(self, codeNovel, titreNovel, keep_text_format=False):

self.code = codeNovel
self.titre = titreNovel
self.keep_text_format = keep_text_format


def download(self) -> str:
"""download chapter from site."""
Expand Down Expand Up @@ -82,7 +85,7 @@ def __init__(self, Novel):
self.site = 'https://ncode.syosetu.com/'
self.headers = {
"user-agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"}
super(SyosetuNovel, self).__init__(Novel.code, Novel.titre)
super(SyosetuNovel, self).__init__(Novel.code, Novel.titre, Novel.keep_text_format)

def updatePerDate(self, html):
from bs4 import BeautifulSoup
Expand Down Expand Up @@ -243,7 +246,7 @@ def test():

class KakuyomuNovel(Novel):
def __init__(self, Novel):
super().__init__(Novel.code, Novel.titre)
super().__init__(Novel.code, Novel.titre, Novel.keep_text_format)

def getChapterTitle(self, str):
chapter_title = re.findall(
Expand Down Expand Up @@ -298,21 +301,33 @@ def processChapter(self, chapter_url):
chapter_title = self.getChapterTitle(html)
print(chapter_title)
soup = BeautifulSoup(html, 'html.parser')
content = soup.find('div', 'widget-episodeBody')
content = content.getText()
soup = soup.find('div', 'widget-episodeBody')
content=[]

if (self.keep_text_format == False):
content = soup.getText()
else:
content=str(soup)

self.createFile(chapter_title, content, chapter_url)

def createFile(self, chapter_title, chapter_content, chapter_url):
file_extension ='txt'
print(self.keep_text_format)
if(self.keep_text_format==True):
file_extension='md'
print("file extension is md")

chapter_title = checkTitle(chapter_title)
file = open('%s/%d_%s.txt' % (self.getDir(),
self.getLastChapter(), chapter_title), 'w+', encoding='utf-8')
file = open('%s/%d_%s.%s' % (self.getDir(), self.getLastChapter(), chapter_title, file_extension)
, 'w+', encoding='utf-8')
file.write(chapter_url+'\n')
file.write(chapter_title+'\n')
for sentence in chapter_content:
file.write(sentence)
file.close()



def getNovelTitle(self):
titlediv = '<h1 id="workTitle"><a href="/works/%s">' % self.code
url = 'https://kakuyomu.jp/works/%s' % self.code
Expand Down
11 changes: 7 additions & 4 deletions src/main_functions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from logging import fatal
import Downloaders
import os

def archiveUpdate(dirList=[]):
def archiveUpdate(dirList=[],keep_text_format=False):
if not dirList:
dirList=os.listdir('./novel_list')
print("list=")
Expand All @@ -11,7 +12,8 @@ def archiveUpdate(dirList=[]):
print()
novelInfo=getNovelInfoFromFolderName(novel_folder)
#change the fetching process following the site it's hosted on
novel=Downloaders.Novel(novelInfo[1],novelInfo[0])

novel=Downloaders.Novel(novelInfo[1],novelInfo[0],keep_text_format)
novel=novel.updateObject()
if(novel==0):
print(novel_folder+' couldnt be updated because the code doesnt match known formats')
Expand Down Expand Up @@ -114,7 +116,7 @@ def getNovelInfoFromFolderName(folderName):



def download():
def download(keep_text_format=False):
if('novel_list' not in os.listdir('.')):
os.mkdir('novel_list')
novel_list=getInputFile()
Expand All @@ -126,7 +128,8 @@ def download():
name=novel_info[1]
#print('i '+name)

novel=Downloaders.Novel(code,name)
print(keep_text_format)
novel=Downloaders.Novel(code,name,keep_text_format)
novel=novel.updateObject()
if(novel==0):
continue
Expand Down

0 comments on commit 3e3e6a7

Please sign in to comment.