Skip to content

Commit

Permalink
add title parsing to chapters
Browse files Browse the repository at this point in the history
  • Loading branch information
safirex committed Nov 27, 2022
1 parent ded7e5d commit 93ff207
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,7 @@ soshite*
*.bat
test.py
.env
-.idea
+.idea
+baseCode/*
+build/*
+dist/*
4 changes: 3 additions & 1 deletion src/Chapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def checkFileName(str) -> str:
str=str.replace('*','')
str=str.replace('/','')
str=str.replace('\\','')
str=str.replace('\t','')
str=str.replace('|','')
str=str.replace('<','')
str=str.replace('>','')
Expand Down Expand Up @@ -83,6 +84,7 @@ def createFile(self,dir):
chapter_title=checkFileName(self.title)
print("titre"+chapter_title)
print('saving '+str(self.num)+' '+chapter_title)

file = open('%s/%s_%s.txt'%(dir,self.num,chapter_title), 'w+', encoding='utf-8')
file.write(chapter_title+'\n')
file.write(self.content)
Expand All @@ -105,7 +107,7 @@ def parseTitle(self, html) -> str:
chapter_title = re.findall(
'<p class="widget-episodeTitle js-vertical-composition-item">(.*?)<', html)[0]
print("title found = "+str(chapter_title))
-return chapter_title
+return checkFileName(chapter_title)

def parseContent(self, html,keep_text_format=False):
soup = BeautifulSoup(html, 'html.parser')
Expand Down

0 comments on commit 93ff207

Please sign in to comment.