Skip to content

Commit

Permalink
get chapter title from the value of the <title> node. Addressing #27
Browse files Browse the repository at this point in the history
  • Loading branch information
lb803 committed Mar 6, 2023
1 parent b3c1cd5 commit b9457ea
Showing 1 changed file with 5 additions and 10 deletions.
15 changes: 5 additions & 10 deletions src/epublius/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,24 +106,19 @@ def get_file_soup(self):

def get_chapter_title(self):
'''
Retrieve chapter title based on the text of <h1> or
Retrieve chapter title based on the text of <title> or
taking a guess from the file name
(i.e. front-cover.xhtml -> "Front Cover")
Special characters in the title are escaped before
the ch_title variable is returned.
'''

h1 = self.soup.find_all('h1')

if len(h1) == 1:
ch_title = self.soup.h1.get_text()

elif len(h1) > 1:
print("[WARNING] {} has multiple h1 tags"
.format(self.contents[self.index]))
ch_title = h1.pop(0).get_text()
title_node = self.soup.title

if (title_node is not None) and \
(title_node.string is not None):
ch_title = title_node.string
else:
# Strip extension from file name
basename = self.contents[self.index].split('.')[0]
Expand Down

0 comments on commit b9457ea

Please sign in to comment.