From b9457ea95304dd1bacd824ac780b6931169120c3 Mon Sep 17 00:00:00 2001 From: Luca Baffa <47544021+lb803@users.noreply.github.com> Date: Mon, 6 Mar 2023 12:15:16 +0000 Subject: [PATCH] get chapter title from the value of the node. Addressing https://github.com/OpenBookPublishers/epublius/issues/27 --- src/epublius/metadata.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/epublius/metadata.py b/src/epublius/metadata.py index f001536..af39035 100644 --- a/src/epublius/metadata.py +++ b/src/epublius/metadata.py @@ -106,7 +106,7 @@ def get_file_soup(self): def get_chapter_title(self): ''' - Retrieve chapter title based on the text of <h1> or + Retrieve chapter title based on the text of <title> or taking a guess from the file name (i.e. front-cover.xhtml -> "Front Cover") @@ -114,16 +114,11 @@ def get_chapter_title(self): the ch_title varible is returned. ''' - h1 = self.soup.find_all('h1') - - if len(h1) == 1: - ch_title = self.soup.h1.get_text() - - elif len(h1) > 1: - print("[WARNING] {} has multiple h1 tags" - .format(self.contents[self.index])) - ch_title = h1.pop(0).get_text() + title_node = self.soup.title + if (title_node is not None) and \ + (title_node.string is not None): + ch_title = title_node.string else: # Strip extension from file name basename = self.contents[self.index].split('.')[0]