Skip to content

Commit

Permalink
Merge pull request #29 from OpenBookPublishers/feature/write_urls
Browse files Browse the repository at this point in the history
Write Landing Page and Full Text URLs to Thoth automatically on chapter creation
  • Loading branch information
rhigman authored Jul 25, 2023
2 parents fbda449 + 4ad0ca9 commit 39a5a7c
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 6 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ The entry point is `.src/main.py`, which can be called with the following argume
| `-u` | URL path of this book |
| `-k` | Copyright file |
| `-a` | Donation link |
| `-m` | MathJax support |
| `-p` | Privacy policy URL |
| `-m` | MathJax support |
| `-p` | Privacy policy URL |
| `-w` | Write chapter URLs to Thoth (True/False) |


## Thoth Wrapper (Optional)
Expand All @@ -38,12 +39,15 @@ docker run --rm \
-v /path/to/output:/ebook_automation/output \
-e MATHJAX=False \
-e PRIVACYPOLICY_URL=https://example.org \
-e THOTH_EMAIL=user@example.org \
-e THOTH_PWD=password \
openbookpublishers/epublius \
thoth_wrapper.py /ebook_automation/epub_file.epub \
--doi 10.11647/obp.0275
```

The environment variable MATHJAX enables or disables MathJax support.
The environment variables THOTH_EMAIL and THOTH_PWD allow use of the `--write-urls` option by providing Thoth credentials

## Contributing

Expand Down
4 changes: 4 additions & 0 deletions src/epublius/epublius.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ def parse_args(self, argv=None):
parser.add_argument('-p', '--privacy-policy',
help = 'Privacy policy URL')

parser.add_argument('-w', '--write-urls',
help = 'Write HTML chapter URLs to Thoth',
default = False)

return parser.parse_args(argv)

def unzip_epub(self, prefix):
Expand Down
17 changes: 17 additions & 0 deletions src/epublius/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,23 @@ def get_chapter_title(self):

return html.escape(ch_title)

def get_chapter_doi(self):
    '''
    Return the chapter DOI, taken from the text of the first link
    inside <p class=doi> (that paragraph holds both the copyright
    statement and the DOI link).

    Returns None when the chapter has no such paragraph, when the
    paragraph contains no link, or when the link has no single
    string content -- not every chapter has a DOI.
    '''
    paragraph = self.soup.find('p', class_='doi')
    if paragraph is None:
        return None

    anchor = paragraph.a
    if anchor is None:
        return None

    # `.string` is itself None when the link has no single text child,
    # which is exactly the "no DOI" value callers expect.
    return anchor.string

def get_css(self):
'''
Return a str with the CSS information of a file
Expand Down
72 changes: 72 additions & 0 deletions src/epublius/thoth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python3

from os import getenv
from thothlibrary import ThothClient
import urllib.parse


class Thoth:
    '''
    Module to handle Thoth interactions
    '''

    def __init__(self):
        '''
        Create a Thoth client and attempt to log in straight away.
        The outcome is stored in self.logged_in so that callers can
        decide whether write operations are possible.
        '''
        self.client = ThothClient()
        self.logged_in = self.login()

    def login(self):
        '''
        Log in to Thoth with the credentials found in the THOTH_EMAIL
        and THOTH_PWD environment variables.

        Return True on success. Return False (after printing a warning)
        if either variable is unset or if the login attempt fails;
        a failed login is deliberately non-fatal.
        '''
        username = getenv('THOTH_EMAIL')
        password = getenv('THOTH_PWD')
        if username is None:
            print('[WARNING] No Thoth username provided '
                  '(THOTH_EMAIL environment variable not set)')
            return False
        if password is None:
            print('[WARNING] No Thoth password provided '
                  '(THOTH_PWD environment variable not set)')
            return False
        try:
            self.client.login(username, password)
        except Exception as e:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt and SystemExit still propagate, and
            # report the reason instead of hiding it.
            print('[WARNING] Unable to log in to Thoth: {}'.format(e))
            return False
        return True

    def write_urls(self, metadata, book_doi):
        '''
        Write chapter Landing Page and Full Text URLs
        to Thoth in standard OBP format

        metadata -- object providing get_chapter_doi(), get_page_url(),
                    and the `contents` / `index` attributes
        book_doi -- DOI of the parent book, used in the landing page URL

        Chapters without a DOI are skipped silently. Errors raised by
        the Thoth client are not handled here; they propagate to the
        caller.
        '''
        chapter_doi_full = metadata.get_chapter_doi()

        # Skip chapters that don't have a DOI
        if chapter_doi_full is None:
            return

        work_id = self.client.work_by_doi(chapter_doi_full).workId
        # Keep only the part after the resolver prefix, lower-cased
        chapter_doi = chapter_doi_full.split('doi.org/')[-1].lower()
        landing_page_root = (
            'https://www.openbookpublishers.com/books/'
            '{book_doi}/chapters/{chapter_doi}')

        # NOTE(review): every call creates a new HTML publication;
        # whether Thoth rejects duplicates on a re-run is not visible
        # from here -- confirm before running twice on the same book.
        publication = {"workId": work_id,
                       "publicationType": "HTML",
                       "isbn": None,
                       "widthMm": None,
                       "widthIn": None,
                       "heightMm": None,
                       "heightIn": None,
                       "depthMm": None,
                       "depthIn": None,
                       "weightG": None,
                       "weightOz": None}
        publication_id = self.client.create_publication(publication)

        location = {"publicationId": publication_id,
                    "landingPage": landing_page_root.format(
                        book_doi=book_doi, chapter_doi=chapter_doi),
                    "fullTextUrl": urllib.parse.unquote_plus(
                        metadata.get_page_url()),
                    "locationPlatform": "OTHER",
                    "canonical": "true"}
        self.client.create_location(location)

        print('{}: URLs written to Thoth'.format(
            metadata.contents[metadata.index]))
14 changes: 14 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from epublius.epublius import Epublius
from epublius.metadata import Metadata
from epublius.output import Output
from epublius.thoth import Thoth
from thothlibrary import ThothError


def main():
Expand All @@ -15,6 +17,11 @@ def main():
# Create instances
epublius = Epublius(work_dir)
output = Output(os.path.abspath('assets/template.xhtml'))
thoth = Thoth()

# Warn if user requested to write URLs to Thoth but Thoth login failed
if epublius.argv.write_urls and not thoth.logged_in:
print('[WARNING] Thoth login failed; URLs will not be written')

# Get book contents
contents = epublius.get_contents()
Expand Down Expand Up @@ -57,6 +64,13 @@ def main():
file_path = os.path.join(output_directory, section)
output.write_file(processed_content, file_path)

if epublius.argv.write_urls and thoth.logged_in:
# Write chapter URL metadata to Thoth
try:
thoth.write_urls(metadata, epublius.argv.doi)
except (KeyError, ThothError) as e:
# Continue on error, but display warning
print('[WARNING] Error writing URLs to Thoth for {}: {}'.format(section, e))

# Duplicate TOC file to output_directory/main.html
epublius.duplicate_contents(TOC_filepath.get('TOC_filepath'))
Expand Down
9 changes: 5 additions & 4 deletions src/thoth_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
def query_thoth(doi_url):
    """Run Thoth's 'workByDoi' query for *doi_url* and return the result."""
    client = ThothClient()
    # The DOI is wrapped in double quotes before being handed to the query
    parameters = {'doi': f'"{doi_url}"'}
    return client.query('workByDoi', parameters)

def get_title(thoth_data):
    """Return the "fullTitle" entry of the Thoth work data."""
    full_title = thoth_data["fullTitle"]
    return full_title

Expand All @@ -37,9 +37,9 @@ def get_html_pub_url(thoth_data):
def run():
parser = argparse.ArgumentParser(description='Thoth wrapper')
parser.add_argument('epub_path', help='Path to epub file')
parser.add_argument('-d', '--doi', help='Work DOI (registered in Thoth)')
parser.add_argument('-d', '--doi', help='Work DOI (registered in Thoth)', required=True)
args = parser.parse_args()

doi_url = urllib.parse.urljoin('https://doi.org/', args.doi)

thoth_data = query_thoth(doi_url)
Expand All @@ -57,7 +57,8 @@ def run():
"-t", os.path.join(epublius_dir, ""),
"-d", args.doi,
"-m", MATHJAX,
"-p", os.getenv('PRIVACYPOLICY_URL', '#')]
"-p", os.getenv('PRIVACYPOLICY_URL', '#'),
"-w", 'True']

os.execvp(sys.executable, [exe] + args)

Expand Down

0 comments on commit 39a5a7c

Please sign in to comment.