Skip to content

Commit

Permalink
Merge pull request #40 from ru-fu/backup-link-text
Browse files Browse the repository at this point in the history
add optional backup link texts for related links
  • Loading branch information
ru-fu authored Jun 21, 2024
2 parents d03d0cd + 883b515 commit 294b52b
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 31 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,18 @@ For Discourse links, specify only the topic IDs (in a comma-separated list).
If you have defined several Discourse prefixes, specify both key and ID (for example, `abc:1234`).

For related links, specify the full URLs (in a comma-separated list).

The link text is extracted automatically or can be specified in Markdown syntax.
You can also specify a backup link text (using curly brackets instead of square brackets) that is used only if the link title cannot be extracted.
Note that spaces are ignored; if you need spaces in the title, replace them with `&#32;`.
If Sphinx complains about the metadata value because it starts with "[", enclose the full value in double quotes.

The following example uses MyST syntax for the metadata:

```
---
discourse: 1234,56789
relatedlinks: https://www.example.com, [Link text](https://www.example.com)
discourse: 1234, abc:56789, abc:[Link text](1234)
relatedlinks: https://www.example.com, [Link text](https://www.example.com), {Backup text}(https://www.example.com)
---
```

Expand Down
113 changes: 85 additions & 28 deletions canonical-sphinx-extensions/related-links/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,14 @@
# }
# }
#
# Use these prefixes when linking (no prefix = first dict entry)
# Use these prefixes when linking (no prefix = first dict entry):
#
# ---
# discourse: ubuntu:12033,lxc:13128
# ---
#
# - Add related URLs to the metadata at the top of the page using
# the tag "relatedlinks". The link text is extracted automatically
# or can be specified in Markdown syntax. Note that spaces are
# ignored; if you need spaces in the title, replace them with &#32;.
# Some examples (in MyST syntax):
# the tag "relatedlinks":
#
# ---
# relatedlinks: https://www.example.com
Expand All @@ -47,15 +44,47 @@
# relatedlinks: https://www.example.com, https://www.google.com
# ---
#
# ---
# relatedlinks: "[Link text](https://www.example.com)"
# ---
# In both methods, the link text is extracted automatically
# or can be specified in Markdown syntax. Note that spaces are
# ignored; if you need spaces in the title, replace them with &#32;.
#
# ---
# discourse: [Link text](12033)
# ---
#
# ---
# discourse: ubuntu:[Link text](12033)
# ---
#
# ---
# relatedlinks: [Link text](https://www.example.com)
# ---
#
# If Sphinx complains about the metadata value because it starts
# with "[", enclose the full value in double quotes.
#
# You can also specify a backup link text that is used only if the
# title cannot be extracted automatically.
# For this, use the same syntax as for specifying the link text, but
# use "{" and "}" instead of "[" and "]".
# If the backup text is used, Sphinx logs an error of type
# canonical-sphinx-extensions.linktext (which you can suppress if
# needed).
#
# ---
# discourse: {Backup text}(12033)
# ---
#
# If Sphinx complains about the metadata value because it starts
# with "[", enclose the full value in double quotes.
# ---
# discourse: ubuntu:{Backup text}(12033)
# ---
#
# For both ways, check for errors in the output. Invalid links are
# not added to the output.
# ---
# relatedlinks: {Backup text}(https://www.example.com)
# ---
#
# Always check for errors in the output. Invalid links are not added
# to the output.
######################################################################

import requests
Expand All @@ -68,6 +97,13 @@
logger = logging.getLogger(__name__)


def log_warning(pagename, err, title):
    """Emit a Sphinx warning about a failed link-title lookup.

    The warning is prefixed with the page name. When a backup link
    text (*title*) is available, a note is appended saying it will be
    used instead. The warning carries type
    "canonical-sphinx-extensions" and subtype "linktext" so users can
    suppress it via suppress_warnings.
    """
    message = "{}: {}".format(pagename, err)
    if title:
        message = message + "\nUsing backup link text instead: " + title
    logger.warning(message, type="canonical-sphinx-extensions", subtype="linktext")


def setup_func(app, pagename, templatename, context, doctree):
def discourse_links(IDlist):

Expand All @@ -79,15 +115,17 @@ def discourse_links(IDlist):

for post in posts:
title = ""
postID = post

# determine the linkurl (which Discourse to link to)
# and strip this information from the postID
if type(context["discourse_prefix"]) is dict:
ID = post.split(":")
if len(ID) == 1:
linkurl = list(
context["discourse_prefix"].values()
)[0] + post
linkurl = list(context["discourse_prefix"].values())[0]
elif ID[0] in context["discourse_prefix"]:
linkurl = context["discourse_prefix"][ID[0]] + ID[1]
linkurl = context["discourse_prefix"][ID[0]]
postID = ID[1]
else:
logger.warning(
pagename
Expand All @@ -97,23 +135,36 @@ def discourse_links(IDlist):
)
continue
else:
linkurl = context["discourse_prefix"] + post
linkurl = context["discourse_prefix"]

# determine the title (and maybe strip it from the postID)
if post in cache:
title = cache[post]
elif postID.startswith("[") and postID.endswith(")"):
split = postID.partition("](")
title = split[0][1:]
postID = split[2][:-1]
else:

if postID.startswith("{") and postID.endswith(")"):
split = postID.partition("}(")
# if a backup link text exists, fall back on it if no
# other title can be retrieved
title = split[0][1:]
postID = split[2][:-1]

try:
r = requests.get(linkurl + ".json")
r = requests.get(linkurl + postID + ".json")
r.raise_for_status()
title = json.loads(r.text)["title"]
cache[post] = title
except requests.HTTPError as err:
logger.warning(pagename + ": " + str(err))
log_warning(pagename, str(err), title)
except requests.ConnectionError as err:
logger.warning(pagename + ": " + str(err))
log_warning(pagename, str(err), title)

if title:
linklist += '<li><a href="' + linkurl
linklist += '<li><a href="' + linkurl + postID
linklist += '" target="_blank">' + title + "</a></li>"

linklist += "</ul>"
Expand Down Expand Up @@ -141,24 +192,30 @@ def related_links(linklist):
title = split[0][1:]
link = split[2][:-1]
else:

if link.startswith("{") and link.endswith(")"):
split = link.partition("}(")
# if a backup link text exists, fall back on it if no
# other title can be retrieved
title = split[0][1:]
link = split[2][:-1]

try:
r = requests.get(link)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")
if soup.title is None:
logger.warning(
pagename
+ ": "
+ link
+ " doesn't have a title."
log_warning(
pagename, link + " doesn't have a title.",
title
)
else:
title = soup.title.get_text()
cache[link] = title
except requests.HTTPError as err:
logger.warning(pagename + ": " + str(err))
log_warning(pagename, str(err), title)
except requests.ConnectionError as err:
logger.warning(pagename + ": " + str(err))
log_warning(pagename, str(err), title)

if title:
linklist += '<li><a href="' + link + '" target="_blank">'
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = canonical-sphinx-extensions
version = 0.0.21
version = 0.0.22
author = Ruth Fuchss
author_email = [email protected]
description = A collection of Sphinx extensions used by Canonical documentation
Expand Down

0 comments on commit 294b52b

Please sign in to comment.