add optional backup link texts for related links
Sometimes, the link text cannot be retrieved. For such cases, you
can now specify a backup link text.
If needed, you can then suppress the error that is raised when
the backup link text is used (canonical-sphinx-extensions.linktext).

Fixes #27

Signed-off-by: Ruth Fuchss <[email protected]>
ru-fu committed Jun 7, 2024
1 parent d03d0cd commit 3a09c85
Showing 2 changed files with 89 additions and 30 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -110,16 +110,18 @@ For Discourse links, specify only the topic IDs (in a comma-separated list).
If you have defined several Discourse prefixes, specify both key and ID (for example, `abc:1234`).

For related links, specify the full URLs (in a comma-separated list).

The link text is extracted automatically or can be specified in Markdown syntax.
You can also specify a backup link text (using curly brackets instead of square brackets) that is used only if the link title cannot be extracted.
Note that spaces are ignored; if you need spaces in the title, replace them with `&#32;`.
If Sphinx complains about the metadata value because it starts with "[", enclose the full value in double quotes.

The following example uses MyST syntax for the metadata:

```
---
-discourse: 1234,56789
-relatedlinks: https://www.example.com, [Link&#32;text](https://www.example.com)
+discourse: 1234, abc:56789, abc:[Link&#32;text](1234)
+relatedlinks: https://www.example.com, [Link&#32;text](https://www.example.com), {Backup&#32;text}(https://www.example.com)
---
```
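If the backup link text ends up being used, the extension logs a warning of type `canonical-sphinx-extensions.linktext` (the type named in the commit message above). A minimal `conf.py` sketch for silencing it with Sphinx's standard `suppress_warnings` option, assuming the fallback text is acceptable for your build:

```
# conf.py (sketch): silence the backup-link-text warning when the fallback
# text is acceptable for your documentation build
suppress_warnings = [
    "canonical-sphinx-extensions.linktext",
]
```

Leave the warning enabled if you want the build to flag pages whose link titles could not be fetched.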

113 changes: 85 additions & 28 deletions canonical-sphinx-extensions/related-links/__init__.py
@@ -27,17 +27,14 @@
# }
# }
#
-# Use these prefixes when linking (no prefix = first dict entry)
+# Use these prefixes when linking (no prefix = first dict entry):
#
# ---
# discourse: ubuntu:12033,lxc:13128
# ---
#
# - Add related URLs to the metadata at the top of the page using
-#   the tag "relatedlinks". The link text is extracted automatically
-#   or can be specified in Markdown syntax. Note that spaces are
-#   ignored; if you need spaces in the title, replace them with &#32;.
-#   Some examples (in MyST syntax):
+#   the tag "relatedlinks":
#
# ---
# relatedlinks: https://www.example.com
@@ -47,15 +44,47 @@
# relatedlinks: https://www.example.com, https://www.google.com
# ---
#
-# ---
-# relatedlinks: "[Link&#32;text](https://www.example.com)"
-# ---
# In both methods, the link text is extracted automatically
# or can be specified in Markdown syntax. Note that spaces are
# ignored; if you need spaces in the title, replace them with &#32;.
#
# ---
# discourse: [Link&#32;text](12033)
# ---
#
# ---
# discourse: ubuntu:[Link&#32;text](12033)
# ---
#
# ---
# relatedlinks: [Link&#32;text](https://www.example.com)
# ---
#
# If Sphinx complains about the metadata value because it starts
# with "[", enclose the full value in double quotes.
#
# You can also specify a backup link text that is used only if the
# title cannot be extracted automatically.
# For this, use the same syntax as for specifying the link text, but
# use "{" and "}" instead of "[" and "]".
# If the backup text is used, Sphinx logs an error of type
# canonical-sphinx-extensions.linktext (which you can suppress if
# needed).
#
# ---
# discourse: {Backup&#32;text}(12033)
# ---
#
-# If Sphinx complains about the metadata value because it starts
-# with "[", enclose the full value in double quotes.
# ---
# discourse: ubuntu:{Backup&#32;text}(12033)
# ---
#
-# For both ways, check for errors in the output. Invalid links are
-# not added to the output.
# ---
# relatedlinks: {Backup&#32;text}(https://www.example.com)
# ---
#
# Always check for errors in the output. Invalid links are not added
# to the output.
######################################################################

import requests
@@ -68,6 +97,13 @@
logger = logging.getLogger(__name__)


def log_warning(pagename, err, title):
    msg = pagename + ": " + err
    if title:
        msg += "\nUsing backup link text instead: " + title
    logger.warning(msg, type="canonical-sphinx-extensions", subtype="linktext")


def setup_func(app, pagename, templatename, context, doctree):
    def discourse_links(IDlist):

@@ -79,15 +115,17 @@ def discourse_links(IDlist):

        for post in posts:
            title = ""
            postID = post

            # determine the linkurl (which Discourse to link to)
            # and strip this information from the postID
            if type(context["discourse_prefix"]) is dict:
                ID = post.split(":")
                if len(ID) == 1:
-                    linkurl = list(
-                        context["discourse_prefix"].values()
-                    )[0] + post
+                    linkurl = list(context["discourse_prefix"].values())[0]
                elif ID[0] in context["discourse_prefix"]:
-                    linkurl = context["discourse_prefix"][ID[0]] + ID[1]
+                    linkurl = context["discourse_prefix"][ID[0]]
+                    postID = ID[1]
                else:
                    logger.warning(
                        pagename
@@ -97,23 +135,36 @@ def discourse_links(IDlist):
                    )
                    continue
            else:
-                linkurl = context["discourse_prefix"] + post
+                linkurl = context["discourse_prefix"]

            # determine the title (and maybe strip it from the postID)
            if post in cache:
                title = cache[post]
            elif postID.startswith("[") and postID.endswith(")"):
                split = postID.partition("](")
                title = split[0][1:]
                postID = split[2][:-1]
            else:

                if postID.startswith("{") and postID.endswith(")"):
                    split = postID.partition("}(")
                    # if a backup link text exists, fall back on it if no
                    # other title can be retrieved
                    title = split[0][1:]
                    postID = split[2][:-1]

                try:
-                    r = requests.get(linkurl + ".json")
+                    r = requests.get(linkurl + postID + ".json")
                    r.raise_for_status()
                    title = json.loads(r.text)["title"]
                    cache[post] = title
                except requests.HTTPError as err:
-                    logger.warning(pagename + ": " + str(err))
+                    log_warning(pagename, str(err), title)
                except requests.ConnectionError as err:
-                    logger.warning(pagename + ": " + str(err))
+                    log_warning(pagename, str(err), title)

            if title:
-                linklist += '<li><a href="' + linkurl
+                linklist += '<li><a href="' + linkurl + postID
                linklist += '" target="_blank">' + title + "</a></li>"

        linklist += "</ul>"
@@ -141,24 +192,30 @@ def related_links(linklist):
                title = split[0][1:]
                link = split[2][:-1]
            else:

                if link.startswith("{") and link.endswith(")"):
                    split = link.partition("}(")
                    # if a backup link text exists, fall back on it if no
                    # other title can be retrieved
                    title = split[0][1:]
                    link = split[2][:-1]

                try:
                    r = requests.get(link)
                    r.raise_for_status()
                    soup = BeautifulSoup(r.text, "html.parser")
                    if soup.title is None:
-                        logger.warning(
-                            pagename
-                            + ": "
-                            + link
-                            + " doesn't have a title."
+                        log_warning(
+                            pagename, link + " doesn't have a title.",
+                            title
                        )
                    else:
                        title = soup.title.get_text()
                        cache[link] = title
                except requests.HTTPError as err:
-                    logger.warning(pagename + ": " + str(err))
+                    log_warning(pagename, str(err), title)
                except requests.ConnectionError as err:
-                    logger.warning(pagename + ": " + str(err))
+                    log_warning(pagename, str(err), title)

            if title:
                linklist += '<li><a href="' + link + '" target="_blank">'

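For reference, a minimal standalone sketch of the `{Backup&#32;text}(target)` parsing that the changed code applies to both `discourse` entries (after any Discourse prefix has been stripped off) and `relatedlinks` entries; the `parse_backup_text` helper is illustrative only, not part of the extension's API:

```
# Sketch only: mirrors the "{Backup text}(target)" handling added in this
# commit. parse_backup_text is an illustrative helper, not extension API.


def parse_backup_text(entry):
    """Split "{Backup text}(target)" into (backup_title, target).

    Returns ("", entry) when the entry does not use the backup syntax.
    """
    if entry.startswith("{") and entry.endswith(")"):
        split = entry.partition("}(")
        return split[0][1:], split[2][:-1]
    return "", entry


title, target = parse_backup_text("{Backup&#32;text}(https://www.example.com)")
# title  -> "Backup&#32;text"   (&#32; renders as a space in the HTML output)
# target -> "https://www.example.com"
# The extension then tries to fetch the real page title from the target;
# only if that fails does it keep the backup title and log a warning of
# type canonical-sphinx-extensions.linktext.
```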