From 58b7c785473a48d681f9722ebf74708bf29f4bc4 Mon Sep 17 00:00:00 2001 From: Michael Walker Date: Fri, 6 Sep 2024 14:06:31 +0100 Subject: [PATCH] [nyarlathotep] Use summary for kjp-to-hacksrus, not title tumblr truncates the title: https://hacksrus.xyz/notice/Alj49cnMAXJLmAUYKm I don't think I need to impose my own length limit here, as KJP posts are pretty short. --- hosts/nyarlathotep/configuration.nix | 3 ++- hosts/nyarlathotep/jobs/rss-to-mastodon.py | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/hosts/nyarlathotep/configuration.nix b/hosts/nyarlathotep/configuration.nix index dc79d324..a2c5d037 100644 --- a/hosts/nyarlathotep/configuration.nix +++ b/hosts/nyarlathotep/configuration.nix @@ -605,10 +605,11 @@ in startAt = "hourly"; serviceConfig = { ExecStart = - let python = pkgs.python3.withPackages (ps: [ ps.docopt ps.feedparser ps.requests ]); + let python = pkgs.python3.withPackages (ps: [ ps.beautifulsoup4 ps.docopt ps.feedparser ps.requests ]); in concatStringsSep " " [ "${python}/bin/python3" (pkgs.writeText "rss-to-mastodon.py" (fileContents ./jobs/rss-to-mastodon.py)) + "--use-summary" "-d https://hacksrus.xyz/" "-f https://kingjamesprogramming.tumblr.com/rss" "-l /persist/var/lib/rss-to-mastodon/kjp-hacksrus.txt" diff --git a/hosts/nyarlathotep/jobs/rss-to-mastodon.py b/hosts/nyarlathotep/jobs/rss-to-mastodon.py index 1b851714..9d4abcd1 100644 --- a/hosts/nyarlathotep/jobs/rss-to-mastodon.py +++ b/hosts/nyarlathotep/jobs/rss-to-mastodon.py @@ -5,10 +5,11 @@ Requires the API_KEY environment variable to be set. 
Usage: - rss-to-mastodon [--dry-run] -d -f -l [-e ] [-v ] + rss-to-mastodon [--dry-run] [--use-summary] -d -f -l [-e ] [-v ] Options: --dry-run just print what would be published + --use-summary use the (de-HTMLised) summary field, rather than the title -d api domain -f rss feed URL -l file to log feed item IDs to (to prevent double-posting) @@ -16,6 +17,7 @@ -v visibility of entries [default: public] """ +import bs4 import docopt import feedparser import html.parser @@ -28,6 +30,7 @@ args = docopt.docopt(__doc__) dry_run = args["--dry-run"] +use_summary = args["--use-summary"] api_domain = args["-d"] feed_url = args["-f"] history_file = pathlib.Path(args["-l"]) @@ -64,9 +67,11 @@ # if there are multiple items, post the older ones first for item in reversed(items): - # handle entities title = html.parser.unescape(item["title"]) + if use_summary: + title = bs4.BeautifulSoup(item["summary"], "html.parser").get_text().strip() + print(item["id"]) print(title) print()