From 151b40c2b70258a7f2be8b91e860648e66ee4dc6 Mon Sep 17 00:00:00 2001 From: xuyanling Date: Wed, 10 Jan 2024 19:41:21 +0800 Subject: [PATCH] =?UTF-8?q?feat(rss):=20=E9=80=82=E9=85=8Drefresh=20rss?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/rsschecker.py | 43 +++++++++++++++++++++ scripts/sqls/init_userrss_v3.sql | 64 +++++++++++++++++++++---------- web/templates/rss/rss_parser.html | 5 ++- 3 files changed, 90 insertions(+), 22 deletions(-) diff --git a/app/rsschecker.py b/app/rsschecker.py index b2fea99a..e746b996 100644 --- a/app/rsschecker.py +++ b/app/rsschecker.py @@ -1,3 +1,4 @@ +import re import json import time import traceback @@ -478,6 +479,48 @@ def __parse_userrss_result(self, taskinfo): ExceptionUtils.exception_traceback(err) log.error(f"【RssChecker】任务 {task_name} RSS地址 {rss_url} 获取的订阅报文无法解析:{str(err)}") continue + elif rss_parser.get("type") == "FRESH_XML": + try: + namespaces = {"media": "http://search.yahoo.com/mrss/"} + for prefix, uri in namespaces.items(): + etree.register_namespace(prefix, uri) + result_tree = etree.XML(ret.text.replace('\n', '').replace('\t', '').encode("utf-8")) + item_list = result_tree.xpath(rss_parser_format.get("list")) or [] + for item in item_list: + rss_item = {} + for key, attr in rss_parser_format.get("item", {}).items(): + item_html = etree.tostring(item, encoding='unicode') + if key == "enclosure": + enclosure_matches = re.findall(r'magnet:\?xt=urn:btih:(\w+)', item_html) + if enclosure_matches and len(enclosure_matches) >= 1: + value = f"magnet:?xt=urn:btih:{enclosure_matches[0]}" + rss_item.update({key: value}) + else: + continue + elif key == "size": + size_matches = re.findall(r'\b(\d+(\.\d+)?)\s*([KMGTP]i?B)\b', item_html) + if size_matches and len(size_matches[0]) >= 3: + size_value = size_matches[0][0] + size_unit = size_matches[0][2] + value = f"{size_value} {size_unit}" + rss_item.update({key: value}) + else: + continue + else: + if attr.get("path"): + values = item.xpath(attr.get("path", ""), namespaces=attr.get("namespaces", {})) + rss_item.update({key: values[0]}) + elif attr.get("value"): + values = attr.get("value") + rss_item.update({key: values[0]}) + else: + continue + rss_item.update({"address_index": i + 1}) + rss_result.append(rss_item) + except Exception as err: + ExceptionUtils.exception_traceback(err) + log.error(f"【RssChecker】任务 {task_name} RSS地址 {rss_url} 获取的订阅报文无法解析:{str(err)}") + continue elif rss_parser.get("type") == "JSON": try: result_json = json.loads(ret.text) diff --git a/scripts/sqls/init_userrss_v3.sql b/scripts/sqls/init_userrss_v3.sql index 1235aaf6..2d6a178a 100644 --- a/scripts/sqls/init_userrss_v3.sql +++ b/scripts/sqls/init_userrss_v3.sql @@ -75,26 +75,50 @@ INSERT OR REPLACE INTO "CONFIG_RSS_PARSER" ("ID", "NAME", "TYPE", "FORMAT", "PAR } }', 'api_key={TMDBKEY}&language=zh-CN', '', 'Y'); INSERT OR REPLACE INTO "CONFIG_RSS_PARSER" ("ID", "NAME", "TYPE", "FORMAT", "PARAMS", "NOTE", "SYSDEF") VALUES ('5', 'Nyaa', 'XML', '{ - "list": "//channel/item", - "item": { - "title": { - "path": ".//title/text()" - }, - "enclosure": { - "path": ".//link/text()" - }, - "link": { - "path": ".//guid/text()" - }, - "date": { - "path": ".//pubDate/text()" - }, - "description": { - "path": ".//description/text()" - }, - "size": { - "path": "size/text()", - "namespaces": "https://nyaa.si/xmlns/nyaa" + "list":"//channel/item", + "item":{ + "title":{ + "path":".//title/text()" + }, + "enclosure":{ + "path":".//link/text()" + }, + "link":{ + "path":".//guid/text()" + }, + "date":{ + "path":".//pubDate/text()" + }, + "description":{ + "path":".//description/text()" + }, + "size":{ + "path":"size/text()", + "namespaces":"https://nyaa.si/xmlns/nyaa" + } } +}', '', '', 'Y'); +INSERT OR REPLACE INTO "CONFIG_RSS_PARSER" ("ID", "NAME", "TYPE", "FORMAT", "PARAMS", "NOTE", "SYSDEF") VALUES ('6', 'FreshRSS', 'FRESH_XML', '{ + "list":"//channel/item", + "item":{ + "title":{ + "path":".//title/text()" + }, + "enclosure":{ + "path":".//media:content[@type=''application/x-bittorrent'']/@url", + "namespaces": {"media": "http://search.yahoo.com/mrss/"} + }, + "link":{ + "path":".//link/text()" + }, + "date":{ + "path":".//pubDate/text()" + }, + "description":{ + "path":".//description/text()" + }, + "size":{ + "path":".//description/a[contains(@href, ''#'')]/following-sibling::text()[1]" + } } }', '', '', 'Y'); \ No newline at end of file diff --git a/web/templates/rss/rss_parser.html b/web/templates/rss/rss_parser.html index a50aa6ad..c46a9d8d 100644 --- a/web/templates/rss/rss_parser.html +++ b/web/templates/rss/rss_parser.html @@ -108,8 +108,9 @@ title="支持XML及JSON,需要分别按XPath及JsonPath的语法维护解析格式" data-bs-toggle="tooltip">?