-
Notifications
You must be signed in to change notification settings - Fork 1
/
article-filter.py
95 lines (88 loc) · 2.88 KB
/
article-filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# rawdog plugin to filter articles on various criteria
# Copyright 2006, 2009, 2012, 2013 Adam Sampson <[email protected]>
#
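# This is configured by giving a "filter" argument to the relevant feed, which
# contains a number of entries separated by ";" (with a space on either side);
# each entry starts with "show" or "hide", then has a number of
# field-name/regexp pairs. All the expressions in an entry must match for it
# to be activated; an entry with no pairs always matches. Entries are applied
# in order, so a later matching entry overrides an earlier one.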
#
# The field names permitted are the fields of an "entry" in feedparser;
# see the sections called "entries[i].fieldname" in the feedparser manual at
# <http://pythonhosted.org/feedparser/> for more details. Some possibilities:
# title, summary, link, content, id, author...
#
# Some examples might make it clearer:
#
# # I don't want to see articles by Xeni or Cory -- well, except Cory's
# # articles about robots.
# feed 30m http://boingboing.net/rss.xml
# filter hide author "^Xeni" ; hide author "^Cory" ; show author "^Cory" title "(?i)robot"
#
# # I only want to see articles by Mark.
# feed 30m http://boingboing.net/rss.xml
# filter hide ; show author "^Mark"
import rawdoglib.plugins, sys, re
def parse_quoted(s):
    """Parse a string that contains a number of space-separated items,
    which may optionally be surrounded by quotes, into a list of
    strings.

    An item that starts with a double quote runs up to the next double
    quote (or to the end of the string if the quote is never closed) and
    may contain spaces; the quotes themselves are not included in the
    result. Any other item runs up to the next space. Runs of spaces
    between items, and leading or trailing spaces, are ignored.
    """
    items = []
    length = len(s)
    i = 0
    while i < length:
        # Skip separators. The bounds check matters: without it, input
        # that ends in extra spaces would index past the end of the string.
        while i < length and s[i] == ' ':
            i += 1
        if i >= length:
            # Only separators were left; there is no further item.
            break
        if s[i] == '"':
            # Quoted item: content lies between this quote and the next one.
            b = i + 1
            e = s.find('"', b)
        else:
            # Bare item: runs up to the next space.
            b = i
            e = s.find(' ', i + 1)
        if e == -1:
            # No terminator found; the item extends to the end of the string.
            e = length
        items.append(s[b:e])
        # Step over the terminating quote or space.
        i = e + 1
    return items
def _report_filter_error(message):
    """Write a filter configuration error message to standard error."""
    # sys.stderr.write is used instead of the "print >>" statement so the
    # plugin behaves the same on Python 2 and Python 3.
    sys.stderr.write(message + "\n")


def match_article(rawdog, article):
    """Return True if the article should be hidden by its feed's filter.

    The filter is read from the "filter" argument of the article's feed
    (rawdog.feeds[article.feed].args). It is split into tokens with
    parse_quoted and interpreted as a sequence of entries separated by
    ";" tokens. Each entry is "show" or "hide" followed by
    field-name/regexp pairs; the field names are looked up in
    article.entry_info. An entry matches when every one of its regexps is
    found (re.search) in the corresponding field, and an entry with no
    pairs always matches. Entries are applied in order, so the last
    matching entry decides the result.

    Returns False when the feed has no filter or no entry matches. If the
    filter is malformed (unknown action, missing regexp, unknown field
    name, or invalid regexp) a message is written to standard error and
    True is returned.
    """
    fargs = rawdog.feeds[article.feed].args
    if "filter" not in fargs:
        return False

    spec = fargs["filter"]
    tokens = parse_quoted(spec)
    info = article.entry_info

    hide = False
    i = 0
    while i < len(tokens):
        action = tokens[i]
        if action not in ("show", "hide"):
            _report_filter_error("Expected show or hide but got " + action + " in filter: " + spec)
            return True
        i += 1

        # Consume field-name/regexp pairs up to the next ";" or the end.
        matched = True
        while i < len(tokens) and tokens[i] != ";":
            if i + 1 >= len(tokens):
                _report_filter_error("Expected regexp at end of filter: " + spec)
                return True
            field = tokens[i]
            pattern = tokens[i + 1]
            if field not in info:
                _report_filter_error("Bad field name " + field + " in filter: " + spec)
                return True
            try:
                if re.search(pattern, info[field]) is None:
                    # Keep scanning so later pairs are still validated.
                    matched = False
            except re.error:
                _report_filter_error("Bad regular expression " + pattern + " in filter: " + spec)
                return True
            i += 2

        if matched:
            # A later matching entry overrides any earlier decision.
            hide = (action == "hide")

        # Step over the ";" that ends this entry, if there is one.
        if i < len(tokens) and tokens[i] == ";":
            i += 1
    return hide
def output_sorted_filter(rawdog, config, articles):
    """Remove filtered-out articles from the articles list, in place.

    Every article for which match_article returns a true value is deleted
    from the list. The number of articles examined and the number removed
    are reported through config.log. Always returns True.
    """
    total = len(articles)
    config.log("article-filter: examining ", total, " articles")

    # Walk from the last index down to the first so that deleting an
    # element never shifts the positions still waiting to be examined.
    index = total
    while index > 0:
        index -= 1
        if match_article(rawdog, articles[index]):
            del articles[index]

    hidden = total - len(articles)
    config.log("article-filter: hid ", hidden, " articles")
    return True
# Register output_sorted_filter with rawdog's plugin system under the
# "output_sorted_filter" hook name.
rawdoglib.plugins.attach_hook("output_sorted_filter", output_sorted_filter)