This repository has been archived by the owner on May 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
92 lines (67 loc) · 3.08 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import html
import os
import re
import sys
from datetime import datetime, timezone
from itertools import takewhile
from typing import Iterator, Optional, Tuple
from xml.etree.ElementTree import Element

import guid as guidfile
import hs
from telegram import bot
# Matches a whole release title of the form
# "[HorribleSubs] <title> [720p].mkv" or "[HorribleSubs] <title> [720p] (Batch)"
# and captures the text between the group tag and the resolution tag as `title`.
PATTERN = re.compile(r'^\[HorribleSubs\] (?P<title>.*?) \[720p\](?:\.mkv| \(Batch\))$')
# Site root; it is prefixed to each release's 'url' value when links are formatted.
HORRIBLE_SUBS = 'https://horriblesubs.info'
def determine_last_show(rss: Element, guid: str) -> Optional[Element]:
    """Return the feed item whose ``<guid>`` text equals *guid*.

    The items are compared in Python instead of interpolating *guid* into an
    ElementTree path predicate, so a value containing quote characters cannot
    break the query syntax.

    Args:
        rss: Root element of the parsed feed; items are looked up under
            ``channel/item`` relative to it.
        guid: GUID text to look for.

    Returns:
        The first matching ``item`` element, or ``None`` when no item in the
        feed carries that GUID.
    """
    for item in rss.iterfind('channel/item'):
        if item.findtext('guid') == guid:
            return item
    return None
def hourly_releases() -> Iterator[Tuple[str, str, str]]:
    """Return the releases that appeared in the feed since the previous run.

    The feed is walked from its first item down to (but excluding) the item
    whose GUID was stored by the previous run. When that GUID is no longer
    present in the feed, every item is considered new.

    Side effects:
        Stores the GUID of the first feed item through ``guidfile.write``
        before the returned iterator is consumed.

    Returns:
        A lazy iterator of ``str.rpartition(' - ')`` 3-tuples built from the
        ``title`` group of each item title that matches ``PATTERN``
        (presumably ``(show, ' - ', episode)`` -- confirm against the
        consumer). Items with a missing or non-matching title are skipped
        instead of raising. The iterator is empty when the feed has no items
        or nothing new was published.
    """
    rss = hs.rss.parse()
    newest_guid = rss.findtext('channel/item/guid')
    if not newest_guid:  # feed without items/GUIDs: nothing to report or store
        return iter([])

    last_guid = guidfile.read()
    if newest_guid == last_guid:  # no new entries since last run
        return iter([])

    halt_point = determine_last_show(rss, last_guid)
    # With halt_point being None the predicate is always true, so the whole
    # feed is taken.
    entries = takewhile(lambda e: e is not halt_point, rss.iterfind('channel/item'))
    matches = (PATTERN.match(entry.findtext('title', '')) for entry in entries)
    releases = (m.group('title').rpartition(' - ') for m in matches if m is not None)
    guidfile.write(newest_guid)
    return releases
def daily_releases():
    """Collect today's schedule from the front page.

    Walks every ``<td>`` of the ``table.schedule-table`` element; a cell with
    the ``schedule-widget-show`` class starts a release and the following
    cell with the ``schedule-widget-time`` class completes it.

    Returns:
        A list of dicts with the keys ``'title'``, ``'url'`` and ``'time'``
        (the time cell's text), in page order. Cells without a ``class``
        attribute and time cells that do not follow a show cell are ignored.
    """
    # This function MUST run ONLY at 00:00 PST/PDT, or whatever timezone HorribleSubs is in.
    soup = hs.frontpage.parse()
    schedule = soup.select_one('table.schedule-table')
    releases = []
    pending = None  # release started by a show cell, still waiting for its time cell
    for cell in schedule.select('td'):
        classes = cell.get('class') or ()
        if 'schedule-widget-show' in classes:
            # In the event that there is no <a> present in a node, fall back to using the cell itself.
            a = cell.select_one('a') or cell
            pending = {'title': a.text, 'url': a.get('href', '')}
            # I would love to treat this generically, but didn't find a way to bypass Cloudflare.
            # Which is good in a way, it means that the tool they develop and maintain works.
            if 'idolmster' in pending['url']:
                pending['title'] = pending['title'].replace('[email\xa0protected]', 'iDOLM@STER')
        elif 'schedule-widget-time' in classes and pending is not None:
            pending['time'] = cell.text
            releases.append(pending)
            pending = None  # never attach a second time cell to the same release
    return releases
def format_dailies(releases, now, *, base_url=None):
    """Render scheduled releases as HTML link lines with local and UTC times.

    Args:
        releases: Iterable of dicts with the keys ``'title'``, ``'url'`` and
            ``'time'``; ``'time'`` is an ``"HH:MM"`` string.
        now: ``datetime`` supplying the date and time zone each release time
            is placed in; only its hour and minute are replaced.
        base_url: Prefix for each release URL. Defaults to ``HORRIBLE_SUBS``.

    Returns:
        A list with one string per release, shaped like
        ``<a href="URL">TITLE</a> at HH:MM TZ (HH:MM UTC)``.
    """
    if base_url is None:
        base_url = HORRIBLE_SUBS
    time_format = '%H:%M %Z'
    output = []
    for release in releases:
        parts = release['time'].split(':')
        local = now.replace(hour=int(parts[0]), minute=int(parts[1]))
        utc = local.astimezone(timezone.utc)
        # Title and URL are scraped text placed inside HTML markup, so escape
        # them; otherwise a title containing "&" or "<" corrupts the tag.
        output.append('<a href="{url}">{title}</a> at {local} ({utc})'.format(
            title=html.escape(release['title'], quote=False),
            url=html.escape(base_url + release['url'], quote=True),
            local=local.strftime(time_format),
            utc=utc.strftime(time_format)))
    return output
if __name__ == '__main__':
    # "daily" anywhere on the command line selects the schedule digest; any
    # other invocation reports the releases that are new since the last run.
    if 'daily' in sys.argv:
        now = datetime.now().astimezone()
        releases = daily_releases()
        hs.links.cache_from_releases(releases)
        # Month and day come from the integer attributes rather than the
        # platform-specific "%-m"/"%-d" strftime flags, which are not
        # available everywhere (e.g. Windows).
        header = '#{}, {}/{} {:%H:%M %Z}'.format(
            now.strftime('%A').lower(), now.month, now.day, now)
        releases = [
            header,
            'Releases in the next 24 hours (estimates):',
        ] + format_dailies(releases, now)
    else:
        releases = hs.links.get_from_releases(hourly_releases())

    # Nothing is sent when there is nothing to report.
    if releases:
        bot.send_to_channel('\n'.join(releases))