-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
77 lines (60 loc) · 2.45 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import requests
import json
import logging as log
import os
from bs4 import BeautifulSoup
from json import JSONDecodeError
# Creator page that is fetched and scanned for links to posts.
URL = 'https://buymeacoffee.com/vkv_official'

# Only hrefs starting with this prefix are considered candidate posts.
URL_PATTERN = 'https://buymeacoffee.com/vkv_official/'

# Links that match URL_PATTERN but must never be treated as posts.
EXCLUDE_URLS = [
    'https://buymeacoffee.com/vkv_official/membership',
    'https://buymeacoffee.com/vkv_official/posts',
    'https://buymeacoffee.com/vkv_official/extras/checkout/0',
]

# JSON file (relative to the working directory) holding already-seen posts.
POSTS_FILE = 'posts.json'
class TelergamError(Exception):
    """Raised when the Telegram Bot API answers with a non-200 status.

    The exception argument carries the error payload returned by the API.
    """
class VKVMonitor():
    """Watch the buymeacoffee page at ``URL`` and announce new posts in Telegram.

    Posts that were already announced are remembered in ``POSTS_FILE`` as a
    JSON object mapping post URL to ``{'title': ..., 'url': ...}``.
    Telegram credentials are read from the ``CHAT_ID`` and ``TOKEN``
    environment variables.
    """

    # Seconds to wait for any HTTP response. Without a timeout ``requests``
    # waits forever, so one stalled connection would hang the whole run.
    REQUEST_TIMEOUT = 30

    def __init__(self) -> None:
        """Read the Telegram chat id and bot token from the environment.

        Missing variables yield ``None``; nothing is validated here.
        """
        self.chat_id = os.environ.get('CHAT_ID')
        self.token = os.environ.get('TOKEN')

    def send_message(self, message: str):
        """Send ``message`` (Markdown parse mode) to the configured chat.

        Raises:
            TelergamError: if the Bot API responds with a status other than
                200. The exception carries the decoded JSON error body, or
                the raw response text when the body is not valid JSON.
        """
        url = f'https://api.telegram.org/bot{self.token}/sendMessage'
        params = {
            'chat_id': self.chat_id,
            'text': message,
            'parse_mode': 'Markdown',
        }
        response = requests.get(
            url, params=params, timeout=self.REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            try:
                details = response.json()
            except ValueError:
                # Body is not JSON (e.g. a proxy error page); keep the raw
                # text so the real failure is not masked by a decode error.
                details = response.text
            raise TelergamError(details)

    def process_new_post(self, url: str, posts: dict) -> dict:
        """Record the post at ``url`` and send a notification about it.

        The post is written to ``POSTS_FILE`` *before* the notification is
        sent, so a failing notification is not retried on the next run.

        Args:
            url: Address of the post page; its first ``<h1>`` is the title.
            posts: Known posts keyed by URL. Updated in place.

        Returns:
            The same ``posts`` mapping, now containing ``url``.

        Raises:
            TelergamError: propagated from ``send_message``.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, 'html.parser')

        heading = soup.find('h1')
        # A page without <h1> used to crash with AttributeError; fall back
        # to the URL so the post is still recorded and announced.
        title = heading.get_text() if heading is not None else url
        record = {'title': title, 'url': url}

        log.info('Save to POSTS')
        posts[url] = record
        self._save_posts(posts)

        log.info('Send notification')
        message = f'Новий пост від ВКВ: \n\n[{title}]({url})'
        self.send_message(message)
        return posts

    def process(self):
        """Fetch ``URL``, find links to unseen posts and announce each one.

        A link counts as a post when it starts with ``URL_PATTERN`` and is
        not listed in ``EXCLUDE_URLS``. Links already present in
        ``POSTS_FILE`` are skipped.
        """
        message = 'Запустили чудо моніторинг постів від ВКВ на buymeacoffee'
        log.info(message)

        posts = self._load_posts()

        # Step 1: Get info from webpage
        response = requests.get(URL, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            # Parsing continues (best effort), but make the failure visible.
            log.warning(
                'Unexpected status %s from %s', response.status_code, URL
            )

        # Step 2: Parse the webpage
        soup = BeautifulSoup(response.content, 'html.parser')
        for anchor in soup.find_all('a'):
            link = anchor.get('href')
            if not link or not link.startswith(URL_PATTERN):
                continue
            if link in EXCLUDE_URLS:
                continue
            # process_new_post adds the link to ``posts``, so a link that is
            # repeated on the page is announced only once.
            if link not in posts:
                posts = self.process_new_post(link, posts)

    def _load_posts(self) -> dict:
        """Return the posts stored in ``POSTS_FILE``; ``{}`` if missing or invalid."""
        try:
            with open(POSTS_FILE, encoding='utf-8') as posts_file:
                return json.load(posts_file)
        except (FileNotFoundError, JSONDecodeError):
            return {}

    def _save_posts(self, posts: dict) -> None:
        """Write ``posts`` to ``POSTS_FILE`` as JSON, closing the file afterwards."""
        with open(POSTS_FILE, 'w', encoding='utf-8') as posts_file:
            json.dump(posts, posts_file)
if __name__ == '__main__':
    # The root logger defaults to WARNING, which silently drops every
    # log.info() call in this script; enable INFO when run as a program.
    log.basicConfig(level=log.INFO)
    monitor = VKVMonitor()
    monitor.process()