From 485667874a6b09bd8d394cc84645896c7c9446a4 Mon Sep 17 00:00:00 2001 From: gatura <79659261+gatura-source@users.noreply.github.com> Date: Fri, 6 Dec 2024 23:06:13 +0300 Subject: [PATCH] fix: connection timeout error (#91) * fixed timeout error * fix: timeout is an instance of aiohttp.ClientTimeout && timeout handled more gracefully * Fix: default keyword args inheritance && error handling for known errors without traceback * fix: timeout annotation, typo fix and timeout handling for asyncio * fix: removed commented timeout --- src/scraper.py | 96 ++++++++++++++++++++++++++------------------------ src/task.py | 8 ++++- 2 files changed, 56 insertions(+), 48 deletions(-) diff --git a/src/scraper.py b/src/scraper.py index f1f20af..909eb80 100644 --- a/src/scraper.py +++ b/src/scraper.py @@ -7,10 +7,12 @@ class BaseDepartment: address: str id: str + timeout: aiohttp.ClientTimeout - def __init__(self, id: str, address: str) -> None: + def __init__(self, id: str, address: str, timeout: int = 5) -> None: self.id = id self.address = address + self.timeout = aiohttp.ClientTimeout(total=timeout) def _complete_url(self, url: str) -> str: url = self._fix_invalid_url(url) @@ -27,7 +29,7 @@ def _fix_invalid_url(url: str) -> str: return urllib.parse.quote(url, "\./_-:=?%&") async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address) as resp: html_text: str = await resp.text(encoding='utf-8', errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -49,8 +51,8 @@ async def get_announcements(self) -> list[dict]: class CS(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) @staticmethod def cleanup(str_: str) -> str: @@ -80,7 +82,7 @@ def cleanup(str_: str) -> str: return "".join(chars)
async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address + '/json/announcements.json') as resp: data: list[dict] = await resp.json() data = data[:5] @@ -104,11 +106,11 @@ async def get_announcements(self) -> list[dict]: class SKSDB(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address) as resp: html_text: str = await resp.text(encoding='utf-8', errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -131,11 +133,11 @@ async def get_announcements(self) -> list[dict]: class IE(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str,**kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address) as resp: html_text: str = await resp.text(encoding='utf-8', errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -175,11 +177,11 @@ async def get_announcements(self) -> list[dict]: class Mat(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str,**kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address + 
'/duyurular.html') as resp: html_text: str = await resp.text(encoding='utf-8', errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -201,11 +203,11 @@ async def get_announcements(self) -> list[dict]: class BBY(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address + '/duyurular.php') as resp: html_text = await resp.text(encoding='utf-8', errors="replace") @@ -229,11 +231,11 @@ async def get_announcements(self) -> list[dict]: class Edebiyat(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address) as resp: html_text: str = await resp.text(encoding='iso-8859-9', errors="replace") @@ -258,11 +260,11 @@ async def get_announcements(self) -> list[dict]: class EE(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address + '?link=archivedAnno&lang=e') as resp: html_text: str = await resp.text(errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -280,11 +282,11 @@ async def get_announcements(self) -> list[dict]: class 
Phys(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address + '/index.php') as resp: html_text: str = await resp.text(errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -306,11 +308,11 @@ async def get_announcements(self) -> list[dict]: class ABOfisi(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address) as resp: html_text: str = await resp.text(encoding='utf-8', errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -337,11 +339,11 @@ async def get_announcements(self) -> list[dict]: class BIDB(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address) as resp: html_text: str = await resp.text(encoding='utf-8', errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -368,11 +370,11 @@ async def get_announcements(self) -> list[dict]: class JeoMuh(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + 
super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address) as resp: html_text: str = await resp.text(encoding='utf-8', errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -399,11 +401,11 @@ async def get_announcements(self) -> list[dict]: class Hidro(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address) as resp: html_text: str = await resp.text(encoding='utf-8', errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -433,11 +435,11 @@ async def get_announcements(self) -> list[dict]: class IDE(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address) as resp: html_text: str = await resp.text(encoding='iso-8859-9', errors="replace") soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -470,11 +472,11 @@ async def get_announcements(self) -> list[dict]: class SporBilimleri(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str, **kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with 
aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address + '/index.php?pid=1444&lang=tr') as resp: html_text: str = await resp.text(encoding='iso-8859-9', errors='replace') soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') @@ -499,11 +501,11 @@ async def get_announcements(self) -> list[dict]: class Iletisim(BaseDepartment): - def __init__(self, id: str, address: str): - super().__init__(id, address) + def __init__(self, id: str, address: str,**kwargs): + super().__init__(id, address, **kwargs) async def get_announcements(self) -> list[dict]: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(self.address) as resp: html_text: str = await resp.text(encoding='utf-8', errors='replace') soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml') diff --git a/src/task.py b/src/task.py index ad0d2a7..936eb64 100644 --- a/src/task.py +++ b/src/task.py @@ -1,7 +1,8 @@ import traceback +import asyncio import telegram -from aiohttp import ClientConnectorError +from aiohttp import ClientConnectorError, ConnectionTimeoutError from telegram.ext import ContextTypes from .app import logger, DEPARTMENT_DB, USER_DB, AVAILABLE_DEPARTMENTS, decode @@ -30,6 +31,11 @@ async def check_announcements(context: ContextTypes.DEFAULT_TYPE) -> None: logger.exception(message) await context.bot.send_message(chat_id=LOGGER_CHAT_ID, text=message, disable_notification=True) continue + except (ConnectionTimeoutError, asyncio.exceptions.TimeoutError): + message = f"Connection Timeout while scraping {department.id}" + logger.exception(message) + await context.bot.send_message(chat_id=LOGGER_CHAT_ID, text=message, disable_notification=True) + continue except: message = f"Undefined Error while scraping {department.id}" logger.exception(message)