Skip to content

Commit

Permalink
fix: connection timeout error (#91)
Browse files Browse the repository at this point in the history
* fixed timeout error

* fix: timeout is an instance of aiohttp.ClientTimeout && timeout handled more gracefully

* Fix: default keyword args inheritance && error handling for known errors without traceback

* fix: timeout annotation, typo fix and timeout handling for asyncio

* fix: removed commented timeout
  • Loading branch information
gatura-source authored Dec 6, 2024
1 parent ea7ab39 commit 4856678
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 48 deletions.
96 changes: 49 additions & 47 deletions src/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
class BaseDepartment:
address: str
id: str
timeout: aiohttp.ClientTimeout

def __init__(self, id: str, address: str) -> None:
def __init__(self, id: str, address: str, timeout: int = 5) -> None:
self.id = id
self.address = address
self.timeout = aiohttp.ClientTimeout(total=timeout)

def _complete_url(self, url: str) -> str:
url = self._fix_invalid_url(url)
Expand All @@ -27,7 +29,7 @@ def _fix_invalid_url(url: str) -> str:
return urllib.parse.quote(url, "\./_-:=?%&")

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address) as resp:
html_text: str = await resp.text(encoding='utf-8', errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand All @@ -49,8 +51,8 @@ async def get_announcements(self) -> list[dict]:


class CS(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

@staticmethod
def cleanup(str_: str) -> str:
Expand Down Expand Up @@ -80,7 +82,7 @@ def cleanup(str_: str) -> str:
return "".join(chars)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address + '/json/announcements.json') as resp:
data: list[dict] = await resp.json()
data = data[:5]
Expand All @@ -104,11 +106,11 @@ async def get_announcements(self) -> list[dict]:


class SKSDB(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address) as resp:
html_text: str = await resp.text(encoding='utf-8', errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand All @@ -131,11 +133,11 @@ async def get_announcements(self) -> list[dict]:


class IE(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str,**kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address) as resp:
html_text: str = await resp.text(encoding='utf-8', errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand Down Expand Up @@ -175,11 +177,11 @@ async def get_announcements(self) -> list[dict]:


class Mat(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str,**kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address + '/duyurular.html') as resp:
html_text: str = await resp.text(encoding='utf-8', errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand All @@ -201,11 +203,11 @@ async def get_announcements(self) -> list[dict]:


class BBY(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address + '/duyurular.php') as resp:
html_text = await resp.text(encoding='utf-8', errors="replace")

Expand All @@ -229,11 +231,11 @@ async def get_announcements(self) -> list[dict]:


class Edebiyat(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address) as resp:
html_text: str = await resp.text(encoding='iso-8859-9', errors="replace")

Expand All @@ -258,11 +260,11 @@ async def get_announcements(self) -> list[dict]:


class EE(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address + '?link=archivedAnno&lang=e') as resp:
html_text: str = await resp.text(errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand All @@ -280,11 +282,11 @@ async def get_announcements(self) -> list[dict]:


class Phys(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address + '/index.php') as resp:
html_text: str = await resp.text(errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand All @@ -306,11 +308,11 @@ async def get_announcements(self) -> list[dict]:


class ABOfisi(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address) as resp:
html_text: str = await resp.text(encoding='utf-8', errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand All @@ -337,11 +339,11 @@ async def get_announcements(self) -> list[dict]:


class BIDB(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address) as resp:
html_text: str = await resp.text(encoding='utf-8', errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand All @@ -368,11 +370,11 @@ async def get_announcements(self) -> list[dict]:


class JeoMuh(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address) as resp:
html_text: str = await resp.text(encoding='utf-8', errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand All @@ -399,11 +401,11 @@ async def get_announcements(self) -> list[dict]:


class Hidro(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address) as resp:
html_text: str = await resp.text(encoding='utf-8', errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand Down Expand Up @@ -433,11 +435,11 @@ async def get_announcements(self) -> list[dict]:


class IDE(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address) as resp:
html_text: str = await resp.text(encoding='iso-8859-9', errors="replace")
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand Down Expand Up @@ -470,11 +472,11 @@ async def get_announcements(self) -> list[dict]:


class SporBilimleri(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str, **kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address + '/index.php?pid=1444&lang=tr') as resp:
html_text: str = await resp.text(encoding='iso-8859-9', errors='replace')
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand All @@ -499,11 +501,11 @@ async def get_announcements(self) -> list[dict]:


class Iletisim(BaseDepartment):
def __init__(self, id: str, address: str):
super().__init__(id, address)
def __init__(self, id: str, address: str,**kwargs):
super().__init__(id, address, **kwargs)

async def get_announcements(self) -> list[dict]:
async with aiohttp.ClientSession() as session:
async with aiohttp.ClientSession(timeout=self.timeout) as session:
async with session.get(self.address) as resp:
html_text: str = await resp.text(encoding='utf-8', errors='replace')
soup: BeautifulSoup = BeautifulSoup(html_text, 'lxml')
Expand Down
8 changes: 7 additions & 1 deletion src/task.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import traceback
import asyncio

import telegram
from aiohttp import ClientConnectorError
from aiohttp import ClientConnectorError, ConnectionTimeoutError
from telegram.ext import ContextTypes

from .app import logger, DEPARTMENT_DB, USER_DB, AVAILABLE_DEPARTMENTS, decode
Expand Down Expand Up @@ -30,6 +31,11 @@ async def check_announcements(context: ContextTypes.DEFAULT_TYPE) -> None:
logger.exception(message)
await context.bot.send_message(chat_id=LOGGER_CHAT_ID, text=message, disable_notification=True)
continue
except (ConnectionTimeoutError, asyncio.exceptions.TimeoutError):
message = f"Connection Timeout while scraping {department.id}"
logger.exception(message)
await context.bot.send_message(chat_id=LOGGER_CHAT_ID, text=message, disable_notification=True)
continue
except:
message = f"Undefined Error while scraping {department.id}"
logger.exception(message)
Expand Down

0 comments on commit 4856678

Please sign in to comment.