diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 206b02f..1be4735 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,14 +2,14 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files - repo: https://github.com/asottile/pyupgrade - rev: v2.31.1 + rev: v3.15.0 hooks: - id: pyupgrade args: [--py38-plus] diff --git a/bin/capture_manager.py b/bin/capture_manager.py index 6e07ffc..6e91d30 100755 --- a/bin/capture_manager.py +++ b/bin/capture_manager.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +from __future__ import annotations + import asyncio import logging import logging.config @@ -19,13 +21,13 @@ class CaptureManager(AbstractManager): - def __init__(self, loglevel: Optional[int]=None): + def __init__(self, loglevel: Optional[int]=None) -> None: super().__init__(loglevel) self.script_name = 'capture_manager' - self.captures: Set[Task] = set() + self.captures: Set[Task] = set() # type: ignore[type-arg] self.lacus = Lacus() - async def clear_dead_captures(self): + async def clear_dead_captures(self) -> None: ongoing = {capture.get_name(): capture for capture in self.captures} max_capture_time = get_config('generic', 'max_capture_time') oldest_start_time = datetime.now() - timedelta(seconds=max_capture_time + 300) @@ -44,7 +46,7 @@ async def clear_dead_captures(self): except asyncio.CancelledError: self.logger.warning(f'{expected_uuid} is canceled now.') - async def _to_run_forever_async(self): + async def _to_run_forever_async(self) -> None: await self.clear_dead_captures() if self.force_stop: return @@ -61,7 +63,7 @@ async def _to_run_forever_async(self): # be decremented when it finishes self.set_running(len(self.captures) + 1) - async def _wait_to_finish_async(self): + async def _wait_to_finish_async(self) -> None: while self.captures: self.logger.info(f'Waiting for {len(self.captures)} capture(s) to finish...') self.logger.info(f'Ongoing captures: {", ".join(capture.get_name() for capture in self.captures)}') @@ -70,7 +72,7 @@ async def _wait_to_finish_async(self): self.logger.info('No more captures') -def main(): +def main() -> None: p = CaptureManager() loop = asyncio.new_event_loop() diff --git a/bin/run_backend.py b/bin/run_backend.py index 108717e..30bcee5 100755 --- a/bin/run_backend.py +++ b/bin/run_backend.py @@ -24,14 +24,14 @@ def check_running(name: str) -> bool: return False -def launch_cache(storage_directory: Optional[Path]=None): +def launch_cache(storage_directory: Optional[Path]=None) -> None: if not storage_directory: storage_directory = get_homedir() if not check_running('cache'): Popen(["./run_redis.sh"], cwd=(storage_directory / 'cache')) -def shutdown_cache(storage_directory: Optional[Path]=None): +def shutdown_cache(storage_directory: Optional[Path]=None) -> None: if not storage_directory: storage_directory = get_homedir() r = Redis(unix_socket_path=get_socket_path('cache')) @@ -39,11 +39,11 @@ def shutdown_cache(storage_directory: Optional[Path]=None): print('Redis cache database shutdown.') -def launch_all(): +def launch_all() -> None: launch_cache() -def check_all(stop: bool=False): +def check_all(stop: bool=False) -> None: backends: Dict[str, bool] = {'cache': False} while True: for db_name in backends.keys(): @@ -65,11 +65,11 @@ def check_all(stop: bool=False): time.sleep(1) -def stop_all(): +def stop_all() -> None: shutdown_cache() -def main(): +def main() -> None: parser = argparse.ArgumentParser(description='Manage backend DBs.') parser.add_argument("--start", action='store_true', default=False, help="Start all") parser.add_argument("--stop", action='store_true', default=False, help="Stop all") diff --git a/bin/shutdown.py b/bin/shutdown.py index 6189c2b..959ed7b 100755 --- a/bin/shutdown.py +++ b/bin/shutdown.py @@ -5,7 +5,7 @@ from lacus.default import AbstractManager -def main(): +def main() -> None: AbstractManager.force_shutdown() time.sleep(5) while True: diff --git a/bin/start.py b/bin/start.py index faa85bc..8a5baec 100755 --- a/bin/start.py +++ b/bin/start.py @@ -5,7 +5,7 @@ from lacus.default import get_homedir -def main(): +def main() -> None: # Just fail if the env isn't set. get_homedir() print('Start backend (redis)...') diff --git a/bin/start_website.py b/bin/start_website.py index 44bd8ec..cd15478 100755 --- a/bin/start_website.py +++ b/bin/start_website.py @@ -15,13 +15,13 @@ class Website(AbstractManager): - def __init__(self, loglevel: Optional[int]=None): + def __init__(self, loglevel: Optional[int]=None) -> None: super().__init__(loglevel) self.script_name = 'website' self.process = self._launch_website() self.set_running() - def _launch_website(self): + def _launch_website(self) -> Popen: # type: ignore[type-arg] website_dir = get_homedir() / 'website' ip = get_config('generic', 'website_listen_ip') port = get_config('generic', 'website_listen_port') @@ -33,7 +33,7 @@ def _launch_website(self): cwd=website_dir) -def main(): +def main() -> None: w = Website() w.run(sleep_in_sec=10) diff --git a/bin/stop.py b/bin/stop.py index e51dd4c..0ef36a6 100755 --- a/bin/stop.py +++ b/bin/stop.py @@ -8,7 +8,7 @@ from lacus.default import get_homedir, get_socket_path -def main(): +def main() -> None: get_homedir() p = Popen(['shutdown']) p.wait() diff --git a/bin/stop_capture_manager.py b/bin/stop_capture_manager.py index 275a03f..483f74b 100755 --- a/bin/stop_capture_manager.py +++ b/bin/stop_capture_manager.py @@ -12,7 +12,7 @@ logging.config.dictConfig(get_config('logging')) -def main(): +def main() -> None: parser = argparse.ArgumentParser(description='Sends a SIGTERM to the capture_manager so you can restart it cleanly.') parser.parse_args() diff --git a/bin/update.py b/bin/update.py index c95d238..d212698 100755 --- a/bin/update.py +++ b/bin/update.py @@ -15,14 +15,14 @@ logging.config.dictConfig(get_config('logging')) -def compute_hash_self(): +def compute_hash_self() -> bytes: m = hashlib.sha256() with (get_homedir() / 'bin' / 'update.py').open('rb') as f: m.update(f.read()) return m.digest() -def keep_going(ignore=False): +def keep_going(ignore: bool=False) -> None: if ignore: return keep_going = input('Continue? (y/N) ') @@ -31,7 +31,7 @@ def keep_going(ignore=False): sys.exit() -def run_command(command, expect_fail: bool=False, capture_output: bool=True): +def run_command(command: str, expect_fail: bool=False, capture_output: bool=True) -> None: args = shlex.split(command) homedir = get_homedir() process = subprocess.run(args, cwd=homedir, capture_output=capture_output) @@ -42,7 +42,7 @@ def run_command(command, expect_fail: bool=False, capture_output: bool=True): sys.exit() -def check_poetry_version(): +def check_poetry_version() -> None: args = shlex.split("poetry self -V") homedir = get_homedir() process = subprocess.run(args, cwd=homedir, capture_output=True) @@ -58,7 +58,7 @@ def check_poetry_version(): sys.exit() -def main(): +def main() -> None: parser = argparse.ArgumentParser(description='Pull latest release, update dependencies, update and validate the config files, update 3rd deps for the website.') parser.add_argument('--yes', default=False, action='store_true', help='Run all commands without asking.') parser.add_argument('--init', default=False, action='store_true', help='Run all commands without starting the service.') diff --git a/lacus/default/__init__.py b/lacus/default/__init__.py index ef2e154..b28ac62 100644 --- a/lacus/default/__init__.py +++ b/lacus/default/__init__.py @@ -7,8 +7,25 @@ # and allow to update them easily. # You should not have to change anything in this file below this line. +import os # noqa + from .abstractmanager import AbstractManager # noqa from .exceptions import MissingEnv, CreateDirectoryException, ConfigError # noqa from .helpers import get_homedir, load_configs, get_config, safe_create_dir, get_socket_path, try_make_file # noqa + +os.chdir(get_homedir()) + +__all__ = [ + 'AbstractManager', + 'MissingEnv', + 'CreateDirectoryException', + 'ConfigError', + 'get_homedir', + 'load_configs', + 'get_config', + 'safe_create_dir', + 'get_socket_path', + 'try_make_file', +] diff --git a/lacus/default/abstractmanager.py b/lacus/default/abstractmanager.py index 1b96a5c..3d3fb8b 100644 --- a/lacus/default/abstractmanager.py +++ b/lacus/default/abstractmanager.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +from __future__ import annotations + import asyncio import logging import os @@ -20,18 +22,18 @@ class AbstractManager(ABC): script_name: str - def __init__(self, loglevel: Optional[int]=None): + def __init__(self, loglevel: int | None=None): self.loglevel: int = loglevel if loglevel is not None else get_config('generic', 'loglevel') or logging.INFO self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger.setLevel(self.loglevel) self.logger.info(f'Initializing {self.__class__.__name__}') - self.process: Optional[Popen] = None + self.process: Popen | None = None # type: ignore[type-arg] self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) self.force_stop = False @staticmethod - def is_running() -> List[Tuple[str, float]]: + def is_running() -> list[tuple[str, float]]: try: r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) for script_name, score in r.zrangebyscore('running', '-inf', '+inf', withscores=True): @@ -52,7 +54,7 @@ def is_running() -> List[Tuple[str, float]]: return [] @staticmethod - def clear_running(): + def clear_running() -> None: try: r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) r.delete('running') @@ -60,14 +62,14 @@ def clear_running(): print('Unable to connect to redis, the system is down.') @staticmethod - def force_shutdown(): + def force_shutdown() -> None: try: r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) r.set('shutdown', 1) except RedisConnectionError: print('Unable to connect to redis, the system is down.') - def set_running(self, number: Optional[int]=None) -> None: + def set_running(self, number: int | None=None) -> None: if number == 0: self.__redis.zrem('running', self.script_name) else: @@ -111,7 +113,7 @@ def shutdown_requested(self) -> bool: def _to_run_forever(self) -> None: raise NotImplementedError('This method must be implemented by the child') - def _kill_process(self): + def _kill_process(self) -> None: if self.process is None: return kill_order = [signal.SIGWINCH, signal.SIGTERM, signal.SIGINT, signal.SIGKILL] @@ -167,7 +169,7 @@ def run(self, sleep_in_sec: int) -> None: def _wait_to_finish(self) -> None: self.logger.info('Not implemented, nothing to wait for.') - async def stop(self): + async def stop(self) -> None: self.force_stop = True async def _to_run_forever_async(self) -> None: @@ -176,7 +178,7 @@ async def _to_run_forever_async(self) -> None: async def _wait_to_finish_async(self) -> None: self.logger.info('Not implemented, nothing to wait for.') - async def stop_async(self): + async def stop_async(self) -> None: """Method to pass the signal handler: loop.add_signal_handler(signal.SIGTERM, lambda: loop.create_task(p.stop())) """ diff --git a/lacus/default/helpers.py b/lacus/default/helpers.py index db8192e..7e4481f 100644 --- a/lacus/default/helpers.py +++ b/lacus/default/helpers.py @@ -1,4 +1,7 @@ #!/usr/bin/env python3 + +from __future__ import annotations + import json import logging import os @@ -9,7 +12,7 @@ from . import env_global_name from .exceptions import ConfigError, CreateDirectoryException, MissingEnv -configs: Dict[str, Dict[str, Any]] = {} +configs: dict[str, dict[str, Any]] = {} logger = logging.getLogger('Helpers') @@ -34,7 +37,7 @@ def get_homedir() -> Path: @lru_cache(64) -def load_configs(path_to_config_files: Optional[Union[str, Path]]=None): +def load_configs(path_to_config_files: str | Path | None=None) -> None: global configs if configs: return @@ -57,7 +60,7 @@ def load_configs(path_to_config_files: Optional[Union[str, Path]]=None): @lru_cache(64) -def get_config(config_type: str, entry: Optional[str]=None, quiet: bool=False) -> Any: +def get_config(config_type: str, entry: str | None=None, quiet: bool=False) -> Any: """Get an entry from the given config_type file. Automatic fallback to the sample file""" global configs if not configs: @@ -96,7 +99,7 @@ def get_socket_path(name: str) -> str: return str(get_homedir() / mapping[name]) -def try_make_file(filename: Path): +def try_make_file(filename: Path) -> bool: try: filename.touch(exist_ok=False) return True diff --git a/lacus/lacus.py b/lacus/lacus.py index 160a21b..7571ef4 100644 --- a/lacus/lacus.py +++ b/lacus/lacus.py @@ -3,6 +3,9 @@ import copy import logging +from datetime import datetime +from typing import Dict, Any + from redis import Redis, ConnectionPool from redis.connection import UnixDomainSocketConnection @@ -42,12 +45,29 @@ def __init__(self) -> None: self.global_proxy.pop('enable') @property - def redis(self): + def redis(self) -> Redis: # type: ignore[type-arg] return Redis(connection_pool=self.redis_pool) @property - def redis_decode(self): + def redis_decode(self) -> Redis: # type: ignore[type-arg] return Redis(connection_pool=self.redis_pool_decoded) - def check_redis_up(self): + def check_redis_up(self) -> bool: return self.redis.ping() + + def redis_status(self) -> Dict[str, Any]: + redis_info = self.redis.info() + return {'total_keys': redis_info['db0']['keys'], + 'current_memory_use': redis_info['used_memory_rss_human'], + 'peak_memory_use': redis_info['used_memory_peak_human']} + + def status(self) -> Dict[str, Any]: + to_return: Dict[str, Any] = {} + to_return['max_concurrent_captures'] = get_config('generic', 'concurrent_captures') + to_return['max_capture_time'] = get_config('generic', 'max_capture_time') + ongoing_captures = self.monitoring.get_ongoing_captures() + to_return['ongoing_captures'] = len(ongoing_captures) + to_return['captures_time'] = {uuid: (datetime.now() - start_time).total_seconds() for uuid, start_time in ongoing_captures} + enqueued_captures = self.monitoring.get_enqueued_captures() + to_return['enqueued_captures'] = len(enqueued_captures) + return to_return diff --git a/poetry.lock b/poetry.lock index ff45186..456b900 100644 --- a/poetry.lock +++ b/poetry.lock @@ -868,13 +868,13 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] [[package]] name = "jinja2" -version = "3.1.2" +version = "3.1.3" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" files = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, + {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, + {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, ] [package.dependencies] @@ -923,18 +923,18 @@ referencing = ">=0.31.0" [[package]] name = "lacuscore" -version = "1.7.8" +version = "1.7.9" description = "Core of Lacus, usable as a module" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "lacuscore-1.7.8-py3-none-any.whl", hash = "sha256:b877567a7efb35802c5fb6a01a8b88602978c16b49ee0ceead937337c6710081"}, - {file = "lacuscore-1.7.8.tar.gz", hash = "sha256:e0aa938a6555c8fe8485777e04c2ca549cd3b1fd7a75e7839d49a3fef1499252"}, + {file = "lacuscore-1.7.9-py3-none-any.whl", hash = "sha256:74309aa4216fabffadd4ab724f8f2273d12e59dedd8e826e2710847d92497f8c"}, + {file = "lacuscore-1.7.9.tar.gz", hash = "sha256:cb0df82d88ffe805fc78c60e535ee54d82842b763a84ad97cfc2a5a99d4c3ed7"}, ] [package.dependencies] defang = ">=0.5.3,<0.6.0" -playwrightcapture = {version = ">=1.22.5,<2.0.0", extras = ["recaptcha"]} +playwrightcapture = {version = ">=1.22.6,<2.0.0", extras = ["recaptcha"]} redis = {version = ">=5.0.1,<6.0.0", extras = ["hiredis"]} requests = ">=2.31.0,<3.0.0" ua-parser = ">=0.18.0,<0.19.0" @@ -949,6 +949,7 @@ description = "Powerful and Pythonic XML processing library combining libxml2/li optional = false python-versions = ">=3.6" files = [ + {file = "lxml-5.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:704f5572ff473a5f897745abebc6df40f22d4133c1e0a1f124e4f2bd3330ff7e"}, {file = "lxml-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9d3c0f8567ffe7502d969c2c1b809892dc793b5d0665f602aad19895f8d508da"}, {file = "lxml-5.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5fcfbebdb0c5d8d18b84118842f31965d59ee3e66996ac842e21f957eb76138c"}, {file = "lxml-5.1.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f37c6d7106a9d6f0708d4e164b707037b7380fcd0b04c5bd9cae1fb46a856fb"}, @@ -958,6 +959,7 @@ files = [ {file = "lxml-5.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:82bddf0e72cb2af3cbba7cec1d2fd11fda0de6be8f4492223d4a268713ef2147"}, {file = "lxml-5.1.0-cp310-cp310-win32.whl", hash = "sha256:b66aa6357b265670bb574f050ffceefb98549c721cf28351b748be1ef9577d93"}, {file = "lxml-5.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:4946e7f59b7b6a9e27bef34422f645e9a368cb2be11bf1ef3cafc39a1f6ba68d"}, + {file = "lxml-5.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:14deca1460b4b0f6b01f1ddc9557704e8b365f55c63070463f6c18619ebf964f"}, {file = "lxml-5.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ed8c3d2cd329bf779b7ed38db176738f3f8be637bb395ce9629fc76f78afe3d4"}, {file = "lxml-5.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:436a943c2900bb98123b06437cdd30580a61340fbdb7b28aaf345a459c19046a"}, {file = "lxml-5.1.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:acb6b2f96f60f70e7f34efe0c3ea34ca63f19ca63ce90019c6cbca6b676e81fa"}, @@ -967,6 +969,7 @@ files = [ {file = "lxml-5.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f4c9bda132ad108b387c33fabfea47866af87f4ea6ffb79418004f0521e63204"}, {file = "lxml-5.1.0-cp311-cp311-win32.whl", hash = "sha256:bc64d1b1dab08f679fb89c368f4c05693f58a9faf744c4d390d7ed1d8223869b"}, {file = "lxml-5.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:a5ab722ae5a873d8dcee1f5f45ddd93c34210aed44ff2dc643b5025981908cda"}, + {file = "lxml-5.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9aa543980ab1fbf1720969af1d99095a548ea42e00361e727c58a40832439114"}, {file = "lxml-5.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6f11b77ec0979f7e4dc5ae081325a2946f1fe424148d3945f943ceaede98adb8"}, {file = "lxml-5.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a36c506e5f8aeb40680491d39ed94670487ce6614b9d27cabe45d94cd5d63e1e"}, {file = "lxml-5.1.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f643ffd2669ffd4b5a3e9b41c909b72b2a1d5e4915da90a77e119b8d48ce867a"}, @@ -992,8 +995,8 @@ files = [ {file = "lxml-5.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8f52fe6859b9db71ee609b0c0a70fea5f1e71c3462ecf144ca800d3f434f0764"}, {file = "lxml-5.1.0-cp37-cp37m-win32.whl", hash = "sha256:d42e3a3fc18acc88b838efded0e6ec3edf3e328a58c68fbd36a7263a874906c8"}, {file = "lxml-5.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:eac68f96539b32fce2c9b47eb7c25bb2582bdaf1bbb360d25f564ee9e04c542b"}, + {file = "lxml-5.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ae15347a88cf8af0949a9872b57a320d2605ae069bcdf047677318bc0bba45b1"}, {file = "lxml-5.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c26aab6ea9c54d3bed716b8851c8bfc40cb249b8e9880e250d1eddde9f709bf5"}, - {file = "lxml-5.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cfbac9f6149174f76df7e08c2e28b19d74aed90cad60383ad8671d3af7d0502f"}, {file = "lxml-5.1.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:342e95bddec3a698ac24378d61996b3ee5ba9acfeb253986002ac53c9a5f6f84"}, {file = "lxml-5.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:725e171e0b99a66ec8605ac77fa12239dbe061482ac854d25720e2294652eeaa"}, {file = "lxml-5.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d184e0d5c918cff04cdde9dbdf9600e960161d773666958c9d7b565ccc60c45"}, @@ -1001,6 +1004,7 @@ files = [ {file = "lxml-5.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6d48fc57e7c1e3df57be5ae8614bab6d4e7b60f65c5457915c26892c41afc59e"}, {file = "lxml-5.1.0-cp38-cp38-win32.whl", hash = "sha256:7ec465e6549ed97e9f1e5ed51c657c9ede767bc1c11552f7f4d022c4df4a977a"}, {file = "lxml-5.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:b21b4031b53d25b0858d4e124f2f9131ffc1530431c6d1321805c90da78388d1"}, + {file = "lxml-5.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:52427a7eadc98f9e62cb1368a5079ae826f94f05755d2d567d93ee1bc3ceb354"}, {file = "lxml-5.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6a2a2c724d97c1eb8cf966b16ca2915566a4904b9aad2ed9a09c748ffe14f969"}, {file = "lxml-5.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:843b9c835580d52828d8f69ea4302537337a21e6b4f1ec711a52241ba4a824f3"}, {file = "lxml-5.1.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9b99f564659cfa704a2dd82d0684207b1aadf7d02d33e54845f9fc78e06b7581"}, @@ -1082,6 +1086,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -1299,13 +1313,13 @@ test = ["pytest"] [[package]] name = "playwrightcapture" -version = "1.22.5" +version = "1.22.6" description = "A simple library to capture websites using playwright" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "playwrightcapture-1.22.5-py3-none-any.whl", hash = "sha256:023d394efe2c6173178ac7a9143a9b77400704b965280c494e9bb418eaa2ea86"}, - {file = "playwrightcapture-1.22.5.tar.gz", hash = "sha256:8fac3bf723536ebc6ff0e1908aa838029a8b6e8ed1998fd162d5557d1d3fb2ec"}, + {file = "playwrightcapture-1.22.6-py3-none-any.whl", hash = "sha256:910ad4dabbc51864f1c8fed6e62c2869a519211bcf7ae6e9c5aac3ea29268e33"}, + {file = "playwrightcapture-1.22.6.tar.gz", hash = "sha256:b5c377585aba9ff71f055127b6be86458503ff3308e8fc8225dd4c05ab9597ae"}, ] [package.dependencies] @@ -1318,7 +1332,7 @@ pytz = {version = ">=2023.3.post1,<2024.0", markers = "python_version < \"3.9\"" requests = {version = ">=2.31.0,<3.0.0", extras = ["socks"], optional = true, markers = "extra == \"recaptcha\""} setuptools = ">=69.0.3,<70.0.0" SpeechRecognition = {version = ">=3.10.1,<4.0.0", optional = true, markers = "extra == \"recaptcha\""} -tzdata = ">=2023.3,<2024.0" +tzdata = ">=2023.4,<2024.0" w3lib = ">=2.1.2,<3.0.0" [package.extras] @@ -2032,4 +2046,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "541ac0b9d0d28b6da26180e2e65d4b67f1ee4c834335e453f98822056606a010" +content-hash = "cf55c17cb38f4bb383e71756ff44ecf2bfac86b1bb976052976f26dde37ffd24" diff --git a/pyproject.toml b/pyproject.toml index 44d80e7..64344e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ redis = {version = "^5.0.1", extras = ["hiredis"]} flask-restx = "^1.3.0" werkzeug = "^3.0.1" gunicorn = "^21.2.0" -lacuscore = "^1.7.8" +lacuscore = "^1.7.9" rich = "^13.7.0" psutil = "^5.9.7" diff --git a/tools/monitoring.py b/tools/monitoring.py index 3c8a390..e2b0177 100644 --- a/tools/monitoring.py +++ b/tools/monitoring.py @@ -1,9 +1,14 @@ #!/usr/bin/env python3 +from __future__ import annotations + import json import os import sys +from datetime import datetime +from typing import Any, Mapping + from lacus.default import get_socket_path, AbstractManager from lacuscore import LacusCoreMonitoring from rich.console import Console @@ -15,13 +20,12 @@ class Monitoring(): - def __init__(self): - self.redis_cache: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True) - + def __init__(self) -> None: + self.redis_cache: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True) # type: ignore[type-arg] self.lacus_monit = LacusCoreMonitoring(self.redis_cache) @property - def backend_status(self): + def backend_status(self) -> bool: socket_path_cache = get_socket_path('cache') backend_up = True if not os.path.exists(socket_path_cache): @@ -39,26 +43,26 @@ def backend_status(self): return backend_up @property - def ongoing(self): + def ongoing(self) -> list[tuple[str, datetime]]: return self.lacus_monit.get_ongoing_captures() @property - def enqueued(self): + def enqueued(self) -> list[tuple[str, float]]: return self.lacus_monit.get_enqueued_captures() - def capture_settings(self, uuid: str): + def capture_settings(self, uuid: str) -> dict[str, str]: return self.lacus_monit.get_capture_settings(uuid) @property - def number_keys(self): + def number_keys(self) -> int: return self.redis_cache.info('keyspace')['db0']['keys'] @property - def memory_use(self): + def memory_use(self) -> Mapping[str, Any]: return self.redis_cache.info('memory') @property - def stats(self): + def stats(self) -> dict[str, Any]: return self.lacus_monit.get_stats(cardinality_only=True) diff --git a/tools/validate_config_files.py b/tools/validate_config_files.py index a643820..a4df6a3 100755 --- a/tools/validate_config_files.py +++ b/tools/validate_config_files.py @@ -7,7 +7,7 @@ from lacus.default import get_homedir -def validate_generic_config_file(): +def validate_generic_config_file() -> bool: sample_config = get_homedir() / 'config' / 'generic.json.sample' with sample_config.open() as f: generic_config_sample = json.load(f) @@ -53,7 +53,7 @@ def validate_generic_config_file(): return True -def update_user_configs(): +def update_user_configs() -> bool: for file_name in ['generic']: with (get_homedir() / 'config' / f'{file_name}.json').open() as f: try: diff --git a/website/web/__init__.py b/website/web/__init__.py index 6aa3e11..97e9de7 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -1,16 +1,20 @@ #!/usr/bin/env python3 +from __future__ import annotations + import datetime import logging import logging.config from collections import defaultdict from importlib.metadata import version -from typing import Dict, Optional, Union +from typing import Dict, Optional, Union, Any, List, Tuple from flask import Flask, request from flask_restx import Api, Resource, fields # type: ignore +from lacuscore import CaptureStatus, CaptureResponse + from lacus.default import get_config from lacus.lacus import Lacus @@ -34,9 +38,9 @@ @api.route('/redis_up') @api.doc(description='Check if redis is up and running') -class RedisUp(Resource): +class RedisUp(Resource): # type: ignore[misc] - def get(self): + def get(self) -> bool: return lacus.check_redis_up() @@ -84,12 +88,12 @@ def get(self): @api.route('/enqueue') -class Enqueue(Resource): +class Enqueue(Resource): # type: ignore[misc] - @api.doc(body=submit_fields_post) - @api.produces(['text/text']) - def post(self): - to_query: Dict = request.get_json(force=True) + @api.doc(body=submit_fields_post) # type: ignore[misc] + @api.produces(['text/text']) # type: ignore[misc] + def post(self) -> str: + to_query: Dict[str, Any] = request.get_json(force=True) perma_uuid = lacus.core.enqueue( url=to_query.get('url'), document_name=to_query.get('document_name'), @@ -122,18 +126,18 @@ def post(self): @api.route('/capture_status/') @api.doc(description='Get the status of a capture.', params={'capture_uuid': 'The UUID of the capture'}) -class CaptureStatusQuery(Resource): +class CaptureStatusQuery(Resource): # type: ignore[misc] - def get(self, capture_uuid: str): + def get(self, capture_uuid: str) -> CaptureStatus: return lacus.core.get_capture_status(capture_uuid) @api.route('/capture_result/') @api.doc(description='Get the result of a capture.', params={'capture_uuid': 'The UUID of the capture'}) -class CaptureResult(Resource): +class CaptureResult(Resource): # type: ignore[misc] - def get(self, capture_uuid: str): + def get(self, capture_uuid: str) -> CaptureResponse: return lacus.core.get_capture(capture_uuid) @@ -149,10 +153,10 @@ def get(self, capture_uuid: str): @api.route('/daily_stats/') @api.doc(description='Get the statistics for a day.', params={'date': 'The date in ISO format YYYY-MM-DD'}) -class DailyStats(Resource): +class DailyStats(Resource): # type: ignore[misc] - @api.marshal_with(stats_model, skip_none=True) - def get(self, date: Optional[str]=None): + @api.marshal_with(stats_model, skip_none=True) # type: ignore[misc] + def get(self, date: Optional[str]=None) -> Dict[str, Any]: if 'date' in request.args: date = request.args['date'] if not date: @@ -172,10 +176,10 @@ def get(self, date: Optional[str]=None): @api.route('/daily_stats_details/') @api.doc(description='Get the statistics for a day, with lists of successful/failed URLs.', params={'date': 'The date in ISO format YYYY-MM-DD'}) -class DailyStatsDetails(Resource): +class DailyStatsDetails(Resource): # type: ignore[misc] - @api.marshal_with(stats_details_model, skip_none=True) - def get(self, date: Optional[str]=None): + @api.marshal_with(stats_details_model, skip_none=True) # type: ignore[misc] + def get(self, date: Optional[str]=None) -> Dict[str, Any]: if 'date' in request.args: date = request.args['date'] if not date: @@ -185,29 +189,26 @@ def get(self, date: Optional[str]=None): @api.route('/db_status') @api.doc(description='Get a few infos about Redis usage.') -class DBSatus(Resource): +class DBSatus(Resource): # type: ignore[misc] - def get(self): - redis_info = lacus.redis.info() - return {'total_keys': redis_info['db0']['keys'], - 'current_memory_use': redis_info['used_memory_rss_human'], - 'peak_memory_use': redis_info['used_memory_peak_human']} + def get(self) -> Dict[str, Any]: + return lacus.redis_status() @api.route('/ongoing_captures') @api.route('/ongoing_captures/') @api.doc(description='Get all the ongoing captures.', params={'with_settings': 'If set, returns the settings.'}) -class OngoingCaptures(Resource): +class OngoingCaptures(Resource): # type: ignore[misc] - def get(self, with_settings: Optional[int]=None): + def get(self, with_settings: Optional[int]=None) -> Union[List[Tuple[str, str]], Dict[str, Any]]: ongoing = lacus.monitoring.get_ongoing_captures() - _ongoing = [[uuid, d.isoformat()] for uuid, d in ongoing] + _ongoing = [(uuid, d.isoformat()) for uuid, d in ongoing] if 'with_settings' in request.args: with_settings = True if not with_settings: return _ongoing - to_return: Dict[str, Dict[str, Union[Dict, str]]] = defaultdict(dict) + to_return: Dict[str, Dict[str, Union[Dict[str, Any], str]]] = defaultdict(dict) for uuid, capture_time in _ongoing: to_return[uuid]['settings'] = lacus.monitoring.get_capture_settings(uuid) to_return[uuid]['capture_time'] = capture_time @@ -218,16 +219,24 @@ def get(self, with_settings: Optional[int]=None): @api.route('/enqueued_captures/') @api.doc(description='Get all the enqueued but not yet ongoing captures.', params={'with_settings': 'If set, returns the settings.'}) -class EnqueuedCaptures(Resource): +class EnqueuedCaptures(Resource): # type: ignore[misc] - def get(self, with_settings: Optional[int]=None): + def get(self, with_settings: Optional[int]=None) -> Union[List[Tuple[str, float]], Dict[str, Any]]: enqueued = lacus.monitoring.get_enqueued_captures() if 'with_settings' in request.args: with_settings = True if not with_settings: return enqueued - to_return: Dict[str, Dict[str, Union[Dict, str, float]]] = defaultdict(dict) + to_return: Dict[str, Dict[str, Union[Dict[str, Any], str, float]]] = defaultdict(dict) for uuid, priority in enqueued: to_return[uuid]['settings'] = lacus.monitoring.get_capture_settings(uuid) to_return[uuid]['priority'] = priority return to_return + + +@api.route('/lacus_status') +@api.doc(description='Get the status of the Lacus instance.') +class LacusStatus(Resource): # type: ignore[misc] + + def get(self) -> Dict[str, Any]: + return lacus.status() diff --git a/website/web/helpers.py b/website/web/helpers.py index dd59f1b..1a22227 100644 --- a/website/web/helpers.py +++ b/website/web/helpers.py @@ -1,13 +1,17 @@ #!/usr/bin/env python3 +from __future__ import annotations + import os from functools import lru_cache from pathlib import Path +from flask import Request + from lacus.default import get_homedir -def src_request_ip(request) -> str: +def src_request_ip(request: Request) -> str | None: # NOTE: X-Real-IP is the IP passed by the reverse proxy in the headers. real_ip = request.headers.get('X-Real-IP') if not real_ip: