From 6bd6d7d5fbe1ff5972dba4c467dd0551fcc9c7ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Thu, 10 Oct 2024 16:54:01 +0200 Subject: [PATCH] new: Optionally disable JS in capture. Fix https://github.com/ail-project/lacus/issues/30 --- poetry.lock | 51 ++++++++++++++++++++++++++++++----------- pyproject.toml | 2 +- website/web/__init__.py | 2 ++ 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5d05df7..73696b4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -801,6 +801,26 @@ idna = ["idna (>=3.6)"] trio = ["trio (>=0.23)"] wmi = ["wmi (>=1.5.1)"] +[[package]] +name = "dnspython" +version = "2.7.0" +description = "DNS toolkit" +optional = false +python-versions = ">=3.9" +files = [ + {file = "dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86"}, + {file = "dnspython-2.7.0.tar.gz", hash = "sha256:ce9c432eda0dc91cf618a5cedf1a4e142651196bbcd2c80e89ed5a907e5cfaf1"}, +] + +[package.extras] +dev = ["black (>=23.1.0)", "coverage (>=7.0)", "flake8 (>=7)", "hypercorn (>=0.16.0)", "mypy (>=1.8)", "pylint (>=3)", "pytest (>=7.4)", "pytest-cov (>=4.1.0)", "quart-trio (>=0.11.0)", "sphinx (>=7.2.0)", "sphinx-rtd-theme (>=2.0.0)", "twine (>=4.0.0)", "wheel (>=0.42.0)"] +dnssec = ["cryptography (>=43)"] +doh = ["h2 (>=4.1.0)", "httpcore (>=1.0.0)", "httpx (>=0.26.0)"] +doq = ["aioquic (>=1.0.0)"] +idna = ["idna (>=3.7)"] +trio = ["trio (>=0.23)"] +wmi = ["wmi (>=1.5.1)"] + [[package]] name = "eval-type-backport" version = "0.2.0" @@ -1432,23 +1452,26 @@ referencing = ">=0.31.0" [[package]] name = "lacuscore" -version = "1.11.0" +version = "1.11.1" description = "Core of Lacus, usable as a module" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "lacuscore-1.11.0-py3-none-any.whl", hash = "sha256:1950e127f772e19e9cead68a789533d1508c4a345b105e74b830267a7110eb6f"}, - {file = "lacuscore-1.11.0.tar.gz", hash = "sha256:e9dbc065462be95843a2f697be14875ed3cc7601653d5fb2c700e5935c8f52ee"}, + {file = "lacuscore-1.11.1-py3-none-any.whl", hash = "sha256:580fe74fb14172a87b168deb31c1dc85fc5b4b95e0a8289ce3052fdf8f7b0062"}, + {file = "lacuscore-1.11.1.tar.gz", hash = "sha256:40917895db0cb4a46fcfd381ad374a21a6d31bbd85ba0b4e85c09467f0a2ff81"}, ] [package.dependencies] async-timeout = {version = ">=4.0.3,<5.0.0", markers = "python_version < \"3.11\""} defang = ">=0.5.3,<0.6.0" -dnspython = ">=2.6.1,<3.0.0" +dnspython = [ + {version = "<2.7", markers = "python_version < \"3.9\""}, + {version = ">=2.7,<3.0", markers = "python_version >= \"3.9\""}, +] eval-type-backport = {version = ">=0.2.0,<0.3.0", markers = "python_version < \"3.10\""} -playwrightcapture = {version = ">=1.26.0,<2.0.0", extras = ["recaptcha"]} -pydantic = ">=2.9.1,<3.0.0" -redis = {version = ">=5.0.8,<6.0.0", extras = ["hiredis"]} +playwrightcapture = {version = ">=1.26.1,<2.0.0", extras = ["recaptcha"]} +pydantic = ">=2.9.2,<3.0.0" +redis = {version = ">=5.1.1,<6.0.0", extras = ["hiredis"]} requests = ">=2.32.3,<3.0.0" ua-parser = ">=0.18.0,<0.19.0" @@ -1990,29 +2013,29 @@ test = ["pytest"] [[package]] name = "playwrightcapture" -version = "1.26.0" +version = "1.26.1" description = "A simple library to capture websites using playwright" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "playwrightcapture-1.26.0-py3-none-any.whl", hash = "sha256:2694fe7d1fe6a5347de6395e9edb8a45c8e7c41adc30a1651355cba7ff1388e5"}, - {file = "playwrightcapture-1.26.0.tar.gz", hash = "sha256:f149edbddab605d9c2659487c8a1d8dad388dc0f4919790b7ff99605524ee093"}, + {file = "playwrightcapture-1.26.1-py3-none-any.whl", hash = "sha256:66cb8efa97b99f5aff25c2046a28a2dc4496c3b316d6ac4d3482868ede1e8114"}, + {file = "playwrightcapture-1.26.1.tar.gz", hash = "sha256:20ba35fa2308f581d5ef8db92db9aef4520d9598a16050a8701808d2dd05907a"}, ] [package.dependencies] -aiohttp = {version = ">=3.10.5,<4.0.0", extras = ["speedups"]} +aiohttp = {version = ">=3.10.9,<4.0.0", extras = ["speedups"]} aiohttp-socks = ">=0.9,<0.10" async-timeout = {version = ">=4.0.3,<5.0.0", markers = "python_version < \"3.11\""} beautifulsoup4 = {version = ">=4.12.3,<5.0.0", extras = ["charset-normalizer", "lxml"]} dateparser = ">=1.2.0,<2.0.0" playwright = ">=1.47.0,<2.0.0" playwright-stealth = ">=1.0.6,<2.0.0" -puremagic = ">=1.27,<2.0" +puremagic = ">=1.28,<2.0" pydub = {version = ">=0.25.1,<0.26.0", optional = true, markers = "extra == \"recaptcha\""} pytz = {version = ">=2024.2,<2025.0", markers = "python_version < \"3.9\""} setuptools = ">=75.1.0,<76.0.0" SpeechRecognition = {version = ">=3.10.4,<4.0.0", optional = true, markers = "extra == \"recaptcha\""} -tzdata = ">=2024.1,<2025.0" +tzdata = ">=2024.2,<2025.0" w3lib = ">=2.2.1,<3.0.0" [package.extras] @@ -3289,4 +3312,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "4e9294e1bd454f0676db3e280add7a0839e450de7ee92c75e31c5908b6234ba8" +content-hash = "746990d9daf7ac6070127f1aaa6cb07271871268bb4bed7f40f40883abfd5c63" diff --git a/pyproject.toml b/pyproject.toml index af17c2e..f6cb8e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ redis = {version = "^5.1.1", extras = ["hiredis"]} flask-restx = "^1.3.0" werkzeug = "^3.0.4" gunicorn = {version = "^23.0.0", extras = ["setproctitle"]} -lacuscore = "^1.11.0" +lacuscore = "^1.11.1" rich = "^13.9.2" psutil = "^6.0.0" diff --git a/website/web/__init__.py b/website/web/__init__.py index e1a34f3..c42d4b9 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -92,6 +92,7 @@ def get(self) -> bool: 'timezone_id': fields.String(description="The timezone ID of the browser", example='Europe/Paris'), 'locale': fields.String(description="The locale of the browser", example='en-US'), 'color_scheme': fields.String(description="The color scheme of the browser", example='dark'), + 'java_script_enabled': fields.Boolean(description="If False, javascript won't be executed when rendering the page", example=True), 'referer': fields.String(description="Referer to pass to the capture", example='https://circl.lu'), 'with_favicon': fields.Boolean(description="Attempts to get favicons related to the landing page of the capture", example=False), 'allow_tracking': fields.Boolean(description="Attempt to let the website violate your privacy", example=False), @@ -126,6 +127,7 @@ def post(self) -> str: timezone_id=to_query.get('timezone_id'), locale=to_query.get('locale'), color_scheme=to_query.get('color_scheme'), + java_script_enabled=to_query.get('java_script_enabled', True), referer=to_query.get('referer'), with_favicon=to_query.get('with_favicon', False), allow_tracking=to_query.get('allow_tracking', False),