diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
new file mode 100644
index 0000000..fe43b12
--- /dev/null
+++ b/.github/workflows/ruff.yml
@@ -0,0 +1,27 @@
+name: Ruff Code Check
+
+on: [push, pull_request]
+
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.x"
+
+      - name: Install Ruff
+        run: |
+          pip install ruff
+
+      - name: Run Ruff Format
+        run: |
+          ruff format --check
+
+      - name: Run Ruff Check
+        run: |
+          ruff check .
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b204037..0bba579 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -13,6 +13,14 @@ Examples of contributions include:
 `Please formalize your pull request (PR)` you will get.
 
+**Before each push or PR, run in the root directory of the project:**
+
+```bash
+ruff check
+
+ruff format
+```
+
 ---
 
 # Code of Conduct
diff --git a/examples/spiders/auto_recaptcha.py b/examples/spiders/auto_recaptcha.py
index 2d3fef4..b71e215 100644
--- a/examples/spiders/auto_recaptcha.py
+++ b/examples/spiders/auto_recaptcha.py
@@ -1,6 +1,7 @@
+import base64
 import logging
+
 import scrapy
-import base64
 from twisted.python.failure import Failure
 
 from scrapypuppeteer import PuppeteerRequest
diff --git a/examples/spiders/fill_form.py b/examples/spiders/fill_form.py
index 91a2a18..1027cbc 100644
--- a/examples/spiders/fill_form.py
+++ b/examples/spiders/fill_form.py
@@ -1,7 +1,9 @@
+import base64
+
 import scrapy
+
 from scrapypuppeteer import PuppeteerRequest, PuppeteerScreenshotResponse
-from scrapypuppeteer.actions import Screenshot, FillForm
-import base64
+from scrapypuppeteer.actions import FillForm, Screenshot
 
 
 class FormActionSpider(scrapy.Spider):
@@ -34,5 +36,5 @@ def screenshot(self, response):
     @staticmethod
     def make_screenshot(response: PuppeteerScreenshotResponse, **kwargs):
         data = response.screenshot
-        with open(f"screenshot.png", "wb") as fh:
+        with open("screenshot.png", "wb") as fh:
             fh.write(base64.b64decode(data))
diff --git a/examples/spiders/har.py b/examples/spiders/har.py
index 7638867..ea9c708 100644
--- a/examples/spiders/har.py
+++ b/examples/spiders/har.py
@@ -1,4 +1,5 @@
 import scrapy
+
 from scrapypuppeteer import PuppeteerRequest
 from scrapypuppeteer.actions import Har
diff --git a/examples/spiders/manual_recaptcha.py b/examples/spiders/manual_recaptcha.py
index dc2a8b3..30a1cb3 100644
--- a/examples/spiders/manual_recaptcha.py
+++ b/examples/spiders/manual_recaptcha.py
@@ -1,10 +1,11 @@
+import base64
 import logging
+
 import scrapy
-import base64
 from twisted.python.failure import Failure
 
 from scrapypuppeteer import PuppeteerRequest
-from scrapypuppeteer.actions import GoTo, RecaptchaSolver, Click, Screenshot
+from scrapypuppeteer.actions import Click, GoTo, RecaptchaSolver, Screenshot
 from scrapypuppeteer.response import PuppeteerResponse, PuppeteerScreenshotResponse
diff --git a/examples/spiders/meduza.py b/examples/spiders/meduza.py
index 6abf5c3..a3c6f6e 100644
--- a/examples/spiders/meduza.py
+++ b/examples/spiders/meduza.py
@@ -1,6 +1,6 @@
 import scrapy
 
-from scrapypuppeteer import PuppeteerRequest, PuppeteerHtmlResponse
+from scrapypuppeteer import PuppeteerHtmlResponse, PuppeteerRequest
 
 
 class MeduzaSpider(scrapy.Spider):
diff --git a/examples/spiders/webscraperio.py b/examples/spiders/webscraperio.py
index 661f021..68b2498 100644
--- a/examples/spiders/webscraperio.py
+++ b/examples/spiders/webscraperio.py
@@ -1,11 +1,10 @@
 import scrapy
 
 from scrapypuppeteer import PuppeteerRequest
-from scrapypuppeteer.actions import GoTo, Scroll, Click
+from scrapypuppeteer.actions import Click, GoTo, Scroll
 
 
 class EcommerceSiteSpider(scrapy.Spider):
-
     @staticmethod
     def extract_items(list_page_response):
         for item_selector in list_page_response.css("div.row div.thumbnail"):
@@ -29,7 +28,7 @@ def extract_item(detail_page_response):
             "description": detail_page_response.css("p.description::text").get(),
             "rating": len(detail_page_response.css("span.glyphicon-star")),
             "reviews_count": int(
-                detail_page_response.css(".ratings::text").re_first("\d+")
+                detail_page_response.css(".ratings::text").re_first(r"\d+")
             ),
         }

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..943999b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,16 @@
+[tool.ruff]
+line-length = 88
+fix = false
+indent-width = 4
+
+[tool.ruff.lint]
+select = ["F", "C", "W", "I"]
+ignore = ["E203", "E501", "F401", "C408", "F811", "N807"]
+
+[tool.ruff.format]
+indent-style = "space"
+line-ending = "auto"
+quote-style = "double"
+skip-magic-trailing-comma = false
+docstring-code-line-length = 88
+docstring-code-format = true
\ No newline at end of file
diff --git a/scrapypuppeteer/__init__.py b/scrapypuppeteer/__init__.py
index 0c57ae3..e80ef25 100644
--- a/scrapypuppeteer/__init__.py
+++ b/scrapypuppeteer/__init__.py
@@ -1,21 +1,21 @@
 from .actions import (
-    PuppeteerServiceAction,
-    GoTo,
-    GoForward,
-    GoBack,
     Click,
-    Scroll,
-    Screenshot,
-    Har,
+    CustomJsAction,
     FillForm,
+    GoBack,
+    GoForward,
+    GoTo,
+    Har,
+    PuppeteerServiceAction,
     RecaptchaSolver,
-    CustomJsAction,
+    Screenshot,
+    Scroll,
 )
-from .request import PuppeteerRequest, CloseContextRequest
+from .request import CloseContextRequest, PuppeteerRequest
 from .response import (
-    PuppeteerResponse,
     PuppeteerHtmlResponse,
-    PuppeteerScreenshotResponse,
-    PuppeteerRecaptchaSolverResponse,
     PuppeteerJsonResponse,
+    PuppeteerRecaptchaSolverResponse,
+    PuppeteerResponse,
+    PuppeteerScreenshotResponse,
 )
diff --git a/scrapypuppeteer/actions.py b/scrapypuppeteer/actions.py
index b0a9d70..4e56588 100644
--- a/scrapypuppeteer/actions.py
+++ b/scrapypuppeteer/actions.py
@@ -1,8 +1,7 @@
-from abc import abstractmethod, ABC
+from abc import ABC, abstractmethod
 
 
 class PuppeteerServiceAction(ABC):
-
     @property
     @abstractmethod
     def endpoint(self): ...
diff --git a/scrapypuppeteer/browser_managers/playwright_browser_manager.py b/scrapypuppeteer/browser_managers/playwright_browser_manager.py
index 58e3ba8..1e1efc3 100644
--- a/scrapypuppeteer/browser_managers/playwright_browser_manager.py
+++ b/scrapypuppeteer/browser_managers/playwright_browser_manager.py
@@ -2,8 +2,8 @@
 import base64
 import uuid
 
-from playwright.async_api import async_playwright
 import syncer
+from playwright.async_api import async_playwright
 
 from scrapypuppeteer.browser_managers import BrowserManager
 from scrapypuppeteer.request import CloseContextRequest, PuppeteerRequest
@@ -14,7 +14,6 @@
 
 
 class ContextManager:
-
     def __init__(self):
         self.browser = syncer.sync(self.launch_browser())
         self.contexts = {}
diff --git a/scrapypuppeteer/browser_managers/pyppeteer_browser_manager.py b/scrapypuppeteer/browser_managers/pyppeteer_browser_manager.py
index 0faf21a..bc465f5 100644
--- a/scrapypuppeteer/browser_managers/pyppeteer_browser_manager.py
+++ b/scrapypuppeteer/browser_managers/pyppeteer_browser_manager.py
@@ -6,7 +6,7 @@
 from pyppeteer import launch
 
 from scrapypuppeteer.browser_managers import BrowserManager
-from scrapypuppeteer.request import PuppeteerRequest, CloseContextRequest
+from scrapypuppeteer.request import CloseContextRequest, PuppeteerRequest
 from scrapypuppeteer.response import (
     PuppeteerHtmlResponse,
     PuppeteerScreenshotResponse,
@@ -14,7 +14,6 @@
 
 
 class ContextManager:
-
     def __init__(self):
         self.browser = syncer.sync(launch())
         self.contexts = {}
@@ -55,7 +54,6 @@ def close_contexts(self, request: CloseContextRequest):
 
 
 class PyppeteerBrowserManager(BrowserManager):
-
     def __init__(self):
         self.context_manager = ContextManager()
         self.action_map = {
@@ -72,7 +70,6 @@ def __init__(self):
         }
 
     def process_request(self, request):
-
         if isinstance(request, PuppeteerRequest):
             endpoint = request.action.endpoint
             action_function = self.action_map.get(endpoint)
diff --git a/scrapypuppeteer/browser_managers/service_browser_manager.py b/scrapypuppeteer/browser_managers/service_browser_manager.py
index 7b67619..7829c72 100644
--- a/scrapypuppeteer/browser_managers/service_browser_manager.py
+++ b/scrapypuppeteer/browser_managers/service_browser_manager.py
@@ -4,30 +4,30 @@
 from urllib.parse import urlencode, urljoin
 
 from scrapy.exceptions import DontCloseSpider
-from scrapy.http import Headers, TextResponse, Response
+from scrapy.http import Headers, Response, TextResponse
 from scrapy.utils.log import failure_to_exc_info
 from twisted.python.failure import Failure
 
 from scrapypuppeteer.actions import (
     Click,
+    FillForm,
     GoBack,
     GoForward,
     GoTo,
+    Har,
     RecaptchaSolver,
     Screenshot,
     Scroll,
-    Har,
-    FillForm,
 )
+from scrapypuppeteer.browser_managers import BrowserManager
+from scrapypuppeteer.request import ActionRequest, CloseContextRequest, PuppeteerRequest
 from scrapypuppeteer.response import (
-    PuppeteerHtmlResponse,
-    PuppeteerScreenshotResponse,
     PuppeteerHarResponse,
-    PuppeteerRecaptchaSolverResponse,
+    PuppeteerHtmlResponse,
     PuppeteerJsonResponse,
+    PuppeteerRecaptchaSolverResponse,
+    PuppeteerScreenshotResponse,
 )
-from scrapypuppeteer.request import ActionRequest, PuppeteerRequest, CloseContextRequest
-from scrapypuppeteer.browser_managers import BrowserManager
 
 
 class ServiceBrowserManager(BrowserManager):
@@ -43,7 +43,6 @@ def __init__(self, service_base_url, include_meta, include_headers, crawler):
             raise ValueError("Puppeteer service URL must be provided")
 
     def process_request(self, request):
-
         if isinstance(request, CloseContextRequest):
             return self.process_close_context_request(request)

diff --git a/scrapypuppeteer/middleware.py b/scrapypuppeteer/middleware.py
index 99568c5..b051ed0 100644
--- a/scrapypuppeteer/middleware.py
+++ b/scrapypuppeteer/middleware.py
@@ -6,31 +6,29 @@
 from scrapy.crawler import Crawler
 from scrapy.exceptions import IgnoreRequest, NotConfigured
 
-
 from scrapypuppeteer.actions import (
     Click,
+    CustomJsAction,
     RecaptchaSolver,
     Screenshot,
     Scroll,
-    CustomJsAction,
 )
-from scrapypuppeteer.response import (
-    PuppeteerResponse,
-    PuppeteerHtmlResponse,
+from scrapypuppeteer.browser_managers import BrowserManager
+from scrapypuppeteer.browser_managers.playwright_browser_manager import (
+    PlaywrightBrowserManager,
 )
-from scrapypuppeteer.request import ActionRequest, PuppeteerRequest, CloseContextRequest
 from scrapypuppeteer.browser_managers.pyppeteer_browser_manager import (
     PyppeteerBrowserManager,
 )
 from scrapypuppeteer.browser_managers.service_browser_manager import (
     ServiceBrowserManager,
 )
-from scrapypuppeteer.browser_managers.playwright_browser_manager import (
-    PlaywrightBrowserManager,
+from scrapypuppeteer.request import ActionRequest, CloseContextRequest, PuppeteerRequest
+from scrapypuppeteer.response import (
+    PuppeteerHtmlResponse,
+    PuppeteerResponse,
 )
-from scrapypuppeteer.browser_managers import BrowserManager
-
 
 
 class PuppeteerServiceDownloaderMiddleware:
     """
diff --git a/scrapypuppeteer/request.py b/scrapypuppeteer/request.py
index c00e41f..1f89453 100644
--- a/scrapypuppeteer/request.py
+++ b/scrapypuppeteer/request.py
@@ -1,7 +1,7 @@
 import json
-from typing import Tuple, List, Union
+from typing import List, Tuple, Union
 
-from scrapy.http import Request, Headers
+from scrapy.http import Headers, Request
 
 from scrapypuppeteer.actions import GoTo, PuppeteerServiceAction
diff --git a/scrapypuppeteer/response.py b/scrapypuppeteer/response.py
index 043d49c..51dee13 100644
--- a/scrapypuppeteer/response.py
+++ b/scrapypuppeteer/response.py
@@ -29,7 +29,7 @@ def __init__(
         puppeteer_request: PuppeteerRequest,
         context_id: str,
         page_id: str,
-        **kwargs
+        **kwargs,
     ):
         self.puppeteer_request = puppeteer_request
         self.context_id = context_id
@@ -41,7 +41,7 @@ def follow(
         action: Union[str, PuppeteerServiceAction],
         close_page=True,
         accumulate_meta: bool = False,
-        **kwargs
+        **kwargs,
     ) -> PuppeteerRequest:
         """
         Execute action on the same browser page.
@@ -67,7 +67,7 @@ def follow(
             context_id=self.context_id,
             page_id=page_id,
             close_page=close_page,
-            **kwargs
+            **kwargs,
         )


diff --git a/setup.py b/setup.py
index a50dd96..638ce36 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-from setuptools import setup, find_packages
+from setuptools import find_packages, setup
 
 
 def read_long_description(file_path):
@@ -10,7 +10,7 @@ def read_long_description(file_path):
 
 setup(
     name="scrapy-puppeteer-client",
-    version="0.3.6",
+    version="0.3.7",
     description="A library to use Puppeteer-managed browser in Scrapy spiders",
     long_description=read_long_description("README.md"),
     long_description_content_type="text/markdown",
diff --git a/tests/actions/constants.py b/tests/actions/constants.py
index 3b64256..b3d6378 100644
--- a/tests/actions/constants.py
+++ b/tests/actions/constants.py
@@ -1,5 +1,5 @@
-from random import randint
 from itertools import combinations
+from random import randint
 
 URLS = ("https://some_url.com", "not_url/not_url")
 WAIT_UNTIL = ("load", "domcontentloaded", "networkidle0")
diff --git a/tests/actions/test_actions.py b/tests/actions/test_actions.py
index cd3f448..3a76dcf 100644
--- a/tests/actions/test_actions.py
+++ b/tests/actions/test_actions.py
@@ -1,7 +1,9 @@
-from pytest import mark
-from scrapypuppeteer.actions import GoTo, GoForward, GoBack, Click, Scroll
 from itertools import product
-from constants import URLS, NAV_OPTS, WAIT_OPTS, SELECTORS, CLICK_OPTS, HAR_RECORDING
+
+from constants import CLICK_OPTS, HAR_RECORDING, NAV_OPTS, SELECTORS, URLS, WAIT_OPTS
+from pytest import mark
+
+from scrapypuppeteer.actions import Click, GoBack, GoForward, GoTo, Scroll
 
 
 def _gen_goto():
diff --git a/tests/middleware/test_middleware.py b/tests/middleware/test_middleware.py
index fa37c00..48b51fd 100644
--- a/tests/middleware/test_middleware.py
+++ b/tests/middleware/test_middleware.py
@@ -1,15 +1,16 @@
+from scrapy.utils.test import get_crawler
+from twisted.internet import defer
+from twisted.trial.unittest import TestCase
+
+from tests.mockserver import MockServer
 from tests.spiders import (
-    GoToSpider,
-    GoBackForwardSpider,
     ClickSpider,
-    ScreenshotSpider,
     CustomJsActionSpider,
+    GoBackForwardSpider,
+    GoToSpider,
     RecaptchaSolverSpider,
+    ScreenshotSpider,
 )
-from tests.mockserver import MockServer
-from twisted.trial.unittest import TestCase
-from twisted.internet import defer
-from scrapy.utils.test import get_crawler
 
 
 class PuppeteerCrawlTest(TestCase):
diff --git a/tests/middleware/view.py b/tests/middleware/view.py
index f8a6069..f3e3f9b 100644
--- a/tests/middleware/view.py
+++ b/tests/middleware/view.py
@@ -1,7 +1,5 @@
-from scrapy import Request
-
-
 import scrapy
+from scrapy import Request
 
 
 class ViewSpider(scrapy.Spider):
diff --git a/tests/mockserver.py b/tests/mockserver.py
index 99f83c0..798127e 100644
--- a/tests/mockserver.py
+++ b/tests/mockserver.py
@@ -1,22 +1,20 @@
 import argparse
 import os
 import sys
-
+from base64 import b64encode
+from json import dumps
 from pathlib import Path
+from secrets import token_hex
 from subprocess import PIPE, Popen
 from typing import Dict
-from secrets import token_hex
-from json import dumps
-from base64 import b64encode
 
+from scrapy.utils.python import to_bytes
 from twisted.internet import reactor
 from twisted.internet.protocol import ServerFactory
 from twisted.internet.task import deferLater
 from twisted.web import resource
 from twisted.web.server import NOT_DONE_YET, Site
 
-from scrapy.utils.python import to_bytes
-

 def get_arg(request, name, default=None, arg_type=None):
     if name in request.args:
diff --git a/tests/spiders.py b/tests/spiders.py
index dddcbc9..d23a101 100644
--- a/tests/spiders.py
+++ b/tests/spiders.py
@@ -1,13 +1,14 @@
 from scrapy import Spider
+
 from scrapypuppeteer import PuppeteerRequest
 from scrapypuppeteer.actions import (
-    GoTo,
-    GoForward,
-    GoBack,
     Click,
-    Screenshot,
     CustomJsAction,
+    GoBack,
+    GoForward,
+    GoTo,
     RecaptchaSolver,
+    Screenshot,
 )