Formatter and linter #39

Merged 19 commits on Sep 2, 2024
27 changes: 27 additions & 0 deletions .github/workflows/ruff.yml
@@ -0,0 +1,27 @@
name: Ruff Code Check

on: [push, pull_request]

jobs:
  ruff:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: "3.x"

      - name: Install Ruff
        run: |
          pip install ruff

      - name: Run Ruff Format
        run: |
          ruff format --check

      - name: Run Ruff Check
        run: |
          ruff check .
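
For contributors who want to reproduce this gate locally before pushing, a minimal helper script is sketched below. It is not part of this PR; it simply shells out to the same two Ruff commands the workflow runs and exits with Ruff's status on the first failure.

```python
#!/usr/bin/env python
"""Hypothetical local mirror of the CI gate above (not part of this PR)."""
import subprocess
import sys


def main() -> int:
    # Same two steps as the workflow: the formatting check first, then the linter.
    for cmd in (["ruff", "format", "--check"], ["ruff", "check", "."]):
        result = subprocess.run(cmd)
        if result.returncode != 0:
            # Propagate Ruff's exit code so a pre-push hook or shell sees the failure.
            return result.returncode
    return 0


if __name__ == "__main__":
    sys.exit(main())
```
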
8 changes: 8 additions & 0 deletions CONTRIBUTING.md
@@ -13,6 +13,14 @@ Examples of contributions include:

`Please formalize your pull request (PR)` you will get.

**Before each push or PR, run in the root directory of the project:**

```bash
ruff check

ruff format
```

---
# Code of Conduct

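
As an illustration (not part of the diff): `ruff check` reports lint violations such as the pointless f-string fixed in `examples/spiders/fill_form.py` further down (rule F541), while `ruff format` rewrites files in place. A minimal before/after sketch of that case:

```python
import base64

data = base64.b64encode(b"fake image bytes")  # stand-in for response.screenshot

# Before (flagged by `ruff check` as F541, an f-string with no placeholders):
#     with open(f"screenshot.png", "wb") as fh:
#         fh.write(base64.b64decode(data))

# After (a plain string literal does the same job):
with open("screenshot.png", "wb") as fh:
    fh.write(base64.b64decode(data))
```
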
3 changes: 2 additions & 1 deletion examples/spiders/auto_recaptcha.py
@@ -1,6 +1,7 @@
import base64
import logging

import scrapy
import base64
from twisted.python.failure import Failure

from scrapypuppeteer import PuppeteerRequest
8 changes: 5 additions & 3 deletions examples/spiders/fill_form.py
@@ -1,7 +1,9 @@
import base64

import scrapy

from scrapypuppeteer import PuppeteerRequest, PuppeteerScreenshotResponse
from scrapypuppeteer.actions import Screenshot, FillForm
import base64
from scrapypuppeteer.actions import FillForm, Screenshot


class FormActionSpider(scrapy.Spider):
@@ -34,5 +36,5 @@ def screenshot(self, response):
    @staticmethod
    def make_screenshot(response: PuppeteerScreenshotResponse, **kwargs):
        data = response.screenshot
        with open(f"screenshot.png", "wb") as fh:
        with open("screenshot.png", "wb") as fh:
            fh.write(base64.b64decode(data))
1 change: 1 addition & 0 deletions examples/spiders/har.py
@@ -1,4 +1,5 @@
import scrapy

from scrapypuppeteer import PuppeteerRequest
from scrapypuppeteer.actions import Har

5 changes: 3 additions & 2 deletions examples/spiders/manual_recaptcha.py
@@ -1,10 +1,11 @@
import base64
import logging

import scrapy
import base64
from twisted.python.failure import Failure

from scrapypuppeteer import PuppeteerRequest
from scrapypuppeteer.actions import GoTo, RecaptchaSolver, Click, Screenshot
from scrapypuppeteer.actions import Click, GoTo, RecaptchaSolver, Screenshot
from scrapypuppeteer.response import PuppeteerResponse, PuppeteerScreenshotResponse


2 changes: 1 addition & 1 deletion examples/spiders/meduza.py
@@ -1,6 +1,6 @@
import scrapy

from scrapypuppeteer import PuppeteerRequest, PuppeteerHtmlResponse
from scrapypuppeteer import PuppeteerHtmlResponse, PuppeteerRequest


class MeduzaSpider(scrapy.Spider):
5 changes: 2 additions & 3 deletions examples/spiders/webscraperio.py
@@ -1,11 +1,10 @@
import scrapy

from scrapypuppeteer import PuppeteerRequest
from scrapypuppeteer.actions import GoTo, Scroll, Click
from scrapypuppeteer.actions import Click, GoTo, Scroll


class EcommerceSiteSpider(scrapy.Spider):

    @staticmethod
    def extract_items(list_page_response):
        for item_selector in list_page_response.css("div.row div.thumbnail"):
@@ -29,7 +28,7 @@ def extract_item(detail_page_response):
"description": detail_page_response.css("p.description::text").get(),
"rating": len(detail_page_response.css("span.glyphicon-star")),
"reviews_count": int(
detail_page_response.css(".ratings::text").re_first("\d+")
detail_page_response.css(".ratings::text").re_first(r"\d+")
),
}

16 changes: 16 additions & 0 deletions pyproject.toml
@@ -0,0 +1,16 @@
[tool.ruff]
line-length = 88
fix = false
indent-width = 4

[tool.ruff.lint]
select = ["F", "C", "W", "I"]
ignore = ["E203", "E501", "F401", "C408", "F811", "N807"]

[tool.ruff.format]
indent-style = "space"
line-ending = "auto"
quote-style = "double"
skip-magic-trailing-comma = false
docstring-code-line-length = 88
docstring-code-format = true
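
For context (not part of the diff): the `"I"` entry in `select` turns on Ruff's isort-compatible import sorting, which is what produces the reordered imports in the files below. A sketch of the layout those rules enforce, using imports that already appear in this PR:

```python
# Standard library first, then third-party, then the project's own scrapypuppeteer
# package, each group alphabetized and separated by one blank line.
import base64
import logging

import scrapy
from twisted.python.failure import Failure

from scrapypuppeteer import PuppeteerRequest
from scrapypuppeteer.actions import Click, GoTo, RecaptchaSolver, Screenshot
```
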
24 changes: 12 additions & 12 deletions scrapypuppeteer/__init__.py
@@ -1,21 +1,21 @@
from .actions import (
    PuppeteerServiceAction,
    GoTo,
    GoForward,
    GoBack,
    Click,
    Scroll,
    Screenshot,
    Har,
    CustomJsAction,
    FillForm,
    GoBack,
    GoForward,
    GoTo,
    Har,
    PuppeteerServiceAction,
    RecaptchaSolver,
    CustomJsAction,
    Screenshot,
    Scroll,
)
from .request import PuppeteerRequest, CloseContextRequest
from .request import CloseContextRequest, PuppeteerRequest
from .response import (
    PuppeteerResponse,
    PuppeteerHtmlResponse,
    PuppeteerScreenshotResponse,
    PuppeteerRecaptchaSolverResponse,
    PuppeteerJsonResponse,
    PuppeteerRecaptchaSolverResponse,
    PuppeteerResponse,
    PuppeteerScreenshotResponse,
)
3 changes: 1 addition & 2 deletions scrapypuppeteer/actions.py
@@ -1,8 +1,7 @@
from abc import abstractmethod, ABC
from abc import ABC, abstractmethod


class PuppeteerServiceAction(ABC):

    @property
    @abstractmethod
    def endpoint(self): ...
scrapypuppeteer/browser_managers/playwright_browser_manager.py
@@ -2,8 +2,8 @@
import base64
import uuid

from playwright.async_api import async_playwright
import syncer
from playwright.async_api import async_playwright

from scrapypuppeteer.browser_managers import BrowserManager
from scrapypuppeteer.request import CloseContextRequest, PuppeteerRequest
@@ -14,7 +14,6 @@


class ContextManager:

    def __init__(self):
        self.browser = syncer.sync(self.launch_browser())
        self.contexts = {}
scrapypuppeteer/browser_managers/pyppeteer_browser_manager.py
@@ -6,15 +6,14 @@
from pyppeteer import launch

from scrapypuppeteer.browser_managers import BrowserManager
from scrapypuppeteer.request import PuppeteerRequest, CloseContextRequest
from scrapypuppeteer.request import CloseContextRequest, PuppeteerRequest
from scrapypuppeteer.response import (
    PuppeteerHtmlResponse,
    PuppeteerScreenshotResponse,
)


class ContextManager:

    def __init__(self):
        self.browser = syncer.sync(launch())
        self.contexts = {}
@@ -55,7 +54,6 @@ def close_contexts(self, request: CloseContextRequest):


class PyppeteerBrowserManager(BrowserManager):

    def __init__(self):
        self.context_manager = ContextManager()
        self.action_map = {
@@ -72,7 +70,6 @@ def __init__(self):
        }

    def process_request(self, request):

        if isinstance(request, PuppeteerRequest):
            endpoint = request.action.endpoint
            action_function = self.action_map.get(endpoint)
17 changes: 8 additions & 9 deletions scrapypuppeteer/browser_managers/service_browser_manager.py
@@ -4,30 +4,30 @@
from urllib.parse import urlencode, urljoin

from scrapy.exceptions import DontCloseSpider
from scrapy.http import Headers, TextResponse, Response
from scrapy.http import Headers, Response, TextResponse
from scrapy.utils.log import failure_to_exc_info
from twisted.python.failure import Failure

from scrapypuppeteer.actions import (
    Click,
    FillForm,
    GoBack,
    GoForward,
    GoTo,
    Har,
    RecaptchaSolver,
    Screenshot,
    Scroll,
    Har,
    FillForm,
)
from scrapypuppeteer.browser_managers import BrowserManager
from scrapypuppeteer.request import ActionRequest, CloseContextRequest, PuppeteerRequest
from scrapypuppeteer.response import (
    PuppeteerHtmlResponse,
    PuppeteerScreenshotResponse,
    PuppeteerHarResponse,
    PuppeteerRecaptchaSolverResponse,
    PuppeteerHtmlResponse,
    PuppeteerJsonResponse,
    PuppeteerRecaptchaSolverResponse,
    PuppeteerScreenshotResponse,
)
from scrapypuppeteer.request import ActionRequest, PuppeteerRequest, CloseContextRequest
from scrapypuppeteer.browser_managers import BrowserManager


class ServiceBrowserManager(BrowserManager):
@@ -43,7 +43,6 @@ def __init__(self, service_base_url, include_meta, include_headers, crawler):
            raise ValueError("Puppeteer service URL must be provided")

    def process_request(self, request):

        if isinstance(request, CloseContextRequest):
            return self.process_close_context_request(request)

18 changes: 8 additions & 10 deletions scrapypuppeteer/middleware.py
@@ -6,31 +6,29 @@
from scrapy.crawler import Crawler
from scrapy.exceptions import IgnoreRequest, NotConfigured


from scrapypuppeteer.actions import (
    Click,
    CustomJsAction,
    RecaptchaSolver,
    Screenshot,
    Scroll,
    CustomJsAction,
)
from scrapypuppeteer.response import (
    PuppeteerResponse,
    PuppeteerHtmlResponse,
from scrapypuppeteer.browser_managers import BrowserManager
from scrapypuppeteer.browser_managers.playwright_browser_manager import (
    PlaywrightBrowserManager,
)
from scrapypuppeteer.request import ActionRequest, PuppeteerRequest, CloseContextRequest
from scrapypuppeteer.browser_managers.pyppeteer_browser_manager import (
    PyppeteerBrowserManager,
)
from scrapypuppeteer.browser_managers.service_browser_manager import (
    ServiceBrowserManager,
)
from scrapypuppeteer.browser_managers.playwright_browser_manager import (
    PlaywrightBrowserManager,
from scrapypuppeteer.request import ActionRequest, CloseContextRequest, PuppeteerRequest
from scrapypuppeteer.response import (
    PuppeteerHtmlResponse,
    PuppeteerResponse,
)

from scrapypuppeteer.browser_managers import BrowserManager


class PuppeteerServiceDownloaderMiddleware:
"""
4 changes: 2 additions & 2 deletions scrapypuppeteer/request.py
@@ -1,7 +1,7 @@
import json
from typing import Tuple, List, Union
from typing import List, Tuple, Union

from scrapy.http import Request, Headers
from scrapy.http import Headers, Request

from scrapypuppeteer.actions import GoTo, PuppeteerServiceAction

6 changes: 3 additions & 3 deletions scrapypuppeteer/response.py
@@ -29,7 +29,7 @@ def __init__(
        puppeteer_request: PuppeteerRequest,
        context_id: str,
        page_id: str,
        **kwargs
        **kwargs,
    ):
        self.puppeteer_request = puppeteer_request
        self.context_id = context_id
@@ -41,7 +41,7 @@ def follow(
        action: Union[str, PuppeteerServiceAction],
        close_page=True,
        accumulate_meta: bool = False,
        **kwargs
        **kwargs,
    ) -> PuppeteerRequest:
        """
        Execute action on the same browser page.
@@ -67,7 +67,7 @@
            context_id=self.context_id,
            page_id=page_id,
            close_page=close_page,
            **kwargs
            **kwargs,
        )


4 changes: 2 additions & 2 deletions setup.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python

from setuptools import setup, find_packages
from setuptools import find_packages, setup


def read_long_description(file_path):
@@ -10,7 +10,7 @@ def read_long_description(file_path):

setup(
name="scrapy-puppeteer-client",
version="0.3.6",
version="0.3.7",
description="A library to use Puppeteer-managed browser in Scrapy spiders",
long_description=read_long_description("README.md"),
long_description_content_type="text/markdown",
2 changes: 1 addition & 1 deletion tests/actions/constants.py
@@ -1,5 +1,5 @@
from random import randint
from itertools import combinations
from random import randint

URLS = ("https://some_url.com", "not_url/not_url")
WAIT_UNTIL = ("load", "domcontentloaded", "networkidle0")
8 changes: 5 additions & 3 deletions tests/actions/test_actions.py
@@ -1,7 +1,9 @@
from pytest import mark
from scrapypuppeteer.actions import GoTo, GoForward, GoBack, Click, Scroll
from itertools import product
from constants import URLS, NAV_OPTS, WAIT_OPTS, SELECTORS, CLICK_OPTS, HAR_RECORDING

from constants import CLICK_OPTS, HAR_RECORDING, NAV_OPTS, SELECTORS, URLS, WAIT_OPTS
from pytest import mark

from scrapypuppeteer.actions import Click, GoBack, GoForward, GoTo, Scroll


def _gen_goto():