diff --git a/scrapy_zyte_api/__init__.py b/scrapy_zyte_api/__init__.py index 88bec2fb..3fb7584e 100644 --- a/scrapy_zyte_api/__init__.py +++ b/scrapy_zyte_api/__init__.py @@ -5,7 +5,7 @@ install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor") -from ._annotations import ExtractFrom, actions +from ._annotations import ExtractFrom, actions, custom_attrs from ._middlewares import ( ScrapyZyteAPIDownloaderMiddleware, ScrapyZyteAPISpiderMiddleware, diff --git a/scrapy_zyte_api/_annotations.py b/scrapy_zyte_api/_annotations.py index 20336b59..fca9d7fb 100644 --- a/scrapy_zyte_api/_annotations.py +++ b/scrapy_zyte_api/_annotations.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Iterable, List, Optional, TypedDict +from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Tuple, TypedDict class ExtractFrom(str, Enum): @@ -56,7 +56,8 @@ class _ActionResult(TypedDict, total=False): error: Optional[str] -def make_hashable(obj): +def make_hashable(obj: Any) -> Any: + """Converts input into hashable form, to use in ``Annotated``.""" if isinstance(obj, (tuple, list)): return tuple((make_hashable(e) for e in obj)) @@ -66,7 +67,26 @@ def make_hashable(obj): return obj -def actions(value: Iterable[Action]): +def _from_hashable(obj: Any) -> Any: + """Converts a result of ``make_hashable`` back to original form.""" + if isinstance(obj, tuple): + return [_from_hashable(o) for o in obj] + + if isinstance(obj, frozenset): + return {_from_hashable(k): _from_hashable(v) for k, v in obj} + + return obj + + +def actions(value: Iterable[Action]) -> Tuple[Any, ...]: """Convert an iterable of :class:`~scrapy_zyte_api.Action` dicts into a hashable value.""" # both lists and dicts are not hashable and we need dep types to be hashable return tuple(make_hashable(action) for action in value) + + +def custom_attrs( + input: Dict[str, Any], options: Optional[Dict[str, Any]] = None +) -> Tuple[FrozenSet[Any], Optional[FrozenSet[Any]]]: + input_wrapped = make_hashable(input) + options_wrapped = make_hashable(options) if options else None + return input_wrapped, options_wrapped diff --git a/scrapy_zyte_api/providers.py b/scrapy_zyte_api/providers.py index 4042775c..23c37869 100644 --- a/scrapy_zyte_api/providers.py +++ b/scrapy_zyte_api/providers.py @@ -26,6 +26,9 @@ AutoProductListPage, AutoProductNavigationPage, AutoProductPage, + CustomAttributes, + CustomAttributesMetadata, + CustomAttributesValues, Item, JobPosting, Product, @@ -35,7 +38,7 @@ from zyte_common_items.fields import is_auto_field from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot -from scrapy_zyte_api._annotations import _ActionResult +from scrapy_zyte_api._annotations import _ActionResult, _from_hashable from scrapy_zyte_api.responses import ZyteAPITextResponse try: @@ -76,6 +79,8 @@ class ZyteApiProvider(PageObjectInputProvider): ArticleNavigation, BrowserHtml, BrowserResponse, + CustomAttributes, + CustomAttributesValues, Geolocation, JobPosting, Product, @@ -175,15 +180,14 @@ async def __call__( # noqa: C901 ) zyte_api_meta["actions"] = [] for action in cls.__metadata__[0]: # type: ignore[attr-defined] - zyte_api_meta["actions"].append( - { - k: ( - dict(v) - if isinstance(v, frozenset) - else list(v) if isinstance(v, tuple) else v - ) - for k, v in action - } + zyte_api_meta["actions"].append(_from_hashable(action)) + continue + if cls_stripped in {CustomAttributes, CustomAttributesValues}: + custom_attrs_input, custom_attrs_options = cls.__metadata__[0] # type: ignore[attr-defined] + zyte_api_meta["customAttributes"] = _from_hashable(custom_attrs_input) + if custom_attrs_options: + zyte_api_meta["customAttributesOptions"] = _from_hashable( + custom_attrs_options ) continue kw = _ITEM_KEYWORDS.get(cls_stripped) @@ -322,6 +326,27 @@ async def __call__( # noqa: C901 result = AnnotatedInstance(Actions(actions_result), cls.__metadata__) # type: ignore[attr-defined] results.append(result) continue + if cls_stripped is CustomAttributes and is_typing_annotated(cls): + custom_attrs_result = api_response.raw_api_response["customAttributes"] + result = AnnotatedInstance( + CustomAttributes( + CustomAttributesValues(custom_attrs_result["values"]), + CustomAttributesMetadata.from_dict( + custom_attrs_result["metadata"] + ), + ), + cls.__metadata__, # type: ignore[attr-defined] + ) + results.append(result) + continue + if cls_stripped is CustomAttributesValues and is_typing_annotated(cls): + custom_attrs_result = api_response.raw_api_response["customAttributes"] + result = AnnotatedInstance( + CustomAttributesValues(custom_attrs_result["values"]), + cls.__metadata__, # type: ignore[attr-defined] + ) + results.append(result) + continue kw = _ITEM_KEYWORDS.get(cls_stripped) if not kw: continue diff --git a/setup.py b/setup.py index ac2de981..05817c46 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ def get_version(): "andi>=0.6.0", "scrapy-poet>=0.22.3", "web-poet>=0.17.0", - "zyte-common-items>=0.20.0", + "zyte-common-items>=0.23.0", ] }, classifiers=[ diff --git a/tests/mockserver.py b/tests/mockserver.py index dc709dd5..7567a3f1 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -230,6 +230,17 @@ def render_POST(self, request): "name" ] += f" (country {request_data['geolocation']})" + if "customAttributes" in request_data: + response_data["customAttributes"] = { + "metadata": { + "textInputTokens": 1000, + }, + "values": { + "attr1": "foo", + "attr2": 42, + }, + } + return json.dumps(response_data).encode() diff --git a/tests/test_annotations.py b/tests/test_annotations.py new file mode 100644 index 00000000..a6537d00 --- /dev/null +++ b/tests/test_annotations.py @@ -0,0 +1,110 @@ +import pytest + +from scrapy_zyte_api._annotations import ( + _from_hashable, + actions, + custom_attrs, + make_hashable, +) + + +@pytest.mark.parametrize( + "input,expected", + [ + ([], ()), + ({}, frozenset()), + ("foo", "foo"), + (["foo"], ("foo",)), + (42, 42), + ( + {"action": "foo", "id": "xx"}, + frozenset({("action", "foo"), ("id", "xx")}), + ), + ( + [{"action": "foo", "id": "xx"}, {"action": "bar"}], + ( + frozenset({("action", "foo"), ("id", "xx")}), + frozenset({("action", "bar")}), + ), + ), + ( + {"action": "foo", "options": {"a": "b", "c": ["d", "e"]}}, + frozenset( + { + ("action", "foo"), + ("options", frozenset({("a", "b"), ("c", ("d", "e"))})), + } + ), + ), + ], +) +def test_make_hashable(input, expected): + assert make_hashable(input) == expected + + +@pytest.mark.parametrize( + "input,expected", + [ + ((), []), + (frozenset(), {}), + ("foo", "foo"), + (("foo",), ["foo"]), + (42, 42), + ( + frozenset({("action", "foo"), ("id", "xx")}), + {"action": "foo", "id": "xx"}, + ), + ( + ( + frozenset({("action", "foo"), ("id", "xx")}), + frozenset({("action", "bar")}), + ), + [{"action": "foo", "id": "xx"}, {"action": "bar"}], + ), + ( + frozenset( + { + ("action", "foo"), + ("options", frozenset({("a", "b"), ("c", ("d", "e"))})), + } + ), + {"action": "foo", "options": {"a": "b", "c": ["d", "e"]}}, + ), + ], +) +def test_from_hashable(input, expected): + assert _from_hashable(input) == expected + + +@pytest.mark.parametrize( + "input,expected", + [ + ([], ()), + ([{}], (frozenset(),)), + ( + [{"action": "foo"}, {"action": "bar"}], + ( + frozenset({("action", "foo")}), + frozenset({("action", "bar")}), + ), + ), + ], +) +def test_actions(input, expected): + assert actions(input) == expected + + +@pytest.mark.parametrize( + "input,options,expected", + [ + ({}, None, (frozenset(), None)), + ({"foo": "bar"}, None, (frozenset({("foo", "bar")}), None)), + ( + {"foo": "bar"}, + {"tokens": 42}, + (frozenset({("foo", "bar")}), frozenset({("tokens", 42)})), + ), + ], +) +def test_custom_attrs(input, options, expected): + assert custom_attrs(input, options) == expected diff --git a/tests/test_providers.py b/tests/test_providers.py index 74dd17cc..c5a935be 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -24,10 +24,24 @@ handle_urls, ) from web_poet.pages import get_item_cls -from zyte_common_items import AutoProductPage, BasePage, BaseProductPage, Product +from zyte_common_items import ( + AutoProductPage, + BasePage, + BaseProductPage, + CustomAttributes, + CustomAttributesValues, + Product, +) from zyte_common_items.fields import auto_field -from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot, actions +from scrapy_zyte_api import ( + Actions, + ExtractFrom, + Geolocation, + Screenshot, + actions, + custom_attrs, +) from scrapy_zyte_api.handler import ScrapyZyteAPIDownloadHandler from scrapy_zyte_api.providers import _AUTO_PAGES, _ITEM_KEYWORDS, ZyteApiProvider @@ -394,6 +408,109 @@ def parse_(self, response: DummyResponse, page: GeoProductPage): # type: ignore assert "Geolocation dependencies must be annotated" in caplog.text +custom_attrs_input = { + "attr1": {"type": "string", "description": "descr1"}, + "attr2": {"type": "number", "description": "descr2"}, +} + + +@pytest.mark.skipif( + sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9" +) +@pytest.mark.parametrize( + "annotation", + [ + custom_attrs(custom_attrs_input), + custom_attrs(custom_attrs_input, None), + custom_attrs(custom_attrs_input, {}), + custom_attrs(custom_attrs_input, {"foo": "bar"}), + ], +) +@ensureDeferred +async def test_provider_custom_attrs(mockserver, annotation): + from typing import Annotated + + @attrs.define + class CustomAttrsPage(BasePage): + product: Product + custom_attrs: Annotated[CustomAttributes, annotation] + + class CustomAttrsZyteAPISpider(ZyteAPISpider): + def parse_(self, response: DummyResponse, page: CustomAttrsPage): # type: ignore[override] + yield { + "product": page.product, + "custom_attrs": page.custom_attrs, + } + + settings = create_scrapy_settings() + settings["ZYTE_API_URL"] = mockserver.urljoin("/") + settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} + + item, url, _ = await crawl_single_item( + CustomAttrsZyteAPISpider, HtmlResource, settings + ) + assert item["product"] == Product.from_dict( + dict( + url=url, + name="Product name", + price="10", + currency="USD", + ) + ) + assert item["custom_attrs"] == CustomAttributes.from_dict( + { + "values": { + "attr1": "foo", + "attr2": 42, + }, + "metadata": {"textInputTokens": 1000}, + } + ) + + +@pytest.mark.skipif( + sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9" +) +@ensureDeferred +async def test_provider_custom_attrs_values(mockserver): + from typing import Annotated + + @attrs.define + class CustomAttrsPage(BasePage): + product: Product + custom_attrs: Annotated[ + CustomAttributesValues, + custom_attrs(custom_attrs_input), + ] + + class CustomAttrsZyteAPISpider(ZyteAPISpider): + def parse_(self, response: DummyResponse, page: CustomAttrsPage): # type: ignore[override] + yield { + "product": page.product, + "custom_attrs": page.custom_attrs, + } + + settings = create_scrapy_settings() + settings["ZYTE_API_URL"] = mockserver.urljoin("/") + settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} + + item, url, _ = await crawl_single_item( + CustomAttrsZyteAPISpider, HtmlResource, settings + ) + assert item["product"] == Product.from_dict( + dict( + url=url, + name="Product name", + price="10", + currency="USD", + ) + ) + assert item["custom_attrs"] == { + "attr1": "foo", + "attr2": 42, + } + + class RecordingHandler(ScrapyZyteAPIDownloadHandler): """Subclasses the original handler in order to record the Zyte API parameters used for each downloading request. diff --git a/tox.ini b/tox.ini index 086d7971..6d601b24 100644 --- a/tox.ini +++ b/tox.ini @@ -90,7 +90,7 @@ deps = andi==0.6.0 scrapy-poet==0.22.3 web-poet==0.17.0 - zyte-common-items==0.20.0 + zyte-common-items==0.23.0 [testenv:pinned-extra] basepython=python3.8