From 5a13a79e2eeca154f6d30afee8b0e34a328207b9 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 19 Aug 2024 15:40:54 +0500 Subject: [PATCH 01/12] Handle custom attributes received in the API response. --- scrapy_zyte_api/providers.py | 3 +++ setup.py | 3 ++- tests/mockserver.py | 6 ++++++ tests/test_providers.py | 27 +++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/scrapy_zyte_api/providers.py b/scrapy_zyte_api/providers.py index 4042775c..497c2f44 100644 --- a/scrapy_zyte_api/providers.py +++ b/scrapy_zyte_api/providers.py @@ -327,6 +327,9 @@ async def __call__( # noqa: C901 continue assert issubclass(cls_stripped, Item) result = cls_stripped.from_dict(api_response.raw_api_response[kw]) # type: ignore[attr-defined] + custom_attrs = api_response.raw_api_response.get("customAttributes") + if custom_attrs: + result.customAttributes = custom_attrs if is_typing_annotated(cls): result = AnnotatedInstance(result, cls.__metadata__) # type: ignore[attr-defined] results.append(result) diff --git a/setup.py b/setup.py index ac2de981..b5a5dfcd 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,8 @@ def get_version(): "andi>=0.6.0", "scrapy-poet>=0.22.3", "web-poet>=0.17.0", - "zyte-common-items>=0.20.0", + # https://github.com/zytedata/zyte-common-items/pull/100 + "zyte-common-items @ git+https://github.com/scrapinghub/zyte-common-items.git@custom-attrs", ] }, classifiers=[ diff --git a/tests/mockserver.py b/tests/mockserver.py index dc709dd5..e8b17d9b 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -230,6 +230,12 @@ def render_POST(self, request): "name" ] += f" (country {request_data['geolocation']})" + if "customAttributes" in request_data: + response_data["customAttributes"] = { + "attr1": "foo", + "attr2": 42, + } + return json.dumps(response_data).encode() diff --git a/tests/test_providers.py b/tests/test_providers.py index 74dd17cc..ef54b57e 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -394,6 +394,33 @@ def parse_(self, response: DummyResponse, page: GeoProductPage): # type: ignore assert "Geolocation dependencies must be annotated" in caplog.text +@ensureDeferred +async def test_provider_custom_attrs(mockserver): + settings = create_scrapy_settings() + settings["ZYTE_API_URL"] = mockserver.urljoin("/") + settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} + settings["ZYTE_API_PROVIDER_PARAMS"] = { + "customAttributes": { + "attr1": {"type": "string", "description": "descr1"}, + "attr2": {"type": "number", "description": "descr2"}, + } + } + + item, url, _ = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + assert item["product"] == Product.from_dict( + dict( + url=url, + name="Product name", + price="10", + currency="USD", + customAttributes={ + "attr1": "foo", + "attr2": 42, + }, + ) + ) + + class RecordingHandler(ScrapyZyteAPIDownloadHandler): """Subclasses the original handler in order to record the Zyte API parameters used for each downloading request. From 25569e68a514441aa6d61f93048ac778161c9341 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 19 Aug 2024 16:23:20 +0500 Subject: [PATCH 02/12] Fix the dep URL. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b5a5dfcd..5c317eda 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ def get_version(): "scrapy-poet>=0.22.3", "web-poet>=0.17.0", # https://github.com/zytedata/zyte-common-items/pull/100 - "zyte-common-items @ git+https://github.com/scrapinghub/zyte-common-items.git@custom-attrs", + "zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs", ] }, classifiers=[ From 4ab9691dcb9948b8cccbdb50b52d66cad85bb694 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 19 Aug 2024 18:06:39 +0500 Subject: [PATCH 03/12] Update field tests. --- tests/test_providers.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_providers.py b/tests/test_providers.py index ef54b57e..6fa54123 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -1184,9 +1184,9 @@ def parse(self, response: DummyResponse, product: Product): assert auto_field_stats == { "scrapy-zyte-api/auto_fields/tests.test_providers.test_auto_field_stats_partial_override..MyProductPage": ( "additionalProperties aggregateRating availability breadcrumbs " - "canonicalUrl color currency currencyRaw description descriptionHtml " - "features gtin images mainImage metadata mpn price productId " - "regularPrice size sku style url variants" + "canonicalUrl color currency currencyRaw customAttributes description " + "descriptionHtml features gtin images mainImage metadata mpn price " + "productId regularPrice size sku style url variants" ), } @@ -1424,9 +1424,9 @@ def parse(self, response: DummyResponse, page: MyProductPage): assert auto_field_stats == { "scrapy-zyte-api/auto_fields/tests.test_providers.test_auto_field_stats_item_page_override..MyProductPage": ( "additionalProperties aggregateRating availability breadcrumbs " - "canonicalUrl color currency currencyRaw description descriptionHtml " - "features gtin images mainImage metadata mpn price productId " - "regularPrice size sku style url variants" + "canonicalUrl color currency currencyRaw customAttributes description " + "descriptionHtml features gtin images mainImage metadata mpn price " + "productId regularPrice size sku style url variants" ), } @@ -1490,9 +1490,9 @@ def parse(self, response: DummyResponse, page: AltProductPage): assert auto_field_stats == { "scrapy-zyte-api/auto_fields/tests.test_providers.test_auto_field_stats_alt_page_override..MyProductPage": ( "additionalProperties aggregateRating availability breadcrumbs " - "canonicalUrl color currency currencyRaw description descriptionHtml " - "features gtin images mainImage metadata mpn price productId " - "regularPrice size sku style url variants" + "canonicalUrl color currency currencyRaw customAttributes description " + "descriptionHtml features gtin images mainImage metadata mpn price " + "productId regularPrice size sku style url variants" ), } From 928939981520473c3598b3621b131a252b621cfe Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 20 Aug 2024 13:13:52 +0500 Subject: [PATCH 04/12] More fixes. --- scrapy_zyte_api/providers.py | 2 +- tests/test_providers.py | 4 ++++ tox.ini | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scrapy_zyte_api/providers.py b/scrapy_zyte_api/providers.py index 497c2f44..6bf93507 100644 --- a/scrapy_zyte_api/providers.py +++ b/scrapy_zyte_api/providers.py @@ -329,7 +329,7 @@ async def __call__( # noqa: C901 result = cls_stripped.from_dict(api_response.raw_api_response[kw]) # type: ignore[attr-defined] custom_attrs = api_response.raw_api_response.get("customAttributes") if custom_attrs: - result.customAttributes = custom_attrs + result.customAttributes = custom_attrs # type: ignore[attr-defined] if is_typing_annotated(cls): result = AnnotatedInstance(result, cls.__metadata__) # type: ignore[attr-defined] results.append(result) diff --git a/tests/test_providers.py b/tests/test_providers.py index 6fa54123..f2052600 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -1240,6 +1240,10 @@ def currency(self): def currencyRaw(self): return self.product.currencyRaw + @field + def customAttributes(self): + return self.product.customAttributes + @field def description(self): return self.product.description diff --git a/tox.ini b/tox.ini index 086d7971..50019795 100644 --- a/tox.ini +++ b/tox.ini @@ -90,7 +90,7 @@ deps = andi==0.6.0 scrapy-poet==0.22.3 web-poet==0.17.0 - zyte-common-items==0.20.0 + zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs [testenv:pinned-extra] basepython=python3.8 From 35ff1ce1f649626354a664abdba70fb36ac236c4 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 20 Aug 2024 16:31:15 +0500 Subject: [PATCH 05/12] Don't include metadata of custom attributes. --- scrapy_zyte_api/providers.py | 2 +- tests/mockserver.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/scrapy_zyte_api/providers.py b/scrapy_zyte_api/providers.py index 6bf93507..148036c4 100644 --- a/scrapy_zyte_api/providers.py +++ b/scrapy_zyte_api/providers.py @@ -329,7 +329,7 @@ async def __call__( # noqa: C901 result = cls_stripped.from_dict(api_response.raw_api_response[kw]) # type: ignore[attr-defined] custom_attrs = api_response.raw_api_response.get("customAttributes") if custom_attrs: - result.customAttributes = custom_attrs # type: ignore[attr-defined] + result.customAttributes = custom_attrs.get("values", {}) # type: ignore[attr-defined] if is_typing_annotated(cls): result = AnnotatedInstance(result, cls.__metadata__) # type: ignore[attr-defined] results.append(result) diff --git a/tests/mockserver.py b/tests/mockserver.py index e8b17d9b..a6d67fef 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -232,8 +232,13 @@ def render_POST(self, request): if "customAttributes" in request_data: response_data["customAttributes"] = { - "attr1": "foo", - "attr2": 42, + "metadata": { + "totalInputTokens": "1000", + }, + "values": { + "attr1": "foo", + "attr2": 42, + }, } return json.dumps(response_data).encode() From 3f374f0341ab2d76cbd7ea8242aad4803433159f Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 17 Sep 2024 15:48:58 +0500 Subject: [PATCH 06/12] Switch the custom attrs implementation to a dependency. --- scrapy_zyte_api/providers.py | 40 +++++++++++- setup.py | 4 +- tests/mockserver.py | 2 +- tests/test_providers.py | 123 ++++++++++++++++++++++++++++------- tox.ini | 2 +- 5 files changed, 142 insertions(+), 29 deletions(-) diff --git a/scrapy_zyte_api/providers.py b/scrapy_zyte_api/providers.py index 148036c4..977a226b 100644 --- a/scrapy_zyte_api/providers.py +++ b/scrapy_zyte_api/providers.py @@ -26,6 +26,9 @@ AutoProductListPage, AutoProductNavigationPage, AutoProductPage, + CustomAttributes, + CustomAttributesMetadata, + CustomAttributesValues, Item, JobPosting, Product, @@ -76,6 +79,8 @@ class ZyteApiProvider(PageObjectInputProvider): ArticleNavigation, BrowserHtml, BrowserResponse, + CustomAttributes, + CustomAttributesValues, Geolocation, JobPosting, Product, @@ -186,6 +191,17 @@ async def __call__( # noqa: C901 } ) continue + if cls_stripped in {CustomAttributes, CustomAttributesValues}: + zyte_api_meta["customAttributes"] = { + k: ( + dict(v) + if isinstance(v, frozenset) + else list(v) if isinstance(v, tuple) else v + ) + for k, v in cls.__metadata__[0] # type: ignore[attr-defined] + } + + continue kw = _ITEM_KEYWORDS.get(cls_stripped) if not kw: continue @@ -322,14 +338,32 @@ async def __call__( # noqa: C901 result = AnnotatedInstance(Actions(actions_result), cls.__metadata__) # type: ignore[attr-defined] results.append(result) continue + if cls_stripped is CustomAttributes and is_typing_annotated(cls): + custom_attrs_result = api_response.raw_api_response["customAttributes"] + result = AnnotatedInstance( + CustomAttributes( + CustomAttributesValues(custom_attrs_result["values"]), + CustomAttributesMetadata.from_dict( + custom_attrs_result["metadata"] + ), + ), + cls.__metadata__, # type: ignore[attr-defined] + ) + results.append(result) + continue + if cls_stripped is CustomAttributesValues and is_typing_annotated(cls): + custom_attrs_result = api_response.raw_api_response["customAttributes"] + result = AnnotatedInstance( + CustomAttributesValues(custom_attrs_result["values"]), + cls.__metadata__, # type: ignore[attr-defined] + ) + results.append(result) + continue kw = _ITEM_KEYWORDS.get(cls_stripped) if not kw: continue assert issubclass(cls_stripped, Item) result = cls_stripped.from_dict(api_response.raw_api_response[kw]) # type: ignore[attr-defined] - custom_attrs = api_response.raw_api_response.get("customAttributes") - if custom_attrs: - result.customAttributes = custom_attrs.get("values", {}) # type: ignore[attr-defined] if is_typing_annotated(cls): result = AnnotatedInstance(result, cls.__metadata__) # type: ignore[attr-defined] results.append(result) diff --git a/setup.py b/setup.py index 5c317eda..24a184be 100644 --- a/setup.py +++ b/setup.py @@ -33,8 +33,8 @@ def get_version(): "andi>=0.6.0", "scrapy-poet>=0.22.3", "web-poet>=0.17.0", - # https://github.com/zytedata/zyte-common-items/pull/100 - "zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs", + # https://github.com/zytedata/zyte-common-items/pull/106 + "zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs-dep", ] }, classifiers=[ diff --git a/tests/mockserver.py b/tests/mockserver.py index a6d67fef..7567a3f1 100644 --- a/tests/mockserver.py +++ b/tests/mockserver.py @@ -233,7 +233,7 @@ def render_POST(self, request): if "customAttributes" in request_data: response_data["customAttributes"] = { "metadata": { - "totalInputTokens": "1000", + "textInputTokens": 1000, }, "values": { "attr1": "foo", diff --git a/tests/test_providers.py b/tests/test_providers.py index f2052600..c3d851e0 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -3,6 +3,8 @@ import pytest +from scrapy_zyte_api._annotations import make_hashable + pytest.importorskip("scrapy_poet") import attrs @@ -24,7 +26,14 @@ handle_urls, ) from web_poet.pages import get_item_cls -from zyte_common_items import AutoProductPage, BasePage, BaseProductPage, Product +from zyte_common_items import ( + AutoProductPage, + BasePage, + BaseProductPage, + CustomAttributes, + CustomAttributesValues, + Product, +) from zyte_common_items.fields import auto_field from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot, actions @@ -394,31 +403,105 @@ def parse_(self, response: DummyResponse, page: GeoProductPage): # type: ignore assert "Geolocation dependencies must be annotated" in caplog.text +@pytest.mark.skipif( + sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9" +) @ensureDeferred async def test_provider_custom_attrs(mockserver): + from typing import Annotated + + @attrs.define + class CustomAttrsPage(BasePage): + product: Product + custom_attrs: Annotated[ + CustomAttributes, + make_hashable( + { + "attr1": {"type": "string", "description": "descr1"}, + "attr2": {"type": "number", "description": "descr2"}, + } + ), + ] + + class CustomAttrsZyteAPISpider(ZyteAPISpider): + def parse_(self, response: DummyResponse, page: CustomAttrsPage): # type: ignore[override] + yield { + "product": page.product, + "custom_attrs": page.custom_attrs, + } + settings = create_scrapy_settings() settings["ZYTE_API_URL"] = mockserver.urljoin("/") settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} - settings["ZYTE_API_PROVIDER_PARAMS"] = { - "customAttributes": { - "attr1": {"type": "string", "description": "descr1"}, - "attr2": {"type": "number", "description": "descr2"}, - } - } - item, url, _ = await crawl_single_item(ZyteAPISpider, HtmlResource, settings) + item, url, _ = await crawl_single_item( + CustomAttrsZyteAPISpider, HtmlResource, settings + ) assert item["product"] == Product.from_dict( dict( url=url, name="Product name", price="10", currency="USD", - customAttributes={ + ) + ) + assert item["custom_attrs"] == CustomAttributes.from_dict( + { + "values": { "attr1": "foo", "attr2": 42, }, + "metadata": {"textInputTokens": 1000}, + } + ) + + +@pytest.mark.skipif( + sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9" +) +@ensureDeferred +async def test_provider_custom_attrs_values(mockserver): + from typing import Annotated + + @attrs.define + class CustomAttrsPage(BasePage): + product: Product + custom_attrs: Annotated[ + CustomAttributesValues, + make_hashable( + { + "attr1": {"type": "string", "description": "descr1"}, + "attr2": {"type": "number", "description": "descr2"}, + } + ), + ] + + class CustomAttrsZyteAPISpider(ZyteAPISpider): + def parse_(self, response: DummyResponse, page: CustomAttrsPage): # type: ignore[override] + yield { + "product": page.product, + "custom_attrs": page.custom_attrs, + } + + settings = create_scrapy_settings() + settings["ZYTE_API_URL"] = mockserver.urljoin("/") + settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0} + + item, url, _ = await crawl_single_item( + CustomAttrsZyteAPISpider, HtmlResource, settings + ) + assert item["product"] == Product.from_dict( + dict( + url=url, + name="Product name", + price="10", + currency="USD", ) ) + assert item["custom_attrs"] == { + "attr1": "foo", + "attr2": 42, + } class RecordingHandler(ScrapyZyteAPIDownloadHandler): @@ -1184,9 +1267,9 @@ def parse(self, response: DummyResponse, product: Product): assert auto_field_stats == { "scrapy-zyte-api/auto_fields/tests.test_providers.test_auto_field_stats_partial_override..MyProductPage": ( "additionalProperties aggregateRating availability breadcrumbs " - "canonicalUrl color currency currencyRaw customAttributes description " - "descriptionHtml features gtin images mainImage metadata mpn price " - "productId regularPrice size sku style url variants" + "canonicalUrl color currency currencyRaw description descriptionHtml " + "features gtin images mainImage metadata mpn price productId " + "regularPrice size sku style url variants" ), } @@ -1240,10 +1323,6 @@ def currency(self): def currencyRaw(self): return self.product.currencyRaw - @field - def customAttributes(self): - return self.product.customAttributes - @field def description(self): return self.product.description @@ -1428,9 +1507,9 @@ def parse(self, response: DummyResponse, page: MyProductPage): assert auto_field_stats == { "scrapy-zyte-api/auto_fields/tests.test_providers.test_auto_field_stats_item_page_override..MyProductPage": ( "additionalProperties aggregateRating availability breadcrumbs " - "canonicalUrl color currency currencyRaw customAttributes description " - "descriptionHtml features gtin images mainImage metadata mpn price " - "productId regularPrice size sku style url variants" + "canonicalUrl color currency currencyRaw description descriptionHtml " + "features gtin images mainImage metadata mpn price productId " + "regularPrice size sku style url variants" ), } @@ -1494,9 +1573,9 @@ def parse(self, response: DummyResponse, page: AltProductPage): assert auto_field_stats == { "scrapy-zyte-api/auto_fields/tests.test_providers.test_auto_field_stats_alt_page_override..MyProductPage": ( "additionalProperties aggregateRating availability breadcrumbs " - "canonicalUrl color currency currencyRaw customAttributes description " - "descriptionHtml features gtin images mainImage metadata mpn price " - "productId regularPrice size sku style url variants" + "canonicalUrl color currency currencyRaw description descriptionHtml " + "features gtin images mainImage metadata mpn price productId " + "regularPrice size sku style url variants" ), } diff --git a/tox.ini b/tox.ini index 50019795..6be23c3f 100644 --- a/tox.ini +++ b/tox.ini @@ -90,7 +90,7 @@ deps = andi==0.6.0 scrapy-poet==0.22.3 web-poet==0.17.0 - zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs + zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs-dep [testenv:pinned-extra] basepython=python3.8 From bcc9d30167de8d229afa18dc27b8a1597d2e5836 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Wed, 18 Sep 2024 21:51:04 +0500 Subject: [PATCH 07/12] Add support for customAttributesOptions. --- scrapy_zyte_api/_annotations.py | 24 +++++++++++++++++++--- scrapy_zyte_api/providers.py | 26 +++++++----------------- tests/test_providers.py | 36 +++++++++++++++++---------------- 3 files changed, 47 insertions(+), 39 deletions(-) diff --git a/scrapy_zyte_api/_annotations.py b/scrapy_zyte_api/_annotations.py index 20336b59..2bad453a 100644 --- a/scrapy_zyte_api/_annotations.py +++ b/scrapy_zyte_api/_annotations.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Iterable, List, Optional, TypedDict +from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Tuple, TypedDict class ExtractFrom(str, Enum): @@ -56,7 +56,7 @@ class _ActionResult(TypedDict, total=False): error: Optional[str] -def make_hashable(obj): +def make_hashable(obj: Any) -> Any: if isinstance(obj, (tuple, list)): return tuple((make_hashable(e) for e in obj)) @@ -66,7 +66,25 @@ def make_hashable(obj): return obj -def actions(value: Iterable[Action]): +def _from_hashable(obj: Any) -> Any: + if isinstance(obj, tuple): + return [_from_hashable(o) for o in obj] + + if isinstance(obj, frozenset): + return {_from_hashable(k): _from_hashable(v) for k, v in obj} + + return obj + + +def actions(value: Iterable[Action]) -> Tuple[Any, ...]: """Convert an iterable of :class:`~scrapy_zyte_api.Action` dicts into a hashable value.""" # both lists and dicts are not hashable and we need dep types to be hashable return tuple(make_hashable(action) for action in value) + + +def custom_attrs( + input: Dict[str, Any], options: Optional[Dict[str, Any]] = None +) -> Tuple[FrozenSet[Any], Optional[FrozenSet[Any]]]: + input_wrapped = make_hashable(input) + options_wrapped = make_hashable(options) if options else None + return input_wrapped, options_wrapped diff --git a/scrapy_zyte_api/providers.py b/scrapy_zyte_api/providers.py index 977a226b..23c37869 100644 --- a/scrapy_zyte_api/providers.py +++ b/scrapy_zyte_api/providers.py @@ -38,7 +38,7 @@ from zyte_common_items.fields import is_auto_field from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot -from scrapy_zyte_api._annotations import _ActionResult +from scrapy_zyte_api._annotations import _ActionResult, _from_hashable from scrapy_zyte_api.responses import ZyteAPITextResponse try: @@ -180,27 +180,15 @@ async def __call__( # noqa: C901 ) zyte_api_meta["actions"] = [] for action in cls.__metadata__[0]: # type: ignore[attr-defined] - zyte_api_meta["actions"].append( - { - k: ( - dict(v) - if isinstance(v, frozenset) - else list(v) if isinstance(v, tuple) else v - ) - for k, v in action - } - ) + zyte_api_meta["actions"].append(_from_hashable(action)) continue if cls_stripped in {CustomAttributes, CustomAttributesValues}: - zyte_api_meta["customAttributes"] = { - k: ( - dict(v) - if isinstance(v, frozenset) - else list(v) if isinstance(v, tuple) else v + custom_attrs_input, custom_attrs_options = cls.__metadata__[0] # type: ignore[attr-defined] + zyte_api_meta["customAttributes"] = _from_hashable(custom_attrs_input) + if custom_attrs_options: + zyte_api_meta["customAttributesOptions"] = _from_hashable( + custom_attrs_options ) - for k, v in cls.__metadata__[0] # type: ignore[attr-defined] - } - continue kw = _ITEM_KEYWORDS.get(cls_stripped) if not kw: diff --git a/tests/test_providers.py b/tests/test_providers.py index c3d851e0..10bef8cf 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -3,7 +3,7 @@ import pytest -from scrapy_zyte_api._annotations import make_hashable +from scrapy_zyte_api._annotations import custom_attrs pytest.importorskip("scrapy_poet") @@ -403,25 +403,32 @@ def parse_(self, response: DummyResponse, page: GeoProductPage): # type: ignore assert "Geolocation dependencies must be annotated" in caplog.text +custom_attrs_input = { + "attr1": {"type": "string", "description": "descr1"}, + "attr2": {"type": "number", "description": "descr2"}, +} + + @pytest.mark.skipif( sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9" ) +@pytest.mark.parametrize( + "annotation", + [ + custom_attrs(custom_attrs_input), + custom_attrs(custom_attrs_input, None), + custom_attrs(custom_attrs_input, {}), + custom_attrs(custom_attrs_input, {"foo": "bar"}), + ], +) @ensureDeferred -async def test_provider_custom_attrs(mockserver): +async def test_provider_custom_attrs(mockserver, annotation): from typing import Annotated @attrs.define class CustomAttrsPage(BasePage): product: Product - custom_attrs: Annotated[ - CustomAttributes, - make_hashable( - { - "attr1": {"type": "string", "description": "descr1"}, - "attr2": {"type": "number", "description": "descr2"}, - } - ), - ] + custom_attrs: Annotated[CustomAttributes, annotation] class CustomAttrsZyteAPISpider(ZyteAPISpider): def parse_(self, response: DummyResponse, page: CustomAttrsPage): # type: ignore[override] @@ -468,12 +475,7 @@ class CustomAttrsPage(BasePage): product: Product custom_attrs: Annotated[ CustomAttributesValues, - make_hashable( - { - "attr1": {"type": "string", "description": "descr1"}, - "attr2": {"type": "number", "description": "descr2"}, - } - ), + custom_attrs(custom_attrs_input), ] class CustomAttrsZyteAPISpider(ZyteAPISpider): From 0c5c907d837e62b18c1e9fb6b224a52ffdc707e8 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 19 Sep 2024 14:57:24 +0500 Subject: [PATCH 08/12] Add tests for annotation helpers. --- scrapy_zyte_api/_annotations.py | 2 + tests/test_annotations.py | 111 ++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 tests/test_annotations.py diff --git a/scrapy_zyte_api/_annotations.py b/scrapy_zyte_api/_annotations.py index 2bad453a..fca9d7fb 100644 --- a/scrapy_zyte_api/_annotations.py +++ b/scrapy_zyte_api/_annotations.py @@ -57,6 +57,7 @@ class _ActionResult(TypedDict, total=False): def make_hashable(obj: Any) -> Any: + """Converts input into hashable form, to use in ``Annotated``.""" if isinstance(obj, (tuple, list)): return tuple((make_hashable(e) for e in obj)) @@ -67,6 +68,7 @@ def make_hashable(obj: Any) -> Any: def _from_hashable(obj: Any) -> Any: + """Converts a result of ``make_hashable`` back to original form.""" if isinstance(obj, tuple): return [_from_hashable(o) for o in obj] diff --git a/tests/test_annotations.py b/tests/test_annotations.py new file mode 100644 index 00000000..95bf60f6 --- /dev/null +++ b/tests/test_annotations.py @@ -0,0 +1,111 @@ +import pytest + +from scrapy_zyte_api._annotations import ( + _from_hashable, + actions, + custom_attrs, + make_hashable, +) + + +@pytest.mark.parametrize( + "input,expected", + [ + ([], ()), + (set(), ()), + ({}, frozenset()), + ("foo", "foo"), + (["foo"], ("foo",)), + (42, 42), + ( + {"action": "foo", "id": "xx"}, + frozenset({("action", "foo"), ("id", "xx")}), + ), + ( + [{"action": "foo", "id": "xx"}, {"action": "bar"}], + ( + frozenset({("action", "foo"), ("id", "xx")}), + frozenset({("action", "bar")}), + ), + ), + ( + {"action": "foo", "options": {"a": "b", "c": ["d", "e"]}}, + frozenset( + { + ("action", "foo"), + ("options", frozenset({("a", "b"), ("c", ("d", "e"))})), + } + ), + ), + ], +) +def test_make_hashable(input, expected): + assert make_hashable(input) == expected + + +@pytest.mark.parametrize( + "input,expected", + [ + ((), []), + (frozenset(), {}), + ("foo", "foo"), + (("foo",), ["foo"]), + (42, 42), + ( + frozenset({("action", "foo"), ("id", "xx")}), + {"action": "foo", "id": "xx"}, + ), + ( + ( + frozenset({("action", "foo"), ("id", "xx")}), + frozenset({("action", "bar")}), + ), + [{"action": "foo", "id": "xx"}, {"action": "bar"}], + ), + ( + frozenset( + { + ("action", "foo"), + ("options", frozenset({("a", "b"), ("c", ("d", "e"))})), + } + ), + {"action": "foo", "options": {"a": "b", "c": ["d", "e"]}}, + ), + ], +) +def test_from_hashable(input, expected): + assert _from_hashable(input) == expected + + +@pytest.mark.parametrize( + "input,expected", + [ + ([], ()), + ([{}], (frozenset(),)), + ( + [{"action": "foo"}, {"action": "bar"}], + ( + frozenset({("action", "foo")}), + frozenset({("action", "bar")}), + ), + ), + ], +) +def test_actions(input, expected): + assert actions(input) == expected + + +@pytest.mark.parametrize( + "input,options,expected", + [ + ({}, None, (frozenset(), None)), + ({"foo": "bar"}, None, (frozenset({("foo", "bar")}), None)), + ( + {"foo": "bar"}, + {"tokens": 42}, + (frozenset({("foo", "bar")}), frozenset({("tokens", 42)})), + ), + ], +) +def test_custom_attrs(input, options, expected): + assert custom_attrs(input, options) == expected From 4f19d5de2133641884cab42deee867a95f9272e5 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 19 Sep 2024 14:59:12 +0500 Subject: [PATCH 09/12] Specify the zyte-common-items commit. --- setup.py | 2 +- tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 24a184be..0cf430da 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ def get_version(): "scrapy-poet>=0.22.3", "web-poet>=0.17.0", # https://github.com/zytedata/zyte-common-items/pull/106 - "zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs-dep", + "zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@9b8cf84", ] }, classifiers=[ diff --git a/tox.ini b/tox.ini index 6be23c3f..d5a3e4be 100644 --- a/tox.ini +++ b/tox.ini @@ -90,7 +90,7 @@ deps = andi==0.6.0 scrapy-poet==0.22.3 web-poet==0.17.0 - zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs-dep + zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@9b8cf84 [testenv:pinned-extra] basepython=python3.8 From 3d1501c40b2abe230ed69ac842b81bb074d3036a Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 19 Sep 2024 15:26:02 +0500 Subject: [PATCH 10/12] Update to the released zyte-common-items. --- setup.py | 3 +-- tox.ini | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 0cf430da..05817c46 100644 --- a/setup.py +++ b/setup.py @@ -33,8 +33,7 @@ def get_version(): "andi>=0.6.0", "scrapy-poet>=0.22.3", "web-poet>=0.17.0", - # https://github.com/zytedata/zyte-common-items/pull/106 - "zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@9b8cf84", + "zyte-common-items>=0.23.0", ] }, classifiers=[ diff --git a/tox.ini b/tox.ini index d5a3e4be..6d601b24 100644 --- a/tox.ini +++ b/tox.ini @@ -90,7 +90,7 @@ deps = andi==0.6.0 scrapy-poet==0.22.3 web-poet==0.17.0 - zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@9b8cf84 + zyte-common-items==0.23.0 [testenv:pinned-extra] basepython=python3.8 From ceb67a7c8e1b6144d88e0fa2fd080cd667052d55 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 19 Sep 2024 15:26:51 +0500 Subject: [PATCH 11/12] Fix tests. --- tests/test_annotations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 95bf60f6..a6537d00 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -12,7 +12,6 @@ "input,expected", [ ([], ()), - (set(), ()), ({}, frozenset()), ("foo", "foo"), (["foo"], ("foo",)), From b7e22e7bbf53ee5b61663e8f813fadf661f7c0ff Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 19 Sep 2024 18:13:04 +0500 Subject: [PATCH 12/12] Export ._annotations.custom_attrs. --- scrapy_zyte_api/__init__.py | 2 +- tests/test_providers.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/scrapy_zyte_api/__init__.py b/scrapy_zyte_api/__init__.py index 88bec2fb..3fb7584e 100644 --- a/scrapy_zyte_api/__init__.py +++ b/scrapy_zyte_api/__init__.py @@ -5,7 +5,7 @@ install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor") -from ._annotations import ExtractFrom, actions +from ._annotations import ExtractFrom, actions, custom_attrs from ._middlewares import ( ScrapyZyteAPIDownloaderMiddleware, ScrapyZyteAPISpiderMiddleware, diff --git a/tests/test_providers.py b/tests/test_providers.py index 10bef8cf..c5a935be 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -3,8 +3,6 @@ import pytest -from scrapy_zyte_api._annotations import custom_attrs - pytest.importorskip("scrapy_poet") import attrs @@ -36,7 +34,14 @@ ) from zyte_common_items.fields import auto_field -from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot, actions +from scrapy_zyte_api import ( + Actions, + ExtractFrom, + Geolocation, + Screenshot, + actions, + custom_attrs, +) from scrapy_zyte_api.handler import ScrapyZyteAPIDownloadHandler from scrapy_zyte_api.providers import _AUTO_PAGES, _ITEM_KEYWORDS, ZyteApiProvider