Skip to content

Commit

Permalink
Merge pull request #213 from scrapy-plugins/custom-attrs
Browse files Browse the repository at this point in the history
Handle custom attributes received in the API response.
  • Loading branch information
wRAR authored Sep 19, 2024
2 parents c1ba9c0 + b7e22e7 commit 47d3e1f
Show file tree
Hide file tree
Showing 8 changed files with 301 additions and 18 deletions.
2 changes: 1 addition & 1 deletion scrapy_zyte_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")

from ._annotations import ExtractFrom, actions
from ._annotations import ExtractFrom, actions, custom_attrs
from ._middlewares import (
ScrapyZyteAPIDownloaderMiddleware,
ScrapyZyteAPISpiderMiddleware,
Expand Down
26 changes: 23 additions & 3 deletions scrapy_zyte_api/_annotations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from typing import Iterable, List, Optional, TypedDict
from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Tuple, TypedDict


class ExtractFrom(str, Enum):
Expand Down Expand Up @@ -56,7 +56,8 @@ class _ActionResult(TypedDict, total=False):
error: Optional[str]


def make_hashable(obj):
def make_hashable(obj: Any) -> Any:
"""Converts input into hashable form, to use in ``Annotated``."""
if isinstance(obj, (tuple, list)):
return tuple((make_hashable(e) for e in obj))

Expand All @@ -66,7 +67,26 @@ def make_hashable(obj):
return obj


def actions(value: Iterable[Action]):
def _from_hashable(obj: Any) -> Any:
"""Converts a result of ``make_hashable`` back to original form."""
if isinstance(obj, tuple):
return [_from_hashable(o) for o in obj]

if isinstance(obj, frozenset):
return {_from_hashable(k): _from_hashable(v) for k, v in obj}

return obj


def actions(value: Iterable[Action]) -> Tuple[Any, ...]:
    """Return a hashable tuple built from :class:`~scrapy_zyte_api.Action` dicts.

    Every action in *value* is passed through ``make_hashable`` and the
    results are collected, in order, into a tuple.
    """
    # Dependency types have to be hashable, which plain lists and dicts are not.
    hashable_actions = [make_hashable(item) for item in value]
    return tuple(hashable_actions)


def custom_attrs(
    input: Dict[str, Any], options: Optional[Dict[str, Any]] = None
) -> Tuple[FrozenSet[Any], Optional[FrozenSet[Any]]]:
    """Return the hashable ``(input, options)`` pair for custom attributes.

    *input* is always converted with ``make_hashable``. *options* is
    converted the same way only when it is truthy; a missing or empty
    *options* yields ``None`` in the second position.
    """
    hashable_input = make_hashable(input)
    if not options:
        return hashable_input, None
    return hashable_input, make_hashable(options)
45 changes: 35 additions & 10 deletions scrapy_zyte_api/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
AutoProductListPage,
AutoProductNavigationPage,
AutoProductPage,
CustomAttributes,
CustomAttributesMetadata,
CustomAttributesValues,
Item,
JobPosting,
Product,
Expand All @@ -35,7 +38,7 @@
from zyte_common_items.fields import is_auto_field

from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot
from scrapy_zyte_api._annotations import _ActionResult
from scrapy_zyte_api._annotations import _ActionResult, _from_hashable
from scrapy_zyte_api.responses import ZyteAPITextResponse

try:
Expand Down Expand Up @@ -76,6 +79,8 @@ class ZyteApiProvider(PageObjectInputProvider):
ArticleNavigation,
BrowserHtml,
BrowserResponse,
CustomAttributes,
CustomAttributesValues,
Geolocation,
JobPosting,
Product,
Expand Down Expand Up @@ -175,15 +180,14 @@ async def __call__( # noqa: C901
)
zyte_api_meta["actions"] = []
for action in cls.__metadata__[0]: # type: ignore[attr-defined]
zyte_api_meta["actions"].append(
{
k: (
dict(v)
if isinstance(v, frozenset)
else list(v) if isinstance(v, tuple) else v
)
for k, v in action
}
zyte_api_meta["actions"].append(_from_hashable(action))
continue
if cls_stripped in {CustomAttributes, CustomAttributesValues}:
custom_attrs_input, custom_attrs_options = cls.__metadata__[0] # type: ignore[attr-defined]
zyte_api_meta["customAttributes"] = _from_hashable(custom_attrs_input)
if custom_attrs_options:
zyte_api_meta["customAttributesOptions"] = _from_hashable(
custom_attrs_options
)
continue
kw = _ITEM_KEYWORDS.get(cls_stripped)
Expand Down Expand Up @@ -322,6 +326,27 @@ async def __call__( # noqa: C901
result = AnnotatedInstance(Actions(actions_result), cls.__metadata__) # type: ignore[attr-defined]
results.append(result)
continue
if cls_stripped is CustomAttributes and is_typing_annotated(cls):
custom_attrs_result = api_response.raw_api_response["customAttributes"]
result = AnnotatedInstance(
CustomAttributes(
CustomAttributesValues(custom_attrs_result["values"]),
CustomAttributesMetadata.from_dict(
custom_attrs_result["metadata"]
),
),
cls.__metadata__, # type: ignore[attr-defined]
)
results.append(result)
continue
if cls_stripped is CustomAttributesValues and is_typing_annotated(cls):
custom_attrs_result = api_response.raw_api_response["customAttributes"]
result = AnnotatedInstance(
CustomAttributesValues(custom_attrs_result["values"]),
cls.__metadata__, # type: ignore[attr-defined]
)
results.append(result)
continue
kw = _ITEM_KEYWORDS.get(cls_stripped)
if not kw:
continue
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def get_version():
"andi>=0.6.0",
"scrapy-poet>=0.22.3",
"web-poet>=0.17.0",
"zyte-common-items>=0.20.0",
"zyte-common-items>=0.23.0",
]
},
classifiers=[
Expand Down
11 changes: 11 additions & 0 deletions tests/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,17 @@ def render_POST(self, request):
"name"
] += f" (country {request_data['geolocation']})"

if "customAttributes" in request_data:
response_data["customAttributes"] = {
"metadata": {
"textInputTokens": 1000,
},
"values": {
"attr1": "foo",
"attr2": 42,
},
}

return json.dumps(response_data).encode()


Expand Down
110 changes: 110 additions & 0 deletions tests/test_annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import pytest

from scrapy_zyte_api._annotations import (
_from_hashable,
actions,
custom_attrs,
make_hashable,
)


# Checks make_hashable() against a table of (input, expected) pairs:
# lists map to tuples, dicts map to frozensets of (key, value) pairs,
# and scalars are returned unchanged.
@pytest.mark.parametrize(
"input,expected",
[
# Empty containers and scalars.
([], ()),
({}, frozenset()),
("foo", "foo"),
(["foo"], ("foo",)),
(42, 42),
# A flat dict.
(
{"action": "foo", "id": "xx"},
frozenset({("action", "foo"), ("id", "xx")}),
),
# A list of dicts.
(
[{"action": "foo", "id": "xx"}, {"action": "bar"}],
(
frozenset({("action", "foo"), ("id", "xx")}),
frozenset({("action", "bar")}),
),
),
# A dict nesting another dict, which in turn holds a list.
(
{"action": "foo", "options": {"a": "b", "c": ["d", "e"]}},
frozenset(
{
("action", "foo"),
("options", frozenset({("a", "b"), ("c", ("d", "e"))})),
}
),
),
],
)
def test_make_hashable(input, expected):
assert make_hashable(input) == expected


# Checks _from_hashable() against a table of (input, expected) pairs:
# tuples map to lists, frozensets of (key, value) pairs map to dicts,
# and scalars are returned unchanged.
@pytest.mark.parametrize(
"input,expected",
[
# Empty containers and scalars.
((), []),
(frozenset(), {}),
("foo", "foo"),
(("foo",), ["foo"]),
(42, 42),
# A flat frozenset of pairs.
(
frozenset({("action", "foo"), ("id", "xx")}),
{"action": "foo", "id": "xx"},
),
# A tuple of frozensets.
(
(
frozenset({("action", "foo"), ("id", "xx")}),
frozenset({("action", "bar")}),
),
[{"action": "foo", "id": "xx"}, {"action": "bar"}],
),
# A frozenset nesting another frozenset, which in turn holds a tuple.
(
frozenset(
{
("action", "foo"),
("options", frozenset({("a", "b"), ("c", ("d", "e"))})),
}
),
{"action": "foo", "options": {"a": "b", "c": ["d", "e"]}},
),
],
)
def test_from_hashable(input, expected):
assert _from_hashable(input) == expected


# Checks actions() against a table of (input, expected) pairs: the result
# is a tuple holding one frozenset of (key, value) pairs per input dict.
@pytest.mark.parametrize(
"input,expected",
[
# No actions at all, and a single empty action.
([], ()),
([{}], (frozenset(),)),
# Two actions; the expected tuple lists them in input order.
(
[{"action": "foo"}, {"action": "bar"}],
(
frozenset({("action", "foo")}),
frozenset({("action", "bar")}),
),
),
],
)
def test_actions(input, expected):
assert actions(input) == expected


# Checks custom_attrs() against a table of (input, options, expected)
# triples: the result is a 2-tuple of the hashable input and either the
# hashable options or None.
@pytest.mark.parametrize(
"input,options,expected",
[
# options=None gives None in the second position.
({}, None, (frozenset(), None)),
({"foo": "bar"}, None, (frozenset({("foo", "bar")}), None)),
# Non-empty options are converted to a frozenset of pairs.
(
{"foo": "bar"},
{"tokens": 42},
(frozenset({("foo", "bar")}), frozenset({("tokens", 42)})),
),
],
)
def test_custom_attrs(input, options, expected):
assert custom_attrs(input, options) == expected
Loading

0 comments on commit 47d3e1f

Please sign in to comment.