Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle custom attributes received in the API response. #213

Merged
merged 13 commits into from
Sep 19, 2024
2 changes: 1 addition & 1 deletion scrapy_zyte_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")

from ._annotations import ExtractFrom, actions
from ._annotations import ExtractFrom, actions, custom_attrs
from ._middlewares import (
ScrapyZyteAPIDownloaderMiddleware,
ScrapyZyteAPISpiderMiddleware,
Expand Down
26 changes: 23 additions & 3 deletions scrapy_zyte_api/_annotations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from typing import Iterable, List, Optional, TypedDict
from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Tuple, TypedDict


class ExtractFrom(str, Enum):
Expand Down Expand Up @@ -56,7 +56,8 @@ class _ActionResult(TypedDict, total=False):
error: Optional[str]


def make_hashable(obj):
def make_hashable(obj: Any) -> Any:
"""Converts input into hashable form, to use in ``Annotated``."""
if isinstance(obj, (tuple, list)):
return tuple((make_hashable(e) for e in obj))

Expand All @@ -66,7 +67,26 @@ def make_hashable(obj):
return obj


def actions(value: Iterable[Action]):
def _from_hashable(obj: Any) -> Any:
"""Converts a result of ``make_hashable`` back to original form."""
if isinstance(obj, tuple):
return [_from_hashable(o) for o in obj]

if isinstance(obj, frozenset):
return {_from_hashable(k): _from_hashable(v) for k, v in obj}

return obj


def actions(value: Iterable[Action]) -> Tuple[Any, ...]:
    """Return the given :class:`~scrapy_zyte_api.Action` dicts as a hashable tuple.

    Every action in *value* is passed through ``make_hashable`` and the
    results are collected, in order, into a tuple.
    """
    # Dependency types must be hashable, which lists and dicts are not.
    return tuple(map(make_hashable, value))


def custom_attrs(
    input: Dict[str, Any], options: Optional[Dict[str, Any]] = None
) -> Tuple[FrozenSet[Any], Optional[FrozenSet[Any]]]:
    """Return hashable forms of a custom attributes input and its options.

    *input* is always converted with ``make_hashable``. *options* is
    converted only when it is truthy; for ``None`` or an empty dict the
    second element of the returned pair is ``None``.
    """
    hashable_input = make_hashable(input)
    if not options:
        return hashable_input, None
    return hashable_input, make_hashable(options)
45 changes: 35 additions & 10 deletions scrapy_zyte_api/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
AutoProductListPage,
AutoProductNavigationPage,
AutoProductPage,
CustomAttributes,
CustomAttributesMetadata,
CustomAttributesValues,
Item,
JobPosting,
Product,
Expand All @@ -35,7 +38,7 @@
from zyte_common_items.fields import is_auto_field

from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot
from scrapy_zyte_api._annotations import _ActionResult
from scrapy_zyte_api._annotations import _ActionResult, _from_hashable
from scrapy_zyte_api.responses import ZyteAPITextResponse

try:
Expand Down Expand Up @@ -76,6 +79,8 @@
ArticleNavigation,
BrowserHtml,
BrowserResponse,
CustomAttributes,
CustomAttributesValues,
Geolocation,
JobPosting,
Product,
Expand Down Expand Up @@ -175,15 +180,14 @@
)
zyte_api_meta["actions"] = []
for action in cls.__metadata__[0]: # type: ignore[attr-defined]
zyte_api_meta["actions"].append(
{
k: (
dict(v)
if isinstance(v, frozenset)
else list(v) if isinstance(v, tuple) else v
)
for k, v in action
}
zyte_api_meta["actions"].append(_from_hashable(action))
continue

Check warning on line 184 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L183-L184

Added lines #L183 - L184 were not covered by tests
if cls_stripped in {CustomAttributes, CustomAttributesValues}:
custom_attrs_input, custom_attrs_options = cls.__metadata__[0] # type: ignore[attr-defined]
zyte_api_meta["customAttributes"] = _from_hashable(custom_attrs_input)

Check warning on line 187 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L186-L187

Added lines #L186 - L187 were not covered by tests
if custom_attrs_options:
zyte_api_meta["customAttributesOptions"] = _from_hashable(

Check warning on line 189 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L189

Added line #L189 was not covered by tests
custom_attrs_options
)
continue
kw = _ITEM_KEYWORDS.get(cls_stripped)
Expand Down Expand Up @@ -322,6 +326,27 @@
result = AnnotatedInstance(Actions(actions_result), cls.__metadata__) # type: ignore[attr-defined]
results.append(result)
continue
if cls_stripped is CustomAttributes and is_typing_annotated(cls):
custom_attrs_result = api_response.raw_api_response["customAttributes"]
result = AnnotatedInstance(

Check warning on line 331 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L330-L331

Added lines #L330 - L331 were not covered by tests
CustomAttributes(
CustomAttributesValues(custom_attrs_result["values"]),
CustomAttributesMetadata.from_dict(
custom_attrs_result["metadata"]
),
),
cls.__metadata__, # type: ignore[attr-defined]
)
results.append(result)
continue

Check warning on line 341 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L340-L341

Added lines #L340 - L341 were not covered by tests
if cls_stripped is CustomAttributesValues and is_typing_annotated(cls):
custom_attrs_result = api_response.raw_api_response["customAttributes"]
result = AnnotatedInstance(

Check warning on line 344 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L343-L344

Added lines #L343 - L344 were not covered by tests
CustomAttributesValues(custom_attrs_result["values"]),
cls.__metadata__, # type: ignore[attr-defined]
)
results.append(result)
continue

Check warning on line 349 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L348-L349

Added lines #L348 - L349 were not covered by tests
kw = _ITEM_KEYWORDS.get(cls_stripped)
if not kw:
continue
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def get_version():
"andi>=0.6.0",
"scrapy-poet>=0.22.3",
"web-poet>=0.17.0",
"zyte-common-items>=0.20.0",
"zyte-common-items>=0.23.0",
]
},
classifiers=[
Expand Down
11 changes: 11 additions & 0 deletions tests/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,17 @@ def render_POST(self, request):
"name"
] += f" (country {request_data['geolocation']})"

if "customAttributes" in request_data:
response_data["customAttributes"] = {
"metadata": {
"textInputTokens": 1000,
},
"values": {
"attr1": "foo",
"attr2": 42,
},
}

return json.dumps(response_data).encode()


Expand Down
110 changes: 110 additions & 0 deletions tests/test_annotations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import pytest

from scrapy_zyte_api._annotations import (
_from_hashable,
actions,
custom_attrs,
make_hashable,
)


# (plain value, expected hashable form) pairs fed to ``make_hashable``.
_MAKE_HASHABLE_CASES = [
    ([], ()),
    ({}, frozenset()),
    ("foo", "foo"),
    (["foo"], ("foo",)),
    (42, 42),
    (
        {"action": "foo", "id": "xx"},
        frozenset({("action", "foo"), ("id", "xx")}),
    ),
    (
        [{"action": "foo", "id": "xx"}, {"action": "bar"}],
        (
            frozenset({("action", "foo"), ("id", "xx")}),
            frozenset({("action", "bar")}),
        ),
    ),
    (
        {"action": "foo", "options": {"a": "b", "c": ["d", "e"]}},
        frozenset(
            {
                ("action", "foo"),
                ("options", frozenset({("a", "b"), ("c", ("d", "e"))})),
            }
        ),
    ),
]


@pytest.mark.parametrize("input,expected", _MAKE_HASHABLE_CASES)
def test_make_hashable(input, expected):
    """Lists become tuples and dicts become frozensets of pairs, recursively."""
    result = make_hashable(input)
    assert result == expected


# (hashable value, expected plain form) pairs fed to ``_from_hashable``.
_FROM_HASHABLE_CASES = [
    ((), []),
    (frozenset(), {}),
    ("foo", "foo"),
    (("foo",), ["foo"]),
    (42, 42),
    (
        frozenset({("action", "foo"), ("id", "xx")}),
        {"action": "foo", "id": "xx"},
    ),
    (
        (
            frozenset({("action", "foo"), ("id", "xx")}),
            frozenset({("action", "bar")}),
        ),
        [{"action": "foo", "id": "xx"}, {"action": "bar"}],
    ),
    (
        frozenset(
            {
                ("action", "foo"),
                ("options", frozenset({("a", "b"), ("c", ("d", "e"))})),
            }
        ),
        {"action": "foo", "options": {"a": "b", "c": ["d", "e"]}},
    ),
]


@pytest.mark.parametrize("input,expected", _FROM_HASHABLE_CASES)
def test_from_hashable(input, expected):
    """Tuples become lists and frozensets of pairs become dicts, recursively."""
    result = _from_hashable(input)
    assert result == expected


# (list of action dicts, expected hashable tuple) pairs fed to ``actions``.
_ACTIONS_CASES = [
    ([], ()),
    ([{}], (frozenset(),)),
    (
        [{"action": "foo"}, {"action": "bar"}],
        (
            frozenset({("action", "foo")}),
            frozenset({("action", "bar")}),
        ),
    ),
]


@pytest.mark.parametrize("input,expected", _ACTIONS_CASES)
def test_actions(input, expected):
    """``actions`` yields a tuple holding one frozenset per action dict."""
    result = actions(input)
    assert result == expected


# (input dict, options, expected (input, options) pair) fed to ``custom_attrs``.
_CUSTOM_ATTRS_CASES = [
    ({}, None, (frozenset(), None)),
    ({"foo": "bar"}, None, (frozenset({("foo", "bar")}), None)),
    (
        {"foo": "bar"},
        {"tokens": 42},
        (frozenset({("foo", "bar")}), frozenset({("tokens", 42)})),
    ),
]


@pytest.mark.parametrize("input,options,expected", _CUSTOM_ATTRS_CASES)
def test_custom_attrs(input, options, expected):
    """``custom_attrs`` wraps the input, and wraps options only when given."""
    result = custom_attrs(input, options)
    assert result == expected
Loading