Skip to content

Commit

Permalink
expand_anchor_shorthand can be called with key of dict passed to schema
Browse files Browse the repository at this point in the history
  • Loading branch information
mscarey committed Jan 25, 2021
1 parent 0956fb7 commit 597cb57
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 22 deletions.
27 changes: 13 additions & 14 deletions anchorpoint/schemas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Schema for serializing text selectors."""

from typing import Dict, List, Mapping, Optional, Sequence, TypedDict, Union
from typing import Dict, Mapping, Optional, Sequence, TypedDict, Union

from marshmallow import Schema, fields, pre_dump, post_load, pre_load

Expand Down Expand Up @@ -52,25 +52,22 @@ def get_real_start_and_end(self, obj, many=False):
obj.end = obj.real_end
return obj

def expand_anchor_shorthand(self, data: str) -> Mapping[str, str]:
def expand_anchor_shorthand(self, text: str) -> Mapping[str, str]:
"""
Convert input from shorthand format to normal selector format.
.. code-block:: python
>>> schema = SelectorSchema()
>>> schema.expand_anchor_shorthand("eats,|shoots,|and leaves")
{'exact': 'shoots,', 'prefix': 'eats,', 'suffix': 'and leaves'}
"""
result = {"text": data}
text = result.pop("text", None)
if text:
(
result["prefix"],
result["exact"],
result["suffix"],
) = TextQuoteSelector.split_anchor_text(text)
result = {}
(
result["prefix"],
result["exact"],
result["suffix"],
) = TextQuoteSelector.split_anchor_text(text)
return result

def convert_bool_to_dict(self, data: bool) -> Dict[str, int]:
Expand All @@ -82,13 +79,14 @@ def convert_bool_to_dict(self, data: bool) -> Dict[str, int]:

@pre_load
def preprocess_data(
self, data: Union[str, Mapping[str, Union[str, bool, int]]], **kwargs
self, data: Union[str, bool, Mapping[str, Union[str, bool, int]]], **kwargs
) -> Mapping[str, Union[str, bool, int]]:
if isinstance(data, bool):
return self.convert_bool_to_dict(data)
if isinstance(data, str):
return self.expand_anchor_shorthand(data)

if "text" in data.keys() and isinstance(data["text"], str):
return self.expand_anchor_shorthand(data["text"])
return data

@post_load
Expand Down Expand Up @@ -132,7 +130,8 @@ def from_selection(
return TextPositionSet()
if isinstance(selection, str):
schema = SelectorSchema()
selection = schema.load(selection)
data = schema.expand_anchor_shorthand(selection)
selection = schema.load(data)
if isinstance(selection, TextQuoteSelector):
selection = [selection]
elif isinstance(selection, TextPositionSelector):
Expand Down
2 changes: 1 addition & 1 deletion anchorpoint/textselectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import re
from dataclasses import dataclass

from typing import List, Optional, Sequence, Tuple, Union
from typing import List, Optional, Tuple, Union

from anchorpoint.textsequences import TextPassage, TextSequence
from anchorpoint.utils._helper import _is_iterable_non_string
Expand Down
6 changes: 3 additions & 3 deletions anchorpoint/textsequences.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Optional, Sequence, Union
from typing import List, Optional, Sequence, Union


class TextPassage:
Expand Down Expand Up @@ -56,7 +56,7 @@ class TextSequence(Sequence[Union[None, TextPassage]]):
to be part of the TextSequence.
"""

def __init__(self, passages: Sequence[Optional[TextPassage]] = None):
# Store the passages that make up this sequence.
# ``passages or []`` keeps a non-empty list as the very same object and
# substitutes a new empty list when the argument is None (or empty).
# NOTE(review): the default is None, so the accepted type is effectively
# Optional[List[Optional[TextPassage]]] -- consider annotating it that way.
def __init__(self, passages: List[Optional[TextPassage]] = None):
self.passages = passages or []

def __repr__(self):
Expand Down Expand Up @@ -139,6 +139,6 @@ def means(self, other: TextSequence) -> bool:

zipped = zip(self_passages, other_passages)
return all(
(pair[0] is None and pair[1] is None) or pair[0].means(pair[1])
pair[0].means(pair[1]) if pair[0] is not None else pair[1] is None
for pair in zipped
)
29 changes: 25 additions & 4 deletions tests/test_text_sequence.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from tests.conftest import make_text
from typing import Text, Type
import pytest

from anchorpoint.textselectors import (
Expand Down Expand Up @@ -49,7 +47,7 @@ def test_blank_text_sequence_string(self):
assert str(sequence) == ""

def test_select_from_no_content(self):
selector_set = TextPositionSet([TextPositionSelector(start=5, end=10),])
selector_set = TextPositionSet([TextPositionSelector(start=5, end=10)])
sequence = selector_set.as_text_sequence("")
assert len(sequence) == 0
assert str(sequence) == ""
Expand Down Expand Up @@ -104,6 +102,29 @@ def test_same_meaning_regardless_of_leading_ellipsis(self, make_text):
assert passages_as_sequence.means(handcrafted_sequence)
assert not passages_as_sequence > handcrafted_sequence

def test_same_meaning_comparing_text_to_none(self):
    """
    Check that a gap (None) is not treated as equivalent to real text.

    The two sequences share their opening and closing passages and
    differ only in the middle slot: None in one, a TextPassage in the
    other. They must not "mean" the same thing, and the sequence with
    the extra text must compare as greater than the one with the gap.
    """
    opening = "In no case does copyright protection"
    closing = "extend to any idea"

    sequence_with_gap = TextSequence(
        passages=[None, TextPassage(opening), None, TextPassage(closing), None]
    )
    sequence_with_text = TextSequence(
        passages=[
            None,
            TextPassage(opening),
            TextPassage("of a college memoir"),
            TextPassage(closing),
            None,
        ]
    )

    assert not sequence_with_gap.means(sequence_with_text)
    assert sequence_with_text > sequence_with_gap

def test_one_sequence_means_another(self, make_text):
passage = make_text["s102b"]
factory = TextPositionSetFactory(passage=passage)
Expand Down Expand Up @@ -200,7 +221,7 @@ def test_handle_Nones_at_beginning_and_end(self):
)

def test_add_without_Nones(self):
sequence = TextSequence(passages=[TextPassage("This is a full section."),])
sequence = TextSequence(passages=[TextPassage("This is a full section.")])
second_sequence = TextSequence(
passages=[TextPassage("This is the full immediately following section.")]
)
Expand Down

0 comments on commit 597cb57

Please sign in to comment.