expand_anchor_shorthand can be called with key of dict passed to schema

mscarey · Jan 25, 2021 · 597cb57 · 597cb57
1 parent 0956fb7
commit 597cb57
Show file tree

Hide file tree

Showing 4 changed files with 42 additions and 22 deletions.
diff --git a/anchorpoint/schemas.py b/anchorpoint/schemas.py
@@ -1,6 +1,6 @@
 """Schema for serializing text selectors."""
 
-from typing import Dict, List, Mapping, Optional, Sequence, TypedDict, Union
+from typing import Dict, Mapping, Optional, Sequence, TypedDict, Union
 
 from marshmallow import Schema, fields, pre_dump, post_load, pre_load
 
@@ -52,25 +52,22 @@ def get_real_start_and_end(self, obj, many=False):
             obj.end = obj.real_end
         return obj
 
-    def expand_anchor_shorthand(self, data: str) -> Mapping[str, str]:
+    def expand_anchor_shorthand(self, text: str) -> Mapping[str, str]:
         """
         Convert input from shorthand format to normal selector format.
 
-
         .. code-block:: python
 
             >>> schema = SelectorSchema()
             >>> schema.expand_anchor_shorthand("eats,|shoots,|and leaves")
             {'exact': 'shoots,', 'prefix': 'eats,', 'suffix': 'and leaves'}
         """
-        result = {"text": data}
-        text = result.pop("text", None)
-        if text:
-            (
-                result["prefix"],
-                result["exact"],
-                result["suffix"],
-            ) = TextQuoteSelector.split_anchor_text(text)
+        result = {}
+        (
+            result["prefix"],
+            result["exact"],
+            result["suffix"],
+        ) = TextQuoteSelector.split_anchor_text(text)
         return result
 
     def convert_bool_to_dict(self, data: bool) -> Dict[str, int]:
@@ -82,13 +79,14 @@ def convert_bool_to_dict(self, data: bool) -> Dict[str, int]:
 
     @pre_load
     def preprocess_data(
-        self, data: Union[str, Mapping[str, Union[str, bool, int]]], **kwargs
+        self, data: Union[str, bool, Mapping[str, Union[str, bool, int]]], **kwargs
     ) -> Mapping[str, Union[str, bool, int]]:
         if isinstance(data, bool):
             return self.convert_bool_to_dict(data)
         if isinstance(data, str):
             return self.expand_anchor_shorthand(data)
-
+        if "text" in data.keys() and isinstance(data["text"], str):
+            return self.expand_anchor_shorthand(data["text"])
         return data
 
     @post_load
@@ -132,7 +130,8 @@ def from_selection(
             return TextPositionSet()
         if isinstance(selection, str):
             schema = SelectorSchema()
-            selection = schema.load(selection)
+            data = schema.expand_anchor_shorthand(selection)
+            selection = schema.load(data)
         if isinstance(selection, TextQuoteSelector):
             selection = [selection]
         elif isinstance(selection, TextPositionSelector):

diff --git a/anchorpoint/textselectors.py b/anchorpoint/textselectors.py
@@ -10,7 +10,7 @@
 import re
 from dataclasses import dataclass
 
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import List, Optional, Tuple, Union
 
 from anchorpoint.textsequences import TextPassage, TextSequence
 from anchorpoint.utils._helper import _is_iterable_non_string

diff --git a/anchorpoint/textsequences.py b/anchorpoint/textsequences.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Optional, Sequence, Union
+from typing import List, Optional, Sequence, Union
 
 
 class TextPassage:
@@ -56,7 +56,7 @@ class TextSequence(Sequence[Union[None, TextPassage]]):
         to be part of the TextSequence.
     """
 
-    def __init__(self, passages: Sequence[Optional[TextPassage]] = None):
+    def __init__(self, passages: List[Optional[TextPassage]] = None):
         self.passages = passages or []
 
     def __repr__(self):
@@ -139,6 +139,6 @@ def means(self, other: TextSequence) -> bool:
 
         zipped = zip(self_passages, other_passages)
         return all(
-            (pair[0] is None and pair[1] is None) or pair[0].means(pair[1])
+            pair[0].means(pair[1]) if pair[0] is not None else pair[1] is None
             for pair in zipped
         )
diff --git a/tests/test_text_sequence.py b/tests/test_text_sequence.py
@@ -1,5 +1,3 @@
-from tests.conftest import make_text
-from typing import Text, Type
 import pytest
 
 from anchorpoint.textselectors import (
@@ -49,7 +47,7 @@ def test_blank_text_sequence_string(self):
         assert str(sequence) == ""
 
     def test_select_from_no_content(self):
-        selector_set = TextPositionSet([TextPositionSelector(start=5, end=10),])
+        selector_set = TextPositionSet([TextPositionSelector(start=5, end=10)])
         sequence = selector_set.as_text_sequence("")
         assert len(sequence) == 0
         assert str(sequence) == ""
@@ -104,6 +102,29 @@ def test_same_meaning_regardless_of_leading_ellipsis(self, make_text):
         assert passages_as_sequence.means(handcrafted_sequence)
         assert not passages_as_sequence > handcrafted_sequence
 
+    def test_same_meaning_comparing_text_to_none(self):
+        first_sequence = TextSequence(
+            passages=[
+                None,
+                TextPassage("In no case does copyright protection"),
+                None,
+                TextPassage("extend to any idea"),
+                None,
+            ]
+        )
+        second_sequence = TextSequence(
+            passages=[
+                None,
+                TextPassage("In no case does copyright protection"),
+                TextPassage("of a college memoir"),
+                TextPassage("extend to any idea"),
+                None,
+            ]
+        )
+
+        assert not first_sequence.means(second_sequence)
+        assert second_sequence > first_sequence
+
     def test_one_sequence_means_another(self, make_text):
         passage = make_text["s102b"]
         factory = TextPositionSetFactory(passage=passage)
@@ -200,7 +221,7 @@ def test_handle_Nones_at_beginning_and_end(self):
         )
 
     def test_add_without_Nones(self):
-        sequence = TextSequence(passages=[TextPassage("This is a full section."),])
+        sequence = TextSequence(passages=[TextPassage("This is a full section.")])
         second_sequence = TextSequence(
             passages=[TextPassage("This is the full immediately following section.")]
         )