diff --git a/dol/__init__.py b/dol/__init__.py index e9038c69..bb34bf08 100644 --- a/dol/__init__.py +++ b/dol/__init__.py @@ -130,7 +130,7 @@ def ihead(store, n=1): ) from dol.paths import ( - StringTemplate, # express strings, tuples, and dict keys from a string template + KeyTemplate, # express strings, tuples, and dict keys from a string template mk_relative_path_store, # transform path store into relative path store KeyPath, # a class to represent a path to a key path_get, # get a value from a path diff --git a/dol/kv_codecs.py b/dol/kv_codecs.py index fdbbb2f7..2734da04 100644 --- a/dol/kv_codecs.py +++ b/dol/kv_codecs.py @@ -1,11 +1,9 @@ # ------------------------------------ Codecs ------------------------------------------ from functools import partial -from dataclasses import dataclass -from typing import TypeVar, Generic, Callable, Iterable, Any, Optional +from typing import Callable, Iterable, Any, Optional -from dol.trans import wrap_kvs -from dol.util import Pipe +from dol.trans import Codec, ValueCodec, KeyCodec from dol.signatures import Sig # For the codecs: @@ -166,36 +164,6 @@ def _xml_tree_decode( ) -EncodedType = TypeVar('EncodedType') -DecodedType = TypeVar('DecodedType') - - -# TODO: Want a way to specify Encoded type and Decoded type -@dataclass -class Codec(Generic[DecodedType, EncodedType]): - encoder: Callable[[DecodedType], EncodedType] - decoder: Callable[[EncodedType], DecodedType] - - def __iter__(self): - return iter((self.encoder, self.decoder)) - - def __add__(self, other): - return Codec( - encoder=Pipe(self.encoder, other.encoder), - decoder=Pipe(other.decoder, self.decoder), - ) - - -class ValueCodec(Codec): - def __call__(self, obj): - return wrap_kvs(obj, data_of_obj=self.encoder, obj_of_data=self.decoder) - - -class KeyCodec(Codec): - def __call__(self, obj): - return wrap_kvs(obj, id_of_key=self.encoder, key_of_id=self.decoder) - - def extract_arguments(func, args, kwargs): return Sig(func).kwargs_from_args_and_kwargs( args, kwargs, allow_partial=True, allow_excess=True, ignore_kind=True @@ -316,7 +284,18 @@ def is_value_codec(attr_val): xml_etree: Codec[Any, bytes] = value_wrap(_xml_tree_encode, _xml_tree_decode) +from dol.paths import KeyTemplate + + class KeyCodecs: """ A collection of key codecs """ + + def without_suffix(suffix: str): + st = KeyTemplate('{}' + f"{suffix}") + return KeyCodec(st.simple_str_to_str, st.str_to_simple_str) + + # def suffixed(suffix: str, field_type: FieldTypeNames = 'simple_str'): + # st = StringTemplate('{}' + f"{suffix}") + # return KeyCodec(st. diff --git a/dol/paths.py b/dol/paths.py index 55571bfd..cad38b05 100644 --- a/dol/paths.py +++ b/dol/paths.py @@ -1156,7 +1156,9 @@ def identity(x): return x -Codec = namedtuple('Codec', 'encoder decoder') +from dol.trans import KeyCodec, filt_iter + +# Codec = namedtuple('Codec', 'encoder decoder') FieldTypeNames = Literal['str', 'dict', 'tuple', 'namedtuple', 'simple_str'] @@ -1167,43 +1169,115 @@ def identity(x): # It's a "path finder" meshed pattern. # TODO: Do we really want to allow field_patterns to be included in the template (the `{name:pattern}` options)? # Normally, this is used for string GENERATION as `{name:format}`, which is still useful for us here too. -# The counter argument is that the main usage of StringTemplate is not actually +# The counter argument is that the main usage of KeyTemplate is not actually # generation, but extraction. Further, the format language is not as general as simply # using a format_field = {field: cast_function, ...} argument. # My decision would be to remove any use of the `{name:X}` form in the base class, # and have classmethods specialized for short-hand versions that use `name:regex` or # `name:format`, ... -class StringTemplate: - """A class for parsing and generating strings based on a template. +class KeyTemplate: + """A class for parsing and generating keys based on a template. Args: template: A template string with fields to be extracted or filled in. field_patterns: A dictionary of field names and their regex patterns. simple_str_sep: A separator string for simple strings (i.e. strings without fields). + namedtuple_type_name: The name of the namedtuple type to use for namedtuple + fields. + dflt_pattern: The default pattern to use for fields that don't have a pattern + specified. + to_str_funcs: A dictionary of field names and their functions to convert them + to strings. + from_str_funcs: A dictionary of field names and their functions to convert + them from strings. Examples: - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} - ... ) - >>> st.str_to_dict("Alice is 30 years old.") - {'name': 'Alice', 'age': '30'} - >>> st.dict_to_str({'name': 'Alice', 'age': '30'}) - 'Alice is 30 years old.' - >>> st.dict_to_tuple({'name': 'Alice', 'age': '30'}) - ('Alice', '30') - >>> st.tuple_to_dict(('Alice', '30')) - {'name': 'Alice', 'age': '30'} - >>> st.str_to_tuple("Alice is 30 years old.") - ('Alice', '30') - - You can also ask any (handled) combination of field types: - >>> coder, encoder = st.codec('tuple', 'dict') - >>> coder(('Alice', '30')) - {'name': 'Alice', 'age': '30'} - >>> encoder({'name': 'Alice', 'age': '30'}) - ('Alice', '30') + + >>> st = KeyTemplate( + ... 'root/{name}/v_{version}.json', + ... field_patterns={'version': r'\d+'}, + ... from_str_funcs={'version': int}, + ... ) + + And now you have a template that can be used to convert between various + representations of the template: You can extract fields from strings, generate + strings from fields, etc. + + >>> st.str_to_dict("root/dol/v_9.json") + {'name': 'dol', 'version': 9} + >>> st.dict_to_str({'name': 'meshed', 'version': 42}) + 'root/meshed/v_42.json' + >>> st.dict_to_tuple({'name': 'meshed', 'version': 42}) + ('meshed', 42) + >>> st.tuple_to_dict(('i2', 96)) + {'name': 'i2', 'version': 96} + >>> st.str_to_tuple("root/dol/v_9.json") + ('dol', 9) + >>> st.tuple_to_str(('front', 11)) + 'root/front/v_11.json' + >>> st.str_to_namedtuple("root/dol/v_9.json") + NamedTuple(name='dol', version=9) + >>> st.str_to_simple_str("root/dol/v_9.json") + 'dol,9' + >>> st.str_to_simple_str("root/dol/v_9.json", sep='/') + 'dol/9' + + + With ``st.key_codec``, you can make a ``KeyCodec`` for the given source and target + types. A `key_codec` is a codec; it has an encoder and a decoder. + + >>> key_codec = st.key_codec('tuple', 'str') + >>> encoder, decoder = key_codec + >>> decoder('root/dol/v_9.json') + ('dol', 9) + >>> encoder(('dol', 9)) + 'root/dol/v_9.json' + + If you have a ``Mapping``, you can use ``key_codec`` as a decorator to wrap + the mapping with a key mappings. + + >>> store = { + ... 'root/meshed/v_151.json': '{"downloads": 41, "type": "productivity"}', + ... 'root/dol/v_9.json': '{"downloads": 132, "type": "utility"}', + ... } + >>> + >>> accessor = key_codec(store) + >>> list(accessor) + [('meshed', 151), ('dol', 9)] + >>> accessor['i2', 4] = '{"downloads": 274, "type": "utility"}' + >>> list(store) + ['root/meshed/v_151.json', 'root/dol/v_9.json', 'root/i2/v_4.json'] + >>> store['root/i2/v_4.json'] + '{"downloads": 274, "type": "utility"}' + + Note: If your store contains keys that don't fit the format, key_codec will + raise a ``ValueError``. To remedy this, you can use the ``st.filt_iter`` to + filter out keys that don't fit the format, before you wrap the store with + ``st.key_codec``. + + >>> store = { + ... 'root/meshed/v_151.json': '{"downloads": 41, "type": "productivity"}', + ... 'root/dol/v_9.json': '{"downloads": 132, "type": "utility"}', + ... 'root/not/the/right/format': "something else" + ... } + >>> accessor = st.filt_iter('str')(store) + >>> list(accessor) + ['root/meshed/v_151.json', 'root/dol/v_9.json'] + >>> accessor = st.key_codec('tuple', 'str')(st.filt_iter('str')(store)) + >>> list(accessor) + [('meshed', 151), ('dol', 9)] + >>> accessor['dol', 9] + '{"downloads": 132, "type": "utility"}' + + You can also ask any (handled) combination of field types: + + >>> key_codec = st.key_codec('tuple', 'dict') + >>> key_codec.encoder(('i2', 96)) + {'name': 'i2', 'version': 96} + >>> key_codec.decoder({'name': 'fantastic', 'version': 4}) + ('fantastic', 4) + """ _formatter = string_formatter @@ -1218,11 +1292,13 @@ def __init__( simple_str_sep: str = ',', namedtuple_type_name: str = 'NamedTuple', dflt_pattern: str = '.*', + dflt_field_name: Callable[[str], str] = 'i{:02.0f}_'.format, ): self._original_template = template self.simple_str_sep = simple_str_sep self.namedtuple_type_name = namedtuple_type_name self.dflt_pattern = dflt_pattern + self.dflt_field_name = dflt_field_name ( self.template, @@ -1244,155 +1320,79 @@ def __init__( ) self.regex = self._compile_regex(self.template) - def _extract_template_info(self, template): - r"""Extracts information from the template. Namely: - - - normalized_template: A template where each placeholder has a field name - (if not given, "_{index}" will be used) - - - field_names: The tuple of field names in the order they appear in template - - - to_str_funcs: A dict of field names and their corresponding to_str functions, - which will be used to convert the field values to strings when generating a - string. - - - field_patterns_: A dict of field names and their corresponding regex patterns, - which will be used to extract the field values from a string. - - These four values are used in the init to compute the parameters of the - instance. - - >>> st = StringTemplate('{:02.0f}/{name::\w+}') - >>> st.template - '{_1}/{name}' - >>> st.field_names - ('_1', 'name') - >>> st.field_patterns - {'_1': '.*', 'name': '\\w+'} - >>> st.regex.pattern - '(?P<_1>.*)/(?P\\w+)' - >>> to_str_funcs = st.to_str_funcs - >>> to_str_funcs['_1'](3) - '03' - >>> to_str_funcs['name']('Alice') - 'Alice' - - """ - - field_names = [] - field_patterns_ = {} - to_str_funcs = {} - - def parse_and_transform(): - for index, (literal_text, field_name, format_spec, conversion) in enumerate( - self._formatter.parse(template), 1 - ): - field_name = f"_{index}" if field_name == '' else field_name - if field_name is not None: - field_names.append(field_name) # remember the field name - # extract format and pattern information: - if ':' not in format_spec: - format_spec += ':' - to_str_func_format, pattern = format_spec.split(':') - if to_str_func_format: - to_str_funcs[field_name] = ( - '{' + f":{to_str_func_format}" + '}' - ).format - field_patterns_[field_name] = pattern or self.dflt_pattern - # At this point you should have a valid field_name and empty format_spec - yield ( - literal_text, - field_name, - '', - conversion, - ) - - normalized_template = string_unparse(parse_and_transform()) - return normalized_template, tuple(field_names), to_str_funcs, field_patterns_ - - def _compile_regex(self, template): - r"""Parses the template, generating regex for matching the template. - Essentially, it weaves together the literal text parts and the format_specs - parts, transformed into name-caputuring regex patterns. + def key_codec(self, source: FieldTypeNames, target: FieldTypeNames): + r"""Makes a ``KeyCodec`` for the given source and target types. - Note that the literal text parts are regex-escaped so that they are not - interpreted as regex. For example, if the template is "{name}.txt", the - literal text part is replaced with "\\.txt", to avoid that the "." is - interpreted as a regex wildcard. This would otherwise match any character. - Instead, the escaped dot is matched literally. - See https://docs.python.org/3/library/re.html#re.escape for more information. - - >>> StringTemplate('{}.ext').regex.pattern - '(?P<_1>.*)\\.ext' - >>> StringTemplate('{name}.ext').regex.pattern - '(?P.*)\\.ext' - >>> StringTemplate('{::\w+}.ext').regex.pattern - '(?P<_1>\\w+)\\.ext' - >>> StringTemplate('{name::\w+}.ext').regex.pattern - '(?P\\w+)\\.ext' - >>> StringTemplate('{:0.02f:\w+}.ext').regex.pattern - '(?P<_1>\\w+)\\.ext' - >>> StringTemplate('{name:0.02f:\w+}.ext').regex.pattern - '(?P\\w+)\\.ext' - """ - - def mk_named_capture_group(field_name): - if field_name: - return f"(?P<{field_name}>{self.field_patterns[field_name]})" - else: - return "" - - def generate_pattern_parts(template): - parts = self._formatter.parse(template) - for literal_text, field_name, _, _ in parts: - yield re.escape(literal_text) + mk_named_capture_group(field_name) - - return re.compile(''.join(generate_pattern_parts(template))) - - @staticmethod - def _assert_field_type(field_type: FieldTypeNames, name='field_type'): - if field_type not in FieldTypeNames.__args__: - raise ValueError( - f"{name} must be one of {FieldTypeNames}. Was: {field_type}" - ) + >>> st = KeyTemplate( + ... 'root/{name}/v_{version}.json', + ... field_patterns={'version': r'\d+'}, + ... from_str_funcs={'version': int}, + ... ) - def codec(self, source: FieldTypeNames, target: FieldTypeNames): - """Makes a ``(coder, decoder)`` pair for the given source and target types. + A `key_codec` is a codec; it has an encoder and a decoder. + + >>> key_codec = st.key_codec('tuple', 'str') + >>> encoder, decoder = key_codec + >>> decoder('root/dol/v_9.json') + ('dol', 9) + >>> encoder(('dol', 9)) + 'root/dol/v_9.json' + + If you have a ``Mapping``, you can use ``key_codec`` as a decorator to wrap + the mapping with a key mappings. + + >>> store = { + ... 'root/meshed/v_151.json': '{"downloads": 41, "type": "productivity"}', + ... 'root/dol/v_9.json': '{"downloads": 132, "type": "utility"}', + ... } + >>> + >>> accessor = key_codec(store) + >>> list(accessor) + [('meshed', 151), ('dol', 9)] + >>> accessor['i2', 4] = '{"downloads": 274, "type": "utility"}' + >>> list(store) + ['root/meshed/v_151.json', 'root/dol/v_9.json', 'root/i2/v_4.json'] + >>> store['root/i2/v_4.json'] + '{"downloads": 274, "type": "utility"}' + + Note: If your store contains keys that don't fit the format, key_codec will + raise a ``ValueError``. To remedy this, you can use the ``st.filt_iter`` to + filter out keys that don't fit the format, before you wrap the store with + ``st.key_codec``. - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} - ... ) - >>> coder, encoder = st.codec('tuple', 'dict') - >>> coder(('Alice', '30')) - {'name': 'Alice', 'age': '30'} - >>> encoder({'name': 'Alice', 'age': '30'}) - ('Alice', '30') """ self._assert_field_type(target, 'target') self._assert_field_type(source, 'source') coder = getattr(self, f'{source}_to_{target}') decoder = getattr(self, f'{target}_to_{source}') - return Codec(coder, decoder) + return KeyCodec(coder, decoder) def filt_iter(self, field_type: FieldTypeNames): - from dol.trans import filt_iter - + r""" + Makes a store decorator that filters out keys that don't match the template + given field type. + + >>> store = { + ... 'root/meshed/v_151.json': '{"downloads": 41, "type": "productivity"}', + ... 'root/dol/v_9.json': '{"downloads": 132, "type": "utility"}', + ... 'root/not/the/right/format': "something else" + ... } + """ self._assert_field_type(field_type, 'field_type') filt_func = getattr(self, f'match_{field_type}') return filt_iter(filt=filt_func) # @_return_none_if_none_input def str_to_dict(self, s: str) -> dict: - """Parses the input string and returns a dictionary of extracted values. + r"""Parses the input string and returns a dictionary of extracted values. - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', + ... from_str_funcs={'ver': int}, ... ) + >>> st.str_to_dict('root/life/v_30.json') + {'i01_': 'life', 'ver': 30} - >>> st.str_to_dict("Alice is 30 years old.") - {'name': 'Alice', 'age': '30'} """ if s is None: return None @@ -1404,14 +1404,13 @@ def str_to_dict(self, s: str) -> dict: # @_return_none_if_none_input def dict_to_str(self, params: dict) -> str: - """Generates a string from the dictionary values based on the template. + r"""Generates a string from the dictionary values based on the template. - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, ... ) - >>> st.dict_to_str({'name': 'Alice', 'age': '30'}) - 'Alice is 30 years old.' + >>> st.dict_to_str({'i01_': 'life', 'ver': 42}) + 'root/life/v_042.json' """ if params is None: @@ -1421,14 +1420,14 @@ def dict_to_str(self, params: dict) -> str: # @_return_none_if_none_input def dict_to_tuple(self, params: dict) -> tuple: - """Generates a tuple from the dictionary values based on the template. + r"""Generates a tuple from the dictionary values based on the template. - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, ... ) - >>> st.dict_to_tuple({'name': 'Alice', 'age': '30'}) - ('Alice', '30') + >>> st.str_to_tuple('root/life/v_42.json') + ('life', 42) + """ if params is None: return None @@ -1436,14 +1435,13 @@ def dict_to_tuple(self, params: dict) -> tuple: # @_return_none_if_none_input def tuple_to_dict(self, param_vals: tuple) -> dict: - """Generates a dictionary from the tuple values based on the template. + r"""Generates a dictionary from the tuple values based on the template. - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, ... ) - >>> st.tuple_to_dict(('Alice', '30')) - {'name': 'Alice', 'age': '30'} + >>> st.tuple_to_dict(('life', 42)) + {'i01_': 'life', 'ver': 42} """ if param_vals is None: return None @@ -1453,14 +1451,13 @@ def tuple_to_dict(self, param_vals: tuple) -> dict: # @_return_none_if_none_input def str_to_tuple(self, s: str) -> tuple: - """Parses the input string and returns a tuple of extracted values. + r"""Parses the input string and returns a tuple of extracted values. - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, ... ) - >>> st.str_to_tuple("Alice is 30 years old.") - ('Alice', '30') + >>> st.str_to_tuple('root/life/v_42.json') + ('life', 42) """ if s is None: return None @@ -1468,14 +1465,13 @@ def str_to_tuple(self, s: str) -> tuple: # @_return_none_if_none_input def tuple_to_str(self, param_vals: tuple) -> str: - """Generates a string from the tuple values based on the template. + r"""Generates a string from the tuple values based on the template. - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, ... ) - >>> st.tuple_to_str(('Alice', '30')) - 'Alice is 30 years old.' + >>> st.tuple_to_str(('life', 42)) + 'root/life/v_042.json' """ if param_vals is None: return None @@ -1486,15 +1482,14 @@ def dict_to_namedtuple( self, params: dict, ): - """Generates a namedtuple from the dictionary values based on the template. + r"""Generates a namedtuple from the dictionary values based on the template. - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, ... ) - >>> Person = st.dict_to_namedtuple({'name': 'Alice', 'age': '30'}) - >>> Person - NamedTuple(name='Alice', age='30') + >>> App = st.dict_to_namedtuple({'i01_': 'life', 'ver': 42}) + >>> App + NamedTuple(i01_='life', ver=42) """ if params is None: return None @@ -1502,54 +1497,90 @@ def dict_to_namedtuple( # @_return_none_if_none_input def namedtuple_to_dict(self, nt): - """Converts a namedtuple to a dictionary. + r"""Converts a namedtuple to a dictionary. - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, ... ) - >>> Person = st.dict_to_namedtuple({'name': 'Alice', 'age': '30'}) - >>> st.namedtuple_to_dict(Person) - {'name': 'Alice', 'age': '30'} + >>> App = st.dict_to_namedtuple({'i01_': 'life', 'ver': 42}) + >>> st.namedtuple_to_dict(App) + {'i01_': 'life', 'ver': 42} """ if nt is None: return None return dict(nt._asdict()) # TODO: Find way that doesn't involve private method + def str_to_namedtuple(self, s: str): + r"""Converts a string to a namedtuple. + + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, + ... ) + >>> App = st.str_to_namedtuple('root/life/v_042.json') + >>> App + NamedTuple(i01_='life', ver=42) + """ + if s is None: + return None + return self.dict_to_namedtuple(self.str_to_dict(s)) + # @_return_none_if_none_input def str_to_simple_str(self, s: str, sep: str = None): - """Converts a string to a simple string (i.e. a simple character-delimited string). + r"""Converts a string to a simple string (i.e. a simple character-delimited string). - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, ... ) - >>> st.str_to_simple_str("Alice is 30 years old.") - 'Alice,30' - >>> st.str_to_simple_str("Alice is 30 years old.", '-') - 'Alice-30' + >>> st.str_to_simple_str('root/life/v_042.json') + 'life,042' + >>> st.str_to_simple_str('root/life/v_042.json', '-') + 'life-042' """ sep = sep or self.simple_str_sep if s is None: return None return sep.join(self.to_str_funcs[k](v) for k, v in self.str_to_dict(s).items()) + # @_return_none_if_none_input + def simple_str_to_tuple(self, ss: str, sep: str): + r"""Converts a simple character-delimited string to a dict. + + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, + ... ) + >>> st.simple_str_to_tuple('life-042', '-') + ('life', 42) + """ + if ss is None: + return None + return tuple(f(x) for f, x in zip(self.from_str_funcs.values(), ss.split(sep))) + # @_return_none_if_none_input def simple_str_to_str(self, ss: str, sep: str): - """Converts a simple character-delimited string to a string. + r"""Converts a simple character-delimited string to a string. - >>> st = StringTemplate( - ... "{name} is {age} years old.", - ... field_patterns={"name": r"\w+", "age": r"\d+"} + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, ... ) - >>> st.simple_str_to_str('Alice-30', '-') - 'Alice is 30 years old.' + >>> st.simple_str_to_str('life-042', '-') + 'root/life/v_042.json' """ if ss is None: return None - return self.tuple_to_str(tuple(ss.split(sep))) + return self.tuple_to_str(self.simple_str_to_tuple(ss, sep=sep)) def match_str(self, s: str) -> bool: + r""" + Returns True iff the string matches the template. + + >>> st = KeyTemplate( + ... 'root/{}/v_{ver:03.0f:\d+}.json', from_str_funcs={'ver': int}, + ... ) + >>> st.match_str('root/life/v_042.json') + True + >>> st.match_str('this/does/not_match') + False + """ return self.regex.match(s) is not None def match_dict(self, params: dict) -> bool: @@ -1566,3 +1597,118 @@ def match_namedtuple(self, params: namedtuple) -> bool: def match_simple_str(self, params: str) -> bool: return self.match_str(self.simple_str_to_str(params)) + + def _extract_template_info(self, template): + r"""Extracts information from the template. Namely: + + - normalized_template: A template where each placeholder has a field name + (if not given, dflt_field_name will be used, which by default is + 'i{:02.0f}_'.format) + + - field_names: The tuple of field names in the order they appear in template + + - to_str_funcs: A dict of field names and their corresponding to_str functions, + which will be used to convert the field values to strings when generating a + string. + + - field_patterns_: A dict of field names and their corresponding regex patterns, + which will be used to extract the field values from a string. + + These four values are used in the init to compute the parameters of the + instance. + + >>> st = KeyTemplate('{:03.0f}/{name::\w+}') + >>> st.template + '{i01_}/{name}' + >>> st.field_names + ('i01_', 'name') + >>> st.field_patterns + {'i01_': '.*', 'name': '\\w+'} + >>> st.regex.pattern + '(?P.*)/(?P\\w+)' + >>> to_str_funcs = st.to_str_funcs + >>> to_str_funcs['i01_'](3) + '003' + >>> to_str_funcs['name']('life') + 'life' + + """ + + field_names = [] + field_patterns_ = {} + to_str_funcs = {} + + def parse_and_transform(): + for index, (literal_text, field_name, format_spec, conversion) in enumerate( + self._formatter.parse(template), 1 + ): + field_name = ( + self.dflt_field_name(index) if field_name == '' else field_name + ) + if field_name is not None: + field_names.append(field_name) # remember the field name + # extract format and pattern information: + if ':' not in format_spec: + format_spec += ':' + to_str_func_format, pattern = format_spec.split(':') + if to_str_func_format: + to_str_funcs[field_name] = ( + '{' + f":{to_str_func_format}" + '}' + ).format + field_patterns_[field_name] = pattern or self.dflt_pattern + # At this point you should have a valid field_name and empty format_spec + yield ( + literal_text, + field_name, + '', + conversion, + ) + + normalized_template = string_unparse(parse_and_transform()) + return normalized_template, tuple(field_names), to_str_funcs, field_patterns_ + + def _compile_regex(self, template): + r"""Parses the template, generating regex for matching the template. + Essentially, it weaves together the literal text parts and the format_specs + parts, transformed into name-caputuring regex patterns. + + Note that the literal text parts are regex-escaped so that they are not + interpreted as regex. For example, if the template is "{name}.txt", the + literal text part is replaced with "\\.txt", to avoid that the "." is + interpreted as a regex wildcard. This would otherwise match any character. + Instead, the escaped dot is matched literally. + See https://docs.python.org/3/library/re.html#re.escape for more information. + + >>> KeyTemplate('{}.ext').regex.pattern + '(?P.*)\\.ext' + >>> KeyTemplate('{name}.ext').regex.pattern + '(?P.*)\\.ext' + >>> KeyTemplate('{::\w+}.ext').regex.pattern + '(?P\\w+)\\.ext' + >>> KeyTemplate('{name::\w+}.ext').regex.pattern + '(?P\\w+)\\.ext' + >>> KeyTemplate('{:0.02f:\w+}.ext').regex.pattern + '(?P\\w+)\\.ext' + >>> KeyTemplate('{name:0.02f:\w+}.ext').regex.pattern + '(?P\\w+)\\.ext' + """ + + def mk_named_capture_group(field_name): + if field_name: + return f"(?P<{field_name}>{self.field_patterns[field_name]})" + else: + return "" + + def generate_pattern_parts(template): + parts = self._formatter.parse(template) + for literal_text, field_name, _, _ in parts: + yield re.escape(literal_text) + mk_named_capture_group(field_name) + + return re.compile(''.join(generate_pattern_parts(template))) + + @staticmethod + def _assert_field_type(field_type: FieldTypeNames, name='field_type'): + if field_type not in FieldTypeNames.__args__: + raise ValueError( + f"{name} must be one of {FieldTypeNames}. Was: {field_type}" + ) diff --git a/dol/tests/test_paths.py b/dol/tests/test_paths.py index 3381a64d..0418689a 100644 --- a/dol/tests/test_paths.py +++ b/dol/tests/test_paths.py @@ -1,51 +1,50 @@ """Tests for paths.py""" -from dol import StringTemplate +from dol.paths import KeyTemplate def test_string_template_template_construction(): - assert StringTemplate('{}.ext').template == '{_1}.ext' - assert StringTemplate('{name}.ext').template == '{name}.ext' - assert StringTemplate('{::\w+}.ext').template == '{_1}.ext' - assert StringTemplate('{name::\w+}.ext').template == '{name}.ext' - assert StringTemplate('{name::\w+}.ext').template == '{name}.ext' - assert StringTemplate('{name:0.02f}.ext').template == '{name}.ext' - assert StringTemplate('{name:0.02f:\w+}.ext').template == '{name}.ext' - assert StringTemplate('{:0.02f:\w+}.ext').template == '{_1}.ext' + assert KeyTemplate('{}.ext').template == '{i01_}.ext' + assert KeyTemplate('{name}.ext').template == '{name}.ext' + assert KeyTemplate('{::\w+}.ext').template == '{i01_}.ext' + assert KeyTemplate('{name::\w+}.ext').template == '{name}.ext' + assert KeyTemplate('{name::\w+}.ext').template == '{name}.ext' + assert KeyTemplate('{name:0.02f}.ext').template == '{name}.ext' + assert KeyTemplate('{name:0.02f:\w+}.ext').template == '{name}.ext' + assert KeyTemplate('{:0.02f:\w+}.ext').template == '{i01_}.ext' def test_string_template_regex(): - assert StringTemplate('{}.ext').regex.pattern == '(?P<_1>.*)\\.ext' - assert StringTemplate('{name}.ext').regex.pattern == '(?P.*)\\.ext' - assert StringTemplate('{::\w+}.ext').regex.pattern == '(?P<_1>\\w+)\\.ext' - assert StringTemplate('{name::\w+}.ext').regex.pattern == '(?P\\w+)\\.ext' - assert StringTemplate('{:0.02f:\w+}.ext').regex.pattern == '(?P<_1>\\w+)\\.ext' - assert StringTemplate('{name:0.02f:\w+}.ext').regex.pattern == '(?P\\w+)\\.ext' + assert KeyTemplate('{}.ext').regex.pattern == '(?P.*)\\.ext' + assert KeyTemplate('{name}.ext').regex.pattern == '(?P.*)\\.ext' + assert KeyTemplate('{::\w+}.ext').regex.pattern == '(?P\\w+)\\.ext' + assert KeyTemplate('{name::\w+}.ext').regex.pattern == '(?P\\w+)\\.ext' + assert KeyTemplate('{:0.02f:\w+}.ext').regex.pattern == '(?P\\w+)\\.ext' + assert KeyTemplate('{name:0.02f:\w+}.ext').regex.pattern == '(?P\\w+)\\.ext' def test_string_template_simple(): - from dol.paths import StringTemplate + from dol.paths import KeyTemplate from collections import namedtuple - st = StringTemplate( - 'root/{name}/v_{version}.json', - field_patterns={'name': r'\w+', 'version': r'\d+'}, + st = KeyTemplate( + 'root/{}/v_{version:03.0f:\d+}.json', from_str_funcs={'version': int}, ) - assert st.str_to_dict('root/Alice/v_30.json') == {'name': 'Alice', 'version': 30} - assert st.dict_to_str({'name': 'Alice', 'version': 30}) == 'root/Alice/v_30.json' - assert st.dict_to_tuple({'name': 'Alice', 'version': 30}) == ('Alice', 30) - assert st.tuple_to_dict(('Alice', 30)) == {'name': 'Alice', 'version': 30} - assert st.str_to_tuple('root/Alice/v_30.json') == ('Alice', 30) - assert st.tuple_to_str(('Alice', 30)) == 'root/Alice/v_30.json' + assert st.str_to_dict('root/life/v_42.json') == {'i01_': 'life', 'version': 42} + assert st.dict_to_str({'i01_': 'life', 'version': 42}) == 'root/life/v_042.json' + assert st.dict_to_tuple({'i01_': 'life', 'version': 42}) == ('life', 42) + assert st.tuple_to_dict(('life', 42)) == {'i01_': 'life', 'version': 42} + assert st.str_to_tuple('root/life/v_42.json') == ('life', 42) + assert st.tuple_to_str(('life', 42)) == 'root/life/v_042.json' - VersionedFile = st.dict_to_namedtuple({'name': 'Alice', 'version': 30}) + assert st.str_to_simple_str('root/life/v_42.json') == 'life,042' + assert st.str_to_simple_str('root/life/v_42.json', '-') == 'life-042' + assert st.simple_str_to_str('life-42', '-') == 'root/life/v_042.json' from collections import namedtuple - assert VersionedFile == namedtuple('VersionedFile', ['name', 'version'])('Alice', 30) - assert st.namedtuple_to_dict(VersionedFile) == {'name': 'Alice', 'version': 30} - assert st.str_to_simple_str('root/Alice/v_30.json') == 'Alice,30' - assert st.str_to_simple_str('root/Alice/v_30.json', '-') == 'Alice-30' - assert st.simple_str_to_str('Alice-30', '-') == 'root/Alice/v_30.json' \ No newline at end of file + VersionedFile = st.dict_to_namedtuple({'i01_': 'life', 'version': 42}) + assert VersionedFile == namedtuple('VersionedFile', ['i01_', 'version'])('life', 42) + assert st.namedtuple_to_dict(VersionedFile) == {'i01_': 'life', 'version': 42} diff --git a/dol/trans.py b/dol/trans.py index 1d08d8b1..550b72de 100644 --- a/dol/trans.py +++ b/dol/trans.py @@ -3,7 +3,8 @@ import types from types import SimpleNamespace from inspect import signature, Parameter -from typing import Union, Iterable, Optional, Collection, Callable, Any +from typing import Union, Iterable, Optional, Collection, Callable, Any, Generic +from dataclasses import dataclass from warnings import warn from collections.abc import Iterable from collections.abc import ( @@ -621,7 +622,9 @@ def _wrap_store( @store_decorator def insert_hash_method( - store=None, *, hash_method: Callable[[Any], int] = id, + store=None, + *, + hash_method: Callable[[Any], int] = id, ): """Make a store hashable using the specified ``hash_method``. Will add (or overwrite) a ``__hash__`` method to the store that uses the @@ -3004,3 +3007,33 @@ class StoreWithMissingKeyCallaback(store): StoreWithMissingKeyCallaback.__missing__ = missing_key_callback return StoreWithMissingKeyCallaback + + +EncodedType = TypeVar('EncodedType') +DecodedType = TypeVar('DecodedType') + + +# TODO: Want a way to specify Encoded type and Decoded type +@dataclass +class Codec(Generic[DecodedType, EncodedType]): + encoder: Callable[[DecodedType], EncodedType] + decoder: Callable[[EncodedType], DecodedType] + + def __iter__(self): + return iter((self.encoder, self.decoder)) + + def __add__(self, other): + return Codec( + encoder=Pipe(self.encoder, other.encoder), + decoder=Pipe(other.decoder, self.decoder), + ) + + +class ValueCodec(Codec): + def __call__(self, obj): + return wrap_kvs(obj, data_of_obj=self.encoder, obj_of_data=self.decoder) + + +class KeyCodec(Codec): + def __call__(self, obj): + return wrap_kvs(obj, id_of_key=self.encoder, key_of_id=self.decoder)