diff --git a/.hgtags b/.hgtags index ffa3974..e07fb43 100644 --- a/.hgtags +++ b/.hgtags @@ -211,3 +211,8 @@ c887dfe19e2f01b770a2b4b04163a01e00a33f85 0.17.21 b4c00a90b4d003ff3f239df622fb638cd33146dd 0.17.23 4309006902d2453399588f4ddccfb3fc460e1eba 0.17.24 12a642699fa84085248317ee765c4956f6deeec7 0.17.25 +8a26dc2a156aa189c472b5efeb10e8c3de206091 0.17.26 +f76dde33e9a175e7505a2933a5c2423d4e3db9aa 0.17.27 +d522a02977979e5feef1d0f1b94b6b7f823c0bdd 0.17.28 +41fd3925691106c999959771e54bd69cce70d1c8 0.17.29 +0ed43732b9e309d397e9c9cfa74f115f40f51a6b 0.17.30 diff --git a/CHANGES b/CHANGES index d8e0085..03e678a 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,31 @@ +[0, 17, 31]: 2023-05-31 + - added tag.setter on `ScalarEvent` and on `Node`, that takes either + a `Tag` instance, or a str + (reported by `Sorin Sbarnea `__) + +[0, 17, 30]: 2023-05-30 + - fix issue 467, caused by Tag instances not being hashable (reported by + `Douglas Raillard + `__) + +[0, 17, 29]: 2023-05-30 + - changed the internals of the tag property from a string to a class which allows + for preservation of the original handle and suffix. This should + result in better results using documents with %TAG directives, as well + as preserving URI escapes in tag suffixes. + +[0, 17, 28]: 2023-05-26 + - fix for issue 464: documents ending with document end marker without final newline + fail to load (reported by `Mariusz Rusiniak `__) + +[0, 17, 27]: 2023-05-25 + - fix issue with inline mappings as value for merge keys + (reported by Sirish on `StackOverflow `__) + - fix for 468, error inserting after accessing merge attribute on ``CommentedMap`` + (reported by `Bastien gerard `__) + - fix for issue 461 pop + insert on same `CommentedMap` key throwing error + (reported by `John Thorvald Wodder II `__) + [0, 17, 26]: 2023-05-09 - Fix for error on edge cage for issue 459 diff --git a/README.rst b/README.rst index 2cb1c12..4ef687c 100644 --- a/README.rst +++ b/README.rst @@ -4,8 +4,8 @@ ruamel.yaml ``ruamel.yaml`` is a YAML 1.2 loader/dumper package for Python. -:version: 0.17.26 -:updated: 2023-05-09 +:version: 0.17.31 +:updated: 2023-05-31 :documentation: http://yaml.readthedocs.io :repository: https://sourceforge.net/projects/ruamel-yaml/ :pypi: https://pypi.org/project/ruamel.yaml/ @@ -61,8 +61,36 @@ ChangeLog .. should insert NEXT: at the beginning of line for next key (with empty line) +0.17.31 (2023-05-31): + - added tag.setter on `ScalarEvent` and on `Node`, that takes either + a `Tag` instance, or a str + (reported by `Sorin Sbarnea `__) + +0.17.30 (2023-05-30): + - fix issue 467, caused by Tag instances not being hashable (reported by + `Douglas Raillard + `__) + +0.17.29 (2023-05-30): + - changed the internals of the tag property from a string to a class which allows + for preservation of the original handle and suffix. This should + result in better results using documents with %TAG directives, as well + as preserving URI escapes in tag suffixes. + +0.17.28 (2023-05-26): + - fix for issue 464: documents ending with document end marker without final newline + fail to load (reported by `Mariusz Rusiniak `__) + +0.17.27 (2023-05-25): + - fix issue with inline mappings as value for merge keys + (reported by Sirish on `StackOverflow `__) + - fix for 468, error inserting after accessing merge attribute on ``CommentedMap`` + (reported by `Bastien gerard `__) + - fix for issue 461 pop + insert on same `CommentedMap` key throwing error + (reported by `John Thorvald Wodder II `__) + 0.17.26 (2023-05-09): - - Fix for error on edge cage for issue 459 + - fix for error on edge cage for issue 459 0.17.25 (2023-05-09): - fix for regression while dumping wrapped strings with too many backslashes removed @@ -158,7 +186,7 @@ ChangeLog attrs with `@attr.s()` (both reported by `ssph `__) 0.17.11 (2021-08-19): - - fix error baseclass for ``DuplicateKeyErorr`` (reported by `Łukasz Rogalski + - fix error baseclass for ``DuplicateKeyError`` (reported by `Łukasz Rogalski `__) - fix typo in reader error message, causing `KeyError` during reader error (reported by `MTU `__) diff --git a/__init__.py b/__init__.py index 210337f..9a71f6e 100644 --- a/__init__.py +++ b/__init__.py @@ -5,9 +5,9 @@ _package_data = dict( full_package_name='ruamel.yaml', - version_info=(0, 17, 26), - __version__='0.17.26', - version_timestamp='2023-05-09 21:59:45', + version_info=(0, 17, 31), + __version__='0.17.31', + version_timestamp='2023-05-31 07:56:46', author='Anthon van der Neut', author_email='a.van.der.neut@ruamel.eu', description='ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order', # NOQA diff --git a/_doc/_static/pypi.svg b/_doc/_static/pypi.svg index 0790f1e..f9fb467 100644 --- a/_doc/_static/pypi.svg +++ b/_doc/_static/pypi.svg @@ -1 +1 @@ - pypipypi0.17.260.17.26 + pypipypi0.17.310.17.31 diff --git a/_test/test_api_change.py b/_test/test_api_change.py index 8961273..84e7828 100644 --- a/_test/test_api_change.py +++ b/_test/test_api_change.py @@ -209,6 +209,7 @@ def test_read_unicode(self, tmpdir: Any) -> None: with open(file_name, 'wb') as fp: fp.write('text: HELLO_WORLD©\n'.encode('utf-8')) text_dict = yaml.load(open(file_name, 'r')) + print(text_dict) assert text_dict['text'] == 'HELLO_WORLD©' diff --git a/_test/test_cyaml.py b/_test/test_cyaml.py index 056093b..e1280f3 100644 --- a/_test/test_cyaml.py +++ b/_test/test_cyaml.py @@ -5,7 +5,7 @@ import pytest # type: ignore # NOQA from textwrap import dedent -NO_CLIB_VER = (3, 10) +NO_CLIB_VER = (3, 12) @pytest.mark.skipif( # type: ignore diff --git a/_test/test_documents.py b/_test/test_documents.py index 43bd8f4..b5817f9 100644 --- a/_test/test_documents.py +++ b/_test/test_documents.py @@ -60,6 +60,36 @@ def test_multi_doc_ends_only(self) -> None: docs = list(round_trip_load_all(inp, version=(1, 2))) assert docs == [['a'], ['b']] + def test_single_scalar_comment(self) -> None: + from ruamel import yaml + + inp = """\ + one # comment + two + """ + with pytest.raises(yaml.parser.ParserError): + d = list(round_trip_load_all(inp, version=(1, 2))) # NOQA + + def test_scalar_after_seq_document(self) -> None: + from ruamel import yaml + + inp = """\ + [ 42 ] + hello + """ + with pytest.raises(yaml.parser.ParserError): + d = list(round_trip_load_all(inp, version=(1, 2))) # NOQA + + def test_yunk_after_explicit_document_end(self) -> None: + from ruamel import yaml + + inp = """\ + hello: world + ... this is no comment + """ + with pytest.raises(yaml.parser.ParserError): + d = list(round_trip_load_all(inp, version=(1, 2))) # NOQA + def test_multi_doc_ends_only_1_1(self) -> None: from ruamel import yaml diff --git a/_test/test_issues.py b/_test/test_issues.py index f0a7ede..fd7160b 100644 --- a/_test/test_issues.py +++ b/_test/test_issues.py @@ -1088,6 +1088,62 @@ def test_issue_459(self) -> None: data = yaml.load(out_stream.getvalue()) assert data[0]['data'] == MYOBJ['data'] + def test_issue_461(self) -> None: + from ruamel.yaml import YAML + + yaml = YAML() + + inp = dedent( + """ + first name: Roy + last name: Rogers + city: somewhere + """ + ) + yaml = YAML() + data = yaml.load(inp) + data.pop('last name') + assert data.pop('not there', 'xxx') == 'xxx' + data.insert(1, 'last name', 'Beaty', comment='he has seen things') + + def test_issue_463(self) -> None: + import sys + from ruamel.yaml.compat import StringIO + from ruamel.yaml import YAML + + yaml = YAML() + + inp = dedent( + """ + first_name: Art + """ + ) + data = yaml.load(inp) + _ = data.merge + data.insert(0, 'some_key', 'test') + yaml.dump(data, sys.stdout) + buf = StringIO() + yaml.dump(data, buf) + exp = dedent( + """ + some_key: test + first_name: Art + """ + ) + assert buf.getvalue() == exp + + def test_issue_464(self) -> None: + # document end marker without newline threw error in 0.17.27 + from ruamel.yaml import YAML + + yaml = YAML() + yaml.load('---\na: True\n...') + + def test_issue_467(self) -> None: + import ruamel.yaml + + yaml = ruamel.yaml.YAML() + yaml.constructor.add_constructor(yaml.resolver.DEFAULT_MAPPING_TAG, lambda x, y: None) # @pytest.mark.xfail(strict=True, reason='bla bla', raises=AssertionError) # def test_issue_ xxx(self) -> None: diff --git a/_test/test_literal.py b/_test/test_literal.py index f5c42e4..fcc949c 100644 --- a/_test/test_literal.py +++ b/_test/test_literal.py @@ -333,3 +333,29 @@ def test_rt_non_root_literal_scalar(self) -> None: ys = ys.format(s) d = yaml.load(ys) yaml.dump(d, compare=ys) + + def test_regular_spaces(self) -> None: + import ruamel.yaml + + yaml = ruamel.yaml.YAML() + ys = "key: |\n\n\n content\n" + d = yaml.load(ys) + assert d['key'] == '\n\ncontent\n' + + def test_irregular_spaces_content(self) -> None: + import ruamel.yaml + + yaml = ruamel.yaml.YAML() + ys = "key: |\n \n \n irregular content\n" + with pytest.raises(ruamel.yaml.scanner.ScannerError): + d = yaml.load(ys) + print(d) + + def test_irregular_spaces_comment(self) -> None: + import ruamel.yaml + + yaml = ruamel.yaml.YAML() + ys = "key: |\n \n \n # comment\n" + with pytest.raises(ruamel.yaml.scanner.ScannerError): + d = yaml.load(ys) + print(d) diff --git a/_test/test_tag.py b/_test/test_tag.py index 5f388e5..4f0587f 100644 --- a/_test/test_tag.py +++ b/_test/test_tag.py @@ -26,117 +26,165 @@ def yaml_load(cls, constructor: Any, node: Any) -> Any: class TestIndentFailures: def test_tag(self) -> None: - round_trip("""\ + round_trip( + """\ !!python/object:__main__.Developer name: Anthon location: Germany language: python - """) + """ + ) def test_full_tag(self) -> None: - round_trip("""\ + round_trip( + """\ !!tag:yaml.org,2002:python/object:__main__.Developer name: Anthon location: Germany language: python - """) + """ + ) def test_standard_tag(self) -> None: - round_trip("""\ + round_trip( + """\ !!tag:yaml.org,2002:python/object:map name: Anthon location: Germany language: python - """) + """ + ) def test_Y1(self) -> None: - round_trip("""\ + round_trip( + """\ !yyy name: Anthon location: Germany language: python - """) + """ + ) def test_Y2(self) -> None: - round_trip("""\ + round_trip( + """\ !!yyy name: Anthon location: Germany language: python - """) + """ + ) - @pytest.mark.xfail(strict=True) # type: ignore + # @pytest.mark.xfail(strict=True) # type: ignore def test_spec_6_26_tag_shorthands(self) -> None: - round_trip("""\ + from ruamel.yaml import YAML + from io import StringIO + from textwrap import dedent + + inp = dedent( + """\ %TAG !e! tag:example.com,2000:app/ --- - !local foo - !!str bar - !e!tag%21 baz - """) + """ + ) + yaml = YAML() + data = yaml.load(inp) + buf = StringIO() + yaml.dump(data, buf) + print('buf:\n', buf.getvalue(), sep='') + assert buf.getvalue() == inp + + +class TestTagGeneral: + def test_unknow_handle(self) -> None: + from ruamel.yaml.parser import ParserError + + with pytest.raises(ParserError): + round_trip( + """\ + %TAG !x! tag:example.com,2000:app/ + --- + - !y!tag%21 baz + """ + ) class TestRoundTripCustom: def test_X1(self) -> None: register_xxx() - round_trip("""\ + round_trip( + """\ !xxx name: Anthon location: Germany language: python - """) + """ + ) @pytest.mark.xfail(strict=True) # type: ignore def test_X_pre_tag_comment(self) -> None: register_xxx() - round_trip("""\ + round_trip( + """\ - # hello !xxx name: Anthon location: Germany language: python - """) + """ + ) @pytest.mark.xfail(strict=True) # type: ignore def test_X_post_tag_comment(self) -> None: register_xxx() - round_trip("""\ + round_trip( + """\ - !xxx # hello name: Anthon location: Germany language: python - """) + """ + ) def test_scalar_00(self) -> None: # https://stackoverflow.com/a/45967047/1307905 - round_trip("""\ + round_trip( + """\ Outputs: Vpc: Value: !Ref: vpc # first tag Export: Name: !Sub "${AWS::StackName}-Vpc" # second tag - """) + """ + ) class TestIssue201: def test_encoded_unicode_tag(self) -> None: - round_trip_load(""" + round_trip_load( + """ s: !!python/%75nicode 'abc' - """) + """ + ) class TestImplicitTaggedNodes: def test_scalar(self) -> None: - data = round_trip("""\ + data = round_trip( + """\ - !SString abcdefg - !SFloat 1.0 - !SInt 1961 - !SBool true - !SLit | glitter in the dark near the Tanhäuser gate - """) + """ + ) # tagged scalers have string or string types as value assert data[0].count('d') == 1 assert data[1].count('1') == 1 @@ -145,21 +193,27 @@ def test_scalar(self) -> None: assert data[4].count('a') == 4 def test_mapping(self) -> None: - round_trip("""\ + round_trip( + """\ - !Mapping {a: 1, b: 2} - """) + """ + ) def test_sequence(self) -> None: yaml = YAML() yaml.brace_single_entry_mapping_in_flow_sequence = True yaml.mapping_value_align = True - yaml.round_trip(""" + yaml.round_trip( + """ - !Sequence [a, {b: 1}, {c: {d: 3}}] - """) + """ + ) def test_sequence2(self) -> None: yaml = YAML() yaml.mapping_value_align = True - yaml.round_trip(""" + yaml.round_trip( + """ - !Sequence [a, b: 1, c: {d: 3}] - """) + """ + ) diff --git a/_test/test_z_data.py b/_test/test_z_data.py index f2e72f1..b78f732 100644 --- a/_test/test_z_data.py +++ b/_test/test_z_data.py @@ -23,6 +23,7 @@ def __init__(self, s: Any) -> None: 'TAB': '\t', '---': '---', '...': '...', + 'NL': '\n', } # fmt: on @@ -78,6 +79,22 @@ def value(self) -> Any: return self._pa +class Events(YAMLData): + yaml_tag = '!Events' + + +class JSONData(YAMLData): + yaml_tag = '!JSON' + + +class Dump(YAMLData): + yaml_tag = '!Dump' + + +class Emit(YAMLData): + yaml_tag = '!Emit' + + def pytest_generate_tests(metafunc: Any) -> None: test_yaml = [] paths = sorted(base_path.glob('**/*.yaml')) @@ -100,13 +117,16 @@ def pytest_generate_tests(metafunc: Any) -> None: class TestYAMLData: - def yaml(self, yaml_version: Optional[Any] = None) -> Any: + def yaml( + self, yaml_version: Optional[Any] = None, typ: Any = 'rt', pure: Any = None + ) -> Any: from ruamel.yaml import YAML - y = YAML() + y = YAML(typ=typ, pure=pure) y.preserve_quotes = True if yaml_version: y.version = yaml_version + y.composer.warn_double_anchors = False return y def docs(self, path: Path) -> List[Any]: @@ -117,6 +137,10 @@ def docs(self, path: Path) -> List[Any]: tyaml.register_class(Python) tyaml.register_class(Output) tyaml.register_class(Assert) + tyaml.register_class(Events) + tyaml.register_class(JSONData) + tyaml.register_class(Dump) + tyaml.register_class(Emit) return list(tyaml.load_all(path)) def yaml_load(self, value: Any, yaml_version: Optional[Any] = None) -> Tuple[Any, Any]: @@ -137,6 +161,74 @@ def round_trip( print('>>>> rt output\n', value.replace(' ', '\u2423'), sep='') # 2423 open box assert value == expected + def gen_events( + self, input: Any, output: Any, yaml_version: Optional[Any] = None + ) -> None: + from ruamel.yaml.compat import StringIO + + buf = StringIO() + yaml = self.yaml(yaml_version=yaml_version) + indent = 0 + try: + for event in yaml.parse(input.value): + compact = event.compact_repr() + assert compact[0] in '+=-' + if compact[0] == '-': + indent -= 1 + print(f'{" "*indent}{compact}', file=buf) + if compact[0] == '+': + indent += 1 + + except Exception as e: # NOQA + print('=EXCEPTION', file=buf) # exceptions not indented + if '=EXCEPTION' not in output.value: + raise + print('>>>> buf\n', buf.getvalue(), sep='') + assert buf.getvalue() == output.value + + def load_compare_json( + self, input: Any, output: Any, yaml_version: Optional[Any] = None + ) -> None: + import json + from ruamel.yaml.compat import StringIO + from ruamel.yaml.comments import CommentedMap, TaggedScalar + + def serialize_obj(obj: Any) -> Any: + if isinstance(obj, CommentedMap): + return {k: v for k, v in obj.items()} + elif isinstance(obj, TaggedScalar): + return str(obj.value) + elif isinstance(obj, set): + return {k: None for k in obj} + return str(obj) + + buf = StringIO() + yaml = self.yaml(typ='rt', yaml_version=yaml_version) + for data in yaml.load_all(input.value): + if isinstance(data, dict): + data = {str(k): v for k, v in data.items()} + json.dump(data, buf, sort_keys=True, indent=2, default=serialize_obj) + buf.write('\n') + print('>>>> buf\n', buf.getvalue(), sep='') + # jsons = json.dumps(json.loads(output.value)) # normalize formatting of JSON + assert buf.getvalue() == output.value + + def load_compare_emit( + self, input: Any, output: Any, yaml_version: Optional[Any] = None + ) -> None: + from ruamel.yaml.compat import StringIO + + buf = StringIO() + yaml = self.yaml(yaml_version=yaml_version) + yaml.preserve_quotes = True + data = input.value + if data.startswith('---') or '\n--- ' in data or '\n---' in data: + yaml.explicit_start = True + data = list(yaml.load_all(data)) + yaml.dump_all(data, buf) + print('>>>> buf\n', buf.getvalue(), sep='') + assert buf.getvalue() == output.value + def load_assert( self, input: Any, confirm: Any, yaml_version: Optional[Any] = None ) -> None: @@ -190,58 +282,95 @@ def test_yaml_data(self, yaml: Any, tmpdir: Any) -> None: from collections.abc import Mapping idx = 0 - typ = None + typs = [] # list of test to be performed yaml_version = None docs = self.docs(yaml) if isinstance(docs[0], Mapping): d = docs[0] + if d.get('skip'): + pytest.skip('explicit skip') + if '1.3-mod' in d.get('tags', []): + pytest.skip('YAML 1.3') typ = d.get('type') + if isinstance(typ, str): + typs.append(typ) + elif isinstance(typ, list): + typs.extend(typ[:]) + del typ yaml_version = d.get('yaml_version') if 'python' in d: if not check_python_version(d['python']): pytest.skip('unsupported version') idx += 1 - data = output = confirm = python = None + data = output = confirm = python = events = json = dump = emit = None for doc in docs[idx:]: if isinstance(doc, Output): output = doc + elif isinstance(doc, Events): + events = doc + elif isinstance(doc, JSONData): + json = doc + elif isinstance(doc, Dump): + dump = doc # NOQA + elif isinstance(doc, Emit): + emit = doc # NOQA elif isinstance(doc, Assert): confirm = doc elif isinstance(doc, Python): python = doc - if typ is None: - typ = 'python_run' + if len(typs) == 0: + typs = ['python_run'] elif isinstance(doc, YAMLData): data = doc else: print('no handler for type:', type(doc), repr(doc)) raise AssertionError() - if typ is None: + if len(typs) == 0: if data is not None and output is not None: - typ = 'rt' + typs = ['rt'] elif data is not None and confirm is not None: - typ = 'load_assert' + typs = ['load_assert'] else: assert data is not None - typ = 'rt' - print('type:', typ) + typs = ['rt'] + print('type:', typs) if data is not None: print('>>>> data:\n', data.value.replace(' ', '\u2423'), sep='', end='') - print('>>>> output:\n', output.value if output is not None else output, sep='') - if typ == 'rt': - self.round_trip(data, output, yaml_version=yaml_version) - elif typ == 'python_run': - inp = None if output is None or data is None else data - self.run_python(python, output if output is not None else data, tmpdir, input=inp) - elif typ == 'load_assert': - self.load_assert(data, confirm, yaml_version=yaml_version) - elif typ == 'comment': - actions: List[Any] = [] - self.insert_comments(data, actions) + if events is not None: + print('>>>> events:\n', events.value, sep='') else: - f'\n>>>>>> run type unknown: "{typ}" <<<<<<\n' - raise AssertionError() + print('>>>> output:\n', output.value if output is not None else output, sep='') + for typ in typs: + if typ == 'rt': + self.round_trip(data, output, yaml_version=yaml_version) + elif typ == 'python_run': + inp = None if output is None or data is None else data + self.run_python( + python, output if output is not None else data, tmpdir, input=inp + ) + elif typ == 'load_assert': + self.load_assert(data, confirm, yaml_version=yaml_version) + elif typ == 'comment': + actions: List[Any] = [] + self.insert_comments(data, actions) + elif typ == 'events': + if events is None: + print('need to specify !Events for type:', typ) + sys.exit(1) + self.gen_events(data, events, yaml_version=yaml_version) + elif typ == 'json': + if json is None: + print('need to specify !JSON for type:', typ) + sys.exit(1) + self.load_compare_json(data, json, yaml_version=yaml_version) + elif typ == 'dump': + continue + elif typ == 'emit': + self.load_compare_emit(data, emit) + else: + f'\n>>>>>> run type unknown: "{typ}" <<<<<<\n' + raise AssertionError() def check_python_version(match: Any, current: Optional[Any] = None) -> bool: diff --git a/comments.py b/comments.py index dc128ee..0360654 100644 --- a/comments.py +++ b/comments.py @@ -14,6 +14,7 @@ from ruamel.yaml.compat import MutableSliceableSequence, nprintf # NOQA from ruamel.yaml.scalarstring import ScalarString from ruamel.yaml.anchor import Anchor +from ruamel.yaml.tag import Tag from collections.abc import MutableSet, Sized, Set, Mapping @@ -79,7 +80,6 @@ def __str__(self) -> Any: format_attrib = '_yaml_format' line_col_attrib = '_yaml_line_col' merge_attrib = '_yaml_merge' -tag_attrib = '_yaml_tag' class Comment: @@ -194,8 +194,8 @@ def __contains__(self, x: Any) -> Any: # to distinguish key from None -def NoComment() -> None: - pass +class NotNone: + pass # NOQA class Format: @@ -264,19 +264,6 @@ def __repr__(self) -> str: return f'LineCol({self.line}, {self.col})' -class Tag: - """store tag information for roundtripping""" - - __slots__ = ('value',) - attrib = tag_attrib - - def __init__(self) -> None: - self.value = None - - def __repr__(self) -> Any: - return f'{self.__class__.__name__}({self.value!r})' - - class CommentedBase: @property def ca(self): @@ -380,7 +367,7 @@ def fa(self) -> Any: return getattr(self, Format.attrib) def yaml_add_eol_comment( - self, comment: Any, key: Optional[Any] = NoComment, column: Optional[Any] = None + self, comment: Any, key: Optional[Any] = NotNone, column: Optional[Any] = None ) -> None: """ there is a problem as eol comments should start with ' #' @@ -442,8 +429,8 @@ def tag(self) -> Any: setattr(self, Tag.attrib, Tag()) return getattr(self, Tag.attrib) - def yaml_set_tag(self, value: Any) -> None: - self.tag.value = value + def yaml_set_ctag(self, value: Tag) -> None: + setattr(self, Tag.attrib, value) def copy_attributes(self, t: Any, memo: Any = None) -> None: # fmt: off @@ -511,8 +498,8 @@ def extend(self, val: Any) -> None: def __eq__(self, other: Any) -> bool: return list.__eq__(self, other) - def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NoComment) -> None: - if key is not NoComment: + def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NotNone) -> None: + if key is not NotNone: self.yaml_key_comment_extend(key, comment) else: self.ca.comment = comment @@ -593,8 +580,8 @@ def __repr__(self) -> Any: class CommentedKeySeq(tuple, CommentedBase): # type: ignore """This primarily exists to be able to roundtrip keys that are sequences""" - def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NoComment) -> None: - if key is not NoComment: + def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NotNone) -> None: + if key is not NotNone: self.yaml_key_comment_extend(key, comment) else: self.ca.comment = comment @@ -714,13 +701,13 @@ def __init__(self, *args: Any, **kw: Any) -> None: ordereddict.__init__(self, *args, **kw) def _yaml_add_comment( - self, comment: Any, key: Optional[Any] = NoComment, value: Optional[Any] = NoComment + self, comment: Any, key: Optional[Any] = NotNone, value: Optional[Any] = NotNone ) -> None: """values is set to key to indicate a value attachment of comment""" - if key is not NoComment: + if key is not NotNone: self.yaml_key_comment_extend(key, comment) return - if value is not NoComment: + if value is not NotNone: self.yaml_value_comment_extend(value, comment) else: self.ca.comment = comment @@ -799,8 +786,11 @@ def insert(self, pos: Any, key: Any, value: Any, comment: Optional[Any] = None) if key in self._ok: del self[key] keys = [k for k in self.keys() if k in self._ok] - ma0 = getattr(self, merge_attrib, [[-1]])[0] - merge_pos = ma0[0] + try: + ma0 = getattr(self, merge_attrib, [[-1]])[0] + merge_pos = ma0[0] + except IndexError: + merge_pos = -1 if merge_pos >= 0: if merge_pos >= pos: getattr(self, merge_attrib)[0] = (merge_pos + 1, ma0[1]) @@ -920,6 +910,16 @@ def __iter__(self) -> Any: for x in ordereddict.__iter__(self): yield x + def pop(self, key: Any, default: Any = NotNone) -> Any: + try: + result = self[key] + except KeyError: + if default is NotNone: + raise + return default + del self[key] + return result + def _keys(self) -> Any: for x in ordereddict.__iter__(self): yield x @@ -1030,8 +1030,8 @@ def __repr__(self) -> Any: def fromkeys(keys: Any, v: Any = None) -> Any: return CommentedKeyMap(dict.fromkeys(keys, v)) - def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NoComment) -> None: - if key is not NoComment: + def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NotNone) -> None: + if key is not NotNone: self.yaml_key_comment_extend(key, comment) else: self.ca.comment = comment @@ -1085,13 +1085,13 @@ def __init__(self, values: Any = None) -> None: self |= values def _yaml_add_comment( - self, comment: Any, key: Optional[Any] = NoComment, value: Optional[Any] = NoComment + self, comment: Any, key: Optional[Any] = NotNone, value: Optional[Any] = NotNone ) -> None: """values is set to key to indicate a value attachment of comment""" - if key is not NoComment: + if key is not NotNone: self.yaml_key_comment_extend(key, comment) return - if value is not NoComment: + if value is not NotNone: self.yaml_value_comment_extend(value, comment) else: self.ca.comment = comment @@ -1128,7 +1128,9 @@ def __init__(self, value: Any = None, style: Any = None, tag: Any = None) -> Non self.value = value self.style = style if tag is not None: - self.yaml_set_tag(tag) + if isinstance(tag, str): + tag = Tag(suffix=tag) + self.yaml_set_ctag(tag) def __str__(self) -> Any: return self.value diff --git a/composer.py b/composer.py index c943c1b..e876bdc 100644 --- a/composer.py +++ b/composer.py @@ -32,6 +32,7 @@ def __init__(self, loader: Any = None) -> None: if self.loader is not None and getattr(self.loader, '_composer', None) is None: self.loader._composer = self self.anchors: Dict[Any, Any] = {} + self.warn_double_anchors = True @property def parser(self) -> Any: @@ -111,7 +112,7 @@ def compose_node(self, parent: Any, index: Any) -> Any: event = self.parser.peek_event() anchor = event.anchor if anchor is not None: # have an anchor - if anchor in self.anchors: + if self.warn_double_anchors and anchor in self.anchors: ws = ( f'\nfound duplicate anchor {anchor!r}\n' f'first occurrence {self.anchors[anchor].start_mark}\n' @@ -130,9 +131,11 @@ def compose_node(self, parent: Any, index: Any) -> Any: def compose_scalar_node(self, anchor: Any) -> Any: event = self.parser.get_event() - tag = event.tag - if tag is None or tag == '!': + tag = event.ctag + if tag is None or str(tag) == '!': tag = self.resolver.resolve(ScalarNode, event.value, event.implicit) + assert not isinstance(tag, str) + # e.g tag.yaml.org,2002:str node = ScalarNode( tag, event.value, @@ -148,9 +151,10 @@ def compose_scalar_node(self, anchor: Any) -> Any: def compose_sequence_node(self, anchor: Any) -> Any: start_event = self.parser.get_event() - tag = start_event.tag - if tag is None or tag == '!': + tag = start_event.ctag + if tag is None or str(tag) == '!': tag = self.resolver.resolve(SequenceNode, None, start_event.implicit) + assert not isinstance(tag, str) node = SequenceNode( tag, [], @@ -180,9 +184,10 @@ def compose_sequence_node(self, anchor: Any) -> Any: def compose_mapping_node(self, anchor: Any) -> Any: start_event = self.parser.get_event() - tag = start_event.tag - if tag is None or tag == '!': + tag = start_event.ctag + if tag is None or str(tag) == '!': tag = self.resolver.resolve(MappingNode, None, start_event.implicit) + assert not isinstance(tag, str) node = MappingNode( tag, [], diff --git a/constructor.py b/constructor.py index dc7e5ed..0054620 100644 --- a/constructor.py +++ b/constructor.py @@ -986,6 +986,17 @@ def construct_scalar(self, node: Any) -> Any: return SingleQuotedScalarString(node.value, anchor=node.anchor) if node.style == '"': return DoubleQuotedScalarString(node.value, anchor=node.anchor) + # if node.ctag: + # data2 = TaggedScalar() + # data2.value = node.value + # data2.style = node.style + # data2.yaml_set_ctag(node.ctag) + # if node.anchor: + # from ruamel.yaml.serializer import templated_id + + # if not templated_id(node.anchor): + # data2.yaml_set_anchor(node.anchor, always_dump=True) + # return data2 if node.anchor: return PlainScalarString(node.value, anchor=node.anchor) return node.value @@ -1162,7 +1173,10 @@ def leading_zeros(v: Any) -> int: ) def construct_yaml_str(self, node: Any) -> Any: - value = self.construct_scalar(node) + if node.ctag.handle: + value = self.construct_unknown(node) + else: + value = self.construct_scalar(node) if isinstance(value, ScalarString): return value return value @@ -1218,7 +1232,7 @@ def constructed(value_node: Any) -> Any: if value_node in self.constructed_objects: value = self.constructed_objects[value_node] else: - value = self.construct_object(value_node, deep=False) + value = self.construct_object(value_node, deep=True) return value # merge = [] @@ -1569,7 +1583,7 @@ def construct_unknown( data.fa.set_flow_style() elif node.flow_style is False: data.fa.set_block_style() - data.yaml_set_tag(node.tag) + data.yaml_set_ctag(node.ctag) yield data if node.anchor: from ruamel.yaml.serializer import templated_id @@ -1582,7 +1596,7 @@ def construct_unknown( data2 = TaggedScalar() data2.value = self.construct_scalar(node) data2.style = node.style - data2.yaml_set_tag(node.tag) + data2.yaml_set_ctag(node.ctag) yield data2 if node.anchor: from ruamel.yaml.serializer import templated_id @@ -1597,7 +1611,7 @@ def construct_unknown( data3.fa.set_flow_style() elif node.flow_style is False: data3.fa.set_block_style() - data3.yaml_set_tag(node.tag) + data3.yaml_set_ctag(node.ctag) yield data3 if node.anchor: from ruamel.yaml.serializer import templated_id diff --git a/emitter.py b/emitter.py index 94986fe..a068800 100644 --- a/emitter.py +++ b/emitter.py @@ -748,7 +748,7 @@ def check_simple_key(self) -> bool: and self.event.tag is not None ): if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(self.event.tag) + self.prepared_tag = self.prepare_tag(self.event.ctag) length += len(self.prepared_tag) if isinstance(self.event, ScalarEvent): if self.analysis is None: @@ -813,7 +813,7 @@ def process_tag(self) -> None: if tag is None: raise EmitterError('tag is not specified') if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(tag) + self.prepared_tag = self.prepare_tag(self.event.ctag) if self.prepared_tag: self.write_indicator(self.prepared_tag, True) if ( @@ -825,6 +825,9 @@ def process_tag(self) -> None: self.prepared_tag = None def choose_scalar_style(self) -> Any: + # issue 449 needs this otherwise emits single quoted empty string + if self.event.value == '' and self.event.ctag.handle == '!!': + return None if self.analysis is None: self.analysis = self.analyze_scalar(self.event.value) if self.event.style == '"' or self.canonical: @@ -956,6 +959,7 @@ def prepare_tag_prefix(self, prefix: Any) -> Any: def prepare_tag(self, tag: Any) -> Any: if not tag: raise EmitterError('tag must not be empty') + tag = str(tag) if tag == '!' or tag == '!!': return tag handle = None @@ -1723,3 +1727,26 @@ def write_post_comment(self, event: Any) -> bool: comment = event.comment[0] self.write_comment(comment) return True + + +class RoundTripEmitter(Emitter): + def prepare_tag(self, ctag: Any) -> Any: + if not ctag: + raise EmitterError('tag must not be empty') + tag = str(ctag) + # print('handling', repr(tag)) + if tag == '!' or tag == '!!': + return tag + handle = ctag.handle + suffix = ctag.suffix + prefixes = sorted(self.tag_prefixes.keys()) + # print('handling', repr(tag), repr(suffix), repr(handle)) + if handle is None: + for prefix in prefixes: + if tag.startswith(prefix) and (prefix == '!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = suffix[len(prefix) :] + if handle: + return f'{handle!s}{suffix!s}' + else: + return f'!<{suffix!s}>' diff --git a/events.py b/events.py index d3e679b..92e3b3b 100644 --- a/events.py +++ b/events.py @@ -3,6 +3,7 @@ # Abstract classes. from typing import Any, Dict, Optional, List # NOQA +from ruamel.yaml.tag import Tag SHOW_LINES = False @@ -13,6 +14,7 @@ def CommentCheck() -> None: class Event: __slots__ = 'start_mark', 'end_mark', 'comment' + crepr = 'Unspecified Event' def __init__( self, start_mark: Any = None, end_mark: Any = None, comment: Any = CommentCheck @@ -55,6 +57,9 @@ def __repr__(self) -> Any: arguments += f', comment={self.comment!r}' return f'{self.__class__.__name__!s}({arguments!s})' + def compact_repr(self) -> str: + return f'{self.crepr}' + class NodeEvent(Event): __slots__ = ('anchor',) @@ -67,7 +72,7 @@ def __init__( class CollectionStartEvent(NodeEvent): - __slots__ = 'tag', 'implicit', 'flow_style', 'nr_items' + __slots__ = 'ctag', 'implicit', 'flow_style', 'nr_items' def __init__( self, @@ -81,11 +86,15 @@ def __init__( nr_items: Optional[int] = None, ) -> None: NodeEvent.__init__(self, anchor, start_mark, end_mark, comment) - self.tag = tag + self.ctag = tag self.implicit = implicit self.flow_style = flow_style self.nr_items = nr_items + @property + def tag(self) -> Optional[str]: + return None if self.ctag is None else str(self.ctag) + class CollectionEndEvent(Event): __slots__ = () @@ -96,6 +105,7 @@ class CollectionEndEvent(Event): class StreamStartEvent(Event): __slots__ = ('encoding',) + crepr = '+STR' def __init__( self, @@ -110,10 +120,12 @@ def __init__( class StreamEndEvent(Event): __slots__ = () + crepr = '-STR' class DocumentStartEvent(Event): __slots__ = 'explicit', 'version', 'tags' + crepr = '+DOC' def __init__( self, @@ -129,9 +141,14 @@ def __init__( self.version = version self.tags = tags + def compact_repr(self) -> str: + start = ' ---' if self.explicit else '' + return f'{self.crepr}{start}' + class DocumentEndEvent(Event): __slots__ = ('explicit',) + crepr = '-DOC' def __init__( self, @@ -143,9 +160,14 @@ def __init__( Event.__init__(self, start_mark, end_mark, comment) self.explicit = explicit + def compact_repr(self) -> str: + end = ' ...' if self.explicit else '' + return f'{self.crepr}{end}' + class AliasEvent(NodeEvent): __slots__ = 'style' + crepr = '=ALI' def __init__( self, @@ -158,9 +180,13 @@ def __init__( NodeEvent.__init__(self, anchor, start_mark, end_mark, comment) self.style = style + def compact_repr(self) -> str: + return f'{self.crepr} *{self.anchor}' + class ScalarEvent(NodeEvent): - __slots__ = 'tag', 'implicit', 'value', 'style' + __slots__ = 'ctag', 'implicit', 'value', 'style' + crepr = '=VAL' def __init__( self, @@ -174,23 +200,65 @@ def __init__( comment: Any = None, ) -> None: NodeEvent.__init__(self, anchor, start_mark, end_mark, comment) - self.tag = tag + self.ctag = tag self.implicit = implicit self.value = value self.style = style + @property + def tag(self) -> Optional[str]: + return None if self.ctag is None else str(self.ctag) + + @tag.setter + def tag(self, val: Any) -> None: + if isinstance(val, str): + val = Tag(suffix=val) + self.ctag = val + + def compact_repr(self) -> str: + style = ':' if self.style is None else self.style + anchor = f'&{self.anchor} ' if self.anchor else '' + tag = f'<{self.tag!s}> ' if self.tag else '' + value = self.value + for ch, rep in [ + ('\\', '\\\\'), + ('\t', '\\t'), + ('\n', '\\n'), + ('\a', ''), # remove from folded + ('\r', '\\r'), + ('\b', '\\b'), + ]: + value = value.replace(ch, rep) + return f'{self.crepr} {anchor}{tag}{style}{value}' + class SequenceStartEvent(CollectionStartEvent): __slots__ = () + crepr = '+SEQ' + + def compact_repr(self) -> str: + flow = ' []' if self.flow_style else '' + anchor = f' &{self.anchor}' if self.anchor else '' + tag = f' <{self.tag!s}>' if self.tag else '' + return f'{self.crepr}{flow}{anchor}{tag}' class SequenceEndEvent(CollectionEndEvent): __slots__ = () + crepr = '-SEQ' class MappingStartEvent(CollectionStartEvent): __slots__ = () + crepr = '+MAP' + + def compact_repr(self) -> str: + flow = ' {}' if self.flow_style else '' + anchor = f' &{self.anchor}' if self.anchor else '' + tag = f' <{self.tag!s}>' if self.tag else '' + return f'{self.crepr}{flow}{anchor}{tag}' class MappingEndEvent(CollectionEndEvent): __slots__ = () + crepr = '-MAP' diff --git a/main.py b/main.py index e3bf851..9068282 100644 --- a/main.py +++ b/main.py @@ -118,7 +118,7 @@ def __init__( elif 'rtsc' in self.typ: self.default_flow_style = False # no optimized rt-dumper yet - self.Emitter = ruamel.yaml.emitter.Emitter + self.Emitter = ruamel.yaml.emitter.RoundTripEmitter self.Serializer = ruamel.yaml.serializer.Serializer self.Representer = ruamel.yaml.representer.RoundTripRepresenter self.Scanner = ruamel.yaml.scanner.RoundTripScannerSC @@ -133,7 +133,7 @@ def __init__( if setup_rt: self.default_flow_style = False # no optimized rt-dumper yet - self.Emitter = ruamel.yaml.emitter.Emitter + self.Emitter = ruamel.yaml.emitter.RoundTripEmitter self.Serializer = ruamel.yaml.serializer.Serializer self.Representer = ruamel.yaml.representer.RoundTripRepresenter self.Scanner = ruamel.yaml.scanner.RoundTripScanner diff --git a/nodes.py b/nodes.py index b2f4e13..4281368 100644 --- a/nodes.py +++ b/nodes.py @@ -2,11 +2,12 @@ import sys -from typing import Dict, Any, Text # NOQA +from typing import Dict, Any, Text, Optional # NOQA +from ruamel.yaml.tag import Tag class Node: - __slots__ = 'tag', 'value', 'start_mark', 'end_mark', 'comment', 'anchor' + __slots__ = 'ctag', 'value', 'start_mark', 'end_mark', 'comment', 'anchor' def __init__( self, @@ -17,13 +18,24 @@ def __init__( comment: Any = None, anchor: Any = None, ) -> None: - self.tag = tag + # you can still get a string from the serializer + self.ctag = tag if isinstance(tag, Tag) else Tag(suffix=tag) self.value = value self.start_mark = start_mark self.end_mark = end_mark self.comment = comment self.anchor = anchor + @property + def tag(self) -> Optional[str]: + return None if self.ctag is None else str(self.ctag) + + @tag.setter + def tag(self, val: Any) -> None: + if isinstance(val, str): + val = Tag(suffix=val) + self.ctag = val + def __repr__(self) -> Any: value = self.value # if isinstance(value, list): diff --git a/parser.py b/parser.py index c8b5fcf..7a7d979 100644 --- a/parser.py +++ b/parser.py @@ -81,6 +81,7 @@ from ruamel.yaml.scanner import BlankLineComment from ruamel.yaml.comments import C_PRE, C_POST, C_SPLIT_ON_FIRST_BLANK from ruamel.yaml.compat import nprint, nprintf # NOQA +from ruamel.yaml.tag import Tag from typing import Any, Dict, Optional, List, Optional # NOQA @@ -182,6 +183,7 @@ def parse_stream_start(self) -> Any: def parse_implicit_document_start(self) -> Any: # Parse an implicit document. if not self.scanner.check_token(DirectiveToken, DocumentStartToken, StreamEndToken): + # don't need copy, as an implicit tag doesn't add tag_handles self.tag_handles = self.DEFAULT_TAGS token = self.scanner.peek_token() start_mark = end_mark = token.start_mark @@ -243,6 +245,18 @@ def parse_document_end(self) -> Any: explicit = False if self.scanner.check_token(DocumentEndToken): token = self.scanner.get_token() + # if token.end_mark.line != self.peek_event().start_mark.line: + pt = self.scanner.peek_token() + if not isinstance(pt, StreamEndToken) and ( + token.end_mark.line == pt.start_mark.line + ): + raise ParserError( + None, + None, + 'found non-comment content after document end marker, ' + f'{self.scanner.peek_token().id,!r}', + self.scanner.peek_token().start_mark, + ) end_mark = token.end_mark explicit = True event = DocumentEndEvent(start_mark, end_mark, explicit=explicit) @@ -251,7 +265,11 @@ def parse_document_end(self) -> Any: if self.resolver.processing_version == (1, 1): self.state = self.parse_document_start else: - self.state = self.parse_implicit_document_start + if explicit: + # found a document end marker, can be followed by implicit document + self.state = self.parse_implicit_document_start + else: + self.state = self.parse_document_start return event @@ -331,8 +349,13 @@ def parse_flow_node(self) -> Any: def parse_block_node_or_indentless_sequence(self) -> Any: return self.parse_node(block=True, indentless_sequence=True) - def transform_tag(self, handle: Any, suffix: Any) -> Any: - return self.tag_handles[handle] + suffix + # def transform_tag(self, handle: Any, suffix: Any) -> Any: + # return self.tag_handles[handle] + suffix + + def select_tag_transform(self, tag: Tag) -> None: + if tag is None: + return + tag.select_transform(False) def parse_node(self, block: bool = False, indentless_sequence: bool = False) -> Any: if self.scanner.check_token(AliasToken): @@ -354,39 +377,34 @@ def parse_node(self, block: bool = False, indentless_sequence: bool = False) -> token = self.scanner.get_token() tag_mark = token.start_mark end_mark = token.end_mark - tag = token.value + # tag = token.value + tag = Tag( + handle=token.value[0], suffix=token.value[1], handles=self.tag_handles, + ) elif self.scanner.check_token(TagToken): token = self.scanner.get_token() start_mark = tag_mark = token.start_mark end_mark = token.end_mark - tag = token.value + # tag = token.value + tag = Tag(handle=token.value[0], suffix=token.value[1], handles=self.tag_handles) if self.scanner.check_token(AnchorToken): token = self.scanner.get_token() start_mark = tag_mark = token.start_mark end_mark = token.end_mark anchor = token.value if tag is not None: - handle, suffix = tag - if handle is not None: - if handle not in self.tag_handles: - raise ParserError( - 'while parsing a node', - start_mark, - f'found undefined tag handle {handle!r}', - tag_mark, - ) - tag = self.transform_tag(handle, suffix) - else: - tag = suffix - # if tag == '!': - # raise ParserError("while parsing a node", start_mark, - # "found non-specific tag '!'", tag_mark, - # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' - # and share your opinion.") + self.select_tag_transform(tag) + if tag.check_handle(): + raise ParserError( + 'while parsing a node', + start_mark, + f'found undefined tag handle {tag.handle!r}', + tag_mark, + ) if start_mark is None: start_mark = end_mark = self.scanner.peek_token().start_mark event = None - implicit = tag is None or tag == '!' + implicit = tag is None or str(tag) == '!' if indentless_sequence and self.scanner.check_token(BlockEntryToken): comment = None pt = self.scanner.peek_token() @@ -399,7 +417,7 @@ def parse_node(self, block: bool = False, indentless_sequence: bool = False) -> comment = pt.comment end_mark = self.scanner.peek_token().end_mark event = SequenceStartEvent( - anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment + anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment, ) self.state = self.parse_indentless_sequence_entry return event @@ -408,17 +426,17 @@ def parse_node(self, block: bool = False, indentless_sequence: bool = False) -> token = self.scanner.get_token() # self.scanner.peek_token_same_line_comment(token) end_mark = token.end_mark - if (token.plain and tag is None) or tag == '!': - implicit = (True, False) + if (token.plain and tag is None) or str(tag) == '!': + dimplicit = (True, False) elif tag is None: - implicit = (False, True) + dimplicit = (False, True) else: - implicit = (False, False) + dimplicit = (False, False) # nprint('se', token.value, token.comment) event = ScalarEvent( anchor, tag, - implicit, + dimplicit, token.value, start_mark, end_mark, @@ -775,24 +793,10 @@ def move_token_comment( class RoundTripParser(Parser): """roundtrip is a safe loader, that wants to see the unmangled tag""" - def transform_tag(self, handle: Any, suffix: Any) -> Any: - # return self.tag_handles[handle]+suffix - if handle == '!!' and suffix in ( - 'null', - 'bool', - 'int', - 'float', - 'binary', - 'timestamp', - 'omap', - 'pairs', - 'set', - 'str', - 'seq', - 'map', - ): - return Parser.transform_tag(self, handle, suffix) - return handle + suffix + def select_tag_transform(self, tag: Tag) -> None: + if tag is None: + return + tag.select_transform(True) def move_token_comment( self, token: Any, nt: Optional[Any] = None, empty: Optional[bool] = False diff --git a/representer.py b/representer.py index 8a03234..9d122bc 100644 --- a/representer.py +++ b/representer.py @@ -148,6 +148,8 @@ def represent_scalar( comment = getattr(value, 'comment', None) if comment: comment = [None, [comment]] + if isinstance(tag, str): + tag = Tag(suffix=tag) node = ScalarNode(tag, value, style=style, comment=comment, anchor=anchor) if self.alias_key is not None: self.represented_objects[self.alias_key] = node @@ -157,6 +159,8 @@ def represent_sequence( self, tag: Any, sequence: Any, flow_style: Any = None ) -> SequenceNode: value: List[Any] = [] + if isinstance(tag, str): + tag = Tag(suffix=tag) node = SequenceNode(tag, value, flow_style=flow_style) if self.alias_key is not None: self.represented_objects[self.alias_key] = node @@ -175,6 +179,8 @@ def represent_sequence( def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> SequenceNode: value: List[Any] = [] + if isinstance(tag, str): + tag = Tag(suffix=tag) node = SequenceNode(tag, value, flow_style=flow_style) if self.alias_key is not None: self.represented_objects[self.alias_key] = node @@ -195,6 +201,8 @@ def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> Sequenc def represent_mapping(self, tag: Any, mapping: Any, flow_style: Any = None) -> MappingNode: value: List[Any] = [] + if isinstance(tag, str): + tag = Tag(suffix=tag) node = MappingNode(tag, value, flow_style=flow_style) if self.alias_key is not None: self.represented_objects[self.alias_key] = node @@ -709,6 +717,8 @@ def represent_sequence( anchor = sequence.yaml_anchor() except AttributeError: anchor = None + if isinstance(tag, str): + tag = Tag(suffix=tag) node = SequenceNode(tag, value, flow_style=flow_style, anchor=anchor) if self.alias_key is not None: self.represented_objects[self.alias_key] = node @@ -784,6 +794,8 @@ def represent_mapping(self, tag: Any, mapping: Any, flow_style: Any = None) -> M anchor = mapping.yaml_anchor() except AttributeError: anchor = None + if isinstance(tag, str): + tag = Tag(suffix=tag) node = MappingNode(tag, value, flow_style=flow_style, anchor=anchor) if self.alias_key is not None: self.represented_objects[self.alias_key] = node @@ -858,7 +870,9 @@ def represent_mapping(self, tag: Any, mapping: Any, flow_style: Any = None) -> M else: arg = self.represent_data(merge_list) arg.flow_style = True - value.insert(merge_pos, (ScalarNode('tag:yaml.org,2002:merge', '<<'), arg)) + value.insert( + merge_pos, (ScalarNode(Tag(suffix='tag:yaml.org,2002:merge'), '<<'), arg) + ) return node def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> SequenceNode: @@ -871,6 +885,8 @@ def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> Sequenc anchor = omap.yaml_anchor() except AttributeError: anchor = None + if isinstance(tag, str): + tag = Tag(suffix=tag) node = SequenceNode(tag, value, flow_style=flow_style, anchor=anchor) if self.alias_key is not None: self.represented_objects[self.alias_key] = node @@ -926,7 +942,7 @@ def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> Sequenc def represent_set(self, setting: Any) -> MappingNode: flow_style = False - tag = 'tag:yaml.org,2002:set' + tag = Tag(suffix='tag:yaml.org,2002:set') # return self.represent_mapping(tag, value) value: List[Any] = [] flow_style = setting.fa.flow_style(flow_style) @@ -979,30 +995,32 @@ def represent_set(self, setting: Any) -> MappingNode: def represent_dict(self, data: Any) -> MappingNode: """write out tag if saved on loading""" try: - t = data.tag.value + _ = data.tag except AttributeError: - t = None - if t: - if t.startswith('!!'): - tag = 'tag:yaml.org,2002:' + t[2:] - else: - tag = t + tag = Tag(suffix='tag:yaml.org,2002:map') else: - tag = 'tag:yaml.org,2002:map' + if data.tag.trval: + if data.tag.startswith('!!'): + tag = Tag(suffix='tag:yaml.org,2002:' + data.tag.trval[2:]) + else: + tag = data.tag + else: + tag = Tag(suffix='tag:yaml.org,2002:map') return self.represent_mapping(tag, data) def represent_list(self, data: Any) -> SequenceNode: try: - t = data.tag.value + _ = data.tag except AttributeError: - t = None - if t: - if t.startswith('!!'): - tag = 'tag:yaml.org,2002:' + t[2:] - else: - tag = t + tag = Tag(suffix='tag:yaml.org,2002:seq') else: - tag = 'tag:yaml.org,2002:seq' + if data.tag.trval: + if data.tag.startswith('!!'): + tag = Tag(suffix='tag:yaml.org,2002:' + data.tag.trval[2:]) + else: + tag = data.tag + else: + tag = Tag(suffix='tag:yaml.org,2002:seq') return self.represent_sequence(tag, data) def represent_datetime(self, data: Any) -> ScalarNode: @@ -1019,7 +1037,10 @@ def represent_datetime(self, data: Any) -> ScalarNode: def represent_tagged_scalar(self, data: Any) -> ScalarNode: try: - tag = data.tag.value + if data.tag.handle == '!!': + tag = f'{data.tag.handle} {data.tag.suffix}' + else: + tag = data.tag except AttributeError: tag = None try: diff --git a/resolver.py b/resolver.py index e7ed6d9..b97c8b6 100644 --- a/resolver.py +++ b/resolver.py @@ -5,6 +5,7 @@ from typing import Any, Dict, List, Union, Text, Optional # NOQA from ruamel.yaml.compat import VersionType # NOQA +from ruamel.yaml.tag import Tag from ruamel.yaml.compat import _DEFAULT_YAML_VERSION # NOQA from ruamel.yaml.error import * # NOQA from ruamel.yaml.nodes import MappingNode, ScalarNode, SequenceNode # NOQA @@ -102,9 +103,9 @@ class ResolverError(YAMLError): class BaseResolver: - DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str' - DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq' - DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map' + DEFAULT_SCALAR_TAG = Tag(suffix='tag:yaml.org,2002:str') + DEFAULT_SEQUENCE_TAG = Tag(suffix='tag:yaml.org,2002:seq') + DEFAULT_MAPPING_TAG = Tag(suffix='tag:yaml.org,2002:map') yaml_implicit_resolvers: Dict[Any, Any] = {} yaml_path_resolvers: Dict[Any, Any] = {} @@ -268,14 +269,14 @@ def resolve(self, kind: Any, value: Any, implicit: Any) -> Any: resolvers += self.yaml_implicit_resolvers.get(None, []) for tag, regexp in resolvers: if regexp.match(value): - return tag + return Tag(suffix=tag) implicit = implicit[1] if bool(self.yaml_path_resolvers): exact_paths = self.resolver_exact_paths[-1] if kind in exact_paths: - return exact_paths[kind] + return Tag(suffix=exact_paths[kind]) if None in exact_paths: - return exact_paths[None] + return Tag(suffix=exact_paths[None]) if kind is ScalarNode: return self.DEFAULT_SCALAR_TAG elif kind is SequenceNode: @@ -354,14 +355,14 @@ def resolve(self, kind: Any, value: Any, implicit: Any) -> Any: resolvers += self.versioned_resolver.get(None, []) for tag, regexp in resolvers: if regexp.match(value): - return tag + return Tag(suffix=tag) implicit = implicit[1] if bool(self.yaml_path_resolvers): exact_paths = self.resolver_exact_paths[-1] if kind in exact_paths: - return exact_paths[kind] + return Tag(suffix=exact_paths[kind]) if None in exact_paths: - return exact_paths[None] + return Tag(suffix=exact_paths[None]) if kind is ScalarNode: return self.DEFAULT_SCALAR_TAG elif kind is SequenceNode: diff --git a/scanner.py b/scanner.py index c09ae9c..d746380 100644 --- a/scanner.py +++ b/scanner.py @@ -1291,16 +1291,25 @@ def scan_block_scalar_indentation(self) -> Any: srp = self.reader.peek srf = self.reader.forward chunks = [] + first_indent = -1 max_indent = 0 end_mark = self.reader.get_mark() while srp() in ' \r\n\x85\u2028\u2029': if srp() != ' ': + if first_indent < 0: + first_indent = self.reader.column chunks.append(self.scan_line_break()) end_mark = self.reader.get_mark() else: srf() if self.reader.column > max_indent: max_indent = self.reader.column + if first_indent > 0 and max_indent > first_indent: + start_mark = self.reader.get_mark() + raise ScannerError( + 'more indented follow up line than first in a block scalar', + start_mark, + ) return chunks, max_indent, end_mark def scan_block_scalar_breaks(self, indent: int) -> Any: @@ -1493,7 +1502,9 @@ def scan_plain(self) -> Any: break while True: ch = srp(length) - if ch == ':' and srp(length + 1) not in _THE_END_SPACE_TAB: + if ch == ':' and srp(length + 1) == ',': + break + elif ch == ':' and srp(length + 1) not in _THE_END_SPACE_TAB: pass elif ch == '?' and self.scanner_processing_version != (1, 1): pass @@ -1918,6 +1929,37 @@ def scan_line_break(self, empty_line: bool = False) -> Text: def scan_block_scalar(self, style: Any, rt: Optional[bool] = True) -> Any: return Scanner.scan_block_scalar(self, style, rt=rt) + def scan_uri_escapes(self, name: Any, start_mark: Any) -> Any: + """ + The roundtripscanner doesn't do URI escaping + """ + # See the specification for details. + srp = self.reader.peek + srf = self.reader.forward + code_bytes: List[Any] = [] + chunk = '' + mark = self.reader.get_mark() + while srp() == '%': + chunk += '%' + srf() + for k in range(2): + if srp(k) not in '0123456789ABCDEFabcdef': + raise ScannerError( + f'while scanning an {name!s}', + start_mark, + f'expected URI escape sequence of 2 hexdecimal numbers, ' + f'but found {srp(k)!r}', + self.reader.get_mark(), + ) + code_bytes.append(int(self.reader.prefix(2), 16)) + chunk += self.reader.prefix(2) + srf(2) + try: + _ = bytes(code_bytes).decode('utf-8') + except UnicodeDecodeError as exc: + raise ScannerError(f'while scanning an {name!s}', start_mark, str(exc), mark) + return chunk + # commenthandling 2021, differentiatiation not needed diff --git a/serializer.py b/serializer.py index 0034240..e36b3b5 100644 --- a/serializer.py +++ b/serializer.py @@ -158,14 +158,14 @@ def serialize_node(self, node: Any, parent: Any, index: Any) -> None: detected_tag = self.resolver.resolve(ScalarNode, node.value, (True, False)) default_tag = self.resolver.resolve(ScalarNode, node.value, (False, True)) implicit = ( - (node.tag == detected_tag), - (node.tag == default_tag), - node.tag.startswith('tag:yaml.org,2002:'), + (node.ctag == detected_tag), + (node.ctag == default_tag), + node.tag.startswith('tag:yaml.org,2002:'), # type: ignore ) self.emitter.emit( ScalarEvent( alias, - node.tag, + node.ctag, implicit, node.value, style=node.style, @@ -173,7 +173,7 @@ def serialize_node(self, node: Any, parent: Any, index: Any) -> None: ) ) elif isinstance(node, SequenceNode): - implicit = node.tag == self.resolver.resolve(SequenceNode, node.value, True) + implicit = node.ctag == self.resolver.resolve(SequenceNode, node.value, True) comment = node.comment end_comment = None seq_comment = None @@ -188,7 +188,7 @@ def serialize_node(self, node: Any, parent: Any, index: Any) -> None: self.emitter.emit( SequenceStartEvent( alias, - node.tag, + node.ctag, implicit, flow_style=node.flow_style, comment=node.comment, @@ -200,7 +200,7 @@ def serialize_node(self, node: Any, parent: Any, index: Any) -> None: index += 1 self.emitter.emit(SequenceEndEvent(comment=[seq_comment, end_comment])) elif isinstance(node, MappingNode): - implicit = node.tag == self.resolver.resolve(MappingNode, node.value, True) + implicit = node.ctag == self.resolver.resolve(MappingNode, node.value, True) comment = node.comment end_comment = None map_comment = None @@ -213,7 +213,7 @@ def serialize_node(self, node: Any, parent: Any, index: Any) -> None: self.emitter.emit( MappingStartEvent( alias, - node.tag, + node.ctag, implicit, flow_style=node.flow_style, comment=node.comment, diff --git a/tag.py b/tag.py new file mode 100644 index 0000000..e5539bb --- /dev/null +++ b/tag.py @@ -0,0 +1,122 @@ +# coding: utf-8 + +""" +In round-trip mode the original tag needs to be preserved, but the tag +transformed based on the directives needs to be available as well. + +A Tag that is created during loading has a handle and a suffix. +Not all objects loaded currently have a Tag, that .tag attribute can be None +A Tag that is created for dumping only (on an object loaded without a tag) has a suffix +only. +""" + +from typing import Any, Dict, Optional, List, Union, Optional, Iterator # NOQA + +tag_attrib = '_yaml_tag' + + +class Tag: + """store original tag information for roundtripping""" + + attrib = tag_attrib + + def __init__(self, handle: Any = None, suffix: Any = None, handles: Any = None) -> None: + self.handle = handle + self.suffix = suffix + self.handles = handles + self._transform_type: Optional[bool] = None + + def __repr__(self) -> str: + return f'{self.__class__.__name__}({self.trval!r})' + + def __str__(self) -> str: + return f'{self.trval}' + + def __hash__(self) -> int: + try: + return self._hash_id # type: ignore + except AttributeError: + self._hash_id = res = hash((self.handle, self.suffix)) + return res + + def __eq__(self, other: Any) -> bool: + # other should not be a string, but the serializer sometimes provides these + if isinstance(other, str): + return self.trval == other + return bool(self.trval == other.trval) + + def startswith(self, x: str) -> bool: + if self.trval is not None: + return self.trval.startswith(x) + return False + + @property + def trval(self) -> Optional[str]: + try: + return self._trval + except AttributeError: + pass + if self.handle is None: + self._trval: Optional[str] = self.uri_decoded_suffix + return self._trval + assert self._transform_type is not None + if not self._transform_type: + # the non-round-trip case + self._trval = self.handles[self.handle] + self.uri_decoded_suffix + return self._trval + # round-trip case + if self.handle == '!!' and self.suffix in ( + 'null', + 'bool', + 'int', + 'float', + 'binary', + 'timestamp', + 'omap', + 'pairs', + 'set', + 'str', + 'seq', + 'map', + ): + self._trval = self.handles[self.handle] + self.uri_decoded_suffix + else: + # self._trval = self.handle + self.suffix + self._trval = self.handles[self.handle] + self.uri_decoded_suffix + return self._trval + + @property + def uri_decoded_suffix(self) -> Optional[str]: + try: + return self._uri_decoded_suffix + except AttributeError: + pass + if self.suffix is None: + self._uri_decoded_suffix: Optional[str] = None + return None + res = '' + # don't have to check for scanner errors here + idx = 0 + while idx < len(self.suffix): + ch = self.suffix[idx] + idx += 1 + if ch != '%': + res += ch + else: + res += chr(int(self.suffix[idx : idx + 2], 16)) + idx += 2 + self._uri_decoded_suffix = res + return res + + def select_transform(self, val: bool) -> None: + """ + val: False -> non-round-trip + True -> round-trip + """ + assert self._transform_type is None + self._transform_type = val + + def check_handle(self) -> bool: + if self.handle is None: + return False + return self.handle not in self.handles diff --git a/timestamp.py b/timestamp.py index 4ab695f..753dfc1 100644 --- a/timestamp.py +++ b/timestamp.py @@ -5,6 +5,8 @@ # ToDo: at least on PY3 you could probably attach the tzinfo correctly to the object # a more complete datetime might be used by safe loading as well +# +# add type information (iso8601, spaced) from typing import Any, Dict, Optional, List # NOQA