diff --git a/.hgtags b/.hgtags
index ffa3974..e07fb43 100644
--- a/.hgtags
+++ b/.hgtags
@@ -211,3 +211,8 @@ c887dfe19e2f01b770a2b4b04163a01e00a33f85 0.17.21
b4c00a90b4d003ff3f239df622fb638cd33146dd 0.17.23
4309006902d2453399588f4ddccfb3fc460e1eba 0.17.24
12a642699fa84085248317ee765c4956f6deeec7 0.17.25
+8a26dc2a156aa189c472b5efeb10e8c3de206091 0.17.26
+f76dde33e9a175e7505a2933a5c2423d4e3db9aa 0.17.27
+d522a02977979e5feef1d0f1b94b6b7f823c0bdd 0.17.28
+41fd3925691106c999959771e54bd69cce70d1c8 0.17.29
+0ed43732b9e309d397e9c9cfa74f115f40f51a6b 0.17.30
diff --git a/CHANGES b/CHANGES
index d8e0085..03e678a 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,31 @@
+[0, 17, 31]: 2023-05-31
+ - added tag.setter on `ScalarEvent` and on `Node`, that takes either
+ a `Tag` instance, or a str
+ (reported by `Sorin Sbarnea `__)
+
+[0, 17, 30]: 2023-05-30
+ - fix issue 467, caused by Tag instances not being hashable (reported by
+ `Douglas Raillard
+ `__)
+
+[0, 17, 29]: 2023-05-30
+ - changed the internals of the tag property from a string to a class which allows
+ for preservation of the original handle and suffix. This should
+ result in better results using documents with %TAG directives, as well
+ as preserving URI escapes in tag suffixes.
+
+[0, 17, 28]: 2023-05-26
+ - fix for issue 464: documents ending with document end marker without final newline
+ fail to load (reported by `Mariusz Rusiniak `__)
+
+[0, 17, 27]: 2023-05-25
+ - fix issue with inline mappings as value for merge keys
+ (reported by Sirish on `StackOverflow `__)
+ - fix for 468, error inserting after accessing merge attribute on ``CommentedMap``
+ (reported by `Bastien gerard `__)
+ - fix for issue 461 pop + insert on same `CommentedMap` key throwing error
+ (reported by `John Thorvald Wodder II `__)
+
[0, 17, 26]: 2023-05-09
- Fix for error on edge cage for issue 459
diff --git a/README.rst b/README.rst
index 2cb1c12..4ef687c 100644
--- a/README.rst
+++ b/README.rst
@@ -4,8 +4,8 @@ ruamel.yaml
``ruamel.yaml`` is a YAML 1.2 loader/dumper package for Python.
-:version: 0.17.26
-:updated: 2023-05-09
+:version: 0.17.31
+:updated: 2023-05-31
:documentation: http://yaml.readthedocs.io
:repository: https://sourceforge.net/projects/ruamel-yaml/
:pypi: https://pypi.org/project/ruamel.yaml/
@@ -61,8 +61,36 @@ ChangeLog
.. should insert NEXT: at the beginning of line for next key (with empty line)
+0.17.31 (2023-05-31):
+ - added tag.setter on `ScalarEvent` and on `Node`, that takes either
+ a `Tag` instance, or a str
+ (reported by `Sorin Sbarnea `__)
+
+0.17.30 (2023-05-30):
+ - fix issue 467, caused by Tag instances not being hashable (reported by
+ `Douglas Raillard
+ `__)
+
+0.17.29 (2023-05-30):
+ - changed the internals of the tag property from a string to a class which allows
+ for preservation of the original handle and suffix. This should
+ result in better results using documents with %TAG directives, as well
+ as preserving URI escapes in tag suffixes.
+
+0.17.28 (2023-05-26):
+ - fix for issue 464: documents ending with document end marker without final newline
+ fail to load (reported by `Mariusz Rusiniak `__)
+
+0.17.27 (2023-05-25):
+ - fix issue with inline mappings as value for merge keys
+ (reported by Sirish on `StackOverflow `__)
+ - fix for 468, error inserting after accessing merge attribute on ``CommentedMap``
+ (reported by `Bastien gerard `__)
+ - fix for issue 461 pop + insert on same `CommentedMap` key throwing error
+ (reported by `John Thorvald Wodder II `__)
+
0.17.26 (2023-05-09):
- - Fix for error on edge cage for issue 459
+ - fix for error on edge case for issue 459
0.17.25 (2023-05-09):
- fix for regression while dumping wrapped strings with too many backslashes removed
@@ -158,7 +186,7 @@ ChangeLog
attrs with `@attr.s()` (both reported by `ssph `__)
0.17.11 (2021-08-19):
- - fix error baseclass for ``DuplicateKeyErorr`` (reported by `Łukasz Rogalski
+ - fix error baseclass for ``DuplicateKeyError`` (reported by `Łukasz Rogalski
`__)
- fix typo in reader error message, causing `KeyError` during reader error
(reported by `MTU `__)
diff --git a/__init__.py b/__init__.py
index 210337f..9a71f6e 100644
--- a/__init__.py
+++ b/__init__.py
@@ -5,9 +5,9 @@
_package_data = dict(
full_package_name='ruamel.yaml',
- version_info=(0, 17, 26),
- __version__='0.17.26',
- version_timestamp='2023-05-09 21:59:45',
+ version_info=(0, 17, 31),
+ __version__='0.17.31',
+ version_timestamp='2023-05-31 07:56:46',
author='Anthon van der Neut',
author_email='a.van.der.neut@ruamel.eu',
description='ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order', # NOQA
diff --git a/_doc/_static/pypi.svg b/_doc/_static/pypi.svg
index 0790f1e..f9fb467 100644
--- a/_doc/_static/pypi.svg
+++ b/_doc/_static/pypi.svg
@@ -1 +1 @@
-
+
diff --git a/_test/test_api_change.py b/_test/test_api_change.py
index 8961273..84e7828 100644
--- a/_test/test_api_change.py
+++ b/_test/test_api_change.py
@@ -209,6 +209,7 @@ def test_read_unicode(self, tmpdir: Any) -> None:
with open(file_name, 'wb') as fp:
fp.write('text: HELLO_WORLD©\n'.encode('utf-8'))
text_dict = yaml.load(open(file_name, 'r'))
+ print(text_dict)
assert text_dict['text'] == 'HELLO_WORLD©'
diff --git a/_test/test_cyaml.py b/_test/test_cyaml.py
index 056093b..e1280f3 100644
--- a/_test/test_cyaml.py
+++ b/_test/test_cyaml.py
@@ -5,7 +5,7 @@
import pytest # type: ignore # NOQA
from textwrap import dedent
-NO_CLIB_VER = (3, 10)
+NO_CLIB_VER = (3, 12)
@pytest.mark.skipif( # type: ignore
diff --git a/_test/test_documents.py b/_test/test_documents.py
index 43bd8f4..b5817f9 100644
--- a/_test/test_documents.py
+++ b/_test/test_documents.py
@@ -60,6 +60,36 @@ def test_multi_doc_ends_only(self) -> None:
docs = list(round_trip_load_all(inp, version=(1, 2)))
assert docs == [['a'], ['b']]
+ def test_single_scalar_comment(self) -> None:
+ from ruamel import yaml
+
+ inp = """\
+ one # comment
+ two
+ """
+ with pytest.raises(yaml.parser.ParserError):
+ d = list(round_trip_load_all(inp, version=(1, 2))) # NOQA
+
+ def test_scalar_after_seq_document(self) -> None:
+ from ruamel import yaml
+
+ inp = """\
+ [ 42 ]
+ hello
+ """
+ with pytest.raises(yaml.parser.ParserError):
+ d = list(round_trip_load_all(inp, version=(1, 2))) # NOQA
+
+ def test_yunk_after_explicit_document_end(self) -> None:
+ from ruamel import yaml
+
+ inp = """\
+ hello: world
+ ... this is no comment
+ """
+ with pytest.raises(yaml.parser.ParserError):
+ d = list(round_trip_load_all(inp, version=(1, 2))) # NOQA
+
def test_multi_doc_ends_only_1_1(self) -> None:
from ruamel import yaml
diff --git a/_test/test_issues.py b/_test/test_issues.py
index f0a7ede..fd7160b 100644
--- a/_test/test_issues.py
+++ b/_test/test_issues.py
@@ -1088,6 +1088,62 @@ def test_issue_459(self) -> None:
data = yaml.load(out_stream.getvalue())
assert data[0]['data'] == MYOBJ['data']
+    def test_issue_461(self) -> None:
+        # pop (with and without default) followed by insert of the popped key
+        from ruamel.yaml import YAML
+
+        inp = dedent(
+            """
+        first name: Roy
+        last name: Rogers
+        city: somewhere
+        """
+        )
+        yaml = YAML()
+        data = yaml.load(inp)
+        data.pop('last name')
+        assert data.pop('not there', 'xxx') == 'xxx'
+        data.insert(1, 'last name', 'Beaty', comment='he has seen things')
+        assert list(data) == ['first name', 'last name', 'city']
+
+ def test_issue_463(self) -> None:
+ import sys
+ from ruamel.yaml.compat import StringIO
+ from ruamel.yaml import YAML
+
+ yaml = YAML()
+
+ inp = dedent(
+ """
+ first_name: Art
+ """
+ )
+ data = yaml.load(inp)
+ _ = data.merge
+ data.insert(0, 'some_key', 'test')
+ yaml.dump(data, sys.stdout)
+ buf = StringIO()
+ yaml.dump(data, buf)
+ exp = dedent(
+ """
+ some_key: test
+ first_name: Art
+ """
+ )
+ assert buf.getvalue() == exp
+
+ def test_issue_464(self) -> None:
+ # document end marker without newline threw error in 0.17.27
+ from ruamel.yaml import YAML
+
+ yaml = YAML()
+ yaml.load('---\na: True\n...')
+
+ def test_issue_467(self) -> None:
+ import ruamel.yaml
+
+ yaml = ruamel.yaml.YAML()
+ yaml.constructor.add_constructor(yaml.resolver.DEFAULT_MAPPING_TAG, lambda x, y: None)
# @pytest.mark.xfail(strict=True, reason='bla bla', raises=AssertionError)
# def test_issue_ xxx(self) -> None:
diff --git a/_test/test_literal.py b/_test/test_literal.py
index f5c42e4..fcc949c 100644
--- a/_test/test_literal.py
+++ b/_test/test_literal.py
@@ -333,3 +333,29 @@ def test_rt_non_root_literal_scalar(self) -> None:
ys = ys.format(s)
d = yaml.load(ys)
yaml.dump(d, compare=ys)
+
+ def test_regular_spaces(self) -> None:
+ import ruamel.yaml
+
+ yaml = ruamel.yaml.YAML()
+ ys = "key: |\n\n\n content\n"
+ d = yaml.load(ys)
+ assert d['key'] == '\n\ncontent\n'
+
+ def test_irregular_spaces_content(self) -> None:
+ import ruamel.yaml
+
+ yaml = ruamel.yaml.YAML()
+ ys = "key: |\n \n \n irregular content\n"
+ with pytest.raises(ruamel.yaml.scanner.ScannerError):
+ d = yaml.load(ys)
+ print(d)
+
+ def test_irregular_spaces_comment(self) -> None:
+ import ruamel.yaml
+
+ yaml = ruamel.yaml.YAML()
+ ys = "key: |\n \n \n # comment\n"
+ with pytest.raises(ruamel.yaml.scanner.ScannerError):
+ d = yaml.load(ys)
+ print(d)
diff --git a/_test/test_tag.py b/_test/test_tag.py
index 5f388e5..4f0587f 100644
--- a/_test/test_tag.py
+++ b/_test/test_tag.py
@@ -26,117 +26,165 @@ def yaml_load(cls, constructor: Any, node: Any) -> Any:
class TestIndentFailures:
def test_tag(self) -> None:
- round_trip("""\
+ round_trip(
+ """\
!!python/object:__main__.Developer
name: Anthon
location: Germany
language: python
- """)
+ """
+ )
def test_full_tag(self) -> None:
- round_trip("""\
+ round_trip(
+ """\
!!tag:yaml.org,2002:python/object:__main__.Developer
name: Anthon
location: Germany
language: python
- """)
+ """
+ )
def test_standard_tag(self) -> None:
- round_trip("""\
+ round_trip(
+ """\
!!tag:yaml.org,2002:python/object:map
name: Anthon
location: Germany
language: python
- """)
+ """
+ )
def test_Y1(self) -> None:
- round_trip("""\
+ round_trip(
+ """\
!yyy
name: Anthon
location: Germany
language: python
- """)
+ """
+ )
def test_Y2(self) -> None:
- round_trip("""\
+ round_trip(
+ """\
!!yyy
name: Anthon
location: Germany
language: python
- """)
+ """
+ )
- @pytest.mark.xfail(strict=True) # type: ignore
+ # @pytest.mark.xfail(strict=True) # type: ignore
def test_spec_6_26_tag_shorthands(self) -> None:
- round_trip("""\
+ from ruamel.yaml import YAML
+ from io import StringIO
+ from textwrap import dedent
+
+ inp = dedent(
+ """\
%TAG !e! tag:example.com,2000:app/
---
- !local foo
- !!str bar
- !e!tag%21 baz
- """)
+ """
+ )
+ yaml = YAML()
+ data = yaml.load(inp)
+ buf = StringIO()
+ yaml.dump(data, buf)
+ print('buf:\n', buf.getvalue(), sep='')
+ assert buf.getvalue() == inp
+
+
+class TestTagGeneral:
+ def test_unknow_handle(self) -> None:
+ from ruamel.yaml.parser import ParserError
+
+ with pytest.raises(ParserError):
+ round_trip(
+ """\
+ %TAG !x! tag:example.com,2000:app/
+ ---
+ - !y!tag%21 baz
+ """
+ )
class TestRoundTripCustom:
def test_X1(self) -> None:
register_xxx()
- round_trip("""\
+ round_trip(
+ """\
!xxx
name: Anthon
location: Germany
language: python
- """)
+ """
+ )
@pytest.mark.xfail(strict=True) # type: ignore
def test_X_pre_tag_comment(self) -> None:
register_xxx()
- round_trip("""\
+ round_trip(
+ """\
-
# hello
!xxx
name: Anthon
location: Germany
language: python
- """)
+ """
+ )
@pytest.mark.xfail(strict=True) # type: ignore
def test_X_post_tag_comment(self) -> None:
register_xxx()
- round_trip("""\
+ round_trip(
+ """\
- !xxx
# hello
name: Anthon
location: Germany
language: python
- """)
+ """
+ )
def test_scalar_00(self) -> None:
# https://stackoverflow.com/a/45967047/1307905
- round_trip("""\
+ round_trip(
+ """\
Outputs:
Vpc:
Value: !Ref: vpc # first tag
Export:
Name: !Sub "${AWS::StackName}-Vpc" # second tag
- """)
+ """
+ )
class TestIssue201:
def test_encoded_unicode_tag(self) -> None:
- round_trip_load("""
+ round_trip_load(
+ """
s: !!python/%75nicode 'abc'
- """)
+ """
+ )
class TestImplicitTaggedNodes:
def test_scalar(self) -> None:
- data = round_trip("""\
+ data = round_trip(
+ """\
- !SString abcdefg
- !SFloat 1.0
- !SInt 1961
- !SBool true
- !SLit |
glitter in the dark near the Tanhäuser gate
- """)
+ """
+ )
# tagged scalers have string or string types as value
assert data[0].count('d') == 1
assert data[1].count('1') == 1
@@ -145,21 +193,27 @@ def test_scalar(self) -> None:
assert data[4].count('a') == 4
def test_mapping(self) -> None:
- round_trip("""\
+ round_trip(
+ """\
- !Mapping {a: 1, b: 2}
- """)
+ """
+ )
def test_sequence(self) -> None:
yaml = YAML()
yaml.brace_single_entry_mapping_in_flow_sequence = True
yaml.mapping_value_align = True
- yaml.round_trip("""
+ yaml.round_trip(
+ """
- !Sequence [a, {b: 1}, {c: {d: 3}}]
- """)
+ """
+ )
def test_sequence2(self) -> None:
yaml = YAML()
yaml.mapping_value_align = True
- yaml.round_trip("""
+ yaml.round_trip(
+ """
- !Sequence [a, b: 1, c: {d: 3}]
- """)
+ """
+ )
diff --git a/_test/test_z_data.py b/_test/test_z_data.py
index f2e72f1..b78f732 100644
--- a/_test/test_z_data.py
+++ b/_test/test_z_data.py
@@ -23,6 +23,7 @@ def __init__(self, s: Any) -> None:
'TAB': '\t',
'---': '---',
'...': '...',
+ 'NL': '\n',
}
# fmt: on
@@ -78,6 +79,22 @@ def value(self) -> Any:
return self._pa
+class Events(YAMLData):
+ yaml_tag = '!Events'
+
+
+class JSONData(YAMLData):
+ yaml_tag = '!JSON'
+
+
+class Dump(YAMLData):
+ yaml_tag = '!Dump'
+
+
+class Emit(YAMLData):
+ yaml_tag = '!Emit'
+
+
def pytest_generate_tests(metafunc: Any) -> None:
test_yaml = []
paths = sorted(base_path.glob('**/*.yaml'))
@@ -100,13 +117,16 @@ def pytest_generate_tests(metafunc: Any) -> None:
class TestYAMLData:
- def yaml(self, yaml_version: Optional[Any] = None) -> Any:
+ def yaml(
+ self, yaml_version: Optional[Any] = None, typ: Any = 'rt', pure: Any = None
+ ) -> Any:
from ruamel.yaml import YAML
- y = YAML()
+ y = YAML(typ=typ, pure=pure)
y.preserve_quotes = True
if yaml_version:
y.version = yaml_version
+ y.composer.warn_double_anchors = False
return y
def docs(self, path: Path) -> List[Any]:
@@ -117,6 +137,10 @@ def docs(self, path: Path) -> List[Any]:
tyaml.register_class(Python)
tyaml.register_class(Output)
tyaml.register_class(Assert)
+ tyaml.register_class(Events)
+ tyaml.register_class(JSONData)
+ tyaml.register_class(Dump)
+ tyaml.register_class(Emit)
return list(tyaml.load_all(path))
def yaml_load(self, value: Any, yaml_version: Optional[Any] = None) -> Tuple[Any, Any]:
@@ -137,6 +161,74 @@ def round_trip(
print('>>>> rt output\n', value.replace(' ', '\u2423'), sep='') # 2423 open box
assert value == expected
+    def gen_events(
+        self, input: Any, output: Any, yaml_version: Optional[Any] = None
+    ) -> None:
+        """emit one compact_repr() line per parsed event and compare with output"""
+        from ruamel.yaml.compat import StringIO
+
+        stream = StringIO()
+        yaml = self.yaml(yaml_version=yaml_version)
+        depth = 0
+        try:
+            for event in yaml.parse(input.value):
+                line = event.compact_repr()
+                assert line[0] in '+=-'
+                if line[0] == '-':
+                    depth -= 1
+                print(" " * depth + line, file=stream)
+                if line[0] == '+':
+                    depth += 1
+        except Exception:
+            print('=EXCEPTION', file=stream)  # exceptions not indented
+            if '=EXCEPTION' not in output.value:
+                raise
+        print('>>>> buf\n', stream.getvalue(), sep='')
+        assert stream.getvalue() == output.value
+
+    def load_compare_json(
+        self, input: Any, output: Any, yaml_version: Optional[Any] = None
+    ) -> None:
+        """load every document from input, dump each as JSON, compare with output"""
+        import json
+        from ruamel.yaml.compat import StringIO
+        from ruamel.yaml.comments import CommentedMap, TaggedScalar
+
+        def serialize_obj(obj: Any) -> Any:
+            if isinstance(obj, CommentedMap):
+                return dict(obj.items())
+            if isinstance(obj, TaggedScalar):
+                return str(obj.value)
+            if isinstance(obj, set):
+                return dict.fromkeys(obj)
+            return str(obj)
+
+        stream = StringIO()
+        yaml = self.yaml(typ='rt', yaml_version=yaml_version)
+        for doc in yaml.load_all(input.value):
+            if isinstance(doc, dict):
+                doc = {str(key): val for key, val in doc.items()}
+            json.dump(doc, stream, sort_keys=True, indent=2, default=serialize_obj)
+            stream.write('\n')
+        print('>>>> buf\n', stream.getvalue(), sep='')
+        assert stream.getvalue() == output.value
+
+    def load_compare_emit(
+        self, input: Any, output: Any, yaml_version: Optional[Any] = None
+    ) -> None:
+        """load all documents from input, dump them again, compare with output"""
+        from ruamel.yaml.compat import StringIO
+
+        yaml = self.yaml(yaml_version=yaml_version)
+        yaml.preserve_quotes = True
+        text = input.value
+        if text.startswith('---') or '\n---' in text:
+            yaml.explicit_start = True
+        stream = StringIO()
+        yaml.dump_all(list(yaml.load_all(text)), stream)
+        print('>>>> buf\n', stream.getvalue(), sep='')
+        assert stream.getvalue() == output.value
+
def load_assert(
self, input: Any, confirm: Any, yaml_version: Optional[Any] = None
) -> None:
@@ -190,58 +282,95 @@ def test_yaml_data(self, yaml: Any, tmpdir: Any) -> None:
from collections.abc import Mapping
idx = 0
- typ = None
+ typs = [] # list of tests to be performed
yaml_version = None
docs = self.docs(yaml)
if isinstance(docs[0], Mapping):
d = docs[0]
+ if d.get('skip'):
+ pytest.skip('explicit skip')
+ if '1.3-mod' in d.get('tags', []):
+ pytest.skip('YAML 1.3')
typ = d.get('type')
+ if isinstance(typ, str):
+ typs.append(typ)
+ elif isinstance(typ, list):
+ typs.extend(typ[:])
+ del typ
yaml_version = d.get('yaml_version')
if 'python' in d:
if not check_python_version(d['python']):
pytest.skip('unsupported version')
idx += 1
- data = output = confirm = python = None
+ data = output = confirm = python = events = json = dump = emit = None
for doc in docs[idx:]:
if isinstance(doc, Output):
output = doc
+ elif isinstance(doc, Events):
+ events = doc
+ elif isinstance(doc, JSONData):
+ json = doc
+ elif isinstance(doc, Dump):
+ dump = doc # NOQA
+ elif isinstance(doc, Emit):
+ emit = doc # NOQA
elif isinstance(doc, Assert):
confirm = doc
elif isinstance(doc, Python):
python = doc
- if typ is None:
- typ = 'python_run'
+ if len(typs) == 0:
+ typs = ['python_run']
elif isinstance(doc, YAMLData):
data = doc
else:
print('no handler for type:', type(doc), repr(doc))
raise AssertionError()
- if typ is None:
+ if len(typs) == 0:
if data is not None and output is not None:
- typ = 'rt'
+ typs = ['rt']
elif data is not None and confirm is not None:
- typ = 'load_assert'
+ typs = ['load_assert']
else:
assert data is not None
- typ = 'rt'
- print('type:', typ)
+ typs = ['rt']
+ print('type:', typs)
if data is not None:
print('>>>> data:\n', data.value.replace(' ', '\u2423'), sep='', end='')
- print('>>>> output:\n', output.value if output is not None else output, sep='')
- if typ == 'rt':
- self.round_trip(data, output, yaml_version=yaml_version)
- elif typ == 'python_run':
- inp = None if output is None or data is None else data
- self.run_python(python, output if output is not None else data, tmpdir, input=inp)
- elif typ == 'load_assert':
- self.load_assert(data, confirm, yaml_version=yaml_version)
- elif typ == 'comment':
- actions: List[Any] = []
- self.insert_comments(data, actions)
+ if events is not None:
+ print('>>>> events:\n', events.value, sep='')
else:
- f'\n>>>>>> run type unknown: "{typ}" <<<<<<\n'
- raise AssertionError()
+ print('>>>> output:\n', output.value if output is not None else output, sep='')
+ for typ in typs:
+ if typ == 'rt':
+ self.round_trip(data, output, yaml_version=yaml_version)
+ elif typ == 'python_run':
+ inp = None if output is None or data is None else data
+ self.run_python(
+ python, output if output is not None else data, tmpdir, input=inp
+ )
+ elif typ == 'load_assert':
+ self.load_assert(data, confirm, yaml_version=yaml_version)
+ elif typ == 'comment':
+ actions: List[Any] = []
+ self.insert_comments(data, actions)
+ elif typ == 'events':
+ if events is None:
+ print('need to specify !Events for type:', typ)
+ sys.exit(1)
+ self.gen_events(data, events, yaml_version=yaml_version)
+ elif typ == 'json':
+ if json is None:
+ print('need to specify !JSON for type:', typ)
+ sys.exit(1)
+ self.load_compare_json(data, json, yaml_version=yaml_version)
+ elif typ == 'dump':
+ continue
+ elif typ == 'emit':
+ self.load_compare_emit(data, emit)
+ else:
+ f'\n>>>>>> run type unknown: "{typ}" <<<<<<\n'
+ raise AssertionError()
def check_python_version(match: Any, current: Optional[Any] = None) -> bool:
diff --git a/comments.py b/comments.py
index dc128ee..0360654 100644
--- a/comments.py
+++ b/comments.py
@@ -14,6 +14,7 @@
from ruamel.yaml.compat import MutableSliceableSequence, nprintf # NOQA
from ruamel.yaml.scalarstring import ScalarString
from ruamel.yaml.anchor import Anchor
+from ruamel.yaml.tag import Tag
from collections.abc import MutableSet, Sized, Set, Mapping
@@ -79,7 +80,6 @@ def __str__(self) -> Any:
format_attrib = '_yaml_format'
line_col_attrib = '_yaml_line_col'
merge_attrib = '_yaml_merge'
-tag_attrib = '_yaml_tag'
class Comment:
@@ -194,8 +194,8 @@ def __contains__(self, x: Any) -> Any:
# to distinguish key from None
-def NoComment() -> None:
- pass
+class NotNone:
+ pass # NOQA
class Format:
@@ -264,19 +264,6 @@ def __repr__(self) -> str:
return f'LineCol({self.line}, {self.col})'
-class Tag:
- """store tag information for roundtripping"""
-
- __slots__ = ('value',)
- attrib = tag_attrib
-
- def __init__(self) -> None:
- self.value = None
-
- def __repr__(self) -> Any:
- return f'{self.__class__.__name__}({self.value!r})'
-
-
class CommentedBase:
@property
def ca(self):
@@ -380,7 +367,7 @@ def fa(self) -> Any:
return getattr(self, Format.attrib)
def yaml_add_eol_comment(
- self, comment: Any, key: Optional[Any] = NoComment, column: Optional[Any] = None
+ self, comment: Any, key: Optional[Any] = NotNone, column: Optional[Any] = None
) -> None:
"""
there is a problem as eol comments should start with ' #'
@@ -442,8 +429,8 @@ def tag(self) -> Any:
setattr(self, Tag.attrib, Tag())
return getattr(self, Tag.attrib)
- def yaml_set_tag(self, value: Any) -> None:
- self.tag.value = value
+ def yaml_set_ctag(self, value: Tag) -> None:
+ setattr(self, Tag.attrib, value)
def copy_attributes(self, t: Any, memo: Any = None) -> None:
# fmt: off
@@ -511,8 +498,8 @@ def extend(self, val: Any) -> None:
def __eq__(self, other: Any) -> bool:
return list.__eq__(self, other)
- def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NoComment) -> None:
- if key is not NoComment:
+ def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NotNone) -> None:
+ if key is not NotNone:
self.yaml_key_comment_extend(key, comment)
else:
self.ca.comment = comment
@@ -593,8 +580,8 @@ def __repr__(self) -> Any:
class CommentedKeySeq(tuple, CommentedBase): # type: ignore
"""This primarily exists to be able to roundtrip keys that are sequences"""
- def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NoComment) -> None:
- if key is not NoComment:
+ def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NotNone) -> None:
+ if key is not NotNone:
self.yaml_key_comment_extend(key, comment)
else:
self.ca.comment = comment
@@ -714,13 +701,13 @@ def __init__(self, *args: Any, **kw: Any) -> None:
ordereddict.__init__(self, *args, **kw)
def _yaml_add_comment(
- self, comment: Any, key: Optional[Any] = NoComment, value: Optional[Any] = NoComment
+ self, comment: Any, key: Optional[Any] = NotNone, value: Optional[Any] = NotNone
) -> None:
"""values is set to key to indicate a value attachment of comment"""
- if key is not NoComment:
+ if key is not NotNone:
self.yaml_key_comment_extend(key, comment)
return
- if value is not NoComment:
+ if value is not NotNone:
self.yaml_value_comment_extend(value, comment)
else:
self.ca.comment = comment
@@ -799,8 +786,11 @@ def insert(self, pos: Any, key: Any, value: Any, comment: Optional[Any] = None)
if key in self._ok:
del self[key]
keys = [k for k in self.keys() if k in self._ok]
- ma0 = getattr(self, merge_attrib, [[-1]])[0]
- merge_pos = ma0[0]
+ try:
+ ma0 = getattr(self, merge_attrib, [[-1]])[0]
+ merge_pos = ma0[0]
+ except IndexError:
+ merge_pos = -1
if merge_pos >= 0:
if merge_pos >= pos:
getattr(self, merge_attrib)[0] = (merge_pos + 1, ma0[1])
@@ -920,6 +910,16 @@ def __iter__(self) -> Any:
for x in ordereddict.__iter__(self):
yield x
+ def pop(self, key: Any, default: Any = NotNone) -> Any:
+ try:
+ result = self[key]
+ except KeyError:
+ if default is NotNone:
+ raise
+ return default
+ del self[key]
+ return result
+
def _keys(self) -> Any:
for x in ordereddict.__iter__(self):
yield x
@@ -1030,8 +1030,8 @@ def __repr__(self) -> Any:
def fromkeys(keys: Any, v: Any = None) -> Any:
return CommentedKeyMap(dict.fromkeys(keys, v))
- def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NoComment) -> None:
- if key is not NoComment:
+ def _yaml_add_comment(self, comment: Any, key: Optional[Any] = NotNone) -> None:
+ if key is not NotNone:
self.yaml_key_comment_extend(key, comment)
else:
self.ca.comment = comment
@@ -1085,13 +1085,13 @@ def __init__(self, values: Any = None) -> None:
self |= values
def _yaml_add_comment(
- self, comment: Any, key: Optional[Any] = NoComment, value: Optional[Any] = NoComment
+ self, comment: Any, key: Optional[Any] = NotNone, value: Optional[Any] = NotNone
) -> None:
"""values is set to key to indicate a value attachment of comment"""
- if key is not NoComment:
+ if key is not NotNone:
self.yaml_key_comment_extend(key, comment)
return
- if value is not NoComment:
+ if value is not NotNone:
self.yaml_value_comment_extend(value, comment)
else:
self.ca.comment = comment
@@ -1128,7 +1128,9 @@ def __init__(self, value: Any = None, style: Any = None, tag: Any = None) -> Non
self.value = value
self.style = style
if tag is not None:
- self.yaml_set_tag(tag)
+ if isinstance(tag, str):
+ tag = Tag(suffix=tag)
+ self.yaml_set_ctag(tag)
def __str__(self) -> Any:
return self.value
diff --git a/composer.py b/composer.py
index c943c1b..e876bdc 100644
--- a/composer.py
+++ b/composer.py
@@ -32,6 +32,7 @@ def __init__(self, loader: Any = None) -> None:
if self.loader is not None and getattr(self.loader, '_composer', None) is None:
self.loader._composer = self
self.anchors: Dict[Any, Any] = {}
+ self.warn_double_anchors = True
@property
def parser(self) -> Any:
@@ -111,7 +112,7 @@ def compose_node(self, parent: Any, index: Any) -> Any:
event = self.parser.peek_event()
anchor = event.anchor
if anchor is not None: # have an anchor
- if anchor in self.anchors:
+ if self.warn_double_anchors and anchor in self.anchors:
ws = (
f'\nfound duplicate anchor {anchor!r}\n'
f'first occurrence {self.anchors[anchor].start_mark}\n'
@@ -130,9 +131,11 @@ def compose_node(self, parent: Any, index: Any) -> Any:
def compose_scalar_node(self, anchor: Any) -> Any:
event = self.parser.get_event()
- tag = event.tag
- if tag is None or tag == '!':
+ tag = event.ctag
+ if tag is None or str(tag) == '!':
tag = self.resolver.resolve(ScalarNode, event.value, event.implicit)
+ assert not isinstance(tag, str)
+ # e.g. tag:yaml.org,2002:str
node = ScalarNode(
tag,
event.value,
@@ -148,9 +151,10 @@ def compose_scalar_node(self, anchor: Any) -> Any:
def compose_sequence_node(self, anchor: Any) -> Any:
start_event = self.parser.get_event()
- tag = start_event.tag
- if tag is None or tag == '!':
+ tag = start_event.ctag
+ if tag is None or str(tag) == '!':
tag = self.resolver.resolve(SequenceNode, None, start_event.implicit)
+ assert not isinstance(tag, str)
node = SequenceNode(
tag,
[],
@@ -180,9 +184,10 @@ def compose_sequence_node(self, anchor: Any) -> Any:
def compose_mapping_node(self, anchor: Any) -> Any:
start_event = self.parser.get_event()
- tag = start_event.tag
- if tag is None or tag == '!':
+ tag = start_event.ctag
+ if tag is None or str(tag) == '!':
tag = self.resolver.resolve(MappingNode, None, start_event.implicit)
+ assert not isinstance(tag, str)
node = MappingNode(
tag,
[],
diff --git a/constructor.py b/constructor.py
index dc7e5ed..0054620 100644
--- a/constructor.py
+++ b/constructor.py
@@ -986,6 +986,17 @@ def construct_scalar(self, node: Any) -> Any:
return SingleQuotedScalarString(node.value, anchor=node.anchor)
if node.style == '"':
return DoubleQuotedScalarString(node.value, anchor=node.anchor)
+ # if node.ctag:
+ # data2 = TaggedScalar()
+ # data2.value = node.value
+ # data2.style = node.style
+ # data2.yaml_set_ctag(node.ctag)
+ # if node.anchor:
+ # from ruamel.yaml.serializer import templated_id
+
+ # if not templated_id(node.anchor):
+ # data2.yaml_set_anchor(node.anchor, always_dump=True)
+ # return data2
if node.anchor:
return PlainScalarString(node.value, anchor=node.anchor)
return node.value
@@ -1162,7 +1173,10 @@ def leading_zeros(v: Any) -> int:
)
def construct_yaml_str(self, node: Any) -> Any:
- value = self.construct_scalar(node)
+ if node.ctag.handle:
+ value = self.construct_unknown(node)
+ else:
+ value = self.construct_scalar(node)
if isinstance(value, ScalarString):
return value
return value
@@ -1218,7 +1232,7 @@ def constructed(value_node: Any) -> Any:
if value_node in self.constructed_objects:
value = self.constructed_objects[value_node]
else:
- value = self.construct_object(value_node, deep=False)
+ value = self.construct_object(value_node, deep=True)
return value
# merge = []
@@ -1569,7 +1583,7 @@ def construct_unknown(
data.fa.set_flow_style()
elif node.flow_style is False:
data.fa.set_block_style()
- data.yaml_set_tag(node.tag)
+ data.yaml_set_ctag(node.ctag)
yield data
if node.anchor:
from ruamel.yaml.serializer import templated_id
@@ -1582,7 +1596,7 @@ def construct_unknown(
data2 = TaggedScalar()
data2.value = self.construct_scalar(node)
data2.style = node.style
- data2.yaml_set_tag(node.tag)
+ data2.yaml_set_ctag(node.ctag)
yield data2
if node.anchor:
from ruamel.yaml.serializer import templated_id
@@ -1597,7 +1611,7 @@ def construct_unknown(
data3.fa.set_flow_style()
elif node.flow_style is False:
data3.fa.set_block_style()
- data3.yaml_set_tag(node.tag)
+ data3.yaml_set_ctag(node.ctag)
yield data3
if node.anchor:
from ruamel.yaml.serializer import templated_id
diff --git a/emitter.py b/emitter.py
index 94986fe..a068800 100644
--- a/emitter.py
+++ b/emitter.py
@@ -748,7 +748,7 @@ def check_simple_key(self) -> bool:
and self.event.tag is not None
):
if self.prepared_tag is None:
- self.prepared_tag = self.prepare_tag(self.event.tag)
+ self.prepared_tag = self.prepare_tag(self.event.ctag)
length += len(self.prepared_tag)
if isinstance(self.event, ScalarEvent):
if self.analysis is None:
@@ -813,7 +813,7 @@ def process_tag(self) -> None:
if tag is None:
raise EmitterError('tag is not specified')
if self.prepared_tag is None:
- self.prepared_tag = self.prepare_tag(tag)
+ self.prepared_tag = self.prepare_tag(self.event.ctag)
if self.prepared_tag:
self.write_indicator(self.prepared_tag, True)
if (
@@ -825,6 +825,9 @@ def process_tag(self) -> None:
self.prepared_tag = None
def choose_scalar_style(self) -> Any:
+ # issue 449 needs this otherwise emits single quoted empty string
+ if self.event.value == '' and self.event.ctag.handle == '!!':
+ return None
if self.analysis is None:
self.analysis = self.analyze_scalar(self.event.value)
if self.event.style == '"' or self.canonical:
@@ -956,6 +959,7 @@ def prepare_tag_prefix(self, prefix: Any) -> Any:
def prepare_tag(self, tag: Any) -> Any:
if not tag:
raise EmitterError('tag must not be empty')
+ tag = str(tag)
if tag == '!' or tag == '!!':
return tag
handle = None
@@ -1723,3 +1727,26 @@ def write_post_comment(self, event: Any) -> bool:
comment = event.comment[0]
self.write_comment(comment)
return True
+
+
+class RoundTripEmitter(Emitter):
+    def prepare_tag(self, ctag: Any) -> Any:
+        """Return the text to emit for ctag, keeping its original handle if it has one."""
+        if not ctag:
+            raise EmitterError('tag must not be empty')
+        tag = str(ctag)
+        if tag == '!' or tag == '!!':
+            return tag
+        handle = ctag.handle
+        suffix = ctag.suffix
+        prefixes = sorted(self.tag_prefixes.keys())
+        if handle is None:
+            for prefix in prefixes:
+                if tag.startswith(prefix) and (prefix == '!' or len(prefix) < len(tag)):
+                    handle = self.tag_prefixes[prefix]
+                    # slice the original suffix: several prefixes can match one tag
+                    suffix = ctag.suffix[len(prefix) :]
+        if handle:
+            return f'{handle!s}{suffix!s}'
+        else:
+            return f'!<{suffix!s}>'
diff --git a/events.py b/events.py
index d3e679b..92e3b3b 100644
--- a/events.py
+++ b/events.py
@@ -3,6 +3,7 @@
# Abstract classes.
from typing import Any, Dict, Optional, List # NOQA
+from ruamel.yaml.tag import Tag
SHOW_LINES = False
@@ -13,6 +14,7 @@ def CommentCheck() -> None:
class Event:
__slots__ = 'start_mark', 'end_mark', 'comment'
+ crepr = 'Unspecified Event'
def __init__(
self, start_mark: Any = None, end_mark: Any = None, comment: Any = CommentCheck
@@ -55,6 +57,9 @@ def __repr__(self) -> Any:
arguments += f', comment={self.comment!r}'
return f'{self.__class__.__name__!s}({arguments!s})'
+    def compact_repr(self) -> str:
+        """Return the short marker stored in ``crepr``."""
+        marker = self.crepr
+        return f'{marker}'
+
class NodeEvent(Event):
__slots__ = ('anchor',)
@@ -67,7 +72,7 @@ def __init__(
class CollectionStartEvent(NodeEvent):
- __slots__ = 'tag', 'implicit', 'flow_style', 'nr_items'
+ __slots__ = 'ctag', 'implicit', 'flow_style', 'nr_items'
def __init__(
self,
@@ -81,11 +86,15 @@ def __init__(
nr_items: Optional[int] = None,
) -> None:
NodeEvent.__init__(self, anchor, start_mark, end_mark, comment)
- self.tag = tag
+ self.ctag = tag
self.implicit = implicit
self.flow_style = flow_style
self.nr_items = nr_items
+    @property
+    def tag(self) -> Optional[str]:
+        """String form of the stored ``ctag``, or None when ``ctag`` is None."""
+        if self.ctag is None:
+            return None
+        return str(self.ctag)
+
class CollectionEndEvent(Event):
__slots__ = ()
@@ -96,6 +105,7 @@ class CollectionEndEvent(Event):
class StreamStartEvent(Event):
__slots__ = ('encoding',)
+ crepr = '+STR'
def __init__(
self,
@@ -110,10 +120,12 @@ def __init__(
class StreamEndEvent(Event):
__slots__ = ()
+ crepr = '-STR'
class DocumentStartEvent(Event):
__slots__ = 'explicit', 'version', 'tags'
+ crepr = '+DOC'
def __init__(
self,
@@ -129,9 +141,14 @@ def __init__(
self.version = version
self.tags = tags
+    def compact_repr(self) -> str:
+        """Return ``crepr``, followed by ' ---' when the document start is explicit."""
+        if self.explicit:
+            return f'{self.crepr} ---'
+        return f'{self.crepr}'
+
class DocumentEndEvent(Event):
__slots__ = ('explicit',)
+ crepr = '-DOC'
def __init__(
self,
@@ -143,9 +160,14 @@ def __init__(
Event.__init__(self, start_mark, end_mark, comment)
self.explicit = explicit
+    def compact_repr(self) -> str:
+        """Return ``crepr``, followed by ' ...' when the document end is explicit."""
+        if self.explicit:
+            return f'{self.crepr} ...'
+        return f'{self.crepr}'
+
class AliasEvent(NodeEvent):
__slots__ = 'style'
+ crepr = '=ALI'
def __init__(
self,
@@ -158,9 +180,13 @@ def __init__(
NodeEvent.__init__(self, anchor, start_mark, end_mark, comment)
self.style = style
+    def compact_repr(self) -> str:
+        """Return ``crepr`` followed by the anchor written as ``*anchor``."""
+        marker = self.crepr
+        target = self.anchor
+        return f'{marker} *{target}'
+
class ScalarEvent(NodeEvent):
- __slots__ = 'tag', 'implicit', 'value', 'style'
+ __slots__ = 'ctag', 'implicit', 'value', 'style'
+ crepr = '=VAL'
def __init__(
self,
@@ -174,23 +200,65 @@ def __init__(
comment: Any = None,
) -> None:
NodeEvent.__init__(self, anchor, start_mark, end_mark, comment)
- self.tag = tag
+ self.ctag = tag
self.implicit = implicit
self.value = value
self.style = style
+    @property
+    def tag(self) -> Optional[str]:
+        """String form of the stored ``ctag``, or None when ``ctag`` is None."""
+        ctag = self.ctag
+        return str(ctag) if ctag is not None else None
+
+    @tag.setter
+    def tag(self, val: Any) -> None:
+        """Store ``val`` in ``ctag``; a str is first wrapped as ``Tag(suffix=val)``."""
+        self.ctag = Tag(suffix=val) if isinstance(val, str) else val
+
+    def compact_repr(self) -> str:
+        """
+        Return ``crepr``, an optional '&anchor ' and '<tag> ', then style and value.
+
+        The style is shown as ':' when ``style`` is None. In the value,
+        backslash, tab, newline, carriage return and backspace are written as
+        backslash escapes and BEL characters are dropped.
+        """
+        escapes = str.maketrans(
+            {
+                '\\': '\\\\',
+                '\t': '\\t',
+                '\n': '\\n',
+                '\a': '',  # remove from folded
+                '\r': '\\r',
+                '\b': '\\b',
+            }
+        )
+        style = self.style if self.style is not None else ':'
+        anchor = f'&{self.anchor} ' if self.anchor else ''
+        tag_text = self.tag
+        tag = f'<{tag_text!s}> ' if tag_text else ''
+        value = self.value.translate(escapes)
+        return f'{self.crepr} {anchor}{tag}{style}{value}'
+
class SequenceStartEvent(CollectionStartEvent):
__slots__ = ()
+ crepr = '+SEQ'
+
+    def compact_repr(self) -> str:
+        """Return ``crepr`` plus ' []' for flow style, ' &anchor' and ' <tag>' when set."""
+        pieces = [f'{self.crepr}']
+        if self.flow_style:
+            pieces.append(' []')
+        if self.anchor:
+            pieces.append(f' &{self.anchor}')
+        tag_text = self.tag
+        if tag_text:
+            pieces.append(f' <{tag_text!s}>')
+        return ''.join(pieces)
class SequenceEndEvent(CollectionEndEvent):
__slots__ = ()
+ crepr = '-SEQ'
class MappingStartEvent(CollectionStartEvent):
__slots__ = ()
+ crepr = '+MAP'
+
+    def compact_repr(self) -> str:
+        """Return ``crepr`` plus ' {}' for flow style, ' &anchor' and ' <tag>' when set."""
+        pieces = [f'{self.crepr}']
+        if self.flow_style:
+            pieces.append(' {}')
+        if self.anchor:
+            pieces.append(f' &{self.anchor}')
+        tag_text = self.tag
+        if tag_text:
+            pieces.append(f' <{tag_text!s}>')
+        return ''.join(pieces)
class MappingEndEvent(CollectionEndEvent):
__slots__ = ()
+ crepr = '-MAP'
diff --git a/main.py b/main.py
index e3bf851..9068282 100644
--- a/main.py
+++ b/main.py
@@ -118,7 +118,7 @@ def __init__(
elif 'rtsc' in self.typ:
self.default_flow_style = False
# no optimized rt-dumper yet
- self.Emitter = ruamel.yaml.emitter.Emitter
+ self.Emitter = ruamel.yaml.emitter.RoundTripEmitter
self.Serializer = ruamel.yaml.serializer.Serializer
self.Representer = ruamel.yaml.representer.RoundTripRepresenter
self.Scanner = ruamel.yaml.scanner.RoundTripScannerSC
@@ -133,7 +133,7 @@ def __init__(
if setup_rt:
self.default_flow_style = False
# no optimized rt-dumper yet
- self.Emitter = ruamel.yaml.emitter.Emitter
+ self.Emitter = ruamel.yaml.emitter.RoundTripEmitter
self.Serializer = ruamel.yaml.serializer.Serializer
self.Representer = ruamel.yaml.representer.RoundTripRepresenter
self.Scanner = ruamel.yaml.scanner.RoundTripScanner
diff --git a/nodes.py b/nodes.py
index b2f4e13..4281368 100644
--- a/nodes.py
+++ b/nodes.py
@@ -2,11 +2,12 @@
import sys
-from typing import Dict, Any, Text # NOQA
+from typing import Dict, Any, Text, Optional # NOQA
+from ruamel.yaml.tag import Tag
class Node:
- __slots__ = 'tag', 'value', 'start_mark', 'end_mark', 'comment', 'anchor'
+ __slots__ = 'ctag', 'value', 'start_mark', 'end_mark', 'comment', 'anchor'
def __init__(
self,
@@ -17,13 +18,24 @@ def __init__(
comment: Any = None,
anchor: Any = None,
) -> None:
- self.tag = tag
+ # you can still get a string from the serializer
+ self.ctag = tag if isinstance(tag, Tag) else Tag(suffix=tag)
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.comment = comment
self.anchor = anchor
+    @property
+    def tag(self) -> Optional[str]:
+        """String form of the stored ``ctag``, or None when ``ctag`` is None."""
+        ctag = self.ctag
+        return str(ctag) if ctag is not None else None
+
+    @tag.setter
+    def tag(self, val: Any) -> None:
+        """Store ``val`` in ``ctag``; a str is first wrapped as ``Tag(suffix=val)``."""
+        self.ctag = Tag(suffix=val) if isinstance(val, str) else val
+
def __repr__(self) -> Any:
value = self.value
# if isinstance(value, list):
diff --git a/parser.py b/parser.py
index c8b5fcf..7a7d979 100644
--- a/parser.py
+++ b/parser.py
@@ -81,6 +81,7 @@
from ruamel.yaml.scanner import BlankLineComment
from ruamel.yaml.comments import C_PRE, C_POST, C_SPLIT_ON_FIRST_BLANK
from ruamel.yaml.compat import nprint, nprintf # NOQA
+from ruamel.yaml.tag import Tag
from typing import Any, Dict, Optional, List, Optional # NOQA
@@ -182,6 +183,7 @@ def parse_stream_start(self) -> Any:
def parse_implicit_document_start(self) -> Any:
# Parse an implicit document.
if not self.scanner.check_token(DirectiveToken, DocumentStartToken, StreamEndToken):
+ # don't need copy, as an implicit tag doesn't add tag_handles
self.tag_handles = self.DEFAULT_TAGS
token = self.scanner.peek_token()
start_mark = end_mark = token.start_mark
@@ -243,6 +245,18 @@ def parse_document_end(self) -> Any:
explicit = False
if self.scanner.check_token(DocumentEndToken):
token = self.scanner.get_token()
+ # if token.end_mark.line != self.peek_event().start_mark.line:
+ pt = self.scanner.peek_token()
+ if not isinstance(pt, StreamEndToken) and (
+ token.end_mark.line == pt.start_mark.line
+ ):
+ raise ParserError(
+ None,
+ None,
+ 'found non-comment content after document end marker, '
+ f'{self.scanner.peek_token().id,!r}',
+ self.scanner.peek_token().start_mark,
+ )
end_mark = token.end_mark
explicit = True
event = DocumentEndEvent(start_mark, end_mark, explicit=explicit)
@@ -251,7 +265,11 @@ def parse_document_end(self) -> Any:
if self.resolver.processing_version == (1, 1):
self.state = self.parse_document_start
else:
- self.state = self.parse_implicit_document_start
+ if explicit:
+ # found a document end marker, can be followed by implicit document
+ self.state = self.parse_implicit_document_start
+ else:
+ self.state = self.parse_document_start
return event
@@ -331,8 +349,13 @@ def parse_flow_node(self) -> Any:
def parse_block_node_or_indentless_sequence(self) -> Any:
return self.parse_node(block=True, indentless_sequence=True)
- def transform_tag(self, handle: Any, suffix: Any) -> Any:
- return self.tag_handles[handle] + suffix
+ # def transform_tag(self, handle: Any, suffix: Any) -> Any:
+ # return self.tag_handles[handle] + suffix
+
+    def select_tag_transform(self, tag: Tag) -> None:
+        """Select the non-round-trip transform on ``tag``; a None tag is ignored."""
+        if tag is not None:
+            tag.select_transform(False)
def parse_node(self, block: bool = False, indentless_sequence: bool = False) -> Any:
if self.scanner.check_token(AliasToken):
@@ -354,39 +377,34 @@ def parse_node(self, block: bool = False, indentless_sequence: bool = False) ->
token = self.scanner.get_token()
tag_mark = token.start_mark
end_mark = token.end_mark
- tag = token.value
+ # tag = token.value
+ tag = Tag(
+ handle=token.value[0], suffix=token.value[1], handles=self.tag_handles,
+ )
elif self.scanner.check_token(TagToken):
token = self.scanner.get_token()
start_mark = tag_mark = token.start_mark
end_mark = token.end_mark
- tag = token.value
+ # tag = token.value
+ tag = Tag(handle=token.value[0], suffix=token.value[1], handles=self.tag_handles)
if self.scanner.check_token(AnchorToken):
token = self.scanner.get_token()
start_mark = tag_mark = token.start_mark
end_mark = token.end_mark
anchor = token.value
if tag is not None:
- handle, suffix = tag
- if handle is not None:
- if handle not in self.tag_handles:
- raise ParserError(
- 'while parsing a node',
- start_mark,
- f'found undefined tag handle {handle!r}',
- tag_mark,
- )
- tag = self.transform_tag(handle, suffix)
- else:
- tag = suffix
- # if tag == '!':
- # raise ParserError("while parsing a node", start_mark,
- # "found non-specific tag '!'", tag_mark,
- # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag'
- # and share your opinion.")
+ self.select_tag_transform(tag)
+ if tag.check_handle():
+ raise ParserError(
+ 'while parsing a node',
+ start_mark,
+ f'found undefined tag handle {tag.handle!r}',
+ tag_mark,
+ )
if start_mark is None:
start_mark = end_mark = self.scanner.peek_token().start_mark
event = None
- implicit = tag is None or tag == '!'
+ implicit = tag is None or str(tag) == '!'
if indentless_sequence and self.scanner.check_token(BlockEntryToken):
comment = None
pt = self.scanner.peek_token()
@@ -399,7 +417,7 @@ def parse_node(self, block: bool = False, indentless_sequence: bool = False) ->
comment = pt.comment
end_mark = self.scanner.peek_token().end_mark
event = SequenceStartEvent(
- anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment
+ anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment,
)
self.state = self.parse_indentless_sequence_entry
return event
@@ -408,17 +426,17 @@ def parse_node(self, block: bool = False, indentless_sequence: bool = False) ->
token = self.scanner.get_token()
# self.scanner.peek_token_same_line_comment(token)
end_mark = token.end_mark
- if (token.plain and tag is None) or tag == '!':
- implicit = (True, False)
+ if (token.plain and tag is None) or str(tag) == '!':
+ dimplicit = (True, False)
elif tag is None:
- implicit = (False, True)
+ dimplicit = (False, True)
else:
- implicit = (False, False)
+ dimplicit = (False, False)
# nprint('se', token.value, token.comment)
event = ScalarEvent(
anchor,
tag,
- implicit,
+ dimplicit,
token.value,
start_mark,
end_mark,
@@ -775,24 +793,10 @@ def move_token_comment(
class RoundTripParser(Parser):
"""roundtrip is a safe loader, that wants to see the unmangled tag"""
- def transform_tag(self, handle: Any, suffix: Any) -> Any:
- # return self.tag_handles[handle]+suffix
- if handle == '!!' and suffix in (
- 'null',
- 'bool',
- 'int',
- 'float',
- 'binary',
- 'timestamp',
- 'omap',
- 'pairs',
- 'set',
- 'str',
- 'seq',
- 'map',
- ):
- return Parser.transform_tag(self, handle, suffix)
- return handle + suffix
+    def select_tag_transform(self, tag: Tag) -> None:
+        """Select the round-trip transform on ``tag``; a None tag is ignored."""
+        if tag is not None:
+            tag.select_transform(True)
def move_token_comment(
self, token: Any, nt: Optional[Any] = None, empty: Optional[bool] = False
diff --git a/representer.py b/representer.py
index 8a03234..9d122bc 100644
--- a/representer.py
+++ b/representer.py
@@ -148,6 +148,8 @@ def represent_scalar(
comment = getattr(value, 'comment', None)
if comment:
comment = [None, [comment]]
+ if isinstance(tag, str):
+ tag = Tag(suffix=tag)
node = ScalarNode(tag, value, style=style, comment=comment, anchor=anchor)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
@@ -157,6 +159,8 @@ def represent_sequence(
self, tag: Any, sequence: Any, flow_style: Any = None
) -> SequenceNode:
value: List[Any] = []
+ if isinstance(tag, str):
+ tag = Tag(suffix=tag)
node = SequenceNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
@@ -175,6 +179,8 @@ def represent_sequence(
def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> SequenceNode:
value: List[Any] = []
+ if isinstance(tag, str):
+ tag = Tag(suffix=tag)
node = SequenceNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
@@ -195,6 +201,8 @@ def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> Sequenc
def represent_mapping(self, tag: Any, mapping: Any, flow_style: Any = None) -> MappingNode:
value: List[Any] = []
+ if isinstance(tag, str):
+ tag = Tag(suffix=tag)
node = MappingNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
@@ -709,6 +717,8 @@ def represent_sequence(
anchor = sequence.yaml_anchor()
except AttributeError:
anchor = None
+ if isinstance(tag, str):
+ tag = Tag(suffix=tag)
node = SequenceNode(tag, value, flow_style=flow_style, anchor=anchor)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
@@ -784,6 +794,8 @@ def represent_mapping(self, tag: Any, mapping: Any, flow_style: Any = None) -> M
anchor = mapping.yaml_anchor()
except AttributeError:
anchor = None
+ if isinstance(tag, str):
+ tag = Tag(suffix=tag)
node = MappingNode(tag, value, flow_style=flow_style, anchor=anchor)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
@@ -858,7 +870,9 @@ def represent_mapping(self, tag: Any, mapping: Any, flow_style: Any = None) -> M
else:
arg = self.represent_data(merge_list)
arg.flow_style = True
- value.insert(merge_pos, (ScalarNode('tag:yaml.org,2002:merge', '<<'), arg))
+ value.insert(
+ merge_pos, (ScalarNode(Tag(suffix='tag:yaml.org,2002:merge'), '<<'), arg)
+ )
return node
def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> SequenceNode:
@@ -871,6 +885,8 @@ def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> Sequenc
anchor = omap.yaml_anchor()
except AttributeError:
anchor = None
+ if isinstance(tag, str):
+ tag = Tag(suffix=tag)
node = SequenceNode(tag, value, flow_style=flow_style, anchor=anchor)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
@@ -926,7 +942,7 @@ def represent_omap(self, tag: Any, omap: Any, flow_style: Any = None) -> Sequenc
def represent_set(self, setting: Any) -> MappingNode:
flow_style = False
- tag = 'tag:yaml.org,2002:set'
+ tag = Tag(suffix='tag:yaml.org,2002:set')
# return self.represent_mapping(tag, value)
value: List[Any] = []
flow_style = setting.fa.flow_style(flow_style)
@@ -979,30 +995,32 @@ def represent_set(self, setting: Any) -> MappingNode:
def represent_dict(self, data: Any) -> MappingNode:
"""write out tag if saved on loading"""
try:
- t = data.tag.value
+ _ = data.tag
except AttributeError:
- t = None
- if t:
- if t.startswith('!!'):
- tag = 'tag:yaml.org,2002:' + t[2:]
- else:
- tag = t
+ tag = Tag(suffix='tag:yaml.org,2002:map')
else:
- tag = 'tag:yaml.org,2002:map'
+ if data.tag.trval:
+ if data.tag.startswith('!!'):
+ tag = Tag(suffix='tag:yaml.org,2002:' + data.tag.trval[2:])
+ else:
+ tag = data.tag
+ else:
+ tag = Tag(suffix='tag:yaml.org,2002:map')
return self.represent_mapping(tag, data)
def represent_list(self, data: Any) -> SequenceNode:
try:
- t = data.tag.value
+ _ = data.tag
except AttributeError:
- t = None
- if t:
- if t.startswith('!!'):
- tag = 'tag:yaml.org,2002:' + t[2:]
- else:
- tag = t
+ tag = Tag(suffix='tag:yaml.org,2002:seq')
else:
- tag = 'tag:yaml.org,2002:seq'
+ if data.tag.trval:
+ if data.tag.startswith('!!'):
+ tag = Tag(suffix='tag:yaml.org,2002:' + data.tag.trval[2:])
+ else:
+ tag = data.tag
+ else:
+ tag = Tag(suffix='tag:yaml.org,2002:seq')
return self.represent_sequence(tag, data)
def represent_datetime(self, data: Any) -> ScalarNode:
@@ -1019,7 +1037,10 @@ def represent_datetime(self, data: Any) -> ScalarNode:
def represent_tagged_scalar(self, data: Any) -> ScalarNode:
try:
- tag = data.tag.value
+ if data.tag.handle == '!!':
+ tag = f'{data.tag.handle} {data.tag.suffix}'
+ else:
+ tag = data.tag
except AttributeError:
tag = None
try:
diff --git a/resolver.py b/resolver.py
index e7ed6d9..b97c8b6 100644
--- a/resolver.py
+++ b/resolver.py
@@ -5,6 +5,7 @@
from typing import Any, Dict, List, Union, Text, Optional # NOQA
from ruamel.yaml.compat import VersionType # NOQA
+from ruamel.yaml.tag import Tag
from ruamel.yaml.compat import _DEFAULT_YAML_VERSION # NOQA
from ruamel.yaml.error import * # NOQA
from ruamel.yaml.nodes import MappingNode, ScalarNode, SequenceNode # NOQA
@@ -102,9 +103,9 @@ class ResolverError(YAMLError):
class BaseResolver:
- DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str'
- DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq'
- DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map'
+ DEFAULT_SCALAR_TAG = Tag(suffix='tag:yaml.org,2002:str')
+ DEFAULT_SEQUENCE_TAG = Tag(suffix='tag:yaml.org,2002:seq')
+ DEFAULT_MAPPING_TAG = Tag(suffix='tag:yaml.org,2002:map')
yaml_implicit_resolvers: Dict[Any, Any] = {}
yaml_path_resolvers: Dict[Any, Any] = {}
@@ -268,14 +269,14 @@ def resolve(self, kind: Any, value: Any, implicit: Any) -> Any:
resolvers += self.yaml_implicit_resolvers.get(None, [])
for tag, regexp in resolvers:
if regexp.match(value):
- return tag
+ return Tag(suffix=tag)
implicit = implicit[1]
if bool(self.yaml_path_resolvers):
exact_paths = self.resolver_exact_paths[-1]
if kind in exact_paths:
- return exact_paths[kind]
+ return Tag(suffix=exact_paths[kind])
if None in exact_paths:
- return exact_paths[None]
+ return Tag(suffix=exact_paths[None])
if kind is ScalarNode:
return self.DEFAULT_SCALAR_TAG
elif kind is SequenceNode:
@@ -354,14 +355,14 @@ def resolve(self, kind: Any, value: Any, implicit: Any) -> Any:
resolvers += self.versioned_resolver.get(None, [])
for tag, regexp in resolvers:
if regexp.match(value):
- return tag
+ return Tag(suffix=tag)
implicit = implicit[1]
if bool(self.yaml_path_resolvers):
exact_paths = self.resolver_exact_paths[-1]
if kind in exact_paths:
- return exact_paths[kind]
+ return Tag(suffix=exact_paths[kind])
if None in exact_paths:
- return exact_paths[None]
+ return Tag(suffix=exact_paths[None])
if kind is ScalarNode:
return self.DEFAULT_SCALAR_TAG
elif kind is SequenceNode:
diff --git a/scanner.py b/scanner.py
index c09ae9c..d746380 100644
--- a/scanner.py
+++ b/scanner.py
@@ -1291,16 +1291,25 @@ def scan_block_scalar_indentation(self) -> Any:
srp = self.reader.peek
srf = self.reader.forward
chunks = []
+ first_indent = -1
max_indent = 0
end_mark = self.reader.get_mark()
while srp() in ' \r\n\x85\u2028\u2029':
if srp() != ' ':
+ if first_indent < 0:
+ first_indent = self.reader.column
chunks.append(self.scan_line_break())
end_mark = self.reader.get_mark()
else:
srf()
if self.reader.column > max_indent:
max_indent = self.reader.column
+ if first_indent > 0 and max_indent > first_indent:
+ start_mark = self.reader.get_mark()
+ raise ScannerError(
+ 'more indented follow up line than first in a block scalar',
+ start_mark,
+ )
return chunks, max_indent, end_mark
def scan_block_scalar_breaks(self, indent: int) -> Any:
@@ -1493,7 +1502,9 @@ def scan_plain(self) -> Any:
break
while True:
ch = srp(length)
- if ch == ':' and srp(length + 1) not in _THE_END_SPACE_TAB:
+ if ch == ':' and srp(length + 1) == ',':
+ break
+ elif ch == ':' and srp(length + 1) not in _THE_END_SPACE_TAB:
pass
elif ch == '?' and self.scanner_processing_version != (1, 1):
pass
@@ -1918,6 +1929,37 @@ def scan_line_break(self, empty_line: bool = False) -> Text:
def scan_block_scalar(self, style: Any, rt: Optional[bool] = True) -> Any:
return Scanner.scan_block_scalar(self, style, rt=rt)
+    def scan_uri_escapes(self, name: Any, start_mark: Any) -> Any:
+        """
+        Consume a run of ``%XX`` URI escapes and return it exactly as written.
+
+        The escapes are not decoded in the return value; the collected bytes
+        are only decoded as UTF-8 to check that they are valid.
+
+        Raises ScannerError when a ``%`` is not followed by two hexadecimal
+        digits, or when the escaped bytes are not valid UTF-8.
+        """
+        # See the specification for details.
+        reader = self.reader
+        hex_digits = '0123456789ABCDEFabcdef'
+        code_bytes: List[Any] = []
+        pieces = []
+        mark = reader.get_mark()
+        while reader.peek() == '%':
+            reader.forward()
+            for k in range(2):
+                if reader.peek(k) not in hex_digits:
+                    raise ScannerError(
+                        f'while scanning an {name!s}',
+                        start_mark,
+                        f'expected URI escape sequence of 2 hexdecimal numbers, '
+                        f'but found {reader.peek(k)!r}',
+                        reader.get_mark(),
+                    )
+            digits = reader.prefix(2)
+            code_bytes.append(int(digits, 16))
+            pieces.append('%' + digits)
+            reader.forward(2)
+        try:
+            bytes(code_bytes).decode('utf-8')
+        except UnicodeDecodeError as exc:
+            raise ScannerError(f'while scanning an {name!s}', start_mark, str(exc), mark)
+        return ''.join(pieces)
+
# commenthandling 2021, differentiatiation not needed
diff --git a/serializer.py b/serializer.py
index 0034240..e36b3b5 100644
--- a/serializer.py
+++ b/serializer.py
@@ -158,14 +158,14 @@ def serialize_node(self, node: Any, parent: Any, index: Any) -> None:
detected_tag = self.resolver.resolve(ScalarNode, node.value, (True, False))
default_tag = self.resolver.resolve(ScalarNode, node.value, (False, True))
implicit = (
- (node.tag == detected_tag),
- (node.tag == default_tag),
- node.tag.startswith('tag:yaml.org,2002:'),
+ (node.ctag == detected_tag),
+ (node.ctag == default_tag),
+ node.tag.startswith('tag:yaml.org,2002:'), # type: ignore
)
self.emitter.emit(
ScalarEvent(
alias,
- node.tag,
+ node.ctag,
implicit,
node.value,
style=node.style,
@@ -173,7 +173,7 @@ def serialize_node(self, node: Any, parent: Any, index: Any) -> None:
)
)
elif isinstance(node, SequenceNode):
- implicit = node.tag == self.resolver.resolve(SequenceNode, node.value, True)
+ implicit = node.ctag == self.resolver.resolve(SequenceNode, node.value, True)
comment = node.comment
end_comment = None
seq_comment = None
@@ -188,7 +188,7 @@ def serialize_node(self, node: Any, parent: Any, index: Any) -> None:
self.emitter.emit(
SequenceStartEvent(
alias,
- node.tag,
+ node.ctag,
implicit,
flow_style=node.flow_style,
comment=node.comment,
@@ -200,7 +200,7 @@ def serialize_node(self, node: Any, parent: Any, index: Any) -> None:
index += 1
self.emitter.emit(SequenceEndEvent(comment=[seq_comment, end_comment]))
elif isinstance(node, MappingNode):
- implicit = node.tag == self.resolver.resolve(MappingNode, node.value, True)
+ implicit = node.ctag == self.resolver.resolve(MappingNode, node.value, True)
comment = node.comment
end_comment = None
map_comment = None
@@ -213,7 +213,7 @@ def serialize_node(self, node: Any, parent: Any, index: Any) -> None:
self.emitter.emit(
MappingStartEvent(
alias,
- node.tag,
+ node.ctag,
implicit,
flow_style=node.flow_style,
comment=node.comment,
diff --git a/tag.py b/tag.py
new file mode 100644
index 0000000..e5539bb
--- /dev/null
+++ b/tag.py
@@ -0,0 +1,122 @@
+# coding: utf-8
+
+"""
+In round-trip mode the original tag needs to be preserved, but the tag
+transformed based on the directives needs to be available as well.
+
+A Tag that is created during loading has a handle and a suffix.
+Not all objects loaded currently have a Tag, that .tag attribute can be None
+A Tag that is created for dumping only (on an object loaded without a tag) has a suffix
+only.
+"""
+
+from typing import Any, Dict, Optional, List, Union, Optional, Iterator # NOQA
+
+tag_attrib = '_yaml_tag'
+
+
+class Tag:
+    """
+    Store original tag information for roundtripping.
+
+    ``handle`` and ``suffix`` hold the tag as it was provided; ``handles`` is
+    the mapping used to expand ``handle`` into a prefix. The expanded,
+    URI-decoded form is available as ``trval`` and is what ``str()``,
+    equality and hashing are based on.
+    """
+
+    attrib = tag_attrib
+
+    def __init__(self, handle: Any = None, suffix: Any = None, handles: Any = None) -> None:
+        self.handle = handle
+        self.suffix = suffix
+        self.handles = handles
+        # stays None until select_transform() is called
+        self._transform_type: Optional[bool] = None
+
+    def __repr__(self) -> str:
+        return f'{self.__class__.__name__}({self.trval!r})'
+
+    def __str__(self) -> str:
+        return f'{self.trval}'
+
+    def __hash__(self) -> int:
+        """Hash the transformed value, so objects that compare equal also hash equal."""
+        try:
+            return self._hash_id  # type: ignore
+        except AttributeError:
+            # __eq__ compares trval (also against plain strings), so the hash
+            # has to be derived from trval and not from (handle, suffix)
+            self._hash_id = res = hash(self.trval)
+            return res
+
+    def __eq__(self, other: Any) -> bool:
+        """Compare on the transformed value; unrelated types are never equal."""
+        # other should not be a string, but the serializer sometimes provides these
+        if isinstance(other, str):
+            return self.trval == other
+        try:
+            other_trval = other.trval
+        except AttributeError:
+            # e.g. None: let Python fall back to its default (unequal)
+            return NotImplemented  # type: ignore
+        return bool(self.trval == other_trval)
+
+    def startswith(self, x: str) -> bool:
+        """Return whether the transformed value starts with ``x`` (False if it is None)."""
+        if self.trval is not None:
+            return self.trval.startswith(x)
+        return False
+
+    @property
+    def trval(self) -> Optional[str]:
+        """
+        The transformed tag value, computed once and cached.
+
+        Without a handle this is the URI-decoded suffix. With a handle,
+        select_transform() must have been called first, and the value is the
+        prefix registered for the handle followed by the URI-decoded suffix.
+        """
+        try:
+            return self._trval
+        except AttributeError:
+            pass
+        if self.handle is None:
+            self._trval: Optional[str] = self.uri_decoded_suffix
+            return self._trval
+        assert self._transform_type is not None
+        # the round-trip and non-round-trip transforms both expand the handle
+        # through `handles`, so no distinction is needed here
+        self._trval = self.handles[self.handle] + self.uri_decoded_suffix
+        return self._trval
+
+    @property
+    def uri_decoded_suffix(self) -> Optional[str]:
+        """
+        The suffix with ``%XX`` escapes decoded, computed once and cached.
+
+        Consecutive escapes are decoded together as UTF-8, so a multi-byte
+        character written as several escapes becomes a single character.
+        Returns None when the suffix is None.
+        """
+        try:
+            return self._uri_decoded_suffix
+        except AttributeError:
+            pass
+        if self.suffix is None:
+            self._uri_decoded_suffix: Optional[str] = None
+            return None
+        parts: List[str] = []
+        escaped = bytearray()
+        # don't have to check for scanner errors here
+        idx = 0
+        while idx < len(self.suffix):
+            ch = self.suffix[idx]
+            idx += 1
+            if ch == '%':
+                escaped.append(int(self.suffix[idx : idx + 2], 16))
+                idx += 2
+                continue
+            if escaped:
+                parts.append(self._decode_escaped(escaped))
+                escaped = bytearray()
+            parts.append(ch)
+        if escaped:
+            parts.append(self._decode_escaped(escaped))
+        res = ''.join(parts)
+        self._uri_decoded_suffix = res
+        return res
+
+    @staticmethod
+    def _decode_escaped(raw: bytearray) -> str:
+        """Decode a run of escaped bytes as UTF-8, or byte-per-character if that fails."""
+        try:
+            return raw.decode('utf-8')
+        except UnicodeDecodeError:
+            # keep the one-character-per-escape result for non-UTF-8 input
+            return raw.decode('latin-1')
+
+    def select_transform(self, val: bool) -> None:
+        """
+        val: False -> non-round-trip
+             True -> round-trip
+        """
+        assert self._transform_type is None
+        self._transform_type = val
+
+    def check_handle(self) -> bool:
+        """Return True when a handle is set but is not a key of ``handles``."""
+        if self.handle is None:
+            return False
+        return self.handle not in self.handles
diff --git a/timestamp.py b/timestamp.py
index 4ab695f..753dfc1 100644
--- a/timestamp.py
+++ b/timestamp.py
@@ -5,6 +5,8 @@
# ToDo: at least on PY3 you could probably attach the tzinfo correctly to the object
# a more complete datetime might be used by safe loading as well
+#
+# add type information (iso8601, spaced)
from typing import Any, Dict, Optional, List # NOQA