Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support yaml merge operator #1040

Merged
merged 7 commits into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
package com.onthegomap.planetiler.util;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.onthegomap.planetiler.reader.FileFormatException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.snakeyaml.engine.v2.api.Load;
import org.snakeyaml.engine.v2.api.LoadSettings;

Expand All @@ -33,12 +40,64 @@ public static <T> T load(Path file, Class<T> clazz) {
public static <T> T load(InputStream stream, Class<T> clazz) {
try (stream) {
Object parsed = snakeYaml.loadFromInputStream(stream);
handleMergeOperator(parsed);
return convertValue(parsed, clazz);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}

private static void handleMergeOperator(Object parsed) {
handleMergeOperator(parsed, Collections.newSetFromMap(new IdentityHashMap<>()));
}

/**
* SnakeYaml doesn't handle the <a href="https://yaml.org/type/merge.html">merge operator</a> so manually post-process
* the parsed yaml object to merge referenced objects into the parent one.
*/
private static void handleMergeOperator(Object parsed, Set<Object> parentNodes) {
if (!parentNodes.add(parsed)) {
throw new FileFormatException("Illegal recursive reference in yaml file");
}
if (parsed instanceof Map<?, ?> map) {
Object toMerge = map.remove("<<");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this code, or similar, be used check if the merge operator is the first key in the mapping?!

Suggested change
Object toMerge = map.remove("<<");
// Check if the first key is a merge operator using an iterator
Iterator<String> keyIterator = map.keySet().iterator();
Object toMerge = null;
if (keyIterator.next().equals("<<")) {
toMerge = map.remove("<<");
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, when I read the merge operator draft again, it does not prohibit placing the merge operator at any location in a map. Neither does it prohibit multiple instances of merge operators in the same map.

Here is a YAML example where the use of the merge operator is not restricted, as in this suggestion:

- base: &base
    nested_mapping: &base_nested_mapping
      key1: value1
      nested_sequence:
        - item1
        - item2
- extension: &extension
    nested_mapping:
      <<: *base_nested_mapping
      key2: value2
      nested_sequence:
        - item3
        - item4
- merged_using_array:
    <<: [ *extension, *base]
- merged_separately:
    <<: *extension
    <<: *base
- merged_interleaved:
    start: marker1
    <<: *extension
    mid: marker2
    <<: *base
    end: marker3

According to YAML Lint, it would expand to

- base:
    nested_mapping:
      key1: value1
      nested_sequence:
        - item1
        - item2
- extension:
    nested_mapping:
      key1: value1
      nested_sequence:
        - item3
        - item4
      key2: value2
- merged_using_array:
    nested_mapping:
      key1: value1
      nested_sequence:
        - item3
        - item4
      key2: value2
- merged_separately:
    nested_mapping:
      key1: value1
      nested_sequence:
        - item3
        - item4
      key2: value2
- merged_interleaved:
    start: marker1
    nested_mapping:
      key1: value1
      nested_sequence:
        - item3
        - item4
      key2: value2
    mid: marker2
    end: marker3

Please feel free to reject this suggestion

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I noticed that playing around with pyyaml. Since this approach happens post-parsing and the parser throws a clear exception in this case and you can easily switch to <<: [*a, *b] I think it's an OK limitation. A more fully-thought-out merge operator might support things like merging maps in the order they are specified when the operator appears more than once (and working for lists) but that would require support at the parser level.

if (toMerge != null) {
var orig = new LinkedHashMap<>(map);
// to preserve the map key order we insert the merged operator objects first, then the original ones
map.clear();
mergeInto(map, toMerge, false, parentNodes);
mergeInto(map, orig, true, parentNodes);
}
for (var value : map.values()) {
handleMergeOperator(value, parentNodes);
}
} else if (parsed instanceof List<?> list) {
for (var item : list) {
handleMergeOperator(item, parentNodes);
}
}
parentNodes.remove(parsed);
}

@SuppressWarnings("rawtypes")
private static void mergeInto(Map dest, Object source, boolean replace, Set<Object> parentNodes) {
if (!parentNodes.add(source)) {
throw new FileFormatException("Illegal recursive reference in yaml file");
}
if (source instanceof Map<?, ?> map) {
if (replace) {
dest.putAll(map);
} else {
map.forEach(dest::putIfAbsent);
}
} else if (source instanceof List<?> nesteds) {
for (var nested : nesteds) {
mergeInto(dest, nested, replace, parentNodes);
}
}
parentNodes.remove(source);
}

public static <T> T load(String config, Class<T> clazz) {
try (var stream = new ByteArrayInputStream(config.getBytes(StandardCharsets.UTF_8))) {
return load(stream, clazz);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package com.onthegomap.planetiler.util;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import com.onthegomap.planetiler.reader.FileFormatException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -45,4 +47,258 @@ void testLoadLargeYamlMap() {
}
assertEquals(expected, YAML.load(builder.toString(), Map.class));
}

@Test
void testMergeOperator() {
assertSameYaml("""
source: &label
a: 1
dest:
<<: *label
b: 2
""", """
source:
a: 1
dest:
a: 1
b: 2
""");
}

@Test
void testMergeOperatorNested() {
assertSameYaml("""
source: &label
a: 1
dest:
l1:
l2:
l3:
<<: *label
b: 2
""", """
source:
a: 1
dest:
l1:
l2:
l3:
a: 1
b: 2
""");
}

@Test
void testMergeOperatorOverride() {
assertSameYaml("""
source: &label
a: 1
dest:
<<: *label
a: 2
""", """
source:
a: 1
dest:
a: 2
""");
}

@Test
void testMergeOperatorMultiple() {
assertSameYaml("""
source: &label1
a: 1
z: 1
source2: &label2
a: 2
b: 3
dest:
<<: [*label1, *label2]
b: 4
c: 5
""", """
source:
a: 1
z: 1
source2:
a: 2
b: 3
dest:
a: 1 # from label1 since it came first
b: 4
c: 5
z: 1
""");
}

@Test
void testMergeNotAnchor() {
assertSameYaml("""
<<:
a: 1
b: 2
b: 3
c: 4
""", """
a: 1
b: 3
c: 4
""");
}

@Test
void testMergeOperatorSecond() {
assertSameYaml("""
source: &label
a: 1
dest:
c: 3
<<: *label
b: 2
""", """
source:
a: 1
dest:
a: 1
b: 2
c: 3
""");
}

@Test
void testMergeOperatorFromDraft1() {
assertSameYaml("""
- { x: 1, y: 2 }
- { x: 0, y: 2 }
- { r: 10 }
- { r: 1 }
- # Explicit keys
x: 1
y: 2
r: 10
label: center/big
""", """
- &CENTER { x: 1, y: 2 }
- &LEFT { x: 0, y: 2 }
- &BIG { r: 10 }
- &SMALL { r: 1 }
- # Merge one map
<< : *CENTER
r: 10
label: center/big
""");
}

@Test
void testMergeOperatorFromDraft2() {
assertSameYaml("""
- { x: 1, y: 2 }
- { x: 0, y: 2 }
- { r: 10 }
- { r: 1 }
- # Explicit keys
x: 1
y: 2
r: 10
label: center/big
""", """
- &CENTER { x: 1, y: 2 }
- &LEFT { x: 0, y: 2 }
- &BIG { r: 10 }
- &SMALL { r: 1 }
- # Merge multiple maps
<< : [ *CENTER, *BIG ]
label: center/big
""");
}

@Test
void testMergeOperatorFromDraft3() {
assertSameYaml("""
- { x: 1, y: 2 }
- { x: 0, y: 2 }
- { r: 10 }
- { r: 1 }
- # Explicit keys
x: 1
y: 2
r: 10
label: center/big
""", """
- &CENTER { x: 1, y: 2 }
- &LEFT { x: 0, y: 2 }
- &BIG { r: 10 }
- &SMALL { r: 1 }
- # Override
<< : [ *BIG, *LEFT, *SMALL ]
x: 1
label: center/big
""");
}

@Test
void testAnchorAndAliasMap() {
assertSameYaml("""
source: &label
a: 1
dest: *label
""", """
source:
a: 1
dest:
a: 1
""");
}

@Test
void testAnchorAndAliasList() {
assertSameYaml("""
source: &label
- 1
dest: *label
""", """
source: [1]
dest: [1]
""");
}

@Test
void testAllowRefInMergeDoc() {
assertSameYaml("""
source: &label
a: &label1
c: 1
b: *label1
d:
<<: *label1
dest: *label
""", """
source: {a: {c: 1}, b: {c: 1}, d: {c: 1}}
dest: {a: {c: 1}, b: {c: 1}, d: {c: 1}}
""");
}

@Test
void testFailsOnRecursiveRefs() {
assertThrows(FileFormatException.class, () -> YAML.load("""
source: &label
- *label
""", Object.class));
assertThrows(FileFormatException.class, () -> YAML.load("""
source: &label
<<: *label
""", Object.class));
assertThrows(FileFormatException.class, () -> YAML.load("""
source: &label
a: *label
""", Object.class));
}

private static void assertSameYaml(String a, String b) {
assertEquals(
YAML.load(b, Object.class),
YAML.load(a, Object.class)
);
}
}
Loading