Skip to content

Commit

Permalink
Improvements of YAML parser based on Kubernetes config examples
Browse files Browse the repository at this point in the history
closes #402
closes #413

Make some internal non-interface methods of TextWire protected

* Harmonize Text & Yaml wire writers
* Support anchor/alias in YAML parser
* Allow reading of literal text blocks
* Remove redundant tests, switch TEXT tests to YAML
* Fix parsing of text fields depending on quotes
* Support parsing of multi-document files
* Allow empty values treated as null
* Fix octal and trailing whitespace
* Deprecate TextWire's ValueOut implementation
  • Loading branch information
alamar authored Mar 23, 2022
1 parent a0aa1a2 commit 29b5ada
Show file tree
Hide file tree
Showing 118 changed files with 1,626 additions and 1,128 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@
<configuration>
<referenceVersion>2.23ea0</referenceVersion>
<artifactsURI>https://teamcity.chronicle.software/repository/download</artifactsURI>
<binaryCompatibilityPercentageRequired>99.9</binaryCompatibilityPercentageRequired>
<binaryCompatibilityPercentageRequired>99.8</binaryCompatibilityPercentageRequired>
</configuration>
</execution>
</executions>
Expand Down
30 changes: 24 additions & 6 deletions src/main/java/net/openhft/chronicle/wire/TextWire.java
Original file line number Diff line number Diff line change
Expand Up @@ -628,8 +628,23 @@ private boolean notNewLine(int readCode) {
protected void consumeDocumentStart() {
if (bytes.readRemaining() > 4) {
long pos = bytes.readPosition();
if (bytes.readByte(pos) == '-' && bytes.readByte(pos + 1) == '-' && bytes.readByte(pos + 2) == '-')
if (bytes.readByte(pos) == '-' && bytes.readByte(pos + 1) == '-' && bytes.readByte(pos + 2) == '-') {
bytes.readSkip(3);

consumeWhiteSpace();

pos = bytes.readPosition();
@NotNull String word = bytes.parseUtf8(StopCharTesters.SPACE_STOP);
switch (word) {
case "!!data":
case "!!data-not-ready":
case "!!meta-data":
case "!!meta-data-not-ready":
break;
default:
bytes.readPosition(pos);
}
}
}
}

Expand Down Expand Up @@ -1030,9 +1045,7 @@ public void append(@NotNull CharSequence cs, int offset, int length) {

@Nullable
public Object readObject() {
consumePadding();
consumeDocumentStart();
return readObject(0);
return getValueIn().object(Object.class);
}

@Nullable
Expand Down Expand Up @@ -1182,6 +1195,11 @@ public TextWire trimFirstCurly(boolean trimFirstCurly) {

enum NoObject {NO_OBJECT}

/**
* @deprecated Will be replaced with a different implementation in the future,
* which will generate correct Yaml but may introduce some behavior changes.
*/
@Deprecated(/* To be removed and replaced by YamlWire.TextValueOut in 2.24 #411 */)
class TextValueOut implements ValueOut, CommentAnnotationNotifier {
protected boolean hasCommentAnnotation = false;

Expand Down Expand Up @@ -1978,7 +1996,7 @@ public WireOut marshallable(@NotNull WriteMarshallable object) {
} else if (!seps.isEmpty()) {
popSep = seps.get(seps.size() - 1);
popState();
sep = NEW_LINE;
newLine();
}
if (sep.startsWith(',')) {
append(sep, 1, sep.length() - 1);
Expand Down Expand Up @@ -2031,7 +2049,7 @@ public WireOut marshallable(@NotNull Serializable object) {
} else if (seps.size() > 0) {
popSep = seps.get(seps.size() - 1);
popState();
sep = NEW_LINE;
newLine();
}
if (sep.startsWith(',')) {
append(sep, 1, sep.length() - 1);
Expand Down
47 changes: 21 additions & 26 deletions src/main/java/net/openhft/chronicle/wire/WireType.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@
package net.openhft.chronicle.wire;

import net.openhft.chronicle.bytes.Bytes;
import net.openhft.chronicle.bytes.BytesStore;
import net.openhft.chronicle.bytes.BytesUtil;
import net.openhft.chronicle.bytes.StopCharTesters;
import net.openhft.chronicle.bytes.ref.*;
import net.openhft.chronicle.core.Jvm;
import net.openhft.chronicle.core.LicenceCheck;
Expand Down Expand Up @@ -72,36 +70,19 @@ public Supplier<LongArrayValues> newLongArrayReference() {

@Nullable
@Override
public <T> T fromString(@NotNull CharSequence cs) {
public <T> T fromString(Class<T> tClass, @NotNull CharSequence cs) {
Bytes bytes = Bytes.allocateElasticDirect(cs.length());
try {
bytes.appendUtf8(cs);
if (bytes.startsWith(PREABLE)) {
truncatePreable(bytes);
}
@NotNull Wire wire = apply(bytes);
//noinspection unchecked
return (T) wire.getValueIn().object();
@NotNull TextWire wire = (TextWire) apply(bytes);
wire.consumePadding();
wire.consumeDocumentStart();
return wire.getValueIn().object(tClass);
} finally {
bytes.releaseLast();
}
}

/**
 * Skips the first four bytes of {@code bytes} and then, if the next word is one of the
 * recognised message-type tags, skips that word as well.
 *
 * <p>The read position is saved after the four-byte skip. When the parsed word is not one of
 * {@code !!data}, {@code !!data-not-ready}, {@code !!meta-data} or
 * {@code !!meta-data-not-ready}, the read position is restored to that saved value, so the
 * word is left unread.
 *
 * @param bytes buffer whose read position is advanced in place
 */
public void truncatePreable(@NotNull Bytes bytes) {
// NOTE(review): 4 presumably matches the length of the "--- " preamble the caller tests for - confirm
bytes.readSkip(4);
// remember where the candidate tag starts so it can be un-read below
long pos = bytes.readPosition();
// NOTE(review): SPACE_STOP presumably ends the word at the first space - confirm
@NotNull String word = bytes.parseUtf8(StopCharTesters.SPACE_STOP);
switch (word) {
case "!!data":
case "!!data-not-ready":
case "!!meta-data":
case "!!meta-data-not-ready":
// recognised tag: leave it consumed
break;
default:
// anything else is not a tag: rewind so it is read again later
bytes.readPosition(pos);
}
}

@Override
public boolean isText() {
return true;
Expand Down Expand Up @@ -307,6 +288,21 @@ public Wire apply(@NotNull Bytes bytes) {
return new YamlWire(bytes).useBinaryDocuments();
}

/**
 * Parses the given text and reads a single object of the requested class from it.
 *
 * <p>The characters are appended as UTF-8 to a newly allocated elastic direct buffer, a wire
 * is created over that buffer via {@code apply}, leading padding and a document-start marker
 * are consumed, and the value is then read with {@code getValueIn().object(tClass)}. The
 * buffer is released in the {@code finally} block, whether the read returns or throws.
 *
 * @param tClass class handed to {@code object(tClass)}
 * @param cs text to parse
 * @return the result of {@code object(tClass)}; annotated {@code @Nullable}
 */
@Nullable
@Override
public <T> T fromString(Class<T> tClass, @NotNull CharSequence cs) {
// initial capacity is the character count; the buffer is elastic
Bytes bytes = Bytes.allocateElasticDirect(cs.length());
try {
bytes.appendUtf8(cs);
// NOTE(review): the cast assumes apply(bytes) yields a YamlWire for this constant - confirm
@NotNull YamlWire wire = (YamlWire) apply(bytes);
wire.consumePadding();
wire.consumeDocumentStart();
return wire.getValueIn().object(tClass);
} finally {
// always release the buffer allocated above
bytes.releaseLast();
}
}

@Override
public boolean isText() {
return true;
Expand Down Expand Up @@ -351,7 +347,6 @@ public Wire apply(@NotNull Bytes bytes) {
}
};

static final BytesStore PREABLE = BytesStore.from("--- ");
private static final int COMPRESSED_SIZE = Integer.getInteger("WireType.compressedSize", 128);
private static final boolean IS_DELTA_AVAILABLE = isDeltaAvailable();
private static final boolean IS_DEFAULT_ZERO_AVAILABLE = isDefaultZeroAvailable();
Expand Down Expand Up @@ -483,7 +478,7 @@ else if (marshallable instanceof Serializable)
*/
@Nullable
public <T> T fromString(@NotNull CharSequence cs) {
return (T) fromString(Object.class, cs);
return (T) fromString(/* Allow Marshallable tuples by not requesting Object */ null, cs);
}

/**
Expand Down
1 change: 1 addition & 0 deletions src/main/java/net/openhft/chronicle/wire/YamlToken.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public enum YamlToken {
SEQUENCE_ENTRY,
SEQUENCE_START(SEQUENCE_END),
TEXT,
LITERAL,
ANCHOR,
ALIAS,
RESERVED,
Expand Down
63 changes: 35 additions & 28 deletions src/main/java/net/openhft/chronicle/wire/YamlTokeniser.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ void reset() {
freeContexts.addAll(contexts);
contexts.clear();
if (temp != null) temp.clear();
lineStart = 0;
lineStart = in.readPosition();
flowDepth = Integer.MAX_VALUE;
blockQuote = 0;
hasSequenceEntry = false;
Expand Down Expand Up @@ -127,15 +127,15 @@ YamlToken next0(int minIndent) {
if (wouldChangeContext(minIndent, indent2))
return dontRead();
lastKeyPosition = in.readPosition() - 1;
readQuoted('"');
readDoublyQuoted();
if (isFieldEnd())
return indent(YamlToken.MAPPING_START, YamlToken.MAPPING_KEY, YamlToken.TEXT, indent2);
return YamlToken.TEXT;
case '\'':
if (wouldChangeContext(minIndent, indent2))
return dontRead();
lastKeyPosition = in.readPosition() - 1;
readQuoted('\'');
readSinglyQuoted();
if (isFieldEnd())
return indent(YamlToken.MAPPING_START, YamlToken.MAPPING_KEY, YamlToken.TEXT, indent2);

Expand All @@ -155,10 +155,12 @@ YamlToken next0(int minIndent) {
if (next <= ' ') {
if (wouldChangeContext(minIndent, indent2 + 1))
return dontRead();

hasSequenceEntry = true;
return indent(YamlToken.SEQUENCE_START, YamlToken.SEQUENCE_ENTRY, YamlToken.STREAM_START, indent2 + 1);
}
if (next == '-' && in.peekUnsignedByte(in.readPosition() + 1) == '-' && in.peekUnsignedByte(in.readPosition() + 2) <= ' ') {
if (contextIndent() <= minIndent)
if (contextIndent() <= minIndent && minIndent >= 0)
return dontRead();
in.readSkip(2);
pushed.add(YamlToken.DIRECTIVES_END);
Expand All @@ -184,29 +186,28 @@ YamlToken next0(int minIndent) {
unreadLast();
return readText(indent2);
}
/* TODO
case '&':
if (in.peekUnsignedByte() > ' ') {
readAnchor();
readWord();
return YamlToken.ANCHOR;
}
break;
case '*':
if (in.peekUnsignedByte() > ' ') {
readAnchor();
readWord();
return YamlToken.ALIAS;
}
*/
break;
case '|':
if (in.peekUnsignedByte() <= ' ') {
readLiteral();
return seq(YamlToken.TEXT);
return seq(YamlToken.LITERAL);
}
break;
case '>':
if (in.peekUnsignedByte() <= ' ') {
readFolded();
return seq(YamlToken.TEXT);
return seq(YamlToken.LITERAL);
}
case '%':
readDirective();
Expand Down Expand Up @@ -294,7 +295,7 @@ private YamlToken flowPop(YamlToken start, char end) {

private YamlToken flow(YamlToken token) {
pushed.add(token);
if (!hasSequenceEntry && context() == YamlToken.SEQUENCE_START) {
if (!hasSequenceEntry && token != YamlToken.SEQUENCE_START && context() == YamlToken.SEQUENCE_START) {
hasSequenceEntry = true;
pushed.add(YamlToken.SEQUENCE_ENTRY);
}
Expand Down Expand Up @@ -345,14 +346,16 @@ private void readLiteral(boolean withNewLines) {
if (withNewLines)
readNewline();
temp.write(in, start, in.readPosition() - start);
if (!withNewLines)
if (temp.peekUnsignedByte(temp.writePosition() - 1) > ' ')
temp.append(' ');

readIndent();
int indent3 = Math.toIntExact(in.readPosition() - lineStart);
if (indent3 < indent2)
return;

if (!withNewLines)
if (temp.peekUnsignedByte(temp.writePosition() - 1) > ' ')
temp.append(' ');

if (indent3 > indent2)
in.readPosition(lineStart + indent2);
start = in.readPosition();
Expand Down Expand Up @@ -381,16 +384,6 @@ private void readNewline() {
}
}

/**
 * Reads bytes from {@code in} until one with a value of {@code ' '} or lower is read,
 * recording the span that was scanned.
 *
 * <p>{@code blockStart} is set to the read position on entry. {@code blockEnd} is updated
 * before every read, so on return it holds the position of the terminating byte, which has
 * itself been read.
 *
 * <p>NOTE(review): there is no explicit end-of-input check; whether the loop stops at the end
 * of the input depends on what {@code readUnsignedByte()} returns there - confirm.
 */
private void readAnchor() {
blockStart = in.readPosition();
while (true) {
// record the position before reading, so blockEnd excludes the terminator
blockEnd = in.readPosition();
int ch = in.readUnsignedByte();
// a space or any lower-valued byte ends the run
if (ch <= ' ')
return;
}
}

private YamlToken indent(
@NotNull YamlToken indented,
@NotNull YamlToken key,
Expand Down Expand Up @@ -561,18 +554,32 @@ private void contextPush(YamlToken context, int indent) {
pushContext0(context, indent);
}

private void readQuoted(char stop) {
blockQuote = stop;
private void readDoublyQuoted() {
blockQuote = '"';
blockStart = in.readPosition();
while (in.readRemaining() > 0) {
int ch = in.readUnsignedByte();
if (ch == '\\') {
ch = in.readUnsignedByte();
} else if (ch == blockQuote) {
blockEnd = in.readPosition() - 1;
return;
}
if (ch < 0) {
throw new IllegalStateException("Unterminated quotes " + in.subBytes(blockStart - 1, in.readPosition()));
}
if (ch == stop) {
}
}

private void readSinglyQuoted() {
blockQuote = '\'';
blockStart = in.readPosition();
while (in.readRemaining() > 0) {
int ch = in.readUnsignedByte();
if (ch == blockQuote) {
// ignore double single quotes.
int ch2 = in.peekUnsignedByte();
if (ch2 == stop) {
if (ch2 == blockQuote) {
in.readSkip(1);
continue;
}
Expand Down
Loading

0 comments on commit 29b5ada

Please sign in to comment.