diff --git a/ballerina/tests/parse_string_negative.bal b/ballerina/tests/parse_string_negative.bal index 22ded28..3934a6d 100644 --- a/ballerina/tests/parse_string_negative.bal +++ b/ballerina/tests/parse_string_negative.bal @@ -67,7 +67,12 @@ function negativeDataProvider() returns [string, string][] => [ "'invalid token 'DIRECTIVE_MARKER' inside the single-quoted scalar'" + " at line: '3' column: '3'" ], - ["negative_test_19.yaml", "'invalid block header' at line: '1' column: '10'"] + ["negative_test_19.yaml", "'invalid block header' at line: '1' column: '10'"], + ["negative_test_20.yaml", "'token cannot start in the same line as the document marker' at line: '2' column: '7'"], + [ + "negative_test_21.yaml", + "'block collection token cannot start in the same line as the directive marker' at line: '2' column: '7'" + ] ]; @@ -91,7 +96,8 @@ function tagHandleNegativeDataProvider() returns [string, string][] => [ ["tag_handle_negative_6.yaml", "'invalid digit character' at line: '1' column: '9'"], ["tag_handle_negative_7.yaml", "'invalid directive document' at line: '2' column: '1'"], ["tag_handle_negative_8.yaml", "'invalid document' at line: '1' column: '8'"], - ["tag_handle_negative_9.yaml", "'directives are not allowed in a bare document' at line: '3' column: '5'"] + ["tag_handle_negative_9.yaml", "'directives are not allowed in a bare document' at line: '3' column: '5'"], + ["tag_handle_negative_10.yaml", "'tag schema not supported' at line: '3' column: '7'"] ]; @test:Config { diff --git a/ballerina/tests/parser_tests.bal b/ballerina/tests/parser_tests.bal index fd1dfde..fc02a7d 100644 --- a/ballerina/tests/parser_tests.bal +++ b/ballerina/tests/parser_tests.bal @@ -37,7 +37,10 @@ function tagHandleData() returns [string, TestCase][] => [ ["tag_handle_5.yaml", {case: "yaml_version"}], ["tag_handle_6.yaml", {case: "reserved directive"}], ["tag_handle_7.yaml", {case: "secondary tag handle"}], - ["tag_handle_8.yaml", {case: "value"}] + ["tag_handle_8.yaml", {case: "value"}], + ["tag_handle_9.yaml", {case: "uri_scanner"}], + ["tag_handle_10.yaml", {case: "value"}], + ["tag_handle_11.yaml", {case: "value"}] ]; type TestCase record {| diff --git a/ballerina/tests/resources/negative/negative_test_20.yaml b/ballerina/tests/resources/negative/negative_test_20.yaml new file mode 100644 index 0000000..97eddac --- /dev/null +++ b/ballerina/tests/resources/negative/negative_test_20.yaml @@ -0,0 +1,2 @@ +"END OF document" +... key: value diff --git a/ballerina/tests/resources/negative/negative_test_21.yaml b/ballerina/tests/resources/negative/negative_test_21.yaml new file mode 100644 index 0000000..f0ab426 --- /dev/null +++ b/ballerina/tests/resources/negative/negative_test_21.yaml @@ -0,0 +1,2 @@ +"END OF document" +--- key: value \ No newline at end of file diff --git a/ballerina/tests/resources/negative/tag_handle_negative/tag_handle_negative_10.yaml b/ballerina/tests/resources/negative/tag_handle_negative/tag_handle_negative_10.yaml new file mode 100644 index 0000000..109e320 --- /dev/null +++ b/ballerina/tests/resources/negative/tag_handle_negative/tag_handle_negative_10.yaml @@ -0,0 +1,3 @@ +%TAG !yaml! tag:yaml.org,2002: +--- +case: ! diff --git a/ballerina/tests/resources/parser/tag_handle_10.yaml b/ballerina/tests/resources/parser/tag_handle_10.yaml new file mode 100644 index 0000000..b4f619b --- /dev/null +++ b/ballerina/tests/resources/parser/tag_handle_10.yaml @@ -0,0 +1,3 @@ +%TAG ! tag:yaml.org,2002: +--- +case: !str value diff --git a/ballerina/tests/resources/parser/tag_handle_11.yaml b/ballerina/tests/resources/parser/tag_handle_11.yaml new file mode 100644 index 0000000..d9b54ee --- /dev/null +++ b/ballerina/tests/resources/parser/tag_handle_11.yaml @@ -0,0 +1,3 @@ +%TAG ! tag:yaml.org,2002: +--- +case: !st%72 value diff --git a/ballerina/tests/resources/parser/tag_handle_9.yaml b/ballerina/tests/resources/parser/tag_handle_9.yaml new file mode 100644 index 0000000..8adf9c2 --- /dev/null +++ b/ballerina/tests/resources/parser/tag_handle_9.yaml @@ -0,0 +1,3 @@ +%TAG !yaml! tag:yaml.org,2002: +--- +case: !yaml!str uri_scanner diff --git a/native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java b/native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java index c52bd13..d00d012 100644 --- a/native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java +++ b/native/src/main/java/io/ballerina/lib/data/yaml/lexer/LexerState.java @@ -457,6 +457,7 @@ public State transition(LexerState lexerState) throws Error.YamlParserException } default -> { // Check for primary and name tag handles lexerState.lexeme = "!"; + lexerState.forward(); Scanner.iterate(lexerState, Scanner.DIFF_TAG_HANDLE_SCANNER, TAG_HANDLE, true); return this; } diff --git a/native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java b/native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java index 7ad9552..f781beb 100644 --- a/native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java +++ b/native/src/main/java/io/ballerina/lib/data/yaml/lexer/Scanner.java @@ -315,6 +315,14 @@ public boolean scan(LexerState sm) throws Error.YamlParserException { sm.setLexeme("!"); return true; } + + // Store the complete primary tag if a white space or a flow indicator is detected. + if (differentiate && matchPattern(sm, List.of(FLOW_INDICATOR_PATTERN, WHITE_SPACE_PATTERN), 1)) { + sm.setLexemeBuffer(sm.getLexeme().substring(1)); + sm.setLexeme("!"); + return true; + } + return false; } @@ -324,13 +332,6 @@ public boolean scan(LexerState sm) throws Error.YamlParserException { return true; } - // Store the complete primary tag if a white space or a flow indicator is detected. - if (differentiate && matchPattern(sm, List.of(FLOW_INDICATOR_PATTERN, WHITE_SPACE_PATTERN), 0)) { - sm.setLexemeBuffer(sm.getLexeme().substring(1)); - sm.setLexeme("!"); - return true; - } - // Store the complete primary tag if a hexadecimal escape is detected. if (differentiate && sm.peek() == '%') { scanUnicodeEscapedCharacters(sm, '%', 2); diff --git a/native/src/main/java/io/ballerina/lib/data/yaml/parser/YamlParser.java b/native/src/main/java/io/ballerina/lib/data/yaml/parser/YamlParser.java index e783ee0..7dc03d7 100644 --- a/native/src/main/java/io/ballerina/lib/data/yaml/parser/YamlParser.java +++ b/native/src/main/java/io/ballerina/lib/data/yaml/parser/YamlParser.java @@ -1222,14 +1222,14 @@ private static YamlEvent parse(ParserState state, ParserUtils.ParserOption optio // There cannot be nodes next to the document marker. if (bufferedTokenType != EOL && bufferedTokenType != COMMENT && !explicit) { - throw new Error.YamlParserException("'${state.tokenBuffer.token}' token cannot " + - "start in the same line as the document marker", state.getLine(), state.getColumn()); + throw new Error.YamlParserException("token cannot start in the same line as " + + "the document marker", state.getLine(), state.getColumn()); } // Block collection nodes cannot be next to the directive marker. if (explicit && (bufferedTokenType == PLANAR_CHAR && bufferedToken.getIndentation() != null || bufferedTokenType == SEQUENCE_ENTRY)) { - throw new Error.YamlParserException("'${state.tokenBuffer.token}' token cannot start " + + throw new Error.YamlParserException("block collection token cannot start " + "in the same line as the directive marker", state.getLine(), state.getColumn()); } } @@ -1778,7 +1778,8 @@ private static YamlEvent constructEvent(YamlEvent yamlEvent, TagStructure newNod return event; } - /** Extract the data for the given node. + /** + * Extract the data for the given node. * * @param state - Current parser state * @param peeked - If the expected token is already in the state