diff --git a/src/scanner.jl b/src/scanner.jl index 0ea091a..91f735a 100644 --- a/src/scanner.jl +++ b/src/scanner.jl @@ -1,5 +1,41 @@ +# YAML 1.1 [22] b-line-feed ::= #xA /*LF*/ +# YAML 1.2 [24] b-line-feed ::= x0A +const b_line_feed = '\n' + +# YAML 1.1 [23] b-carriage-return ::= #xD /*CR*/ +# YAML 1.2 [25] b-carriage-return ::= x0D +const b_carriage_return = '\r' + +# YAML 1.1 [24] b-next-line ::= #x85 /*NEL*/ +# YAML 1.2 don't have this. +const yaml_1_1_b_next_line = '\u85' + +# YAML 1.1 [25] b-line-separator ::= #x2028 /*LS*/ +# YAML 1.2 don't have this. +const yaml_1_1_b_line_separator = '\u2028' + +# YAML 1.1 [26] b-paragraph-separator ::= #x2029 /*PS*/ +# YAML 1.2 don't have this. +const yaml_1_1_b_paragraph_separator = '\u2029' + # YAML 1.1 [27] b-char ::= b-line-feed | b-carriage-return | b-next-line | b-line-separator | b-paragraph-separator -is_b_char(::YAMLV1_1, c::Char) = c == '\n' || c == '\r' || c == '\u85' || c == '\u2028' || c == '\u2029' +is_b_char(::YAMLV1_1, c::Char) = + c == b_line_feed || + c == b_carriage_return || + c == yaml_1_1_b_next_line || + c == yaml_1_1_b_line_separator || + c == yaml_1_1_b_paragraph_separator + +# YAML 1.2 [26] b-char ::= b-line-feed | b-carriage-return # x0A x0D +is_b_char(::YAMLV1_2, c::Char) = + c == b_line_feed || + c == b_carriage_return + +# YAML 1.1 [28] b-specific ::= b-line-separator | b-paragraph-separator +# YAML 1.2 don't have this. +is_b_specific(::YAMLV1_1, c::Char) = + c == yaml_1_1_b_line_separator || + c == yaml_1_1_b_paragraph_separator # YAML 1.2 [31] s-space ::= x20 const yaml_1_2_s_space = ' ' @@ -154,25 +190,88 @@ function get_mark(stream::TokenStream) Mark(stream.index, stream.line, stream.column) end +# ------------- +# forwardchars! +# ------------- + +# Advance the stream by a chacater and the index. +function forwardchar_skip!(stream::TokenStream) + forward!(stream.input) + stream.index += 1 + nothing +end + +# Advance the stream by a chacater and the index and a column. +function forwardchar_nobreak!(stream::TokenStream) + forward!(stream.input) + stream.index += 1 + stream.column += 1 + nothing +end + +# Advance the stream by a chacater and the index and break a line. +function forwardchar_breakline!(stream::TokenStream) + forward!(stream.input) + stream.index += 1 + stream.column = 0 + stream.line += 1 + nothing +end + +# forwardchars!(::YAMLVersion, ::TokenStream, ::Integer=1) +# Advance the stream by `n` characters. -# Advance the stream by k characters. -function forwardchars!(stream::TokenStream, k::Integer=1) - for _ in 1:k +# forwardchars!(::YAMLV1_1, ::TokenStream, ::Integer=1) +# YAML 1.1 [29] b-generic ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed | b-next-line +# YAML 1.1 [33] b-ignored-any ::= b-generic | b-specific +function forwardchars!(::YAMLV1_1, stream::TokenStream, n::Integer=1) + i = 1 + while i ≤ n + # check whether the stream head is `b-ignored-any` c = peek(stream.input) - forward!(stream.input) - stream.index += 1 - if in(c, "\n\u0085\u2028\u2029") || - (c == '\r' && peek(stream.input) == '\n') - stream.column = 0 - stream.line += 1 + # check whether the stream head is `b-carriage-return` + if c == b_carriage_return + # `b-carriage-return` or `b-carriage-return b-line-feed` + forwardchar_breakline!(stream) + i += 1 + if peek(stream.input) == b_line_feed + forwardchar_skip!(stream) + i += 1 + end + # check whether the stream head is `b-ignored-any - b-carriage-return - ( b-carriage-return b-line-feed )` + elseif c == b_line_feed || c == yaml_1_1_b_next_line || is_b_specific(YAMLV1_1(), c) + forwardchar_breakline!(stream) + i += 1 + # the stream head is not `b-ignored-any` else - stream.column += 1 + forwardchar_nobreak!(stream) + i += 1 end end - stream.index += k - nothing end +# forwardchars!(::YAMLV1_2, ::TokenStream, ::Integer=1) +# YAML 1.2 [28] b-break ::= ( b-carriage-return b-line-feed ) | b-carriage-return | b-line-feed +function forwardchars!(::YAMLV1_2, stream::TokenStream, n::Integer=1) + i = 1 + while i ≤ n + c = peek(stream.input) + if c == b_carriage_return + forwardchar_breakline!(stream) + i += 1 + if peek(stream.input) == b_line_feed + forwardchar_skip!(stream) + i += 1 + end + elseif c == b_line_feed + forwardchar_breakline!(stream) + i += 1 + else + forwardchar_nobreak!(stream) + i += 1 + end + end +end function need_more_tokens(stream::TokenStream) if stream.done @@ -466,7 +565,7 @@ function fetch_document_indicator(stream::TokenStream, ::Type{T}) where {T<:Toke # Add DOCUMENT-START or DOCUMENT-END. start_mark = get_mark(stream) - forwardchars!(stream, 3) + forwardchars!(YAMLV1_1(), stream, 3) end_mark = get_mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -507,7 +606,7 @@ function fetch_flow_collection_start(stream::TokenStream, ::Type{T}) where {T<:T # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. start_mark = get_mark(stream) - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -535,7 +634,7 @@ function fetch_flow_collection_end(stream::TokenStream, ::Type{T}) where {T<:Tok # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. start_mark = get_mark(stream) - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, T(Span(start_mark, end_mark))) end @@ -550,7 +649,7 @@ function fetch_flow_entry(stream::TokenStream) # Add FLOW-ENTRY. start_mark = get_mark(stream) - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, FlowEntryToken(Span(start_mark, end_mark))) end @@ -586,7 +685,7 @@ function fetch_block_entry(stream::TokenStream) # Add BLOCK-ENTRY. start_mark = get_mark(stream) - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, BlockEntryToken(Span(start_mark, end_mark))) @@ -618,7 +717,7 @@ function fetch_key(stream::TokenStream) # Add KEY. start_mark = get_mark(stream) - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, KeyToken(Span(start_mark, end_mark))) end @@ -677,7 +776,7 @@ function fetch_value(stream::TokenStream) # Add VALUE. start_mark = get_mark(stream) - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) enqueue!(stream.token_queue, ValueToken(Span(start_mark, end_mark))) end @@ -799,16 +898,16 @@ function scan_line_break(::YAMLV1_1, stream::TokenStream)::String c = peek(stream.input) if c == '\u000d' if peek(stream.input, 1) == '\u000a' - forwardchars!(stream, 2) + forwardchars!(YAMLV1_1(), stream, 2) else - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end "\u000a" elseif c == '\u000a' || c == '\u0085' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) "\u000a" elseif c == '\u2028' || c == '\u2029' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) string(c) else "" @@ -832,13 +931,13 @@ function scan_line_break(::YAMLV1_2, stream::TokenStream)::String c = peek(stream.input) if c == '\u000d' if peek(stream.input, 1) == '\u000a' - forwardchars!(stream, 2) + forwardchars!(YAMLV1_2(), stream, 2) else - forwardchars!(stream) + forwardchars!(YAMLV1_2(), stream) end "\u000a" elseif c == '\u000a' - forwardchars!(stream) + forwardchars!(YAMLV1_2(), stream) "\u000a" else "" @@ -850,13 +949,13 @@ function scan_to_next_token(stream::TokenStream) while true # whitespace while peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end # comment if peek(stream.input) == '#' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end # line break @@ -874,7 +973,7 @@ end function scan_directive(stream::TokenStream) start_mark = get_mark(stream) - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) name = scan_directive_name(stream, start_mark) value = nothing @@ -891,7 +990,7 @@ function scan_directive(stream::TokenStream) end_mark = get_mark(stream) @warn """unknown directive name: "$name" at $end_mark. We ignore this.""" while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end @@ -915,7 +1014,7 @@ function scan_directive_name(stream::TokenStream, start_mark::Mark) end value = prefix(stream.input, length) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) c = peek(stream.input) if !in(c, ":\0 \r\n\u0085\u2028\u2029") @@ -930,7 +1029,7 @@ end function scan_yaml_directive_value(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' || peek(stream.input) == ':' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end major = scan_yaml_directive_number(stream, start_mark) @@ -939,7 +1038,7 @@ function scan_yaml_directive_value(stream::TokenStream, start_mark::Mark) "expected '.' but found '$(peek(stream.input))'", get_mark(stream))) end - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) minor = scan_yaml_directive_number(stream, start_mark) if !in(peek(stream.input), "\0 \r\n\u0085\u2028\u2029") throw(ScannerError("while scanning a directive", start_mark, @@ -982,7 +1081,7 @@ function scan_yaml_directive_number(stream::TokenStream, start_mark::Mark)::Int # --------------------------------------------------- # advance the stream by the length that has been read # --------------------------------------------------- - forwardchars!(stream, pos) + forwardchars!(YAMLV1_1(), stream, pos) # ----------------- # return the number # ----------------- @@ -992,7 +1091,7 @@ end function scan_tag_directive_handle(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end value = scan_tag_handle(stream, "directive", start_mark) @@ -1007,7 +1106,7 @@ end function scan_tag_directive_prefix(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end value = scan_tag_uri(stream, "directive", start_mark) @@ -1022,12 +1121,12 @@ end function scan_directive_ignored_line(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end if peek(stream.input) == '#' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end if !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") @@ -1047,7 +1146,7 @@ function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} else name = "anchor" end - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) length = 0 c = peek(stream.input) while is_ns_ascii_letter(c) || isdigit(c) || c == '-' || c == '_' @@ -1061,7 +1160,7 @@ function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token} get_mark(stream))) end value = prefix(stream.input, length) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) if !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029?:,]}%@`") throw(ScannerError("while scanning an $(name)", start_mark, "expected an alphanumeric character, but found '$(peek(stream.input))'", @@ -1077,18 +1176,18 @@ function scan_tag(stream::TokenStream) c = peek(stream.input, 1) if c == '<' handle = nothing - forwardchars!(stream, 2) + forwardchars!(YAMLV1_1(), stream, 2) suffix = scan_tag_uri(stream, "tag", start_mark) if peek(stream.input) != '>' throw(ScannerError("while parsing a tag", start_mark, "expected '>', but found '$(peek(stream.input))'", get_mark(stream))) end - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) elseif in(c, "\0 \t\r\n\u0085\u2028\u2029") handle = nothing suffix = '!' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) else length = 1 use_handle = false @@ -1104,7 +1203,7 @@ function scan_tag(stream::TokenStream) handle = scan_tag_handle(stream, "tag", start_mark) else handle = "!" - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end suffix = scan_tag_uri(stream, "tag", start_mark) end @@ -1129,7 +1228,7 @@ function scan_block_scalar(stream::TokenStream, style::Char) start_mark = get_mark(stream) # Scan the header. - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) chomping, increment = scan_block_scalar_indicators(stream, start_mark) scan_block_scalar_ignored_line(stream, start_mark) @@ -1153,7 +1252,7 @@ function scan_block_scalar(stream::TokenStream, style::Char) length += 1 end push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) line_break = scan_line_break(YAMLV1_1(), stream) breaks, end_mark = scan_block_scalar_breaks(stream, indent) if stream.column == indent && peek(stream.input) != '\0' @@ -1185,12 +1284,12 @@ end function scan_block_scalar_ignored_line(stream::TokenStream, start_mark::Mark) while peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end if peek(stream.input) == '#' while !in(peek(stream.input), "\0\r\n\u0085\u2028\u2029") - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end @@ -1210,7 +1309,7 @@ function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) c = peek(stream.input) if c == '+' || c == '-' chomping = c == '+' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) c = peek(stream.input) if isdigit(c) increment = parse(Int, c) @@ -1225,11 +1324,11 @@ function scan_block_scalar_indicators(stream::TokenStream, start_mark::Mark) "while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", get_mark(stream), )) - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) c = peek(stream.input) if c == '+' || c == '-' chomping = c == '+' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end c = peek(stream.input) @@ -1251,7 +1350,7 @@ function scan_block_scalar_indentation(stream::TokenStream) push!(chunks, scan_line_break(YAMLV1_1(), stream)) end_mark = get_mark(stream) else - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) if stream.column > max_indent max_indent = stream.column end @@ -1266,14 +1365,14 @@ function scan_block_scalar_breaks(stream::TokenStream, indent) chunks = Any[] end_mark = get_mark(stream) while stream.column < indent && peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end while is_b_char(YAMLV1_1(), peek(stream.input)) push!(chunks, scan_line_break(YAMLV1_1(), stream)) end_mark = get_mark(stream) while stream.column < indent && peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end end @@ -1286,14 +1385,14 @@ function scan_flow_scalar(stream::TokenStream, style::Char) chunks = Any[] start_mark = get_mark(stream) q = peek(stream.input) # quote - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) while peek(stream.input) != q || peek(stream.input, 1) == q append!(chunks, scan_flow_scalar_spaces(stream, double, start_mark)) append!(chunks, scan_flow_scalar_non_spaces(stream, double, start_mark)) end - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) end_mark = get_mark(stream) ScalarToken(Span(start_mark, end_mark), string(chunks...), false, style) end @@ -1337,13 +1436,13 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, end if length > 0 push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) end c = peek(stream.input) if !double && c == '\'' && peek(stream.input, 1) == '\'' push!(chunks, '\'') - forwardchars!(stream, 2) + forwardchars!(YAMLV1_1(), stream, 2) elseif (double && c == '\'') || (!double && in(c, "\"\\")) push!(chunks, c) forward!(stream.input) @@ -1368,7 +1467,7 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool, end end push!(chunks, Char(parse(Int, prefix(stream.input, length), base = 16))) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) elseif is_b_char(YAMLV1_1(), c) scan_line_break(YAMLV1_1(), stream) append!(chunks, scan_flow_scalar_breaks(stream, double, start_mark)) @@ -1393,7 +1492,7 @@ function scan_flow_scalar_spaces(stream::TokenStream, double::Bool, length += 1 end whitespaces = prefix(stream.input, length) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) c = peek(stream.input) if c == '\0' @@ -1480,7 +1579,7 @@ function scan_plain(stream::TokenStream) c = peek(stream.input) if stream.flow_level != 0 && c == ':' && !in(peek(stream.input, length + 1), "\0 \t\r\n\u0085\u2028\u2029,[]{}") - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) throw(ScannerError("while scanning a plain scalar", start_mark, "found unexpected ':'", get_mark(stream))) end @@ -1492,7 +1591,7 @@ function scan_plain(stream::TokenStream) stream.allow_simple_key = true append!(chunks, spaces) push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) end_mark = get_mark(stream) spaces = scan_plain_spaces(stream, indent, start_mark) if isempty(spaces) || peek(stream.input) == '#' || @@ -1514,7 +1613,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, end whitespaces = prefix(stream.input, length) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) c = peek(stream.input) if is_b_char(YAMLV1_1(), c) line_break = scan_line_break(YAMLV1_1(), stream) @@ -1531,7 +1630,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer, breaks = Any[] while in(peek(stream.input), " \r\n\u0085\u2028\u2029") if peek(stream.input) == ' ' - forwardchars!(stream) + forwardchars!(YAMLV1_1(), stream) else push!(breaks, scan_line_break(YAMLV1_1(), stream)) if peek(stream.input) == '\uFEFF' @@ -1573,7 +1672,7 @@ function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) end if c != '!' - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) throw(ScannerError("while scanning a $(name)", start_mark, "expected '!', but found '$(c)'", get_mark(stream))) @@ -1582,7 +1681,7 @@ function scan_tag_handle(stream::TokenStream, name::String, start_mark::Mark) end value = prefix(stream.input, length) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) value end @@ -1594,7 +1693,7 @@ function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) while is_ns_ascii_letter(c) || isdigit(c) || in(c, "-;/?:@&=+\$,_.!~*\'()[]%") if c == '%' push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) length = 0 push!(chunks, scan_uri_escapes(stream, name, start_mark)) else @@ -1605,7 +1704,7 @@ function scan_tag_uri(stream::TokenStream, name::String, start_mark::Mark) if length > 0 push!(chunks, prefix(stream.input, length)) - forwardchars!(stream, length) + forwardchars!(YAMLV1_1(), stream, length) length = 0 end @@ -1632,7 +1731,7 @@ function scan_uri_escapes(stream::TokenStream, name::String, start_mark::Mark):: )) end push!(bytes, Char(parse(Int, prefix(stream.input, 2), base=16))) - forwardchars!(stream, 2) + forwardchars!(YAMLV1_1(), stream, 2) end String(bytes) end