diff --git a/bin/prism b/bin/prism index d2b0ec6db14..2b4f8895cce 100755 --- a/bin/prism +++ b/bin/prism @@ -224,14 +224,21 @@ module Prism # bin/prism parser [source] def parser(argv) - require "parser/current" + require "parser/ruby33" source, filepath = read_source(argv) + buffer = Parser::Source::Buffer.new(filepath, 1) + buffer.source = source + puts "Parser:" - pp Parser::CurrentRuby.parse(source, filepath) + parser_ast, _, parser_tokens = Parser::Ruby33.new.tokenize(buffer) + pp parser_ast + pp parser_tokens puts "Prism:" - pp Translation::Parser.parse(source, filepath) + prism_ast, _, prism_tokens = Prism::Translation::Parser33.new.tokenize(buffer) + pp prism_ast + pp prism_tokens end # bin/prism ripper [source] diff --git a/lib/prism/translation.rb b/lib/prism/translation.rb index e367c6e0532..8b75e8a3abe 100644 --- a/lib/prism/translation.rb +++ b/lib/prism/translation.rb @@ -5,6 +5,8 @@ module Prism # syntax trees. module Translation # steep:ignore autoload :Parser, "prism/translation/parser" + autoload :Parser33, "prism/translation/parser33" + autoload :Parser34, "prism/translation/parser34" autoload :Ripper, "prism/translation/ripper" autoload :RubyParser, "prism/translation/ruby_parser" end diff --git a/lib/prism/translation/parser.rb b/lib/prism/translation/parser.rb index 10f4a70dc19..fd1302821d0 100644 --- a/lib/prism/translation/parser.rb +++ b/lib/prism/translation/parser.rb @@ -168,7 +168,7 @@ def build_comments(comments, offset_cache) # Build the parser gem tokens from the prism tokens. def build_tokens(tokens, offset_cache) - Lexer.new(source_buffer, tokens.map(&:first), offset_cache).to_a + Lexer.new(source_buffer, tokens, offset_cache).to_a end # Build a range from a prism location. 
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 5c993cfab81..b710b1981b9 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -177,12 +177,23 @@ class Lexer WORDS_SEP: :tSPACE } - private_constant :TYPES + # These constants represent flags in our lex state. We really, really + # don't want to be using them and we really, really don't want to be + # exposing them as part of our public API. Unfortunately, we don't have + # another way of matching the exact tokens that the parser gem expects + # without them. We should find another way to do this, but in the + # meantime we'll hide them from the documentation and mark them as + # private constants. + EXPR_BEG = 0x1 # :nodoc: + EXPR_LABEL = 0x400 # :nodoc: + + private_constant :TYPES, :EXPR_BEG, :EXPR_LABEL # The Parser::Source::Buffer that the tokens were lexed from. attr_reader :source_buffer - # An array of prism tokens that we lexed. + # An array of tuples that contain prism tokens and their associated lex + # state when they were lexed. attr_reader :lexed # A hash that maps offsets in bytes to offsets in characters. 
@@ -205,9 +216,9 @@ def to_a index = 0 while index < lexed.length - token, = lexed[index] + token, state = lexed[index] index += 1 - next if token.type == :IGNORED_NEWLINE || token.type == :EOF + next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type) type = TYPES.fetch(token.type) value = token.value @@ -218,13 +229,13 @@ def to_a value.delete_prefix!("?") when :tCOMMENT if token.type == :EMBDOC_BEGIN - until (next_token = lexed[index]) && next_token.type == :EMBDOC_END + until ((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) || index.succ >= lexed.length value += next_token.value index += 1 end value += next_token.value - location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index].location.end_offset]) + location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset]) index += 1 else value.chomp! @@ -247,6 +258,8 @@ def to_a value.chomp!(":") when :tLABEL_END value.chomp!(":") + when :tLCURLY + type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL when :tNTH_REF value = Integer(value.delete_prefix("$")) when :tOP_ASGN @@ -256,13 +269,13 @@ def to_a when :tSPACE value = nil when :tSTRING_BEG - if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END + if ["\"", "'"].include?(value) && (next_token = lexed[index]&.first) && next_token.type == :STRING_END next_location = token.location.join(next_token.location) type = :tSTRING value = "" location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) index += 1 - elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END + elsif ["\"", "'"].include?(value) && (next_token = lexed[index]&.first) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]&.first) && next_next_token.type == :STRING_END 
next_location = token.location.join(next_next_token.location) type = :tSTRING value = next_token.value @@ -280,7 +293,7 @@ def to_a location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1]) end when :tSYMBEG - if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR + if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR next_location = token.location.join(next_token.location) type = :tSYMBOL value = next_token.value diff --git a/lib/prism/translation/parser33.rb b/lib/prism/translation/parser33.rb index cfaa623163a..b09266e06a4 100644 --- a/lib/prism/translation/parser33.rb +++ b/lib/prism/translation/parser33.rb @@ -1,4 +1,4 @@ -require_relative "parser" +# frozen_string_literal: true module Prism module Translation diff --git a/lib/prism/translation/parser34.rb b/lib/prism/translation/parser34.rb index 0a34758659c..0ead70ad3c8 100644 --- a/lib/prism/translation/parser34.rb +++ b/lib/prism/translation/parser34.rb @@ -1,4 +1,4 @@ -require_relative "parser" +# frozen_string_literal: true module Prism module Translation diff --git a/test/prism/parser_test.rb b/test/prism/parser_test.rb index 71dd6d82238..61f1b737dcf 100644 --- a/test/prism/parser_test.rb +++ b/test/prism/parser_test.rb @@ -101,9 +101,11 @@ def test_warnings parser = Prism::Translation::Parser33.new parser.diagnostics.all_errors_are_fatal = false + warning = nil parser.diagnostics.consumer = ->(received) { warning = received } parser.parse(buffer) + assert_equal :warning, warning.level assert_includes warning.message, "has been interpreted as" end