Skip to content

Commit

Permalink
refactor Lexer to be static class function
Browse files Browse the repository at this point in the history
  • Loading branch information
ggmichaelgo committed Nov 17, 2024
1 parent 02411ac commit bd5098c
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 92 deletions.
170 changes: 83 additions & 87 deletions lib/liquid/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,38 +25,37 @@ class Lexer1
COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains(?=\s)/
WHITESPACE_OR_NOTHING = /\s*/

# Wraps the input in a StringScanner stored in @ss, which the instance-level
# tokenize (the variant that takes no arguments) scans from.
# NOTE(review): this looks like the instance-based side of the refactor shown in
# this diff; the class-level tokenize(input) builds its own local scanner instead.
def initialize(input)
@ss = StringScanner.new(input)
end

def tokenize
@output = []

until @ss.eos?
@ss.skip(WHITESPACE_OR_NOTHING)
break if @ss.eos?
tok = if (t = @ss.scan(COMPARISON_OPERATOR))
[:comparison, t]
elsif (t = @ss.scan(STRING_LITERAL))
[:string, t]
elsif (t = @ss.scan(NUMBER_LITERAL))
[:number, t]
elsif (t = @ss.scan(IDENTIFIER))
[:id, t]
elsif (t = @ss.scan(DOTDOT))
[:dotdot, t]
else
c = @ss.getch
if (s = SPECIALS[c])
[s, c]
class << self
def tokenize(input)
ss = StringScanner.new(input)
output = []

until ss.eos?
ss.skip(WHITESPACE_OR_NOTHING)
break if ss.eos?
tok = if (t = ss.scan(COMPARISON_OPERATOR))
[:comparison, t]
elsif (t = ss.scan(STRING_LITERAL))
[:string, t]
elsif (t = ss.scan(NUMBER_LITERAL))
[:number, t]
elsif (t = ss.scan(IDENTIFIER))
[:id, t]
elsif (t = ss.scan(DOTDOT))
[:dotdot, t]
else
raise SyntaxError, "Unexpected character #{c}"
c = ss.getch
if (s = SPECIALS[c])
[s, c]
else
raise SyntaxError, "Unexpected character #{c}"
end
end
output << tok
end
@output << tok
end

@output << [:end_of_string]
output << [:end_of_string]
end
end
end

Expand Down Expand Up @@ -157,82 +156,79 @@ class Lexer2
table.freeze
end

# Keeps the raw input in @input; the instance-level tokenize (no arguments)
# hands it to StringScannerPool.pop to obtain a scanner.
# NOTE(review): this looks like the instance-based side of the refactor shown in
# this diff; the class-level tokenize(input) receives the input directly.
def initialize(input)
@input = input
end

# rubocop:disable Metrics/BlockNesting
def tokenize
ss = StringScannerPool.pop(@input)
@output = []
class << self
def tokenize(input)
ss = StringScannerPool.pop(input)
output = []

until ss.eos?
ss.skip(WHITESPACE_OR_NOTHING)
until ss.eos?
ss.skip(WHITESPACE_OR_NOTHING)

break if ss.eos?
break if ss.eos?

start_pos = ss.pos
peeked = ss.peek_byte
start_pos = ss.pos
peeked = ss.peek_byte

if (special = SPECIAL_TABLE[peeked])
ss.scan_byte
# Special case for ".."
if special == DOT && ss.peek_byte == DOT_ORD
if (special = SPECIAL_TABLE[peeked])
ss.scan_byte
@output << DOTDOT
elsif special == DASH
# Special case for negative numbers
if (peeked_byte = ss.peek_byte) && NUMBER_TABLE[peeked_byte]
ss.pos -= 1
@output << [:number, ss.scan(NUMBER_LITERAL)]
# Special case for ".."
if special == DOT && ss.peek_byte == DOT_ORD
ss.scan_byte
output << DOTDOT
elsif special == DASH
# Special case for negative numbers
if (peeked_byte = ss.peek_byte) && NUMBER_TABLE[peeked_byte]
ss.pos -= 1
output << [:number, ss.scan(NUMBER_LITERAL)]
else
output << special
end
else
@output << special
output << special
end
else
@output << special
end
elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
ss.scan_byte
if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte])
@output << found
elsif (sub_table = TWO_CHARS_COMPARISON_JUMP_TABLE[peeked])
ss.scan_byte
else
raise_syntax_error(start_pos, ss)
end
elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
ss.scan_byte
if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte])
@output << found
if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte])
output << found
ss.scan_byte
else
raise_syntax_error(start_pos, ss)
end
elsif (sub_table = COMPARISON_JUMP_TABLE[peeked])
ss.scan_byte
else
@output << SINGLE_COMPARISON_TOKENS[peeked]
end
else
type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked]

if type && (t = ss.scan(pattern))
# Special case for "contains"
@output << if type == :id && t == "contains" && @output.last&.first != :dot
COMPARISON_CONTAINS
if (peeked_byte = ss.peek_byte) && (found = sub_table[peeked_byte])
output << found
ss.scan_byte
else
[type, t]
output << SINGLE_COMPARISON_TOKENS[peeked]
end
else
raise_syntax_error(start_pos, ss)
type, pattern = NEXT_MATCHER_JUMP_TABLE[peeked]

if type && (t = ss.scan(pattern))
# Special case for "contains"
output << if type == :id && t == "contains" && output.last&.first != :dot
COMPARISON_CONTAINS
else
[type, t]
end
else
raise_syntax_error(start_pos, ss)
end
end
end
# rubocop:enable Metrics/BlockNesting
output << EOS
ensure
StringScannerPool.release(ss)
end
# rubocop:enable Metrics/BlockNesting

@output << EOS
ensure
StringScannerPool.release(ss)
end

def raise_syntax_error(start_pos, ss)
ss.pos = start_pos
# the character could be a UTF-8 character, use getch to get all the bytes
raise SyntaxError, "Unexpected character #{ss.getch}"
# Rewinds the scanner to start_pos and raises a SyntaxError whose message names
# the character found at that position. This method always raises.
#
# start_pos - scanner position to rewind to; tokenize passes the ss.pos it
#             captured before it started consuming the offending token.
# ss        - the scanner being tokenized (obtained from StringScannerPool.pop).
def raise_syntax_error(start_pos, ss)
ss.pos = start_pos
# the character could be a UTF-8 character, use getch to get all the bytes
raise SyntaxError, "Unexpected character #{ss.getch}"
end
end
end

Expand Down
3 changes: 1 addition & 2 deletions lib/liquid/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
module Liquid
class Parser
# Tokenizes the input up front, stores the result in @tokens, and starts the
# token pointer @p at 0.
# NOTE(review): this hunk appears to show both sides of the change - the
# instance-based call (Lexer.new(input) followed by #tokenize) and the
# class-level Lexer.tokenize(input) call that replaces it.
def initialize(input)
l = Lexer.new(input)
@tokens = l.tokenize
@tokens = Lexer.tokenize(input)
@p = 0 # pointer to current location
end

Expand Down
5 changes: 3 additions & 2 deletions lib/liquid/string_scanner_pool.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# frozen_string_literal: true

module Liquid
class StringScannerPool
class << self
def pop(input)
@ss_pool ||= [StringScanner.new("")] * 5
@ss_pool ||= 5.times.each_with_object([]) { |_i, arr| arr << StringScanner.new("") }

if @ss_pool.empty?
StringScanner.new(input)
Expand All @@ -14,7 +16,6 @@ def pop(input)
end

# Appends the given scanner to @ss_pool.
# NOTE(review): presumably so a later pop can reuse it - the reuse branch of pop
# is not visible in this hunk.
def release(ss)
# Debugging hook: opens an interactive IRB session when called with nil.
# Nothing here returns early, so a nil ss is still appended to the pool below.
binding.irb if ss.nil?
# Create the pool lazily in case release is called before pop has initialised it.
@ss_pool ||= []
@ss_pool << ss
end
Expand Down
2 changes: 1 addition & 1 deletion test/unit/lexer_unit_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,6 @@ def test_tokenize_incomplete_expression
private

# Test helper: runs the Lexer over input and returns whatever tokenize returns.
# NOTE(review): this hunk appears to show both sides of the change - the
# instance-based Lexer.new(input).tokenize and the class-level
# Lexer.tokenize(input) that replaces it.
def tokenize(input)
Lexer.new(input).tokenize
Lexer.tokenize(input)
end
end

0 comments on commit bd5098c

Please sign in to comment.