-
-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathLexer.php
110 lines (98 loc) · 4.7 KB
/
Lexer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\Component\ExpressionLanguage;
/**
 * Turns an expression string into a stream of tokens.
 *
 * @author Fabien Potencier <fabien@symfony.com>
 */
class Lexer
{
    /**
     * Numeric literal: digit groups optionally separated by single underscores,
     * with an optional fractional part (a leading-dot form such as ".5" is
     * accepted) and an optional exponent. The "(?!\.)" guard keeps the literal
     * from swallowing the first dot of a ".." operator.
     *
     * The pattern uses the "x" modifier, so the line breaks inside it are
     * insignificant to the regex engine.
     */
    private const NUMBER_PATTERN = '/
(?(DEFINE)(?P<LNUM>[0-9]+(_[0-9]+)*))
(?:\.(?&LNUM)|(?&LNUM)(?:\.(?!\.)(?&LNUM)?)?)(?:[eE][+-]?(?&LNUM))?/Ax';

    /**
     * Double- or single-quoted literal in which a backslash escapes the
     * character that follows it.
     */
    private const STRING_PATTERN = '/"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';

    /**
     * Block comment delimited by "/*" and the nearest closing delimiter.
     */
    private const COMMENT_PATTERN = '{/\*.*?\*/}A';

    /**
     * Every operator. Word operators only match when preceded by the start of
     * the subject, whitespace or "(" and followed by whitespace or "(".
     * Longer alternatives are listed before their prefixes ("not in" before
     * "not", "===" before "==").
     */
    private const OPERATOR_PATTERN = '/(?<=^|[\s(])starts with(?=[\s(])|(?<=^|[\s(])ends with(?=[\s(])|(?<=^|[\s(])contains(?=[\s(])|(?<=^|[\s(])matches(?=[\s(])|(?<=^|[\s(])not in(?=[\s(])|(?<=^|[\s(])not(?=[\s(])|(?<=^|[\s(])xor(?=[\s(])|(?<=^|[\s(])and(?=[\s(])|\=\=\=|\!\=\=|(?<=^|[\s(])or(?=[\s(])|\|\||&&|\=\=|\!\=|\>\=|\<\=|(?<=^|[\s(])in(?=[\s(])|\.\.|\*\*|\<\<|\>\>|\!|\||\^|&|\<|\>|\+|\-|~|\*|\/|%/A';

    /**
     * Identifier: a letter, underscore or byte in the 0x7f-0xff range,
     * followed by any number of those or digits.
     */
    private const NAME_PATTERN = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';

    /**
     * Splits an expression into tokens and wraps them in a TokenStream.
     *
     * Each token records a 1-based offset into the normalized expression,
     * while SyntaxError is given the 0-based offset. The token kinds are
     * tried in a fixed order at every position; the first one that matches
     * wins.
     *
     * @throws SyntaxError When a bracket is unbalanced or a character cannot be lexed
     */
    public function tokenize(string $expression): TokenStream
    {
        // Flatten every whitespace control character to a plain space, so the
        // loop below only ever has to skip ' '.
        $expression = str_replace(["\r", "\n", "\t", "\v", "\f"], ' ', $expression);
        $length = \strlen($expression);
        $openBrackets = [];
        $tokens = [];
        $pos = 0;

        while ($pos < $length) {
            $char = $expression[$pos];

            if (' ' === $char) {
                ++$pos;
                continue;
            }

            // number
            if (preg_match(self::NUMBER_PATTERN, $expression, $m, 0, $pos)) {
                // Underscore separators are dropped before the numeric conversion.
                $tokens[] = new Token(Token::NUMBER_TYPE, 0 + str_replace('_', '', $m[0]), $pos + 1);
                $pos += \strlen($m[0]);
                continue;
            }

            // opening bracket: remember it so the matching closer can be verified
            if (str_contains('([{', $char)) {
                $openBrackets[] = [$char, $pos];
                $tokens[] = new Token(Token::PUNCTUATION_TYPE, $char, $pos + 1);
                ++$pos;
                continue;
            }

            // closing bracket: must pair with the most recently opened one
            if (str_contains(')]}', $char)) {
                if (!$openBrackets) {
                    throw new SyntaxError(\sprintf('Unexpected "%s".', $char), $pos, $expression);
                }

                [$opener, $openedAt] = array_pop($openBrackets);
                if (strtr($opener, '([{', ')]}') !== $char) {
                    // Reported at the position of the opener, not of the stray closer.
                    throw new SyntaxError(\sprintf('Unclosed "%s".', $opener), $openedAt, $expression);
                }

                $tokens[] = new Token(Token::PUNCTUATION_TYPE, $char, $pos + 1);
                ++$pos;
                continue;
            }

            // string literal: strip the surrounding quotes and decode backslash escapes
            if (preg_match(self::STRING_PATTERN, $expression, $m, 0, $pos)) {
                $tokens[] = new Token(Token::STRING_TYPE, stripcslashes(substr($m[0], 1, -1)), $pos + 1);
                $pos += \strlen($m[0]);
                continue;
            }

            // comment: skipped, no token is produced
            if (preg_match(self::COMMENT_PATTERN, $expression, $m, 0, $pos)) {
                $pos += \strlen($m[0]);
                continue;
            }

            // operator
            if (preg_match(self::OPERATOR_PATTERN, $expression, $m, 0, $pos)) {
                $tokens[] = new Token(Token::OPERATOR_TYPE, $m[0], $pos + 1);
                $pos += \strlen($m[0]);
                continue;
            }

            // null-safe "?." and null-coalescing "??" are two-character punctuation
            if ('?' === $char) {
                $next = $expression[$pos + 1] ?? '';
                if ('.' === $next || '?' === $next) {
                    $tokens[] = new Token(Token::PUNCTUATION_TYPE, '?'.$next, $pos + 1);
                    $pos += 2;
                    continue;
                }
            }

            // single-character punctuation
            if (str_contains('.,?:', $char)) {
                $tokens[] = new Token(Token::PUNCTUATION_TYPE, $char, $pos + 1);
                ++$pos;
                continue;
            }

            // name
            if (preg_match(self::NAME_PATTERN, $expression, $m, 0, $pos)) {
                $tokens[] = new Token(Token::NAME_TYPE, $m[0], $pos + 1);
                $pos += \strlen($m[0]);
                continue;
            }

            // nothing matched at this position
            throw new SyntaxError(\sprintf('Unexpected character "%s".', $char), $pos, $expression);
        }

        $tokens[] = new Token(Token::EOF_TYPE, null, $pos + 1);

        // Any bracket still on the stack was never closed; report the innermost one.
        if ($openBrackets) {
            [$opener, $openedAt] = array_pop($openBrackets);

            throw new SyntaxError(\sprintf('Unclosed "%s".', $opener), $openedAt, $expression);
        }

        return new TokenStream($tokens, $expression);
    }
}