/**
 * Decodes the backslash escape sequences of a Twig string literal.
 *
 * Recognised sequences:
 *  - \f \n \r \t \v      the matching control character
 *  - \#                  a literal "#"
 *  - \\                  a single backslash
 *  - \' and \"           the quote character itself
 *  - \xH or \xHH         the byte with that hexadecimal value (one or two digits)
 *  - \O, \OO or \OOO     the byte with that octal value (one to three digits 0-7)
 *
 * Any other backslash sequence is copied through unchanged, backslash
 * included, and so is a lone backslash at the very end of the string.
 *
 * A message is appended to $this->deprecations when an escape is not needed:
 *  - a quote character that differs from $quoteType;
 *  - an escaped backslash in a single-quoted string, unless the character
 *    right after the pair is one of f, n, r, t or v.
 * The reported position is the 1-based offset of the escaped character in $str.
 *
 * @param string $str       String content without its surrounding quotes
 * @param string $quoteType Quote character that delimited the string (' or ")
 *
 * @return string The decoded string
 */
private function stripcslashes(string $str, string $quoteType): string
{
    $specialChars = [
        'f' => "\f",
        'n' => "\n",
        'r' => "\r",
        't' => "\t",
        'v' => "\v",
    ];

    $result = '';
    $length = \strlen($str);

    for ($i = 0; $i < $length; ++$i) {
        if ('\\' !== $str[$i] || $i + 1 >= $length) {
            // Ordinary byte, or a trailing backslash with nothing left to escape.
            $result .= $str[$i];

            continue;
        }

        // Step over the backslash: from here on $i indexes the escaped character.
        $nextChar = $str[++$i];

        if (isset($specialChars[$nextChar])) {
            $result .= $specialChars[$nextChar];
        } elseif ('#' === $nextChar) {
            $result .= '#';
        } elseif ('\\' === $nextChar) {
            if ("'" === $quoteType && !($i + 1 < $length && isset($specialChars[$str[$i + 1]]))) {
                $this->deprecations[] = sprintf('Character %s at position %d does not require to be escaped.', $nextChar, $i + 1);
            }
            $result .= '\\';
        } elseif ("'" === $nextChar || '"' === $nextChar) {
            if ($nextChar !== $quoteType) {
                $this->deprecations[] = sprintf('Character %s at position %d does not require to be escaped.', $nextChar, $i + 1);
            }
            $result .= $nextChar;
        } elseif ('x' === $nextChar && $i + 1 < $length && ctype_xdigit($str[$i + 1])) {
            // The hex digits start AFTER the "x"; take one, and a second one if present.
            $hex = $str[++$i];
            if ($i + 1 < $length && ctype_xdigit($str[$i + 1])) {
                $hex .= $str[++$i];
            }
            $result .= \chr((int) hexdec($hex));
        } elseif (ctype_digit($nextChar) && $nextChar < '8') {
            // Up to three octal digits; $i only advances for digits actually consumed.
            $octal = $nextChar;
            while (\strlen($octal) < 3 && $i + 1 < $length && ctype_digit($str[$i + 1]) && $str[$i + 1] < '8') {
                $octal .= $str[++$i];
            }
            // Three octal digits can exceed 255; keep the low byte only.
            $result .= \chr(((int) octdec($octal)) & 0xFF);
        } else {
            // Unknown escape: keep both the backslash and the character.
            $result .= '\\'.$nextChar;
        }
    }

    return $result;
}
/**
 * Data sets for testStringWithEscapedDelimiter().
 *
 * Each set is [template source, expected string token value, expected
 * deprecation messages]. The templates and expected values are nowdocs so
 * that every backslash shown is a literal backslash.
 *
 * Declared static because PHPUnit 10+ requires static data providers;
 * earlier PHPUnit versions accept static providers as well.
 *
 * @return iterable<string, array{0: string, 1: string, 2: list<string>}>
 */
public static function getStringWithEscapedDelimiter(): iterable
{
    yield '{{ \'App\\\\Test\' }} => App\Test' => [
        <<<'TWIG'
        {{ 'App\\Test' }}
        TWIG,
        <<<'EOF'
        App\Test
        EOF,
        [
            'Character \ at position 5 does not require to be escaped.',
        ],
    ];
    yield '{{ \'App\Test\' }} => App\Test' => [
        <<<'TWIG'
        {{ 'App\Test' }}
        TWIG,
        <<<'EOF'
        App\Test
        EOF,
        [],
    ];
    yield '{{ \'foo \\\' bar\' }} => foo \' bar' => [
        <<<'TWIG'
        {{ 'foo \' bar' }}
        TWIG,
        <<<'EOF'
        foo ' bar
        EOF,
        [],
    ];
    yield '{{ "foo \\\' bar" }} => foo \' bar' => [
        <<<'TWIG'
        {{ "foo \' bar" }}
        TWIG,
        <<<'EOF'
        foo ' bar
        EOF,
        [
            "Character ' at position 6 does not require to be escaped.",
        ],
    ];
    yield '{{ "foo \" bar" }} => foo " bar' => [
        <<<'TWIG'
        {{ "foo \" bar" }}
        TWIG,
        <<<'EOF'
        foo " bar
        EOF,
        [],
    ];
    yield '{{ \'foo \" bar\' }} => foo " bar' => [
        <<<'TWIG'
        {{ 'foo \" bar' }}
        TWIG,
        <<<'EOF'
        foo " bar
        EOF,
        [
            'Character " at position 6 does not require to be escaped.',
        ],
    ];
    // Escaped control characters are decoded...
    yield '{{ \'\f\n\r\t\v\' }} => \f\n\r\t\v' => [
        <<<'TWIG'
        {{ '\f\n\r\t\v' }}
        TWIG,
        "\f\n\r\t\v",
        [],
    ];
    // ...while an escaped backslash in front of them keeps them literal.
    yield '{{ \'\\\\f\\\\n\\\\r\\\\t\\\\v\' }} => \\f\\n\\r\\t\\v' => [
        <<<'TWIG'
        {{ '\\f\\n\\r\\t\\v' }}
        TWIG,
        <<<'EOF'
        \f\n\r\t\v
        EOF,
        [],
    ];
}