From a4b9caea7385bf05b1ec0403b4e3007312592e0c Mon Sep 17 00:00:00 2001 From: Ruud Kamphuis Date: Wed, 7 Aug 2024 19:19:01 +0200 Subject: [PATCH] Deprecate unnecessary escape characters See #4123 #2712 This allows to change the implementation in v4 so that we no longer have to escape backslashes. --- doc/templates.rst | 23 +++++++++-- src/Lexer.php | 79 +++++++++++++++++++++++++++++++++++++- tests/LexerTest.php | 93 +++++++++++++++++++++++++++++++++++++++------ 3 files changed, 177 insertions(+), 18 deletions(-) diff --git a/doc/templates.rst b/doc/templates.rst index b0f5b9a1c29..2746d90f6a7 100644 --- a/doc/templates.rst +++ b/doc/templates.rst @@ -522,10 +522,25 @@ exist: * ``"Hello World"``: Everything between two double or single quotes is a string. They are useful whenever you need a string in the template (for example as arguments to function calls, filters or just to extend or include - a template). A string can contain a delimiter if it is preceded by a - backslash (``\``) -- like in ``'It\'s good'``. If the string contains a - backslash (e.g. ``'c:\Program Files'``) escape it by doubling it - (e.g. ``'c:\\Program Files'``). + a template). + + Note that certain characters require escaping: + * ``\f``: Form feed + * ``\n``: New line + * ``\r``: Carriage return + * ``\t``: Horizontal tab + * ``\v``: Vertical tab + * ``\x``: Hexadecimal escape sequence + * ``\0`` to ``\377``: Octal escape sequences representing characters + * ``\``: Backslash + + When using single-quoted strings, the single quote character (``'``) needs to be escaped with a backslash (``\'``). + When using double-quoted strings, the double quote character (``"``) needs to be escaped with a backslash (``\"``). + + For example, a single quoted string can contain a delimiter if it is preceded by a + backslash (``\``) -- like in ``'It\'s good'``. If the string contains a + backslash (e.g. ``'c:\Program Files'``) escape it by doubling it + (e.g. ``'c:\\Program Files'``). * ``42`` / ``42.23``: Integers and floating point numbers are created by writing the number down. If a dot is present the number is a float, diff --git a/src/Lexer.php b/src/Lexer.php index 8973fbbc8da..526eb750b61 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -367,7 +367,7 @@ private function lexExpression(): void } // strings elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) { - $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes(substr($match[0], 1, -1))); + $this->pushToken(/* Token::STRING_TYPE */ 7, $this->stripcslashes(substr($match[0], 1, -1), substr($match[0], 0, 1))); $this->moveCursor($match[0]); } // opening double quoted string @@ -382,6 +382,81 @@ private function lexExpression(): void } } + private function stripcslashes(string $str, string $quoteType): string + { + if (!str_contains($str, '\\')) { + return $str; + } + + $result = ''; + $length = strlen($str); + $specialChars = [ + 'f' => "\f", + 'n' => "\n", + 'r' => "\r", + 't' => "\t", + 'v' => "\v", + ]; + + for ($i = 0; $i < $length; $i++) { + if ($str[$i] !== '\\' || $i + 1 >= $length) { + $result .= $str[$i]; + + continue; + } + + $nextChar = $str[$i + 1]; + if (isset($specialChars[$nextChar])) { + $result .= $specialChars[$nextChar]; + $i++; + } elseif ($nextChar === '\\') { + $result .= '\\'; + $i++; + } elseif ($nextChar === "'" || $nextChar === '"') { + if ($nextChar !== $quoteType) { + trigger_deprecation('twig/twig', '3.12', 'Character "%s" at position %d does not need to be escaped anymore.', $nextChar, $i + 2); + } + $result .= $nextChar; + $i++; + } elseif ($nextChar === '#' && $i + 2 < $length && $str[$i + 2] === '{') { + $result .= '#{'; + $i += 2; + } elseif ($nextChar === 'x' && $i + 2 < $length && ctype_xdigit($str[$i + 2])) { + $hex = $str[$i + 2]; + if ($i + 3 < $length && ctype_xdigit($str[$i + 3])) { + $hex .= $str[$i + 3]; + $i++; + } + $result .= chr(hexdec($hex)); + $i += 2; + } elseif (ctype_digit($nextChar) && $nextChar < '8') { + $octal = $nextChar; + for ($j = $i + 1; $j <= 3; $j++) { + $o = $j + 1; + if ($o >= $length) { + break; + } + if (!ctype_digit($str[$o])) { + break; + } + if ($str[$o] >= '8') { + break; + } + $octal .= $str[$o]; + $i++; + } + $result .= chr(octdec($octal)); + $i++; + } else { + trigger_deprecation('twig/twig', '3.12', 'Character "%s" at position %d does not need to be escaped anymore.', $nextChar, $i + 2); + $result .= $nextChar; + $i++; + } + } + + return $result; + } + private function lexRawData(): void { if (!preg_match($this->regexes['lex_raw_data'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) { @@ -423,7 +498,7 @@ private function lexString(): void $this->moveCursor($match[0]); $this->pushState(self::STATE_INTERPOLATION); } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && '' !== $match[0]) { - $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes($match[0])); + $this->pushToken(/* Token::STRING_TYPE */ 7, $this->stripcslashes($match[0], '"')); $this->moveCursor($match[0]); } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) { [$expect, $lineno] = array_pop($this->brackets); diff --git a/tests/LexerTest.php b/tests/LexerTest.php index 38ee2361569..b4eb1b7c56a 100644 --- a/tests/LexerTest.php +++ b/tests/LexerTest.php @@ -12,6 +12,7 @@ */ use PHPUnit\Framework\TestCase; +use Symfony\Bridge\PhpUnit\ExpectDeprecationTrait; use Twig\Environment; use Twig\Error\SyntaxError; use Twig\Lexer; @@ -21,6 +22,8 @@ class LexerTest extends TestCase { + use ExpectDeprecationTrait; + public function testNameLabelForTag() { $template = '{% ยง %}'; @@ -178,23 +181,89 @@ public function testBigNumbers() $this->assertEquals('922337203685477580700', $node->getValue()); } - public function testStringWithEscapedDelimiter() + /** + * @dataProvider getStringWithEscapedDelimiter + */ + public function testStringWithEscapedDelimiter(string $template, string $expected) + { + $lexer = new Lexer(new Environment(new ArrayLoader())); + $stream = $lexer->tokenize(new Source($template, 'index')); + $stream->expect(Token::VAR_START_TYPE); + $token = $stream->expect(Token::STRING_TYPE); + $this->assertSame($expected, $token->getValue()); + } + + public function getStringWithEscapedDelimiter() { - $tests = [ - "{{ 'foo \' bar' }}" => 'foo \' bar', - '{{ "foo \" bar" }}' => 'foo " bar', + yield '{{ \'\x6\' }} => \x6' => [ + '{{ \'\x6\' }}', + "\x6", ]; + yield '{{ \'\065\x64\' }} => \065\x64' => [ + '{{ \'\065\x64\' }}', + "\065\x64", + ]; + yield '{{ \'App\\\\Test\' }} => App\Test' => [ + '{{ \'App\\\\Test\' }}', + 'App\\Test', + ]; + yield '{{ "App\#{var}" }} => App#{var}' => [ + '{{ "App\#{var}" }}', + 'App#{var}', + ]; + yield '{{ \'foo \\\' bar\' }} => foo \' bar' => [ + '{{ \'foo \\\' bar\' }}', + 'foo \' bar', + ]; + yield '{{ "foo \" bar" }} => foo " bar' => [ + '{{ "foo \\" bar" }}', + 'foo " bar', + ]; + yield '{{ \'\f\n\r\t\v\' }} => \f\n\r\t\v' => [ + '{{ \'\\f\\n\\r\\t\\v\' }}', + "\f\n\r\t\v", + ]; + yield '{{ \'\\\\f\\\\n\\\\r\\\\t\\\\v\' }} => \\f\\n\\r\\t\\v' => [ + '{{ \'\\\\f\\\\n\\\\r\\\\t\\\\v\' }}', + '\\f\\n\\r\\t\\v', + ]; + } + + /** + * @group legacy + * @dataProvider getStringWithEscapedDelimiterProducingDeprecation + */ + public function testStringWithEscapedDelimiterProducingDeprecation(string $template, string $expected, string $expectedDeprecation) + { + $this->expectDeprecation($expectedDeprecation); $lexer = new Lexer(new Environment(new ArrayLoader())); - foreach ($tests as $template => $expected) { - $stream = $lexer->tokenize(new Source($template, 'index')); - $stream->expect(Token::VAR_START_TYPE); - $stream->expect(Token::STRING_TYPE, $expected); + $stream = $lexer->tokenize(new Source($template, 'index')); + $stream->expect(Token::VAR_START_TYPE); + $stream->expect(Token::STRING_TYPE, $expected); - // add a dummy assertion here to satisfy PHPUnit, the only thing we want to test is that the code above - // can be executed without throwing any exceptions - $this->addToAssertionCount(1); - } + // add a dummy assertion here to satisfy PHPUnit, the only thing we want to test is that the code above + // can be executed without throwing any exceptions + $this->addToAssertionCount(1); + } + + public function getStringWithEscapedDelimiterProducingDeprecation() + { + yield '{{ \'App\Test\' }} => AppTest' => [ + '{{ \'App\\Test\' }}', + 'AppTest', + "Since twig/twig 3.11: Character \"T\" at position 5 does not need to be escaped anymore.", + ]; + yield '{{ "foo \\\' bar" }} => foo \' bar' => [ + '{{ "foo \\\' bar" }}', + 'foo \' bar', + "Since twig/twig 3.11: Character \"'\" at position 6 does not need to be escaped anymore.", + ]; + yield '{{ \'foo \" bar\' }} => foo " bar' => [ + '{{ \'foo \\" bar\' }}', + 'foo " bar', + 'Since twig/twig 3.11: Character """ at position 6 does not need to be escaped anymore.', + ]; } public function testStringWithInterpolation()