Skip to content

Commit

Permalink
Deprecate unnecessary escape characters
Browse files Browse the repository at this point in the history
  • Loading branch information
ruudk committed Aug 7, 2024
1 parent c7d57d1 commit c499b06
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 16 deletions.
76 changes: 74 additions & 2 deletions src/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ class Lexer
private int $position;
private array $positions;
private int $currentVarBlockLine;
/**
* @var array<string>
*/
private array $deprecations = [];

public const STATE_DATA = 0;
public const STATE_BLOCK = 1;
Expand Down Expand Up @@ -82,6 +86,8 @@ public function __construct(Environment $env, array $options = [])

private function initialize()
{
$this->deprecations = [];

if ($this->isInitialized) {
return;
}
Expand Down Expand Up @@ -381,7 +387,7 @@ private function lexExpression(): void
}
// strings
elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
$this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes(substr($match[0], 1, -1)));
$this->pushToken(/* Token::STRING_TYPE */ 7, $this->stripcslashes(substr($match[0], 1, -1), substr($match[0], 0, 1)));
$this->moveCursor($match[0]);
}
// opening double quoted string
Expand All @@ -396,6 +402,64 @@ private function lexExpression(): void
}
}

/**
 * Decodes the backslash escape sequences found in a Twig string literal.
 *
 * Recognised sequences:
 *  - \f, \n, \r, \t, \v        => the matching control character
 *  - \#                        => "#"
 *  - \\                        => a single backslash
 *  - \' and \"                 => the quote character
 *  - \xH or \xHH               => the byte with that hexadecimal value
 *  - \O, \OO or \OOO (0-7)     => the byte with that octal value
 *
 * Any other escaped character is copied through together with its backslash,
 * and a backslash that is the last character of the string is kept as is.
 *
 * Escapes that are not needed for the given quote type are recorded in
 * $this->deprecations; the decoded result is not affected by that.
 *
 * @param string $str       the string contents, without the surrounding quotes
 * @param string $quoteType the quote character that delimits the literal (' or ")
 *
 * @return string the decoded string
 */
private function stripcslashes(string $str, string $quoteType): string
{
    $result = '';
    $length = strlen($str);
    $specialChars = [
        'f' => "\f",
        'n' => "\n",
        'r' => "\r",
        't' => "\t",
        'v' => "\v",
    ];

    for ($i = 0; $i < $length; $i++) {
        // Plain characters, and a backslash with nothing after it, are copied verbatim.
        if ($str[$i] !== '\\' || $i + 1 >= $length) {
            $result .= $str[$i];
            continue;
        }

        $nextChar = $str[$i + 1];

        if (isset($specialChars[$nextChar])) {
            $result .= $specialChars[$nextChar];
            $i++;
        } elseif ($nextChar === '#') {
            $result .= "#";
            $i++;
        } elseif ($nextChar === '\\') {
            // In a single-quoted string an escaped backslash is only reported as
            // unnecessary when it is not followed by one of the special letters
            // (where dropping the escape would change the meaning).
            if ($quoteType === "'" && (!($i + 2 < $length && isset($specialChars[$str[$i + 2]])))) {
                $this->deprecations[] = sprintf("Character %s at position %d does not require to be escaped.", $nextChar, $i + 2);
            }
            $result .= '\\';
            $i++;
        } elseif ($nextChar === "'" || $nextChar === '"') {
            // Only the quote that delimits the string needs escaping.
            if ($nextChar !== $quoteType) {
                $this->deprecations[] = sprintf("Character %s at position %d does not require to be escaped.", $nextChar, $i + 2);
            }
            $result .= $nextChar;
            $i++;
        } elseif ($nextChar === 'x' && $i + 2 < $length && ctype_xdigit($str[$i + 2])) {
            // The hex digits start AFTER the "x" (index $i + 2). One or two digits
            // are accepted, as the native stripcslashes() did.
            $hex = $str[$i + 2];
            $consumed = 2; // the "x" and the first digit
            if ($i + 3 < $length && ctype_xdigit($str[$i + 3])) {
                $hex .= $str[$i + 3];
                $consumed = 3;
            }
            $result .= chr(hexdec($hex));
            $i += $consumed;
        } elseif (ctype_digit($nextChar) && $nextChar < '8') {
            // Up to three octal digits. $last tracks the index of the last digit
            // consumed so the look-ahead never drifts from the cursor.
            $octal = $nextChar;
            $last = $i + 1;
            while (
                strlen($octal) < 3
                && $last + 1 < $length
                && ctype_digit($str[$last + 1])
                && $str[$last + 1] < '8'
            ) {
                $octal .= $str[++$last];
            }
            $result .= chr(octdec($octal));
            $i = $last;
        } else {
            // Unknown escape: keep both the backslash and the character.
            $result .= '\\' . $nextChar;
            $i++;
        }
    }

    return $result;
}

private function lexRawData(): void
{
if (!preg_match($this->regexes['lex_raw_data'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) {
Expand Down Expand Up @@ -437,7 +501,7 @@ private function lexString(): void
$this->moveCursor($match[0]);
$this->pushState(self::STATE_INTERPOLATION);
} elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && '' !== $match[0]) {
$this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes($match[0]));
$this->pushToken(/* Token::STRING_TYPE */ 7, $this->stripcslashes($match[0], '"'));
$this->moveCursor($match[0]);
} elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
[$expect, $lineno] = array_pop($this->brackets);
Expand Down Expand Up @@ -530,4 +594,12 @@ private function popState(): void

$this->state = array_pop($this->states);
}

/**
 * Exposes the deprecation messages currently stored on this lexer.
 *
 * @return string[] the stored messages, in the order they were recorded
 */
public function getDeprecations(): array
{
    $messages = $this->deprecations;

    return $messages;
}
}
102 changes: 88 additions & 14 deletions tests/LexerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -178,23 +178,97 @@ public function testBigNumbers()
$this->assertEquals('922337203685477580700', $node->getValue());
}

public function testStringWithEscapedDelimiter()
/**
* @dataProvider getStringWithEscapedDelimiter
*/
public function testStringWithEscapedDelimiter(string $template, string $expected, array $expectedDeprecations = [])
{
$tests = [
"{{ 'foo \' bar' }}" => 'foo \' bar',
'{{ "foo \" bar" }}' => 'foo " bar',
];

$lexer = new Lexer(new Environment(new ArrayLoader()));
foreach ($tests as $template => $expected) {
$stream = $lexer->tokenize(new Source($template, 'index'));
$stream->expect(Token::VAR_START_TYPE);
$stream->expect(Token::STRING_TYPE, $expected);
$stream = $lexer->tokenize(new Source($template, 'index'));
$stream->expect(Token::VAR_START_TYPE);
$stream->expect(Token::STRING_TYPE, $expected);

// add a dummy assertion here to satisfy PHPUnit, the only thing we want to test is that the code above
// can be executed without throwing any exceptions
$this->addToAssertionCount(1);
}
$this->assertSame($expectedDeprecations, $lexer->getDeprecations());
}

/**
 * Data sets for string literals that contain escape sequences.
 *
 * Each data set is a list of:
 *  - the Twig template to tokenize,
 *  - the expected value of the resulting string token,
 *  - the deprecation messages the lexer is expected to report.
 *
 * Templates and expected values are written as nowdocs so that backslashes
 * appear exactly as they would in a template, without PHP-level escaping.
 *
 * Declared static because the provider does not use instance state and
 * newer PHPUnit versions require static data providers.
 *
 * @return iterable<string, array{0: string, 1: string, 2: array<string>}>
 */
public static function getStringWithEscapedDelimiter(): iterable
{
    // Escaped backslash in a single-quoted string: decoded, but reported as unnecessary.
    yield '{{ \'App\\\\Test\' }} => App\Test' => [
        <<<'TWIG'
            {{ 'App\\Test' }}
            TWIG,
        <<<'EOF'
            App\Test
            EOF,
        [
            'Character \ at position 5 does not require to be escaped.'
        ],
    ];

    // Unescaped backslash before a non-special character is kept as is.
    yield '{{ \'App\Test\' }} => App\Test' => [
        <<<'TWIG'
            {{ 'App\Test' }}
            TWIG,
        <<<'EOF'
            App\Test
            EOF,
        [],
    ];

    // Escaping the delimiting quote is required, so nothing is reported.
    yield '{{ \'foo \\\' bar\' }} => foo \' bar' => [
        <<<'TWIG'
            {{ 'foo \' bar' }}
            TWIG,
        <<<'EOF'
            foo ' bar
            EOF,
        [],
    ];

    // Escaping the other kind of quote is reported as unnecessary.
    yield '{{ "foo \\\' bar" }} => foo \' bar' => [
        <<<'TWIG'
            {{ "foo \' bar" }}
            TWIG,
        <<<'EOF'
            foo ' bar
            EOF,
        [
            "Character ' at position 6 does not require to be escaped.",
        ],
    ];

    yield '{{ "foo \" bar" }} => foo " bar' => [
        <<<'TWIG'
            {{ "foo \" bar" }}
            TWIG,
        <<<'EOF'
            foo " bar
            EOF,
        [],
    ];

    yield '{{ \'foo \" bar\' }} => foo " bar' => [
        <<<'TWIG'
            {{ 'foo \" bar' }}
            TWIG,
        <<<'EOF'
            foo " bar
            EOF,
        [
            'Character " at position 6 does not require to be escaped.',
        ],
    ];

    // Special letters are turned into the matching control characters.
    yield '{{ \'\f\n\r\t\v\' }} => \f\n\r\t\v' => [
        <<<'TWIG'
            {{ '\f\n\r\t\v' }}
            TWIG,
        "\f\n\r\t\v",
        [],
    ];

    // An escaped backslash followed by a special letter is needed, so nothing is reported.
    yield '{{ \'\\\\f\\\\n\\\\r\\\\t\\\\v\' }} => \\f\\n\\r\\t\\v' => [
        <<<'TWIG'
            {{ '\\f\\n\\r\\t\\v' }}
            TWIG,
        <<<'EOF'
            \f\n\r\t\v
            EOF,
        [],
    ];
}

public function testStringWithInterpolation()
Expand Down

0 comments on commit c499b06

Please sign in to comment.