From c062f967379d2ffc68a5faa9888973cb1182eae1 Mon Sep 17 00:00:00 2001
From: Ruud Kamphuis
Date: Wed, 7 Aug 2024 19:19:01 +0200
Subject: [PATCH] Deprecate unnecessary escape characters

See #4123 #2712

This allows us to change the implementation in v4 so that we no longer have to escape backslashes.
---
 doc/templates.rst   | 10 ++++++
 src/Lexer.php       | 56 +++++++++++++++++++++++++++++++--
 tests/LexerTest.php | 76 +++++++++++++++++++++++++++++++++++++--------
 3 files changed, 127 insertions(+), 15 deletions(-)

diff --git a/doc/templates.rst b/doc/templates.rst
index e09a6a0185c..1c8ad43a6ca 100644
--- a/doc/templates.rst
+++ b/doc/templates.rst
@@ -527,6 +527,16 @@ exist:
   backslash (e.g. ``'c:\Program Files'``) escape it by doubling it
   (e.g. ``'c:\\Program Files'``).
 
+  Note that the following escape sequences are recognized in strings:
+   * ``\f``: Form feed
+   * ``\n``: New line
+   * ``\r``: Carriage return
+   * ``\t``: Horizontal tab
+   * ``\v``: Vertical tab
+   * ``\\``: Backslash
+
+  When using single-quoted strings, the single quote character (``'``) needs to be escaped with a backslash (``\'``).
+  When using double-quoted strings, the double quote character (``"``) needs to be escaped with a backslash (``\"``).
 * ``42`` / ``42.23``: Integers and floating point numbers are created by
   writing the number down. If a dot is present the number is a float,
   otherwise an integer.
diff --git a/src/Lexer.php b/src/Lexer.php
index 8973fbbc8da..ee0854070f2 100644
--- a/src/Lexer.php
+++ b/src/Lexer.php
@@ -367,7 +367,7 @@ private function lexExpression(): void
         }
         // strings
         elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
-            $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes(substr($match[0], 1, -1)));
+            $this->pushToken(/* Token::STRING_TYPE */ 7, $this->stripcslashes(substr($match[0], 1, -1), substr($match[0], 0, 1)));
             $this->moveCursor($match[0]);
         }
         // opening double quoted string
@@ -382,6 +382,58 @@ private function lexExpression(): void
         }
     }
 
+    /**
+     * Unescapes a string literal: \f, \n, \r, \t, \v, \\, quotes, \x followed by
+     * one or two hex digits, and one to three octal digits. Any other escaped
+     * character is kept as is, without its backslash.
+     *
+     * @param string $str       The content of the literal, without delimiters
+     * @param string $quoteType The delimiter, ' or "; escaping the other quote is deprecated
+     */
+    private function stripcslashes(string $str, string $quoteType): string
+    {
+        $specialChars = ['f' => "\f", 'n' => "\n", 'r' => "\r", 't' => "\t", 'v' => "\v"];
+        $result = '';
+        $length = strlen($str);
+
+        for ($i = 0; $i < $length; ++$i) {
+            // a backslash ending the string has nothing to escape and is kept verbatim
+            if ('\\' !== $str[$i] || $i + 1 >= $length) {
+                $result .= $str[$i];
+                continue;
+            }
+
+            // skip the backslash: $i now points at the escaped character
+            $nextChar = $str[++$i];
+            if (isset($specialChars[$nextChar])) {
+                $result .= $specialChars[$nextChar];
+            } elseif ("'" === $nextChar || '"' === $nextChar) {
+                if ($nextChar !== $quoteType) {
+                    trigger_deprecation('twig/twig', '3.11', 'Character "%s" at position %d does not need to be escaped anymore.', $nextChar, $i + 1);
+                }
+                $result .= $nextChar;
+            } elseif ('x' === $nextChar && $i + 1 < $length && ctype_xdigit($str[$i + 1])) {
+                // the digits come after the "x": one is required, a second one is optional
+                $hex = $str[++$i];
+                if ($i + 1 < $length && ctype_xdigit($str[$i + 1])) {
+                    $hex .= $str[++$i];
+                }
+                $result .= chr(hexdec($hex));
+            } elseif (ctype_digit($nextChar) && $nextChar < '8') {
+                $octal = $nextChar;
+                while (strlen($octal) < 3 && $i + 1 < $length && ctype_digit($str[$i + 1]) && $str[$i + 1] < '8') {
+                    $octal .= $str[++$i];
+                }
+                $result .= chr(octdec($octal));
+            } else {
+                // "\\" and unknown escapes: only the escaped character is kept
+                $result .= $nextChar;
+            }
+        }
+
+        return $result;
+    }
+
     private function lexRawData(): void
     {
         if (!preg_match($this->regexes['lex_raw_data'], $this->code, $match, \PREG_OFFSET_CAPTURE, $this->cursor)) {
@@ -423,7 +475,7 @@ private function lexString(): void
             $this->moveCursor($match[0]);
             $this->pushState(self::STATE_INTERPOLATION);
         } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && '' !== $match[0]) {
-            $this->pushToken(/* Token::STRING_TYPE */ 7, stripcslashes($match[0]));
+            $this->pushToken(/* Token::STRING_TYPE */ 7, $this->stripcslashes($match[0], '"'));
             $this->moveCursor($match[0]);
         } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
             [$expect, $lineno] = array_pop($this->brackets);
diff --git a/tests/LexerTest.php b/tests/LexerTest.php
index 7926034ffa1..3b5c26ce59d 100644
--- a/tests/LexerTest.php
+++ b/tests/LexerTest.php
@@ -12,6 +12,7 @@
  */
 
 use PHPUnit\Framework\TestCase;
+use Symfony\Bridge\PhpUnit\ExpectDeprecationTrait;
 use Twig\Environment;
 use Twig\Error\SyntaxError;
 use Twig\Lexer;
@@ -21,6 +22,8 @@
 
 class LexerTest extends TestCase
 {
+    use ExpectDeprecationTrait;
+
     public function testNameLabelForTag()
     {
         $template = '{% § %}';
@@ -178,23 +181,70 @@ public function testBigNumbers()
         $this->assertEquals('922337203685477580700', $node->getValue());
     }
 
-    public function testStringWithEscapedDelimiter()
+    /**
+     * @group legacy
+     * @dataProvider getStringWithEscapedDelimiter
+     */
+    public function testStringWithEscapedDelimiter(string $template, string $expected, ?string $expectedDeprecation = null)
     {
-        $tests = [
-            "{{ 'foo \' bar' }}" => 'foo \' bar',
-            '{{ "foo \" bar" }}' => 'foo " bar',
-        ];
+        if ($expectedDeprecation) {
+            $this->expectDeprecation($expectedDeprecation);
+        }
 
         $lexer = new Lexer(new Environment(new ArrayLoader()));
-        foreach ($tests as $template => $expected) {
-            $stream = $lexer->tokenize(new Source($template, 'index'));
-            $stream->expect(Token::VAR_START_TYPE);
-            $stream->expect(Token::STRING_TYPE, $expected);
+        $stream = $lexer->tokenize(new Source($template, 'index'));
+        $stream->expect(Token::VAR_START_TYPE);
+        $stream->expect(Token::STRING_TYPE, $expected);
 
-            // add a dummy assertion here to satisfy PHPUnit, the only thing we want to test is that the code above
-            // can be executed without throwing any exceptions
-            $this->addToAssertionCount(1);
-        }
+        // add a dummy assertion here to satisfy PHPUnit, the only thing we want to test is that the code above
+        // can be executed without throwing any exceptions
+        $this->addToAssertionCount(1);
+    }
+
+    public function getStringWithEscapedDelimiter()
+    {
+        yield '{{ \'App\\\\Test\' }} => App\Test' => [
+            '{{ \'App\\\\Test\' }}',
+            'App\\Test',
+        ];
+        yield '{{ \'App\Test\' }} => AppTest' => [
+            '{{ \'App\\Test\' }}',
+            'AppTest',
+        ];
+        yield '{{ \'A\B\C\' }} => ABC' => [
+            '{{ \'A\B\C\' }}',
+            'ABC',
+        ];
+        yield '{{ "App\#{var}" }} => App#{var}' => [
+            '{{ "App\#{var}" }}',
+            'App#{var}',
+        ];
+        yield '{{ \'foo \\\' bar\' }} => foo \' bar' => [
+            '{{ \'foo \\\' bar\' }}',
+            'foo \' bar',
+        ];
+        yield '{{ "foo \\\' bar" }} => foo \' bar' => [
+            '{{ "foo \\\' bar" }}',
+            'foo \' bar',
+            "Since twig/twig 3.11: Character \"'\" at position 6 does not need to be escaped anymore.",
+        ];
+        yield '{{ "foo \" bar" }} => foo " bar' => [
+            '{{ "foo \\" bar" }}',
+            'foo " bar',
+        ];
+        yield '{{ \'foo \" bar\' }} => foo " bar' => [
+            '{{ \'foo \\" bar\' }}',
+            'foo " bar',
+            'Since twig/twig 3.11: Character """ at position 6 does not need to be escaped anymore.',
+        ];
+        yield '{{ \'\f\n\r\t\v\' }} => \f\n\r\t\v' => [
+            '{{ \'\\f\\n\\r\\t\\v\' }}',
+            "\f\n\r\t\v",
+        ];
+        yield '{{ \'\\\\f\\\\n\\\\r\\\\t\\\\v\' }} => \\f\\n\\r\\t\\v' => [
+            '{{ \'\\\\f\\\\n\\\\r\\\\t\\\\v\' }}',
+            '\\f\\n\\r\\t\\v',
+        ];
     }
 
     public function testStringWithInterpolation()