vendor/twig/twig/src/Lexer.php line 176

Open in your IDE?
  1. <?php
  2. /*
  3. * This file is part of Twig.
  4. *
  5. * (c) Fabien Potencier
  6. * (c) Armin Ronacher
  7. *
  8. * For the full copyright and license information, please view the LICENSE
  9. * file that was distributed with this source code.
  10. */
  11. namespace Twig;
  12. use Twig\Error\SyntaxError;
  13. /**
  14. * @author Fabien Potencier <fabien@symfony.com>
  15. */
  16. class Lexer
  17. {
  18. private $isInitialized = false;
  19. private $tokens;
  20. private $code;
  21. private $cursor;
  22. private $lineno;
  23. private $end;
  24. private $state;
  25. private $states;
  26. private $brackets;
  27. private $env;
  28. private $source;
  29. private $options;
  30. private $regexes;
  31. private $position;
  32. private $positions;
  33. private $currentVarBlockLine;
  34. public const STATE_DATA = 0;
  35. public const STATE_BLOCK = 1;
  36. public const STATE_VAR = 2;
  37. public const STATE_STRING = 3;
  38. public const STATE_INTERPOLATION = 4;
  39. public const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
  40. public const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?([Ee][\+\-][0-9]+)?/A';
  41. public const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
  42. public const REGEX_DQ_STRING_DELIM = '/"/A';
  43. public const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
  44. public const REGEX_INLINE_COMMENT = '/#[^\n]*/A';
  45. public const PUNCTUATION = '()[]{}?:.,|';
  46. private const SPECIAL_CHARS = [
  47. 'f' => "\f",
  48. 'n' => "\n",
  49. 'r' => "\r",
  50. 't' => "\t",
  51. 'v' => "\v",
  52. ];
  /**
   * Stores the environment and merges the caller's lexer options over the defaults.
   *
   * @param Environment $env     Environment whose unary and binary operators feed the operator regex
   * @param array       $options Overrides for the tag delimiters and whitespace-control markers
   */
  public function __construct(Environment $env, array $options = [])
  {
      $defaults = [
          'tag_comment' => ['{#', '#}'],
          'tag_block' => ['{%', '%}'],
          'tag_variable' => ['{{', '}}'],
          'whitespace_trim' => '-',
          'whitespace_line_trim' => '~',
          'whitespace_line_chars' => ' \t\0\x0B',
          'interpolation' => ['#{', '}'],
      ];

      $this->env = $env;
      // Caller-supplied entries win over the defaults.
      $this->options = array_merge($defaults, $options);
  }
  /**
   * Compiles, once per lexer instance, the regular expressions derived from the configured options.
   *
   * Every option value spliced into a pattern as a literal goes through preg_quote().
   * This includes the whitespace-control markers inside the "endverbatim" pattern,
   * so a custom marker containing a regex metacharacter cannot corrupt it.
   * The "whitespace_line_chars" option is deliberately NOT quoted: it is written
   * as the content of a character class.
   */
  private function initialize()
  {
      if ($this->isInitialized) {
          return;
      }

      // "#" is passed explicitly because most patterns below use the "x" modifier,
      // where an unescaped "#" starts a comment.
      $quote = static function (string $literal): string {
          return preg_quote($literal, '#');
      };

      $options = $this->options;
      $trim = $options['whitespace_trim'];
      $lineTrim = $options['whitespace_line_trim'];
      $lineChars = '['.$options['whitespace_line_chars'].']*';

      // Builds the alternation matching the three forms of a closing tag:
      //   <trim><tag>\s*   |   <line-trim><tag>[line chars]*   |   <tag>
      // When $eatNewline is true, the first and last forms also swallow one trailing "\n".
      $closing = static function (string $endTag, bool $eatNewline) use ($quote, $trim, $lineTrim, $lineChars): string {
          $newline = $eatNewline ? '\n?' : '';

          return '(?:'
              .$quote($trim.$endTag).'\s*'.$newline
              .'|'
              .$quote($lineTrim.$endTag).$lineChars
              .'|'
              .$quote($endTag).$newline
              .')';
      };

      $this->regexes = [
          // }}
          'lex_var' => '{\s*'.$closing($options['tag_variable'][1], false).'}Ax',

          // %}
          'lex_block' => '{\s*'.$closing($options['tag_block'][1], true).'}Ax',

          // {% endverbatim %}
          'lex_raw_data' => '{'
              .$quote($options['tag_block'][0])
              .'('.$quote($trim).'|'.$quote($lineTrim).')?\s*endverbatim\s*'
              .$closing($options['tag_block'][1], false)
              .'}sx',

          'operator' => $this->getOperatorRegex(),

          // #}
          'lex_comment' => '{'.$closing($options['tag_comment'][1], true).'}sx',

          // verbatim %}
          'lex_block_raw' => '{\s*verbatim\s*'.$closing($options['tag_block'][1], false).'}Asx',

          // line <number> %}
          'lex_block_line' => '{\s*line\s+(\d+)\s*'.$quote($options['tag_block'][1]).'}As',

          // {{ or {% or {#, optionally followed by a whitespace-control marker
          'lex_tokens_start' => '{('
              .$quote($options['tag_variable'][0])
              .'|'
              .$quote($options['tag_block'][0])
              .'|'
              .$quote($options['tag_comment'][0])
              .')('.$quote($trim).'|'.$quote($lineTrim).')?}sx',

          'interpolation_start' => '{'.$quote($options['interpolation'][0]).'\s*}A',
          'interpolation_end' => '{\s*'.$quote($options['interpolation'][1]).'}A',
      ];

      $this->isInitialized = true;
  }
  /**
   * Converts a template source into a token stream.
   *
   * @param Source $source The template source to lex
   *
   * @return TokenStream The tokens collected for $source, terminated by an EOF token
   *
   * @throws SyntaxError When a bracket is still open once the whole code has been consumed
   */
  public function tokenize(Source $source): TokenStream
  {
      $this->initialize();

      // Reset all per-run state; line endings are normalized to "\n" first.
      $this->source = $source;
      $this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
      $this->end = \strlen($this->code);
      $this->cursor = 0;
      $this->lineno = 1;
      $this->position = -1;
      $this->tokens = [];
      $this->states = [];
      $this->brackets = [];
      $this->state = self::STATE_DATA;

      // Locate every tag start up front so lexData() can walk them in order.
      preg_match_all($this->regexes['lex_tokens_start'], $this->code, $tagStarts, \PREG_OFFSET_CAPTURE);
      $this->positions = $tagStarts;

      // Run the handler of the current state until the code is exhausted.
      while ($this->cursor < $this->end) {
          if (self::STATE_DATA === $this->state) {
              $this->lexData();
          } elseif (self::STATE_BLOCK === $this->state) {
              $this->lexBlock();
          } elseif (self::STATE_VAR === $this->state) {
              $this->lexVar();
          } elseif (self::STATE_STRING === $this->state) {
              $this->lexString();
          } elseif (self::STATE_INTERPOLATION === $this->state) {
              $this->lexInterpolation();
          }
      }

      $this->pushToken(Token::EOF_TYPE);

      if ($this->brackets) {
          $unclosed = array_pop($this->brackets);

          throw new SyntaxError(\sprintf('Unclosed "%s".', $unclosed[0]), $unclosed[1], $this->source);
      }

      return new TokenStream($this->tokens, $this->source);
  }
  /**
   * Lexes plain template text up to the next tag start, then handles that tag's opening.
   *
   * The text before the tag is emitted as a text token, right-trimmed when the tag
   * carries a whitespace-control marker. Comments, verbatim blocks and "line"
   * directives are consumed here; other tags push a start token and switch state.
   */
  private function lexData(): void
  {
      $lastIndex = \count($this->positions[0]) - 1;

      // No tag start left: the remainder of the template is a single text token.
      if ($this->position == $lastIndex) {
          $this->pushToken(Token::TEXT_TYPE, substr($this->code, $this->cursor));
          $this->cursor = $this->end;

          return;
      }

      // Advance to the first tag start located at or after the cursor.
      while (true) {
          $tagStart = $this->positions[0][++$this->position];
          if ($tagStart[1] >= $this->cursor) {
              break;
          }
          if ($this->position == $lastIndex) {
              return;
          }
      }

      // Text between the cursor and the tag; the untrimmed copy drives the cursor.
      $rawText = substr($this->code, $this->cursor, $tagStart[1] - $this->cursor);
      $emitted = $rawText;

      if (isset($this->positions[2][$this->position][0])) {
          $marker = $this->positions[2][$this->position][0];
          if ($this->options['whitespace_trim'] === $marker) {
              // whitespace_trim detected ({%-, {{- or {#-)
              $emitted = rtrim($emitted);
          } elseif ($this->options['whitespace_line_trim'] === $marker) {
              // whitespace_line_trim detected ({%~, {{~ or {#~): keep \r and \n
              $emitted = rtrim($emitted, " \t\0\x0B");
          }
      }

      $this->pushToken(Token::TEXT_TYPE, $emitted);
      $this->moveCursor($rawText.$tagStart[0]);

      switch ($this->positions[1][$this->position][0]) {
          case $this->options['tag_comment'][0]:
              $this->lexComment();

              break;

          case $this->options['tag_block'][0]:
              if (preg_match($this->regexes['lex_block_raw'], $this->code, $found, 0, $this->cursor)) {
                  // {% verbatim %}: everything up to endverbatim is raw data
                  $this->moveCursor($found[0]);
                  $this->lexRawData();
              } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $found, 0, $this->cursor)) {
                  // {% line \d+ %}: overrides the current line number
                  $this->moveCursor($found[0]);
                  $this->lineno = (int) $found[1];
              } else {
                  $this->pushToken(Token::BLOCK_START_TYPE);
                  $this->pushState(self::STATE_BLOCK);
                  $this->currentVarBlockLine = $this->lineno;
              }

              break;

          case $this->options['tag_variable'][0]:
              $this->pushToken(Token::VAR_START_TYPE);
              $this->pushState(self::STATE_VAR);
              $this->currentVarBlockLine = $this->lineno;

              break;
      }
  }
  /**
   * Lexes inside a block tag: closes the block when its end tag is next and no
   * bracket is open, otherwise lexes one expression token.
   */
  private function lexBlock(): void
  {
      // An end tag only counts when every bracket has been closed.
      if ($this->brackets || !preg_match($this->regexes['lex_block'], $this->code, $endTag, 0, $this->cursor)) {
          $this->lexExpression();

          return;
      }

      $this->pushToken(Token::BLOCK_END_TYPE);
      $this->moveCursor($endTag[0]);
      $this->popState();
  }
  /**
   * Lexes inside a variable tag: closes it when its end tag is next and no
   * bracket is open, otherwise lexes one expression token.
   */
  private function lexVar(): void
  {
      // An end tag only counts when every bracket has been closed.
      if ($this->brackets || !preg_match($this->regexes['lex_var'], $this->code, $endTag, 0, $this->cursor)) {
          $this->lexExpression();

          return;
      }

      $this->pushToken(Token::VAR_END_TYPE);
      $this->moveCursor($endTag[0]);
      $this->popState();
  }
  /**
   * Lexes a single expression token at the cursor, after skipping leading whitespace.
   *
   * Candidates are tried in a fixed order: spread, arrow, operator, name, number,
   * punctuation, quoted string, opening of an interpolated string, inline comment.
   *
   * @throws SyntaxError On end of input after whitespace, on an unexpected or
   *                     mismatched closing bracket, or on an unlexable character
   */
  private function lexExpression(): void
  {
      // whitespace
      if (preg_match('/\s+/A', $this->code, $blank, 0, $this->cursor)) {
          $this->moveCursor($blank[0]);

          if ($this->cursor >= $this->end) {
              throw new SyntaxError(\sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
          }
      }

      $char = $this->code[$this->cursor];

      // spread operator
      if ('...' === substr($this->code, $this->cursor, 3)) {
          $this->pushToken(Token::SPREAD_TYPE, '...');
          $this->moveCursor('...');

          return;
      }

      // arrow function
      if ('=>' === substr($this->code, $this->cursor, 2)) {
          $this->pushToken(Token::ARROW_TYPE, '=>');
          $this->moveCursor('=>');

          return;
      }

      // operators (inner whitespace runs are collapsed to one space in the token value)
      if (preg_match($this->regexes['operator'], $this->code, $found, 0, $this->cursor)) {
          $this->pushToken(Token::OPERATOR_TYPE, preg_replace('/\s+/', ' ', $found[0]));
          $this->moveCursor($found[0]);

          return;
      }

      // names
      if (preg_match(self::REGEX_NAME, $this->code, $found, 0, $this->cursor)) {
          $this->pushToken(Token::NAME_TYPE, $found[0]);
          $this->moveCursor($found[0]);

          return;
      }

      // numbers: all-digit literals that fit become integers, everything else a float
      if (preg_match(self::REGEX_NUMBER, $this->code, $found, 0, $this->cursor)) {
          $literal = $found[0];
          $asFloat = (float) $literal;
          $value = ctype_digit($literal) && $asFloat <= \PHP_INT_MAX ? (int) $literal : $asFloat;

          $this->pushToken(Token::NUMBER_TYPE, $value);
          $this->moveCursor($literal);

          return;
      }

      // punctuation
      if (str_contains(self::PUNCTUATION, $char)) {
          if (str_contains('([{', $char)) {
              // opening bracket: remember it together with its line
              $this->brackets[] = [$char, $this->lineno];
          } elseif (str_contains(')]}', $char)) {
              // closing bracket: must pair with the most recently opened one
              if (!$this->brackets) {
                  throw new SyntaxError(\sprintf('Unexpected "%s".', $char), $this->lineno, $this->source);
              }

              [$opened, $openedLine] = array_pop($this->brackets);
              if ($char != strtr($opened, '([{', ')]}')) {
                  throw new SyntaxError(\sprintf('Unclosed "%s".', $opened), $openedLine, $this->source);
              }
          }

          $this->pushToken(Token::PUNCTUATION_TYPE, $char);
          ++$this->cursor;

          return;
      }

      // strings
      if (preg_match(self::REGEX_STRING, $this->code, $found, 0, $this->cursor)) {
          $quoted = $found[0];
          $this->pushToken(Token::STRING_TYPE, $this->stripcslashes(substr($quoted, 1, -1), $quoted[0]));
          $this->moveCursor($quoted);

          return;
      }

      // opening double quoted string
      if (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $found, 0, $this->cursor)) {
          $this->brackets[] = ['"', $this->lineno];
          $this->pushState(self::STATE_STRING);
          $this->moveCursor($found[0]);

          return;
      }

      // inline comment: skipped without emitting a token
      if (preg_match(self::REGEX_INLINE_COMMENT, $this->code, $found, 0, $this->cursor)) {
          $this->moveCursor($found[0]);

          return;
      }

      // unlexable
      throw new SyntaxError(\sprintf('Unexpected character "%s".', $char), $this->lineno, $this->source);
  }
  /**
   * Resolves backslash escape sequences in the content of a string literal.
   *
   * Handles the sequences listed in SPECIAL_CHARS, an escaped backslash, escaped
   * quotes, an escaped "#{", "\x" followed by one or two hex digits, and one to
   * three octal digits. Any other escaped character is kept without its backslash
   * and triggers a deprecation, as does escaping the quote character that does
   * not delimit the string.
   *
   * @param string $str       The string content, without its surrounding quotes
   * @param string $quoteType The quote character delimiting the string
   *
   * @return string The content with escape sequences replaced
   */
  private function stripcslashes(string $str, string $quoteType): string
  {
      $deprecation = 'Character "%s" should not be escaped; the "\" character is ignored in Twig 3 but will not be in Twig 4. Please remove the extra "\" character at position %d in "%s" at line %d.';

      $out = '';
      $len = \strlen($str);

      // $offset is bumped once per iteration by the loop header, past the escaped character.
      for ($offset = 0; $offset < $len; ++$offset) {
          $slash = strpos($str, '\\', $offset);
          if (false === $slash) {
              // no more escapes: copy the tail verbatim
              $out .= substr($str, $offset);

              break;
          }

          // copy the literal run preceding the backslash
          $out .= substr($str, $offset, $slash - $offset);
          $offset = $slash + 1;

          if ($offset >= $len) {
              // trailing lone backslash is kept
              $out .= '\\';

              break;
          }

          $escaped = $str[$offset];

          if (isset(self::SPECIAL_CHARS[$escaped])) {
              $out .= self::SPECIAL_CHARS[$escaped];
          } elseif ('\\' === $escaped || "'" === $escaped || '"' === $escaped) {
              // escaping the "other" quote character is unnecessary and deprecated
              if ('\\' !== $escaped && $escaped !== $quoteType) {
                  trigger_deprecation('twig/twig', '3.12', $deprecation, $escaped, $offset + 1, $this->source->getName(), $this->lineno);
              }
              $out .= $escaped;
          } elseif ('#' === $escaped && $offset + 1 < $len && '{' === $str[$offset + 1]) {
              $out .= '#{';
              ++$offset;
          } elseif ('x' === $escaped && $offset + 1 < $len && ctype_xdigit($str[$offset + 1])) {
              // one or two hexadecimal digits
              $digits = $str[++$offset];
              if ($offset + 1 < $len && ctype_xdigit($str[$offset + 1])) {
                  $digits .= $str[++$offset];
              }
              $out .= \chr(hexdec($digits));
          } elseif (ctype_digit($escaped) && $escaped < '8') {
              // up to three octal digits
              $digits = $escaped;
              while ($offset + 1 < $len && ctype_digit($str[$offset + 1]) && $str[$offset + 1] < '8' && \strlen($digits) < 3) {
                  $digits .= $str[++$offset];
              }
              $out .= \chr(octdec($digits));
          } else {
              trigger_deprecation('twig/twig', '3.12', $deprecation, $escaped, $offset + 1, $this->source->getName(), $this->lineno);
              $out .= $escaped;
          }
      }

      return $out;
  }
  /**
   * Consumes the body of a verbatim block up to and including its endverbatim
   * tag, and emits the body as one text token.
   *
   * @throws SyntaxError When no endverbatim tag follows the cursor
   */
  private function lexRawData(): void
  {
      if (!preg_match($this->regexes['lex_raw_data'], $this->code, $found, \PREG_OFFSET_CAPTURE, $this->cursor)) {
          throw new SyntaxError('Unexpected end of file: Unclosed "verbatim" block.', $this->lineno, $this->source);
      }

      [$endTag, $endTagOffset] = $found[0];

      $raw = substr($this->code, $this->cursor, $endTagOffset - $this->cursor);
      $this->moveCursor($raw.$endTag);

      // Group 1 is present only when the endverbatim tag opens with a whitespace-control marker.
      if (isset($found[1][0])) {
          $raw = $this->options['whitespace_trim'] === $found[1][0]
              ? rtrim($raw)                 // whitespace_trim: strip all trailing whitespace
              : rtrim($raw, " \t\0\x0B");   // whitespace_line_trim: keep \r and \n
      }

      $this->pushToken(Token::TEXT_TYPE, $raw);
  }
  /**
   * Skips a comment: moves the cursor past the next comment end tag without emitting a token.
   *
   * @throws SyntaxError When no comment end tag follows the cursor
   */
  private function lexComment(): void
  {
      $closed = preg_match($this->regexes['lex_comment'], $this->code, $found, \PREG_OFFSET_CAPTURE, $this->cursor);
      if (!$closed) {
          throw new SyntaxError('Unclosed comment.', $this->lineno, $this->source);
      }

      [$endTag, $endTagOffset] = $found[0];
      $commentBody = substr($this->code, $this->cursor, $endTagOffset - $this->cursor);

      $this->moveCursor($commentBody.$endTag);
  }
  /**
   * Lexes inside a double-quoted string that may contain interpolations.
   *
   * Depending on what sits at the cursor this opens an interpolation, emits a
   * literal string part, or closes the string.
   *
   * @throws SyntaxError When the closing quote does not pair with the innermost
   *                     open bracket, or when the character cannot be lexed
   */
  private function lexString(): void
  {
      if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
          $this->brackets[] = [$this->options['interpolation'][0], $this->lineno];
          $this->pushToken(Token::INTERPOLATION_START_TYPE);
          $this->moveCursor($match[0]);
          $this->pushState(self::STATE_INTERPOLATION);
      } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && '' !== $match[0]) {
          $this->pushToken(Token::STRING_TYPE, $this->stripcslashes($match[0], '"'));
          $this->moveCursor($match[0]);
      } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
          [$expect, $lineno] = array_pop($this->brackets);
          // The character at the cursor is known to be '"' (the regex just matched it),
          // so the meaningful check is that the bracket being closed is the string opener.
          if ('"' !== $expect) {
              throw new SyntaxError(\sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
          }

          $this->popState();
          ++$this->cursor;
      } else {
          // unlexable
          throw new SyntaxError(\sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
      }
  }
  /**
   * Lexes inside a string interpolation: closes it when the innermost open
   * bracket is the interpolation opener and the end delimiter is next,
   * otherwise lexes one expression token.
   */
  private function lexInterpolation(): void
  {
      $innermost = end($this->brackets);

      $closesHere = $this->options['interpolation'][0] === $innermost[0]
          && preg_match($this->regexes['interpolation_end'], $this->code, $found, 0, $this->cursor);

      if (!$closesHere) {
          $this->lexExpression();

          return;
      }

      array_pop($this->brackets);
      $this->pushToken(Token::INTERPOLATION_END_TYPE);
      $this->moveCursor($found[0]);
      $this->popState();
  }
  /**
   * Appends a token carrying the current line number; empty text tokens are dropped.
   *
   * @param mixed $type  One of the Token::*_TYPE constants
   * @param mixed $value The token value, if any
   */
  private function pushToken($type, $value = ''): void
  {
      $isEmptyText = Token::TEXT_TYPE === $type && '' === $value;

      if (!$isEmptyText) {
          $this->tokens[] = new Token($type, $value, $this->lineno);
      }
  }
  /**
   * Advances the cursor past $text and bumps the line counter by the newlines it contains.
   *
   * @param string $text The text that has just been consumed
   */
  private function moveCursor($text): void
  {
      $newlines = substr_count($text, "\n");
      $bytes = \strlen($text);

      $this->lineno += $newlines;
      $this->cursor += $bytes;
  }
  /**
   * Builds the anchored regex matching "=" and every unary or binary operator
   * registered in the environment, with longer operators tried first.
   *
   * @return string The operator pattern, using "/" delimiters and the "A" modifier
   */
  private function getOperatorRegex(): string
  {
      $names = array_merge(
          ['='],
          array_keys($this->env->getUnaryOperators()),
          array_keys($this->env->getBinaryOperators())
      );

      // Map operator => length and sort by descending length.
      $byLength = array_combine($names, array_map('strlen', $names));
      arsort($byLength);

      $alternatives = [];
      foreach ($byLength as $operator => $length) {
          $pattern = preg_quote($operator, '/');

          // an operator that ends with a letter must be followed by
          // a whitespace, a parenthesis, an opening map [ or sequence {
          if (ctype_alpha($operator[$length - 1])) {
              $pattern = $pattern.'(?=[\s()\[{])';
          }

          // an operator that begins with a letter must not have a dot or pipe before
          if (ctype_alpha($operator[0])) {
              $pattern = '(?<![\.\|])'.$pattern;
          }

          // whitespace inside an operator matches any amount of whitespace
          $alternatives[] = preg_replace('/\s+/', '\s+', $pattern);
      }

      return '/'.implode('|', $alternatives).'/A';
  }
  /**
   * Saves the current state on the stack and switches to $state.
   *
   * @param mixed $state One of the STATE_* constants
   */
  private function pushState($state): void
  {
      $previous = $this->state;

      $this->states[] = $previous;
      $this->state = $state;
  }
  /**
   * Restores the most recently saved state.
   *
   * @throws \LogicException When the state stack is empty
   */
  private function popState(): void
  {
      if (!$this->states) {
          throw new \LogicException('Cannot pop state without a previous state.');
      }

      $restored = array_pop($this->states);
      $this->state = $restored;
  }
  513. }