From 62eb567bd82de266b5ad6c9735a437ea6c14cab4 Mon Sep 17 00:00:00 2001 From: Dan Wallis Date: Sat, 11 Oct 2025 16:31:12 +0100 Subject: [PATCH 1/4] Define polyfilled constants from Tokenizer as int The values of these constants are not stable, and therefore already cannot be relied upon. This is because the specific values that PHP assigns can change with different versions of PHP. PHPCS does not use the values of these constants (other than to look up their name using the Tokens::tokenName() method). There are other tools which also polyfill these constants. Some of those tools also perform validation on the values of these constants. In order to play nicely with the arbitrary validation that other tools perform on these constants, we are switching from string values to integer values. All PHPCS 'native' tokens currently have reliable values. In line with PHP T_* constants, the values of these tokens should never be relied upon. In a future version of PHPCS, the values for these tokens will switch from strings to integers. Existing tests already cover the use of these constants and do not require adjustment for the code being changed here. --- src/Util/Tokens.php | 158 +++++++++++------------ tests/Core/Util/Tokens/TokenNameTest.php | 1 + 2 files changed, 77 insertions(+), 82 deletions(-) diff --git a/src/Util/Tokens.php b/src/Util/Tokens.php index 62c918c31b..0ffff5a7a5 100644 --- a/src/Util/Tokens.php +++ b/src/Util/Tokens.php @@ -10,6 +10,7 @@ namespace PHP_CodeSniffer\Util; +// PHPCS native tokens. 
define('T_NONE', 'PHPCS_T_NONE'); define('T_OPEN_CURLY_BRACKET', 'PHPCS_T_OPEN_CURLY_BRACKET'); define('T_CLOSE_CURLY_BRACKET', 'PHPCS_T_CLOSE_CURLY_BRACKET'); @@ -70,84 +71,6 @@ define('T_TYPE_OPEN_PARENTHESIS', 'PHPCS_T_TYPE_OPEN_PARENTHESIS'); define('T_TYPE_CLOSE_PARENTHESIS', 'PHPCS_T_TYPE_CLOSE_PARENTHESIS'); -/* - * {@internal IMPORTANT: all PHP native polyfilled tokens MUST be added to the - * `PHP_CodeSniffer\Tests\Core\Util\Tokens\TokenNameTest::dataPolyfilledPHPNativeTokens()` test method!} - */ - -// Some PHP 7.4 tokens, replicated for lower versions. -if (defined('T_COALESCE_EQUAL') === false) { - define('T_COALESCE_EQUAL', 'PHPCS_T_COALESCE_EQUAL'); -} - -if (defined('T_BAD_CHARACTER') === false) { - define('T_BAD_CHARACTER', 'PHPCS_T_BAD_CHARACTER'); -} - -if (defined('T_FN') === false) { - define('T_FN', 'PHPCS_T_FN'); -} - -// Some PHP 8.0 tokens, replicated for lower versions. -if (defined('T_NULLSAFE_OBJECT_OPERATOR') === false) { - define('T_NULLSAFE_OBJECT_OPERATOR', 'PHPCS_T_NULLSAFE_OBJECT_OPERATOR'); -} - -if (defined('T_NAME_QUALIFIED') === false) { - define('T_NAME_QUALIFIED', 'PHPCS_T_NAME_QUALIFIED'); -} - -if (defined('T_NAME_FULLY_QUALIFIED') === false) { - define('T_NAME_FULLY_QUALIFIED', 'PHPCS_T_NAME_FULLY_QUALIFIED'); -} - -if (defined('T_NAME_RELATIVE') === false) { - define('T_NAME_RELATIVE', 'PHPCS_T_NAME_RELATIVE'); -} - -if (defined('T_MATCH') === false) { - define('T_MATCH', 'PHPCS_T_MATCH'); -} - -if (defined('T_ATTRIBUTE') === false) { - define('T_ATTRIBUTE', 'PHPCS_T_ATTRIBUTE'); -} - -// Some PHP 8.1 tokens, replicated for lower versions. 
-if (defined('T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG') === false) { - define('T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG', 'PHPCS_T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG'); -} - -if (defined('T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG') === false) { - define('T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG', 'PHPCS_T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG'); -} - -if (defined('T_READONLY') === false) { - define('T_READONLY', 'PHPCS_T_READONLY'); -} - -if (defined('T_ENUM') === false) { - define('T_ENUM', 'PHPCS_T_ENUM'); -} - -// Some PHP 8.4 tokens, replicated for lower versions. -if (defined('T_PUBLIC_SET') === false) { - define('T_PUBLIC_SET', 'PHPCS_T_PUBLIC_SET'); -} - -if (defined('T_PROTECTED_SET') === false) { - define('T_PROTECTED_SET', 'PHPCS_T_PROTECTED_SET'); -} - -if (defined('T_PRIVATE_SET') === false) { - define('T_PRIVATE_SET', 'PHPCS_T_PRIVATE_SET'); -} - -// Some PHP 8.5 tokens, replicated for lower versions. -if (defined('T_VOID_CAST') === false) { - define('T_VOID_CAST', 'PHPCS_T_VOID_CAST'); -} - // Tokens used for parsing doc blocks. define('T_DOC_COMMENT_STAR', 'PHPCS_T_DOC_COMMENT_STAR'); define('T_DOC_COMMENT_WHITESPACE', 'PHPCS_T_DOC_COMMENT_WHITESPACE'); @@ -163,6 +86,8 @@ define('T_PHPCS_IGNORE', 'PHPCS_T_PHPCS_IGNORE'); define('T_PHPCS_IGNORE_FILE', 'PHPCS_T_PHPCS_IGNORE_FILE'); +Tokens::polyfillTokenizerConstants(); + final class Tokens { @@ -612,6 +537,13 @@ final class Tokens T_YIELD_FROM => T_YIELD_FROM, ]; + /** + * Mapping table for polyfilled constants + * + * @var array + */ + private static $polyfillMappingTable = []; + /** * The token weightings. * @@ -943,12 +875,12 @@ final class Tokens */ public static function tokenName($token) { - if (is_string($token) === false) { - // PHP-supplied token name. - return token_name($token); + if (is_string($token) === true) { + // PHPCS native token. + return substr($token, 6); } - return substr($token, 6); + return (self::$polyfillMappingTable[$token] ?? 
token_name($token)); } @@ -991,4 +923,66 @@ public static function getHighestWeightedToken(array $tokens) return $highestType; } + + + /** + * Polyfill tokenizer (T_*) constants. + * + * {@internal IMPORTANT: all PHP native polyfilled tokens MUST be added to the + * `PHP_CodeSniffer\Tests\Core\Util\Tokens\TokenNameTest::dataPolyfilledPHPNativeTokens()` test method!} + * + * @return void + */ + public static function polyfillTokenizerConstants(): void + { + // Ideally this would be a private class constant. We cannot do that + // here as the constants that we are polyfilling in this method are + // used in some of the class constants for this class. If we reference + // any class constants or properties before this method has fully run, + // PHP will intitialise the class, leading to warnings about undefined + // T_* constants. + $tokensToPolyfill = [ + 'T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG', + 'T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG', + 'T_ATTRIBUTE', + 'T_BAD_CHARACTER', + 'T_COALESCE_EQUAL', + 'T_ENUM', + 'T_FN', + 'T_MATCH', + 'T_NAME_FULLY_QUALIFIED', + 'T_NAME_QUALIFIED', + 'T_NAME_RELATIVE', + 'T_NULLSAFE_OBJECT_OPERATOR', + 'T_PRIVATE_SET', + 'T_PROTECTED_SET', + 'T_PUBLIC_SET', + 'T_READONLY', + 'T_VOID_CAST', + ]; + + // + // The PHP manual suggests "using big numbers like 10000" for + // polyfilled T_* constants. We have arbitrarily chosen to start our + // numbering scheme from 135_000. + $nextTokenNumber = 135000; + + $polyfillMappingTable = []; + + foreach ($tokensToPolyfill as $tokenName) { + if (defined($tokenName) === false) { + while (isset($polyfillMappingTable[$nextTokenNumber]) === true) { + $nextTokenNumber++; + } + + define($tokenName, $nextTokenNumber); + } + + $polyfillMappingTable[constant($tokenName)] = $tokenName; + } + + // Be careful to not reference this class anywhere in this method until + // *after* all constants have been polyfilled. 
+ self::$polyfillMappingTable = $polyfillMappingTable; + } } diff --git a/tests/Core/Util/Tokens/TokenNameTest.php b/tests/Core/Util/Tokens/TokenNameTest.php index 43198266d1..5c5689f546 100644 --- a/tests/Core/Util/Tokens/TokenNameTest.php +++ b/tests/Core/Util/Tokens/TokenNameTest.php @@ -16,6 +16,7 @@ * Tests for the \PHP_CodeSniffer\Util\Tokens::tokenName() method. * * @covers \PHP_CodeSniffer\Util\Tokens::tokenName + * @covers \PHP_CodeSniffer\Util\Tokens::polyfillTokenizerConstants */ final class TokenNameTest extends TestCase { From af09543a431896f711c0cb53d2525d517af2f510 Mon Sep 17 00:00:00 2001 From: Dan Wallis Date: Fri, 14 Nov 2025 16:47:24 +0000 Subject: [PATCH 2/4] Reorder PHP constants for better maintainability --- src/Util/Tokens.php | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/Util/Tokens.php b/src/Util/Tokens.php index 0ffff5a7a5..35d0959dc6 100644 --- a/src/Util/Tokens.php +++ b/src/Util/Tokens.php @@ -942,22 +942,31 @@ public static function polyfillTokenizerConstants(): void // PHP will intitialise the class, leading to warnings about undefined // T_* constants. $tokensToPolyfill = [ - 'T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG', - 'T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG', - 'T_ATTRIBUTE', + // PHP 7.4 native tokens. 'T_BAD_CHARACTER', 'T_COALESCE_EQUAL', - 'T_ENUM', 'T_FN', + + // PHP 8.0 native tokens. + 'T_ATTRIBUTE', 'T_MATCH', 'T_NAME_FULLY_QUALIFIED', 'T_NAME_QUALIFIED', 'T_NAME_RELATIVE', 'T_NULLSAFE_OBJECT_OPERATOR', + + // PHP 8.1 native tokens. + 'T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG', + 'T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG', + 'T_ENUM', + 'T_READONLY', + + // PHP 8.4 native tokens. 'T_PRIVATE_SET', 'T_PROTECTED_SET', 'T_PUBLIC_SET', - 'T_READONLY', + + // PHP 8.5 native tokens. 
'T_VOID_CAST', ]; From 7b972cbd4a135f145fd00c6b005870f20d0f66e7 Mon Sep 17 00:00:00 2001 From: Dan Wallis Date: Fri, 14 Nov 2025 18:45:41 +0000 Subject: [PATCH 3/4] Better protect against collisions --- src/Util/Tokens.php | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/Util/Tokens.php b/src/Util/Tokens.php index 35d0959dc6..aee637107b 100644 --- a/src/Util/Tokens.php +++ b/src/Util/Tokens.php @@ -976,15 +976,38 @@ public static function polyfillTokenizerConstants(): void // numbering scheme from 135_000. $nextTokenNumber = 135000; + // This variable is necessary to avoid collisions with any other + // libraries which also polyfill T_* constants. + // array_flip()/isset() because in_array() is slow. + $existingConstants = array_flip(get_defined_constants(true)['tokenizer']); + foreach ((get_defined_constants(true)['user'] ?? []) as $k => $v) { + if (isset($k[2]) === false || $k[0] !== 'T' || $k[1] !== '_') { + // We only care about T_* constants. + continue; + } + + if (isset($existingConstants[$v]) === true) { + throw new \Exception("Externally polyfilled tokenizer constant value collision detected! $k has the same value as {$existingConstants[$v]}"); + } + + $existingConstants[$v] = $k; + } + $polyfillMappingTable = []; foreach ($tokensToPolyfill as $tokenName) { + if (isset(get_defined_constants(true)['tokenizer'][$tokenName]) === true) { + // This is a PHP native token, which is already defined by PHP. 
+ continue; + } + if (defined($tokenName) === false) { - while (isset($polyfillMappingTable[$nextTokenNumber]) === true) { + while (isset($existingConstants[$nextTokenNumber]) === true) { $nextTokenNumber++; } define($tokenName, $nextTokenNumber); + $existingConstants[$nextTokenNumber] = $tokenName; } $polyfillMappingTable[constant($tokenName)] = $tokenName; From fb73709fbb96564b30bbc0a5a186ff30fcd53129 Mon Sep 17 00:00:00 2001 From: Dan Wallis Date: Mon, 22 Dec 2025 17:48:02 +0000 Subject: [PATCH 4/4] Add tests for new tokenizer constant code --- phpunit.xml.dist | 3 +++ ...yfillTokenizerConstants-collision-php.phpt | 18 +++++++++++++++++ ...fillTokenizerConstants-collision-user.phpt | 20 +++++++++++++++++++ .../polyfillTokenizerConstants-number.phpt | 14 +++++++++++++ ...polyfillTokenizerConstants-skip-names.phpt | 17 ++++++++++++++++ 5 files changed, 72 insertions(+) create mode 100644 tests/EndToEndPhpt/Util/Tokens/polyfillTokenizerConstants-collision-php.phpt create mode 100644 tests/EndToEndPhpt/Util/Tokens/polyfillTokenizerConstants-collision-user.phpt create mode 100644 tests/EndToEndPhpt/Util/Tokens/polyfillTokenizerConstants-number.phpt create mode 100644 tests/EndToEndPhpt/Util/Tokens/polyfillTokenizerConstants-skip-names.phpt diff --git a/phpunit.xml.dist b/phpunit.xml.dist index a3a6255492..a6d9abea55 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -31,6 +31,9 @@ ./src/Standards/Squiz/Tests/ ./src/Standards/Zend/Tests/ + + tests/EndToEndPhpt/ + diff --git a/tests/EndToEndPhpt/Util/Tokens/polyfillTokenizerConstants-collision-php.phpt b/tests/EndToEndPhpt/Util/Tokens/polyfillTokenizerConstants-collision-php.phpt new file mode 100644 index 0000000000..89a8c85d12 --- /dev/null +++ b/tests/EndToEndPhpt/Util/Tokens/polyfillTokenizerConstants-collision-php.phpt @@ -0,0 +1,18 @@ +--TEST-- +Detect when the value of a polyfilled PHP token collides with a value already used by an existing internal PHP token. 
+--SKIPIF-- +=")) { + echo "skip because tokens used in this test already exist in PHP 8.4 so we cannot test polyfilling them", PHP_EOL; +} +--FILE-- +=")) { + echo "skip because tokens used in this test already exist in PHP 8.4 so we cannot test polyfilling them", PHP_EOL; +} +--FILE-- +=")) { + echo "skip because tokens used in this test already exist in PHP 8.4 so we cannot test polyfilling them", PHP_EOL; +} +--FILE-- +=")) { + echo "skip because tokens used in this test already exist in PHP 8.4 so we cannot test polyfilling them", PHP_EOL; +} +--FILE-- +