From 4a8ca98fc72ed28ec00693a56efe35833c70f751 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 15 Dec 2025 20:03:45 +0100 Subject: [PATCH 01/48] Auto-escape JavaScript and JSON script tags when necessary --- .../html-api/class-wp-html-tag-processor.php | 245 ++++++++++++++++-- .../wpHtmlTagProcessorModifiableText.php | 149 ++++++++++- 2 files changed, 373 insertions(+), 21 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 31c4bc8a10654..1f2a4a3dc62ac 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3811,29 +3811,83 @@ public function set_modifiable_text( string $plaintext_content ): bool { switch ( $this->get_tag() ) { case 'SCRIPT': - /** - * This is over-protective, but ensures the update doesn't break - * the HTML structure of the SCRIPT element. + /* + * SCRIPT tag contents can be dangerous. + * + * The text `` could close the SCRIPT element prematurely. * - * More thorough analysis could track the HTML tokenizer states - * and to ensure that the SCRIPT element closes at the expected - * SCRIPT close tag as is done in {@see ::skip_script_data()}. + * The text ``. A SCRIPT element could be prevented from - * closing by contents like `' => array( '', 'Comments end in -->' ), - 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ), - 'SCRIPT with ' => array( '', 'Just a ' ), - 'SCRIPT with ' => array( '', 'beforeafter' ), - 'SCRIPT with "', '' => array( '', 'Comments end in -->' ), + 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ), + 'Non-JS SCRIPT with ', ' └─────┐ + * │ ▼ │ │ + * │ ┌─────────────────────────────────────────┐ │ + * │ + * │ │ ' + * + * The original source of this graph is included at the bottom of this file. + * * @see https://html.spec.whatwg.org/#restrictions-for-contents-of-script-elements */ private function escape_javascript_script_contents( string $text ): string { @@ -4133,6 +4165,7 @@ static function ( $matches ) { * does not allow backslash escaping of "<", so there's no need to * consider whether the "<" is escaped. * + * @see WP_HTML_Tag_Processor::escape_javascript_script_contents() * @see https://www.json.org/json-en.html */ private function escape_json_script_contents( string $text ): string { @@ -4932,3 +4965,40 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info { */ const TEXT_IS_WHITESPACE = 'TEXT_IS_WHITESPACE'; } + +/* +# This is the original Graphviz source for the SCRIPT content +# parsinge behavior. It's used in the documention of +# `WP_HTML_Tag_Processor::escape_javascript_script_contents()`. +# ==== +digraph { + rankdir=TB; + + // Entry point + entry [shape=plaintext label="Open script"]; + entry -> script_data; + + // Double-circle states arranged more compactly + data [shape=doublecircle label="Close script"]; + script_data [shape=doublecircle color=blue label="script\ndata"]; + script_data_escaped [shape=circle color=orange label="escaped"]; + script_data_double_escaped [shape=circle color=red label="double\nescaped"]; + + // Group related nodes on same ranks where possible + {rank=same; script_data script_data_escaped script_data_double_escaped} + + script_data -> script_data [label=""]; + script_data_escaped -> script_data_double_escaped [label=" data [label=" script_data [label="-->"]; + script_data_double_escaped -> script_data_escaped [label=" Date: Mon, 22 Dec 2025 18:44:01 +0100 Subject: [PATCH 32/48] Improve linking between escapes --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index fc1f51360daf4..92462873f7dd7 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -4165,7 +4165,7 @@ static function ( $matches ) { * does not allow backslash escaping of "<", so there's no need to * consider whether the "<" is escaped. * - * @see WP_HTML_Tag_Processor::escape_javascript_script_contents() + * For more details, see {@see WP_HTML_Tag_Processor::escape_javascript_script_contents()}. * @see https://www.json.org/json-en.html */ private function escape_json_script_contents( string $text ): string { From 83ff62fab13559e0359a84d3d75e158806dfab62 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Dec 2025 18:51:16 +0100 Subject: [PATCH 33/48] Fix comments, typos, lints --- .../html-api/class-wp-html-tag-processor.php | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 92462873f7dd7..05df59d40671b 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3811,14 +3811,14 @@ public function set_modifiable_text( string $plaintext_content ): bool { switch ( $this->get_tag() ) { case 'SCRIPT': - /* - * SCRIPT tag contents can be dangerous: + /** + * Identify risky script contents to escape when possible or reject otherwise: * * - "" could close the SCRIPT element prematurely. - * - "` + * within a a text/plain SCRIPT tag. * * @ticket 61617 * @ticket 62797 @@ -467,7 +468,7 @@ public function test_rejects_dangerous_updates( string $html_with_nonempty_modif $this->assertFalse( $processor->set_modifiable_text( $invalid_update ), - 'Should have reject possibly-compromising modifiable text update.' + 'Should have rejected possibly-compromising modifiable text update.' ); // Flush updates. @@ -490,7 +491,7 @@ public static function data_unallowed_modifiable_text_updates() { 'Comment with -->' => array( '', 'Comments end in -->' ), 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ), 'Non-JS SCRIPT with ', '` within a comment or `` - * within a a text/plain SCRIPT tag. + * within a text/plain SCRIPT tag. * * @ticket 61617 * @ticket 62797 From 402ae9f9feb0fb1ff21ffbe44e930d23c5b1ecc2 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Dec 2025 19:08:04 +0100 Subject: [PATCH 36/48] Fix \c -> \r (carriage return) typo --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 05df59d40671b..a5f46af35077b 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -4134,7 +4134,7 @@ private function is_json_script_tag(): bool { * └─────────▶ ║ ║ └────────── │ escaped │ ─┘ * ╚══════════════╝ └───────────┘ * - * † = Case insensitive 'script' followed by one of ' \t\f\c\n/>' + * † = Case insensitive 'script' followed by one of ' \t\f\r\n/>' * * The original source of this graph is included at the bottom of this file. * @@ -4998,7 +4998,7 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info { script_data_double_escaped -> script_data [label="-->"]; script_data_double_escaped -> script_data_escaped [label=" Date: Mon, 22 Dec 2025 19:11:16 +0100 Subject: [PATCH 37/48] Add note about not parsing MIME types for JS script tags --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index a5f46af35077b..a0dfcb7e2ff2a 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3895,6 +3895,9 @@ static function ( $tag_match ) { /** * Indicates if the currently matched tag is a JavaScript script tag. * + * Note that this does not parse a MIME type. This behavior is well-documented in + * in the HTML standard and uses string comparisons, *not* actual MIME Types. + * * @see https://html.spec.whatwg.org/multipage/scripting.html#prepare-the-script-element * * @ignore From eef0ccbd89e00afffc6554a050801ee4cc056718 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Dec 2025 19:12:13 +0100 Subject: [PATCH 38/48] Add todo comment to is_json_script_tag --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index a0dfcb7e2ff2a..6b87d250288b3 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -4016,6 +4016,7 @@ private function is_javascript_script_tag(): bool { * * @ignore * @todo Consider a public API that is clear and general. + * @todo Use a MIME type parser when available. * * @since 7.0.0 * From 71d268670ba337f763c04baf9830bed074d03546 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Dec 2025 19:15:04 +0100 Subject: [PATCH 39/48] Re-order tag name termination chars to match elsewhere --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 6b87d250288b3..0ccf83828f15d 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -4146,7 +4146,7 @@ private function is_json_script_tag(): bool { */ private function escape_javascript_script_contents( string $text ): string { return preg_replace_callback( - '~(?Ps)(?Pcript[\\t\\r\\n\\f />])~i', + '~(?Ps)(?Pcript[ \\t\\f\\r\\n/>])~i', static function ( $matches ) { $escaped_s_char = 's' === $matches['S_CHAR'] ? '\\u0073' From d4693a27ffaf7950b7e5aaef4c4d47268c07607d Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 22 Dec 2025 19:15:37 +0100 Subject: [PATCH 40/48] Fix typo --- tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php index 9f89f78a2fcd4..f7da39a887d71 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php @@ -102,7 +102,7 @@ public static function data_is_javascript_script_tag(): array { 'Script tag with language="jscript"' => array( '', true ), 'Script tag with language="livescript"' => array( '', true ), - // Whitespace is not trimmed in the langauge attribute. + // Whitespace is not trimmed in the language attribute. 'Script tag with language=" javascript"' => array( '', false ), // Non-JavaScript script tags - should NOT be JavaScript. From 4c3b0b21a4d6f795984733d21ceb384a86920cba Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 29 Dec 2025 17:50:00 +0100 Subject: [PATCH 41/48] Update comments on tag prefixes matching search pattern --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 0ccf83828f15d..d53709051062d 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3814,8 +3814,8 @@ public function set_modifiable_text( string $plaintext_content ): bool { /** * Identify risky script contents to escape when possible or reject otherwise: * - * - "" could close the SCRIPT element prematurely. - * - "', true ), - 'Script tag with other attributes' => array( '', true ), + 'Script tag without attributes' => array( '', 'javascript' ), + 'Script tag with other attributes' => array( '', 'javascript' ), // Script tags with empty type attribute - should be JavaScript. - 'Script tag with empty type attribute' => array( '', true ), - 'Script tag with boolean type attribute' => array( '', true ), + 'Script tag with empty type attribute' => array( '', 'javascript' ), + 'Script tag with boolean type attribute' => array( '', 'javascript' ), // Script tags with falsy but non-empty type attribute. - 'Script tag with type="0"' => array( '', false ), + 'Script tag with type="0"' => array( '', null ), // Script tags without type but with language attribute - should be JavaScript. - 'Script tag with empty language attribute' => array( '', true ), - 'Script tag with boolean language attribute' => array( '', true ), + 'Script tag with empty language attribute' => array( '', 'javascript' ), + 'Script tag with boolean language attribute' => array( '', 'javascript' ), // Script tags with falsy but non-empty language attribute. - 'Script tag with language="0"' => array( '', false ), + 'Script tag with language="0"' => array( '', null ), // Script tags with JavaScript MIME essence - should be JavaScript. - 'Script tag with application/ecmascript' => array( '', true ), - 'Script tag with application/javascript' => array( '', true ), - 'Script tag with application/x-ecmascript' => array( '', true ), - 'Script tag with application/x-javascript' => array( '', true ), - 'Script tag with text/ecmascript' => array( '', true ), - 'Script tag with text/javascript' => array( '', true ), - 'Script tag with text/javascript1.0' => array( '', true ), - 'Script tag with text/javascript1.1' => array( '', true ), - 'Script tag with text/javascript1.2' => array( '', true ), - 'Script tag with text/javascript1.3' => array( '', true ), - 'Script tag with text/javascript1.4' => array( '', true ), - 'Script tag with text/javascript1.5' => array( '', true ), - 'Script tag with text/jscript' => array( '', true ), - 'Script tag with text/livescript' => array( '', true ), - 'Script tag with text/x-ecmascript' => array( '', true ), - 'Script tag with text/x-javascript' => array( '', true ), + 'Script tag with application/ecmascript' => array( '', 'javascript' ), + 'Script tag with application/javascript' => array( '', 'javascript' ), + 'Script tag with application/x-ecmascript' => array( '', 'javascript' ), + 'Script tag with application/x-javascript' => array( '', 'javascript' ), + 'Script tag with text/ecmascript' => array( '', 'javascript' ), + 'Script tag with text/javascript' => array( '', 'javascript' ), + 'Script tag with text/javascript1.0' => array( '', 'javascript' ), + 'Script tag with text/javascript1.1' => array( '', 'javascript' ), + 'Script tag with text/javascript1.2' => array( '', 'javascript' ), + 'Script tag with text/javascript1.3' => array( '', 'javascript' ), + 'Script tag with text/javascript1.4' => array( '', 'javascript' ), + 'Script tag with text/javascript1.5' => array( '', 'javascript' ), + 'Script tag with text/jscript' => array( '', 'javascript' ), + 'Script tag with text/livescript' => array( '', 'javascript' ), + 'Script tag with text/x-ecmascript' => array( '', 'javascript' ), + 'Script tag with text/x-javascript' => array( '', 'javascript' ), // Case-insensitive matching for JavaScript MIME essence. - 'Script tag with UPPERCASE type' => array( '', true ), - 'Script tag with MixedCase type' => array( '', true ), - 'Script tag with APPLICATION/JAVASCRIPT' => array( '', true ), + 'Script tag with UPPERCASE type' => array( '', 'javascript' ), + 'Script tag with MixedCase type' => array( '', 'javascript' ), + 'Script tag with APPLICATION/JAVASCRIPT' => array( '', 'javascript' ), // Script tags with module type - should be JavaScript. - 'Script tag with module type' => array( '', true ), - 'Script tag with MODULE type uppercase' => array( '', true ), - 'Script tag with MoDuLe type mixed case' => array( '', true ), + 'Script tag with module type' => array( '', 'javascript' ), + 'Script tag with MODULE type uppercase' => array( '', 'javascript' ), + 'Script tag with MoDuLe type mixed case' => array( '', 'javascript' ), // Script tags with whitespace around type - should strip whitespace. - 'Script tag with leading whitespace' => array( '', true ), - 'Script tag with trailing whitespace' => array( '', true ), - 'Script tag with surrounding whitespace' => array( '', true ), - 'Script tag with tab whitespace' => array( "", true ), - 'Script tag with newline whitespace' => array( "", true ), - 'Script tag with mixed whitespace' => array( "", true ), + 'Script tag with leading whitespace' => array( '', 'javascript' ), + 'Script tag with trailing whitespace' => array( '', 'javascript' ), + 'Script tag with surrounding whitespace' => array( '', 'javascript' ), + 'Script tag with tab whitespace' => array( "", 'javascript' ), + 'Script tag with newline whitespace' => array( "", 'javascript' ), + 'Script tag with mixed whitespace' => array( "", 'javascript' ), // Script tags with language attribute and non-empty value - should use text/{language}. - 'Script tag with language="javascript"' => array( '', true ), - 'Script tag with language="JavaScript"' => array( '', true ), - 'Script tag with language="ecmascript"' => array( '', true ), - 'Script tag with language="jscript"' => array( '', true ), - 'Script tag with language="livescript"' => array( '', true ), + 'Script tag with language="javascript"' => array( '', 'javascript' ), + 'Script tag with language="JavaScript"' => array( '', 'javascript' ), + 'Script tag with language="ecmascript"' => array( '', 'javascript' ), + 'Script tag with language="jscript"' => array( '', 'javascript' ), + 'Script tag with language="livescript"' => array( '', 'javascript' ), // Whitespace is not trimmed in the language attribute. - 'Script tag with language=" javascript"' => array( '', false ), + 'Script tag with language=" javascript"' => array( '', null ), // Non-JavaScript script tags - should NOT be JavaScript. - 'Script tag with importmap type' => array( '', false ), - 'Script tag with speculationrules type' => array( '', false ), - 'Script tag with application/json type' => array( '', false ), - 'Script tag with text/json type' => array( '', false ), - 'Script tag with unknown MIME type' => array( '', false ), - 'Script tag with application/xml type' => array( '', false ), - 'Script tag with random type' => array( '', false ), + 'Script tag with importmap type' => array( '', 'json' ), + 'Script tag with speculationrules type' => array( '', 'json' ), + 'Script tag with application/json type' => array( '', 'json' ), + 'Script tag with text/json type' => array( '', 'json' ), + 'Script tag with unknown MIME type' => array( '', null ), + 'Script tag with application/xml type' => array( '', null ), + 'Script tag with random type' => array( '', null ), // Non-script tags - should NOT be JavaScript. - 'DIV tag' => array( '
', false ), - 'SPAN tag' => array( '', false ), - 'P tag' => array( '

', false ), + 'DIV tag' => array( '
', null ), + 'SPAN tag' => array( '', null ), + 'P tag' => array( '

', null ), ); } @@ -129,12 +141,10 @@ public static function data_is_javascript_script_tag(): array { public function test_is_javascript_script_tag_returns_false_before_finding_tags() { $processor = new WP_HTML_Tag_Processor( 'Just some text' ); $processor->next_token(); - $result = ( function () { - return $this->is_javascript_script_tag(); - } )->call( $processor ); - $this->assertFalse( - $result, - 'Should return false when not stopped on script tag' + + $this->assertNull( + $this->get_script_content_type_with( $processor ), + 'Should fail to infer a content type when not matched on a SCRIPT element.' ); } @@ -147,13 +157,16 @@ public function test_is_javascript_script_tag_returns_false_for_non_html_namespa $processor = new WP_HTML_Tag_Processor( '' ); $processor->change_parsing_namespace( 'svg' ); $processor->next_tag(); - $this->assertSame( 'SCRIPT', $processor->get_tag() ); - $result = ( function () { - return $this->is_javascript_script_tag(); - } )->call( $processor ); - $this->assertFalse( - $result, - 'Should return false for script tags in non-HTML namespace' + + $this->assertSame( + 'SCRIPT', + $processor->get_tag(), + 'Expected to find a SCRIPT tag in the SVG namespace: check test setup.' + ); + + $this->assertNull( + $this->get_script_content_type_with( $processor ), + 'Should fail to infer content type for SCRIPT elements in non-HTML namespace' ); } @@ -170,14 +183,27 @@ public function test_is_javascript_script_tag_returns_false_for_non_html_namespa public function test_is_json_script_tag( string $html, bool $expected_result ) { $processor = new WP_HTML_Tag_Processor( $html ); $processor->next_tag(); - $result = ( function () { - return $this->is_json_script_tag(); - } )->call( $processor ); - $this->assertSame( - $expected_result, - $result, - 'Failed to correctly identify JSON script tag' - ); + + $detected = $this->get_script_content_type_with( $processor ); + + if ( isset( $content_type, $detected ) ) { + $this->assertSame( + $content_type, + $detected, + 'Misidentified the type of contents within the SCRIPT element.' + ); + } elseif ( isset( $content_type ) ) { + $this->assertSame( + $content_type, + $detected, + 'Should have identified the type of contents within the SCRIPT element but failed to recognize any type.' + ); + } else { + $this->assertNull( + $detected, + 'Should have failed to identify the type of contents within the SCRIPT element.' + ); + } } /** @@ -188,46 +214,46 @@ public function test_is_json_script_tag( string $html, bool $expected_result ) { public static function data_is_json_script_tag(): array { return array( // JSON MIME types - should be JSON. - 'Script tag with application/json type' => array( '', true ), - 'Script tag with text/json type' => array( '', true ), + 'Script tag with application/json type' => array( '', 'json' ), + 'Script tag with text/json type' => array( '', 'json' ), // importmap and speculationrules - should be JSON. - 'Script tag with importmap type' => array( '', true ), - 'Script tag with speculationrules type' => array( '', true ), + 'Script tag with importmap type' => array( '', 'json' ), + 'Script tag with speculationrules type' => array( '', 'json' ), // Case-insensitive matching for JSON types. - 'Script tag with APPLICATION/JSON uppercase' => array( '', true ), - 'Script tag with Text/Json mixed case' => array( '', true ), - 'Script tag with IMPORTMAP uppercase' => array( '', true ), - 'Script tag with ImportMap mixed case' => array( '', true ), - 'Script tag with SPECULATIONRULES uppercase' => array( '', true ), - 'Script tag with SpeculationRules mixed' => array( '', true ), + 'Script tag with APPLICATION/JSON uppercase' => array( '', 'json' ), + 'Script tag with Text/Json mixed case' => array( '', 'json' ), + 'Script tag with IMPORTMAP uppercase' => array( '', 'json' ), + 'Script tag with ImportMap mixed case' => array( '', 'json' ), + 'Script tag with SPECULATIONRULES uppercase' => array( '', 'json' ), + 'Script tag with SpeculationRules mixed' => array( '', 'json' ), // Whitespace handling - should strip whitespace. - 'Script tag with leading whitespace' => array( '', true ), - 'Script tag with trailing whitespace' => array( '', true ), - 'Script tag with surrounding whitespace' => array( '', true ), - 'Script tag with tab whitespace' => array( "", true ), - 'Script tag with newline whitespace' => array( "", true ), - 'Script tag with mixed whitespace' => array( "", true ), + 'Script tag with leading whitespace' => array( '', 'json' ), + 'Script tag with trailing whitespace' => array( '', 'json' ), + 'Script tag with surrounding whitespace' => array( '', 'json' ), + 'Script tag with tab whitespace' => array( "", 'json' ), + 'Script tag with newline whitespace' => array( "", 'json' ), + 'Script tag with mixed whitespace' => array( "", 'json' ), // Non-JSON script tags - should NOT be JSON. - 'Script tag without type attribute' => array( '', false ), - 'Script tag with empty type attribute' => array( '', false ), - 'Script tag with boolean type attribute' => array( '', false ), + 'Script tag without type attribute' => array( '', 'javascript' ), + 'Script tag with empty type attribute' => array( '', 'javascript' ), + 'Script tag with boolean type attribute' => array( '', 'javascript' ), // Script tags with falsy but non-empty type attribute. - 'Script tag with type="0"' => array( '', false ), + 'Script tag with type="0"' => array( '', null ), - 'Script tag with text/javascript type' => array( '', false ), - 'Script tag with module type' => array( '', false ), - 'Script tag with unknown MIME type' => array( '', false ), - 'Script tag with application/xml type' => array( '', false ), + 'Script tag with text/javascript type' => array( '', 'javascript' ), + 'Script tag with module type' => array( '', 'javascript' ), + 'Script tag with unknown MIME type' => array( '', null ), + 'Script tag with application/xml type' => array( '', null ), // Non-script tags - should NOT be JSON. - 'DIV tag' => array( '
', false ), - 'SPAN tag' => array( '', false ), - 'P tag' => array( '

', false ), + 'DIV tag' => array( '
', null ), + 'SPAN tag' => array( '', null ), + 'P tag' => array( '

', null ), ); } @@ -239,12 +265,10 @@ public static function data_is_json_script_tag(): array { public function test_is_json_script_tag_returns_false_before_finding_tags() { $processor = new WP_HTML_Tag_Processor( 'Just some text' ); $processor->next_token(); - $result = ( function () { - return $this->is_json_script_tag(); - } )->call( $processor ); - $this->assertFalse( - $result, - 'Should return false when not stopped on script tag' + + $this->assertNull( + $this->get_script_content_type_with( $processor ), + 'Should fail to infer a content type when not matched on a SCRIPT element.' ); } @@ -257,13 +281,32 @@ public function test_is_json_script_tag_returns_false_for_non_html_namespace() { $processor = new WP_HTML_Tag_Processor( '' ); $processor->change_parsing_namespace( 'svg' ); $processor->next_tag(); - $this->assertSame( 'SCRIPT', $processor->get_tag() ); - $result = ( function () { - return $this->is_json_script_tag(); - } )->call( $processor ); - $this->assertFalse( - $result, - 'Should return false for script tags in non-HTML namespace' + + $this->assertSame( + 'SCRIPT', + $processor->get_tag(), + 'Expected to find a SCRIPT tag in the SVG namespace: check test setup.' ); + + $this->assertNull( + $this->get_script_content_type_with( $processor ), + 'Should fail to infer content type for SCRIPT elements in non-HTML namespace' + ); + } + + /** + * Test helper to call private script content type getter. + * + * @since 7.0.0 + * + * @param WP_HTML_Tag_Processor $processor Call the private method on this instance. + * @return string|null Script content type if matched and recognized, else `null`. + */ + private static function get_script_content_type_with( WP_HTML_Tag_Processor $processor ) { + $getter = function () { + return $this->get_script_content_type(); + }; + + return $getter->call( $processor ); } } From ddeb30109b1a9b00e06030e9889cf79f1c4c4f40 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 30 Dec 2025 20:38:23 -0700 Subject: [PATCH 46/48] fixup! Use content-type identification and escape without PCRE --- .../phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php index 48240c9dd2509..3c6628a4ac855 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php @@ -177,10 +177,10 @@ public function test_is_javascript_script_tag_returns_false_for_non_html_namespa * * @dataProvider data_is_json_script_tag * - * @param string $html HTML containing a script tag. - * @param bool $expected_result Whether the script tag should be identified as JSON. + * @param string $html HTML containing a script tag. + * @param string|null $content_type Inferred content type of SCRIPT element. */ - public function test_is_json_script_tag( string $html, bool $expected_result ) { + public function test_is_json_script_tag( string $html, ?string $content_type ) { $processor = new WP_HTML_Tag_Processor( $html ); $processor->next_tag(); From b36fc32f2b697101088bddfb64ed0b2d6e122a99 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 30 Dec 2025 20:39:58 -0700 Subject: [PATCH 47/48] fixup! Use content-type identification and escape without PCRE --- .../tests/html-api/wpHtmlTagProcessorScriptTag.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php index 3c6628a4ac855..663156d780cf0 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessorScriptTag.php @@ -13,7 +13,7 @@ class Tests_HtmlApi_WpHtmlTagProcessorScriptTag extends WP_UnitTestCase { /** * @ticket 64419 * - * @covers WP_HTML_Tag_Processor::is_javascript_script_tag + * @covers ::get_script_content_type() * * @dataProvider data_is_javascript_script_tag * @@ -136,7 +136,7 @@ public static function data_is_javascript_script_tag(): array { /** * @ticket 64419 * - * @covers WP_HTML_Tag_Processor::is_javascript_script_tag + * @covers ::get_script_content_type() */ public function test_is_javascript_script_tag_returns_false_before_finding_tags() { $processor = new WP_HTML_Tag_Processor( 'Just some text' ); @@ -151,7 +151,7 @@ public function test_is_javascript_script_tag_returns_false_before_finding_tags( /** * @ticket 64419 * - * @covers WP_HTML_Tag_Processor::is_javascript_script_tag + * @covers ::get_script_content_type() */ public function test_is_javascript_script_tag_returns_false_for_non_html_namespace() { $processor = new WP_HTML_Tag_Processor( '' ); @@ -173,7 +173,7 @@ public function test_is_javascript_script_tag_returns_false_for_non_html_namespa /** * @ticket 64419 * - * @covers WP_HTML_Tag_Processor::is_json_script_tag + * @covers ::get_script_content_type() * * @dataProvider data_is_json_script_tag * @@ -260,7 +260,7 @@ public static function data_is_json_script_tag(): array { /** * @ticket 64419 * - * @covers WP_HTML_Tag_Processor::is_json_script_tag + * @covers ::get_script_content_type() */ public function test_is_json_script_tag_returns_false_before_finding_tags() { $processor = new WP_HTML_Tag_Processor( 'Just some text' ); @@ -275,7 +275,7 @@ public function test_is_json_script_tag_returns_false_before_finding_tags() { /** * @ticket 64419 * - * @covers WP_HTML_Tag_Processor::is_json_script_tag + * @covers ::get_script_content_type() */ public function test_is_json_script_tag_returns_false_for_non_html_namespace() { $processor = new WP_HTML_Tag_Processor( '' ); From 1249af078b60893af8066a49e303b781cea1e17d Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 30 Dec 2025 20:48:52 -0700 Subject: [PATCH 48/48] fixup! Use content-type identification and escape without PCRE --- .../tests/html-api/wpHtmlTagProcessorModifiableText.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php index a0808d8c1de57..9e0d94aecd17e 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php @@ -572,7 +572,7 @@ public function test_complex_javascript_and_json_auto_escaping() { $expected = <<<'HTML'