diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 31c4bc8a10654..140e3005f7821 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -3812,28 +3812,29 @@ public function set_modifiable_text( string $plaintext_content ): bool {
switch ( $this->get_tag() ) {
case 'SCRIPT':
/**
- * This is over-protective, but ensures the update doesn't break
- * the HTML structure of the SCRIPT element.
+ * Identify risky script contents to escape when possible or reject otherwise:
*
- * More thorough analysis could track the HTML tokenizer states
- * and to ensure that the SCRIPT element closes at the expected
- * SCRIPT close tag as is done in {@see ::skip_script_data()}.
+ * - "`. A SCRIPT element could be prevented from
- * closing by contents like ` └─────┐
+ * │ ▼ │ │
+ * │ ┌─────────────────────────────────────────┐ │
+ * │
+ * │ │ '
+ *
+ * The original source of this graph is included at the bottom of this file.
+ *
+ * @see https://html.spec.whatwg.org/#restrictions-for-contents-of-script-elements
+ */
+ private function escape_javascript_script_contents( string $text ): string {
+     /*
+      * Match "<script" or "</script" in any letter casing when it is followed
+      * by one of the characters in the TAIL character class. The groups must be
+      * named because the callback below reads them by name:
+      *
+      *  - HEAD:   the leading "<" or "</".
+      *  - S_CHAR: the "s" or "S" that gets replaced.
+      *  - TAIL:   "cript" plus the single character following the tag name.
+      */
+     return preg_replace_callback(
+         '~(?P<HEAD></?)(?P<S_CHAR>s)(?P<TAIL>cript[ \\t\\f\\r\\n/>])~i',
+         static function ( $matches ) {
+             // Replace only the "s" with its Unicode escape, keeping its letter case.
+             $escaped_s_char = 's' === $matches['S_CHAR']
+                 ? '\\u0073'
+                 : '\\u0053';
+             return "{$matches['HEAD']}{$escaped_s_char}{$matches['TAIL']}";
+         },
+         $text
+     );
+ }
+
+ /**
+  * Escapes JSON text so it can be placed inside a SCRIPT element.
+  *
+  * The goal is to keep the JSON contained within its enclosing SCRIPT tag
+  * so that it cannot alter the HTML structure of the document.
+  *
+  * Every "<" is replaced with the Unicode escape sequence "\u003C". In JSON,
+  * "<" is not part of the syntax and only appears inside strings, so the
+  * replacement is always safe. JSON also has no backslash escape for "<",
+  * which means there is no need to check whether a "<" is preceded by an
+  * escaping backslash.
+  *
+  * For more details, see {@see WP_HTML_Tag_Processor::escape_javascript_script_contents()}.
+  *
+  * @see https://www.json.org/json-en.html
+  *
+  * @param string $text JSON text to escape.
+  * @return string The text with every "<" replaced by "\u003C".
+  */
+ private function escape_json_script_contents( string $text ): string {
+     return str_replace( '<', '\\u003C', $text );
+ }
+
+ /**
+  * Escape style tag contents.
+  *
+  * Prevent CSS text from modifying the HTML structure of a document and
+  * ensure that it's contained within its enclosing STYLE tag as intended.
+  *
+  * Wherever "style" (in any letter casing) is followed by one of the
+  * characters in the TAIL character class, the leading "s" or "S" is
+  * replaced with the backslash hex sequence "\73" or "\53" respectively.
+  *
+  * @param string $text CSS text to escape.
+  * @return string The text with matching "s"/"S" characters replaced.
+  */
+ private function escape_style_contents( string $text ): string {
+     /*
+      * The callback reads the S_CHAR and TAIL groups by name, so the pattern
+      * has to declare them as named groups.
+      *
+      * NOTE(review): only the two group names were restored here. Confirm
+      * against upstream whether anything (for example a "</" look-behind)
+      * is meant to precede the S_CHAR group.
+      */
+     return preg_replace_callback(
+         '~(?P<S_CHAR>s)(?P<TAIL>tyle[ \\t\\f\\r\\n/>])~i',
+         static function ( $matches ) {
+             // 0x73 is "s" and 0x53 is "S": the letter case is preserved.
+             $escaped_s_char = 's' === $matches['S_CHAR']
+                 ? '\\73'
+                 : '\\53';
+             return "{$escaped_s_char}{$matches['TAIL']}";
+         },
+         $text
+     );
+ }
+
/**
* Updates or creates a new attribute on the currently matched tag with the passed value.
*
@@ -4681,3 +4984,40 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info {
*/
const TEXT_IS_WHITESPACE = 'TEXT_IS_WHITESPACE';
}
+
+/*
+# This is the original Graphviz source for the SCRIPT tag
+# parsing behavior. It's used in the documentation for
+# `WP_HTML_Tag_Processor::escape_javascript_script_contents()`.
+# ====
+digraph {
+ rankdir=TB;
+
+ // Entry point
+ entry [shape=plaintext label="Open script"];
+ entry -> script_data;
+
+ // Double-circle states arranged more compactly
+ data [shape=doublecircle label="Close script"];
+ script_data [shape=doublecircle color=blue label="script\ndata"];
+ script_data_escaped [shape=circle color=orange label="escaped"];
+ script_data_double_escaped [shape=circle color=red label="double\nescaped"];
+
+ // Group related nodes on same ranks where possible
+ {rank=same; script_data script_data_escaped script_data_double_escaped}
+
+ script_data -> script_data [label=""];
+ script_data_escaped -> script_data_double_escaped [label=" script_data [label="-->"];
+ script_data_double_escaped -> script_data_escaped [label="'";
+ labelloc=b;
+}
+*/
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php
index 66f9e67f5c8ed..018a66dfed052 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php
@@ -426,35 +426,40 @@ public function test_updates_basic_modifiable_text_on_supported_nodes( string $h
*/
public static function data_tokens_with_basic_modifiable_text_updates() {
return array(
- 'Text node (start)' => array( 'Text', 1, 'Blubber', 'Blubber' ),
- 'Text node (middle)' => array( 'Bold move', 2, 'yo', 'yo' ),
- 'Text node (end)' => array( '
of a dog', 2, 'of a cat', '
of a cat' ),
- 'Encoded text node' => array( 'birds and dogs', 2, ' & ', '<birds> & <dogs>' ),
- 'SCRIPT tag' => array( 'beforeafter', 2, 'const img = "
&
";', 'beforeafter' ),
- 'STYLE tag' => array( '', 1, 'p::before { content: "
& "; }', '' ),
- 'TEXTAREA tag' => array( 'ab', 2, "so it ", "ab" ),
- 'TEXTAREA (escape)' => array( 'ab', 2, 'but it does for ', 'ab' ),
- 'TEXTAREA (escape+attrs)' => array( 'ab', 2, 'but it does for ', 'ab' ),
- 'TITLE tag' => array( 'ahas no need to escapeb', 2, "so it ", "aso it b" ),
- 'TITLE (escape)' => array( 'ahas no need to escapeb', 2, 'but it does for ', 'abut it does for </title>b' ),
- 'TITLE (escape+attrs)' => array( 'ahas no need to escapeb', 2, 'but it does for ', 'abut it does for </title not an="attribute">b' ),
+ 'Text node (start)' => array( 'Text', 1, 'Blubber', 'Blubber' ),
+ 'Text node (middle)' => array( 'Bold move', 2, 'yo', 'yo' ),
+ 'Text node (end)' => array( '
of a dog', 2, 'of a cat', '
of a cat' ),
+ 'Encoded text node' => array( 'birds and dogs', 2, ' & ', '<birds> & <dogs>' ),
+ 'SCRIPT tag' => array( 'beforeafter', 2, 'const img = "
&
";', 'beforeafter' ),
+ 'STYLE tag' => array( '', 1, 'p::before { content: "
& "; }', '' ),
+ 'STYLE tag (mixed casing)' => array( '', 1, 'p::before { content: "
& "; }', '' ),
+ 'STYLE tag (trailing characters)' => array( '', 1, "p::before { content: \"
& \"; }", "" ),
+ 'STYLE tag (non-closing tag)' => array( '', 1, 'p::before { content: "
& "; }', '' ),
+ 'TEXTAREA tag' => array( 'ab', 2, "so it ", "ab" ),
+ 'TEXTAREA (escape)' => array( 'ab', 2, 'but it does for ', 'ab' ),
+ 'TEXTAREA (escape+attrs)' => array( 'ab', 2, 'but it does for ', 'ab' ),
+ 'TITLE tag' => array( 'ahas no need to escapeb', 2, "so it ", "aso it b" ),
+ 'TITLE (escape)' => array( 'ahas no need to escapeb', 2, 'but it does for ', 'abut it does for </title>b' ),
+ 'TITLE (escape+attrs)' => array( 'ahas no need to escapeb', 2, 'but it does for ', 'abut it does for </title not an="attribute">b' ),
);
}
/**
* Ensures that updates with potentially-compromising values aren't accepted.
*
- * For example, a modifiable text update should be allowed which would break
- * the structure of the containing element, such as in a script or comment.
+ * For example, a modifiable text update that would change the structure of the HTML
+ * document is not allowed, like attempting to set `-->` within a comment or `</script>`
+ * within a text/plain SCRIPT tag.
*
* @ticket 61617
+ * @ticket 62797
*
* @dataProvider data_unallowed_modifiable_text_updates
*
* @param string $html_with_nonempty_modifiable_text Will be used to find the test element.
* @param string $invalid_update Update containing possibly-compromising text.
*/
- public function test_rejects_updates_with_unallowed_substrings( string $html_with_nonempty_modifiable_text, string $invalid_update ) {
+ public function test_rejects_dangerous_updates( string $html_with_nonempty_modifiable_text, string $invalid_update ) {
$processor = new WP_HTML_Tag_Processor( $html_with_nonempty_modifiable_text );
while ( '' === $processor->get_modifiable_text() && $processor->next_token() ) {
@@ -466,7 +471,7 @@ public function test_rejects_updates_with_unallowed_substrings( string $html_wit
$this->assertFalse(
$processor->set_modifiable_text( $invalid_update ),
- 'Should have reject possibly-compromising modifiable text update.'
+ 'Should have rejected possibly-compromising modifiable text update.'
);
// Flush updates.
@@ -486,11 +491,152 @@ public function test_rejects_updates_with_unallowed_substrings( string $html_wit
*/
public static function data_unallowed_modifiable_text_updates() {
return array(
- 'Comment with -->' => array( '', 'Comments end in -->' ),
- 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ),
- 'SCRIPT with ' => array( '', 'Just a ' ),
- 'SCRIPT with ' => array( '', 'beforeafter' ),
- 'SCRIPT with "', '' => array( '', 'Comments end in -->' ),
+ 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ),
+ 'Non-JS SCRIPT with ', '