diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 31c4bc8a10654..140e3005f7821 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3812,28 +3812,29 @@ public function set_modifiable_text( string $plaintext_content ): bool { switch ( $this->get_tag() ) { case 'SCRIPT': /** - * This is over-protective, but ensures the update doesn't break - * the HTML structure of the SCRIPT element. + * Identify risky script contents to escape when possible or reject otherwise: * - * More thorough analysis could track the HTML tokenizer states - * and to ensure that the SCRIPT element closes at the expected - * SCRIPT close tag as is done in {@see ::skip_script_data()}. + * - "`. A SCRIPT element could be prevented from - * closing by contents like ` └─────┐ + * │ ▼ │ │ + * │ ┌─────────────────────────────────────────┐ │ + * │ + * │ │ ' + * + * The original source of this graph is included at the bottom of this file. + * + * @see https://html.spec.whatwg.org/#restrictions-for-contents-of-script-elements + */ + private function escape_javascript_script_contents( string $text ): string { + return preg_replace_callback( + '~(?Ps)(?Pcript[ \\t\\f\\r\\n/>])~i', + static function ( $matches ) { + $escaped_s_char = 's' === $matches['S_CHAR'] + ? '\\u0073' + : '\\u0053'; + return "{$matches['HEAD']}{$escaped_s_char}{$matches['TAIL']}"; + }, + $text + ); + } + + /** + * Escape JSON script tag contents. + * + * Prevent JSON text from modifying the HTML structure of a document and + * ensure that it's contained within its enclosing SCRIPT tag as intended. + * + * JSON can be escaped simply by replacing "<" with its Unicode escape + * sequence "\u003C". "<" is not part of the JSON syntax and only appears + * in JSON strings, so it's always safe to escape. Furthermore, JSON does + * not allow backslash escaping of "<", so there's no need to consider + * whether the "<" is preceded by an escaping backslash. + * + * For more details, see {@see WP_HTML_Tag_Processor::escape_javascript_script_contents()}. + * @see https://www.json.org/json-en.html + */ + private function escape_json_script_contents( string $text ): string { + return strtr( + $text, + array( '<' => '\\u003C' ) + ); + } + + /** + * Escape style tag contents. + * + * Prevent CSS text from modifying the HTML structure of a document and + * ensure that it's contained within its enclosing STYLE tag as intended. + */ + private function escape_style_contents( string $text ): string { + return preg_replace_callback( + '~s)(?Ptyle[ \\t\\f\\r\\n/>])~i', + static function ( $matches ) { + $escaped_s_char = 's' === $matches['S_CHAR'] + ? '\\73' + : '\\53'; + return " script_data; + + // Double-circle states arranged more compactly + data [shape=doublecircle label="Close script"]; + script_data [shape=doublecircle color=blue label="script\ndata"]; + script_data_escaped [shape=circle color=orange label="escaped"]; + script_data_double_escaped [shape=circle color=red label="double\nescaped"]; + + // Group related nodes on same ranks where possible + {rank=same; script_data script_data_escaped script_data_double_escaped} + + script_data -> script_data [label=""]; + script_data_escaped -> script_data_double_escaped [label=" data [label=" script_data [label="-->"]; + script_data_double_escaped -> script_data_escaped [label=" array( 'Text', 1, 'Blubber', 'Blubber' ), - 'Text node (middle)' => array( 'Bold move', 2, 'yo', 'yo' ), - 'Text node (end)' => array( 'of a dog', 2, 'of a cat', 'of a cat' ), - 'Encoded text node' => array( '
birds and dogs
', 2, ' & ', '
<birds> & <dogs>
' ), - 'SCRIPT tag' => array( 'beforeafter', 2, 'const img = " &
";', 'beforeafter' ), - 'STYLE tag' => array( '', 1, 'p::before { content: " & "; }', '' ), - 'TEXTAREA tag' => array( 'ab', 2, "so it ", "ab" ), - 'TEXTAREA (escape)' => array( 'ab', 2, 'but it does for ', 'ab' ), - 'TEXTAREA (escape+attrs)' => array( 'ab', 2, 'but it does for ', 'ab' ), - 'TITLE tag' => array( 'ahas no need to escapeb', 2, "so it ", "aso it <doesn't>b" ), - 'TITLE (escape)' => array( 'ahas no need to escapeb', 2, 'but it does for ', 'abut it does for </title>b' ), - 'TITLE (escape+attrs)' => array( 'ahas no need to escapeb', 2, 'but it does for ', 'abut it does for </title not an="attribute">b' ), + 'Text node (start)' => array( 'Text', 1, 'Blubber', 'Blubber' ), + 'Text node (middle)' => array( 'Bold move', 2, 'yo', 'yo' ), + 'Text node (end)' => array( 'of a dog', 2, 'of a cat', 'of a cat' ), + 'Encoded text node' => array( '
birds and dogs
', 2, ' & ', '
<birds> & <dogs>
' ), + 'SCRIPT tag' => array( 'beforeafter', 2, 'const img = " &
";', 'beforeafter' ), + 'STYLE tag' => array( '', 1, 'p::before { content: " & "; }', '' ), + 'STYLE tag (mixed casing)' => array( '', 1, 'p::before { content: " & "; }', '' ), + 'STYLE tag (trailing characters)' => array( '', 1, "p::before { content: \" & \"; }", "" ), + 'STYLE tag (non-closing tag)' => array( '', 1, 'p::before { content: " & "; }', '' ), + 'TEXTAREA tag' => array( 'ab', 2, "so it ", "ab" ), + 'TEXTAREA (escape)' => array( 'ab', 2, 'but it does for ', 'ab' ), + 'TEXTAREA (escape+attrs)' => array( 'ab', 2, 'but it does for ', 'ab' ), + 'TITLE tag' => array( 'ahas no need to escapeb', 2, "so it ", "aso it <doesn't>b" ), + 'TITLE (escape)' => array( 'ahas no need to escapeb', 2, 'but it does for ', 'abut it does for </title>b' ), + 'TITLE (escape+attrs)' => array( 'ahas no need to escapeb', 2, 'but it does for ', 'abut it does for </title not an="attribute">b' ), ); } /** * Ensures that updates with potentially-compromising values aren't accepted. * - * For example, a modifiable text update should be allowed which would break - * the structure of the containing element, such as in a script or comment. + * For example, a modifiable text update that would change the structure of the HTML + * document is not allowed, like attempting to set `-->` within a comment or `` + * within a text/plain SCRIPT tag. * * @ticket 61617 + * @ticket 62797 * * @dataProvider data_unallowed_modifiable_text_updates * * @param string $html_with_nonempty_modifiable_text Will be used to find the test element. * @param string $invalid_update Update containing possibly-compromising text. */ - public function test_rejects_updates_with_unallowed_substrings( string $html_with_nonempty_modifiable_text, string $invalid_update ) { + public function test_rejects_dangerous_updates( string $html_with_nonempty_modifiable_text, string $invalid_update ) { $processor = new WP_HTML_Tag_Processor( $html_with_nonempty_modifiable_text ); while ( '' === $processor->get_modifiable_text() && $processor->next_token() ) { @@ -466,7 +471,7 @@ public function test_rejects_updates_with_unallowed_substrings( string $html_wit $this->assertFalse( $processor->set_modifiable_text( $invalid_update ), - 'Should have reject possibly-compromising modifiable text update.' + 'Should have rejected possibly-compromising modifiable text update.' ); // Flush updates. @@ -486,11 +491,152 @@ public function test_rejects_updates_with_unallowed_substrings( string $html_wit */ public static function data_unallowed_modifiable_text_updates() { return array( - 'Comment with -->' => array( '', 'Comments end in -->' ), - 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ), - 'SCRIPT with ' => array( '', 'Just a ' ), - 'SCRIPT with ' => array( '', 'beforeafter' ), - 'SCRIPT with "', '' => array( '', 'Comments end in -->' ), + 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ), + 'Non-JS SCRIPT with ', '