diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 31c4bc8a10654..d53709051062d 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3812,28 +3812,29 @@ public function set_modifiable_text( string $plaintext_content ): bool { switch ( $this->get_tag() ) { case 'SCRIPT': /** - * This is over-protective, but ensures the update doesn't break - * the HTML structure of the SCRIPT element. + * Identify risky script contents to escape when possible or reject otherwise: * - * More thorough analysis could track the HTML tokenizer states - * and to ensure that the SCRIPT element closes at the expected - * SCRIPT close tag as is done in {@see ::skip_script_data()}. + * - "`. A SCRIPT element could be prevented from - * closing by contents like ` └─────┐ + * │ ▼ │ │ + * │ ┌─────────────────────────────────────────┐ │ + * │ + * │ │ ' + * + * The original source of this graph is included at the bottom of this file. + * + * @see https://html.spec.whatwg.org/#restrictions-for-contents-of-script-elements + */ + private function escape_javascript_script_contents( string $text ): string { + return preg_replace_callback( + '~(?Ps)(?Pcript[ \\t\\f\\r\\n/>])~i', + static function ( $matches ) { + $escaped_s_char = 's' === $matches['S_CHAR'] + ? '\\u0073' + : '\\u0053'; + return "{$matches['HEAD']}{$escaped_s_char}{$matches['TAIL']}"; + }, + $text + ); + } + + /** + * Escape JSON script tag contents. + * + * Prevent JSON text from modifying the HTML structure of a document and + * ensure that it's contained within its enclosing SCRIPT tag as intended. + * + * JSON can be escaped simply by replacing "<" with its Unicode escape + * sequence "\u003C". "<" is not part of the JSON syntax and only appears + * in JSON strings, so it's always safe to escape. Furthermore, JSON does + * not allow backslash escaping of "<", so there's no need to consider + * whether the "<" is preceded by an escaping backslash. + * + * For more details, see {@see WP_HTML_Tag_Processor::escape_javascript_script_contents()}. + * @see https://www.json.org/json-en.html + */ + private function escape_json_script_contents( string $text ): string { + return strtr( + $text, + array( '<' => '\\u003C' ) + ); + } + /** * Updates or creates a new attribute on the currently matched tag with the passed value. * @@ -4681,3 +4969,40 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info { */ const TEXT_IS_WHITESPACE = 'TEXT_IS_WHITESPACE'; } + +/* +# This is the original Graphviz source for the SCRIPT tag +# parsing behavior. It's used in the documentation for +# `WP_HTML_Tag_Processor::escape_javascript_script_contents()`. +# ==== +digraph { + rankdir=TB; + + // Entry point + entry [shape=plaintext label="Open script"]; + entry -> script_data; + + // Double-circle states arranged more compactly + data [shape=doublecircle label="Close script"]; + script_data [shape=doublecircle color=blue label="script\ndata"]; + script_data_escaped [shape=circle color=orange label="escaped"]; + script_data_double_escaped [shape=circle color=red label="double\nescaped"]; + + // Group related nodes on same ranks where possible + {rank=same; script_data script_data_escaped script_data_double_escaped} + + script_data -> script_data [label=""]; + script_data_escaped -> script_data_double_escaped [label=" data [label=" script_data [label="-->"]; + script_data_double_escaped -> script_data_escaped [label="\n", wp_sanitize_script_attributes( $attributes ) ); + $processor = new WP_HTML_Tag_Processor( "\n" ); + $processor->next_tag(); + foreach ( $attributes as $name => $value ) { + if ( is_string( $value ) || true === $value ) { + $processor->set_attribute( $name, $value ); + } + } + return $processor->get_updated_html(); } /** @@ -2955,7 +2962,15 @@ function wp_get_inline_script_tag( $data, $attributes = array() ) { */ $attributes = apply_filters( 'wp_inline_script_attributes', $attributes, $data ); - return sprintf( "%s\n", wp_sanitize_script_attributes( $attributes ), $data ); + $processor = new WP_HTML_Tag_Processor( "\n" ); + $processor->next_tag(); + foreach ( $attributes as $name => $value ) { + if ( is_string( $value ) || true === $value ) { + $processor->set_attribute( $name, $value ); + } + } + $processor->set_modifiable_text( $data ); + return $processor->get_updated_html(); } /** diff --git a/tests/phpunit/tests/dependencies/scripts.php b/tests/phpunit/tests/dependencies/scripts.php index 934d7c039f1e7..c7cbfe930c598 100644 --- a/tests/phpunit/tests/dependencies/scripts.php +++ b/tests/phpunit/tests/dependencies/scripts.php @@ -333,8 +333,8 @@ public function test_delayed_dependent_with_blocking_dependency_not_enqueued( $s // This dependent is registered but not enqueued, so it should not factor into the eligible loading strategy. wp_register_script( 'dependent-script-a4', '/dependent-script-a4.js', array( 'main-script-a4' ), null ); $output = get_echo( 'wp_print_scripts' ); - $expected = str_replace( "'", '"', "" ); - $this->assertStringContainsString( $expected, $output, 'Only enqueued dependents should affect the eligible strategy.' ); + $expected = ""; + $this->assertEqualHTMLScriptTagById( $expected, $output, 'Only enqueued dependents should affect the eligible strategy.' ); } /** @@ -1076,8 +1076,8 @@ public function test_various_strategy_dependency_chains( $set_up, $expected_mark public function test_loading_strategy_with_defer_having_no_dependents_nor_dependencies() { wp_enqueue_script( 'main-script-d1', 'http://example.com/main-script-d1.js', array(), null, array( 'strategy' => 'defer' ) ); $output = get_echo( 'wp_print_scripts' ); - $expected = str_replace( "'", '"', "\n" ); - $this->assertStringContainsString( $expected, $output, 'Expected defer, as there is no dependent or dependency' ); + $expected = "\n"; + $this->assertEqualHTMLScriptTagById( $expected, $output, 'Expected defer, as there is no dependent or dependency' ); } /** @@ -1096,7 +1096,7 @@ public function test_loading_strategy_with_defer_dependent_and_varied_dependenci wp_enqueue_script( 'main-script-d2', 'http://example.com/main-script-d2.js', array( 'dependency-script-d2-1', 'dependency-script-d2-3' ), null, array( 'strategy' => 'defer' ) ); $output = get_echo( 'wp_print_scripts' ); $expected = ''; - $this->assertStringContainsString( $expected, $output, 'Expected defer, as all dependencies are either deferred or blocking' ); + $this->assertEqualHTMLScriptTagById( $expected, $output, 'Expected defer, as all dependencies are either deferred or blocking' ); } /** @@ -1115,7 +1115,7 @@ public function test_loading_strategy_with_all_defer_dependencies() { wp_enqueue_script( 'dependent-script-d3-3', 'http://example.com/dependent-script-d3-3.js', array( 'dependent-script-d3-2' ), null, array( 'strategy' => 'defer' ) ); $output = get_echo( 'wp_print_scripts' ); $expected = ''; - $this->assertStringContainsString( $expected, $output, 'Expected defer, as all dependents have defer loading strategy' ); + $this->assertEqualHTMLScriptTagById( $expected, $output, 'Expected defer, as all dependents have defer loading strategy' ); } /** @@ -1495,9 +1495,10 @@ public function test_loading_strategy_with_invalid_defer_registration() { wp_enqueue_script( 'dependent-script-d4-1', '/dependent-script-d4-1.js', array( 'main-script-d4' ), null, array( 'strategy' => 'defer' ) ); wp_enqueue_script( 'dependent-script-d4-2', '/dependent-script-d4-2.js', array( 'dependent-script-d4-1' ), null ); wp_enqueue_script( 'dependent-script-d4-3', '/dependent-script-d4-3.js', array( 'dependent-script-d4-2' ), null, array( 'strategy' => 'defer' ) ); + $output = get_echo( 'wp_print_scripts' ); - $expected = str_replace( "'", '"', "\n" ); - $this->assertStringContainsString( $expected, $output, 'Scripts registered as defer but that have all dependents with no strategy, should become blocking (no strategy).' ); + $expected = "\n"; + $this->assertEqualHTMLScriptTagById( $expected, $output, 'Scripts registered as defer but that have all dependents with no strategy, should become blocking (no strategy).' ); } /** diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php index 66f9e67f5c8ed..a0808d8c1de57 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php @@ -444,17 +444,19 @@ public static function data_tokens_with_basic_modifiable_text_updates() { /** * Ensures that updates with potentially-compromising values aren't accepted. * - * For example, a modifiable text update should be allowed which would break - * the structure of the containing element, such as in a script or comment. + * For example, a modifiable text update that would change the structure of the HTML + * document is not allowed, like attempting to set `-->` within a comment or `` + * within a text/plain SCRIPT tag. * * @ticket 61617 + * @ticket 62797 * * @dataProvider data_unallowed_modifiable_text_updates * * @param string $html_with_nonempty_modifiable_text Will be used to find the test element. * @param string $invalid_update Update containing possibly-compromising text. */ - public function test_rejects_updates_with_unallowed_substrings( string $html_with_nonempty_modifiable_text, string $invalid_update ) { + public function test_rejects_dangerous_updates( string $html_with_nonempty_modifiable_text, string $invalid_update ) { $processor = new WP_HTML_Tag_Processor( $html_with_nonempty_modifiable_text ); while ( '' === $processor->get_modifiable_text() && $processor->next_token() ) { @@ -466,7 +468,7 @@ public function test_rejects_updates_with_unallowed_substrings( string $html_wit $this->assertFalse( $processor->set_modifiable_text( $invalid_update ), - 'Should have reject possibly-compromising modifiable text update.' + 'Should have rejected possibly-compromising modifiable text update.' ); // Flush updates. @@ -486,11 +488,152 @@ public function test_rejects_updates_with_unallowed_substrings( string $html_wit */ public static function data_unallowed_modifiable_text_updates() { return array( - 'Comment with -->' => array( '', 'Comments end in -->' ), - 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ), - 'SCRIPT with ' => array( '', 'Just a ' ), - 'SCRIPT with ' => array( '', 'beforeafter' ), - 'SCRIPT with "', '' => array( '', 'Comments end in -->' ), + 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ), + 'Non-JS SCRIPT with ', '