diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 31c4bc8a10654..519a3631f39ed 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3811,35 +3811,29 @@ public function set_modifiable_text( string $plaintext_content ): bool { switch ( $this->get_tag() ) { case 'SCRIPT': - /** - * This is over-protective, but ensures the update doesn't break - * the HTML structure of the SCRIPT element. - * - * More thorough analysis could track the HTML tokenizer states - * and to ensure that the SCRIPT element closes at the expected - * SCRIPT close tag as is done in {@see ::skip_script_data()}. - * - * A SCRIPT element could be closed prematurely by contents - * like ``. A SCRIPT element could be prevented from - * closing by contents like ` └─────┐ + * │ ▼ │ │ + * │ ┌─────────────────────────────────────────┐ │ + * │ + * │ │ ' + * + * The original source of this graph is included at the bottom of this file. + * + * @see https://html.spec.whatwg.org/#restrictions-for-contents-of-script-elements + * @see wp_html_api_script_element_escaping_diagram_source() + * + * @since 7.0.0 + * + * @param string $sourcecode Raw contents intended to be serialized into an HTML SCRIPT element. + * @return string Escaped form of input contents which will not lead to premature closing of the containing SCRIPT element. + */ + public static function escape_javascript_script_contents( string $sourcecode ): string { + $at = 0; + $was_at = 0; + $end = strlen( $sourcecode ); + $escaped = ''; + + /* + * Replace all instances of the ASCII case-insensitive match of "", by using a + * character replacement for the "s" (or the "S"). + */ + while ( $at < $end ) { + $tag_at = strpos( $sourcecode, '<', $at ); + if ( false === $tag_at ) { + break; + } + + $tag_name_at = $tag_at + 1; + $has_closing_slash = $tag_name_at < $end && '/' === $sourcecode[ $tag_name_at ]; + $tag_name_at += $has_closing_slash ? 1 : 0; + + if ( 0 !== substr_compare( $sourcecode, 'script', $tag_name_at, 6, true ) ) { + $at = $tag_at + 1; + continue; + } + + if ( 1 !== strspn( $sourcecode, " \t\f\r\n/>", $tag_name_at + 6, 1 ) ) { + $at = $tag_name_at + 5; + continue; + } + + $escaped .= substr( $sourcecode, $was_at, $tag_name_at - $was_at ); + $escaped .= 's' === $sourcecode[ $tag_name_at ] ? '\u0073' : '\u0053'; + $was_at = $tag_name_at + 1; + $at = $tag_name_at + 7; + } + + if ( '' === $escaped ) { + return $sourcecode; + } + + if ( $was_at < $end ) { + $escaped .= substr( $sourcecode, $was_at ); + } + + return $escaped; + } + /** * Updates or creates a new attribute on the currently matched tag with the passed value. * diff --git a/tests/phpunit/data/html-api/script-element-escaping-diagram.dot b/tests/phpunit/data/html-api/script-element-escaping-diagram.dot new file mode 100644 index 0000000000000..d83b42096366a --- /dev/null +++ b/tests/phpunit/data/html-api/script-element-escaping-diagram.dot @@ -0,0 +1,30 @@ +digraph { + rankdir=TB; + + // Entry point + entry [shape=plaintext label="Open script"]; + entry -> script_data; + + // Double-circle states arranged more compactly + data [shape=doublecircle label="Close script"]; + script_data [shape=doublecircle color=blue label="script\ndata"]; + script_data_escaped [shape=circle color=orange label="escaped"]; + script_data_double_escaped [shape=circle color=red label="double\nescaped"]; + + // Group related nodes on same ranks where possible + {rank=same; script_data script_data_escaped script_data_double_escaped} + + script_data -> script_data [label=""]; + script_data_escaped -> script_data_double_escaped [label=" data [label=" script_data [label="-->"]; + script_data_double_escaped -> script_data_escaped [label="` within a comment or `` + * within a text/plain SCRIPT tag. * * @ticket 61617 + * @ticket 62797 * * @dataProvider data_unallowed_modifiable_text_updates * * @param string $html_with_nonempty_modifiable_text Will be used to find the test element. * @param string $invalid_update Update containing possibly-compromising text. */ - public function test_rejects_updates_with_unallowed_substrings( string $html_with_nonempty_modifiable_text, string $invalid_update ) { + public function test_rejects_dangerous_updates( string $html_with_nonempty_modifiable_text, string $invalid_update ) { $processor = new WP_HTML_Tag_Processor( $html_with_nonempty_modifiable_text ); while ( '' === $processor->get_modifiable_text() && $processor->next_token() ) { @@ -466,7 +468,7 @@ public function test_rejects_updates_with_unallowed_substrings( string $html_wit $this->assertFalse( $processor->set_modifiable_text( $invalid_update ), - 'Should have reject possibly-compromising modifiable text update.' + 'Should have rejected possibly-compromising modifiable text update.' ); // Flush updates. @@ -486,11 +488,152 @@ public function test_rejects_updates_with_unallowed_substrings( string $html_wit */ public static function data_unallowed_modifiable_text_updates() { return array( - 'Comment with -->' => array( '', 'Comments end in -->' ), - 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ), - 'SCRIPT with ' => array( '', 'Just a ' ), - 'SCRIPT with ' => array( '', 'beforeafter' ), - 'SCRIPT with "', '' => array( '', 'Comments end in -->' ), + 'Comment with --!>' => array( '', 'Invalid but legitimate comments end in --!>' ), + 'Non-JS SCRIPT with ', '