diff --git a/lib/experimental/html/class-wp-html-processor.php b/lib/experimental/html/class-wp-html-processor.php index 6fa236aa94a33..d6d937562ca97 100644 --- a/lib/experimental/html/class-wp-html-processor.php +++ b/lib/experimental/html/class-wp-html-processor.php @@ -135,28 +135,35 @@ public function balanced_next( WP_HTML_Processor_Scan_State $state, $query = nul } public function get_content_inside_balanced_tags() { - static $start_name = null; - static $end_name = null; + list( $start_name, $end_name ) = $this->get_balanced_tags(); + $content = $this->get_content_inside_bookmarks( $start_name, $end_name ); + $this->seek( $start_name ); - if ( null === $start_name || array_key_exists( $start_name, $this->bookmarks ) ) { - $rand_id = rand( 1, PHP_INT_MAX ); - $start_name = "start_{$rand_id}"; - } + $this->release_bookmark( $start_name ); + $this->release_bookmark( $end_name ); - if ( null === $end_name || array_key_exists( $end_name, $this->bookmarks ) ) { - $rand_id = rand( 1, PHP_INT_MAX ); - $end_name = "start_{$rand_id}"; + return $content; + } + + private function get_content_inside_bookmarks( $start_bookmark, $end_bookmark ) { + if ( ! isset( $this->bookmarks[ $start_bookmark ], $this->bookmarks[ $end_bookmark ] ) ) { + return null; } - $this->set_bookmark( $start_name ); + $start = $this->bookmarks[ $start_bookmark ]; + $end = $this->bookmarks[ $end_bookmark ]; - $state = self::new_state(); - while ( $this->balanced_next( $state ) ) { - continue; + return substr( $this->get_updated_html(), $start->end + 1, $end->start - $start->end - 2 ); + } + + public function set_content_inside_balanced_tags( $content ) { + if ( self::is_html_void_element( $this->get_tag() ) ) { + return false; } - $this->set_bookmark( $end_name ); - $content = $this->content_inside_bookmarks( $start_name, $end_name ); + $this->get_updated_html(); + list( $start_name, $end_name ) = $this->get_balanced_tags(); + $this->set_content_inside_bookmarks( $start_name, $end_name, $content ); $this->seek( $start_name ); $this->release_bookmark( $start_name ); @@ -165,7 +172,7 @@ public function get_content_inside_balanced_tags() { return $content; } - private function content_inside_bookmarks( $start_bookmark, $end_bookmark ) { + private function set_content_inside_bookmarks( $start_bookmark, $end_bookmark, $content ) { if ( ! isset( $this->bookmarks[ $start_bookmark ], $this->bookmarks[ $end_bookmark ] ) ) { return null; } @@ -173,7 +180,45 @@ private function content_inside_bookmarks( $start_bookmark, $end_bookmark ) { $start = $this->bookmarks[ $start_bookmark ]; $end = $this->bookmarks[ $end_bookmark ]; - return substr( $this->get_updated_html(), $start->end + 1, $end->start - $start->end - 2 ); + $this->add_lexical_update( $start->end + 1, $end->start - 1, $content ); + } + + /** + * If on an opening tag, return a pair of bookmarks for it, and for the matching closing tag. + * + * @return array A pair of bookmarks for the current opening and matching closing tags. + */ + public function get_balanced_tags() { + static $start_name = null; + static $end_name = null; + + if ( null === $start_name || array_key_exists( $start_name, $this->bookmarks ) ) { + $rand_id = rand( 1, PHP_INT_MAX ); + $start_name = "start_{$rand_id}"; + } + + if ( null === $end_name || array_key_exists( $end_name, $this->bookmarks ) ) { + $rand_id = rand( 1, PHP_INT_MAX ); + $end_name = "start_{$rand_id}"; + } + + $this->set_bookmark( $start_name ); + $this->find_matching_closing_tag(); + $this->set_bookmark( $end_name ); + + return array( $start_name, $end_name ); + } + + /** + * If on an opening tag, navigate to the matching closing tag. + * + * @return void + */ + public function find_matching_closing_tag() { + $state = self::new_state(); + while ( $this->balanced_next( $state ) ) { + continue; + } } /* diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index 72c342dbd02a7..3016a2fea79b6 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -423,6 +423,18 @@ class WP_HTML_Tag_Processor { * @since 6.2.0 * @var WP_HTML_Text_Replacement[] */ + private $lexical_updates = array(); + + /** + * Attribute replacements to apply to input HTML document. + * + * Unlike more generic lexical updates, attribute updates are stored + * in an associative array, where the keys are (lowercase-normalized) + * attribute names, in order to avoid duplication. + * + * @since 6.2.0 + * @var WP_HTML_Text_Replacement[] + */ private $attribute_updates = array(); /** @@ -1097,15 +1109,16 @@ private function skip_whitespace() { } /** - * Applies attribute updates and cleans up once a tag is fully parsed. + * Applies lexical updates and cleans up once a tag is fully parsed. * * @since 6.2.0 * * @return void */ private function after_tag() { - $this->class_name_updates_to_attributes_updates(); - $this->apply_attributes_updates(); + $this->class_name_updates_to_attribute_updates(); + $this->attribute_updates_to_lexical_updates(); + $this->apply_lexical_updates(); $this->tag_name_starts_at = null; $this->tag_name_length = null; $this->tag_ends_at = null; @@ -1114,7 +1127,7 @@ private function after_tag() { } /** - * Converts class name updates into tag attributes updates + * Converts class name updates into tag attribute updates * (they are accumulated in different data formats for performance). * * This method is only meant to run right before the attribute updates are applied. @@ -1126,7 +1139,7 @@ private function after_tag() { * @see $classname_updates * @see $attribute_updates */ - private function class_name_updates_to_attributes_updates() { + private function class_name_updates_to_attribute_updates() { if ( count( $this->classname_updates ) === 0 || isset( $this->attribute_updates['class'] ) ) { $this->classname_updates = array(); return; @@ -1242,18 +1255,38 @@ private function class_name_updates_to_attributes_updates() { } } + /** + * Converts attribute updates into lexical updates. + * + * This method is only meant to run right before the attribute updates are applied. + * The behavior in all other cases is undefined. + * + * @return void + * @since 6.2.0 + * + * @see $attribute_updates + * @see $lexical_updates + */ + private function attribute_updates_to_lexical_updates() { + $this->lexical_updates = array_merge( + $this->lexical_updates, + array_values( $this->attribute_updates ) + ); + $this->attribute_updates = array(); + } + /** * Applies updates to attributes. * * @since 6.2.0 */ - private function apply_attributes_updates() { - if ( ! count( $this->attribute_updates ) ) { + private function apply_lexical_updates() { + if ( ! count( $this->lexical_updates ) ) { return; } /** - * Attribute updates can be enqueued in any order but as we + * Lexical updates can be enqueued in any order but as we * progress through the document to replace them we have to * make our replacements in the order in which they are found * in that document. @@ -1262,17 +1295,17 @@ private function apply_attributes_updates() { * out of order, which could otherwise lead to mangled output, * partially-duplicate attributes, and overwritten attributes. */ - usort( $this->attribute_updates, array( self::class, 'sort_start_ascending' ) ); + usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) ); - foreach ( $this->attribute_updates as $diff ) { + foreach ( $this->lexical_updates as $diff ) { $this->updated_html .= substr( $this->html, $this->updated_bytes, $diff->start - $this->updated_bytes ); $this->updated_html .= $diff->text; $this->updated_bytes = $diff->end; } - foreach ( $this->bookmarks as $bookmark ) { + foreach ( $this->bookmarks as $bookmark_name => $bookmark ) { /** - * As we loop through $this->attribute_updates, we keep comparing + * As we loop through $this->lexical_updates, we keep comparing * $bookmark->start and $bookmark->end to $diff->start. We can't * change it and still expect the correct result, so let's accumulate * the deltas separately and apply them all at once after the loop. @@ -1280,30 +1313,43 @@ private function apply_attributes_updates() { $head_delta = 0; $tail_delta = 0; - foreach ( $this->attribute_updates as $diff ) { - $update_head = $bookmark->start >= $diff->start; - $update_tail = $bookmark->end >= $diff->start; + foreach ( $this->lexical_updates as $diff ) { + $bookmark_start_is_after_diff_start = $bookmark->start >= $diff->start; + $bookmark_end_is_after_diff_end = $bookmark->end >= $diff->start; - if ( ! $update_head && ! $update_tail ) { + if ( $bookmark_start_is_after_diff_start ) { + $bookmark_end_is_before_diff_end = $bookmark->end < $diff->end; + if ( $bookmark_end_is_before_diff_end ) { + // The bookmark is fully contained within the diff. We need to invalidate it. + $this->release_bookmark( $bookmark_name ); + } + } + + if ( ! $bookmark_start_is_after_diff_start && ! $bookmark_end_is_after_diff_end ) { break; } $delta = strlen( $diff->text ) - ( $diff->end - $diff->start ); - if ( $update_head ) { + if ( $bookmark_start_is_after_diff_start ) { $head_delta += $delta; } - if ( $update_tail ) { + if ( $bookmark_end_is_after_diff_end ) { $tail_delta += $delta; } } + // Did we end up invalidating the bookmark? + if ( ! isset( $this->bookmarks[ $bookmark_name ] ) ) { + continue; + } + $bookmark->start += $head_delta; $bookmark->end += $tail_delta; } - $this->attribute_updates = array(); + $this->lexical_updates = array(); } /** @@ -1346,8 +1392,8 @@ public function seek( $bookmark_name ) { * * @since 6.2.0 * - * @param WP_HTML_Text_Replacement $a First attribute update. - * @param WP_HTML_Text_Replacement $b Second attribute update. + * @param WP_HTML_Text_Replacement $a First lexical update. + * @param WP_HTML_Text_Replacement $b Second lexical update. * @return integer */ private static function sort_start_ascending( $a, $b ) { @@ -1479,6 +1525,18 @@ public function is_tag_closer() { return $this->is_closing_tag; } + /** + * Add a lexical update, i.e. a replacement of HTML at a given position. + * + * @param int $start The start offset of the replacement. + * @param int $end The end offset of the replacement. + * @param string $text The replacement. + * @return void + */ + protected function add_lexical_update( $start, $end, $text ) { + $this->lexical_updates[] = new WP_HTML_Text_Replacement( $start, $end, $text ); + } + /** * Updates or creates a new attribute on the currently matched tag with the value passed. * @@ -1702,7 +1760,11 @@ public function __toString() { */ public function get_updated_html() { // Short-circuit if there are no new updates to apply. - if ( ! count( $this->classname_updates ) && ! count( $this->attribute_updates ) ) { + if ( + ! count( $this->classname_updates ) && + ! count( $this->attribute_updates ) && + ! count( $this->lexical_updates ) + ) { return $this->updated_html . substr( $this->html, $this->updated_bytes ); } @@ -1715,8 +1777,9 @@ public function get_updated_html() { $updated_html_up_to_current_tag_name_end = $this->updated_html . $delta_between_updated_html_end_and_current_tag_end; // 1. Apply the attributes updates to the original HTML - $this->class_name_updates_to_attributes_updates(); - $this->apply_attributes_updates(); + $this->class_name_updates_to_attribute_updates(); + $this->attribute_updates_to_lexical_updates(); + $this->apply_lexical_updates(); // 2. Replace the original HTML with the updated HTML $this->html = $this->updated_html . substr( $this->html, $this->updated_bytes ); diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php index ae9089d761f4f..942e248a30a79 100644 --- a/phpunit/html/wp-html-processor-test.php +++ b/phpunit/html/wp-html-processor-test.php @@ -23,8 +23,10 @@ class WP_UnitTestCase extends PHPUnit\Framework\TestCase {} * @coversDefaultClass WP_HTML_Processor */ class WP_HTML_Processor_Test extends WP_UnitTestCase { + const HTML = '
outside
inside
'; + public function test_find_descendant_tag() { - $tags = new WP_HTML_Processor( '
outside
inside
' ); + $tags = new WP_HTML_Processor( self::HTML ); $tags->next_tag( 'div' ); $state = $tags->new_state(); @@ -222,4 +224,81 @@ public function test_can_scan_through_tags_at_a_given_depth() { // Did we only visit the tags inside section > * > p? $this->assertEquals( 1, $p2_count ); } + + public function test_set_content_inside_balanced_tags_on_void_element_has_no_effect() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'img' ); + $content = $tags->set_content_inside_balanced_tags( 'This is the new img content' ); + $this->assertFalse( $content ); + $this->assertSame( self::HTML, $tags->get_updated_html() ); + } + + public function test_set_content_inside_balanced_tags_sets_content_correctly() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'section' ); + $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); + $this->assertSame( '
outside
This is the new section content.
', $tags->get_updated_html() ); + } + + public function test_set_content_inside_balanced_tags_updates_bookmarks_correctly() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'div' ); + $tags->set_bookmark( 'start' ); + $tags->next_tag( 'img' ); + $this->assertSame( 'IMG', $tags->get_tag() ); + $tags->set_bookmark( 'after' ); + $tags->seek( 'start' ); + + $tags->set_content_inside_balanced_tags( 'This is the new div content.' ); + $this->assertSame( '
This is the new div content.
inside
', $tags->get_updated_html() ); + $tags->seek( 'after' ); + $this->assertSame( 'IMG', $tags->get_tag() ); + } + + public function test_set_content_inside_balanced_tags_subsequent_updates_on_the_same_tag_work() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'section' ); + $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); + $tags->set_content_inside_balanced_tags( 'This is the even newer section content.' ); + $this->assertSame( '
outside
This is the even newer section content.
', $tags->get_updated_html() ); + } + + public function test_set_content_inside_balanced_tags_followed_by_set_attribute_works() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'section' ); + $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); + $tags->set_attribute( 'id', 'thesection' ); + $this->assertSame( '
outside
This is the new section content.
', $tags->get_updated_html() ); + } + + public function test_set_content_inside_balanced_tags_preceded_by_set_attribute_works() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'section' ); + $tags->set_attribute( 'id', 'thesection' ); + $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); + $this->assertSame( '
outside
This is the new section content.
', $tags->get_updated_html() ); + } + + public function test_set_content_inside_balanced_tags_invalidates_bookmarks_that_point_to_replaced_content() { + $tags = new WP_HTML_Processor( self::HTML ); + + $tags->next_tag( 'section' ); + $tags->set_bookmark( 'start' ); + $tags->next_tag( 'img' ); + $tags->set_bookmark( 'replaced' ); + $tags->seek( 'start' ); + + $tags->set_content_inside_balanced_tags( 'This is the new section content.' ); + $this->assertSame( '
outside
This is the new section content.
', $tags->get_updated_html() ); + + $this->expectExceptionMessage( 'Invalid bookmark name' ); + $successful_seek = $tags->seek( 'replaced' ); + $this->assertFalse( $successful_seek ); + } }