Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WP_HTML_Processor: Add set_content_inside_balanced_tags #47036

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 62 additions & 17 deletions lib/experimental/html/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -135,28 +135,35 @@ public function balanced_next( WP_HTML_Processor_Scan_State $state, $query = nul
}

public function get_content_inside_balanced_tags() {
static $start_name = null;
static $end_name = null;
list( $start_name, $end_name ) = $this->get_balanced_tags();
$content = $this->get_content_inside_bookmarks( $start_name, $end_name );
$this->seek( $start_name );

if ( null === $start_name || array_key_exists( $start_name, $this->bookmarks ) ) {
$rand_id = rand( 1, PHP_INT_MAX );
$start_name = "start_{$rand_id}";
}
$this->release_bookmark( $start_name );
$this->release_bookmark( $end_name );

if ( null === $end_name || array_key_exists( $end_name, $this->bookmarks ) ) {
$rand_id = rand( 1, PHP_INT_MAX );
$end_name = "start_{$rand_id}";
return $content;
}

private function get_content_inside_bookmarks( $start_bookmark, $end_bookmark ) {
if ( ! isset( $this->bookmarks[ $start_bookmark ], $this->bookmarks[ $end_bookmark ] ) ) {
return null;
}

$this->set_bookmark( $start_name );
$start = $this->bookmarks[ $start_bookmark ];
$end = $this->bookmarks[ $end_bookmark ];

$state = self::new_state();
while ( $this->balanced_next( $state ) ) {
continue;
return substr( $this->get_updated_html(), $start->end + 1, $end->start - $start->end - 2 );
}

public function set_content_inside_balanced_tags( $content ) {
if ( self::is_html_void_element( $this->get_tag() ) ) {
return false;
}

$this->set_bookmark( $end_name );
$content = $this->content_inside_bookmarks( $start_name, $end_name );
$this->get_updated_html();
list( $start_name, $end_name ) = $this->get_balanced_tags();
$this->set_content_inside_bookmarks( $start_name, $end_name, $content );
$this->seek( $start_name );

$this->release_bookmark( $start_name );
Expand All @@ -165,15 +172,53 @@ public function get_content_inside_balanced_tags() {
return $content;
}

private function content_inside_bookmarks( $start_bookmark, $end_bookmark ) {
private function set_content_inside_bookmarks( $start_bookmark, $end_bookmark, $content ) {
if ( ! isset( $this->bookmarks[ $start_bookmark ], $this->bookmarks[ $end_bookmark ] ) ) {
return null;
}

$start = $this->bookmarks[ $start_bookmark ];
$end = $this->bookmarks[ $end_bookmark ];

return substr( $this->get_updated_html(), $start->end + 1, $end->start - $start->end - 2 );
$this->add_lexical_update( $start->end + 1, $end->start - 1, $content );
}

/**
 * Bookmarks the current tag and its matching closing tag.
 *
 * Sets a bookmark on the tag the processor is currently on, advances via
 * find_matching_closing_tag(), and then sets a second bookmark at the
 * position where that scan stopped. The processor is left at that second
 * position; this method does not seek back to the opener and does not
 * release either bookmark.
 *
 * The two bookmark names are kept in static locals and reused on later
 * calls. A name is only regenerated when it has not been created yet or
 * when a bookmark with that name is still set on this processor.
 *
 * NOTE(review): nothing here verifies that the processor is on an opening
 * tag, and the results of set_bookmark() are not checked — confirm that
 * callers guarantee this precondition.
 *
 * @return array A pair of bookmark names: the first for the current opening
 *               tag, the second for the matching closing tag.
 */
public function get_balanced_tags() {
	static $start_name = null;
	static $end_name   = null;

	if ( null === $start_name || array_key_exists( $start_name, $this->bookmarks ) ) {
		$rand_id    = rand( 1, PHP_INT_MAX );
		$start_name = "start_{$rand_id}";
	}

	if ( null === $end_name || array_key_exists( $end_name, $this->bookmarks ) ) {
		$rand_id = rand( 1, PHP_INT_MAX );
		// Use a distinct prefix so the end bookmark can never share a name
		// with the start bookmark, even if both random IDs happen to match.
		$end_name = "end_{$rand_id}";
	}

	$this->set_bookmark( $start_name );
	$this->find_matching_closing_tag();
	$this->set_bookmark( $end_name );

	return array( $start_name, $end_name );
}

/**
 * Moves from the current opening tag to its matching closing tag.
 *
 * Creates a fresh scan state and keeps calling balanced_next() with it
 * until that method reports that there is nothing further to visit.
 * Where the processor ends up is determined entirely by balanced_next().
 *
 * @return void
 */
public function find_matching_closing_tag() {
	$scan_state = self::new_state();

	do {
		$found_more = $this->balanced_next( $scan_state );
	} while ( $found_more );
}

/*
Expand Down
111 changes: 87 additions & 24 deletions lib/experimental/html/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,18 @@ class WP_HTML_Tag_Processor {
* @since 6.2.0
* @var WP_HTML_Text_Replacement[]
*/
private $lexical_updates = array();

/**
* Attribute replacements to apply to input HTML document.
*
* Unlike more generic lexical updates, attribute updates are stored
* in an associative array, where the keys are (lowercase-normalized)
* attribute names, in order to avoid duplication.
*
* @since 6.2.0
* @var WP_HTML_Text_Replacement[]
*/
private $attribute_updates = array();

/**
Expand Down Expand Up @@ -1097,15 +1109,16 @@ private function skip_whitespace() {
}

/**
* Applies attribute updates and cleans up once a tag is fully parsed.
* Applies lexical updates and cleans up once a tag is fully parsed.
*
* @since 6.2.0
*
* @return void
*/
private function after_tag() {
$this->class_name_updates_to_attributes_updates();
$this->apply_attributes_updates();
$this->class_name_updates_to_attribute_updates();
$this->attribute_updates_to_lexical_updates();
$this->apply_lexical_updates();
$this->tag_name_starts_at = null;
$this->tag_name_length = null;
$this->tag_ends_at = null;
Expand All @@ -1114,7 +1127,7 @@ private function after_tag() {
}

/**
* Converts class name updates into tag attributes updates
* Converts class name updates into tag attribute updates
* (they are accumulated in different data formats for performance).
*
* This method is only meant to run right before the attribute updates are applied.
Expand All @@ -1126,7 +1139,7 @@ private function after_tag() {
* @see $classname_updates
* @see $attribute_updates
*/
private function class_name_updates_to_attributes_updates() {
private function class_name_updates_to_attribute_updates() {
if ( count( $this->classname_updates ) === 0 || isset( $this->attribute_updates['class'] ) ) {
$this->classname_updates = array();
return;
Expand Down Expand Up @@ -1242,18 +1255,38 @@ private function class_name_updates_to_attributes_updates() {
}
}

/**
 * Moves all pending attribute updates into the lexical update queue.
 *
 * The attribute updates are appended, without their attribute-name keys,
 * to the end of the lexical updates, and the attribute update list is
 * then emptied.
 *
 * This method is only meant to run right before the lexical updates are
 * applied. The behavior in all other cases is undefined.
 *
 * @since 6.2.0
 *
 * @see $attribute_updates
 * @see $lexical_updates
 *
 * @return void
 */
private function attribute_updates_to_lexical_updates() {
	// Drop the attribute-name keys; only the replacements themselves matter from here on.
	$pending_replacements    = array_values( $this->attribute_updates );
	$this->attribute_updates = array();

	$this->lexical_updates = array_merge( $this->lexical_updates, $pending_replacements );
}

/**
* Applies the enqueued lexical updates to the HTML and shifts bookmarks accordingly.
*
* @since 6.2.0
*/
private function apply_attributes_updates() {
if ( ! count( $this->attribute_updates ) ) {
private function apply_lexical_updates() {
if ( ! count( $this->lexical_updates ) ) {
return;
}

/**
* Attribute updates can be enqueued in any order but as we
* Lexical updates can be enqueued in any order but as we
* progress through the document to replace them we have to
* make our replacements in the order in which they are found
* in that document.
Expand All @@ -1262,48 +1295,61 @@ private function apply_attributes_updates() {
* out of order, which could otherwise lead to mangled output,
* partially-duplicate attributes, and overwritten attributes.
*/
usort( $this->attribute_updates, array( self::class, 'sort_start_ascending' ) );
usort( $this->lexical_updates, array( self::class, 'sort_start_ascending' ) );

foreach ( $this->attribute_updates as $diff ) {
foreach ( $this->lexical_updates as $diff ) {
$this->updated_html .= substr( $this->html, $this->updated_bytes, $diff->start - $this->updated_bytes );
$this->updated_html .= $diff->text;
$this->updated_bytes = $diff->end;
}

foreach ( $this->bookmarks as $bookmark ) {
foreach ( $this->bookmarks as $bookmark_name => $bookmark ) {
/**
* As we loop through $this->attribute_updates, we keep comparing
* As we loop through $this->lexical_updates, we keep comparing
* $bookmark->start and $bookmark->end to $diff->start. We can't
* change it and still expect the correct result, so let's accumulate
* the deltas separately and apply them all at once after the loop.
*/
$head_delta = 0;
$tail_delta = 0;

foreach ( $this->attribute_updates as $diff ) {
$update_head = $bookmark->start >= $diff->start;
$update_tail = $bookmark->end >= $diff->start;
foreach ( $this->lexical_updates as $diff ) {
$bookmark_start_is_after_diff_start = $bookmark->start >= $diff->start;
$bookmark_end_is_after_diff_end = $bookmark->end >= $diff->start;

if ( ! $update_head && ! $update_tail ) {
if ( $bookmark_start_is_after_diff_start ) {
$bookmark_end_is_before_diff_end = $bookmark->end < $diff->end;
if ( $bookmark_end_is_before_diff_end ) {
// The bookmark is fully contained within the diff. We need to invalidate it.
$this->release_bookmark( $bookmark_name );
}
}

if ( ! $bookmark_start_is_after_diff_start && ! $bookmark_end_is_after_diff_end ) {
break;
}

$delta = strlen( $diff->text ) - ( $diff->end - $diff->start );

if ( $update_head ) {
if ( $bookmark_start_is_after_diff_start ) {
$head_delta += $delta;
}

if ( $update_tail ) {
if ( $bookmark_end_is_after_diff_end ) {
$tail_delta += $delta;
}
}

// Did we end up invalidating the bookmark?
if ( ! isset( $this->bookmarks[ $bookmark_name ] ) ) {
continue;
}

$bookmark->start += $head_delta;
$bookmark->end += $tail_delta;
}

$this->attribute_updates = array();
$this->lexical_updates = array();
}

/**
Expand Down Expand Up @@ -1346,8 +1392,8 @@ public function seek( $bookmark_name ) {
*
* @since 6.2.0
*
* @param WP_HTML_Text_Replacement $a First attribute update.
* @param WP_HTML_Text_Replacement $b Second attribute update.
* @param WP_HTML_Text_Replacement $a First lexical update.
* @param WP_HTML_Text_Replacement $b Second lexical update.
* @return integer
*/
private static function sort_start_ascending( $a, $b ) {
Expand Down Expand Up @@ -1479,6 +1525,18 @@ public function is_tag_closer() {
return $this->is_closing_tag;
}

/**
 * Enqueues a lexical update, i.e. a replacement of a span of the HTML
 * with new text.
 *
 * The update is only recorded in the list of lexical updates here; this
 * method does not modify the HTML itself.
 *
 * @param int    $start The start offset of the replacement.
 * @param int    $end   The end offset of the replacement.
 * @param string $text  The replacement.
 * @return void
 */
protected function add_lexical_update( $start, $end, $text ) {
	$replacement = new WP_HTML_Text_Replacement( $start, $end, $text );

	$this->lexical_updates[] = $replacement;
}

/**
* Updates or creates a new attribute on the currently matched tag with the value passed.
*
Expand Down Expand Up @@ -1702,7 +1760,11 @@ public function __toString() {
*/
public function get_updated_html() {
// Short-circuit if there are no new updates to apply.
if ( ! count( $this->classname_updates ) && ! count( $this->attribute_updates ) ) {
if (
! count( $this->classname_updates ) &&
! count( $this->attribute_updates ) &&
! count( $this->lexical_updates )
) {
return $this->updated_html . substr( $this->html, $this->updated_bytes );
}

Expand All @@ -1715,8 +1777,9 @@ public function get_updated_html() {
$updated_html_up_to_current_tag_name_end = $this->updated_html . $delta_between_updated_html_end_and_current_tag_end;

// 1. Apply the class name, attribute, and lexical updates to the original HTML
$this->class_name_updates_to_attributes_updates();
$this->apply_attributes_updates();
$this->class_name_updates_to_attribute_updates();
$this->attribute_updates_to_lexical_updates();
$this->apply_lexical_updates();

// 2. Replace the original HTML with the updated HTML
$this->html = $this->updated_html . substr( $this->html, $this->updated_bytes );
Expand Down
Loading