Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update post grammar to include markers for inner blocks #11082

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 37 additions & 22 deletions lib/parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -259,20 +259,22 @@ private function peg_f1($pre, $bs, $post) { return peg_join_blocks( $pre, $bs, $
private function peg_f2($blockName, $a) { return $a; }
private function peg_f3($blockName, $attrs) {
return array(
'blockName' => $blockName,
'attrs' => isset( $attrs ) ? $attrs : array(),
'innerBlocks' => array(),
'innerHTML' => '',
'blockName' => $blockName,
'attrs' => isset( $attrs ) ? $attrs : array(),
'innerBlocks' => array(),
'innerHTML' => '',
'blockMarkers' => array(),
);
}
private function peg_f4($s, $children, $e) {
list( $innerHTML, $innerBlocks ) = peg_array_partition( $children, 'is_string' );
list( $innerHTML, $innerBlocks, $blockMarkers ) = peg_split_inner_content( $children );

return array(
'blockName' => $s['blockName'],
'attrs' => $s['attrs'],
'innerBlocks' => $innerBlocks,
'innerHTML' => implode( '', $innerHTML ),
'blockMarkers' => $blockMarkers,
);
}
private function peg_f5($blockName, $attrs) {
Expand Down Expand Up @@ -1441,18 +1443,31 @@ public function parse($input) {
// are the same as `json_decode`

// array arguments are backwards because of PHP
if ( ! function_exists( 'peg_array_partition' ) ) {
function peg_array_partition( $array, $predicate ) {
$truthy = array();
$falsey = array();
if ( ! function_exists( 'peg_split_inner_content' ) ) {
function peg_split_inner_content( $array ) {
$strings = array();
$blocks = array();
$markers = array();
$offset = 0;
$string = '';

foreach ( $array as $item ) {
call_user_func( $predicate, $item )
? $truthy[] = $item
: $falsey[] = $item;
if ( is_string( $item ) ) {
$string .= $item;
} else {
$offset += strlen( $string );
$strings[] = $string;
$markers[] = $offset;
$blocks[] = $item;
$string = '';
}
}

if ( $string !== '' ) {
$strings[] = $string;
}

return array( $truthy, $falsey );
return array( $strings, $blocks, $markers );
}
}

Expand All @@ -1462,10 +1477,10 @@ function peg_join_blocks( $pre, $tokens, $post ) {

if ( ! empty( $pre ) ) {
$blocks[] = array(
'blockName' => null,
'attrs' => array(),
'blockName' => null,
'attrs' => array(),
'innerBlocks' => array(),
'innerHTML' => $pre
'innerHTML' => $pre
);
}

Expand All @@ -1476,20 +1491,20 @@ function peg_join_blocks( $pre, $tokens, $post ) {

if ( ! empty( $html ) ) {
$blocks[] = array(
'blockName' => null,
'attrs' => array(),
'blockName' => null,
'attrs' => array(),
'innerBlocks' => array(),
'innerHTML' => $html
'innerHTML' => $html
);
}
}

if ( ! empty( $post ) ) {
$blocks[] = array(
'blockName' => null,
'attrs' => array(),
'blockName' => null,
'attrs' => array(),
'innerBlocks' => array(),
'innerHTML' => $post
'innerHTML' => $post
);
}

Expand Down
43 changes: 32 additions & 11 deletions packages/block-serialization-default-parser/parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,20 @@ class WP_Block_Parser_Block {
*/
public $innerHTML;

function __construct( $name, $attrs, $innerBlocks, $innerHTML ) {
$this->blockName = $name;
$this->attrs = $attrs;
$this->innerBlocks = $innerBlocks;
$this->innerHTML = $innerHTML;
/**
* Byte offsets into `innerHTML` at which inner blocks were found (assumes UTF-8 encoding)
*
* @since 5.0.0
* @var int[]
*/
public $blockMarkers;

function __construct( $name, $attrs, $innerBlocks, $innerHTML, $blockMarkers ) {
$this->blockName = $name;
$this->attrs = $attrs;
$this->innerBlocks = $innerBlocks;
$this->innerHTML = $innerHTML;
$this->blockMarkers = $blockMarkers;
}
}

Expand Down Expand Up @@ -252,14 +261,14 @@ function proceed() {
) );
}

$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '' );
$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
$this->offset = $start_offset + $token_length;
return true;
}

// otherwise we found an inner block
$this->add_inner_block(
new WP_Block_Parser_Block( $block_name, $attrs, array(), '' ),
new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
$start_offset,
$token_length
);
Expand All @@ -269,7 +278,7 @@ function proceed() {
case 'block-opener':
// track all newly-opened blocks on the stack
array_push( $this->stack, new WP_Block_Parser_Frame(
new WP_Block_Parser_Block( $block_name, $attrs, array(), '' ),
new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
$start_offset,
$token_length,
$start_offset + $token_length,
Expand Down Expand Up @@ -403,10 +412,15 @@ function next_token() {
* @since 3.9.0
*
* @param string $innerHTML HTML content of block
* @return WP_Block_Parser_Block freeform block object
* @return array freeform block object
*/
static function freeform( $innerHTML ) {
return new WP_Block_Parser_Block( null, array(), array(), $innerHTML );
return array(
'blockName' => null,
'attrs' => array(),
'innerBlocks' => array(),
'innerHTML' => $innerHTML,
);
}

/**
Expand Down Expand Up @@ -440,8 +454,15 @@ function add_freeform( $length = null ) {
*/
function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
$parent = $this->stack[ count( $this->stack ) - 1 ];

$next_html = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );
$prev_length = ! empty( $parent->block->blockMarkers )
? $parent->block->blockMarkers[ count( $parent->block->blockMarkers ) - 1 ]
: 0;

$parent->block->innerBlocks[] = $block;
$parent->block->innerHTML .= substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );
$parent->block->blockMarkers[] = $prev_length + strlen( $next_html );
$parent->block->innerHTML .= $next_html;
$parent->prev_offset = $last_offset ? $last_offset : $token_start + $token_length;
}

Expand Down
81 changes: 71 additions & 10 deletions packages/block-serialization-default-parser/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,23 @@ let output;
let stack;
const tokenizer = /<!--\s+(\/)?wp:([a-z][a-z0-9_-]*\/)?([a-z][a-z0-9_-]*)\s+({(?:(?!}\s+-->)[^])+?}\s+)?(\/)?-->/g;

function Block( blockName, attrs, innerBlocks, innerHTML ) {
function Block( blockName, attrs, innerBlocks, innerHTML, blockMarkers ) {
return {
blockName,
attrs,
innerBlocks,
innerHTML,
blockMarkers,
};
}

function Freeform( innerHTML ) {
return Block( null, {}, [], innerHTML );
return {
blockName: null,
attrs: {},
innerBlocks: [],
innerHTML,
};
}

function Frame( block, tokenStart, tokenLength, prevOffset, leadingHtmlStart ) {
Expand Down Expand Up @@ -84,14 +90,14 @@ function proceed() {
if ( null !== leadingHtmlStart ) {
output.push( Freeform( document.substr( leadingHtmlStart, startOffset - leadingHtmlStart ) ) );
}
output.push( Block( blockName, attrs, [], '' ) );
output.push( Block( blockName, attrs, [], '', [] ) );
offset = startOffset + tokenLength;
return true;
}

// otherwise we found an inner block
addInnerBlock(
Block( blockName, attrs, [], '' ),
Block( blockName, attrs, [], '', [] ),
startOffset,
tokenLength,
);
Expand All @@ -102,7 +108,7 @@ function proceed() {
// track all newly-opened blocks on the stack
stack.push(
Frame(
Block( blockName, attrs, [], '' ),
Block( blockName, attrs, [], '', [] ),
startOffset,
tokenLength,
startOffset + tokenLength,
Expand Down Expand Up @@ -227,13 +233,68 @@ function addFreeform( rawLength ) {
output.push( Freeform( document.substr( offset, length ) ) );
}

/**
 * Counts how many bytes the UTF-8 encoding of a string would occupy.
 *
 * The input is walked one UTF-16 code unit at a time, as exposed by
 * `String.prototype.charCodeAt`:
 *
 *  - code units up to 0x007F contribute one byte
 *  - code units up to 0x07FF contribute two bytes
 *  - a high surrogate [0xD800, 0xDBFF] immediately followed by a low
 *    surrogate [0xDC00, 0xDFFF] contributes four bytes for the pair
 *  - every other code unit, including a surrogate that is not part of
 *    a well-formed pair, contributes three bytes
 *
 * @see https://www.ecma-international.org/ecma-262/9.0/index.html#sec-ecmascript-language-types-string-type
 * @cite https://stackoverflow.com/a/34920444
 *
 * @param {string} s input string
 * @return {number} how many bytes are in the UTF8 representation of the given string
 */
function utf8bytes( s ) {
	const length = s.length;
	let total = 0;
	let index = 0;

	while ( index < length ) {
		const unit = s.charCodeAt( index );
		index += 1;

		// single-byte range [0x0000, 0x007F]
		if ( unit <= 0x007F ) {
			total += 1;
			continue;
		}

		// two-byte range [0x0080, 0x07FF]
		if ( unit <= 0x07FF ) {
			total += 2;
			continue;
		}

		// a high surrogate only forms a four-byte sequence when the very
		// next code unit exists and is a low surrogate; in that case both
		// code units are consumed together.
		const isHighSurrogate = unit >= 0xD800 && unit <= 0xDBFF;
		if ( isHighSurrogate && index < length ) {
			const next = s.charCodeAt( index );

			if ( next >= 0xDC00 && next <= 0xDFFF ) {
				total += 4;
				index += 1;
				continue;
			}
		}

		// everything else is counted as three bytes: the remaining BMP
		// ranges as well as lone surrogates, which are passed through
		// transparently; scanning resumes at the following code unit.
		total += 3;
	}

	return total;
}

function addInnerBlock( block, tokenStart, tokenLength, lastOffset ) {
const parent = stack[ stack.length - 1 ];
parent.block.innerBlocks.push( block );
parent.block.innerHTML += document.substr(
parent.prevOffset,
tokenStart - parent.prevOffset,
);
const parentBlock = parent.block;
const blockMarkers = parentBlock.blockMarkers;

const nextHTML = document.substr( parent.prevOffset, tokenStart - parent.prevOffset );
const prevLength = blockMarkers.length ? blockMarkers[ blockMarkers.length - 1 ] : 0;

parentBlock.innerBlocks.push( block );
blockMarkers.push( prevLength + utf8bytes( nextHTML ) );
parentBlock.innerHTML += nextHTML;
parent.prevOffset = lastOffset ? lastOffset : tokenStart + tokenLength;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ exports[`block-serialization-default-parser-js basic parsing parse() works prope
Array [
Object {
"attrs": Object {},
"blockMarkers": Array [],
"blockName": "core/more",
"innerBlocks": Array [],
"innerHTML": "<!--more-->",
Expand All @@ -15,6 +16,7 @@ exports[`block-serialization-default-parser-php basic parsing parse() works prop
Array [
Object {
"attrs": Object {},
"blockMarkers": Array [],
"blockName": "core/more",
"innerBlocks": Array [],
"innerHTML": "<!--more-->",
Expand Down
Loading