Skip to content

Commit 3443569

Browse files
authored
[Data Liberation] Block markup consumers and producers (#2121)
A part of #1894 Introduces a standardized API for converting between static data formats and blocks+metadata. * The `data format -> blocks+metadata` operation is represented by the WP_Data_Format_Consumer interface * The `blocks+metadata -> data format` operation is represented by the WP_Data_Format_Producer interface This PR also ships a few initial consumers and producers: * `WP_Annotated_Block_Markup_Consumer` – for consuming static block markup with `<meta>` tags. * `WP_Markup_Processor_Consumer` – for consuming an HTML/XHTML markup processor instance. It handles just the regular HTML/XHTML markup, not block markup. * `WP_Annotated_Block_Markup_Producer` – for serializing block markup + metadata array as block markup with `<meta>` tags ## Example The two-way conversion pipeline shipped in this PR goes between this: ```php $block_markup = <<<BLOCKS <!-- wp:paragraph --> <p>Hello <b>world</b>!</p> <!-- /wp:paragraph --> BLOCKS; $metadata = array( 'post_title' => array( 'My first post' ), ); ``` And this: ```html <meta name="post_title" content="My first post"> <!-- wp:paragraph --> <p>Hello <b>world</b>!</p> <!-- /wp:paragraph --> ``` ## Other changes This PR also ships the block parser from WordPress core to enable running unit tests – we need to call `parse_blocks()` now. ## Testing The code isn't used anywhere yet – just rely on the CI.
1 parent cbd8ea4 commit 3443569

23 files changed

+3965
-181
lines changed

packages/playground/data-liberation-markdown/src/WP_Markdown_To_Blocks.php renamed to packages/playground/data-liberation-markdown/src/WP_Markdown_Consumer.php

+9-29
Original file line numberDiff line numberDiff line change
@@ -21,46 +21,26 @@
2121
use League\CommonMark\Extension\Table\TableRow;
2222
use League\CommonMark\Extension\Table\TableSection;
2323

24-
class WP_Markdown_To_Blocks implements WP_Block_Markup_Converter {
25-
const STATE_READY = 'STATE_READY';
26-
const STATE_COMPLETE = 'STATE_COMPLETE';
27-
28-
private $state = self::STATE_READY;
24+
class WP_Markdown_Consumer implements WP_Data_Format_Consumer {
2925
private $root_block;
3026
private $block_stack = array();
3127
private $current_block = null;
3228

3329
private $frontmatter = array();
3430
private $markdown;
3531
private $parsed_blocks = array();
36-
private $block_markup = '';
32+
private $parsed;
3733

3834
public function __construct( $markdown ) {
3935
$this->markdown = $markdown;
4036
}
4137

42-
public function convert() {
43-
if ( self::STATE_READY !== $this->state ) {
44-
return false;
45-
}
46-
$this->convert_markdown_to_blocks();
47-
$this->block_markup = WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks );
48-
return true;
49-
}
50-
51-
public function get_all_metadata() {
52-
return $this->frontmatter;
53-
}
54-
55-
public function get_first_meta_value( $key ) {
56-
if ( ! array_key_exists( $key, $this->frontmatter ) ) {
57-
return null;
58-
}
59-
return $this->frontmatter[ $key ][0];
60-
}
61-
62-
public function get_block_markup() {
63-
return $this->block_markup;
38+
public function consume() {
39+
if( ! $this->parsed ) {
40+
$this->convert_markdown_to_blocks();
41+
$this->parsed = new WP_Blocks_With_Metadata( WP_Import_Utils::convert_blocks_to_markup( $this->parsed_blocks ), $this->frontmatter );
42+
}
43+
return $this->parsed;
6444
}
6545

6646
private function convert_markdown_to_blocks() {
@@ -82,7 +62,7 @@ private function convert_markdown_to_blocks() {
8262
$document = $parser->parse( $this->markdown );
8363
$this->frontmatter = array();
8464
foreach ( $document->data as $key => $value ) {
85-
// Use an array as a value to comply with the WP_Block_Markup_Converter interface.
65+
// Use an array as a value to comply with the WP_Data_Format_Consumer interface.
8666
$this->frontmatter[ $key ] = array( $value );
8767
}
8868

packages/playground/data-liberation-markdown/src/WP_Markdown_Importer.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ function ( $cursor = null ) use ( $markdown_directory ) {
1616
'allowed_extensions' => array( 'md' ),
1717
'index_file_patterns' => array( '#^index\.md$#' ),
1818
'markup_converter_factory' => function ( $content ) {
19-
return new WP_Markdown_To_Blocks( $content );
19+
return new WP_Markdown_Consumer( $content );
2020
},
2121
)
2222
);
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<?php
22

33
require_once __DIR__ . '/WP_Markdown_Importer.php';
4-
require_once __DIR__ . '/WP_Markdown_To_Blocks.php';
4+
require_once __DIR__ . '/WP_Markdown_Consumer.php';
55

66
require_once __DIR__ . '/../vendor/autoload.php';

packages/playground/data-liberation/bootstrap.php

+10-2
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,22 @@
5252
require_once __DIR__ . '/src/wordpress-core-html-api/html5-named-character-references.php';
5353
}
5454

55-
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Converter.php';
55+
require_once __DIR__ . '/src/Data_Liberation_Exception.php';
56+
require_once __DIR__ . '/src/data-format-consumers/WP_Blocks_With_Metadata.php';
57+
require_once __DIR__ . '/src/data-format-consumers/WP_Data_Format_Consumer.php';
58+
require_once __DIR__ . '/src/data-format-consumers/WP_Markup_Processor_Consumer.php';
59+
require_once __DIR__ . '/src/data-format-consumers/WP_Annotated_Block_Markup_Consumer.php';
60+
61+
require_once __DIR__ . '/src/data-format-producers/WP_Data_Format_Producer.php';
62+
require_once __DIR__ . '/src/data-format-producers/WP_Annotated_Block_Markup_Producer.php';
63+
5664
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Processor.php';
5765
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Url_Processor.php';
5866
require_once __DIR__ . '/src/block-markup/WP_URL_In_Text_Processor.php';
5967
require_once __DIR__ . '/src/block-markup/WP_URL.php';
60-
require_once __DIR__ . '/src/block-markup/WP_HTML_To_Blocks.php';
6168

6269
require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader.php';
70+
require_once __DIR__ . '/src/entity-readers/WP_Blocks_With_Metadata_Entity_Reader.php';
6371
require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php';
6472
require_once __DIR__ . '/src/entity-readers/WP_EPub_Entity_Reader.php';
6573
require_once __DIR__ . '/src/entity-readers/WP_WXR_Entity_Reader.php';

packages/playground/data-liberation/phpunit.xml

+1-2
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33
<testsuites>
44
<testsuite name="Application Test Suite">
55
<file>tests/WPHTMLEntityReaderTests.php</file>
6-
<file>tests/WPHTMLToBlocksTests.php</file>
76
<file>tests/WPWXRReaderTests.php</file>
87
<file>tests/WPRewriteUrlsTests.php</file>
9-
<file>tests/WPHTMLToBlocksTests.php</file>
8+
<file>tests/WPMarkupProcessorConsumerTests.php</file>
109
<file>tests/WPHTMLEntityReaderTests.php</file>
1110
<file>tests/WPURLInTextProcessorTests.php</file>
1211
<file>tests/WPBlockMarkupProcessorTests.php</file>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<?php
2+
3+
/**
 * Error type for failures that occur during the data liberation process.
 *
 * Declares no members of its own: it only provides a distinct class name
 * on top of RuntimeException.
 */
class Data_Liberation_Exception extends RuntimeException {}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
<?php
2+
3+
class WP_Data_Liberation_HTML_Processor extends WP_HTML_Processor {

	/**
	 * Returns the raw markup located between the currently matched opening
	 * tag and the closing tag found by skip_to_closer().
	 *
	 * On success the processor is moved back to the opening tag with seek()
	 * and both temporary bookmarks are released. On failure the processor is
	 * left wherever the scan stopped.
	 *
	 * @return string|false The inner HTML, or false when the current token is
	 *                      not an opening tag, when a bookmark cannot be set,
	 *                      or when no matching closing tag was reached.
	 */
	public function get_inner_html() {
		if ( '#tag' !== $this->get_token_type() || $this->is_tag_closer() ) {
			return false;
		}

		/*
		 * NOTE(review): the bookmark methods are called on WP_HTML_Tag_Processor
		 * rather than on the direct parent class. Presumably this keeps the
		 * bookmark names identical to the keys read from $this->bookmarks
		 * below – confirm against WP_HTML_Processor.
		 */
		if ( false === WP_HTML_Tag_Processor::set_bookmark( 'tag-start' ) ) {
			return false;
		}

		/*
		 * Stop here when the scan ended without reaching the matching closer.
		 * Otherwise the 'tag-end' bookmark below could be placed on whatever
		 * token the scan stopped at, and the returned markup would be cut off
		 * at an unrelated position.
		 */
		if ( false === $this->skip_to_closer() ) {
			WP_HTML_Tag_Processor::release_bookmark( 'tag-start' );
			return false;
		}

		if ( false === WP_HTML_Tag_Processor::set_bookmark( 'tag-end' ) ) {
			WP_HTML_Tag_Processor::release_bookmark( 'tag-start' );
			return false;
		}

		$opener = $this->bookmarks['tag-start'];
		$closer = $this->bookmarks['tag-end'];

		// The inner HTML starts right after the opener and ends where the closer starts.
		$inner_html_start  = $opener->start + $opener->length;
		$inner_html_length = $closer->start - $inner_html_start;

		WP_HTML_Tag_Processor::seek( 'tag-start' );
		WP_HTML_Tag_Processor::release_bookmark( 'tag-start' );
		WP_HTML_Tag_Processor::release_bookmark( 'tag-end' );

		return substr( $this->html, $inner_html_start, $inner_html_length );
	}

	/**
	 * Advances token by token until a closing tag is reached whose depth is
	 * one less than the depth at which the scan started.
	 *
	 * @return bool True when such a closing tag was reached, false when
	 *              next_token() stopped yielding tokens first.
	 */
	public function skip_to_closer() {
		$starting_depth = $this->get_current_depth();

		while ( $this->next_token() ) {
			if ( '#tag' !== $this->get_token_type() ) {
				continue;
			}

			if ( ! $this->is_tag_closer() ) {
				continue;
			}

			if ( $this->get_current_depth() === $starting_depth - 1 ) {
				return true;
			}
		}

		return false;
	}
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
<?php
2+
/**
 * Converts metadata-annotated block markup into a block markup + metadata pair.
 *
 * Example:
 *
 *     <meta name="post_title" content="My first post">
 *     <!-- wp:paragraph -->
 *     <p>Hello <b>world</b>!</p>
 *     <!-- /wp:paragraph -->
 *
 * Becomes:
 *
 *     <!-- wp:paragraph -->
 *     <p>Hello <b>world</b>!</p>
 *     <!-- /wp:paragraph -->
 *
 * With the following metadata:
 *
 *     array(
 *         'post_title' => array( 'My first post' ),
 *     )
 */
class WP_Annotated_Block_Markup_Consumer implements WP_Data_Format_Consumer {

	/**
	 * The annotated block markup passed to the constructor.
	 *
	 * @var string
	 */
	private $original_html;

	/**
	 * Cached outcome of consume(); null until consume() has run once.
	 *
	 * @var WP_Blocks_With_Metadata|null
	 */
	private $result;

	/**
	 * @param string $original_html Block markup, optionally mixed with freeform HTML.
	 */
	public function __construct( $original_html ) {
		$this->original_html = $original_html;
	}

	/**
	 * Splits the input into blocks and builds the block markup + metadata pair.
	 *
	 * Named blocks are re-serialized with serialize_block(). Freeform chunks
	 * (blocks whose blockName is null) are run through
	 * WP_Markup_Processor_Consumer, and both the markup and the metadata it
	 * reports are collected. The result is computed once and then reused.
	 *
	 * @return WP_Blocks_With_Metadata The consumed block markup and metadata.
	 */
	public function consume() {
		if ( null !== $this->result ) {
			return $this->result;
		}

		$block_markup = '';
		$metadata     = array();

		foreach ( parse_blocks( $this->original_html ) as $block ) {
			if ( null !== $block['blockName'] ) {
				$block_markup .= serialize_block( $block ) . "\n";
				continue;
			}

			$html_consumer = new WP_Markup_Processor_Consumer(
				WP_HTML_Processor::create_fragment( $block['innerHTML'] )
			);
			$consumed      = $html_consumer->consume();
			$block_markup .= $consumed->get_block_markup() . "\n";

			/*
			 * Metadata is shaped as key => list of values. A plain array_merge()
			 * of two such maps would replace the list stored under a repeated
			 * key, dropping values found in an earlier freeform chunk, so the
			 * lists are concatenated per key instead.
			 */
			foreach ( $consumed->get_all_metadata() as $key => $values ) {
				$existing         = isset( $metadata[ $key ] ) ? $metadata[ $key ] : array();
				$metadata[ $key ] = array_merge( $existing, array_values( (array) $values ) );
			}
		}

		$this->result = new WP_Blocks_With_Metadata( $block_markup, $metadata );

		return $this->result;
	}
}
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,39 @@
11
<?php
2-
32
/**
4-
* Represents a {Data Format} -> Block Markup + Metadata converter.
5-
*
6-
* Used by the Data Liberation importers to accept data formatted as HTML, Markdown, etc.
7-
* and convert them to WordPress posts.
3+
* Represents the result of a {data format} -> block markup conversion.
84
*/
9-
interface WP_Block_Markup_Converter {
10-
/**
11-
* Converts the input document specified in the constructor to block markup.
12-
*
13-
* @return bool Whether the conversion was successful.
14-
*/
15-
public function convert();
5+
class WP_Blocks_With_Metadata {
6+
7+
private $block_markup;
8+
private $metadata;
9+
10+
public function __construct( $block_markup, $metadata = array() ) {
11+
$this->block_markup = $block_markup;
12+
$this->metadata = $metadata;
13+
}
1614

1715
/**
18-
* Gets the block markup generated by the convert() method.
16+
* Gets the first metadata value for a given key.
1917
*
20-
* @return string The block markup.
18+
* Example:
19+
*
20+
* Metadata:
21+
* array(
22+
* 'post_title' => array( 'The Name of the Wind' ),
23+
* 'post_author' => array( 'Patrick Rothfuss', 'Betsy Wollheim' )
24+
* )
25+
*
26+
* get_first_meta_value( 'post_author' ) returns 'Patrick Rothfuss'.
27+
*
28+
* @param string $key The metadata key.
29+
* @return mixed The metadata value.
2130
*/
22-
public function get_block_markup();
31+
public function get_first_meta_value( $key ) {
32+
if ( ! array_key_exists( $key, $this->metadata ) ) {
33+
return null;
34+
}
35+
return $this->metadata[ $key ][0];
36+
}
2337

2438
/**
2539
* Gets all the metadata sourced from the input document by the consume() method.
@@ -35,23 +49,16 @@ public function get_block_markup();
3549
*
3650
* @return array The metadata sourced from the input document.
3751
*/
38-
public function get_all_metadata();
52+
public function get_all_metadata() {
53+
return $this->metadata;
54+
}
3955

4056
/**
41-
* Gets the first metadata value for a given key.
42-
*
43-
* Example:
44-
*
45-
* Metadata:
46-
* array(
47-
* 'post_title' => array( 'The Name of the Wind' ),
48-
* 'post_author' => array( 'Patrick Rothfuss', 'Betsy Wollheim' )
49-
* )
50-
*
51-
* get_first_meta_value( 'post_author' ) returns 'Patrick Rothfuss'.
57+
* Gets the block markup generated by the consume() method.
5258
*
53-
* @param string $key The metadata key.
54-
* @return mixed The metadata value.
59+
* @return string The block markup.
5560
*/
56-
public function get_first_meta_value( $key );
61+
public function get_block_markup() {
62+
return $this->block_markup;
63+
}
5764
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<?php
2+
3+
/**
 * Contract for a {Data Format} -> Block Markup + Metadata consumer.
 *
 * The Data Liberation importers rely on it to take in data formatted as
 * HTML, Markdown, etc. and turn it into WordPress posts.
 */
interface WP_Data_Format_Consumer {

	/**
	 * Turns the input document given to the constructor into block markup.
	 *
	 * @return WP_Blocks_With_Metadata The consumed block markup and metadata.
	 */
	public function consume();
}

0 commit comments

Comments
 (0)