Skip to content

[Data Liberation] Build markdown importer as phar #2094

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?php

use KevinGH\Box\Compactor\Compactor;

class DataLiberationBoxCompactor implements Compactor
{
    /**
     * Filters the files that keep their contents during compaction.
     *
     * A file is passed through unchanged only when it has a .php, .json
     * or .lock extension, its path contains none of the excluded fragments,
     * and its contents do not include the Composer platform requirement
     * message. In every other case an empty string is returned.
     *
     * {@inheritdoc}
     */
    public function compact(string $file, string $contents): string
    {
        $has_allowed_extension = preg_match('/\.(php|json|lock)$/', $file);
        if (!$has_allowed_extension) {
            return '';
        }

        // Path fragments whose presence anywhere in $file blanks the file out.
        $excluded_fragments = [
            'platform_check.php',
            '/tests/',
            '/.git/',
            '/.github/',
            '/bin/',
        ];
        foreach ($excluded_fragments as $fragment) {
            if (str_contains($file, $fragment)) {
                return '';
            }
        }

        // Files carrying the Composer platform requirement message are blanked out as well.
        return str_contains($contents, 'Your Composer dependencies require ')
            ? ''
            : $contents;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?php

/**
 * Runs the `box` script with DataLiberationBoxCompactor loaded first.
 *
 * The BOX_BASE_PATH environment variable is expected to hold a path whose
 * parent directory contains the `box` script, with an autoload.php file one
 * directory above that. Without it there is no way to locate either file,
 * so the script stops early with an explicit message instead of failing on
 * a confusing require_once of '/../autoload.php'.
 */
$box_executable = getenv('BOX_BASE_PATH');
if (!is_string($box_executable) || '' === trim($box_executable)) {
    fwrite(STDERR, "The BOX_BASE_PATH environment variable must point to the box executable.\n");
    exit(1);
}

$box_base_path = dirname($box_executable);
require_once $box_base_path . '/../autoload.php';
// The compactor class must be declared before the box script starts running.
require_once __DIR__ . '/DataLiberationBoxCompactor.php';
require_once $box_base_path . '/box';
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?php
// Load the bootstrap.php file located two directories above this one.
require_once __DIR__ . '/../../bootstrap.php';
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

require_once __DIR__ . '/../../../data-liberation/dist/data-liberation-core.phar.gz';
require_once __DIR__ . '/../../dist/data-liberation-markdown.phar';

/**
 * None of this will actually try to parse a file or import
 * any data. We're just making sure both archives can be loaded
 * and the importer can be created without throwing an exception.
 */
$markdown_root = __DIR__ . '/markdown-test-data';
$importer = WP_Markdown_Importer::create_for_markdown_directory(
    $markdown_root,
    array(
        'source_site_url' => 'file://' . $markdown_root,
        'local_markdown_assets_root' => $markdown_root,
        'local_markdown_assets_url_prefix' => '@site/',
    ),
    // This script never resumes an earlier import, so there is no cursor to pass.
    null
);

echo 'Markdown importer created!';

Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?php

/**
 * Post-processes a .phar archive built with Box.
 *
 * Usage: php -d phar.readonly=0 <this script> <path-to-phar>
 */
if (!isset($argv[1]) || '' === $argv[1]) {
    fwrite(STDERR, 'Usage: php -d phar.readonly=0 ' . basename(__FILE__) . " <path-to-phar>\n");
    exit(1);
}

$phar_path = $argv[1];
$phar = new Phar($phar_path);
$phar->startBuffering();

/**
 * Box includes an autoloader with a fixed name in every build.
 * However, we want to load two .phar files built with Box, not
 * one. Unfortunately this yields an error:
 *
 * Cannot declare class ComposerAutoloaderInitHumbugBox451
 *
 * Therefore, we're giving all the HumbugBox classes a unique suffix.
 * The suffix is derived from the path of this script, so copies of the
 * script stored at different paths produce different suffixes.
 */
$autoloadSuffix = substr(md5(__FILE__), 0, 8);
$autoloadClassPrefix = 'InitHumbugBox';

// Stripping these two prefixes turns an entry's pathname into a path relative to the archive root.
$pharPathPrefixes = ['phar://', $phar->getPath() . '/'];

foreach (new RecursiveIteratorIterator($phar) as $entry) {
    if (!$entry->isFile()) {
        continue;
    }

    $relativePath = str_replace($pharPathPrefixes, '', $entry->getPathname());
    $contents = $entry->getContent();
    $updated_contents = str_replace(
        $autoloadClassPrefix,
        $autoloadClassPrefix . $autoloadSuffix,
        $contents
    );

    // Only rewrite the entries that actually mention the autoloader class.
    if ($updated_contents !== $contents) {
        $phar[$relativePath] = $updated_contents;
    }
}

/**
 * Box, very annoyingly, force-adds a platform_check.php file
 * into the final built .phar archive. The vendor libraries
 * do work with a PHP version lower than 8.1 enforced by that
 * platform_check.php file, so let's just truncate it. The
 * .box/bin/check-requirements.php file is emptied in the same way.
 */
$phar['vendor/composer/platform_check.php'] = '';
$phar['.box/bin/check-requirements.php'] = '';
$phar->stopBuffering();

12 changes: 12 additions & 0 deletions packages/playground/data-liberation-markdown/box.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"$schema": "https://raw.githubusercontent.com/box-project/box/refs/heads/main/res/schema.json",
"main": "src/bootstrap.php",
"output": "dist/data-liberation-markdown.phar",
"force-autodiscovery": true,
"compactors": [
"KevinGH\\Box\\Compactor\\Php",
"DataLiberationBoxCompactor"
],
"annotations": false,
"directories": ["src/", "vendor/"]
}
1 change: 1 addition & 0 deletions packages/playground/data-liberation-markdown/composer.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"name": "wordpress/data-liberation-markdown",
"name": "wordpress/data-liberation-markdown",
"prefer-stable": true,
"require": {
Expand Down
Empty file.
Binary file not shown.
26 changes: 26 additions & 0 deletions packages/playground/data-liberation-markdown/phar-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Builds the standalone dist/data-liberation-markdown.phar.gz file.
#
# This is a temporary measure until we have a canonical way of distributing,
# versioning, and using the Data Liberation modules and their dependencies.
# Possible solutions might include composer packages, WordPress plugins, or
# tree-shaken zip files with each module and its composer deps.

set -e
echo "Building data liberation plugin"
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
DATA_LIBERATION_DIR="$SCRIPT_DIR"
BUILD_DIR="$DATA_LIBERATION_DIR/bin/build"
DIST_DIR="$DATA_LIBERATION_DIR/dist"

# Start from an empty dist directory; a missing or already empty one is fine.
rm "$DIST_DIR"/* > /dev/null 2>&1 || true

# bin/build/box.php reads BOX_BASE_PATH to locate Box. Only the first match is
# used, so several `box` executables on the PATH cannot produce a multi-line
# value, and a missing `box` is reported here rather than inside PHP.
BOX_BASE_PATH="$(type -a box 2> /dev/null | grep -v 'alias' | awk '{print $3}' | head -n 1)"
if [ -z "$BOX_BASE_PATH" ]; then
    echo "Could not find the 'box' executable on the PATH." >&2
    exit 1
fi
export BOX_BASE_PATH

php "$BUILD_DIR/box.php" compile -d "$DATA_LIBERATION_DIR" -c "$DATA_LIBERATION_DIR/box.json"
php -d 'phar.readonly=0' "$BUILD_DIR/truncate-composer-checks.php" "$DIST_DIR/data-liberation-markdown.phar"

# Run the smoke test with the local php binary, then again through the php-wasm CLI.
php "$BUILD_DIR/smoke-test.php"
PHP=8.0 bun "$DATA_LIBERATION_DIR/../../php-wasm/cli/src/main.ts" "$BUILD_DIR/smoke-test.php"

cd "$DIST_DIR"
gzip data-liberation-markdown.phar
ls -sgh "$DIST_DIR"
39 changes: 39 additions & 0 deletions packages/playground/data-liberation-markdown/phpcs.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<ruleset name="WordPressStandard">
<description>PHP 7.2 compatibility.</description>
<config name="testVersion" value="7.2"/>
<exclude-pattern>vendor/*</exclude-pattern>
<rule ref="PHPCompatibility">
<exclude name="PHPCompatibility.Keywords.ForbiddenNamesAsDeclared"/>
</rule>
<rule ref="WordPress-Core">
<exclude name="Generic.Commenting.DocComment.MissingShort"/>
<exclude name="Generic.PHP.DiscourageGoto.Found"/>
<exclude name="Generic.CodeAnalysis.EmptyStatement.DetectedIf"/>
<!-- Unused arguments are necessary when inheriting from classes and overriding methods. -->
<exclude name="Generic.CodeAnalysis.UnusedFunctionParameter.Found"/>
<exclude name="Squiz.PHP.NonExecutableCode.Unreachable"/>
<exclude name="Squiz.Commenting.BlockComment.CloserSameLine"/>
<exclude name="Squiz.Commenting.ClassComment.Missing"/>
<exclude name="Squiz.Commenting.FileComment.WrongStyle"/>
<exclude name="Squiz.Commenting.FileComment.Missing"/>
<exclude name="Squiz.Commenting.FunctionComment.Missing"/>
<exclude name="Squiz.Commenting.FunctionComment.MissingParamTag"/>
<exclude name="Squiz.Commenting.FunctionComment.MissingParamType"/>
<exclude name="Squiz.Commenting.FunctionComment.MissingParamComment"/>
<exclude name="Squiz.Commenting.VariableComment.Missing"/>
<exclude name="Squiz.PHP.CommentedOutCode.Found"/>
<!-- "Parameter comment must end with a full stop" is such a pebble in the shoe. -->
<exclude name="Squiz.Commenting.FunctionComment.ParamCommentFullStop"/>
<exclude name="Squiz.PHP.DisallowSizeFunctionsInLoops.Found"/>
<!-- Aligning the 1500 lines of public_suffix_list.php adds a lot of unnecessary noise and then
the actual indentation is not even correct because the rule seems to count bytes, not printable
UTF-8 characters. -->
<exclude name="WordPress.Arrays.MultipleStatementAlignment.DoubleArrowNotAligned"/>
<exclude name="WordPress.Files.FileName.InvalidClassFileName"/>
<exclude name="WordPress.Files.FileName.NotHyphenatedLowercase"/>
<exclude name="WordPress.PHP.YodaConditions.NotYoda"/>
<exclude name="WordPress.Security.EscapeOutput.OutputNotEscaped"/>
<exclude name="WordPress.WP.AlternativeFunctions"/>
<exclude name="WordPress.WP.AlternativeFunctions.file_system_operations_fclose"/>
</rule>
</ruleset>
45 changes: 45 additions & 0 deletions packages/playground/data-liberation-markdown/project.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"name": "playground-data-liberation-markdown",
"$schema": "../../../node_modules/nx/schemas/project-schema.json",
"sourceRoot": "packages/playground/data-liberation-markdown",
"projectType": "library",
"targets": {
"install": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/playground/data-liberation-markdown",
"commands": ["composer install"],
"parallel": false
}
},
"build:phar": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/playground/data-liberation-markdown",
"commands": ["bash ./phar-build.sh"],
"parallel": false
},
"dependsOn": ["playground-data-liberation:build:phar"]
},
"lint:php": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/playground/data-liberation-markdown",
"commands": [
"../data-liberation/vendor/bin/phpcs --standard=./phpcs.xml -s ./src ./*.php"
],
"parallel": false
}
},
"lint:php:fix": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/playground/data-liberation-markdown",
"commands": [
"../data-liberation/vendor/bin/phpcbf --standard=./phpcs.xml ./src"
],
"parallel": false
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ public static function create_for_markdown_directory( $markdown_directory, $opti
return WP_Markdown_Importer::create(
function ( $cursor = null ) use ( $markdown_directory ) {
// @TODO: Handle $cursor
return new WP_Directory_Tree_Entity_Reader(
return new WP_Directory_Tree_Entity_Reader(
new WP_Filesystem(),
array (
array(
'root_dir' => $markdown_directory,
'first_post_id' => 1,
'allowed_extensions' => array( 'md' ),
'index_file_patterns' => array( '#^index\.md$#' ),
'markup_converter_factory' => function( $content ) {
'markup_converter_factory' => function ( $content ) {
return new WP_Markdown_To_Blocks( $content );
},
)
Expand All @@ -42,7 +42,7 @@ protected static function parse_options( $options ) {
return false;
}
$options['local_markdown_assets_root'] = rtrim( $options['local_markdown_assets_root'], '/' );

return parent::parse_options( $options );
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ private function convert_markdown_to_blocks() {
$parser = new MarkdownParser( $environment );

$document = $parser->parse( $this->markdown );
$this->frontmatter = [];
foreach( $document->data as $key => $value ) {
$this->frontmatter = array();
foreach ( $document->data as $key => $value ) {
// Use an array as a value to comply with the WP_Block_Markup_Converter interface.
$this->frontmatter[ $key ] = [$value];
$this->frontmatter[ $key ] = array( $value );
}

$walker = $document->walker();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
require_once __DIR__ . '/WP_Markdown_Importer.php';
require_once __DIR__ . '/WP_Markdown_To_Blocks.php';

require_once __DIR__ . '/../vendor/autoload.php';
require_once __DIR__ . '/../vendor/autoload.php';
2 changes: 1 addition & 1 deletion packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
require_once __DIR__ . '/src/wordpress-core-html-api/html5-named-character-references.php';
}

require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Converter.php';
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Processor.php';
require_once __DIR__ . '/src/block-markup/WP_Block_Markup_Url_Processor.php';
require_once __DIR__ . '/src/block-markup/WP_URL_In_Text_Processor.php';
Expand All @@ -63,7 +64,6 @@
require_once __DIR__ . '/src/import/WP_Stream_Importer.php';
require_once __DIR__ . '/src/import/WP_Entity_Iterator_Chain.php';
require_once __DIR__ . '/src/import/WP_Retry_Frontloading_Iterator.php';
require_once __DIR__ . '/src/import/WP_Markdown_Importer.php';

require_once __DIR__ . '/src/utf8_decoder.php';

Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?php

/**
 * Interface with four parameter-light methods: convert(), get_block_markup(),
 * get_all_metadata() and get_meta_value( $key ).
 *
 * NOTE(review): judging by the name, implementations presumably turn some
 * source markup into block markup and expose metadata found along the way.
 * The exact semantics live in the implementing classes; confirm there.
 */
interface WP_Block_Markup_Converter {
/**
 * NOTE(review): presumably runs the conversion; return value not declared here, confirm against implementations.
 */
public function convert();
/**
 * NOTE(review): presumably returns the block markup produced by convert(); confirm against implementations.
 */
public function get_block_markup();
/**
 * NOTE(review): presumably returns every metadata entry collected by the converter; confirm the exact shape against implementations.
 */
public function get_all_metadata();
/**
 * NOTE(review): presumably returns the metadata stored under $key; behavior for a missing key is not visible here.
 *
 * @param mixed $key Metadata key to look up (type not declared by the interface).
 */
public function get_meta_value( $key );
}

This file was deleted.

Loading