wp-md-rest-import/src/BlocksConverter.php

421 lines
14 KiB
PHP
Raw Normal View History

2025-03-24 13:54:35 +01:00
<?php
namespace MarkdownParserWP;
/**
 * Markdown to Gutenberg Blocks Converter
 *
 * Renders Markdown to HTML with Parsedown, optionally hands every referenced
 * image to PostCreator for import, and serialises each top-level HTML element
 * as Gutenberg block markup.
 */
class BlocksConverter {
    /**
     * Convert markdown content to Gutenberg blocks
     *
     * @param string $markdown Markdown content
     * @param bool $import_images Whether to import images to media library
     * @param string $original_url Original markdown URL for resolving relative paths
     * @return string Gutenberg blocks content
     */
    public static function convert_markdown_to_blocks($markdown, $import_images = true, $original_url = '') {
        // Parse markdown to HTML using Parsedown
        $parsedown = new \Parsedown();
        $html = $parsedown->text($markdown);

        // Process images if needed
        if ($import_images) {
            $html = self::process_images_in_html($html, $original_url);
        }

        // Convert HTML to blocks
        return self::html_to_blocks($html);
    }

    /**
     * Parse an HTML fragment into a DOMDocument
     *
     * Callers must not pass an empty string: DOMDocument::loadHTML() rejects it.
     *
     * @param string $html Non-empty HTML fragment (UTF-8)
     * @return \DOMDocument Parsed document
     */
    private static function load_html($html) {
        $dom = new \DOMDocument();

        // The HTML parser does not assume UTF-8 for a bare fragment, so every
        // non-ASCII character is turned into a numeric entity first. This
        // replaces the deprecated mb_convert_encoding(..., 'HTML-ENTITIES').
        $encoded = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

        // Collect parser warnings internally instead of silencing with "@".
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($encoded);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $dom;
    }

    /**
     * Serialise the children of a node (its "inner HTML")
     *
     * @param \DOMNode $node Node whose children are serialised
     * @return string Inner HTML
     */
    private static function inner_html($node) {
        $html = '';
        foreach ($node->childNodes as $child) {
            $html .= $node->ownerDocument->saveHTML($child);
        }
        return $html;
    }

    /**
     * Resolve an image source against the URL of the markdown document
     *
     * @param string $src Image source as written in the document
     * @param string $original_url Original markdown URL (may be empty)
     * @return string Absolute URL when it can be resolved, otherwise $src unchanged
     */
    private static function resolve_image_url($src, $original_url) {
        if (empty($original_url) || filter_var($src, FILTER_VALIDATE_URL)) {
            return $src;
        }

        // Absolute http(s) URLs that merely fail strict validation (spaces,
        // non-ASCII characters, ...) are left untouched.
        if (preg_match('#^https?://#i', $src)) {
            return $src;
        }

        // Protocol-relative URL: borrow the scheme of the source document.
        if (strpos($src, '//') === 0) {
            $scheme = parse_url($original_url, PHP_URL_SCHEME);
            return ($scheme ? $scheme : 'https') . ':' . $src;
        }

        // A base that already names a directory must not lose its last segment.
        $base_url = substr($original_url, -1) === '/'
            ? $original_url
            : dirname($original_url) . '/';

        // Leading slashes are stripped so that root-relative paths resolve
        // against the document directory, as before.
        return $base_url . ltrim($src, '/');
    }

    /**
     * Process images in HTML to import them to media library
     *
     * @param string $html HTML content
     * @param string $original_url Original markdown URL for resolving relative paths
     * @return string Processed HTML (a full serialised document when at least
     *                one image was replaced, otherwise the input unchanged)
     */
    private static function process_images_in_html($html, $original_url = '') {
        // Nothing to scan; also avoids DOMDocument::loadHTML() failing on ''.
        if (trim((string) $html) === '') {
            return $html;
        }

        $dom = self::load_html($html);
        $updated = false;

        foreach ($dom->getElementsByTagName('img') as $img) {
            $src = trim($img->getAttribute('src'));

            // Skip images without a source and inline data URLs
            if ($src === '' || strpos($src, 'data:') === 0) {
                continue;
            }

            $src = self::resolve_image_url($src, $original_url);

            // Import external image; the result is treated as an attachment ID
            // unless it is falsy or a WP_Error.
            $attachment_id = PostCreator::import_external_image($src);
            if (!$attachment_id || is_wp_error($attachment_id)) {
                continue;
            }

            $new_src = wp_get_attachment_url($attachment_id);
            if ($new_src) {
                $img->setAttribute('src', $new_src);
                // Carried along so the image block can reference the attachment.
                $img->setAttribute('data-attachment-id', (string) $attachment_id);
                $updated = true;
            }
        }

        return $updated ? $dom->saveHTML() : $html;
    }

    /**
     * Convert HTML to Gutenberg blocks
     *
     * Only element children of <body> become blocks; top-level text and
     * comment nodes are ignored.
     *
     * @param string $html HTML content
     * @return string Gutenberg blocks content (blocks separated by a blank line)
     */
    private static function html_to_blocks($html) {
        // Empty input yields no blocks (and must not reach loadHTML()).
        if (trim((string) $html) === '') {
            return '';
        }

        $dom = self::load_html($html);
        $xpath = new \DOMXPath($dom);
        $body = $dom->getElementsByTagName('body')->item(0);

        $blocks = [];
        if ($body) {
            foreach ($body->childNodes as $node) {
                if ($node->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }
                $block = self::node_to_block($node, $xpath);
                if ($block) {
                    $blocks[] = $block;
                }
            }
        }

        return implode("\n\n", $blocks);
    }

    /**
     * Convert a DOM node to a Gutenberg block
     *
     * @param DOMNode $node The DOM node
     * @param DOMXPath $xpath XPath object for querying
     * @return string|null Gutenberg block or null if not convertible
     */
    private static function node_to_block($node, $xpath) {
        switch ($node->nodeName) {
            case 'h1':
            case 'h2':
            case 'h3':
            case 'h4':
            case 'h5':
            case 'h6':
                $level = (int) substr($node->nodeName, 1);
                // Inner HTML keeps inline formatting (code, emphasis, links).
                return self::create_heading_block(self::inner_html($node), $level);

            case 'p':
                // Image-only paragraph: the single child must be the <img>
                // itself, otherwise a wrapping link would be dropped.
                $only_child = $node->childNodes->length === 1 ? $node->firstChild : null;
                if ($only_child && $only_child->nodeName === 'img') {
                    return self::create_image_block(
                        $only_child->getAttribute('src'),
                        $only_child->getAttribute('alt'),
                        $only_child->getAttribute('data-attachment-id')
                    );
                }
                // Regular paragraph with possible inline elements
                return self::create_paragraph_block(self::inner_html($node));

            case 'ul':
            case 'ol':
                // Direct <li> children only: nested lists stay inside their
                // parent item instead of being repeated as top-level items.
                $items = [];
                foreach ($node->childNodes as $child) {
                    if ($child->nodeType === XML_ELEMENT_NODE && $child->nodeName === 'li') {
                        $items[] = self::inner_html($child);
                    }
                }
                return self::create_list_block($items, $node->nodeName === 'ol');

            case 'blockquote':
                return self::create_quote_block(self::inner_html($node));

            case 'pre':
                $code = $node->getElementsByTagName('code')->item(0);
                if ($code) {
                    return self::create_code_block($code->textContent);
                }
                return self::create_preformatted_block($node->textContent);

            case 'hr':
                return self::create_separator_block();

            case 'table':
                return self::create_table_block($node);

            case 'img':
                return self::create_image_block(
                    $node->getAttribute('src'),
                    $node->getAttribute('alt'),
                    $node->getAttribute('data-attachment-id')
                );

            default:
                // For unsupported elements, convert to HTML block
                return self::create_html_block($node->ownerDocument->saveHTML($node));
        }
    }

    /**
     * Create a heading block
     *
     * @param string $content Heading content (serialised inline HTML)
     * @param int $level Heading level (1-6)
     * @return string Gutenberg heading block
     */
    private static function create_heading_block($content, $level = 2) {
        $level = (int) $level;
        return '<!-- wp:heading {"level":' . $level . '} -->' .
            '<h' . $level . '>' . trim($content) . '</h' . $level . '>' .
            '<!-- /wp:heading -->';
    }

    /**
     * Create a paragraph block
     *
     * @param string $content Paragraph content (can contain HTML)
     * @return string Gutenberg paragraph block
     */
    private static function create_paragraph_block($content) {
        return '<!-- wp:paragraph -->' .
            '<p>' . $content . '</p>' .
            '<!-- /wp:paragraph -->';
    }

    /**
     * Encode block attributes for use inside a block comment delimiter
     *
     * "<", ">" and "&" are emitted as \uXXXX escapes and "--" is broken up so
     * that attribute values can never terminate the surrounding HTML comment.
     *
     * @param array $attrs Block attributes
     * @return string JSON object
     */
    private static function encode_block_attrs($attrs) {
        $json = json_encode($attrs, JSON_UNESCAPED_SLASHES | JSON_HEX_TAG | JSON_HEX_AMP);
        if ($json === false) {
            // Unencodable values (e.g. invalid UTF-8): emit no attributes
            // rather than a broken delimiter.
            return '{}';
        }
        return str_replace('--', '\\u002d\\u002d', $json);
    }

    /**
     * Create an image block
     *
     * @param string $src Image source URL
     * @param string $alt Image alt text
     * @param string $attachment_id WordPress attachment ID
     * @return string Gutenberg image block
     */
    private static function create_image_block($src, $alt = '', $attachment_id = '') {
        $attachment_id = (int) $attachment_id;
        $block_attrs = [
            'url' => $src,
            'alt' => $alt
        ];

        if ($attachment_id > 0) {
            $block_attrs['id'] = $attachment_id;

            // Get image dimensions if available
            $image_meta = wp_get_attachment_metadata($attachment_id);
            if ($image_meta && isset($image_meta['width']) && isset($image_meta['height'])) {
                $block_attrs['width'] = $image_meta['width'];
                $block_attrs['height'] = $image_meta['height'];
                $block_attrs['sizeSlug'] = 'full';
            }

            // Get caption if available
            $attachment = get_post($attachment_id);
            if ($attachment && !empty($attachment->post_excerpt)) {
                $block_attrs['caption'] = $attachment->post_excerpt;
            }
        }

        // Figure wrapper, with the size class when the size is known
        $figure_html = '<figure class="wp-block-image';
        if (isset($block_attrs['sizeSlug'])) {
            $figure_html .= ' size-' . $block_attrs['sizeSlug'];
        }
        $figure_html .= '">';

        // Image tag
        $figure_html .= '<img src="' . esc_url($src) . '" alt="' . esc_attr($alt) . '"';
        if (isset($block_attrs['width']) && isset($block_attrs['height'])) {
            $figure_html .= ' width="' . esc_attr($block_attrs['width']) . '"';
            $figure_html .= ' height="' . esc_attr($block_attrs['height']) . '"';
        }
        // The wp-image-{id} class only makes sense for an imported attachment.
        if ($attachment_id > 0) {
            $figure_html .= ' class="wp-image-' . $attachment_id . '"';
        }
        $figure_html .= '/>';

        // Caption if available
        if (isset($block_attrs['caption'])) {
            $figure_html .= '<figcaption>' . esc_html($block_attrs['caption']) . '</figcaption>';
        }
        $figure_html .= '</figure>';

        return '<!-- wp:image ' . self::encode_block_attrs($block_attrs) . ' -->' .
            $figure_html .
            '<!-- /wp:image -->';
    }

    /**
     * Create a list block
     *
     * @param array $items List items (can contain HTML)
     * @param bool $ordered Whether the list is ordered
     * @return string Gutenberg list block
     */
    private static function create_list_block($items, $ordered = false) {
        $block_type = 'core/list';
        $tag = $ordered ? 'ol' : 'ul';
        // Attributes carry their own leading space so the unordered delimiter
        // does not end up with a double space.
        $attrs = $ordered ? ' {"ordered":true}' : '';

        $list_items = '';
        foreach ($items as $item) {
            $list_items .= '<li>' . $item . '</li>';
        }

        return '<!-- wp:' . $block_type . $attrs . ' -->' .
            '<' . $tag . '>' . $list_items . '</' . $tag . '>' .
            '<!-- /wp:' . $block_type . ' -->';
    }

    /**
     * Create a quote block
     *
     * @param string $content Quote content (can contain HTML)
     * @return string Gutenberg quote block
     */
    private static function create_quote_block($content) {
        $content = trim($content);

        // Content that already holds block-level elements (the usual case for
        // markdown quotes, which arrive as <p>...</p>) must not be wrapped in
        // another <p>: nested paragraphs are invalid HTML.
        $has_block_markup = preg_match(
            '#<(?:p|ul|ol|h[1-6]|pre|blockquote|table|div|figure|hr)\b#i',
            $content
        ) === 1;
        $inner = $has_block_markup ? $content : '<p>' . $content . '</p>';

        return '<!-- wp:quote -->' .
            '<blockquote class="wp-block-quote">' . $inner . '</blockquote>' .
            '<!-- /wp:quote -->';
    }

    /**
     * Create a code block
     *
     * @param string $content Code content
     * @return string Gutenberg code block
     */
    private static function create_code_block($content) {
        return '<!-- wp:code -->' .
            '<pre class="wp-block-code"><code>' . esc_html($content) . '</code></pre>' .
            '<!-- /wp:code -->';
    }

    /**
     * Create a preformatted block
     *
     * @param string $content Preformatted content
     * @return string Gutenberg preformatted block
     */
    private static function create_preformatted_block($content) {
        return '<!-- wp:preformatted -->' .
            '<pre class="wp-block-preformatted">' . esc_html($content) . '</pre>' .
            '<!-- /wp:preformatted -->';
    }

    /**
     * Create a separator block
     *
     * @return string Gutenberg separator block
     */
    private static function create_separator_block() {
        return '<!-- wp:separator -->' .
            '<hr class="wp-block-separator"/>' .
            '<!-- /wp:separator -->';
    }

    /**
     * Create a table block
     *
     * Rows are collected from the table's own sections only (rows directly
     * under <table> count as body rows), so header rows are never repeated in
     * the body and nested tables are not flattened into the outer one.
     *
     * @param DOMNode $table Table DOM node
     * @return string Gutenberg table block
     */
    private static function create_table_block($table) {
        $rows = ['thead' => [], 'tbody' => [], 'tfoot' => []];

        foreach ($table->childNodes as $section) {
            if ($section->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }
            if ($section->nodeName === 'tr') {
                $rows['tbody'][] = $section;
                continue;
            }
            if (!isset($rows[$section->nodeName])) {
                continue;
            }
            foreach ($section->childNodes as $row) {
                if ($row->nodeType === XML_ELEMENT_NODE && $row->nodeName === 'tr') {
                    $rows[$section->nodeName][] = $row;
                }
            }
        }

        // Sections are emitted as siblings in document order; the body is
        // always present, head and foot only when they have rows.
        $html = '<figure class="wp-block-table"><table>';
        foreach ($rows as $section_name => $section_rows) {
            if ($section_name !== 'tbody' && empty($section_rows)) {
                continue;
            }
            $html .= '<' . $section_name . '>';
            foreach ($section_rows as $row) {
                $html .= '<tr>';
                foreach ($row->childNodes as $cell) {
                    if ($cell->nodeType !== XML_ELEMENT_NODE) {
                        continue;
                    }
                    if ($cell->nodeName !== 'th' && $cell->nodeName !== 'td') {
                        continue;
                    }
                    // Serialised inner HTML keeps inline formatting and stays
                    // entity-escaped, unlike raw textContent.
                    $html .= '<' . $cell->nodeName . '>' .
                        trim(self::inner_html($cell)) .
                        '</' . $cell->nodeName . '>';
                }
                $html .= '</tr>';
            }
            $html .= '</' . $section_name . '>';
        }
        $html .= '</table></figure>';

        return '<!-- wp:table -->' . $html . '<!-- /wp:table -->';
    }

    /**
     * Create an HTML block
     *
     * @param string $content HTML content
     * @return string Gutenberg HTML block
     */
    private static function create_html_block($content) {
        return '<!-- wp:html -->' .
            $content .
            '<!-- /wp:html -->';
    }
}