<?php

namespace MarkdownParserWP;

/**
 * Markdown to Gutenberg Blocks Converter
 */
class BlocksConverter {

/**
 * Turn a markdown document into serialized Gutenberg block markup.
 *
 * The markdown is rendered to HTML with Parsedown, the images in that HTML
 * are optionally imported into the media library, and the resulting HTML is
 * handed to html_to_blocks().
 *
 * @param string $markdown      Markdown source.
 * @param bool   $import_images Whether images should be imported into the media library.
 * @param string $original_url  URL the markdown came from; used to resolve relative image paths.
 * @return string Gutenberg blocks content
 */
public static function convert_markdown_to_blocks($markdown, $import_images = true, $original_url = '') {
    // Render the markdown to HTML first; everything downstream works on HTML.
    $renderer = new \Parsedown();
    $rendered = $renderer->text($markdown);

    // Image import is opt-out: only rewrite the HTML when the caller wants it.
    $prepared = $import_images
        ? self::process_images_in_html($rendered, $original_url)
        : $rendered;

    return self::html_to_blocks($prepared);
}

/**
 * Import the images referenced by an HTML fragment into the media library.
 *
 * Every <img> with a non-empty, non-data: src is passed to
 * PostCreator::import_external_image(). When that yields an attachment that
 * has a URL, the tag's src is pointed at the attachment URL and the
 * attachment ID is stored in a data-attachment-id attribute. Sources without
 * a scheme are resolved against $original_url before being imported.
 *
 * @param string $html         HTML fragment to scan.
 * @param string $original_url URL of the source markdown, used to resolve relative image paths.
 * @return string The fragment with rewritten <img> tags, or $html unchanged when nothing was imported.
 */
private static function process_images_in_html($html, $original_url = '') {
    // DOMDocument::loadHTML() rejects an empty string (a ValueError on PHP 8
    // that "@" cannot silence), and there is nothing to import anyway.
    if (!is_string($html) || trim($html) === '') {
        return $html;
    }

    $dom = new \DOMDocument();

    // Collect parser warnings internally instead of muting everything with
    // "@", then restore whatever libxml setting was active before.
    $previous_setting = libxml_use_internal_errors(true);
    // Non-ASCII characters become numeric entities so libxml does not
    // misread the UTF-8 input; this replaces the 'HTML-ENTITIES' conversion
    // that is deprecated as of PHP 8.2.
    $loaded = $dom->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
    libxml_clear_errors();
    libxml_use_internal_errors($previous_setting);

    if (!$loaded) {
        return $html;
    }

    $updated = false;

    foreach ($dom->getElementsByTagName('img') as $img) {
        $src = trim($img->getAttribute('src'));

        // Nothing to download for empty sources or inline data URIs.
        if ($src === '' || stripos($src, 'data:') === 0) {
            continue;
        }

        $src = self::resolve_image_url($src, $original_url);

        $attachment_id = PostCreator::import_external_image($src);
        if (!$attachment_id || is_wp_error($attachment_id)) {
            continue;
        }

        $new_src = wp_get_attachment_url($attachment_id);
        if (!$new_src) {
            continue;
        }

        $img->setAttribute('src', $new_src);
        $img->setAttribute('data-attachment-id', (string) $attachment_id);
        $updated = true;
    }

    if (!$updated) {
        return $html;
    }

    // Serialize only the children of <body> so the result is a fragment like
    // the input, not a full document with DOCTYPE/html/body wrappers.
    $body = $dom->getElementsByTagName('body')->item(0);
    if (!$body) {
        return $dom->saveHTML();
    }

    $fragment = '';
    foreach ($body->childNodes as $child) {
        $fragment .= $dom->saveHTML($child);
    }

    return $fragment;
}

/**
 * Resolve an image source against the URL of the document that referenced it.
 *
 * Sources that already carry a scheme are returned untouched. Protocol-relative
 * ("//host/x.png") and root-relative ("/x.png") sources are resolved against
 * the base URL's scheme and host; anything else is resolved against the base
 * URL's directory. Dot segments ("./", "../") are left in place.
 *
 * @param string $src          Image source as found in the markup.
 * @param string $original_url URL of the source document; may be empty.
 * @return string Absolute URL when it could be resolved, otherwise $src.
 */
private static function resolve_image_url($src, $original_url) {
    // Already absolute (any scheme), or no base to resolve against.
    if (empty($original_url) || preg_match('#^[a-z][a-z0-9+.\-]*:#i', $src)) {
        return $src;
    }

    $base = parse_url($original_url);
    if (!is_array($base) || empty($base['scheme']) || empty($base['host'])) {
        // Base is not a full URL; fall back to plain directory concatenation.
        return dirname($original_url) . '/' . ltrim($src, '/');
    }

    if (strpos($src, '//') === 0) {
        return $base['scheme'] . ':' . $src;
    }

    $origin = $base['scheme'] . '://' . $base['host'];
    if (isset($base['port'])) {
        $origin .= ':' . $base['port'];
    }

    if ($src[0] === '/') {
        return $origin . $src;
    }

    // Directory of the base document: its path up to and including the last slash.
    $path = isset($base['path']) ? $base['path'] : '/';
    $last_slash = strrpos($path, '/');
    $directory = ($last_slash === false) ? '/' : substr($path, 0, $last_slash + 1);

    return $origin . $directory . $src;
}

/**
 * Convert an HTML fragment into serialized Gutenberg blocks.
 *
 * The fragment is parsed with DOMDocument and every element that is a direct
 * child of <body> is passed to node_to_block(). Non-element children (text,
 * comments) are skipped. The resulting blocks are joined with a blank line.
 *
 * @param string $html HTML content
 * @return string Gutenberg blocks content; an empty string when there is nothing to convert.
 */
private static function html_to_blocks($html) {
    // DOMDocument::loadHTML() rejects an empty string (a ValueError on PHP 8
    // that "@" cannot silence), so empty input short-circuits to no blocks.
    if (!is_string($html) || trim($html) === '') {
        return '';
    }

    $dom = new \DOMDocument();

    // Collect parser warnings internally instead of muting everything with
    // "@", then restore whatever libxml setting was active before.
    $previous_setting = libxml_use_internal_errors(true);
    // Non-ASCII characters become numeric entities so libxml does not
    // misread the UTF-8 input; this replaces the 'HTML-ENTITIES' conversion
    // that is deprecated as of PHP 8.2.
    $loaded = $dom->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
    libxml_clear_errors();
    libxml_use_internal_errors($previous_setting);

    if (!$loaded) {
        return '';
    }

    $body = $dom->getElementsByTagName('body')->item(0);
    if (!$body) {
        return '';
    }

    $xpath = new \DOMXPath($dom);
    $blocks = [];

    foreach ($body->childNodes as $node) {
        if ($node->nodeType !== XML_ELEMENT_NODE) {
            continue;
        }

        $block = self::node_to_block($node, $xpath);
        if ($block) {
            $blocks[] = $block;
        }
    }

    return implode("\n\n", $blocks);
}

/**
 * Convert a DOM element into serialized Gutenberg block markup.
 *
 * Dispatches on the element name:
 *  - h1..h6     => heading block built from the element's text content
 *                  (inline formatting inside the heading is not kept)
 *  - p          => image block when the paragraph's only child is an <img>,
 *                  otherwise a paragraph block with the inner HTML
 *  - ul / ol    => list block built from the element's direct <li> children
 *  - blockquote => quote block with the inner HTML
 *  - pre        => code block when it contains a <code> element, otherwise
 *                  a preformatted block
 *  - hr         => separator block
 *  - table      => table block
 *  - img        => image block
 *  - anything else is passed through verbatim inside an HTML block
 *
 * @param \DOMNode  $node  The DOM node
 * @param \DOMXPath $xpath XPath object for the owning document (not used by the conversions below)
 * @return string|null Block markup from the matching create_*_block() helper
 */
private static function node_to_block($node, $xpath) {
    switch ($node->nodeName) {
        case 'h1':
        case 'h2':
        case 'h3':
        case 'h4':
        case 'h5':
        case 'h6':
            // The digit after "h" is the heading level.
            $level = (int) substr($node->nodeName, 1);
            return self::create_heading_block($node->textContent, $level);

        case 'p':
            // Only a paragraph whose single child IS the image becomes an
            // image block. A linked image (<a><img></a>) stays a paragraph
            // so the surrounding link is not lost.
            $only_child = ($node->childNodes->length === 1) ? $node->firstChild : null;
            if ($only_child && $only_child->nodeType === XML_ELEMENT_NODE && $only_child->nodeName === 'img') {
                return self::create_image_block(
                    $only_child->getAttribute('src'),
                    $only_child->getAttribute('alt'),
                    $only_child->getAttribute('data-attachment-id')
                );
            }
            return self::create_paragraph_block(self::get_inner_html($node));

        case 'ul':
        case 'ol':
            // Walk direct children only: getElementsByTagName('li') would
            // also return the items of nested lists, which are already part
            // of their parent item's inner HTML and would be duplicated.
            $items = [];
            foreach ($node->childNodes as $item) {
                if ($item->nodeType === XML_ELEMENT_NODE && $item->nodeName === 'li') {
                    $items[] = self::get_inner_html($item);
                }
            }
            return self::create_list_block($items, $node->nodeName === 'ol');

        case 'blockquote':
            return self::create_quote_block(self::get_inner_html($node));

        case 'pre':
            $code = $node->getElementsByTagName('code')->item(0);
            if ($code) {
                return self::create_code_block($code->textContent);
            }
            return self::create_preformatted_block($node->textContent);

        case 'hr':
            return self::create_separator_block();

        case 'table':
            return self::create_table_block($node);

        case 'img':
            return self::create_image_block(
                $node->getAttribute('src'),
                $node->getAttribute('alt'),
                $node->getAttribute('data-attachment-id')
            );

        default:
            // For unsupported elements, convert to HTML block
            return self::create_html_block($node->ownerDocument->saveHTML($node));
    }
}

/**
 * Serialize the children of a DOM node back to an HTML string.
 *
 * @param \DOMNode $node Node whose children should be serialized.
 * @return string Concatenated HTML of all child nodes; empty when there are none.
 */
private static function get_inner_html($node) {
    $html = '';
    foreach ($node->childNodes as $child) {
        $html .= $node->ownerDocument->saveHTML($child);
    }
    return $html;
}

/**
 * Build the markup for a Gutenberg heading block.
 *
 * The content is HTML-escaped with esc_html(), so it is treated as plain text.
 *
 * @param string $content Heading text.
 * @param int    $level   Heading level (1-6); used for both the block attribute and the tag name.
 * @return string Gutenberg heading block
 */
private static function create_heading_block($content, $level = 2) {
    $tag = 'h' . $level;

    $opening_comment = '<!-- wp:heading {"level":' . $level . '} -->';
    $element = '<' . $tag . '>' . esc_html($content) . '</' . $tag . '>';
    $closing_comment = '<!-- /wp:heading -->';

    return $opening_comment . $element . $closing_comment;
}

/**
 * Build the markup for a Gutenberg paragraph block.
 *
 * The content is inserted as-is, so inline HTML it contains is preserved.
 *
 * @param string $content Paragraph content (can contain HTML)
 * @return string Gutenberg paragraph block
 */
private static function create_paragraph_block($content) {
    $pieces = [
        '<!-- wp:paragraph -->',
        '<p>' . $content . '</p>',
        '<!-- /wp:paragraph -->',
    ];

    return implode('', $pieces);
}

/**
 * Build the markup for a Gutenberg image block.
 *
 * When an attachment ID is given, the block additionally carries the ID, the
 * width/height from the attachment metadata (with sizeSlug "full") and the
 * attachment's excerpt as caption, where those are available. Without an
 * attachment ID the block only references the image by URL.
 *
 * @param string     $src           Image source URL
 * @param string     $alt           Image alt text
 * @param string|int $attachment_id WordPress attachment ID; empty when the image is not in the media library
 * @return string Gutenberg image block
 */
private static function create_image_block($src, $alt = '', $attachment_id = '') {
    // The ID arrives as a DOM attribute string; normalise it once.
    $attachment_id = (int) $attachment_id;

    $block_attrs = [
        'url' => $src,
        'alt' => $alt,
    ];

    if ($attachment_id > 0) {
        $block_attrs['id'] = $attachment_id;

        // Get image dimensions if available
        $image_meta = wp_get_attachment_metadata($attachment_id);
        if (is_array($image_meta) && isset($image_meta['width'], $image_meta['height'])) {
            $block_attrs['width'] = $image_meta['width'];
            $block_attrs['height'] = $image_meta['height'];
            $block_attrs['sizeSlug'] = 'full';
        }

        // Get caption if available
        $attachment = get_post($attachment_id);
        if ($attachment && !empty($attachment->post_excerpt)) {
            $block_attrs['caption'] = $attachment->post_excerpt;
        }
    }

    // <figure> wrapper, with the size class when the size is known.
    $figure_html = '<figure class="wp-block-image';
    if (isset($block_attrs['sizeSlug'])) {
        $figure_html .= ' size-' . $block_attrs['sizeSlug'];
    }
    $figure_html .= '">';

    // <img> tag
    $figure_html .= '<img src="' . esc_url($src) . '" alt="' . esc_attr($alt) . '"';

    if (isset($block_attrs['width']) && isset($block_attrs['height'])) {
        $figure_html .= ' width="' . esc_attr($block_attrs['width']) . '"';
        $figure_html .= ' height="' . esc_attr($block_attrs['height']) . '"';
    }

    // The wp-image-{id} class only makes sense for media-library images;
    // without an ID it would come out as the meaningless "wp-image-".
    if ($attachment_id > 0) {
        $figure_html .= ' class="wp-image-' . esc_attr($attachment_id) . '"';
    }
    $figure_html .= '/>';

    if (isset($block_attrs['caption'])) {
        $figure_html .= '<figcaption>' . esc_html($block_attrs['caption']) . '</figcaption>';
    }

    $figure_html .= '</figure>';

    // The attributes live inside an HTML comment, so "<", ">", "&", quotes and
    // "--" coming from alt/caption/URL text must be escaped; otherwise a value
    // containing "-->" would end the block comment early.
    $attrs_json = json_encode($block_attrs, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_QUOT);
    if ($attrs_json === false) {
        // Encoding failed (e.g. invalid UTF-8 in the alt text); keep at least the ID.
        $attrs_json = ($attachment_id > 0) ? '{"id":' . $attachment_id . '}' : '{}';
    }
    $attrs_json = str_replace('--', '\\u002d\\u002d', $attrs_json);

    return '<!-- wp:image ' . $attrs_json . ' -->' .
        $figure_html .
        '<!-- /wp:image -->';
}

/**
 * Build the markup for a Gutenberg list block.
 *
 * Items are inserted as-is, so inline HTML (and nested list markup) they
 * contain is preserved. The block comment uses the short "wp:list" name,
 * matching the other block generators in this class, and carries the
 * {"ordered":true} attribute only for ordered lists.
 *
 * @param array $items   List items (can contain HTML)
 * @param bool  $ordered Whether the list is ordered
 * @return string Gutenberg list block
 */
private static function create_list_block($items, $ordered = false) {
    $tag = $ordered ? 'ol' : 'ul';

    // Build the opening comment per variant so the unordered form does not
    // end up with a stray double space where the attributes would go.
    $opening_comment = $ordered
        ? '<!-- wp:list {"ordered":true} -->'
        : '<!-- wp:list -->';

    $list_items = '';
    foreach ((array) $items as $item) {
        $list_items .= '<li>' . $item . '</li>';
    }

    return $opening_comment .
        '<' . $tag . '>' . $list_items . '</' . $tag . '>' .
        '<!-- /wp:list -->';
}

/**
 * Build the markup for a Gutenberg quote block.
 *
 * Content that already starts with a block-level element (for example the
 * <p> elements found inside a parsed <blockquote>) is used as-is; plain or
 * inline content is wrapped in a single <p>. Surrounding whitespace is
 * trimmed in both cases.
 *
 * @param string $content Quote content (can contain HTML)
 * @return string Gutenberg quote block
 */
private static function create_quote_block($content) {
    $inner = trim((string) $content);

    // Wrapping content that is already block-level would nest <p> inside <p>,
    // which is invalid HTML.
    $starts_with_block = preg_match('/^<(?:p|ul|ol|h[1-6]|pre|blockquote|table|div|figure|hr)\b/i', $inner) === 1;
    if (!$starts_with_block) {
        $inner = '<p>' . $inner . '</p>';
    }

    return '<!-- wp:quote -->' .
        '<blockquote class="wp-block-quote">' . $inner . '</blockquote>' .
        '<!-- /wp:quote -->';
}

/**
 * Build the markup for a Gutenberg code block.
 *
 * The content is HTML-escaped with esc_html() before being placed inside
 * <pre><code>.
 *
 * @param string $content Code content
 * @return string Gutenberg code block
 */
private static function create_code_block($content) {
    $escaped = esc_html($content);
    $element = '<pre class="wp-block-code"><code>' . $escaped . '</code></pre>';

    return '<!-- wp:code -->' . $element . '<!-- /wp:code -->';
}

/**
 * Build the markup for a Gutenberg preformatted block.
 *
 * The content is HTML-escaped with esc_html() before being placed inside <pre>.
 *
 * @param string $content Preformatted content
 * @return string Gutenberg preformatted block
 */
private static function create_preformatted_block($content) {
    $escaped = esc_html($content);
    $element = '<pre class="wp-block-preformatted">' . $escaped . '</pre>';

    return '<!-- wp:preformatted -->' . $element . '<!-- /wp:preformatted -->';
}

/**
 * Build the markup for a Gutenberg separator block.
 *
 * Takes no input; the output is a constant string.
 *
 * @return string Gutenberg separator block
 */
private static function create_separator_block() {
    $rule = '<hr class="wp-block-separator"/>';

    return "<!-- wp:separator -->{$rule}<!-- /wp:separator -->";
}

/**
 * Build the markup for a Gutenberg table block from a <table> element.
 *
 * Rows whose parent is <thead> go into the block's <thead>; every other row
 * goes into <tbody>, whether or not the source table had an explicit
 * <tbody>. Within a row, direct <th> and <td> children are kept with their
 * original tag, and each cell's content is re-serialized from the DOM so
 * inline markup is preserved and special characters stay escaped.
 *
 * @param \DOMNode $table Table DOM node
 * @return string Gutenberg table block
 */
private static function create_table_block($table) {
    $head_rows = '';
    $body_rows = '';

    foreach ($table->getElementsByTagName('tr') as $row) {
        $cells = '';

        foreach ($row->childNodes as $cell) {
            if ($cell->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            $tag = $cell->nodeName;
            if ($tag !== 'th' && $tag !== 'td') {
                continue;
            }

            // Serialize the cell's children instead of using textContent:
            // textContent is unescaped, so a literal "<" or "&" in a cell
            // would otherwise end up as raw markup.
            $content = '';
            foreach ($cell->childNodes as $child) {
                $content .= $cell->ownerDocument->saveHTML($child);
            }

            $cells .= '<' . $tag . '>' . $content . '</' . $tag . '>';
        }

        $row_html = '<tr>' . $cells . '</tr>';

        // Sort each row into head or body exactly once, so header rows never
        // reappear (empty) in the body when the source has no <tbody>.
        if ($row->parentNode && $row->parentNode->nodeName === 'thead') {
            $head_rows .= $row_html;
        } else {
            $body_rows .= $row_html;
        }
    }

    // <thead> must precede <tbody>, not sit inside it; the table itself is
    // wrapped in the figure element that carries the block class.
    $html = '<figure class="wp-block-table"><table>';
    if ($head_rows !== '') {
        $html .= '<thead>' . $head_rows . '</thead>';
    }
    $html .= '<tbody>' . $body_rows . '</tbody></table></figure>';

    return '<!-- wp:table -->' . $html . '<!-- /wp:table -->';
}

/**
 * Build the markup for a Gutenberg custom HTML block.
 *
 * The content is placed between the block comments without any escaping.
 *
 * @param string $content HTML content
 * @return string Gutenberg HTML block
 */
private static function create_html_block($content) {
    $pieces = [
        '<!-- wp:html -->',
        $content,
        '<!-- /wp:html -->',
    ];

    return implode('', $pieces);
}

}