From dd0cc1c303d467cae5be5523d05d91d8cdedd0b7 Mon Sep 17 00:00:00 2001 From: Jenkings Date: Mon, 15 Jan 2024 06:08:09 +0000 Subject: [PATCH] Markdown parser --- models/Markdown.php | 186 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 models/Markdown.php diff --git a/models/Markdown.php b/models/Markdown.php new file mode 100644 index 0000000..3269b00 --- /dev/null +++ b/models/Markdown.php @@ -0,0 +1,186 @@ +parseBold($markdownString); + + // Basic parsing for italic (surrounding text with *) + $markdownString = $this->parseItalic($markdownString); + + // Table parsing + $markdownString = $this->parseTables($markdownString); + + // Horizontal line parsing (--- or *** or ___) + $markdownString = $this->horizontalLinesParsing($markdownString); + + // Basic parsing for headers (lines starting with #) + $markdownString = $this->parseHeaders($markdownString); + + // Basic parsing for ordered lists + $markdownString = $this->parseOrderedLists($markdownString); + + // Basic parsing for unordered lists + $markdownString = $this->parseUnorderedLists($markdownString); + + // Code block parsing (lines surrounded with ``` or indented with 4 spaces) + $markdownString = $this->parseCodeBlocks($markdownString); + + // Blockquote parsing (lines starting with >) + $markdownString = $this->parseBlockquote($markdownString); + + // Link parsing ([text](url)) + $markdownString = $this->parseLinks($markdownString); + + // Image parsing (![alt text](url)) + $markdownString = $this->parseImages($markdownString); + + return $markdownString; + } + + private function parseHeaders($input) + { + return preg_replace_callback('/^(#+)(.*)/m', function ($matches) { + $level = strlen($matches[1]); + return "{$matches[2]}"; + }, $input); + } + + private function parseBold($input) + { + return preg_replace('/\*\*(.+?)\*\*/', '$1', $input); + } + + private function parseItalic($input) + { + return preg_replace('/\*(.+[^*])\*/', '$1', $input); + } + + + function parseOrderedLists($input){ + $html = preg_replace_callback('/^\s*(\d+)\.\s*(.*)(\n\s*\d+\.\s*.*)*/m', function ($matches) { + $listItems = array_map('trim', explode("\n", $matches[0])); + $listItems = array_map(function ($item) { + return preg_replace('/^\d+\.\s*/', '', $item); // Remove the number and dot at the beginning + }, $listItems); + + return '
  1. ' . implode('
  2. ', $listItems) . '
'; + }, $input); + + return $html; + } + + function parseUnorderedLists($input){ + // Match contiguous lines starting with *, -, or + as list items + $html = preg_replace_callback('/^\s*([-*+])\s*(.*)(\n\s*[-*+]\s*.*)*/m', function ($matches) { + $listItems = array_map('trim', explode("\n", $matches[0])); + $listItems = array_map(function ($item) { + return preg_replace('/^[-*+]\s*/', '', $item); // Remove the *, -, or + at the beginning + }, $listItems); + + return ''; + }, $input); + + return $html; + } + + + private function parseCodeBlocks($input) + { + $input = preg_replace_callback('/```(.+?)```/s', function ($matches) { + return "
{$matches[1]}
"; + }, $input); + $input = preg_replace_callback('/^\s{4}(.+)$/m', function ($matches) { + return "
{$matches[1]}
"; + }, $input); + return $input; + } + + private function parseBlockquote($input) + { + return preg_replace_callback('/^\s*>\s*(.*)/m', function ($matches) { + return "
{$matches[1]}
"; + }, $input); + } + + private function parseLinks($input) + { + return preg_replace('/\[(.*?)\]\((.*?)\)/', '$1', $input); + } + + private function parseImages($input) + { + return preg_replace('/!\[(.*?)\]\((.*?)\)/', '$1', $input); + } + + private function horizontalLinesParsing($input){ + return preg_replace('/^(.*?)(---|___|\*\*\*)(.*?)$/m', '$1
$3', $input); + } + +private function parseTables($input) +{ + $tables = []; + $currentTable = ''; + $inTable = false; + + $lines = explode("\n", $input); + + foreach ($lines as $line) { + if (preg_match('/^\s*\|(.+)\|\s*$/', $line, $matches)) { + $tableRow = trim($matches[1]); + $columns = explode('|', $tableRow); + $columns = array_map('trim', $columns); + + // Skip lines with only dashes or empty columns + if (preg_match('/^\s*-{3,}\s*$/', $columns[0])) { + $inTable = true; + continue; + } + + $tableHtml = ''; + + if (!$inTable) { + $tableHtml .= ''; + } + + $tableHtml .= ''; + foreach ($columns as $column) { + $cellTag = $inTable ? 'td' : 'th'; + $tableHtml .= "<$cellTag>$column"; + } + $tableHtml .= ''; + + $currentTable .= $tableHtml; + } else { + if ($inTable) { + $currentTable .= '
'; + $tables[] = $currentTable; + $currentTable = ''; + $inTable = false; + } + $tables[] = $line; + } + } + + // Check if there's a remaining table + if ($inTable && !empty($currentTable)) { + $currentTable .= ''; + $tables[] = $currentTable; + } + + return implode("\n", $tables); +} + + + + + + +} \ No newline at end of file