Switch to markdown-it (#595)

Fixes #537, fixes #586
This commit is contained in:
rubenwardy
2025-06-03 22:41:20 +01:00
committed by GitHub
parent 98f27364f2
commit 8ed86b53ca
9 changed files with 301 additions and 223 deletions

106
app/markdown/__init__.py Normal file
View File

@@ -0,0 +1,106 @@
# ContentDB
# Copyright (C) rubenwardy
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Sequence
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from jinja2.utils import markupsafe
from markdown_it import MarkdownIt
from markdown_it.common.utils import unescapeAll, escapeHtml
from markdown_it.token import Token
from markdown_it.presets import gfm_like
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters.html import HtmlFormatter
from .cleaner import clean_html
from .mention import init_mention
def highlight_code(code, name, attrs):
if name == "":
return None
lexer = get_lexer_by_name(name)
formatter = HtmlFormatter()
return highlight(code, lexer, formatter)
def render_code(self, tokens: Sequence[Token], idx, options, env):
token = tokens[idx]
info = unescapeAll(token.info).strip() if token.info else ""
langName = info.split(maxsplit=1)[0] if info else ""
if options.highlight:
return options.highlight(
token.content, langName, ""
) or f"<pre><code>{escapeHtml(token.content)}</code></pre>"
return f"<pre><code>{escapeHtml(token.content)}</code></pre>"
gfm_like.make()
md = MarkdownIt("gfm-like", {"highlight": highlight_code})
md.add_render_rule("fence", render_code)
init_mention(md)
def render_markdown(source):
html = md.render(source)
return clean_html(html)
def init_markdown(app):
@app.template_filter()
def markdown(source):
return markupsafe.Markup(render_markdown(source))
def get_headings(html: str):
soup = BeautifulSoup(html, "html.parser")
headings = soup.find_all(["h1", "h2", "h3"])
root = []
stack = []
for heading in headings:
this = {"link": heading.get("id") or "", "text": heading.text, "children": []}
this_level = int(heading.name[1:]) - 1
while this_level <= len(stack):
stack.pop()
if len(stack) > 0:
stack[-1]["children"].append(this)
else:
root.append(this)
stack.append(this)
return root
def get_user_mentions(html: str) -> set:
soup = BeautifulSoup(html, "html.parser")
links = soup.select("a[data-username]")
return set([x.get("data-username") for x in links])
def get_links(html: str, url: str) -> set:
soup = BeautifulSoup(html, "html.parser")
links = soup.select("a[href]")
return set([urljoin(url, x.get("href")) for x in links])