107 lines
2.9 KiB
Python
107 lines
2.9 KiB
Python
# ContentDB
|
|
# Copyright (C) rubenwardy
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
from typing import Sequence
|
|
from urllib.parse import urljoin
|
|
from bs4 import BeautifulSoup
|
|
from jinja2.utils import markupsafe
|
|
from markdown_it import MarkdownIt
|
|
from markdown_it.common.utils import unescapeAll, escapeHtml
|
|
from markdown_it.token import Token
|
|
from markdown_it.presets import gfm_like
|
|
from pygments import highlight
|
|
from pygments.lexers import get_lexer_by_name
|
|
from pygments.formatters.html import HtmlFormatter
|
|
|
|
from .cleaner import clean_html
|
|
from .mention import init_mention
|
|
|
|
|
|
def highlight_code(code, name, attrs):
    """Render a code block as syntax-highlighted HTML using Pygments.

    Args:
        code: Raw text of the code block.
        name: Language name used to look up a Pygments lexer; may be empty.
        attrs: Extra fence attributes. Accepted so the function matches the
            highlighter callback signature, but not used.

    Returns:
        The highlighted HTML string, or ``None`` when no language name was
        given or Pygments has no lexer registered under ``name``. The fence
        renderer in this module treats a falsy result as "not highlighted"
        and falls back to an escaped ``<pre><code>`` block.
    """
    if not name:
        return None

    # Function-scope import so this function does not rely on a change to the
    # module's import list.
    from pygments.util import ClassNotFound

    try:
        lexer = get_lexer_by_name(name)
    except ClassNotFound:
        # The name comes from the fence info string of the markdown being
        # rendered; an unknown language must not abort the whole render.
        return None

    return highlight(code, lexer, HtmlFormatter())
|
|
|
|
|
|
def render_code(self, tokens: Sequence[Token], idx, options, env):
    """Render the fence token at ``tokens[idx]`` to HTML.

    The first whitespace-separated word of the token's unescaped info string
    is used as the language name. When ``options.highlight`` is set it is
    called with the token content, that language name and an empty attribute
    string; a truthy result is returned as-is. In every other case the
    content is HTML-escaped and wrapped in ``<pre><code>``.
    """
    fence = tokens[idx]

    # Work out the language name; it stays empty when there is no info string.
    language = ""
    if fence.info:
        info = unescapeAll(fence.info).strip()
        if info:
            language = info.split(maxsplit=1)[0]

    if options.highlight:
        highlighted = options.highlight(fence.content, language, "")
        if highlighted:
            return highlighted

    # No highlighter configured, or it declined to highlight this block.
    return f"<pre><code>{escapeHtml(fence.content)}</code></pre>"
|
|
|
|
|
|
|
|
# NOTE(review): the return value of make() is discarded here; the parser
# below selects the preset by its "gfm-like" name instead.
gfm_like.make()

# Shared parser instance: "gfm-like" preset with Pygments-backed highlighting.
md = MarkdownIt(
    config="gfm-like",
    options_update={"highlight": highlight_code},
)
# Route fence tokens through this module's renderer instead of the default.
md.add_render_rule(name="fence", function=render_code)
# Presumably installs the mention syntax on the parser; see .mention.
init_mention(md)
|
|
|
|
|
|
def render_markdown(source):
    """Render markdown ``source`` with the module-level parser ``md`` and
    return the resulting HTML after passing it through ``clean_html``."""
    return clean_html(md.render(source))
|
|
|
|
|
|
def init_markdown(app):
    """Register a ``markdown`` filter function via ``app.template_filter()``.

    The filter renders its input with ``render_markdown`` and wraps the
    result in ``markupsafe.Markup``.
    """

    @app.template_filter()
    def markdown(source):
        rendered = render_markdown(source)
        return markupsafe.Markup(rendered)
|
|
|
|
|
|
def get_headings(html: str):
    """Build a nested outline from the ``h1``-``h3`` headings in ``html``.

    Args:
        html: HTML markup to scan for headings.

    Returns:
        A list of outermost entries in document order. Each entry is a dict
        with ``link`` (the heading's ``id`` attribute, or ``""`` when it has
        none), ``text`` (the heading's text) and ``children`` (entries for
        the deeper headings that follow it before the next heading of the
        same or a shallower level).
    """
    soup = BeautifulSoup(html, "html.parser")

    root = []
    # Currently open headings as (level, entry) pairs, outermost first.
    # Keeping the level next to each entry lets nesting follow the actual
    # heading levels rather than the stack depth, so an <h1> no longer pops
    # from an empty stack and same-level headings always become siblings.
    open_headings = []
    for heading in soup.find_all(["h1", "h2", "h3"]):
        entry = {"link": heading.get("id") or "", "text": heading.text, "children": []}
        level = int(heading.name[1:])

        # Close every open heading that is at the same level or deeper.
        while open_headings and open_headings[-1][0] >= level:
            open_headings.pop()

        siblings = open_headings[-1][1]["children"] if open_headings else root
        siblings.append(entry)
        open_headings.append((level, entry))

    return root
|
|
|
|
|
|
def get_user_mentions(html: str) -> set:
    """Return the distinct ``data-username`` attribute values of all ``<a>``
    elements in ``html`` that carry that attribute."""
    document = BeautifulSoup(html, "html.parser")
    return {
        anchor.get("data-username")
        for anchor in document.select("a[data-username]")
    }
|
|
|
|
|
|
def get_links(html: str, url: str) -> set:
    """Return the distinct link targets of all ``<a href>`` elements in
    ``html``, each resolved against ``url`` with ``urljoin``."""
    document = BeautifulSoup(html, "html.parser")
    return {
        urljoin(url, anchor.get("href"))
        for anchor in document.select("a[href]")
    }
|