Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 84 additions & 50 deletions mdformat_myst/_directives.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
"""Helpers to handle directives---including their headers and fence syntax."""

from __future__ import annotations

from collections.abc import Mapping, MutableMapping, Sequence
import io

from markdown_it import MarkdownIt
import mdformat
import mdformat.plugins
from mdformat.renderer import LOGGER, RenderContext, RenderTreeNode
import ruamel.yaml

Expand All @@ -13,7 +17,12 @@

def longest_consecutive_sequence(seq: str, char: str) -> int:
"""Return length of the longest consecutive sequence of `char` characters
in string `seq`."""
in string `seq`.

This was measured to be faster than the more "Pythonic":

`max((len(list(g)) for k, g in groupby(seq) if k == char), default=0)`
"""
assert len(char) == 1
longest = 0
current_streak = 0
Expand All @@ -30,27 +39,32 @@ def longest_consecutive_sequence(seq: str, char: str) -> int:
def fence(node: "RenderTreeNode", context: "RenderContext") -> str:
"""Render fences (and directives).

Copied from upstream `mdformat` core and should be kept up-to-date
if upstream introduces changes. Note that only two lines are added
to the upstream implementation, i.e. the condition that calls
`format_directive_content` function.
Originally copied from upstream `mdformat` core. Key changes so far:
- call our `format_directive_content` function when a directive is detected (instead
of treating the contents as code in that case).
- allow colon fences (and use a heuristic to ensure good spacing when recombining
those).
"""
info_str = node.info.strip()
lang = info_str.split(maxsplit=1)[0] if info_str else ""
code_block = node.content
is_directive = lang.startswith("{") and lang.endswith("}")
unformatted_body = node.content

if node.type == "colon_fence":
fence_char = ":"
# Info strings of backtick code fences can not contain backticks or tildes.
# If that is the case, we make a tilde code fence instead.
if "`" in info_str or "~" in info_str:
elif "`" in info_str or "~" in info_str:
fence_char = "~"
else:
fence_char = "`"

# Format the code block using enabled codeformatter funcs
if lang in context.options.get("codeformatters", {}):
if is_directive:
body = format_directive_content(unformatted_body, context=context)
elif lang in context.options.get("codeformatters", {}):
fmt_func = context.options["codeformatters"][lang]
try:
code_block = fmt_func(code_block, info_str)
body = fmt_func(unformatted_body, info_str)
except Exception:
# Swallow exceptions so that formatter errors (e.g. due to
# invalid code) do not crash mdformat.
Expand All @@ -59,49 +73,67 @@ def fence(node: "RenderTreeNode", context: "RenderContext") -> str:
f"Failed formatting content of a {lang} code block "
f"(line {node.map[0] + 1} before formatting)"
)
# This "elif" is the *only* thing added to the upstream `fence` implementation!
elif lang.startswith("{") and lang.endswith("}"):
code_block = format_directive_content(code_block)
body = unformatted_body
else:
body = unformatted_body

# The code block must not include as long or longer sequence of `fence_char`s
# as the fence string itself
fence_len = max(3, longest_consecutive_sequence(code_block, fence_char) + 1)
# The fenced contents must not include as long or longer sequence of `fence_char`s
# as the fence string itself.
fence_len = max(3, longest_consecutive_sequence(body, fence_char) + 1)
fence_str = fence_char * fence_len

return f"{fence_str}{info_str}\n{code_block}{fence_str}"


def format_directive_content(raw_content: str) -> str:
parse_result = parse_opts_and_content(raw_content)
if not parse_result:
return raw_content
unformatted_yaml, content = parse_result
dump_stream = io.StringIO()
try:
parsed = yaml.load(unformatted_yaml)
yaml.dump(parsed, stream=dump_stream)
except ruamel.yaml.YAMLError:
LOGGER.warning("Invalid YAML in MyST directive options.")
return raw_content
formatted_yaml = dump_stream.getvalue()

# Remove the YAML closing tag if added by `ruamel.yaml`
if formatted_yaml.endswith("\n...\n"):
formatted_yaml = formatted_yaml[:-4]

# Convert empty YAML to most concise form
if formatted_yaml == "null\n":
formatted_yaml = ""

formatted = "---\n" + formatted_yaml + "---\n"
if content:
formatted += content + "\n"
formatted_fence = f"{fence_str}{info_str}\n"
# Heuristic to ensure child colon fences recombine with a leading blank line for
# consistency.
if body.startswith(":::"):
formatted_fence += "\n"
formatted_fence += f"{body}{fence_str}"
return formatted_fence


def format_directive_content(raw_content: str, context) -> str:
unformatted_yaml, content = parse_opts_and_content(raw_content)
formatted = ""
if unformatted_yaml is not None:
dump_stream = io.StringIO()
try:
parsed = yaml.load(unformatted_yaml)
yaml.dump(parsed, stream=dump_stream)
except ruamel.yaml.YAMLError:
LOGGER.warning("Invalid YAML in MyST directive options.")
return raw_content
if parsed:
formatted += "\n".join([f":{k}: {v}" for k, v in parsed.items()]) + "\n\n"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to give people a way to opt out of the newer formatting via a configuration option, or do you think most people would prefer these changes?

if content.strip():
# Get currently active plugin modules
active_plugins = context.options.get("parser_extension", [])

# Resolve modules back to their string names
# mdformat.text() requires names (str), not objects
extension_names = [
name
for name, plugin in mdformat.plugins.PARSER_EXTENSIONS.items()
if plugin in active_plugins
]
formatted += mdformat.text(
content, options=context.options, extensions=extension_names
)
if not formatted:
return ""
# In both the content-containing case (in which case we might have many terminal
# newlines in the content) and the options-only case (in which case, we have
# inserted two newlines above to separate the options from the non-existent content)
# we want to ensure we end in _exactly_ one newline.
formatted = formatted.rstrip("\n") + "\n"
# Unless the last thing in the content is a colon-fence, which for consistency we
# always add padding to.
if formatted.endswith(":::\n"):
formatted += "\n"
return formatted


def parse_opts_and_content(raw_content: str) -> tuple[str, str] | None:
def parse_opts_and_content(raw_content: str) -> tuple[str | None, str]:
if not raw_content:
return None
return None, raw_content
lines = raw_content.splitlines()
line = lines.pop(0)
yaml_lines = []
Expand All @@ -111,15 +143,17 @@ def parse_opts_and_content(raw_content: str) -> tuple[str, str] | None:
if all(c == "-" for c in line) and len(line) >= 3:
break
yaml_lines.append(line)
elif line.lstrip().startswith(":"):
elif line.lstrip().startswith(":") and not line.lstrip().startswith(":::"):
yaml_lines.append(line.lstrip()[1:])
while lines:
if not lines[0].lstrip().startswith(":"):
if not lines[0].lstrip().startswith(":") or lines[0].lstrip().startswith(
":::"
):
break
line = lines.pop(0).lstrip()[1:]
yaml_lines.append(line)
else:
return None
return None, raw_content

first_line_is_empty_but_second_line_isnt = (
len(lines) >= 2 and not lines[0].strip() and lines[1].strip()
Expand Down
36 changes: 17 additions & 19 deletions mdformat_myst/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from markdown_it import MarkdownIt
import mdformat.plugins
from mdformat.renderer import RenderContext, RenderTreeNode
from mdit_py_plugins.colon_fence import colon_fence_plugin
from mdit_py_plugins.dollarmath import dollarmath_plugin
from mdit_py_plugins.myst_blocks import myst_block_plugin
from mdit_py_plugins.myst_role import myst_role_plugin
Expand All @@ -17,23 +18,16 @@


def update_mdit(mdit: MarkdownIt) -> None:
# Enable mdformat-tables plugin
tables_plugin = mdformat.plugins.PARSER_EXTENSIONS["tables"]
if tables_plugin not in mdit.options["parser_extension"]:
mdit.options["parser_extension"].append(tables_plugin)
tables_plugin.update_mdit(mdit)

# Enable mdformat-front-matters plugin
front_matters_plugin = mdformat.plugins.PARSER_EXTENSIONS["front_matters"]
if front_matters_plugin not in mdit.options["parser_extension"]:
mdit.options["parser_extension"].append(front_matters_plugin)
front_matters_plugin.update_mdit(mdit)

# Enable mdformat-footnote plugin
footnote_plugin = mdformat.plugins.PARSER_EXTENSIONS["footnote"]
if footnote_plugin not in mdit.options["parser_extension"]:
mdit.options["parser_extension"].append(footnote_plugin)
footnote_plugin.update_mdit(mdit)
plugins_to_enable = [
"tables",
"front_matters",
"footnote",
]
for plugin_name in plugins_to_enable:
plugin = mdformat.plugins.PARSER_EXTENSIONS[plugin_name]
if plugin not in mdit.options["parser_extension"]:
mdit.options["parser_extension"].append(plugin)
plugin.update_mdit(mdit)

# Enable MyST role markdown-it extension
mdit.use(myst_role_plugin)
Expand All @@ -45,6 +39,9 @@ def update_mdit(mdit: MarkdownIt) -> None:
# Enable dollarmath markdown-it extension
mdit.use(dollarmath_plugin)

# Enable support for the colon fence syntax
mdit.use(colon_fence_plugin)

# Trick `mdformat`s AST validation by removing HTML rendering of code
# blocks and fences. Directives are parsed as code fences and we
# modify them in ways that don't break MyST AST but do break
Expand Down Expand Up @@ -117,7 +114,6 @@ def _escape_paragraph(text: str, node: RenderTreeNode, context: RenderContext) -
lines = text.split("\n")

for i in range(len(lines)):

# Three or more "+" chars are interpreted as a block break. Escape them.
space_removed = lines[i].replace(" ", "")
if space_removed.startswith("+++"):
Expand All @@ -144,15 +140,17 @@ def _escape_text(text: str, node: RenderTreeNode, context: RenderContext) -> str
return text


CHANGES_AST = True
RENDERERS = {
"blockquote": _math_block_safe_blockquote_renderer,
"colon_fence": fence,
"fence": fence,
"myst_role": _role_renderer,
"myst_line_comment": _comment_renderer,
"myst_block_break": _blockbreak_renderer,
"myst_target": _target_renderer,
"math_inline": _math_inline_renderer,
"math_block_label": _math_block_label_renderer,
"math_block": _math_block_renderer,
"fence": fence,
}
POSTPROCESSORS = {"paragraph": _escape_paragraph, "text": _escape_text}
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ keywords = "mdformat,markdown,formatter,gfm"

requires-python = ">=3.10"
requires = [
"markdown-it-py >= 2.0.0",
"mdformat >=0.7.0",
"mdit-py-plugins >=0.3.0",
"mdformat-front-matters >= 1.0.0",
Expand Down
Loading
Loading