diff --git a/mdformat_myst/_directives.py b/mdformat_myst/_directives.py index 1a5545a..9ddbf3e 100644 --- a/mdformat_myst/_directives.py +++ b/mdformat_myst/_directives.py @@ -1,9 +1,13 @@ +"""Helpers to handle directives---including their headers and fence syntax.""" + from __future__ import annotations from collections.abc import Mapping, MutableMapping, Sequence import io from markdown_it import MarkdownIt +import mdformat +import mdformat.plugins from mdformat.renderer import LOGGER, RenderContext, RenderTreeNode import ruamel.yaml @@ -13,7 +17,12 @@ def longest_consecutive_sequence(seq: str, char: str) -> int: """Return length of the longest consecutive sequence of `char` characters - in string `seq`.""" + in string `seq`. + + This measured faster than the more "Pythonic": + + `max((len(list(g)) for k, g in groupby(s) if k == char), default=0)` + """ assert len(char) == 1 longest = 0 current_streak = 0 @@ -30,27 +39,32 @@ def longest_consecutive_sequence(seq: str, char: str) -> int: def fence(node: "RenderTreeNode", context: "RenderContext") -> str: """Render fences (and directives). - Copied from upstream `mdformat` core and should be kept up-to-date - if upstream introduces changes. Note that only two lines are added - to the upstream implementation, i.e. the condition that calls - `format_directive_content` function. + Originally copied from upstream `mdformat` core. Key changes so far: + - call our `format_directive_content` function when a directive is detected (instead + of treating the contents as code in that case). + - allow colon fences (and use a heuristic to ensure good spacing when recombining + those). """ info_str = node.info.strip() lang = info_str.split(maxsplit=1)[0] if info_str else "" - code_block = node.content + is_directive = lang.startswith("{") and lang.endswith("}") + unformatted_body = node.content + if node.type == "colon_fence": + fence_char = ":" # Info strings of backtick code fences can not contain backticks or tildes. # If that is the case, we make a tilde code fence instead. - if "`" in info_str or "~" in info_str: + elif "`" in info_str or "~" in info_str: fence_char = "~" else: fence_char = "`" - # Format the code block using enabled codeformatter funcs - if lang in context.options.get("codeformatters", {}): + if is_directive: + body = format_directive_content(unformatted_body, context=context) + elif lang in context.options.get("codeformatters", {}): fmt_func = context.options["codeformatters"][lang] try: - code_block = fmt_func(code_block, info_str) + body = fmt_func(unformatted_body, info_str) except Exception: # Swallow exceptions so that formatter errors (e.g. due to # invalid code) do not crash mdformat. @@ -59,49 +73,67 @@ def fence(node: "RenderTreeNode", context: "RenderContext") -> str: f"Failed formatting content of a {lang} code block " f"(line {node.map[0] + 1} before formatting)" ) - # This "elif" is the *only* thing added to the upstream `fence` implementation! - elif lang.startswith("{") and lang.endswith("}"): - code_block = format_directive_content(code_block) + body = unformatted_body + else: + body = unformatted_body - # The code block must not include as long or longer sequence of `fence_char`s - # as the fence string itself - fence_len = max(3, longest_consecutive_sequence(code_block, fence_char) + 1) + # The fenced contents must not include as long or longer sequence of `fence_char`s + # as the fence string itself. + fence_len = max(3, longest_consecutive_sequence(body, fence_char) + 1) fence_str = fence_char * fence_len - - return f"{fence_str}{info_str}\n{code_block}{fence_str}" - - -def format_directive_content(raw_content: str) -> str: - parse_result = parse_opts_and_content(raw_content) - if not parse_result: - return raw_content - unformatted_yaml, content = parse_result - dump_stream = io.StringIO() - try: - parsed = yaml.load(unformatted_yaml) - yaml.dump(parsed, stream=dump_stream) - except ruamel.yaml.YAMLError: - LOGGER.warning("Invalid YAML in MyST directive options.") - return raw_content - formatted_yaml = dump_stream.getvalue() - - # Remove the YAML closing tag if added by `ruamel.yaml` - if formatted_yaml.endswith("\n...\n"): - formatted_yaml = formatted_yaml[:-4] - - # Convert empty YAML to most concise form - if formatted_yaml == "null\n": - formatted_yaml = "" - - formatted = "---\n" + formatted_yaml + "---\n" - if content: - formatted += content + "\n" + formatted_fence = f"{fence_str}{info_str}\n" + # Heuristic to ensure child colon fences recombine with a leading blank line for + # consistency. + if body.startswith(":::"): + formatted_fence += "\n" + formatted_fence += f"{body}{fence_str}" + return formatted_fence + + +def format_directive_content(raw_content: str, context) -> str: + unformatted_yaml, content = parse_opts_and_content(raw_content) + formatted = "" + if unformatted_yaml is not None: + dump_stream = io.StringIO() + try: + parsed = yaml.load(unformatted_yaml) + yaml.dump(parsed, stream=dump_stream) + except ruamel.yaml.YAMLError: + LOGGER.warning("Invalid YAML in MyST directive options.") + return raw_content + if parsed: + formatted += "\n".join([f":{k}: {v}" for k, v in parsed.items()]) + "\n\n" + if content.strip(): + # Get currently active plugin modules + active_plugins = context.options.get("parser_extension", []) + + # Resolve modules back to their string names + # mdformat.text() requires names (str), not objects + extension_names = [ + name + for name, plugin in mdformat.plugins.PARSER_EXTENSIONS.items() + if plugin in active_plugins + ] + formatted += mdformat.text( + content, options=context.options, extensions=extension_names + ) + if not formatted: + return "" + # In both the content-containing case (in which case we might have many terminal + # newlines in the content) and the options-only case (in which case, we have + # inserted two newlines above to separate the options from the non-existent content) + # we want to ensure we end in _exactly_ one newline. + formatted = formatted.rstrip("\n") + "\n" + # Unless the last thing in the content is a colon-fence, which for consistency we + # always add padding to. + if formatted.endswith(":::\n"): + formatted += "\n" return formatted -def parse_opts_and_content(raw_content: str) -> tuple[str, str] | None: +def parse_opts_and_content(raw_content: str) -> tuple[str | None, str]: if not raw_content: - return None + return None, raw_content lines = raw_content.splitlines() line = lines.pop(0) yaml_lines = [] @@ -111,15 +143,17 @@ def parse_opts_and_content(raw_content: str) -> tuple[str, str] | None: if all(c == "-" for c in line) and len(line) >= 3: break yaml_lines.append(line) - elif line.lstrip().startswith(":"): + elif line.lstrip().startswith(":") and not line.lstrip().startswith(":::"): yaml_lines.append(line.lstrip()[1:]) while lines: - if not lines[0].lstrip().startswith(":"): + if not lines[0].lstrip().startswith(":") or lines[0].lstrip().startswith( + ":::" + ): break line = lines.pop(0).lstrip()[1:] yaml_lines.append(line) else: - return None + return None, raw_content first_line_is_empty_but_second_line_isnt = ( len(lines) >= 2 and not lines[0].strip() and lines[1].strip() diff --git a/mdformat_myst/plugin.py b/mdformat_myst/plugin.py index 88f2a1a..b03c912 100644 --- a/mdformat_myst/plugin.py +++ b/mdformat_myst/plugin.py @@ -6,6 +6,7 @@ from markdown_it import MarkdownIt import mdformat.plugins from mdformat.renderer import RenderContext, RenderTreeNode +from mdit_py_plugins.colon_fence import colon_fence_plugin from mdit_py_plugins.dollarmath import dollarmath_plugin from mdit_py_plugins.myst_blocks import myst_block_plugin from mdit_py_plugins.myst_role import myst_role_plugin @@ -17,23 +18,16 @@ def update_mdit(mdit: MarkdownIt) -> None: - # Enable mdformat-tables plugin - tables_plugin = mdformat.plugins.PARSER_EXTENSIONS["tables"] - if tables_plugin not in mdit.options["parser_extension"]: - mdit.options["parser_extension"].append(tables_plugin) - tables_plugin.update_mdit(mdit) - - # Enable mdformat-front-matters plugin - front_matters_plugin = mdformat.plugins.PARSER_EXTENSIONS["front_matters"] - if front_matters_plugin not in mdit.options["parser_extension"]: - mdit.options["parser_extension"].append(front_matters_plugin) - front_matters_plugin.update_mdit(mdit) - - # Enable mdformat-footnote plugin - footnote_plugin = mdformat.plugins.PARSER_EXTENSIONS["footnote"] - if footnote_plugin not in mdit.options["parser_extension"]: - mdit.options["parser_extension"].append(footnote_plugin) - footnote_plugin.update_mdit(mdit) + plugins_to_enable = [ + "tables", + "front_matters", + "footnote", + ] + for plugin_name in plugins_to_enable: + plugin = mdformat.plugins.PARSER_EXTENSIONS[plugin_name] + if plugin not in mdit.options["parser_extension"]: + mdit.options["parser_extension"].append(plugin) + plugin.update_mdit(mdit) # Enable MyST role markdown-it extension mdit.use(myst_role_plugin) @@ -45,6 +39,9 @@ def update_mdit(mdit: MarkdownIt) -> None: # Enable dollarmath markdown-it extension mdit.use(dollarmath_plugin) + # Enable support for the colon fence syntax + mdit.use(colon_fence_plugin) + # Trick `mdformat`s AST validation by removing HTML rendering of code # blocks and fences. Directives are parsed as code fences and we # modify them in ways that don't break MyST AST but do break @@ -117,7 +114,6 @@ def _escape_paragraph(text: str, node: RenderTreeNode, context: RenderContext) - lines = text.split("\n") for i in range(len(lines)): - # Three or more "+" chars are interpreted as a block break. Escape them. space_removed = lines[i].replace(" ", "") if space_removed.startswith("+++"): @@ -144,8 +140,11 @@ def _escape_text(text: str, node: RenderTreeNode, context: RenderContext) -> str return text +CHANGES_AST = True RENDERERS = { "blockquote": _math_block_safe_blockquote_renderer, + "colon_fence": fence, + "fence": fence, "myst_role": _role_renderer, "myst_line_comment": _comment_renderer, "myst_block_break": _blockbreak_renderer, @@ -153,6 +152,5 @@ def _escape_text(text: str, node: RenderTreeNode, context: RenderContext) -> str "math_inline": _math_inline_renderer, "math_block_label": _math_block_label_renderer, "math_block": _math_block_renderer, - "fence": fence, } POSTPROCESSORS = {"paragraph": _escape_paragraph, "text": _escape_text} diff --git a/pyproject.toml b/pyproject.toml index 19c5378..14624ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ keywords = "mdformat,markdown,formatter,gfm" requires-python = ">=3.10" requires = [ + "markdown-it-py >= 2.0.0", "mdformat >=0.7.0", "mdit-py-plugins >=0.3.0", "mdformat-front-matters >= 1.0.0", diff --git a/tests/data/fixtures.md b/tests/data/fixtures.md index 0b70e44..54aac21 100644 --- a/tests/data/fixtures.md +++ b/tests/data/fixtures.md @@ -435,34 +435,30 @@ Content ``` . ```{some-directive} args ---- -option1: 1 -option2: hello ---- +:option1: 1 +:option2: hello + Content ``` ```{some-directive} args ---- -option1: 1 -option2: hello ---- +:option1: 1 +:option2: hello + Content ``` ```{some-directive} args ---- -option1: 1 -option2: hello ---- +:option1: 1 +:option2: hello + Content ``` ```{some-directive} args ---- -option1: 1 -option2: hello ---- +:option1: 1 +:option2: hello + Content ``` . @@ -476,8 +472,6 @@ Content ``` . ```{some-directive} args ---- ---- Content ``` . @@ -490,9 +484,7 @@ MyST directive no content ``` . ```{some-directive} args ---- -letter: a ---- +:letter: a ``` . @@ -504,3 +496,129 @@ MyST directive, no opts or content ```{some-directive} args ``` . +MyST colon fenced directive with title +. +:::{admonition} MyST colon fenced directive with a title +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor +incididunt ut labore et dolore magna aliqua. +::: +. +:::{admonition} MyST colon fenced directive with a title +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor +incididunt ut labore et dolore magna aliqua. +::: +. + +MyST colon fenced directive with metadata +. +:::{admonition} MyST colon fenced directive with metadata +:class: foo +:truc: bla +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor +incididunt ut labore et dolore magna aliqua. +::: +. +:::{admonition} MyST colon fenced directive with metadata +:class: foo +:truc: bla + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor +incididunt ut labore et dolore magna aliqua. +::: +. + +MyST colon fenced directive with nested directive +. +::::{admonition} Parent directive +:::{image} foo.png +--- +class: foo +alt: An image +--- +::: +:::: +. +::::{admonition} Parent directive + +:::{image} foo.png +:class: foo +:alt: An image +::: + +:::: +. + +MyST colon fenced directive with multiple nested admonitions +. +::::{admonition} Multiple nested admonitions +:::{admonition} +First nested admonition content. +::: +:::{admonition} +Second nested admonition content. +::: +:::{admonition} +Third nested admonition content. +::: +:::: +. +::::{admonition} Multiple nested admonitions + +:::{admonition} +First nested admonition content. +::: + +:::{admonition} +Second nested admonition content. +::: + +:::{admonition} +Third nested admonition content. +::: + +:::: +. + +MyST colon fenced directive with mixed content and nested directives +. +::::{hint} A hint with nested tips and paragraphs +This is some introductory text. +:::{tip} +A nested tip with content. +::: +More text between directives. +:::{tip} +Another nested tip. +::: +Concluding text. +:::: +. +::::{hint} A hint with nested tips and paragraphs +This is some introductory text. + +:::{tip} +A nested tip with content. +::: + +More text between directives. + +:::{tip} +Another nested tip. +::: + +Concluding text. +:::: +. + +MyST colon fenced directive nested in list +. +- Item with directive + :::{tip} Nested tip in list item + Tip content inside a list item. + ::: +. +- Item with directive + :::{tip} Nested tip in list item + Tip content inside a list item. + ::: +.