Source code for pannb

"""A panflute filter that process ipynb inputs."""

from __future__ import annotations

import os
from typing import TYPE_CHECKING

from panflute.elements import CodeBlock, Div, Doc, Para, RawBlock
from panflute.io import run_filters
from panflute.tools import convert_text

from .util import setup_logging

if TYPE_CHECKING:
    from typing import Any

    from panflute.elements import CodeBlock, Doc

logger = setup_logging()

RAW_TEX_FORMATS = {"latex", "textile", "html", "ipynb"}
CODE_CELL_CLASSES = {"cell", "code"}
CODE_OUTPUT_CLASSES = {"output", "execute_result"}

#: priority for converting from raw-block format to AST
#: markdown first because if exist, it is likely considered a source format to be converted from
CONVERT_CELL_OUTPUT_PRIORITY = ("markdown", "html", "latex")

#: specify extra pandoc args used when calling convert_text
PANNBPANDOCARGS: list[str] = os.environ.get("PANNBPANDOCARGS", "").split()

__version__: str = "0.1.3"


[docs]def prepare_jupytext_metadata(
    doc: Doc | None,
) -> None:
    """Replace doc metadata with jupytext metadata."""
    if (
        doc is not None
        and (content := doc.content)
        and isinstance(div := content[0], Div)
        and len(div.content) == 1
        and isinstance((raw_block := div.content[0]), RawBlock)
        and raw_block.format == "ipynb"
    ):
        # overwrite
        meta = convert_text(raw_block.text, standalone=True, extra_args=PANNBPANDOCARGS)
        if metadata := meta._metadata:
            doc._metadata = metadata
            del content[0]
            logger.debug("Overwritten doc metadata by jupytext's: %s", doc.get_metadata())


[docs]def convert_cell_output(
    element=None,
    doc: Doc | None = None,
) -> Div | None:
    if (
        isinstance(element, Div)
        and CODE_OUTPUT_CLASSES.issubset(set(element.classes))
        and CODE_CELL_CLASSES.issubset(set(element.parent.classes))
    ):
        content = element.content
        # possible types are CodeBlock, Para, RawBlock
        # Para is images
        has_no_para = True
        for elem in content:
            if isinstance(elem, Para):
                has_no_para = False
                break
        # if there's a Para (which contains an Image), don't touch this and let pandoc decide
        if has_no_para:
            choices: dict[str, RawBlock] = {}
            for elem in content:
                if isinstance(elem, RawBlock):
                    choices[elem.format] = elem
            # if no choices, i.e. only CodeBlock, don't touch this and let pandoc decide
            if choices:
                for input_format in CONVERT_CELL_OUTPUT_PRIORITY:
                    if input_format in choices:
                        elem = choices[input_format]
                        if input_format in RAW_TEX_FORMATS:
                            input_format += "+raw_tex"
                        return convert_text(elem.text, input_format=input_format, extra_args=PANNBPANDOCARGS)
    return None


[docs]def remove_code_cell_classes(
    element=None,
    doc: Doc | None = None,
) -> Div | None:
    """Remove code-cell classes from code-cell.

    Pandoc by default inject a CSS that indent class with code.
    Doing this will remove that from from output code cells,
    hence also remove that extra indentation.
    """
    if isinstance(element, Div) and CODE_CELL_CLASSES.issubset(set(element.classes)):
        return Div(
            *element.content,
            identifier=element.identifier,
            classes=[cls for cls in element.classes if cls not in CODE_CELL_CLASSES],
            attributes=element.attributes,
        )
    return None


[docs]def remove_cell_input_python(
    element=None,
    doc: Doc | None = None,
) -> list | None:
    """Delete ipynb cell input with python code."""
    if (
        isinstance(element, CodeBlock)
        and element.classes == ["python"]
        and CODE_CELL_CLASSES.issubset(set(element.parent.classes))
    ):
        return []
    else:
        return None


[docs]def main(doc: Doc | None = None) -> Any:
    """a pandoc filter converting math in code block.

    Fenced code block with class math will be runned using texp.
    """
    return run_filters(
        (
            convert_cell_output,
            remove_cell_input_python,
            remove_code_cell_classes,
        ),
        prepare=prepare_jupytext_metadata,
        doc=doc,
    )


if __name__ == "__main__":
    main()