# Source code for yaml_provenance._yaml_dumper

"""
YAML dumper with provenance comments.

Provides ``dump_yaml`` which serialises a provenance-tracked configuration
to YAML with inline end-of-line comments recording the origin of each value.

The output format mirrors the approach in ``esm_parser.dict_to_yaml`` from
ESM-Tools, generalised to work without any ESM-Tools-specific dependencies.

Typical usage::

    from yaml_provenance import load_yaml, dump_yaml

    cfg = load_yaml("config.yaml")
    dump_yaml(cfg)                        # print to stdout
    dump_yaml(cfg, filepath="out.yaml")   # write to file

    from io import StringIO
    buf = StringIO()
    dump_yaml(cfg, stream=buf)
    print(buf.getvalue())
"""

import sys
from io import StringIO

from ruamel.yaml import YAML

from ._dict import DictWithProvenance
from ._helpers import clean_provenance
from ._list import ListWithProvenance


def _format_provenance_comment(provenance):
    """
    Format a provenance dict as a short comment string.

    Parameters
    ----------
    provenance : dict or None
        Provenance dict with keys ``yaml_file``, ``line``, ``col``, and
        optionally ``category`` and ``subcategory``.

    Returns
    -------
    str
        A human-readable comment string, or ``"no provenance"`` if
        ``provenance`` is ``None`` or empty.
    """
    if not provenance:
        return "no provenance"

    comment = (
        f"{provenance['yaml_file']},"
        f"line:{provenance['line']},"
        f"col:{provenance['col']}"
    )

    category = provenance.get("category")
    if category is not None:
        subcategory = provenance.get("subcategory")
        if subcategory is not None:
            comment += f",category:{category}/{subcategory}"
        else:
            comment += f",category:{category}"

    return comment


def _add_eol_comments(commented_config, config):
    """
    Recursively add end-of-line provenance comments to a ruamel.yaml
    ``CommentedMap`` / ``CommentedSeq``.

    Parameters
    ----------
    commented_config : CommentedMap or CommentedSeq
        The ruamel.yaml structure to annotate (modified in-place).
    config : DictWithProvenance or ListWithProvenance
        The provenance-tracked config to read provenance from.
    """
    if isinstance(commented_config, dict):
        for key, cvalue in commented_config.items():
            if not isinstance(config, dict):
                continue
            pvalue = config.get(key)
            if pvalue is None and key not in config:
                commented_config.yaml_add_eol_comment("no provenance", key)
                continue
            if isinstance(cvalue, (dict, list)):
                if isinstance(pvalue, (dict, list)):
                    _add_eol_comments(cvalue, pvalue)
            else:
                provenance = getattr(pvalue, "provenance", [None])[-1]
                comment = _format_provenance_comment(provenance)
                commented_config.yaml_add_eol_comment(comment, key)

    elif isinstance(commented_config, list):
        for indx, cvalue in enumerate(commented_config):
            if not isinstance(config, list) or indx >= len(config):
                continue
            pvalue = config[indx]
            if isinstance(cvalue, (dict, list)):
                if isinstance(pvalue, (dict, list)):
                    _add_eol_comments(cvalue, pvalue)
            else:
                provenance = getattr(pvalue, "provenance", [None])[-1]
                comment = _format_provenance_comment(provenance)
                commented_config.yaml_add_eol_comment(comment, indx)


def dump_yaml(config, filepath=None, stream=None):
    """
    Dump a provenance-tracked config to YAML with end-of-line provenance
    comments.

    Each scalar value is annotated with an end-of-line comment showing the
    source file, line, and column where the value originated. Values added
    programmatically (without provenance) receive a ``# no provenance``
    comment.

    Output priority: ``stream`` > ``filepath`` > stdout.

    Parameters
    ----------
    config : DictWithProvenance or ListWithProvenance
        The provenance-tracked configuration to dump.
    filepath : str or Path or None
        Destination file path. Used when ``stream`` is not given. If both
        are ``None``, output goes to stdout. The file is written as UTF-8.
    stream : file-like or None
        An output stream (e.g. ``StringIO``). Takes priority over
        ``filepath``. Useful for testing or in-memory processing.

    Returns
    -------
    None
        The YAML text is written to the selected destination.

    Examples
    --------
    >>> from yaml_provenance import load_yaml, dump_yaml
    >>> cfg = load_yaml("config.yaml")
    >>> dump_yaml(cfg)                        # to stdout
    >>> dump_yaml(cfg, filepath="out.yaml")   # to file
    >>> from io import StringIO
    >>> buf = StringIO()
    >>> dump_yaml(cfg, stream=buf)
    >>> print(buf.getvalue())
    """
    my_yaml = YAML()
    # Very large line width so long values are not wrapped -- presumably to
    # keep each value, and therefore its comment, on a single line.
    my_yaml.width = 10000

    # Register representers so DictWithProvenance / ListWithProvenance are
    # serialised as plain YAML mappings/sequences rather than custom tags.
    def _dict_representer(dumper, obj):
        return dumper.represent_mapping("tag:yaml.org,2002:map", obj)

    def _list_representer(dumper, obj):
        return dumper.represent_sequence("tag:yaml.org,2002:seq", obj)

    my_yaml.representer.add_representer(DictWithProvenance, _dict_representer)
    my_yaml.representer.add_representer(ListWithProvenance, _list_representer)

    # Strip provenance wrappers to get plain Python values.
    config_clean = clean_provenance(config)

    # Dump to an intermediate string so we can reload into a CommentedMap
    # (ruamel.yaml's round-trip type), which supports adding EOL comments.
    intermediate = StringIO()
    my_yaml.dump(config_clean, intermediate)
    intermediate.seek(0)
    config_with_comments = my_yaml.load(intermediate)

    # Walk both structures simultaneously and attach provenance comments.
    _add_eol_comments(config_with_comments, config)

    if stream is not None:
        my_yaml.dump(config_with_comments, stream)
    elif filepath is not None:
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent non-ASCII values or file names appearing in the comments.
        with open(filepath, "w", encoding="utf-8") as f:
            my_yaml.dump(config_with_comments, f)
    else:
        my_yaml.dump(config_with_comments, sys.stdout)