#!/usr/bin/env python3
"""Audit frontmatter marks (monograms + epistemic figures).

Walks ``content/**/*.md``, resolves each piece's monogram candidate
path, checks whether ``mark.svg`` exists and whether ``status:`` is
set, and emits a table plus corpus-wide coverage percentages. Output
is pure ASCII so it pipes / scrolls cleanly.

Run as::

    make audit-marks

or directly via::

    uv run python tools/audit-marks.py

Exit code is 0 whenever the audit runs; this is a report tool, not a
gate. The only non-zero exit (1) is when ``content/`` cannot be found,
i.e. the script was not started from the repo root.

The dual-form path resolver matches ``build/Marks.hs``:

* ``content/essays/foo.md`` -> ``content/essays/foo.mark.svg``
* ``content/essays/foo/index.md`` -> ``content/essays/foo/mark.svg``

Photography is excluded: visual content doesn't carry monograms or
epistemic figures by design (see PHOTOGRAPHY.md).
"""

from __future__ import annotations

import sys
from dataclasses import dataclass
from pathlib import Path

import yaml

CONTENT_ROOT = Path("content")

# Sections that ship marks by design — these get a coverage line in
# the summary even when empty (so a regression is visible). Other
# sections appear in the summary only when they contain pieces.
PRIMARY_SECTIONS = ("essays", "blog", "poetry", "fiction", "music")

# Excluded entirely: visual content (PHOTOGRAPHY.md), in-progress
# drafts, and the per-portal tag-meta sidecar tree (which is metadata
# infrastructure, not authored pieces).
SKIPPED_DIRS = ("photography", "drafts", "tag-meta")


@dataclass
class AuditRow:
    """One row of audit output for a single source file."""

    path: Path  # Markdown source, as yielded by CONTENT_ROOT.rglob
    section: str  # top-level directory name, or "standalone"
    has_monogram: bool  # candidate mark.svg exists on disk
    has_status: bool  # frontmatter carries a non-empty ``status:``

    @property
    def suggestion(self) -> str:
        """Comma-separated list of what is missing; empty when complete."""
        actions: list[str] = []
        if not self.has_monogram:
            actions.append("add mark.svg")
        if not self.has_status:
            actions.append("set status:")
        return ", ".join(actions)


def parse_frontmatter(md_path: Path) -> dict:
    """Extract the YAML frontmatter block from a Markdown file.

    Returns an empty dict on read or parse failure, when no frontmatter
    is present, or when the frontmatter is not a YAML mapping.
    Errors are non-fatal — the audit reports what it can.
    """
    try:
        text = md_path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return {}
    if not text.startswith("---"):
        return {}
    # Search from offset 3 so the opening fence itself is never matched.
    end = text.find("\n---", 3)
    if end == -1:
        return {}
    fm_block = text[3:end]
    try:
        data = yaml.safe_load(fm_block)
    except (yaml.YAMLError, ValueError):
        # ValueError: PyYAML's timestamp constructor raises it (not a
        # YAMLError) for date-shaped scalars that are not real dates,
        # e.g. ``date: 2024-02-30``. Keep that non-fatal too.
        return {}
    return data if isinstance(data, dict) else {}


def monogram_path(md_path: Path) -> Path:
    """Resolve the candidate ``mark.svg`` path for a Markdown source.

    ``.../index.md`` maps to a sibling ``mark.svg``; any other file has
    its final suffix replaced by ``.mark.svg``.
    Mirrors ``Marks.monogramCandidates`` in build/Marks.hs.
    """
    if md_path.name == "index.md":
        return md_path.parent / "mark.svg"
    return md_path.with_suffix(".mark.svg")


def section_of(path: Path) -> str:
    """Bucket a content path under its top-level section name.

    Returns ``"standalone"`` for files directly under ``content/``.
    Raises ``ValueError`` (from ``Path.relative_to``) when *path* is not
    under ``CONTENT_ROOT``.
    """
    rel = path.relative_to(CONTENT_ROOT)
    if len(rel.parts) == 1:
        return "standalone"
    return rel.parts[0]


def collect() -> list[AuditRow]:
    """Walk content/ and return one AuditRow per published source file.

    Rows come back sorted: named sections alphabetically with
    ``standalone`` last; within a section, pieces that have a status
    precede those that do not, then pieces with a monogram precede
    those without, then by path.
    """
    rows: list[AuditRow] = []
    for md_path in CONTENT_ROOT.rglob("*.md"):
        rel = md_path.relative_to(CONTENT_ROOT)
        if rel.parts and rel.parts[0] in SKIPPED_DIRS:
            continue
        # Skip tag-meta sidecars (they're not authored pages).
        if md_path.name == "_tag-meta.md":
            continue
        fm = parse_frontmatter(md_path)
        # A bare ``status:`` parses to None; str(None) would be the
        # truthy string "None", so rule that case out explicitly.
        status = fm.get("status")
        rows.append(
            AuditRow(
                path=md_path,
                section=section_of(md_path),
                has_monogram=monogram_path(md_path).is_file(),
                has_status=status is not None and bool(str(status).strip()),
            )
        )
    rows.sort(
        key=lambda r: (
            r.section == "standalone",  # False < True, so standalone last
            r.section,
            not r.has_status,
            not r.has_monogram,
            str(r.path),
        )
    )
    return rows


def fmt_check(present: bool) -> str:
    """Render a boolean as a fixed-width ASCII cell: ``OK`` or ``--``."""
    return "OK" if present else "--"


def render_table(rows: list[AuditRow]) -> None:
    """Print the per-file table, grouped under ``# section`` headings.

    Relies on *rows* already being grouped by section (as ``collect``
    returns them); a heading is printed each time the section changes.
    """
    if not rows:
        print("No content files found under content/.")
        return
    path_w = max(len(str(r.path)) for r in rows)
    path_w = min(path_w, 60)  # cap so suggestions stay on the same line
    header = f"{'PATH':<{path_w}} {'MONO':<5} {'EPIS':<5} SUGGESTION"
    print(header)
    print("-" * len(header))
    ellipsis = "..."
    current_section = None
    for r in rows:
        if r.section != current_section:
            current_section = r.section
            print(f"\n# {current_section}")
        path_str = str(r.path)
        if len(path_str) > path_w:
            # Leave room for the whole ellipsis so the cell is exactly
            # path_w wide and the columns to its right stay aligned.
            path_str = path_str[: path_w - len(ellipsis)] + ellipsis
        print(
            f"{path_str:<{path_w}} "
            f"{fmt_check(r.has_monogram):<5} "
            f"{fmt_check(r.has_status):<5} "
            f"{r.suggestion}"
        )


def render_summary(rows: list[AuditRow]) -> None:
    """Print per-section and total coverage of monograms and status.

    Sections in ``PRIMARY_SECTIONS`` are listed first, in that order,
    and always get a line — an empty one shows ``0 pieces`` so that a
    section losing all its content is visible. Remaining sections follow
    alphabetically and appear only when they contain pieces. The total
    line is omitted when there are no rows at all.
    """
    print()
    print("# Coverage")
    print("-" * 60)

    by_section: dict[str, list[AuditRow]] = {}
    for r in rows:
        by_section.setdefault(r.section, []).append(r)

    def line(label: str, group: list[AuditRow], *, show_empty: bool = False) -> None:
        n = len(group)
        if n == 0:
            # Percentages are undefined for an empty group; print only
            # the count, and only where an empty line is wanted.
            if show_empty:
                print(f"{label:<14} {n:>3} pieces")
            return
        m = sum(1 for r in group if r.has_monogram)
        e = sum(1 for r in group if r.has_status)
        print(
            f"{label:<14} {n:>3} pieces "
            f"monogram {m:>3}/{n:<3} ({m * 100 // n:>3}%) "
            f"epistemic {e:>3}/{n:<3} ({e * 100 // n:>3}%)"
        )

    for section in PRIMARY_SECTIONS:
        line(section, by_section.get(section, []), show_empty=True)
    other_sections = sorted(s for s in by_section if s not in PRIMARY_SECTIONS)
    for section in other_sections:
        line(section, by_section[section])
    print("-" * 60)
    line("total", rows)


def main() -> int:
    """Run the audit from the repo root and return the process exit code.

    Returns 1 (with a message on stderr) when ``content/`` is missing,
    otherwise 0 regardless of what the audit finds.
    """
    if not CONTENT_ROOT.is_dir():
        print(f"error: {CONTENT_ROOT}/ not found (run from repo root)", file=sys.stderr)
        return 1
    rows = collect()
    render_table(rows)
    render_summary(rows)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())