levineuwirth.org/tools/extract-dimensions.py

#!/usr/bin/env python3
"""
extract-dimensions.py — Build-time pixel-dimension sidecar generator.

Walks @static/images/@ and @content/**@ for raster image files
(JPEG / PNG / GIF) and writes a @{image}.dims.yaml@ sidecar alongside
each one containing the file's pixel width and height. Consumed by
@build/Filters/Images.hs@, which attaches matching @width@ and
@height@ attributes to every <img> tag at compile time — preventing
cumulative layout shift while images load.

This is the body-image counterpart to @extract-exif.py@, which writes
photography-specific @{image}.exif.yaml@ sidecars (containing
dimensions plus camera / lens / etc.). The two complement each other:
photography templates read width / height through the EXIF sidecar
via @photographyCtx@; everything else (essay figures, blog images,
inline images) gets dimensions through @{image}.dims.yaml@ via the
filter.

Strategy:
  * Pillow's @Image.size@ is independent of EXIF, so synthetic
    images (ImageMagick gradients, GIMP exports) and EXIF-stripped
    JPEGs both yield correct dimensions.
  * Staleness check: skip when sidecar mtime > image mtime.
  * Per-image failures are logged and the walk continues; the build
    never fails on a dimensions extraction error.

Called by `make build` when .venv exists. Failures on individual
images are logged and the rest of the walk continues.
"""

from __future__ import annotations

import sys
from pathlib import Path
from typing import Any

import yaml

REPO_ROOT = Path(__file__).parent.parent

# Roots to walk. content/photography/ also gets visited (its photos
# become double-sidecared with both .exif.yaml and .dims.yaml) — that's
# harmless and keeps the contract uniform: "every raster file has a
# .dims.yaml". The few extra bytes of YAML are immaterial.
WALK_ROOTS = [
    REPO_ROOT / "static" / "images",
    REPO_ROOT / "content",
]

IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif"}


def _sidecar_path(image: Path) -> Path:
    return image.with_suffix(image.suffix + ".dims.yaml")


def _is_stale(image: Path, sidecar: Path) -> bool:
    if not sidecar.exists():
        return True
    return image.stat().st_mtime > sidecar.stat().st_mtime


def _atomic_write_yaml(path: Path, data: dict[str, Any]) -> None:
    tmp = path.with_suffix(path.suffix + ".tmp")
    with tmp.open("w", encoding="utf-8") as f:
        # Preserve a stable key order (width before height) so a manual
        # diff stays easy to read across regenerations.
        ordered = {k: data[k] for k in ("width", "height") if k in data}
        yaml.safe_dump(ordered, f, sort_keys=False, allow_unicode=True)
    tmp.replace(path)


def _read_dimensions(image: Path) -> dict[str, int]:
    from PIL import Image

    with Image.open(image) as img:
        width, height = img.size
        return {"width": int(width), "height": int(height)}


def _walk_one_root(root: Path, counters: dict[str, int]) -> None:
    if not root.exists():
        return
    for image in sorted(root.rglob("*")):
        if image.suffix.lower() not in IMAGE_EXTS:
            continue
        # Skip dotfiles, tmp files, and the .webp companions produced
        # by tools/convert-images.sh (their extension is .webp so they
        # already wouldn't match IMAGE_EXTS, but be explicit).
        if image.name.startswith(".") or image.name.endswith(".tmp"):
            continue

        sidecar = _sidecar_path(image)
        if not _is_stale(image, sidecar):
            counters["skipped"] += 1
            continue

        try:
            data = _read_dimensions(image)
        except Exception as e:  # noqa: BLE001 — keep walking
            print(f"extract-dimensions: {image}: {e}", file=sys.stderr)
            counters["failed"] += 1
            continue

        _atomic_write_yaml(sidecar, data)
        counters["written"] += 1


def main() -> int:
    counters = {"written": 0, "skipped": 0, "failed": 0}

    for root in WALK_ROOTS:
        _walk_one_root(root, counters)

    print(
        "extract-dimensions: "
        f"{counters['written']} written, "
        f"{counters['skipped']} skipped, "
        f"{counters['failed']} failed",
        file=sys.stderr,
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())