add Provenance subobject and LoadedModel

Captures the MeTRAbs SHA-256 and filename plus tensorflow /
tensorflow-metal / numpy / neuropose / python versions, and reserves
slots for seed, deterministic, and analysis_config. Populated
automatically by Estimator.process_video when the model was loaded via
load_model; propagates into JobResults and BenchmarkResult via the
existing output path. None on the injected-model test path where no
SHA is known.

_model.load_metrabs_model now returns a LoadedModel dataclass so the
estimator can bundle the TF handle with the pinned SHA without
re-hashing the tarball on every daemon startup. All test fakes and
the integration smoke tests updated to unwrap .model.

Bumps the optional schema_version field on VideoPredictions and
BenchmarkResult to default=CURRENT_VERSION so fresh writes stamp the
latest version; legacy payloads without it are migrated on load via
the chain registered in the previous commit.
This commit is contained in:
Levi Neuwirth 2026-04-18 17:10:52 -04:00
parent 9c549fd9e2
commit fe8e417aa0
10 changed files with 549 additions and 31 deletions

View File

@ -222,6 +222,23 @@ be split into per-release sections once tagging begins.
at `CURRENT_VERSION = 2`, with registered v1 → v2 migrations for
`VideoPredictions` and `BenchmarkResult` that add the optional
`provenance` field.
- **`neuropose.io.Provenance`** — reproducibility envelope for every
inference run. Populated automatically by `Estimator.process_video`
when the model was loaded via `load_model` (the production path)
and attached to the output `VideoPredictions`; propagates from
there into `JobResults` (per-video) and `BenchmarkResult` (via the
benchmark loop). Captures the MeTRAbs artifact SHA-256 and
filename, `tensorflow` / `tensorflow-metal` / `numpy` /
`neuropose` / Python versions, and reserved slots for a `seed`,
`deterministic` flag (Track 2), and `analysis_config` (Phase 0
YAML pipeline). `None` on the injected-model test path where
NeuroPose has no way to fingerprint the supplied artifact. Frozen
pydantic model with `extra="forbid"` and
`protected_namespaces=()` so the `model_*` field names do not
collide with pydantic v2's internal namespace. `_model.load_metrabs_model`
now returns a `LoadedModel` dataclass bundling the TF handle with
the pinned SHA and filename so the estimator can build the
`Provenance` without re-hashing the tarball.
- **`neuropose.benchmark`** — multi-pass inference benchmarking for
a single video. `run_benchmark()` runs `process_video` N times
(default 5), always discards the first pass as warmup (graph

View File

@ -41,11 +41,33 @@ import os
import shutil
import tarfile
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class LoadedModel:
    """Result of :func:`load_metrabs_model`.

    Bundles the loaded TensorFlow model with the provenance metadata
    that identifies which artifact it came from. Callers that only want
    the model reach for :attr:`model`; callers that build a
    :class:`~neuropose.io.Provenance` (primarily
    :class:`~neuropose.estimator.Estimator`) pull :attr:`sha256` and
    :attr:`filename` too.

    Frozen — once :func:`load_metrabs_model` has produced a
    ``LoadedModel``, nothing downstream should edit the identity of
    the artifact it describes.
    """

    # TensorFlow SavedModel handle; typed Any because TF is loaded lazily.
    model: Any
    # Module-pinned SHA-256 of the artifact tarball (hex-encoded).
    sha256: str
    # Canonical basename of the artifact tarball.
    filename: str
# ---------------------------------------------------------------------------
# Model artifact: pinned URL and checksum.
# ---------------------------------------------------------------------------
@ -74,7 +96,7 @@ _REQUIRED_MODEL_ATTRS = (
# ---------------------------------------------------------------------------
def load_metrabs_model(cache_dir: Path | None = None) -> Any:
def load_metrabs_model(cache_dir: Path | None = None) -> LoadedModel:
"""Load the MeTRAbs model, downloading and caching on first use.
Parameters
@ -87,9 +109,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
Returns
-------
object
A TensorFlow SavedModel handle exposing ``detect_poses`` and
the ``per_skeleton_joint_names`` / ``per_skeleton_joint_edges``
LoadedModel
Bundle containing the TensorFlow SavedModel handle alongside
the pinned artifact SHA-256 and filename that identify which
model the handle came from. The handle exposes ``detect_poses``
and the ``per_skeleton_joint_names`` / ``per_skeleton_joint_edges``
attributes used by :class:`neuropose.estimator.Estimator`.
Raises
@ -99,6 +123,18 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
automatic retry), extraction fails, TensorFlow is not
installed, or the loaded model does not expose the expected
interface.
Notes
-----
The returned ``sha256`` is the module-pinned :data:`_MODEL_SHA256`,
not a re-hash of the on-disk tarball. On the cold-cache path this
is exactly the hash we verified against before loading. On the
warm-cache path the tarball is not re-verified (that would cost a
2 GB I/O pass on every daemon startup), so the reported SHA is an
attestation of "this is the pinned artifact NeuroPose loads" rather
than a direct fingerprint of the on-disk bytes. For the threat
model this supports — reproducibility, not tamper-evidence — and that
is the correct semantics.
"""
resolved_cache = Path(cache_dir) if cache_dir is not None else _default_cache_dir()
resolved_cache.mkdir(parents=True, exist_ok=True)
@ -115,7 +151,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
)
shutil.rmtree(model_dir, ignore_errors=True)
else:
return _tf_load(saved_model_dir)
return LoadedModel(
model=_tf_load(saved_model_dir),
sha256=_MODEL_SHA256,
filename=_MODEL_ARCHIVE_NAME,
)
tarball = resolved_cache / _MODEL_ARCHIVE_NAME
@ -135,7 +175,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
_extract_tarball(tarball, model_dir)
saved_model_dir = _find_saved_model(model_dir)
return _tf_load(saved_model_dir)
return LoadedModel(
model=_tf_load(saved_model_dir),
sha256=_MODEL_SHA256,
filename=_MODEL_ARCHIVE_NAME,
)
# ---------------------------------------------------------------------------

View File

@ -105,9 +105,17 @@ def run_benchmark(
passes: list[PerformanceMetrics] = []
reference_predictions: VideoPredictions | None = None
# Provenance is identical across every pass of a single run (same
# estimator, same model, same environment), so we keep just the
# latest one we see. Doing this on every iteration is cheap — it's
# one attribute read — and means the benchmark result carries
# provenance even when ``capture_reference`` is off.
latest_provenance = None
for i in range(repeats):
result = estimator.process_video(video_path)
passes.append(result.metrics)
if result.predictions.provenance is not None:
latest_provenance = result.predictions.provenance
# Only the *last* measured pass needs to be captured for
# divergence comparison. Earlier passes would just be
# overwritten, so we avoid holding their frame dicts in memory.
@ -122,6 +130,7 @@ def run_benchmark(
warmup_pass=passes[0],
measured_passes=passes[1:],
aggregate=aggregate,
provenance=latest_provenance,
)
return BenchmarkRunOutcome(
result=benchmark_result,

View File

@ -34,19 +34,25 @@ model is present raises :class:`ModelNotLoadedError`.
from __future__ import annotations
import logging
import sys
import time
from collections.abc import Callable
from dataclasses import dataclass, field
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version
from pathlib import Path
from typing import Any
import cv2
import numpy as np
import psutil
from neuropose import __version__ as _neuropose_version
from neuropose._model import load_metrabs_model
from neuropose.io import (
FramePrediction,
PerformanceMetrics,
Provenance,
VideoMetadata,
VideoPredictions,
)
@ -158,6 +164,12 @@ class Estimator:
# successful ``load_model`` below so the next ``process_video`` can
# pass the real number through into ``PerformanceMetrics``.
self._model_load_seconds: float | None = None
# MeTRAbs artifact identity, set only by ``load_model``. When the
# model was injected via the constructor we have no way to
# fingerprint it, so these remain ``None`` and ``process_video``
# leaves the output's ``provenance`` as ``None`` too.
self._model_sha256: str | None = None
self._model_filename: str | None = None
# -- model lifecycle ----------------------------------------------------
@ -176,6 +188,21 @@ class Estimator:
"""Return ``True`` if a model has been supplied or loaded."""
return self._model is not None
@property
def model_sha256(self) -> str | None:
    """Return the SHA-256 of the loaded MeTRAbs artifact, or ``None``.

    ``None`` when the model was injected via ``Estimator(model=...)``
    rather than loaded via :meth:`load_model`. The value, when
    present, is the module-pinned SHA from :mod:`neuropose._model`,
    not a re-hash of the on-disk tarball.
    """
    return self._model_sha256
@property
def model_filename(self) -> str | None:
    """Return the basename of the MeTRAbs artifact, or ``None`` if injected.

    Set only by :meth:`load_model`; stays ``None`` when the model was
    supplied directly via the ``Estimator(model=...)`` constructor.
    """
    return self._model_filename
def load_model(self, cache_dir: Path | None = None) -> None:
"""Load the MeTRAbs model via :func:`neuropose._model.load_metrabs_model`.
@ -196,9 +223,16 @@ class Estimator:
return
logger.info("Loading MeTRAbs model (cache_dir=%s)", cache_dir)
start = time.perf_counter()
self._model = load_metrabs_model(cache_dir=cache_dir)
loaded = load_metrabs_model(cache_dir=cache_dir)
self._model_load_seconds = time.perf_counter() - start
logger.info("MeTRAbs model loaded in %.2f s", self._model_load_seconds)
self._model = loaded.model
self._model_sha256 = loaded.sha256
self._model_filename = loaded.filename
logger.info(
"MeTRAbs model loaded in %.2f s (sha256=%s)",
self._model_load_seconds,
loaded.sha256[:12],
)
# -- inference ----------------------------------------------------------
@ -330,11 +364,53 @@ class Estimator:
metrics.active_device,
)
predictions = VideoPredictions(metadata=metadata, frames=frames)
provenance = self._build_provenance(device_info=device_info)
predictions = VideoPredictions(
metadata=metadata,
frames=frames,
provenance=provenance,
)
return ProcessVideoResult(predictions=predictions, metrics=metrics)
# -- internals ----------------------------------------------------------
def _build_provenance(self, *, device_info: _ActiveDeviceInfo) -> Provenance | None:
    """Construct a :class:`~neuropose.io.Provenance` for the current run.

    Returns ``None`` when the model was injected via the constructor
    rather than loaded via :meth:`load_model` — in that case we
    cannot fingerprint the artifact, and a partial provenance would
    mislead readers into thinking we could.

    The device-info bundle is shared with the :class:`PerformanceMetrics`
    construction (one call to :func:`_detect_active_device` per
    ``process_video`` invocation) so that both artifacts see
    identical TF and Metal state.
    """
    sha, filename = self._model_sha256, self._model_filename
    if sha is None or filename is None:
        # Injected-model path: nothing to attest to.
        return None

    # tensorflow-metal only matters when Metal is actually active;
    # absent package => leave the field as None.
    metal_version: str | None = None
    if device_info.metal_active:
        try:
            metal_version = _pkg_version("tensorflow-metal")
        except PackageNotFoundError:
            metal_version = None

    vi = sys.version_info
    return Provenance(
        model_sha256=sha,
        model_filename=filename,
        tensorflow_version=device_info.tf_version,
        tensorflow_metal_version=metal_version,
        numpy_version=np.__version__,
        neuropose_version=_neuropose_version,
        python_version=f"{vi.major}.{vi.minor}.{vi.micro}",
    )
def _infer_frame(
self,
model: Any,

View File

@ -10,6 +10,14 @@ Atomicity: :func:`save_status`, :func:`save_job_results`, and
atomically rename, so a crash mid-write will not leave a partially-written
file behind. This matches the crash-resilience guarantee the interfacer
daemon makes to callers.
Schema versioning: :class:`VideoPredictions` and :class:`BenchmarkResult`
each carry a ``schema_version`` integer. On load, the raw JSON dict is
passed through :mod:`neuropose.migrations` before pydantic validation so
that files written by earlier versions upgrade transparently. :class:`JobResults`
is a ``RootModel`` with no envelope of its own, so its loader runs the
per-video migration on each entry of its mapping. See
:mod:`neuropose.migrations` for the migration-registration pattern.
"""
from __future__ import annotations
@ -23,6 +31,13 @@ from typing import Annotated, Any, Literal
from pydantic import BaseModel, ConfigDict, Field, RootModel, model_validator
from neuropose.migrations import (
CURRENT_VERSION,
migrate_benchmark_result,
migrate_job_results,
migrate_video_predictions,
)
class JobStatus(StrEnum):
"""Lifecycle state of a single processing job."""
@ -157,6 +172,104 @@ class PerformanceMetrics(BaseModel):
)
class Provenance(BaseModel):
    """Reproducibility-grade record of the environment that produced a payload.

    Populated by the estimator on every inference run when the MeTRAbs
    model was loaded through
    :meth:`neuropose.estimator.Estimator.load_model` (the production
    path). ``None`` when the model was injected directly via the
    ``Estimator(model=...)`` constructor (the test-fixture path), since
    NeuroPose has no way to fingerprint a model it did not load itself.

    Paper C's reproducibility story rests on this envelope: two runs
    that produced equal ``Provenance`` objects against the same input
    are expected to produce equal output (modulo non-determinism
    controlled by ``deterministic``). Reviewers who want to re-derive a
    figure from raw video need exactly these fields.

    Frozen so a captured ``Provenance`` cannot be mutated after it has
    been attached to a result; this matches the invariant that
    provenance is a property of the run, not of the reader.

    ``protected_namespaces=()`` silences pydantic's ``model_*`` field
    warning — the ``model_sha256`` / ``model_filename`` names refer to
    the MeTRAbs model artifact, not to pydantic's internal
    ``model_validate`` / ``model_dump`` namespace, so the collision is
    cosmetic.
    """

    model_config = ConfigDict(extra="forbid", frozen=True, protected_namespaces=())

    model_sha256: str = Field(
        description=(
            "SHA-256 of the MeTRAbs model tarball (hex-encoded, lowercase). "
            "Pinned at build time in :mod:`neuropose._model` and verified on "
            "first download. Identifies the exact model weights used."
        ),
    )
    model_filename: str = Field(
        description=(
            "Canonical basename of the MeTRAbs tarball, e.g. "
            "``metrabs_eff2l_y4_384px_800k_28ds.tar.gz``. Human-readable "
            "companion to ``model_sha256``."
        ),
    )
    tensorflow_version: str = Field(
        description="Value of ``tensorflow.__version__`` at the time of the run.",
    )
    tensorflow_metal_version: str | None = Field(
        default=None,
        description=(
            "Version of the ``tensorflow-metal`` PyPI package when installed; "
            "``None`` on platforms without Metal GPU acceleration."
        ),
    )
    numpy_version: str = Field(
        description="Value of ``numpy.__version__`` at the time of the run.",
    )
    neuropose_version: str = Field(
        description="Value of ``neuropose.__version__`` at the time of the run.",
    )
    python_version: str = Field(
        description=(
            "Python version as ``MAJOR.MINOR.MICRO``, e.g. ``3.11.14``. The "
            "full ``sys.version`` string is intentionally not captured; the "
            "three-component form is stable across patch builds and avoids "
            "embedding compiler and build-date metadata."
        ),
    )
    seed: int | None = Field(
        default=None,
        description=(
            "Random seed used for the run if one was set, else ``None``. "
            "MeTRAbs inference is deterministic on a given device up to "
            "floating-point associativity, so seeding mostly matters for "
            "downstream analysis that introduces randomness (bootstraps, "
            "learned metrics)."
        ),
    )
    deterministic: bool = Field(
        default=False,
        description=(
            "``True`` if ``tf.config.experimental.enable_op_determinism()`` "
            "was active during the run. Track 2 deterministic-inference "
            "mode; the field exists in Phase 0 so payloads can record "
            "whether the run *was* deterministic without requiring a "
            "schema change when the toggle lands."
        ),
    )
    analysis_config: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Parsed YAML dict if this payload was produced by ``neuropose "
            "analyze --config <file>``. ``None`` for direct-library or "
            "``neuropose watch`` invocations. Reserved for the Phase 0 "
            "YAML-configurable analysis pipeline."
        ),
    )
class BenchmarkAggregate(BaseModel):
"""Distributional statistics aggregated across benchmark passes.
@ -255,6 +368,16 @@ class BenchmarkResult(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True)
schema_version: int = Field(
default=CURRENT_VERSION,
ge=1,
description=(
"Schema version of this BenchmarkResult payload. Fresh writes "
"stamp :data:`neuropose.migrations.CURRENT_VERSION`; older files "
"are migrated on load via :mod:`neuropose.migrations` before "
"pydantic validation."
),
)
video_name: str = Field(
description="Basename of the benchmarked video (no directory components).",
)
@ -280,6 +403,14 @@ class BenchmarkResult(BaseModel):
)
aggregate: BenchmarkAggregate
cpu_comparison: CpuComparisonResult | None = None
provenance: Provenance | None = Field(
default=None,
description=(
"Reproducibility envelope from the benchmark run. ``None`` on "
"tests where the model was injected directly via "
"``Estimator(model=...)``."
),
)
class JointAxisExtractor(BaseModel):
@ -469,9 +600,30 @@ class VideoPredictions(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True)
schema_version: int = Field(
default=CURRENT_VERSION,
ge=1,
description=(
"Schema version of this VideoPredictions payload. Fresh writes "
"stamp :data:`neuropose.migrations.CURRENT_VERSION`; files written "
"by older NeuroPose versions are migrated to the current version "
"by :mod:`neuropose.migrations` before pydantic validation."
),
)
metadata: VideoMetadata
frames: dict[str, FramePrediction]
segmentations: dict[str, Segmentation] = Field(default_factory=dict)
provenance: Provenance | None = Field(
default=None,
description=(
"Reproducibility envelope populated by the estimator on runs "
"where the MeTRAbs model was loaded via "
":meth:`neuropose.estimator.Estimator.load_model`. ``None`` on "
"test paths where the model was injected via "
"``Estimator(model=...)``, because no model SHA is known in "
"that case."
),
)
def frame_names(self) -> list[str]:
"""Return frame identifiers in insertion order."""
@ -623,9 +775,16 @@ class StatusFile(RootModel[dict[str, JobStatusEntry]]):
def load_video_predictions(path: Path) -> VideoPredictions:
"""Load and validate a per-video predictions JSON file."""
"""Load and validate a per-video predictions JSON file.
Runs the payload through :func:`neuropose.migrations.migrate_video_predictions`
before pydantic validation so files written by older NeuroPose versions
upgrade to the current schema transparently.
"""
with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_video_predictions(data)
return VideoPredictions.model_validate(data)
@ -636,9 +795,17 @@ def save_video_predictions(path: Path, predictions: VideoPredictions) -> None:
def load_job_results(path: Path) -> JobResults:
"""Load and validate an aggregated per-job results JSON file."""
"""Load and validate an aggregated per-job results JSON file.
Runs each video's payload through
:func:`neuropose.migrations.migrate_video_predictions` before pydantic
validation. :class:`JobResults` is a ``RootModel`` with no envelope of
its own, so migration happens per-entry rather than at the top level.
"""
with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_job_results(data)
return JobResults.model_validate(data)
@ -649,9 +816,16 @@ def save_job_results(path: Path, results: JobResults) -> None:
def load_benchmark_result(path: Path) -> BenchmarkResult:
"""Load and validate a benchmark-result JSON file."""
"""Load and validate a benchmark-result JSON file.
Runs the payload through :func:`neuropose.migrations.migrate_benchmark_result`
before pydantic validation so files written by older NeuroPose versions
upgrade transparently.
"""
with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_benchmark_result(data)
return BenchmarkResult.model_validate(data)

View File

@ -81,26 +81,29 @@ class TestMetrabsLoader:
"""Exercises the loader's download → verify → extract → load path."""
def test_download_and_load(self, shared_model_cache_dir: Path) -> None:
model = load_metrabs_model(cache_dir=shared_model_cache_dir)
assert model is not None
loaded = load_metrabs_model(cache_dir=shared_model_cache_dir)
assert loaded.model is not None
assert loaded.sha256
assert loaded.filename
for attr in ("detect_poses", "per_skeleton_joint_names", "per_skeleton_joint_edges"):
assert hasattr(model, attr), f"loaded model is missing {attr}"
assert hasattr(loaded.model, attr), f"loaded model is missing {attr}"
def test_second_call_uses_cache(self, shared_model_cache_dir: Path) -> None:
"""Idempotent: second call should return the cached model cheaply."""
model_a = load_metrabs_model(cache_dir=shared_model_cache_dir)
model_b = load_metrabs_model(cache_dir=shared_model_cache_dir)
loaded_a = load_metrabs_model(cache_dir=shared_model_cache_dir)
loaded_b = load_metrabs_model(cache_dir=shared_model_cache_dir)
# tf.saved_model.load returns a new Python object each call, so
# identity comparison doesn't work — but both should still
# expose the MeTRAbs interface.
assert hasattr(model_a, "detect_poses")
assert hasattr(model_b, "detect_poses")
# expose the MeTRAbs interface, and the SHA should match.
assert hasattr(loaded_a.model, "detect_poses")
assert hasattr(loaded_b.model, "detect_poses")
assert loaded_a.sha256 == loaded_b.sha256
def test_berkeley_mhad_skeleton_is_present(self, shared_model_cache_dir: Path) -> None:
"""The estimator pins skeleton='berkeley_mhad_43'; verify it exists."""
model = load_metrabs_model(cache_dir=shared_model_cache_dir)
joint_names = model.per_skeleton_joint_names["berkeley_mhad_43"]
joint_edges = model.per_skeleton_joint_edges["berkeley_mhad_43"]
loaded = load_metrabs_model(cache_dir=shared_model_cache_dir)
joint_names = loaded.model.per_skeleton_joint_names["berkeley_mhad_43"]
joint_edges = loaded.model.per_skeleton_joint_edges["berkeley_mhad_43"]
# MeTRAbs exposes these as tf.Tensor objects; just verify we
# can pull a shape out.
assert joint_names.shape[0] == 43

View File

@ -50,8 +50,8 @@ def test_joint_names_match_pinned_model(metrabs_model_cache_dir: Path) -> None:
commit that bumps the model pin in :mod:`neuropose._model`.
2. Cross-check any CLI or docs that embed hardcoded joint names.
"""
model = load_metrabs_model(cache_dir=metrabs_model_cache_dir)
tensor = model.per_skeleton_joint_names["berkeley_mhad_43"]
loaded = load_metrabs_model(cache_dir=metrabs_model_cache_dir)
tensor = loaded.model.per_skeleton_joint_names["berkeley_mhad_43"]
model_names = tuple(tensor.numpy().astype(str).tolist())
assert model_names == JOINT_NAMES, (
"JOINT_NAMES drift detected — the hardcoded tuple in "

View File

@ -683,9 +683,15 @@ def stub_estimator_with_metrics(monkeypatch: pytest.MonkeyPatch):
"poses2d": np.array([[[0.0, 0.0], [1.0, 1.0]]]),
}
def fake_loader(cache_dir: Path | None = None) -> object:
from neuropose._model import LoadedModel
def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
del cache_dir
return RecordingFake()
return LoadedModel(
model=RecordingFake(),
sha256="smoke_sha",
filename="metrabs_smoke.tar.gz",
)
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)

View File

@ -70,17 +70,21 @@ class TestModelGuard:
network: the loader is monkeypatched to return a sentinel, and we
assert it ends up as the estimator's model.
"""
from neuropose._model import LoadedModel
sentinel = object()
called_with: list[Path | None] = []
def fake_loader(cache_dir: Path | None = None) -> object:
def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
called_with.append(cache_dir)
return sentinel
return LoadedModel(model=sentinel, sha256="deadbeef", filename="fake.tar.gz")
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
estimator = Estimator()
estimator.load_model(cache_dir=Path("/tmp/fake-cache"))
assert estimator.model is sentinel
assert estimator.model_sha256 == "deadbeef"
assert estimator.model_filename == "fake.tar.gz"
assert called_with == [Path("/tmp/fake-cache")]
def test_load_model_is_idempotent_when_already_loaded(
@ -278,9 +282,15 @@ class TestPerformanceMetrics:
"poses2d": np.array([[[0.0, 0.0]]]),
}
def fake_loader(cache_dir: Path | None = None) -> object:
from neuropose._model import LoadedModel
def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
del cache_dir
return Recorder()
return LoadedModel(
model=Recorder(),
sha256="fake_sha",
filename="metrabs_fake.tar.gz",
)
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
estimator = Estimator()
@ -312,6 +322,88 @@ class TestPerformanceMetrics:
assert result.metrics.tensorflow_version not in {"", "unknown"}
class TestProvenance:
    """Provenance attachment to VideoPredictions.

    Covers the two relevant paths: the injected-model path (no SHA
    known, so ``provenance=None`` on output) and the ``load_model``
    path (SHA is known, so a full ``Provenance`` is populated and
    attached).
    """

    def test_injected_model_produces_no_provenance(
        self,
        synthetic_video: Path,
        fake_metrabs_model,
    ) -> None:
        estimator = Estimator(model=fake_metrabs_model)
        outcome = estimator.process_video(synthetic_video)
        assert outcome.predictions.provenance is None
        assert estimator.model_sha256 is None
        assert estimator.model_filename is None

    def test_loaded_model_populates_provenance(
        self,
        synthetic_video: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        import numpy as np

        from neuropose._model import LoadedModel

        class StubModel:
            def detect_poses(self, image, **kwargs):
                del image, kwargs
                return {
                    "boxes": np.array([[0.0, 0.0, 1.0, 1.0, 0.9]]),
                    "poses3d": np.array([[[0.0, 0.0, 0.0]]]),
                    "poses2d": np.array([[[0.0, 0.0]]]),
                }

        def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
            del cache_dir
            return LoadedModel(
                model=StubModel(),
                sha256="e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
                filename="metrabs_stub.tar.gz",
            )

        monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
        estimator = Estimator()
        estimator.load_model()
        outcome = estimator.process_video(synthetic_video)

        envelope = outcome.predictions.provenance
        assert envelope is not None
        assert envelope.model_sha256.startswith("e3b0c44")
        assert envelope.model_filename == "metrabs_stub.tar.gz"
        assert envelope.numpy_version == np.__version__
        assert envelope.python_version.count(".") == 2  # MAJOR.MINOR.MICRO

        # neuropose_version should match the package's __version__
        from neuropose import __version__ as pkg_version

        assert envelope.neuropose_version == pkg_version
        # tensorflow_version should also be real (TF is in dev deps).
        assert envelope.tensorflow_version not in {"", "unknown"}

    def test_model_sha256_and_filename_properties_after_load(
        self,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        from neuropose._model import LoadedModel

        def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
            del cache_dir
            return LoadedModel(model=object(), sha256="abcd", filename="x.tar.gz")

        monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
        estimator = Estimator()
        assert estimator.model_sha256 is None
        assert estimator.model_filename is None

        estimator.load_model()
        assert estimator.model_sha256 == "abcd"
        assert estimator.model_filename == "x.tar.gz"
class TestErrors:
def test_missing_video(
self,

View File

@ -22,6 +22,7 @@ from neuropose.io import (
JointPairDistanceExtractor,
JointSpeedExtractor,
PerformanceMetrics,
Provenance,
Segment,
Segmentation,
SegmentationConfig,
@ -278,6 +279,102 @@ class TestPerformanceMetricsModel:
m.total_seconds = 2.0
def _minimal_provenance() -> Provenance:
    """Build a ``Provenance`` carrying only the required fields."""
    required_fields = {
        "model_sha256": "a" * 64,
        "model_filename": "metrabs_fake.tar.gz",
        "tensorflow_version": "2.18.1",
        "numpy_version": "2.0.2",
        "neuropose_version": "0.1.0.dev0",
        "python_version": "3.11.14",
    }
    return Provenance(**required_fields)
class TestProvenanceModel:
    """Schema-level behaviour of :class:`neuropose.io.Provenance`."""

    def test_roundtrip_through_json(self) -> None:
        original = Provenance(
            model_sha256="a" * 64,
            model_filename="metrabs_fake.tar.gz",
            tensorflow_version="2.18.1",
            tensorflow_metal_version="1.2.0",
            numpy_version="2.0.2",
            neuropose_version="0.1.0.dev0",
            python_version="3.11.14",
            seed=42,
            deterministic=True,
            analysis_config={"step": "dtw", "nan_policy": "propagate"},
        )
        dumped = original.model_dump(mode="json")
        assert Provenance.model_validate(dumped) == original

    def test_optional_fields_default_to_none_and_false(self) -> None:
        prov = _minimal_provenance()
        assert prov.tensorflow_metal_version is None
        assert prov.seed is None
        assert prov.deterministic is False
        assert prov.analysis_config is None

    def test_is_frozen(self) -> None:
        prov = _minimal_provenance()
        with pytest.raises(ValidationError):
            prov.model_sha256 = "different"

    def test_extra_fields_forbidden(self) -> None:
        # Construct via model_validate so pyright doesn't have to prove the
        # keyword doesn't exist on the class at static-type time.
        payload = {
            "model_sha256": "x" * 64,
            "model_filename": "x.tar.gz",
            "tensorflow_version": "2.18",
            "numpy_version": "2.0",
            "neuropose_version": "0.1",
            "python_version": "3.11.14",
            "unknown_field": "bogus",
        }
        with pytest.raises(ValidationError):
            Provenance.model_validate(payload)
class TestVideoPredictionsProvenance:
    """``provenance`` field on :class:`VideoPredictions` round-trips."""

    def test_default_is_none(self) -> None:
        predictions = VideoPredictions(
            metadata=VideoMetadata(frame_count=0, fps=30.0, width=32, height=32),
            frames={},
        )
        assert predictions.provenance is None

    def test_roundtrip_with_provenance(self, tmp_path: Path) -> None:
        envelope = Provenance(
            model_sha256="f" * 64,
            model_filename="metrabs.tar.gz",
            tensorflow_version="2.18.1",
            numpy_version="2.0.2",
            neuropose_version="0.1.0.dev0",
            python_version="3.11.14",
        )
        frame = FramePrediction(
            boxes=[[0.0, 0.0, 32.0, 32.0, 0.9]],
            poses3d=[[[1.0, 2.0, 3.0]]],
            poses2d=[[[10.0, 20.0]]],
        )
        predictions = VideoPredictions(
            metadata=VideoMetadata(frame_count=1, fps=30.0, width=32, height=32),
            frames={"frame_000000": frame},
            provenance=envelope,
        )

        out_path = tmp_path / "vp.json"
        save_video_predictions(out_path, predictions)
        rehydrated = load_video_predictions(out_path)
        assert rehydrated == predictions
        assert rehydrated.provenance == envelope
class TestBenchmarkResultPersistence:
def test_roundtrip_to_disk(self, tmp_path: Path) -> None:
result = BenchmarkResult(