add Provenance subobject and LoadedModel

Captures the MeTRAbs SHA-256 and filename plus tensorflow /
tensorflow-metal / numpy / neuropose / python versions, and reserves
slots for seed, deterministic, and analysis_config. Populated
automatically by Estimator.process_video when the model was loaded via
load_model; propagates into JobResults and BenchmarkResult via the
existing output path. None on the injected-model test path where no
SHA is known.

_model.load_metrabs_model now returns a LoadedModel dataclass so the
estimator can bundle the TF handle with the pinned SHA without
re-hashing the tarball on every daemon startup. All test fakes and
the integration smoke tests updated to unwrap .model.

Bumps the optional schema_version field on VideoPredictions and
BenchmarkResult to default=CURRENT_VERSION so fresh writes stamp the
latest version; legacy payloads without it are migrated on load via
the chain registered in the previous commit.
This commit is contained in:
Levi Neuwirth 2026-04-18 17:10:52 -04:00
parent 9c549fd9e2
commit fe8e417aa0
10 changed files with 549 additions and 31 deletions

View File

@ -222,6 +222,23 @@ be split into per-release sections once tagging begins.
at `CURRENT_VERSION = 2`, with registered v1 → v2 migrations for at `CURRENT_VERSION = 2`, with registered v1 → v2 migrations for
`VideoPredictions` and `BenchmarkResult` that add the optional `VideoPredictions` and `BenchmarkResult` that add the optional
`provenance` field. `provenance` field.
- **`neuropose.io.Provenance`** — reproducibility envelope for every
inference run. Populated automatically by `Estimator.process_video`
when the model was loaded via `load_model` (the production path)
and attached to the output `VideoPredictions`; propagates from
there into `JobResults` (per-video) and `BenchmarkResult` (via the
benchmark loop). Captures the MeTRAbs artifact SHA-256 and
filename, `tensorflow` / `tensorflow-metal` / `numpy` /
`neuropose` / Python versions, and reserved slots for a `seed`,
`deterministic` flag (Track 2), and `analysis_config` (Phase 0
YAML pipeline). `None` on the injected-model test path where
NeuroPose has no way to fingerprint the supplied artifact. Frozen
pydantic model with `extra="forbid"` and
`protected_namespaces=()` so the `model_*` field names do not
collide with pydantic v2's internal namespace. `_model.load_metrabs_model`
now returns a `LoadedModel` dataclass bundling the TF handle with
the pinned SHA and filename so the estimator can build the
`Provenance` without re-hashing the tarball.
- **`neuropose.benchmark`** — multi-pass inference benchmarking for - **`neuropose.benchmark`** — multi-pass inference benchmarking for
a single video. `run_benchmark()` runs `process_video` N times a single video. `run_benchmark()` runs `process_video` N times
(default 5), always discards the first pass as warmup (graph (default 5), always discards the first pass as warmup (graph

View File

@ -41,11 +41,33 @@ import os
import shutil import shutil
import tarfile import tarfile
import urllib.request import urllib.request
from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class LoadedModel:
    """Result of :func:`load_metrabs_model`.

    Bundles the loaded TensorFlow model with the provenance metadata
    identifying which artifact it came from. Callers that only want
    the model reach for :attr:`model`; callers that build a
    :class:`~neuropose.io.Provenance` (primarily
    :class:`~neuropose.estimator.Estimator`) also pull :attr:`sha256`
    and :attr:`filename`.

    Frozen: once :func:`load_metrabs_model` has produced a
    ``LoadedModel``, nothing downstream should edit the identity of
    the artifact it describes.
    """

    # Opaque TF SavedModel handle — typed Any because TensorFlow is an
    # optional import in this module.
    model: Any
    # Pinned SHA-256 of the artifact tarball (hex, lowercase).
    sha256: str
    # Canonical basename of the artifact tarball.
    filename: str
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Model artifact: pinned URL and checksum. # Model artifact: pinned URL and checksum.
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -74,7 +96,7 @@ _REQUIRED_MODEL_ATTRS = (
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def load_metrabs_model(cache_dir: Path | None = None) -> Any: def load_metrabs_model(cache_dir: Path | None = None) -> LoadedModel:
"""Load the MeTRAbs model, downloading and caching on first use. """Load the MeTRAbs model, downloading and caching on first use.
Parameters Parameters
@ -87,9 +109,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
Returns Returns
------- -------
object LoadedModel
A TensorFlow SavedModel handle exposing ``detect_poses`` and Bundle containing the TensorFlow SavedModel handle alongside
the ``per_skeleton_joint_names`` / ``per_skeleton_joint_edges`` the pinned artifact SHA-256 and filename that identify which
model the handle came from. The handle exposes ``detect_poses``
and the ``per_skeleton_joint_names`` / ``per_skeleton_joint_edges``
attributes used by :class:`neuropose.estimator.Estimator`. attributes used by :class:`neuropose.estimator.Estimator`.
Raises Raises
@ -99,6 +123,18 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
automatic retry), extraction fails, TensorFlow is not automatic retry), extraction fails, TensorFlow is not
installed, or the loaded model does not expose the expected installed, or the loaded model does not expose the expected
interface. interface.
Notes
-----
The returned ``sha256`` is the module-pinned :data:`_MODEL_SHA256`,
not a re-hash of the on-disk tarball. On the cold-cache path this
is exactly the hash we verified against before loading. On the
warm-cache path the tarball is not re-verified (that would cost a
2 GB I/O pass on every daemon startup), so the reported SHA is an
attestation of "this is the pinned artifact NeuroPose loads" rather
than a direct fingerprint of the on-disk bytes. For the threat
model this supports reproducibility, not tamper-evidence — that
is the correct semantics.
""" """
resolved_cache = Path(cache_dir) if cache_dir is not None else _default_cache_dir() resolved_cache = Path(cache_dir) if cache_dir is not None else _default_cache_dir()
resolved_cache.mkdir(parents=True, exist_ok=True) resolved_cache.mkdir(parents=True, exist_ok=True)
@ -115,7 +151,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
) )
shutil.rmtree(model_dir, ignore_errors=True) shutil.rmtree(model_dir, ignore_errors=True)
else: else:
return _tf_load(saved_model_dir) return LoadedModel(
model=_tf_load(saved_model_dir),
sha256=_MODEL_SHA256,
filename=_MODEL_ARCHIVE_NAME,
)
tarball = resolved_cache / _MODEL_ARCHIVE_NAME tarball = resolved_cache / _MODEL_ARCHIVE_NAME
@ -135,7 +175,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
_extract_tarball(tarball, model_dir) _extract_tarball(tarball, model_dir)
saved_model_dir = _find_saved_model(model_dir) saved_model_dir = _find_saved_model(model_dir)
return _tf_load(saved_model_dir) return LoadedModel(
model=_tf_load(saved_model_dir),
sha256=_MODEL_SHA256,
filename=_MODEL_ARCHIVE_NAME,
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View File

@ -105,9 +105,17 @@ def run_benchmark(
passes: list[PerformanceMetrics] = [] passes: list[PerformanceMetrics] = []
reference_predictions: VideoPredictions | None = None reference_predictions: VideoPredictions | None = None
# Provenance is identical across every pass of a single run (same
# estimator, same model, same environment), so we keep just the
# latest one we see. Doing this on every iteration is cheap — it's
# one attribute read — and means the benchmark result carries
# provenance even when ``capture_reference`` is off.
latest_provenance = None
for i in range(repeats): for i in range(repeats):
result = estimator.process_video(video_path) result = estimator.process_video(video_path)
passes.append(result.metrics) passes.append(result.metrics)
if result.predictions.provenance is not None:
latest_provenance = result.predictions.provenance
# Only the *last* measured pass needs to be captured for # Only the *last* measured pass needs to be captured for
# divergence comparison. Earlier passes would just be # divergence comparison. Earlier passes would just be
# overwritten, so we avoid holding their frame dicts in memory. # overwritten, so we avoid holding their frame dicts in memory.
@ -122,6 +130,7 @@ def run_benchmark(
warmup_pass=passes[0], warmup_pass=passes[0],
measured_passes=passes[1:], measured_passes=passes[1:],
aggregate=aggregate, aggregate=aggregate,
provenance=latest_provenance,
) )
return BenchmarkRunOutcome( return BenchmarkRunOutcome(
result=benchmark_result, result=benchmark_result,

View File

@ -34,19 +34,25 @@ model is present raises :class:`ModelNotLoadedError`.
from __future__ import annotations from __future__ import annotations
import logging import logging
import sys
import time import time
from collections.abc import Callable from collections.abc import Callable
from dataclasses import dataclass, field from dataclasses import dataclass, field
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
import cv2 import cv2
import numpy as np
import psutil import psutil
from neuropose import __version__ as _neuropose_version
from neuropose._model import load_metrabs_model from neuropose._model import load_metrabs_model
from neuropose.io import ( from neuropose.io import (
FramePrediction, FramePrediction,
PerformanceMetrics, PerformanceMetrics,
Provenance,
VideoMetadata, VideoMetadata,
VideoPredictions, VideoPredictions,
) )
@ -158,6 +164,12 @@ class Estimator:
# successful ``load_model`` below so the next ``process_video`` can # successful ``load_model`` below so the next ``process_video`` can
# pass the real number through into ``PerformanceMetrics``. # pass the real number through into ``PerformanceMetrics``.
self._model_load_seconds: float | None = None self._model_load_seconds: float | None = None
# MeTRAbs artifact identity, set only by ``load_model``. When the
# model was injected via the constructor we have no way to
# fingerprint it, so these remain ``None`` and ``process_video``
# leaves the output's ``provenance`` as ``None`` too.
self._model_sha256: str | None = None
self._model_filename: str | None = None
# -- model lifecycle ---------------------------------------------------- # -- model lifecycle ----------------------------------------------------
@ -176,6 +188,21 @@ class Estimator:
"""Return ``True`` if a model has been supplied or loaded.""" """Return ``True`` if a model has been supplied or loaded."""
return self._model is not None return self._model is not None
@property
def model_sha256(self) -> str | None:
    """Return the SHA-256 of the loaded MeTRAbs artifact, or ``None``.

    ``None`` when the model was injected via ``Estimator(model=...)``
    rather than loaded via :meth:`load_model`. The value, when
    present, is the module-pinned SHA from :mod:`neuropose._model`.
    """
    return self._model_sha256
@property
def model_filename(self) -> str | None:
    """Return the basename of the MeTRAbs artifact, or ``None`` if injected.

    Set only by :meth:`load_model`; remains ``None`` on the
    constructor-injection (test fixture) path.
    """
    return self._model_filename
def load_model(self, cache_dir: Path | None = None) -> None: def load_model(self, cache_dir: Path | None = None) -> None:
"""Load the MeTRAbs model via :func:`neuropose._model.load_metrabs_model`. """Load the MeTRAbs model via :func:`neuropose._model.load_metrabs_model`.
@ -196,9 +223,16 @@ class Estimator:
return return
logger.info("Loading MeTRAbs model (cache_dir=%s)", cache_dir) logger.info("Loading MeTRAbs model (cache_dir=%s)", cache_dir)
start = time.perf_counter() start = time.perf_counter()
self._model = load_metrabs_model(cache_dir=cache_dir) loaded = load_metrabs_model(cache_dir=cache_dir)
self._model_load_seconds = time.perf_counter() - start self._model_load_seconds = time.perf_counter() - start
logger.info("MeTRAbs model loaded in %.2f s", self._model_load_seconds) self._model = loaded.model
self._model_sha256 = loaded.sha256
self._model_filename = loaded.filename
logger.info(
"MeTRAbs model loaded in %.2f s (sha256=%s)",
self._model_load_seconds,
loaded.sha256[:12],
)
# -- inference ---------------------------------------------------------- # -- inference ----------------------------------------------------------
@ -330,11 +364,53 @@ class Estimator:
metrics.active_device, metrics.active_device,
) )
predictions = VideoPredictions(metadata=metadata, frames=frames) provenance = self._build_provenance(device_info=device_info)
predictions = VideoPredictions(
metadata=metadata,
frames=frames,
provenance=provenance,
)
return ProcessVideoResult(predictions=predictions, metrics=metrics) return ProcessVideoResult(predictions=predictions, metrics=metrics)
# -- internals ---------------------------------------------------------- # -- internals ----------------------------------------------------------
def _build_provenance(self, *, device_info: _ActiveDeviceInfo) -> Provenance | None:
    """Construct a :class:`~neuropose.io.Provenance` for the current run.

    Returns ``None`` when the model was injected via the constructor
    rather than loaded via :meth:`load_model` — in that case we
    cannot fingerprint the artifact, and a partial provenance would
    mislead readers into thinking we could.

    The device-info bundle is shared with the :class:`PerformanceMetrics`
    construction (one call to :func:`_detect_active_device` per
    ``process_video`` invocation) so that both artifacts see
    identical TF and Metal state.
    """
    # Both are set together by load_model; either being None means the
    # injected-model path, where no provenance can be attested.
    if self._model_sha256 is None or self._model_filename is None:
        return None
    metal_version: str | None = None
    if device_info.metal_active:
        # Probe the package version only when Metal is actually active;
        # absence of the package is normal on non-Metal platforms.
        try:
            metal_version = _pkg_version("tensorflow-metal")
        except PackageNotFoundError:
            metal_version = None
    # MAJOR.MINOR.MICRO only — deliberately not sys.version, which
    # embeds compiler/build metadata (see the Provenance field docs).
    python_version = (
        f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
    )
    return Provenance(
        model_sha256=self._model_sha256,
        model_filename=self._model_filename,
        tensorflow_version=device_info.tf_version,
        tensorflow_metal_version=metal_version,
        numpy_version=np.__version__,
        neuropose_version=_neuropose_version,
        python_version=python_version,
    )
def _infer_frame( def _infer_frame(
self, self,
model: Any, model: Any,

View File

@ -10,6 +10,14 @@ Atomicity: :func:`save_status`, :func:`save_job_results`, and
atomically rename, so a crash mid-write will not leave a partially-written atomically rename, so a crash mid-write will not leave a partially-written
file behind. This matches the crash-resilience guarantee the interfacer file behind. This matches the crash-resilience guarantee the interfacer
daemon makes to callers. daemon makes to callers.
Schema versioning: :class:`VideoPredictions` and :class:`BenchmarkResult`
each carry a ``schema_version`` integer. On load, the raw JSON dict is
passed through :mod:`neuropose.migrations` before pydantic validation so
that files written by earlier versions upgrade transparently. :class:`JobResults`
is a ``RootModel`` with no envelope of its own, so its loader runs the
per-video migration on each entry of its mapping. See
:mod:`neuropose.migrations` for the migration-registration pattern.
""" """
from __future__ import annotations from __future__ import annotations
@ -23,6 +31,13 @@ from typing import Annotated, Any, Literal
from pydantic import BaseModel, ConfigDict, Field, RootModel, model_validator from pydantic import BaseModel, ConfigDict, Field, RootModel, model_validator
from neuropose.migrations import (
CURRENT_VERSION,
migrate_benchmark_result,
migrate_job_results,
migrate_video_predictions,
)
class JobStatus(StrEnum): class JobStatus(StrEnum):
"""Lifecycle state of a single processing job.""" """Lifecycle state of a single processing job."""
@ -157,6 +172,104 @@ class PerformanceMetrics(BaseModel):
) )
class Provenance(BaseModel):
    """Reproducibility-grade record of the environment that produced a payload.

    Populated by the estimator on every inference run when the MeTRAbs
    model was loaded through
    :meth:`neuropose.estimator.Estimator.load_model` (the production
    path). ``None`` when the model was injected directly via the
    ``Estimator(model=...)`` constructor (the test-fixture path), since
    NeuroPose has no way to fingerprint a model it did not load itself.

    Paper C's reproducibility story rests on this envelope: two runs
    that produce equal ``Provenance`` objects against the same input
    are expected to produce equal output (modulo non-determinism
    controlled by ``deterministic``). Reviewers who want to re-derive a
    figure from raw video need exactly these fields.

    Frozen so a captured ``Provenance`` cannot be mutated after it has
    been attached to a result; this matches the invariant that
    provenance is a property of the run, not of the reader.

    ``protected_namespaces=()`` silences pydantic's ``model_*`` field
    warning — the ``model_sha256`` / ``model_filename`` names refer to
    the MeTRAbs model artifact, not to pydantic's internal
    ``model_validate`` / ``model_dump`` namespace, so the collision is
    cosmetic.
    """

    model_config = ConfigDict(extra="forbid", frozen=True, protected_namespaces=())

    model_sha256: str = Field(
        description=(
            "SHA-256 of the MeTRAbs model tarball (hex-encoded, lowercase). "
            "Pinned at build time in :mod:`neuropose._model` and verified on "
            "first download. Identifies the exact model weights used."
        ),
    )
    model_filename: str = Field(
        description=(
            "Canonical basename of the MeTRAbs tarball, e.g. "
            "``metrabs_eff2l_y4_384px_800k_28ds.tar.gz``. Human-readable "
            "companion to ``model_sha256``."
        ),
    )
    tensorflow_version: str = Field(
        description="Value of ``tensorflow.__version__`` at the time of the run.",
    )
    tensorflow_metal_version: str | None = Field(
        default=None,
        description=(
            "Version of the ``tensorflow-metal`` PyPI package when installed; "
            "``None`` on platforms without Metal GPU acceleration."
        ),
    )
    numpy_version: str = Field(
        description="Value of ``numpy.__version__`` at the time of the run.",
    )
    neuropose_version: str = Field(
        description="Value of ``neuropose.__version__`` at the time of the run.",
    )
    python_version: str = Field(
        description=(
            "Python version as ``MAJOR.MINOR.MICRO``, e.g. ``3.11.14``. The "
            "full ``sys.version`` string is intentionally not captured; the "
            "three-component form is stable across patch builds and avoids "
            "embedding compiler and build-date metadata."
        ),
    )
    seed: int | None = Field(
        default=None,
        description=(
            "Random seed used for the run if one was set, else ``None``. "
            "MeTRAbs inference is deterministic on a given device up to "
            "floating-point associativity, so seeding mostly matters for "
            "downstream analysis that introduces randomness (bootstraps, "
            "learned metrics)."
        ),
    )
    deterministic: bool = Field(
        default=False,
        description=(
            "``True`` if ``tf.config.experimental.enable_op_determinism()`` "
            "was active during the run. Track 2 deterministic-inference "
            "mode; the field exists in Phase 0 so payloads can record "
            "whether the run *was* deterministic without requiring a "
            "schema change when the toggle lands."
        ),
    )
    analysis_config: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Parsed YAML dict if this payload was produced by ``neuropose "
            "analyze --config <file>``. ``None`` for direct-library or "
            "``neuropose watch`` invocations. Reserved for the Phase 0 "
            "YAML-configurable analysis pipeline."
        ),
    )
class BenchmarkAggregate(BaseModel): class BenchmarkAggregate(BaseModel):
"""Distributional statistics aggregated across benchmark passes. """Distributional statistics aggregated across benchmark passes.
@ -255,6 +368,16 @@ class BenchmarkResult(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True) model_config = ConfigDict(extra="forbid", frozen=True)
schema_version: int = Field(
default=CURRENT_VERSION,
ge=1,
description=(
"Schema version of this BenchmarkResult payload. Fresh writes "
"stamp :data:`neuropose.migrations.CURRENT_VERSION`; older files "
"are migrated on load via :mod:`neuropose.migrations` before "
"pydantic validation."
),
)
video_name: str = Field( video_name: str = Field(
description="Basename of the benchmarked video (no directory components).", description="Basename of the benchmarked video (no directory components).",
) )
@ -280,6 +403,14 @@ class BenchmarkResult(BaseModel):
) )
aggregate: BenchmarkAggregate aggregate: BenchmarkAggregate
cpu_comparison: CpuComparisonResult | None = None cpu_comparison: CpuComparisonResult | None = None
provenance: Provenance | None = Field(
default=None,
description=(
"Reproducibility envelope from the benchmark run. ``None`` on "
"tests where the model was injected directly via "
"``Estimator(model=...)``."
),
)
class JointAxisExtractor(BaseModel): class JointAxisExtractor(BaseModel):
@ -469,9 +600,30 @@ class VideoPredictions(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True) model_config = ConfigDict(extra="forbid", frozen=True)
schema_version: int = Field(
default=CURRENT_VERSION,
ge=1,
description=(
"Schema version of this VideoPredictions payload. Fresh writes "
"stamp :data:`neuropose.migrations.CURRENT_VERSION`; files written "
"by older NeuroPose versions are migrated to the current version "
"by :mod:`neuropose.migrations` before pydantic validation."
),
)
metadata: VideoMetadata metadata: VideoMetadata
frames: dict[str, FramePrediction] frames: dict[str, FramePrediction]
segmentations: dict[str, Segmentation] = Field(default_factory=dict) segmentations: dict[str, Segmentation] = Field(default_factory=dict)
provenance: Provenance | None = Field(
default=None,
description=(
"Reproducibility envelope populated by the estimator on runs "
"where the MeTRAbs model was loaded via "
":meth:`neuropose.estimator.Estimator.load_model`. ``None`` on "
"test paths where the model was injected via "
"``Estimator(model=...)``, because no model SHA is known in "
"that case."
),
)
def frame_names(self) -> list[str]: def frame_names(self) -> list[str]:
"""Return frame identifiers in insertion order.""" """Return frame identifiers in insertion order."""
@ -623,9 +775,16 @@ class StatusFile(RootModel[dict[str, JobStatusEntry]]):
def load_video_predictions(path: Path) -> VideoPredictions: def load_video_predictions(path: Path) -> VideoPredictions:
"""Load and validate a per-video predictions JSON file.""" """Load and validate a per-video predictions JSON file.
Runs the payload through :func:`neuropose.migrations.migrate_video_predictions`
before pydantic validation so files written by older NeuroPose versions
upgrade to the current schema transparently.
"""
with path.open("r", encoding="utf-8") as f: with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f) data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_video_predictions(data)
return VideoPredictions.model_validate(data) return VideoPredictions.model_validate(data)
@ -636,9 +795,17 @@ def save_video_predictions(path: Path, predictions: VideoPredictions) -> None:
def load_job_results(path: Path) -> JobResults: def load_job_results(path: Path) -> JobResults:
"""Load and validate an aggregated per-job results JSON file.""" """Load and validate an aggregated per-job results JSON file.
Runs each video's payload through
:func:`neuropose.migrations.migrate_video_predictions` before pydantic
validation. :class:`JobResults` is a ``RootModel`` with no envelope of
its own, so migration happens per-entry rather than at the top level.
"""
with path.open("r", encoding="utf-8") as f: with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f) data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_job_results(data)
return JobResults.model_validate(data) return JobResults.model_validate(data)
@ -649,9 +816,16 @@ def save_job_results(path: Path, results: JobResults) -> None:
def load_benchmark_result(path: Path) -> BenchmarkResult: def load_benchmark_result(path: Path) -> BenchmarkResult:
"""Load and validate a benchmark-result JSON file.""" """Load and validate a benchmark-result JSON file.
Runs the payload through :func:`neuropose.migrations.migrate_benchmark_result`
before pydantic validation so files written by older NeuroPose versions
upgrade transparently.
"""
with path.open("r", encoding="utf-8") as f: with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f) data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_benchmark_result(data)
return BenchmarkResult.model_validate(data) return BenchmarkResult.model_validate(data)

View File

@ -81,26 +81,29 @@ class TestMetrabsLoader:
"""Exercises the loader's download → verify → extract → load path.""" """Exercises the loader's download → verify → extract → load path."""
def test_download_and_load(self, shared_model_cache_dir: Path) -> None: def test_download_and_load(self, shared_model_cache_dir: Path) -> None:
model = load_metrabs_model(cache_dir=shared_model_cache_dir) loaded = load_metrabs_model(cache_dir=shared_model_cache_dir)
assert model is not None assert loaded.model is not None
assert loaded.sha256
assert loaded.filename
for attr in ("detect_poses", "per_skeleton_joint_names", "per_skeleton_joint_edges"): for attr in ("detect_poses", "per_skeleton_joint_names", "per_skeleton_joint_edges"):
assert hasattr(model, attr), f"loaded model is missing {attr}" assert hasattr(loaded.model, attr), f"loaded model is missing {attr}"
def test_second_call_uses_cache(self, shared_model_cache_dir: Path) -> None: def test_second_call_uses_cache(self, shared_model_cache_dir: Path) -> None:
"""Idempotent: second call should return the cached model cheaply.""" """Idempotent: second call should return the cached model cheaply."""
model_a = load_metrabs_model(cache_dir=shared_model_cache_dir) loaded_a = load_metrabs_model(cache_dir=shared_model_cache_dir)
model_b = load_metrabs_model(cache_dir=shared_model_cache_dir) loaded_b = load_metrabs_model(cache_dir=shared_model_cache_dir)
# tf.saved_model.load returns a new Python object each call, so # tf.saved_model.load returns a new Python object each call, so
# identity comparison doesn't work — but both should still # identity comparison doesn't work — but both should still
# expose the MeTRAbs interface. # expose the MeTRAbs interface, and the SHA should match.
assert hasattr(model_a, "detect_poses") assert hasattr(loaded_a.model, "detect_poses")
assert hasattr(model_b, "detect_poses") assert hasattr(loaded_b.model, "detect_poses")
assert loaded_a.sha256 == loaded_b.sha256
def test_berkeley_mhad_skeleton_is_present(self, shared_model_cache_dir: Path) -> None: def test_berkeley_mhad_skeleton_is_present(self, shared_model_cache_dir: Path) -> None:
"""The estimator pins skeleton='berkeley_mhad_43'; verify it exists.""" """The estimator pins skeleton='berkeley_mhad_43'; verify it exists."""
model = load_metrabs_model(cache_dir=shared_model_cache_dir) loaded = load_metrabs_model(cache_dir=shared_model_cache_dir)
joint_names = model.per_skeleton_joint_names["berkeley_mhad_43"] joint_names = loaded.model.per_skeleton_joint_names["berkeley_mhad_43"]
joint_edges = model.per_skeleton_joint_edges["berkeley_mhad_43"] joint_edges = loaded.model.per_skeleton_joint_edges["berkeley_mhad_43"]
# MeTRAbs exposes these as tf.Tensor objects; just verify we # MeTRAbs exposes these as tf.Tensor objects; just verify we
# can pull a shape out. # can pull a shape out.
assert joint_names.shape[0] == 43 assert joint_names.shape[0] == 43

View File

@ -50,8 +50,8 @@ def test_joint_names_match_pinned_model(metrabs_model_cache_dir: Path) -> None:
commit that bumps the model pin in :mod:`neuropose._model`. commit that bumps the model pin in :mod:`neuropose._model`.
2. Cross-check any CLI or docs that embed hardcoded joint names. 2. Cross-check any CLI or docs that embed hardcoded joint names.
""" """
model = load_metrabs_model(cache_dir=metrabs_model_cache_dir) loaded = load_metrabs_model(cache_dir=metrabs_model_cache_dir)
tensor = model.per_skeleton_joint_names["berkeley_mhad_43"] tensor = loaded.model.per_skeleton_joint_names["berkeley_mhad_43"]
model_names = tuple(tensor.numpy().astype(str).tolist()) model_names = tuple(tensor.numpy().astype(str).tolist())
assert model_names == JOINT_NAMES, ( assert model_names == JOINT_NAMES, (
"JOINT_NAMES drift detected — the hardcoded tuple in " "JOINT_NAMES drift detected — the hardcoded tuple in "

View File

@ -683,9 +683,15 @@ def stub_estimator_with_metrics(monkeypatch: pytest.MonkeyPatch):
"poses2d": np.array([[[0.0, 0.0], [1.0, 1.0]]]), "poses2d": np.array([[[0.0, 0.0], [1.0, 1.0]]]),
} }
def fake_loader(cache_dir: Path | None = None) -> object: from neuropose._model import LoadedModel
def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
del cache_dir del cache_dir
return RecordingFake() return LoadedModel(
model=RecordingFake(),
sha256="smoke_sha",
filename="metrabs_smoke.tar.gz",
)
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)

View File

@ -70,17 +70,21 @@ class TestModelGuard:
network: the loader is monkeypatched to return a sentinel, and we network: the loader is monkeypatched to return a sentinel, and we
assert it ends up as the estimator's model. assert it ends up as the estimator's model.
""" """
from neuropose._model import LoadedModel
sentinel = object() sentinel = object()
called_with: list[Path | None] = [] called_with: list[Path | None] = []
def fake_loader(cache_dir: Path | None = None) -> object: def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
called_with.append(cache_dir) called_with.append(cache_dir)
return sentinel return LoadedModel(model=sentinel, sha256="deadbeef", filename="fake.tar.gz")
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
estimator = Estimator() estimator = Estimator()
estimator.load_model(cache_dir=Path("/tmp/fake-cache")) estimator.load_model(cache_dir=Path("/tmp/fake-cache"))
assert estimator.model is sentinel assert estimator.model is sentinel
assert estimator.model_sha256 == "deadbeef"
assert estimator.model_filename == "fake.tar.gz"
assert called_with == [Path("/tmp/fake-cache")] assert called_with == [Path("/tmp/fake-cache")]
def test_load_model_is_idempotent_when_already_loaded( def test_load_model_is_idempotent_when_already_loaded(
@ -278,9 +282,15 @@ class TestPerformanceMetrics:
"poses2d": np.array([[[0.0, 0.0]]]), "poses2d": np.array([[[0.0, 0.0]]]),
} }
def fake_loader(cache_dir: Path | None = None) -> object: from neuropose._model import LoadedModel
def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
del cache_dir del cache_dir
return Recorder() return LoadedModel(
model=Recorder(),
sha256="fake_sha",
filename="metrabs_fake.tar.gz",
)
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
estimator = Estimator() estimator = Estimator()
@ -312,6 +322,88 @@ class TestPerformanceMetrics:
assert result.metrics.tensorflow_version not in {"", "unknown"} assert result.metrics.tensorflow_version not in {"", "unknown"}
class TestProvenance:
    """Provenance attachment to VideoPredictions.

    Covers the two relevant paths: the injected-model path (no SHA
    known → ``provenance=None`` on output) and the ``load_model`` path
    (SHA is known → full ``Provenance`` populated and attached).
    """

    def test_injected_model_produces_no_provenance(
        self,
        synthetic_video: Path,
        fake_metrabs_model,
    ) -> None:
        # Injected models cannot be fingerprinted, so both the output
        # provenance and the estimator's identity properties stay None.
        estimator = Estimator(model=fake_metrabs_model)
        result = estimator.process_video(synthetic_video)
        assert result.predictions.provenance is None
        assert estimator.model_sha256 is None
        assert estimator.model_filename is None

    def test_loaded_model_populates_provenance(
        self,
        synthetic_video: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        import numpy as np

        from neuropose._model import LoadedModel

        class Recorder:
            # Minimal MeTRAbs-shaped stub: one box, one 3D and 2D pose.
            def detect_poses(self, image, **kwargs):
                del image, kwargs
                return {
                    "boxes": np.array([[0.0, 0.0, 1.0, 1.0, 0.9]]),
                    "poses3d": np.array([[[0.0, 0.0, 0.0]]]),
                    "poses2d": np.array([[[0.0, 0.0]]]),
                }

        def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
            del cache_dir
            return LoadedModel(
                model=Recorder(),
                sha256="e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
                filename="metrabs_stub.tar.gz",
            )

        monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
        estimator = Estimator()
        estimator.load_model()
        result = estimator.process_video(synthetic_video)

        prov = result.predictions.provenance
        assert prov is not None
        assert prov.model_sha256.startswith("e3b0c44")
        assert prov.model_filename == "metrabs_stub.tar.gz"
        assert prov.numpy_version == np.__version__
        assert prov.python_version.count(".") == 2  # MAJOR.MINOR.MICRO
        # neuropose_version should match the package's __version__
        from neuropose import __version__ as pkg_version

        assert prov.neuropose_version == pkg_version
        # tensorflow_version should also be real (TF is in dev deps).
        assert prov.tensorflow_version not in {"", "unknown"}

    def test_model_sha256_and_filename_properties_after_load(
        self,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        from neuropose._model import LoadedModel

        def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
            del cache_dir
            return LoadedModel(model=object(), sha256="abcd", filename="x.tar.gz")

        monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
        estimator = Estimator()
        # Properties are None before any load, set after.
        assert estimator.model_sha256 is None
        assert estimator.model_filename is None
        estimator.load_model()
        assert estimator.model_sha256 == "abcd"
        assert estimator.model_filename == "x.tar.gz"
class TestErrors: class TestErrors:
def test_missing_video( def test_missing_video(
self, self,

View File

@ -22,6 +22,7 @@ from neuropose.io import (
JointPairDistanceExtractor, JointPairDistanceExtractor,
JointSpeedExtractor, JointSpeedExtractor,
PerformanceMetrics, PerformanceMetrics,
Provenance,
Segment, Segment,
Segmentation, Segmentation,
SegmentationConfig, SegmentationConfig,
@ -278,6 +279,102 @@ class TestPerformanceMetricsModel:
m.total_seconds = 2.0 m.total_seconds = 2.0
def _minimal_provenance() -> Provenance:
    """Build a ``Provenance`` carrying only the required fields."""
    required = {
        "model_sha256": "a" * 64,
        "model_filename": "metrabs_fake.tar.gz",
        "tensorflow_version": "2.18.1",
        "numpy_version": "2.0.2",
        "neuropose_version": "0.1.0.dev0",
        "python_version": "3.11.14",
    }
    return Provenance(**required)
class TestProvenanceModel:
    """Schema-level behaviour of :class:`neuropose.io.Provenance`."""

    def test_roundtrip_through_json(self) -> None:
        # Fully-populated instance: every optional field set, so the
        # roundtrip covers the whole schema.
        original = Provenance(
            model_sha256="a" * 64,
            model_filename="metrabs_fake.tar.gz",
            tensorflow_version="2.18.1",
            tensorflow_metal_version="1.2.0",
            numpy_version="2.0.2",
            neuropose_version="0.1.0.dev0",
            python_version="3.11.14",
            seed=42,
            deterministic=True,
            analysis_config={"step": "dtw", "nan_policy": "propagate"},
        )
        dumped = original.model_dump(mode="json")
        assert Provenance.model_validate(dumped) == original

    def test_optional_fields_default_to_none_and_false(self) -> None:
        prov = _minimal_provenance()
        # Optional metadata is absent by default; determinism is opt-in.
        assert prov.tensorflow_metal_version is None
        assert prov.seed is None
        assert prov.analysis_config is None
        assert prov.deterministic is False

    def test_is_frozen(self) -> None:
        prov = _minimal_provenance()
        with pytest.raises(ValidationError):
            prov.model_sha256 = "different"

    def test_extra_fields_forbidden(self) -> None:
        # Going through model_validate keeps pyright happy: the bogus key
        # never appears as a keyword argument at static-type time.
        payload = {
            "model_sha256": "x" * 64,
            "model_filename": "x.tar.gz",
            "tensorflow_version": "2.18",
            "numpy_version": "2.0",
            "neuropose_version": "0.1",
            "python_version": "3.11.14",
            "unknown_field": "bogus",
        }
        with pytest.raises(ValidationError):
            Provenance.model_validate(payload)
class TestVideoPredictionsProvenance:
    """``provenance`` on :class:`VideoPredictions` survives save/load."""

    def test_default_is_none(self) -> None:
        vp = VideoPredictions(
            metadata=VideoMetadata(frame_count=0, fps=30.0, width=32, height=32),
            frames={},
        )
        assert vp.provenance is None

    def test_roundtrip_with_provenance(self, tmp_path: Path) -> None:
        prov = Provenance(
            model_sha256="f" * 64,
            model_filename="metrabs.tar.gz",
            tensorflow_version="2.18.1",
            numpy_version="2.0.2",
            neuropose_version="0.1.0.dev0",
            python_version="3.11.14",
        )
        frame = FramePrediction(
            boxes=[[0.0, 0.0, 32.0, 32.0, 0.9]],
            poses3d=[[[1.0, 2.0, 3.0]]],
            poses2d=[[[10.0, 20.0]]],
        )
        vp = VideoPredictions(
            metadata=VideoMetadata(frame_count=1, fps=30.0, width=32, height=32),
            frames={"frame_000000": frame},
            provenance=prov,
        )

        target = tmp_path / "vp.json"
        save_video_predictions(target, vp)
        reloaded = load_video_predictions(target)
        assert reloaded == vp
        assert reloaded.provenance == prov
class TestBenchmarkResultPersistence: class TestBenchmarkResultPersistence:
def test_roundtrip_to_disk(self, tmp_path: Path) -> None: def test_roundtrip_to_disk(self, tmp_path: Path) -> None:
result = BenchmarkResult( result = BenchmarkResult(