add Provenance subobject and LoadedModel

Captures the MeTRAbs SHA-256 and filename plus tensorflow /
tensorflow-metal / numpy / neuropose / python versions, and reserves
slots for seed, deterministic, and analysis_config. Populated
automatically by Estimator.process_video when the model was loaded via
load_model; propagates into JobResults and BenchmarkResult via the
existing output path. None on the injected-model test path where no
SHA is known.

_model.load_metrabs_model now returns a LoadedModel dataclass so the
estimator can bundle the TF handle with the pinned SHA without
re-hashing the tarball on every daemon startup. All test fakes and
the integration smoke tests updated to unwrap .model.

Bumps the optional schema_version field on VideoPredictions and
BenchmarkResult to default=CURRENT_VERSION so fresh writes stamp the
latest version; legacy payloads without it are migrated on load via
the chain registered in the previous commit.
This commit is contained in:
Levi Neuwirth 2026-04-18 17:10:52 -04:00
parent 9c549fd9e2
commit fe8e417aa0
10 changed files with 549 additions and 31 deletions

View File

@ -222,6 +222,23 @@ be split into per-release sections once tagging begins.
at `CURRENT_VERSION = 2`, with registered v1 → v2 migrations for at `CURRENT_VERSION = 2`, with registered v1 → v2 migrations for
`VideoPredictions` and `BenchmarkResult` that add the optional `VideoPredictions` and `BenchmarkResult` that add the optional
`provenance` field. `provenance` field.
- **`neuropose.io.Provenance`** — reproducibility envelope for every
inference run. Populated automatically by `Estimator.process_video`
when the model was loaded via `load_model` (the production path)
and attached to the output `VideoPredictions`; propagates from
there into `JobResults` (per-video) and `BenchmarkResult` (via the
benchmark loop). Captures the MeTRAbs artifact SHA-256 and
filename, `tensorflow` / `tensorflow-metal` / `numpy` /
`neuropose` / Python versions, and reserved slots for a `seed`,
`deterministic` flag (Track 2), and `analysis_config` (Phase 0
YAML pipeline). `None` on the injected-model test path where
NeuroPose has no way to fingerprint the supplied artifact. Frozen
pydantic model with `extra="forbid"` and
`protected_namespaces=()` so the `model_*` field names do not
collide with pydantic v2's internal namespace. `_model.load_metrabs_model`
now returns a `LoadedModel` dataclass bundling the TF handle with
the pinned SHA and filename so the estimator can build the
`Provenance` without re-hashing the tarball.
- **`neuropose.benchmark`** — multi-pass inference benchmarking for - **`neuropose.benchmark`** — multi-pass inference benchmarking for
a single video. `run_benchmark()` runs `process_video` N times a single video. `run_benchmark()` runs `process_video` N times
(default 5), always discards the first pass as warmup (graph (default 5), always discards the first pass as warmup (graph

View File

@ -41,11 +41,33 @@ import os
import shutil import shutil
import tarfile import tarfile
import urllib.request import urllib.request
from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class LoadedModel:
    """Result of :func:`load_metrabs_model`.

    Bundles the loaded TensorFlow model with the provenance metadata
    identifying which artifact it came from. Callers that only want
    the model reach for :attr:`model`; callers that build a
    :class:`~neuropose.io.Provenance` (primarily
    :class:`~neuropose.estimator.Estimator`) also pull :attr:`sha256`
    and :attr:`filename`.

    Frozen: once :func:`load_metrabs_model` has produced a
    ``LoadedModel``, nothing downstream should edit the identity of
    the artifact it describes.
    """

    # Opaque TF SavedModel handle — typed Any because TensorFlow is an
    # optional import in this module.
    model: Any
    # Pinned SHA-256 of the artifact tarball (hex, lowercase).
    sha256: str
    # Canonical basename of the artifact tarball.
    filename: str
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Model artifact: pinned URL and checksum. # Model artifact: pinned URL and checksum.
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -74,7 +96,7 @@ _REQUIRED_MODEL_ATTRS = (
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def load_metrabs_model(cache_dir: Path | None = None) -> Any: def load_metrabs_model(cache_dir: Path | None = None) -> LoadedModel:
"""Load the MeTRAbs model, downloading and caching on first use. """Load the MeTRAbs model, downloading and caching on first use.
Parameters Parameters
@ -87,9 +109,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
Returns Returns
------- -------
object LoadedModel
A TensorFlow SavedModel handle exposing ``detect_poses`` and Bundle containing the TensorFlow SavedModel handle alongside
the ``per_skeleton_joint_names`` / ``per_skeleton_joint_edges`` the pinned artifact SHA-256 and filename that identify which
model the handle came from. The handle exposes ``detect_poses``
and the ``per_skeleton_joint_names`` / ``per_skeleton_joint_edges``
attributes used by :class:`neuropose.estimator.Estimator`. attributes used by :class:`neuropose.estimator.Estimator`.
Raises Raises
@ -99,6 +123,18 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
automatic retry), extraction fails, TensorFlow is not automatic retry), extraction fails, TensorFlow is not
installed, or the loaded model does not expose the expected installed, or the loaded model does not expose the expected
interface. interface.
Notes
-----
The returned ``sha256`` is the module-pinned :data:`_MODEL_SHA256`,
not a re-hash of the on-disk tarball. On the cold-cache path this
is exactly the hash we verified against before loading. On the
warm-cache path the tarball is not re-verified (that would cost a
2 GB I/O pass on every daemon startup), so the reported SHA is an
attestation of "this is the pinned artifact NeuroPose loads" rather
than a direct fingerprint of the on-disk bytes. For the threat
model this supports reproducibility, not tamper-evidence — that
is the correct semantics.
""" """
resolved_cache = Path(cache_dir) if cache_dir is not None else _default_cache_dir() resolved_cache = Path(cache_dir) if cache_dir is not None else _default_cache_dir()
resolved_cache.mkdir(parents=True, exist_ok=True) resolved_cache.mkdir(parents=True, exist_ok=True)
@ -115,7 +151,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
) )
shutil.rmtree(model_dir, ignore_errors=True) shutil.rmtree(model_dir, ignore_errors=True)
else: else:
return _tf_load(saved_model_dir) return LoadedModel(
model=_tf_load(saved_model_dir),
sha256=_MODEL_SHA256,
filename=_MODEL_ARCHIVE_NAME,
)
tarball = resolved_cache / _MODEL_ARCHIVE_NAME tarball = resolved_cache / _MODEL_ARCHIVE_NAME
@ -135,7 +175,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
_extract_tarball(tarball, model_dir) _extract_tarball(tarball, model_dir)
saved_model_dir = _find_saved_model(model_dir) saved_model_dir = _find_saved_model(model_dir)
return _tf_load(saved_model_dir) return LoadedModel(
model=_tf_load(saved_model_dir),
sha256=_MODEL_SHA256,
filename=_MODEL_ARCHIVE_NAME,
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View File

@ -105,9 +105,17 @@ def run_benchmark(
passes: list[PerformanceMetrics] = [] passes: list[PerformanceMetrics] = []
reference_predictions: VideoPredictions | None = None reference_predictions: VideoPredictions | None = None
# Provenance is identical across every pass of a single run (same
# estimator, same model, same environment), so we keep just the
# latest one we see. Doing this on every iteration is cheap — it's
# one attribute read — and means the benchmark result carries
# provenance even when ``capture_reference`` is off.
latest_provenance = None
for i in range(repeats): for i in range(repeats):
result = estimator.process_video(video_path) result = estimator.process_video(video_path)
passes.append(result.metrics) passes.append(result.metrics)
if result.predictions.provenance is not None:
latest_provenance = result.predictions.provenance
# Only the *last* measured pass needs to be captured for # Only the *last* measured pass needs to be captured for
# divergence comparison. Earlier passes would just be # divergence comparison. Earlier passes would just be
# overwritten, so we avoid holding their frame dicts in memory. # overwritten, so we avoid holding their frame dicts in memory.
@ -122,6 +130,7 @@ def run_benchmark(
warmup_pass=passes[0], warmup_pass=passes[0],
measured_passes=passes[1:], measured_passes=passes[1:],
aggregate=aggregate, aggregate=aggregate,
provenance=latest_provenance,
) )
return BenchmarkRunOutcome( return BenchmarkRunOutcome(
result=benchmark_result, result=benchmark_result,

View File

@ -34,19 +34,25 @@ model is present raises :class:`ModelNotLoadedError`.
from __future__ import annotations from __future__ import annotations
import logging import logging
import sys
import time import time
from collections.abc import Callable from collections.abc import Callable
from dataclasses import dataclass, field from dataclasses import dataclass, field
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
import cv2 import cv2
import numpy as np
import psutil import psutil
from neuropose import __version__ as _neuropose_version
from neuropose._model import load_metrabs_model from neuropose._model import load_metrabs_model
from neuropose.io import ( from neuropose.io import (
FramePrediction, FramePrediction,
PerformanceMetrics, PerformanceMetrics,
Provenance,
VideoMetadata, VideoMetadata,
VideoPredictions, VideoPredictions,
) )
@ -158,6 +164,12 @@ class Estimator:
# successful ``load_model`` below so the next ``process_video`` can # successful ``load_model`` below so the next ``process_video`` can
# pass the real number through into ``PerformanceMetrics``. # pass the real number through into ``PerformanceMetrics``.
self._model_load_seconds: float | None = None self._model_load_seconds: float | None = None
# MeTRAbs artifact identity, set only by ``load_model``. When the
# model was injected via the constructor we have no way to
# fingerprint it, so these remain ``None`` and ``process_video``
# leaves the output's ``provenance`` as ``None`` too.
self._model_sha256: str | None = None
self._model_filename: str | None = None
# -- model lifecycle ---------------------------------------------------- # -- model lifecycle ----------------------------------------------------
@ -176,6 +188,21 @@ class Estimator:
"""Return ``True`` if a model has been supplied or loaded.""" """Return ``True`` if a model has been supplied or loaded."""
return self._model is not None return self._model is not None
@property
def model_sha256(self) -> str | None:
    """Return the SHA-256 of the loaded MeTRAbs artifact, or ``None``.

    ``None`` when the model was injected via ``Estimator(model=...)``
    rather than loaded via :meth:`load_model`. The value, when
    present, is the module-pinned SHA from :mod:`neuropose._model`.
    """
    return self._model_sha256
@property
def model_filename(self) -> str | None:
    """Return the basename of the MeTRAbs artifact, or ``None`` if injected.

    Set only by :meth:`load_model`; remains ``None`` on the
    constructor-injection (test fixture) path.
    """
    return self._model_filename
def load_model(self, cache_dir: Path | None = None) -> None: def load_model(self, cache_dir: Path | None = None) -> None:
"""Load the MeTRAbs model via :func:`neuropose._model.load_metrabs_model`. """Load the MeTRAbs model via :func:`neuropose._model.load_metrabs_model`.
@ -196,9 +223,16 @@ class Estimator:
return return
logger.info("Loading MeTRAbs model (cache_dir=%s)", cache_dir) logger.info("Loading MeTRAbs model (cache_dir=%s)", cache_dir)
start = time.perf_counter() start = time.perf_counter()
self._model = load_metrabs_model(cache_dir=cache_dir) loaded = load_metrabs_model(cache_dir=cache_dir)
self._model_load_seconds = time.perf_counter() - start self._model_load_seconds = time.perf_counter() - start
logger.info("MeTRAbs model loaded in %.2f s", self._model_load_seconds) self._model = loaded.model
self._model_sha256 = loaded.sha256
self._model_filename = loaded.filename
logger.info(
"MeTRAbs model loaded in %.2f s (sha256=%s)",
self._model_load_seconds,
loaded.sha256[:12],
)
# -- inference ---------------------------------------------------------- # -- inference ----------------------------------------------------------
@ -330,11 +364,53 @@ class Estimator:
metrics.active_device, metrics.active_device,
) )
predictions = VideoPredictions(metadata=metadata, frames=frames) provenance = self._build_provenance(device_info=device_info)
predictions = VideoPredictions(
metadata=metadata,
frames=frames,
provenance=provenance,
)
return ProcessVideoResult(predictions=predictions, metrics=metrics) return ProcessVideoResult(predictions=predictions, metrics=metrics)
# -- internals ---------------------------------------------------------- # -- internals ----------------------------------------------------------
def _build_provenance(self, *, device_info: _ActiveDeviceInfo) -> Provenance | None:
    """Construct a :class:`~neuropose.io.Provenance` for the current run.

    Returns ``None`` when the model was injected via the constructor
    rather than loaded via :meth:`load_model` — in that case we
    cannot fingerprint the artifact, and a partial provenance would
    mislead readers into thinking we could.

    The device-info bundle is shared with the :class:`PerformanceMetrics`
    construction (one call to :func:`_detect_active_device` per
    ``process_video`` invocation) so that both artifacts see
    identical TF and Metal state.
    """
    # Both are set together by load_model; either being None means the
    # injected-model path, where no provenance can be attested.
    if self._model_sha256 is None or self._model_filename is None:
        return None
    metal_version: str | None = None
    if device_info.metal_active:
        # Probe the package version only when Metal is actually active;
        # absence of the package is normal on non-Metal platforms.
        try:
            metal_version = _pkg_version("tensorflow-metal")
        except PackageNotFoundError:
            metal_version = None
    # MAJOR.MINOR.MICRO only — deliberately not sys.version, which
    # embeds compiler/build metadata (see the Provenance field docs).
    python_version = (
        f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
    )
    return Provenance(
        model_sha256=self._model_sha256,
        model_filename=self._model_filename,
        tensorflow_version=device_info.tf_version,
        tensorflow_metal_version=metal_version,
        numpy_version=np.__version__,
        neuropose_version=_neuropose_version,
        python_version=python_version,
    )
def _infer_frame( def _infer_frame(
self, self,
model: Any, model: Any,

View File

@ -10,6 +10,14 @@ Atomicity: :func:`save_status`, :func:`save_job_results`, and
atomically rename, so a crash mid-write will not leave a partially-written atomically rename, so a crash mid-write will not leave a partially-written
file behind. This matches the crash-resilience guarantee the interfacer file behind. This matches the crash-resilience guarantee the interfacer
daemon makes to callers. daemon makes to callers.
Schema versioning: :class:`VideoPredictions` and :class:`BenchmarkResult`
each carry a ``schema_version`` integer. On load, the raw JSON dict is
passed through :mod:`neuropose.migrations` before pydantic validation so
that files written by earlier versions upgrade transparently. :class:`JobResults`
is a ``RootModel`` with no envelope of its own, so its loader runs the
per-video migration on each entry of its mapping. See
:mod:`neuropose.migrations` for the migration-registration pattern.
""" """
from __future__ import annotations from __future__ import annotations
@ -23,6 +31,13 @@ from typing import Annotated, Any, Literal
from pydantic import BaseModel, ConfigDict, Field, RootModel, model_validator from pydantic import BaseModel, ConfigDict, Field, RootModel, model_validator
from neuropose.migrations import (
CURRENT_VERSION,
migrate_benchmark_result,
migrate_job_results,
migrate_video_predictions,
)
class JobStatus(StrEnum): class JobStatus(StrEnum):
"""Lifecycle state of a single processing job.""" """Lifecycle state of a single processing job."""
@ -157,6 +172,104 @@ class PerformanceMetrics(BaseModel):
) )
class Provenance(BaseModel):
    """Reproducibility-grade record of the environment that produced a payload.

    Populated by the estimator on every inference run when the MeTRAbs
    model was loaded through
    :meth:`neuropose.estimator.Estimator.load_model` (the production
    path). ``None`` when the model was injected directly via the
    ``Estimator(model=...)`` constructor (the test-fixture path), since
    NeuroPose has no way to fingerprint a model it did not load itself.

    Paper C's reproducibility story rests on this envelope: two runs
    that produce equal ``Provenance`` objects against the same input
    are expected to produce equal output (modulo non-determinism
    controlled by ``deterministic``). Reviewers who want to re-derive a
    figure from raw video need exactly these fields.

    Frozen so a captured ``Provenance`` cannot be mutated after it has
    been attached to a result; this matches the invariant that
    provenance is a property of the run, not of the reader.

    ``protected_namespaces=()`` silences pydantic's ``model_*`` field
    warning — the ``model_sha256`` / ``model_filename`` names refer to
    the MeTRAbs model artifact, not to pydantic's internal
    ``model_validate`` / ``model_dump`` namespace, so the collision is
    cosmetic.
    """

    model_config = ConfigDict(extra="forbid", frozen=True, protected_namespaces=())

    model_sha256: str = Field(
        description=(
            "SHA-256 of the MeTRAbs model tarball (hex-encoded, lowercase). "
            "Pinned at build time in :mod:`neuropose._model` and verified on "
            "first download. Identifies the exact model weights used."
        ),
    )
    model_filename: str = Field(
        description=(
            "Canonical basename of the MeTRAbs tarball, e.g. "
            "``metrabs_eff2l_y4_384px_800k_28ds.tar.gz``. Human-readable "
            "companion to ``model_sha256``."
        ),
    )
    tensorflow_version: str = Field(
        description="Value of ``tensorflow.__version__`` at the time of the run.",
    )
    tensorflow_metal_version: str | None = Field(
        default=None,
        description=(
            "Version of the ``tensorflow-metal`` PyPI package when installed; "
            "``None`` on platforms without Metal GPU acceleration."
        ),
    )
    numpy_version: str = Field(
        description="Value of ``numpy.__version__`` at the time of the run.",
    )
    neuropose_version: str = Field(
        description="Value of ``neuropose.__version__`` at the time of the run.",
    )
    python_version: str = Field(
        description=(
            "Python version as ``MAJOR.MINOR.MICRO``, e.g. ``3.11.14``. The "
            "full ``sys.version`` string is intentionally not captured; the "
            "three-component form is stable across patch builds and avoids "
            "embedding compiler and build-date metadata."
        ),
    )
    seed: int | None = Field(
        default=None,
        description=(
            "Random seed used for the run if one was set, else ``None``. "
            "MeTRAbs inference is deterministic on a given device up to "
            "floating-point associativity, so seeding mostly matters for "
            "downstream analysis that introduces randomness (bootstraps, "
            "learned metrics)."
        ),
    )
    deterministic: bool = Field(
        default=False,
        description=(
            "``True`` if ``tf.config.experimental.enable_op_determinism()`` "
            "was active during the run. Track 2 deterministic-inference "
            "mode; the field exists in Phase 0 so payloads can record "
            "whether the run *was* deterministic without requiring a "
            "schema change when the toggle lands."
        ),
    )
    analysis_config: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Parsed YAML dict if this payload was produced by ``neuropose "
            "analyze --config <file>``. ``None`` for direct-library or "
            "``neuropose watch`` invocations. Reserved for the Phase 0 "
            "YAML-configurable analysis pipeline."
        ),
    )
class BenchmarkAggregate(BaseModel): class BenchmarkAggregate(BaseModel):
"""Distributional statistics aggregated across benchmark passes. """Distributional statistics aggregated across benchmark passes.
@ -255,6 +368,16 @@ class BenchmarkResult(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True) model_config = ConfigDict(extra="forbid", frozen=True)
schema_version: int = Field(
default=CURRENT_VERSION,
ge=1,
description=(
"Schema version of this BenchmarkResult payload. Fresh writes "
"stamp :data:`neuropose.migrations.CURRENT_VERSION`; older files "
"are migrated on load via :mod:`neuropose.migrations` before "
"pydantic validation."
),
)
video_name: str = Field( video_name: str = Field(
description="Basename of the benchmarked video (no directory components).", description="Basename of the benchmarked video (no directory components).",
) )
@ -280,6 +403,14 @@ class BenchmarkResult(BaseModel):
) )
aggregate: BenchmarkAggregate aggregate: BenchmarkAggregate
cpu_comparison: CpuComparisonResult | None = None cpu_comparison: CpuComparisonResult | None = None
provenance: Provenance | None = Field(
default=None,
description=(
"Reproducibility envelope from the benchmark run. ``None`` on "
"tests where the model was injected directly via "
"``Estimator(model=...)``."
),
)
class JointAxisExtractor(BaseModel): class JointAxisExtractor(BaseModel):
@ -469,9 +600,30 @@ class VideoPredictions(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True) model_config = ConfigDict(extra="forbid", frozen=True)
schema_version: int = Field(
default=CURRENT_VERSION,
ge=1,
description=(
"Schema version of this VideoPredictions payload. Fresh writes "
"stamp :data:`neuropose.migrations.CURRENT_VERSION`; files written "
"by older NeuroPose versions are migrated to the current version "
"by :mod:`neuropose.migrations` before pydantic validation."
),
)
metadata: VideoMetadata metadata: VideoMetadata
frames: dict[str, FramePrediction] frames: dict[str, FramePrediction]
segmentations: dict[str, Segmentation] = Field(default_factory=dict) segmentations: dict[str, Segmentation] = Field(default_factory=dict)
provenance: Provenance | None = Field(
default=None,
description=(
"Reproducibility envelope populated by the estimator on runs "
"where the MeTRAbs model was loaded via "
":meth:`neuropose.estimator.Estimator.load_model`. ``None`` on "
"test paths where the model was injected via "
"``Estimator(model=...)``, because no model SHA is known in "
"that case."
),
)
def frame_names(self) -> list[str]: def frame_names(self) -> list[str]:
"""Return frame identifiers in insertion order.""" """Return frame identifiers in insertion order."""
@ -623,9 +775,16 @@ class StatusFile(RootModel[dict[str, JobStatusEntry]]):
def load_video_predictions(path: Path) -> VideoPredictions: def load_video_predictions(path: Path) -> VideoPredictions:
"""Load and validate a per-video predictions JSON file.""" """Load and validate a per-video predictions JSON file.
Runs the payload through :func:`neuropose.migrations.migrate_video_predictions`
before pydantic validation so files written by older NeuroPose versions
upgrade to the current schema transparently.
"""
with path.open("r", encoding="utf-8") as f: with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f) data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_video_predictions(data)
return VideoPredictions.model_validate(data) return VideoPredictions.model_validate(data)
@ -636,9 +795,17 @@ def save_video_predictions(path: Path, predictions: VideoPredictions) -> None:
def load_job_results(path: Path) -> JobResults: def load_job_results(path: Path) -> JobResults:
"""Load and validate an aggregated per-job results JSON file.""" """Load and validate an aggregated per-job results JSON file.
Runs each video's payload through
:func:`neuropose.migrations.migrate_video_predictions` before pydantic
validation. :class:`JobResults` is a ``RootModel`` with no envelope of
its own, so migration happens per-entry rather than at the top level.
"""
with path.open("r", encoding="utf-8") as f: with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f) data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_job_results(data)
return JobResults.model_validate(data) return JobResults.model_validate(data)
@ -649,9 +816,16 @@ def save_job_results(path: Path, results: JobResults) -> None:
def load_benchmark_result(path: Path) -> BenchmarkResult: def load_benchmark_result(path: Path) -> BenchmarkResult:
"""Load and validate a benchmark-result JSON file.""" """Load and validate a benchmark-result JSON file.
Runs the payload through :func:`neuropose.migrations.migrate_benchmark_result`
before pydantic validation so files written by older NeuroPose versions
upgrade transparently.
"""
with path.open("r", encoding="utf-8") as f: with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f) data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_benchmark_result(data)
return BenchmarkResult.model_validate(data) return BenchmarkResult.model_validate(data)

View File

@ -81,26 +81,29 @@ class TestMetrabsLoader:
"""Exercises the loader's download → verify → extract → load path.""" """Exercises the loader's download → verify → extract → load path."""
def test_download_and_load(self, shared_model_cache_dir: Path) -> None: def test_download_and_load(self, shared_model_cache_dir: Path) -> None:
model = load_metrabs_model(cache_dir=shared_model_cache_dir) loaded = load_metrabs_model(cache_dir=shared_model_cache_dir)
assert model is not None assert loaded.model is not None
assert loaded.sha256
assert loaded.filename
for attr in ("detect_poses", "per_skeleton_joint_names", "per_skeleton_joint_edges"): for attr in ("detect_poses", "per_skeleton_joint_names", "per_skeleton_joint_edges"):
assert hasattr(model, attr), f"loaded model is missing {attr}" assert hasattr(loaded.model, attr), f"loaded model is missing {attr}"
def test_second_call_uses_cache(self, shared_model_cache_dir: Path) -> None: def test_second_call_uses_cache(self, shared_model_cache_dir: Path) -> None:
"""Idempotent: second call should return the cached model cheaply.""" """Idempotent: second call should return the cached model cheaply."""
model_a = load_metrabs_model(cache_dir=shared_model_cache_dir) loaded_a = load_metrabs_model(cache_dir=shared_model_cache_dir)
model_b = load_metrabs_model(cache_dir=shared_model_cache_dir) loaded_b = load_metrabs_model(cache_dir=shared_model_cache_dir)
# tf.saved_model.load returns a new Python object each call, so # tf.saved_model.load returns a new Python object each call, so
# identity comparison doesn't work — but both should still # identity comparison doesn't work — but both should still
# expose the MeTRAbs interface. # expose the MeTRAbs interface, and the SHA should match.
assert hasattr(model_a, "detect_poses") assert hasattr(loaded_a.model, "detect_poses")
assert hasattr(model_b, "detect_poses") assert hasattr(loaded_b.model, "detect_poses")
assert loaded_a.sha256 == loaded_b.sha256
def test_berkeley_mhad_skeleton_is_present(self, shared_model_cache_dir: Path) -> None: def test_berkeley_mhad_skeleton_is_present(self, shared_model_cache_dir: Path) -> None:
"""The estimator pins skeleton='berkeley_mhad_43'; verify it exists.""" """The estimator pins skeleton='berkeley_mhad_43'; verify it exists."""
model = load_metrabs_model(cache_dir=shared_model_cache_dir) loaded = load_metrabs_model(cache_dir=shared_model_cache_dir)
joint_names = model.per_skeleton_joint_names["berkeley_mhad_43"] joint_names = loaded.model.per_skeleton_joint_names["berkeley_mhad_43"]
joint_edges = model.per_skeleton_joint_edges["berkeley_mhad_43"] joint_edges = loaded.model.per_skeleton_joint_edges["berkeley_mhad_43"]
# MeTRAbs exposes these as tf.Tensor objects; just verify we # MeTRAbs exposes these as tf.Tensor objects; just verify we
# can pull a shape out. # can pull a shape out.
assert joint_names.shape[0] == 43 assert joint_names.shape[0] == 43

View File

@ -50,8 +50,8 @@ def test_joint_names_match_pinned_model(metrabs_model_cache_dir: Path) -> None:
commit that bumps the model pin in :mod:`neuropose._model`. commit that bumps the model pin in :mod:`neuropose._model`.
2. Cross-check any CLI or docs that embed hardcoded joint names. 2. Cross-check any CLI or docs that embed hardcoded joint names.
""" """
model = load_metrabs_model(cache_dir=metrabs_model_cache_dir) loaded = load_metrabs_model(cache_dir=metrabs_model_cache_dir)
tensor = model.per_skeleton_joint_names["berkeley_mhad_43"] tensor = loaded.model.per_skeleton_joint_names["berkeley_mhad_43"]
model_names = tuple(tensor.numpy().astype(str).tolist()) model_names = tuple(tensor.numpy().astype(str).tolist())
assert model_names == JOINT_NAMES, ( assert model_names == JOINT_NAMES, (
"JOINT_NAMES drift detected — the hardcoded tuple in " "JOINT_NAMES drift detected — the hardcoded tuple in "

View File

@ -683,9 +683,15 @@ def stub_estimator_with_metrics(monkeypatch: pytest.MonkeyPatch):
"poses2d": np.array([[[0.0, 0.0], [1.0, 1.0]]]), "poses2d": np.array([[[0.0, 0.0], [1.0, 1.0]]]),
} }
def fake_loader(cache_dir: Path | None = None) -> object: from neuropose._model import LoadedModel
def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
del cache_dir del cache_dir
return RecordingFake() return LoadedModel(
model=RecordingFake(),
sha256="smoke_sha",
filename="metrabs_smoke.tar.gz",
)
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)

View File

@ -70,17 +70,21 @@ class TestModelGuard:
network: the loader is monkeypatched to return a sentinel, and we network: the loader is monkeypatched to return a sentinel, and we
assert it ends up as the estimator's model. assert it ends up as the estimator's model.
""" """
from neuropose._model import LoadedModel
sentinel = object() sentinel = object()
called_with: list[Path | None] = [] called_with: list[Path | None] = []
def fake_loader(cache_dir: Path | None = None) -> object: def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
called_with.append(cache_dir) called_with.append(cache_dir)
return sentinel return LoadedModel(model=sentinel, sha256="deadbeef", filename="fake.tar.gz")
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
estimator = Estimator() estimator = Estimator()
estimator.load_model(cache_dir=Path("/tmp/fake-cache")) estimator.load_model(cache_dir=Path("/tmp/fake-cache"))
assert estimator.model is sentinel assert estimator.model is sentinel
assert estimator.model_sha256 == "deadbeef"
assert estimator.model_filename == "fake.tar.gz"
assert called_with == [Path("/tmp/fake-cache")] assert called_with == [Path("/tmp/fake-cache")]
def test_load_model_is_idempotent_when_already_loaded( def test_load_model_is_idempotent_when_already_loaded(
@ -278,9 +282,15 @@ class TestPerformanceMetrics:
"poses2d": np.array([[[0.0, 0.0]]]), "poses2d": np.array([[[0.0, 0.0]]]),
} }
def fake_loader(cache_dir: Path | None = None) -> object: from neuropose._model import LoadedModel
def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
del cache_dir del cache_dir
return Recorder() return LoadedModel(
model=Recorder(),
sha256="fake_sha",
filename="metrabs_fake.tar.gz",
)
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
estimator = Estimator() estimator = Estimator()
@ -312,6 +322,88 @@ class TestPerformanceMetrics:
assert result.metrics.tensorflow_version not in {"", "unknown"} assert result.metrics.tensorflow_version not in {"", "unknown"}
class TestProvenance:
    """Provenance attachment to VideoPredictions.

    Covers the two relevant paths: the injected-model path (no SHA
    known → ``provenance=None`` on output) and the ``load_model`` path
    (SHA is known → full ``Provenance`` populated and attached).
    """

    def test_injected_model_produces_no_provenance(
        self,
        synthetic_video: Path,
        fake_metrabs_model,
    ) -> None:
        # Injected models cannot be fingerprinted, so both the output
        # provenance and the estimator's identity properties stay None.
        estimator = Estimator(model=fake_metrabs_model)
        result = estimator.process_video(synthetic_video)
        assert result.predictions.provenance is None
        assert estimator.model_sha256 is None
        assert estimator.model_filename is None

    def test_loaded_model_populates_provenance(
        self,
        synthetic_video: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        import numpy as np

        from neuropose._model import LoadedModel

        class Recorder:
            # Minimal MeTRAbs-shaped stub: one box, one 3D and 2D pose.
            def detect_poses(self, image, **kwargs):
                del image, kwargs
                return {
                    "boxes": np.array([[0.0, 0.0, 1.0, 1.0, 0.9]]),
                    "poses3d": np.array([[[0.0, 0.0, 0.0]]]),
                    "poses2d": np.array([[[0.0, 0.0]]]),
                }

        def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
            del cache_dir
            return LoadedModel(
                model=Recorder(),
                sha256="e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
                filename="metrabs_stub.tar.gz",
            )

        monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
        estimator = Estimator()
        estimator.load_model()
        result = estimator.process_video(synthetic_video)

        prov = result.predictions.provenance
        assert prov is not None
        assert prov.model_sha256.startswith("e3b0c44")
        assert prov.model_filename == "metrabs_stub.tar.gz"
        assert prov.numpy_version == np.__version__
        assert prov.python_version.count(".") == 2  # MAJOR.MINOR.MICRO
        # neuropose_version should match the package's __version__
        from neuropose import __version__ as pkg_version

        assert prov.neuropose_version == pkg_version
        # tensorflow_version should also be real (TF is in dev deps).
        assert prov.tensorflow_version not in {"", "unknown"}

    def test_model_sha256_and_filename_properties_after_load(
        self,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        from neuropose._model import LoadedModel

        def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
            del cache_dir
            return LoadedModel(model=object(), sha256="abcd", filename="x.tar.gz")

        monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
        estimator = Estimator()
        # Properties are None before any load, set after.
        assert estimator.model_sha256 is None
        assert estimator.model_filename is None
        estimator.load_model()
        assert estimator.model_sha256 == "abcd"
        assert estimator.model_filename == "x.tar.gz"
class TestErrors: class TestErrors:
def test_missing_video( def test_missing_video(
self, self,

View File

@ -22,6 +22,7 @@ from neuropose.io import (
JointPairDistanceExtractor, JointPairDistanceExtractor,
JointSpeedExtractor, JointSpeedExtractor,
PerformanceMetrics, PerformanceMetrics,
Provenance,
Segment, Segment,
Segmentation, Segmentation,
SegmentationConfig, SegmentationConfig,
@ -278,6 +279,102 @@ class TestPerformanceMetricsModel:
m.total_seconds = 2.0 m.total_seconds = 2.0
def _minimal_provenance() -> Provenance:
    """Build a ``Provenance`` carrying only the required fields."""
    required = {
        "model_sha256": "a" * 64,
        "model_filename": "metrabs_fake.tar.gz",
        "tensorflow_version": "2.18.1",
        "numpy_version": "2.0.2",
        "neuropose_version": "0.1.0.dev0",
        "python_version": "3.11.14",
    }
    return Provenance(**required)
class TestProvenanceModel:
    """Schema-level behaviour of :class:`neuropose.io.Provenance`."""

    def test_roundtrip_through_json(self) -> None:
        # Fully-populated instance: every optional field set, so the
        # roundtrip covers the whole schema.
        original = Provenance(
            model_sha256="a" * 64,
            model_filename="metrabs_fake.tar.gz",
            tensorflow_version="2.18.1",
            tensorflow_metal_version="1.2.0",
            numpy_version="2.0.2",
            neuropose_version="0.1.0.dev0",
            python_version="3.11.14",
            seed=42,
            deterministic=True,
            analysis_config={"step": "dtw", "nan_policy": "propagate"},
        )
        dumped = original.model_dump(mode="json")
        assert Provenance.model_validate(dumped) == original

    def test_optional_fields_default_to_none_and_false(self) -> None:
        prov = _minimal_provenance()
        # Optional metadata is absent by default; determinism is opt-in.
        assert prov.tensorflow_metal_version is None
        assert prov.seed is None
        assert prov.analysis_config is None
        assert prov.deterministic is False

    def test_is_frozen(self) -> None:
        prov = _minimal_provenance()
        with pytest.raises(ValidationError):
            prov.model_sha256 = "different"

    def test_extra_fields_forbidden(self) -> None:
        # Going through model_validate keeps pyright happy: the bogus key
        # never appears as a keyword argument at static-type time.
        payload = {
            "model_sha256": "x" * 64,
            "model_filename": "x.tar.gz",
            "tensorflow_version": "2.18",
            "numpy_version": "2.0",
            "neuropose_version": "0.1",
            "python_version": "3.11.14",
            "unknown_field": "bogus",
        }
        with pytest.raises(ValidationError):
            Provenance.model_validate(payload)
class TestVideoPredictionsProvenance:
    """``provenance`` on :class:`VideoPredictions` survives save/load."""

    def test_default_is_none(self) -> None:
        vp = VideoPredictions(
            metadata=VideoMetadata(frame_count=0, fps=30.0, width=32, height=32),
            frames={},
        )
        assert vp.provenance is None

    def test_roundtrip_with_provenance(self, tmp_path: Path) -> None:
        prov = Provenance(
            model_sha256="f" * 64,
            model_filename="metrabs.tar.gz",
            tensorflow_version="2.18.1",
            numpy_version="2.0.2",
            neuropose_version="0.1.0.dev0",
            python_version="3.11.14",
        )
        frame = FramePrediction(
            boxes=[[0.0, 0.0, 32.0, 32.0, 0.9]],
            poses3d=[[[1.0, 2.0, 3.0]]],
            poses2d=[[[10.0, 20.0]]],
        )
        vp = VideoPredictions(
            metadata=VideoMetadata(frame_count=1, fps=30.0, width=32, height=32),
            frames={"frame_000000": frame},
            provenance=prov,
        )

        target = tmp_path / "vp.json"
        save_video_predictions(target, vp)
        reloaded = load_video_predictions(target)
        assert reloaded == vp
        assert reloaded.provenance == prov
class TestBenchmarkResultPersistence: class TestBenchmarkResultPersistence:
def test_roundtrip_to_disk(self, tmp_path: Path) -> None: def test_roundtrip_to_disk(self, tmp_path: Path) -> None:
result = BenchmarkResult( result = BenchmarkResult(