add Provenance subobject and LoadedModel

Captures the MeTRAbs SHA-256 and filename plus tensorflow /
tensorflow-metal / numpy / neuropose / python versions, and reserves
slots for seed, deterministic, and analysis_config. Populated
automatically by Estimator.process_video when the model was loaded via
load_model; propagates into JobResults and BenchmarkResult via the
existing output path. None on the injected-model test path where no
SHA is known.

_model.load_metrabs_model now returns a LoadedModel dataclass so the
estimator can bundle the TF handle with the pinned SHA without
re-hashing the tarball on every daemon startup. All test fakes and
the integration smoke tests updated to unwrap .model.

Bumps the optional schema_version field on VideoPredictions and
BenchmarkResult to default=CURRENT_VERSION so fresh writes stamp the
latest version; legacy payloads without it are migrated on load via
the chain registered in the previous commit.
This commit is contained in:
Levi Neuwirth 2026-04-18 17:10:52 -04:00
parent 9c549fd9e2
commit fe8e417aa0
10 changed files with 549 additions and 31 deletions

View File

@ -222,6 +222,23 @@ be split into per-release sections once tagging begins.
at `CURRENT_VERSION = 2`, with registered v1 → v2 migrations for
`VideoPredictions` and `BenchmarkResult` that add the optional
`provenance` field.
- **`neuropose.io.Provenance`** — reproducibility envelope for every
inference run. Populated automatically by `Estimator.process_video`
when the model was loaded via `load_model` (the production path)
and attached to the output `VideoPredictions`; propagates from
there into `JobResults` (per-video) and `BenchmarkResult` (via the
benchmark loop). Captures the MeTRAbs artifact SHA-256 and
filename, `tensorflow` / `tensorflow-metal` / `numpy` /
`neuropose` / Python versions, and reserved slots for a `seed`,
`deterministic` flag (Track 2), and `analysis_config` (Phase 0
YAML pipeline). `None` on the injected-model test path where
NeuroPose has no way to fingerprint the supplied artifact. Frozen
pydantic model with `extra="forbid"` and
`protected_namespaces=()` so the `model_*` field names do not
collide with pydantic v2's internal namespace. `_model.load_metrabs_model`
now returns a `LoadedModel` dataclass bundling the TF handle with
the pinned SHA and filename so the estimator can build the
`Provenance` without re-hashing the tarball.
- **`neuropose.benchmark`** — multi-pass inference benchmarking for
a single video. `run_benchmark()` runs `process_video` N times
(default 5), always discards the first pass as warmup (graph

View File

@ -41,11 +41,33 @@ import os
import shutil
import tarfile
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class LoadedModel:
    """Result of :func:`load_metrabs_model`.

    Bundles the loaded TensorFlow model with the provenance metadata
    that identifies which artifact it came from. Callers that only want
    the model reach for :attr:`model`; callers that build a
    :class:`~neuropose.io.Provenance` (primarily
    :class:`~neuropose.estimator.Estimator`) pull :attr:`sha256` and
    :attr:`filename` too.

    Frozen — once :func:`load_metrabs_model` has produced a
    ``LoadedModel``, nothing downstream should edit the identity of
    the artifact it describes.
    """

    # TensorFlow SavedModel handle; typed Any because TF is loaded lazily.
    model: Any
    # Module-pinned SHA-256 of the artifact tarball (hex-encoded).
    sha256: str
    # Canonical basename of the artifact tarball.
    filename: str
# ---------------------------------------------------------------------------
# Model artifact: pinned URL and checksum.
# ---------------------------------------------------------------------------
@ -74,7 +96,7 @@ _REQUIRED_MODEL_ATTRS = (
# ---------------------------------------------------------------------------
def load_metrabs_model(cache_dir: Path | None = None) -> Any:
def load_metrabs_model(cache_dir: Path | None = None) -> LoadedModel:
"""Load the MeTRAbs model, downloading and caching on first use.
Parameters
@ -87,9 +109,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
Returns
-------
object
A TensorFlow SavedModel handle exposing ``detect_poses`` and
the ``per_skeleton_joint_names`` / ``per_skeleton_joint_edges``
LoadedModel
Bundle containing the TensorFlow SavedModel handle alongside
the pinned artifact SHA-256 and filename that identify which
model the handle came from. The handle exposes ``detect_poses``
and the ``per_skeleton_joint_names`` / ``per_skeleton_joint_edges``
attributes used by :class:`neuropose.estimator.Estimator`.
Raises
@ -99,6 +123,18 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
automatic retry), extraction fails, TensorFlow is not
installed, or the loaded model does not expose the expected
interface.
Notes
-----
The returned ``sha256`` is the module-pinned :data:`_MODEL_SHA256`,
not a re-hash of the on-disk tarball. On the cold-cache path this
is exactly the hash we verified against before loading. On the
warm-cache path the tarball is not re-verified (that would cost a
2 GB I/O pass on every daemon startup), so the reported SHA is an
attestation of "this is the pinned artifact NeuroPose loads" rather
than a direct fingerprint of the on-disk bytes. For the threat
model this supports — reproducibility, not tamper-evidence — and that
is the correct semantics.
"""
resolved_cache = Path(cache_dir) if cache_dir is not None else _default_cache_dir()
resolved_cache.mkdir(parents=True, exist_ok=True)
@ -115,7 +151,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
)
shutil.rmtree(model_dir, ignore_errors=True)
else:
return _tf_load(saved_model_dir)
return LoadedModel(
model=_tf_load(saved_model_dir),
sha256=_MODEL_SHA256,
filename=_MODEL_ARCHIVE_NAME,
)
tarball = resolved_cache / _MODEL_ARCHIVE_NAME
@ -135,7 +175,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any:
_extract_tarball(tarball, model_dir)
saved_model_dir = _find_saved_model(model_dir)
return _tf_load(saved_model_dir)
return LoadedModel(
model=_tf_load(saved_model_dir),
sha256=_MODEL_SHA256,
filename=_MODEL_ARCHIVE_NAME,
)
# ---------------------------------------------------------------------------

View File

@ -105,9 +105,17 @@ def run_benchmark(
passes: list[PerformanceMetrics] = []
reference_predictions: VideoPredictions | None = None
# Provenance is identical across every pass of a single run (same
# estimator, same model, same environment), so we keep just the
# latest one we see. Doing this on every iteration is cheap — it's
# one attribute read — and means the benchmark result carries
# provenance even when ``capture_reference`` is off.
latest_provenance = None
for i in range(repeats):
result = estimator.process_video(video_path)
passes.append(result.metrics)
if result.predictions.provenance is not None:
latest_provenance = result.predictions.provenance
# Only the *last* measured pass needs to be captured for
# divergence comparison. Earlier passes would just be
# overwritten, so we avoid holding their frame dicts in memory.
@ -122,6 +130,7 @@ def run_benchmark(
warmup_pass=passes[0],
measured_passes=passes[1:],
aggregate=aggregate,
provenance=latest_provenance,
)
return BenchmarkRunOutcome(
result=benchmark_result,

View File

@ -34,19 +34,25 @@ model is present raises :class:`ModelNotLoadedError`.
from __future__ import annotations
import logging
import sys
import time
from collections.abc import Callable
from dataclasses import dataclass, field
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version
from pathlib import Path
from typing import Any
import cv2
import numpy as np
import psutil
from neuropose import __version__ as _neuropose_version
from neuropose._model import load_metrabs_model
from neuropose.io import (
FramePrediction,
PerformanceMetrics,
Provenance,
VideoMetadata,
VideoPredictions,
)
@ -158,6 +164,12 @@ class Estimator:
# successful ``load_model`` below so the next ``process_video`` can
# pass the real number through into ``PerformanceMetrics``.
self._model_load_seconds: float | None = None
# MeTRAbs artifact identity, set only by ``load_model``. When the
# model was injected via the constructor we have no way to
# fingerprint it, so these remain ``None`` and ``process_video``
# leaves the output's ``provenance`` as ``None`` too.
self._model_sha256: str | None = None
self._model_filename: str | None = None
# -- model lifecycle ----------------------------------------------------
@ -176,6 +188,21 @@ class Estimator:
"""Return ``True`` if a model has been supplied or loaded."""
return self._model is not None
@property
def model_sha256(self) -> str | None:
    """Return the SHA-256 of the loaded MeTRAbs artifact, or ``None``.

    ``None`` when the model was injected via ``Estimator(model=...)``
    rather than loaded via :meth:`load_model`. The value, when
    present, is the module-pinned SHA from :mod:`neuropose._model`,
    not a re-hash of the on-disk tarball.
    """
    return self._model_sha256
@property
def model_filename(self) -> str | None:
    """Return the basename of the MeTRAbs artifact, or ``None`` if injected.

    Set only by :meth:`load_model`; stays ``None`` when the model was
    supplied directly via the ``Estimator(model=...)`` constructor.
    """
    return self._model_filename
def load_model(self, cache_dir: Path | None = None) -> None:
"""Load the MeTRAbs model via :func:`neuropose._model.load_metrabs_model`.
@ -196,9 +223,16 @@ class Estimator:
return
logger.info("Loading MeTRAbs model (cache_dir=%s)", cache_dir)
start = time.perf_counter()
self._model = load_metrabs_model(cache_dir=cache_dir)
loaded = load_metrabs_model(cache_dir=cache_dir)
self._model_load_seconds = time.perf_counter() - start
logger.info("MeTRAbs model loaded in %.2f s", self._model_load_seconds)
self._model = loaded.model
self._model_sha256 = loaded.sha256
self._model_filename = loaded.filename
logger.info(
"MeTRAbs model loaded in %.2f s (sha256=%s)",
self._model_load_seconds,
loaded.sha256[:12],
)
# -- inference ----------------------------------------------------------
@ -330,11 +364,53 @@ class Estimator:
metrics.active_device,
)
predictions = VideoPredictions(metadata=metadata, frames=frames)
provenance = self._build_provenance(device_info=device_info)
predictions = VideoPredictions(
metadata=metadata,
frames=frames,
provenance=provenance,
)
return ProcessVideoResult(predictions=predictions, metrics=metrics)
# -- internals ----------------------------------------------------------
def _build_provenance(self, *, device_info: _ActiveDeviceInfo) -> Provenance | None:
    """Construct a :class:`~neuropose.io.Provenance` for the current run.

    Returns ``None`` when the model was injected via the constructor
    rather than loaded via :meth:`load_model` — in that case we
    cannot fingerprint the artifact, and a partial provenance would
    mislead readers into thinking we could.

    The device-info bundle is shared with the :class:`PerformanceMetrics`
    construction (one call to :func:`_detect_active_device` per
    ``process_video`` invocation) so that both artifacts see
    identical TF and Metal state.
    """
    sha, filename = self._model_sha256, self._model_filename
    if sha is None or filename is None:
        # Injected-model path: nothing to attest to.
        return None

    # tensorflow-metal only matters when Metal is actually active;
    # absent package => leave the field as None.
    metal_version: str | None = None
    if device_info.metal_active:
        try:
            metal_version = _pkg_version("tensorflow-metal")
        except PackageNotFoundError:
            metal_version = None

    vi = sys.version_info
    return Provenance(
        model_sha256=sha,
        model_filename=filename,
        tensorflow_version=device_info.tf_version,
        tensorflow_metal_version=metal_version,
        numpy_version=np.__version__,
        neuropose_version=_neuropose_version,
        python_version=f"{vi.major}.{vi.minor}.{vi.micro}",
    )
def _infer_frame(
self,
model: Any,

View File

@ -10,6 +10,14 @@ Atomicity: :func:`save_status`, :func:`save_job_results`, and
atomically rename, so a crash mid-write will not leave a partially-written
file behind. This matches the crash-resilience guarantee the interfacer
daemon makes to callers.
Schema versioning: :class:`VideoPredictions` and :class:`BenchmarkResult`
each carry a ``schema_version`` integer. On load, the raw JSON dict is
passed through :mod:`neuropose.migrations` before pydantic validation so
that files written by earlier versions upgrade transparently. :class:`JobResults`
is a ``RootModel`` with no envelope of its own, so its loader runs the
per-video migration on each entry of its mapping. See
:mod:`neuropose.migrations` for the migration-registration pattern.
"""
from __future__ import annotations
@ -23,6 +31,13 @@ from typing import Annotated, Any, Literal
from pydantic import BaseModel, ConfigDict, Field, RootModel, model_validator
from neuropose.migrations import (
CURRENT_VERSION,
migrate_benchmark_result,
migrate_job_results,
migrate_video_predictions,
)
class JobStatus(StrEnum):
"""Lifecycle state of a single processing job."""
@ -157,6 +172,104 @@ class PerformanceMetrics(BaseModel):
)
class Provenance(BaseModel):
    """Reproducibility-grade record of the environment that produced a payload.

    Populated by the estimator on every inference run when the MeTRAbs
    model was loaded through
    :meth:`neuropose.estimator.Estimator.load_model` (the production
    path). ``None`` when the model was injected directly via the
    ``Estimator(model=...)`` constructor (the test-fixture path), since
    NeuroPose has no way to fingerprint a model it did not load itself.

    Paper C's reproducibility story rests on this envelope: two runs
    that produced equal ``Provenance`` objects against the same input
    are expected to produce equal output (modulo non-determinism
    controlled by ``deterministic``). Reviewers who want to re-derive a
    figure from raw video need exactly these fields.

    Frozen so a captured ``Provenance`` cannot be mutated after it has
    been attached to a result; this matches the invariant that
    provenance is a property of the run, not of the reader.

    ``protected_namespaces=()`` silences pydantic's ``model_*`` field
    warning — the ``model_sha256`` / ``model_filename`` names refer to
    the MeTRAbs model artifact, not to pydantic's internal
    ``model_validate`` / ``model_dump`` namespace, so the collision is
    cosmetic.
    """

    model_config = ConfigDict(extra="forbid", frozen=True, protected_namespaces=())

    model_sha256: str = Field(
        description=(
            "SHA-256 of the MeTRAbs model tarball (hex-encoded, lowercase). "
            "Pinned at build time in :mod:`neuropose._model` and verified on "
            "first download. Identifies the exact model weights used."
        ),
    )
    model_filename: str = Field(
        description=(
            "Canonical basename of the MeTRAbs tarball, e.g. "
            "``metrabs_eff2l_y4_384px_800k_28ds.tar.gz``. Human-readable "
            "companion to ``model_sha256``."
        ),
    )
    tensorflow_version: str = Field(
        description="Value of ``tensorflow.__version__`` at the time of the run.",
    )
    tensorflow_metal_version: str | None = Field(
        default=None,
        description=(
            "Version of the ``tensorflow-metal`` PyPI package when installed; "
            "``None`` on platforms without Metal GPU acceleration."
        ),
    )
    numpy_version: str = Field(
        description="Value of ``numpy.__version__`` at the time of the run.",
    )
    neuropose_version: str = Field(
        description="Value of ``neuropose.__version__`` at the time of the run.",
    )
    python_version: str = Field(
        description=(
            "Python version as ``MAJOR.MINOR.MICRO``, e.g. ``3.11.14``. The "
            "full ``sys.version`` string is intentionally not captured; the "
            "three-component form is stable across patch builds and avoids "
            "embedding compiler and build-date metadata."
        ),
    )
    seed: int | None = Field(
        default=None,
        description=(
            "Random seed used for the run if one was set, else ``None``. "
            "MeTRAbs inference is deterministic on a given device up to "
            "floating-point associativity, so seeding mostly matters for "
            "downstream analysis that introduces randomness (bootstraps, "
            "learned metrics)."
        ),
    )
    deterministic: bool = Field(
        default=False,
        description=(
            "``True`` if ``tf.config.experimental.enable_op_determinism()`` "
            "was active during the run. Track 2 deterministic-inference "
            "mode; the field exists in Phase 0 so payloads can record "
            "whether the run *was* deterministic without requiring a "
            "schema change when the toggle lands."
        ),
    )
    analysis_config: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Parsed YAML dict if this payload was produced by ``neuropose "
            "analyze --config <file>``. ``None`` for direct-library or "
            "``neuropose watch`` invocations. Reserved for the Phase 0 "
            "YAML-configurable analysis pipeline."
        ),
    )
class BenchmarkAggregate(BaseModel):
"""Distributional statistics aggregated across benchmark passes.
@ -255,6 +368,16 @@ class BenchmarkResult(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True)
schema_version: int = Field(
default=CURRENT_VERSION,
ge=1,
description=(
"Schema version of this BenchmarkResult payload. Fresh writes "
"stamp :data:`neuropose.migrations.CURRENT_VERSION`; older files "
"are migrated on load via :mod:`neuropose.migrations` before "
"pydantic validation."
),
)
video_name: str = Field(
description="Basename of the benchmarked video (no directory components).",
)
@ -280,6 +403,14 @@ class BenchmarkResult(BaseModel):
)
aggregate: BenchmarkAggregate
cpu_comparison: CpuComparisonResult | None = None
provenance: Provenance | None = Field(
default=None,
description=(
"Reproducibility envelope from the benchmark run. ``None`` on "
"tests where the model was injected directly via "
"``Estimator(model=...)``."
),
)
class JointAxisExtractor(BaseModel):
@ -469,9 +600,30 @@ class VideoPredictions(BaseModel):
model_config = ConfigDict(extra="forbid", frozen=True)
schema_version: int = Field(
default=CURRENT_VERSION,
ge=1,
description=(
"Schema version of this VideoPredictions payload. Fresh writes "
"stamp :data:`neuropose.migrations.CURRENT_VERSION`; files written "
"by older NeuroPose versions are migrated to the current version "
"by :mod:`neuropose.migrations` before pydantic validation."
),
)
metadata: VideoMetadata
frames: dict[str, FramePrediction]
segmentations: dict[str, Segmentation] = Field(default_factory=dict)
provenance: Provenance | None = Field(
default=None,
description=(
"Reproducibility envelope populated by the estimator on runs "
"where the MeTRAbs model was loaded via "
":meth:`neuropose.estimator.Estimator.load_model`. ``None`` on "
"test paths where the model was injected via "
"``Estimator(model=...)``, because no model SHA is known in "
"that case."
),
)
def frame_names(self) -> list[str]:
"""Return frame identifiers in insertion order."""
@ -623,9 +775,16 @@ class StatusFile(RootModel[dict[str, JobStatusEntry]]):
def load_video_predictions(path: Path) -> VideoPredictions:
"""Load and validate a per-video predictions JSON file."""
"""Load and validate a per-video predictions JSON file.
Runs the payload through :func:`neuropose.migrations.migrate_video_predictions`
before pydantic validation so files written by older NeuroPose versions
upgrade to the current schema transparently.
"""
with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_video_predictions(data)
return VideoPredictions.model_validate(data)
@ -636,9 +795,17 @@ def save_video_predictions(path: Path, predictions: VideoPredictions) -> None:
def load_job_results(path: Path) -> JobResults:
"""Load and validate an aggregated per-job results JSON file."""
"""Load and validate an aggregated per-job results JSON file.
Runs each video's payload through
:func:`neuropose.migrations.migrate_video_predictions` before pydantic
validation. :class:`JobResults` is a ``RootModel`` with no envelope of
its own, so migration happens per-entry rather than at the top level.
"""
with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_job_results(data)
return JobResults.model_validate(data)
@ -649,9 +816,16 @@ def save_job_results(path: Path, results: JobResults) -> None:
def load_benchmark_result(path: Path) -> BenchmarkResult:
"""Load and validate a benchmark-result JSON file."""
"""Load and validate a benchmark-result JSON file.
Runs the payload through :func:`neuropose.migrations.migrate_benchmark_result`
before pydantic validation so files written by older NeuroPose versions
upgrade transparently.
"""
with path.open("r", encoding="utf-8") as f:
data: Any = json.load(f)
if isinstance(data, dict):
data = migrate_benchmark_result(data)
return BenchmarkResult.model_validate(data)

View File

@ -81,26 +81,29 @@ class TestMetrabsLoader:
"""Exercises the loader's download → verify → extract → load path."""
def test_download_and_load(self, shared_model_cache_dir: Path) -> None:
model = load_metrabs_model(cache_dir=shared_model_cache_dir)
assert model is not None
loaded = load_metrabs_model(cache_dir=shared_model_cache_dir)
assert loaded.model is not None
assert loaded.sha256
assert loaded.filename
for attr in ("detect_poses", "per_skeleton_joint_names", "per_skeleton_joint_edges"):
assert hasattr(model, attr), f"loaded model is missing {attr}"
assert hasattr(loaded.model, attr), f"loaded model is missing {attr}"
def test_second_call_uses_cache(self, shared_model_cache_dir: Path) -> None:
"""Idempotent: second call should return the cached model cheaply."""
model_a = load_metrabs_model(cache_dir=shared_model_cache_dir)
model_b = load_metrabs_model(cache_dir=shared_model_cache_dir)
loaded_a = load_metrabs_model(cache_dir=shared_model_cache_dir)
loaded_b = load_metrabs_model(cache_dir=shared_model_cache_dir)
# tf.saved_model.load returns a new Python object each call, so
# identity comparison doesn't work — but both should still
# expose the MeTRAbs interface.
assert hasattr(model_a, "detect_poses")
assert hasattr(model_b, "detect_poses")
# expose the MeTRAbs interface, and the SHA should match.
assert hasattr(loaded_a.model, "detect_poses")
assert hasattr(loaded_b.model, "detect_poses")
assert loaded_a.sha256 == loaded_b.sha256
def test_berkeley_mhad_skeleton_is_present(self, shared_model_cache_dir: Path) -> None:
"""The estimator pins skeleton='berkeley_mhad_43'; verify it exists."""
model = load_metrabs_model(cache_dir=shared_model_cache_dir)
joint_names = model.per_skeleton_joint_names["berkeley_mhad_43"]
joint_edges = model.per_skeleton_joint_edges["berkeley_mhad_43"]
loaded = load_metrabs_model(cache_dir=shared_model_cache_dir)
joint_names = loaded.model.per_skeleton_joint_names["berkeley_mhad_43"]
joint_edges = loaded.model.per_skeleton_joint_edges["berkeley_mhad_43"]
# MeTRAbs exposes these as tf.Tensor objects; just verify we
# can pull a shape out.
assert joint_names.shape[0] == 43

View File

@ -50,8 +50,8 @@ def test_joint_names_match_pinned_model(metrabs_model_cache_dir: Path) -> None:
commit that bumps the model pin in :mod:`neuropose._model`.
2. Cross-check any CLI or docs that embed hardcoded joint names.
"""
model = load_metrabs_model(cache_dir=metrabs_model_cache_dir)
tensor = model.per_skeleton_joint_names["berkeley_mhad_43"]
loaded = load_metrabs_model(cache_dir=metrabs_model_cache_dir)
tensor = loaded.model.per_skeleton_joint_names["berkeley_mhad_43"]
model_names = tuple(tensor.numpy().astype(str).tolist())
assert model_names == JOINT_NAMES, (
"JOINT_NAMES drift detected — the hardcoded tuple in "

View File

@ -683,9 +683,15 @@ def stub_estimator_with_metrics(monkeypatch: pytest.MonkeyPatch):
"poses2d": np.array([[[0.0, 0.0], [1.0, 1.0]]]),
}
def fake_loader(cache_dir: Path | None = None) -> object:
from neuropose._model import LoadedModel
def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
del cache_dir
return RecordingFake()
return LoadedModel(
model=RecordingFake(),
sha256="smoke_sha",
filename="metrabs_smoke.tar.gz",
)
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)

View File

@ -70,17 +70,21 @@ class TestModelGuard:
network: the loader is monkeypatched to return a sentinel, and we
assert it ends up as the estimator's model.
"""
from neuropose._model import LoadedModel
sentinel = object()
called_with: list[Path | None] = []
def fake_loader(cache_dir: Path | None = None) -> object:
def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
called_with.append(cache_dir)
return sentinel
return LoadedModel(model=sentinel, sha256="deadbeef", filename="fake.tar.gz")
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
estimator = Estimator()
estimator.load_model(cache_dir=Path("/tmp/fake-cache"))
assert estimator.model is sentinel
assert estimator.model_sha256 == "deadbeef"
assert estimator.model_filename == "fake.tar.gz"
assert called_with == [Path("/tmp/fake-cache")]
def test_load_model_is_idempotent_when_already_loaded(
@ -278,9 +282,15 @@ class TestPerformanceMetrics:
"poses2d": np.array([[[0.0, 0.0]]]),
}
def fake_loader(cache_dir: Path | None = None) -> object:
from neuropose._model import LoadedModel
def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
del cache_dir
return Recorder()
return LoadedModel(
model=Recorder(),
sha256="fake_sha",
filename="metrabs_fake.tar.gz",
)
monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
estimator = Estimator()
@ -312,6 +322,88 @@ class TestPerformanceMetrics:
assert result.metrics.tensorflow_version not in {"", "unknown"}
class TestProvenance:
    """Provenance attachment to VideoPredictions.

    Covers the two relevant paths: the injected-model path (no SHA
    known, so ``provenance=None`` on output) and the ``load_model``
    path (SHA is known, so a full ``Provenance`` is populated and
    attached).
    """

    def test_injected_model_produces_no_provenance(
        self,
        synthetic_video: Path,
        fake_metrabs_model,
    ) -> None:
        estimator = Estimator(model=fake_metrabs_model)
        outcome = estimator.process_video(synthetic_video)
        assert outcome.predictions.provenance is None
        assert estimator.model_sha256 is None
        assert estimator.model_filename is None

    def test_loaded_model_populates_provenance(
        self,
        synthetic_video: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        import numpy as np

        from neuropose._model import LoadedModel

        class StubModel:
            def detect_poses(self, image, **kwargs):
                del image, kwargs
                return {
                    "boxes": np.array([[0.0, 0.0, 1.0, 1.0, 0.9]]),
                    "poses3d": np.array([[[0.0, 0.0, 0.0]]]),
                    "poses2d": np.array([[[0.0, 0.0]]]),
                }

        def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
            del cache_dir
            return LoadedModel(
                model=StubModel(),
                sha256="e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
                filename="metrabs_stub.tar.gz",
            )

        monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
        estimator = Estimator()
        estimator.load_model()
        outcome = estimator.process_video(synthetic_video)

        envelope = outcome.predictions.provenance
        assert envelope is not None
        assert envelope.model_sha256.startswith("e3b0c44")
        assert envelope.model_filename == "metrabs_stub.tar.gz"
        assert envelope.numpy_version == np.__version__
        assert envelope.python_version.count(".") == 2  # MAJOR.MINOR.MICRO

        # neuropose_version should match the package's __version__
        from neuropose import __version__ as pkg_version

        assert envelope.neuropose_version == pkg_version
        # tensorflow_version should also be real (TF is in dev deps).
        assert envelope.tensorflow_version not in {"", "unknown"}

    def test_model_sha256_and_filename_properties_after_load(
        self,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        from neuropose._model import LoadedModel

        def fake_loader(cache_dir: Path | None = None) -> LoadedModel:
            del cache_dir
            return LoadedModel(model=object(), sha256="abcd", filename="x.tar.gz")

        monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader)
        estimator = Estimator()
        assert estimator.model_sha256 is None
        assert estimator.model_filename is None

        estimator.load_model()
        assert estimator.model_sha256 == "abcd"
        assert estimator.model_filename == "x.tar.gz"
class TestErrors:
def test_missing_video(
self,

View File

@ -22,6 +22,7 @@ from neuropose.io import (
JointPairDistanceExtractor,
JointSpeedExtractor,
PerformanceMetrics,
Provenance,
Segment,
Segmentation,
SegmentationConfig,
@ -278,6 +279,102 @@ class TestPerformanceMetricsModel:
m.total_seconds = 2.0
def _minimal_provenance() -> Provenance:
    """Build a ``Provenance`` carrying only the required fields."""
    required_fields = {
        "model_sha256": "a" * 64,
        "model_filename": "metrabs_fake.tar.gz",
        "tensorflow_version": "2.18.1",
        "numpy_version": "2.0.2",
        "neuropose_version": "0.1.0.dev0",
        "python_version": "3.11.14",
    }
    return Provenance(**required_fields)
class TestProvenanceModel:
    """Schema-level behaviour of :class:`neuropose.io.Provenance`."""

    def test_roundtrip_through_json(self) -> None:
        original = Provenance(
            model_sha256="a" * 64,
            model_filename="metrabs_fake.tar.gz",
            tensorflow_version="2.18.1",
            tensorflow_metal_version="1.2.0",
            numpy_version="2.0.2",
            neuropose_version="0.1.0.dev0",
            python_version="3.11.14",
            seed=42,
            deterministic=True,
            analysis_config={"step": "dtw", "nan_policy": "propagate"},
        )
        dumped = original.model_dump(mode="json")
        assert Provenance.model_validate(dumped) == original

    def test_optional_fields_default_to_none_and_false(self) -> None:
        prov = _minimal_provenance()
        assert prov.tensorflow_metal_version is None
        assert prov.seed is None
        assert prov.deterministic is False
        assert prov.analysis_config is None

    def test_is_frozen(self) -> None:
        prov = _minimal_provenance()
        with pytest.raises(ValidationError):
            prov.model_sha256 = "different"

    def test_extra_fields_forbidden(self) -> None:
        # Construct via model_validate so pyright doesn't have to prove the
        # keyword doesn't exist on the class at static-type time.
        payload = {
            "model_sha256": "x" * 64,
            "model_filename": "x.tar.gz",
            "tensorflow_version": "2.18",
            "numpy_version": "2.0",
            "neuropose_version": "0.1",
            "python_version": "3.11.14",
            "unknown_field": "bogus",
        }
        with pytest.raises(ValidationError):
            Provenance.model_validate(payload)
class TestVideoPredictionsProvenance:
    """``provenance`` field on :class:`VideoPredictions` round-trips."""

    def test_default_is_none(self) -> None:
        predictions = VideoPredictions(
            metadata=VideoMetadata(frame_count=0, fps=30.0, width=32, height=32),
            frames={},
        )
        assert predictions.provenance is None

    def test_roundtrip_with_provenance(self, tmp_path: Path) -> None:
        envelope = Provenance(
            model_sha256="f" * 64,
            model_filename="metrabs.tar.gz",
            tensorflow_version="2.18.1",
            numpy_version="2.0.2",
            neuropose_version="0.1.0.dev0",
            python_version="3.11.14",
        )
        frame = FramePrediction(
            boxes=[[0.0, 0.0, 32.0, 32.0, 0.9]],
            poses3d=[[[1.0, 2.0, 3.0]]],
            poses2d=[[[10.0, 20.0]]],
        )
        predictions = VideoPredictions(
            metadata=VideoMetadata(frame_count=1, fps=30.0, width=32, height=32),
            frames={"frame_000000": frame},
            provenance=envelope,
        )

        out_path = tmp_path / "vp.json"
        save_video_predictions(out_path, predictions)
        rehydrated = load_video_predictions(out_path)
        assert rehydrated == predictions
        assert rehydrated.provenance == envelope
class TestBenchmarkResultPersistence:
def test_roundtrip_to_disk(self, tmp_path: Path) -> None:
result = BenchmarkResult(