diff --git a/CHANGELOG.md b/CHANGELOG.md index 37cea49..87a9c71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -222,6 +222,23 @@ be split into per-release sections once tagging begins. at `CURRENT_VERSION = 2`, with registered v1 → v2 migrations for `VideoPredictions` and `BenchmarkResult` that add the optional `provenance` field. +- **`neuropose.io.Provenance`** — reproducibility envelope for every + inference run. Populated automatically by `Estimator.process_video` + when the model was loaded via `load_model` (the production path) + and attached to the output `VideoPredictions`; propagates from + there into `JobResults` (per-video) and `BenchmarkResult` (via the + benchmark loop). Captures the MeTRAbs artifact SHA-256 and + filename, `tensorflow` / `tensorflow-metal` / `numpy` / + `neuropose` / Python versions, and reserved slots for a `seed`, + `deterministic` flag (Track 2), and `analysis_config` (Phase 0 + YAML pipeline). `None` on the injected-model test path where + NeuroPose has no way to fingerprint the supplied artifact. Frozen + pydantic model with `extra="forbid"` and + `protected_namespaces=()` so the `model_*` field names do not + collide with pydantic v2's internal namespace. `_model.load_metrabs_model` + now returns a `LoadedModel` dataclass bundling the TF handle with + the pinned SHA and filename so the estimator can build the + `Provenance` without re-hashing the tarball. - **`neuropose.benchmark`** — multi-pass inference benchmarking for a single video. 
`run_benchmark()` runs `process_video` N times (default 5), always discards the first pass as warmup (graph diff --git a/src/neuropose/_model.py b/src/neuropose/_model.py index 209e760..32ed7d7 100644 --- a/src/neuropose/_model.py +++ b/src/neuropose/_model.py @@ -41,11 +41,33 @@ import os import shutil import tarfile import urllib.request +from dataclasses import dataclass from pathlib import Path from typing import Any logger = logging.getLogger(__name__) + +@dataclass(frozen=True) +class LoadedModel: + """Result of :func:`load_metrabs_model`. + + Bundles the loaded TensorFlow model with the provenance metadata + that identifies which artifact it came from. Callers that only want + the model reach for :attr:`model`; callers that build a + :class:`~neuropose.io.Provenance` (primarily + :class:`~neuropose.estimator.Estimator`) pull :attr:`sha256` and + :attr:`filename` too. + + Frozen — once :func:`load_metrabs_model` has produced a + ``LoadedModel``, nothing downstream should edit the identity of + the artifact it describes. + """ + + model: Any + sha256: str + filename: str + # --------------------------------------------------------------------------- # Model artifact: pinned URL and checksum. # --------------------------------------------------------------------------- @@ -74,7 +96,7 @@ _REQUIRED_MODEL_ATTRS = ( # --------------------------------------------------------------------------- -def load_metrabs_model(cache_dir: Path | None = None) -> Any: +def load_metrabs_model(cache_dir: Path | None = None) -> LoadedModel: """Load the MeTRAbs model, downloading and caching on first use. 
Parameters @@ -87,9 +109,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any: Returns ------- - object - A TensorFlow SavedModel handle exposing ``detect_poses`` and - the ``per_skeleton_joint_names`` / ``per_skeleton_joint_edges`` + LoadedModel + Bundle containing the TensorFlow SavedModel handle alongside + the pinned artifact SHA-256 and filename that identify which + model the handle came from. The handle exposes ``detect_poses`` + and the ``per_skeleton_joint_names`` / ``per_skeleton_joint_edges`` attributes used by :class:`neuropose.estimator.Estimator`. Raises @@ -99,6 +123,18 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any: automatic retry), extraction fails, TensorFlow is not installed, or the loaded model does not expose the expected interface. + + Notes + ----- + The returned ``sha256`` is the module-pinned :data:`_MODEL_SHA256`, + not a re-hash of the on-disk tarball. On the cold-cache path this + is exactly the hash we verified against before loading. On the + warm-cache path the tarball is not re-verified (that would cost a + 2 GB I/O pass on every daemon startup), so the reported SHA is an + attestation of "this is the pinned artifact NeuroPose loads" rather + than a direct fingerprint of the on-disk bytes. For the threat + model this supports — reproducibility, not tamper-evidence — that + is the correct semantics. 
""" resolved_cache = Path(cache_dir) if cache_dir is not None else _default_cache_dir() resolved_cache.mkdir(parents=True, exist_ok=True) @@ -115,7 +151,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any: ) shutil.rmtree(model_dir, ignore_errors=True) else: - return _tf_load(saved_model_dir) + return LoadedModel( + model=_tf_load(saved_model_dir), + sha256=_MODEL_SHA256, + filename=_MODEL_ARCHIVE_NAME, + ) tarball = resolved_cache / _MODEL_ARCHIVE_NAME @@ -135,7 +175,11 @@ def load_metrabs_model(cache_dir: Path | None = None) -> Any: _extract_tarball(tarball, model_dir) saved_model_dir = _find_saved_model(model_dir) - return _tf_load(saved_model_dir) + return LoadedModel( + model=_tf_load(saved_model_dir), + sha256=_MODEL_SHA256, + filename=_MODEL_ARCHIVE_NAME, + ) # --------------------------------------------------------------------------- diff --git a/src/neuropose/benchmark.py b/src/neuropose/benchmark.py index 95ca7ff..6e259c6 100644 --- a/src/neuropose/benchmark.py +++ b/src/neuropose/benchmark.py @@ -105,9 +105,17 @@ def run_benchmark( passes: list[PerformanceMetrics] = [] reference_predictions: VideoPredictions | None = None + # Provenance is identical across every pass of a single run (same + # estimator, same model, same environment), so we keep just the + # latest one we see. Doing this on every iteration is cheap — it's + # one attribute read — and means the benchmark result carries + # provenance even when ``capture_reference`` is off. + latest_provenance = None for i in range(repeats): result = estimator.process_video(video_path) passes.append(result.metrics) + if result.predictions.provenance is not None: + latest_provenance = result.predictions.provenance # Only the *last* measured pass needs to be captured for # divergence comparison. Earlier passes would just be # overwritten, so we avoid holding their frame dicts in memory. 
@@ -122,6 +130,7 @@ def run_benchmark( warmup_pass=passes[0], measured_passes=passes[1:], aggregate=aggregate, + provenance=latest_provenance, ) return BenchmarkRunOutcome( result=benchmark_result, diff --git a/src/neuropose/estimator.py b/src/neuropose/estimator.py index 4461ac0..8c65eca 100644 --- a/src/neuropose/estimator.py +++ b/src/neuropose/estimator.py @@ -34,19 +34,25 @@ model is present raises :class:`ModelNotLoadedError`. from __future__ import annotations import logging +import sys import time from collections.abc import Callable from dataclasses import dataclass, field +from importlib.metadata import PackageNotFoundError +from importlib.metadata import version as _pkg_version from pathlib import Path from typing import Any import cv2 +import numpy as np import psutil +from neuropose import __version__ as _neuropose_version from neuropose._model import load_metrabs_model from neuropose.io import ( FramePrediction, PerformanceMetrics, + Provenance, VideoMetadata, VideoPredictions, ) @@ -158,6 +164,12 @@ class Estimator: # successful ``load_model`` below so the next ``process_video`` can # pass the real number through into ``PerformanceMetrics``. self._model_load_seconds: float | None = None + # MeTRAbs artifact identity, set only by ``load_model``. When the + # model was injected via the constructor we have no way to + # fingerprint it, so these remain ``None`` and ``process_video`` + # leaves the output's ``provenance`` as ``None`` too. + self._model_sha256: str | None = None + self._model_filename: str | None = None # -- model lifecycle ---------------------------------------------------- @@ -176,6 +188,21 @@ class Estimator: """Return ``True`` if a model has been supplied or loaded.""" return self._model is not None + @property + def model_sha256(self) -> str | None: + """Return the SHA-256 of the loaded MeTRAbs artifact, or ``None``. + + ``None`` when the model was injected via ``Estimator(model=...)`` + rather than loaded via :meth:`load_model`. 
The value, when + present, is the module-pinned SHA from :mod:`neuropose._model`. + """ + return self._model_sha256 + + @property + def model_filename(self) -> str | None: + """Return the basename of the MeTRAbs artifact, or ``None`` if injected.""" + return self._model_filename + def load_model(self, cache_dir: Path | None = None) -> None: """Load the MeTRAbs model via :func:`neuropose._model.load_metrabs_model`. @@ -196,9 +223,16 @@ class Estimator: return logger.info("Loading MeTRAbs model (cache_dir=%s)", cache_dir) start = time.perf_counter() - self._model = load_metrabs_model(cache_dir=cache_dir) + loaded = load_metrabs_model(cache_dir=cache_dir) self._model_load_seconds = time.perf_counter() - start - logger.info("MeTRAbs model loaded in %.2f s", self._model_load_seconds) + self._model = loaded.model + self._model_sha256 = loaded.sha256 + self._model_filename = loaded.filename + logger.info( + "MeTRAbs model loaded in %.2f s (sha256=%s)", + self._model_load_seconds, + loaded.sha256[:12], + ) # -- inference ---------------------------------------------------------- @@ -330,11 +364,53 @@ class Estimator: metrics.active_device, ) - predictions = VideoPredictions(metadata=metadata, frames=frames) + provenance = self._build_provenance(device_info=device_info) + predictions = VideoPredictions( + metadata=metadata, + frames=frames, + provenance=provenance, + ) return ProcessVideoResult(predictions=predictions, metrics=metrics) # -- internals ---------------------------------------------------------- + def _build_provenance(self, *, device_info: _ActiveDeviceInfo) -> Provenance | None: + """Construct a :class:`~neuropose.io.Provenance` for the current run. + + Returns ``None`` when the model was injected via the constructor + rather than loaded via :meth:`load_model` — in that case we + cannot fingerprint the artifact, and a partial provenance would + mislead readers into thinking we could. 
+ + The device-info bundle is shared with the :class:`PerformanceMetrics` + construction (one call to :func:`_detect_active_device` per + ``process_video`` invocation) so that both artifacts see + identical TF and Metal state. + """ + if self._model_sha256 is None or self._model_filename is None: + return None + + metal_version: str | None = None + if device_info.metal_active: + try: + metal_version = _pkg_version("tensorflow-metal") + except PackageNotFoundError: + metal_version = None + + python_version = ( + f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" + ) + + return Provenance( + model_sha256=self._model_sha256, + model_filename=self._model_filename, + tensorflow_version=device_info.tf_version, + tensorflow_metal_version=metal_version, + numpy_version=np.__version__, + neuropose_version=_neuropose_version, + python_version=python_version, + ) + def _infer_frame( self, model: Any, diff --git a/src/neuropose/io.py b/src/neuropose/io.py index 344ae26..57423d2 100644 --- a/src/neuropose/io.py +++ b/src/neuropose/io.py @@ -10,6 +10,14 @@ Atomicity: :func:`save_status`, :func:`save_job_results`, and atomically rename, so a crash mid-write will not leave a partially-written file behind. This matches the crash-resilience guarantee the interfacer daemon makes to callers. + +Schema versioning: :class:`VideoPredictions` and :class:`BenchmarkResult` +each carry a ``schema_version`` integer. On load, the raw JSON dict is +passed through :mod:`neuropose.migrations` before pydantic validation so +that files written by earlier versions upgrade transparently. :class:`JobResults` +is a ``RootModel`` with no envelope of its own, so its loader runs the +per-video migration on each entry of its mapping. See +:mod:`neuropose.migrations` for the migration-registration pattern. 
""" from __future__ import annotations @@ -23,6 +31,13 @@ from typing import Annotated, Any, Literal from pydantic import BaseModel, ConfigDict, Field, RootModel, model_validator +from neuropose.migrations import ( + CURRENT_VERSION, + migrate_benchmark_result, + migrate_job_results, + migrate_video_predictions, +) + class JobStatus(StrEnum): """Lifecycle state of a single processing job.""" @@ -157,6 +172,104 @@ class PerformanceMetrics(BaseModel): ) +class Provenance(BaseModel): + """Reproducibility-grade record of the environment that produced a payload. + + Populated by the estimator on every inference run when the MeTRAbs + model was loaded through + :meth:`neuropose.estimator.Estimator.load_model` (the production + path). ``None`` when the model was injected directly via the + ``Estimator(model=...)`` constructor (the test-fixture path), since + NeuroPose has no way to fingerprint a model it did not load itself. + + Paper C's reproducibility story rests on this envelope: two runs + that produced equal ``Provenance`` objects against the same input + are expected to produce equal output (modulo non-determinism + controlled by ``deterministic``). Reviewers who want to re-derive a + figure from raw video need exactly these fields. + + Frozen so a captured ``Provenance`` cannot be mutated after it has + been attached to a result; this matches the invariant that + provenance is a property of the run, not of the reader. + + ``protected_namespaces=()`` silences pydantic's ``model_*`` field + warning — the ``model_sha256`` / ``model_filename`` names refer to + the MeTRAbs model artifact, not to pydantic's internal + ``model_validate`` / ``model_dump`` namespace, so the collision is + cosmetic. + """ + + model_config = ConfigDict(extra="forbid", frozen=True, protected_namespaces=()) + + model_sha256: str = Field( + description=( + "SHA-256 of the MeTRAbs model tarball (hex-encoded, lowercase). 
" + "Pinned at build time in :mod:`neuropose._model` and verified on " + "first download. Identifies the exact model weights used." + ), + ) + model_filename: str = Field( + description=( + "Canonical basename of the MeTRAbs tarball, e.g. " + "``metrabs_eff2l_y4_384px_800k_28ds.tar.gz``. Human-readable " + "companion to ``model_sha256``." + ), + ) + tensorflow_version: str = Field( + description="Value of ``tensorflow.__version__`` at the time of the run.", + ) + tensorflow_metal_version: str | None = Field( + default=None, + description=( + "Version of the ``tensorflow-metal`` PyPI package when installed; " + "``None`` on platforms without Metal GPU acceleration." + ), + ) + numpy_version: str = Field( + description="Value of ``numpy.__version__`` at the time of the run.", + ) + neuropose_version: str = Field( + description="Value of ``neuropose.__version__`` at the time of the run.", + ) + python_version: str = Field( + description=( + "Python version as ``MAJOR.MINOR.MICRO``, e.g. ``3.11.14``. The " + "full ``sys.version`` string is intentionally not captured; the " + "three-component form is stable across patch builds and avoids " + "embedding compiler and build-date metadata." + ), + ) + seed: int | None = Field( + default=None, + description=( + "Random seed used for the run if one was set, else ``None``. " + "MeTRAbs inference is deterministic on a given device up to " + "floating-point associativity, so seeding mostly matters for " + "downstream analysis that introduces randomness (bootstraps, " + "learned metrics)." + ), + ) + deterministic: bool = Field( + default=False, + description=( + "``True`` if ``tf.config.experimental.enable_op_determinism()`` " + "was active during the run. Track 2 deterministic-inference " + "mode; the field exists in Phase 0 so payloads can record " + "whether the run *was* deterministic without requiring a " + "schema change when the toggle lands." 
+ ), + ) + analysis_config: dict[str, Any] | None = Field( + default=None, + description=( + "Parsed YAML dict if this payload was produced by ``neuropose " + "analyze --config``. ``None`` for direct-library or " + "``neuropose watch`` invocations. Reserved for the Phase 0 " + "YAML-configurable analysis pipeline." + ), + ) + + class BenchmarkAggregate(BaseModel): """Distributional statistics aggregated across benchmark passes. @@ -255,6 +368,16 @@ class BenchmarkResult(BaseModel): model_config = ConfigDict(extra="forbid", frozen=True) + schema_version: int = Field( + default=CURRENT_VERSION, + ge=1, + description=( + "Schema version of this BenchmarkResult payload. Fresh writes " + "stamp :data:`neuropose.migrations.CURRENT_VERSION`; older files " + "are migrated on load via :mod:`neuropose.migrations` before " + "pydantic validation." + ), + ) video_name: str = Field( description="Basename of the benchmarked video (no directory components).", ) @@ -280,6 +403,14 @@ class BenchmarkResult(BaseModel): ) aggregate: BenchmarkAggregate cpu_comparison: CpuComparisonResult | None = None + provenance: Provenance | None = Field( + default=None, + description=( + "Reproducibility envelope from the benchmark run. ``None`` on " + "tests where the model was injected directly via " + "``Estimator(model=...)``." + ), + ) class JointAxisExtractor(BaseModel): @@ -469,9 +600,30 @@ class VideoPredictions(BaseModel): model_config = ConfigDict(extra="forbid", frozen=True) + schema_version: int = Field( + default=CURRENT_VERSION, + ge=1, + description=( + "Schema version of this VideoPredictions payload. Fresh writes " + "stamp :data:`neuropose.migrations.CURRENT_VERSION`; files written " + "by older NeuroPose versions are migrated to the current version " + "by :mod:`neuropose.migrations` before pydantic validation." 
+ ), + ) metadata: VideoMetadata frames: dict[str, FramePrediction] segmentations: dict[str, Segmentation] = Field(default_factory=dict) + provenance: Provenance | None = Field( + default=None, + description=( + "Reproducibility envelope populated by the estimator on runs " + "where the MeTRAbs model was loaded via " + ":meth:`neuropose.estimator.Estimator.load_model`. ``None`` on " + "test paths where the model was injected via " + "``Estimator(model=...)``, because no model SHA is known in " + "that case." + ), + ) def frame_names(self) -> list[str]: """Return frame identifiers in insertion order.""" @@ -623,9 +775,16 @@ class StatusFile(RootModel[dict[str, JobStatusEntry]]): def load_video_predictions(path: Path) -> VideoPredictions: - """Load and validate a per-video predictions JSON file.""" + """Load and validate a per-video predictions JSON file. + + Runs the payload through :func:`neuropose.migrations.migrate_video_predictions` + before pydantic validation so files written by older NeuroPose versions + upgrade to the current schema transparently. + """ with path.open("r", encoding="utf-8") as f: data: Any = json.load(f) + if isinstance(data, dict): + data = migrate_video_predictions(data) return VideoPredictions.model_validate(data) @@ -636,9 +795,17 @@ def save_video_predictions(path: Path, predictions: VideoPredictions) -> None: def load_job_results(path: Path) -> JobResults: - """Load and validate an aggregated per-job results JSON file.""" + """Load and validate an aggregated per-job results JSON file. + + Runs each video's payload through + :func:`neuropose.migrations.migrate_video_predictions` before pydantic + validation. :class:`JobResults` is a ``RootModel`` with no envelope of + its own, so migration happens per-entry rather than at the top level. 
+ """ with path.open("r", encoding="utf-8") as f: data: Any = json.load(f) + if isinstance(data, dict): + data = migrate_job_results(data) return JobResults.model_validate(data) @@ -649,9 +816,16 @@ def save_job_results(path: Path, results: JobResults) -> None: def load_benchmark_result(path: Path) -> BenchmarkResult: - """Load and validate a benchmark-result JSON file.""" + """Load and validate a benchmark-result JSON file. + + Runs the payload through :func:`neuropose.migrations.migrate_benchmark_result` + before pydantic validation so files written by older NeuroPose versions + upgrade transparently. + """ with path.open("r", encoding="utf-8") as f: data: Any = json.load(f) + if isinstance(data, dict): + data = migrate_benchmark_result(data) return BenchmarkResult.model_validate(data) diff --git a/tests/integration/test_estimator_smoke.py b/tests/integration/test_estimator_smoke.py index 7f11310..6e9e694 100644 --- a/tests/integration/test_estimator_smoke.py +++ b/tests/integration/test_estimator_smoke.py @@ -81,26 +81,29 @@ class TestMetrabsLoader: """Exercises the loader's download → verify → extract → load path.""" def test_download_and_load(self, shared_model_cache_dir: Path) -> None: - model = load_metrabs_model(cache_dir=shared_model_cache_dir) - assert model is not None + loaded = load_metrabs_model(cache_dir=shared_model_cache_dir) + assert loaded.model is not None + assert loaded.sha256 + assert loaded.filename for attr in ("detect_poses", "per_skeleton_joint_names", "per_skeleton_joint_edges"): - assert hasattr(model, attr), f"loaded model is missing {attr}" + assert hasattr(loaded.model, attr), f"loaded model is missing {attr}" def test_second_call_uses_cache(self, shared_model_cache_dir: Path) -> None: """Idempotent: second call should return the cached model cheaply.""" - model_a = load_metrabs_model(cache_dir=shared_model_cache_dir) - model_b = load_metrabs_model(cache_dir=shared_model_cache_dir) + loaded_a = 
load_metrabs_model(cache_dir=shared_model_cache_dir) + loaded_b = load_metrabs_model(cache_dir=shared_model_cache_dir) # tf.saved_model.load returns a new Python object each call, so # identity comparison doesn't work — but both should still - # expose the MeTRAbs interface. - assert hasattr(model_a, "detect_poses") - assert hasattr(model_b, "detect_poses") + # expose the MeTRAbs interface, and the SHA should match. + assert hasattr(loaded_a.model, "detect_poses") + assert hasattr(loaded_b.model, "detect_poses") + assert loaded_a.sha256 == loaded_b.sha256 def test_berkeley_mhad_skeleton_is_present(self, shared_model_cache_dir: Path) -> None: """The estimator pins skeleton='berkeley_mhad_43'; verify it exists.""" - model = load_metrabs_model(cache_dir=shared_model_cache_dir) - joint_names = model.per_skeleton_joint_names["berkeley_mhad_43"] - joint_edges = model.per_skeleton_joint_edges["berkeley_mhad_43"] + loaded = load_metrabs_model(cache_dir=shared_model_cache_dir) + joint_names = loaded.model.per_skeleton_joint_names["berkeley_mhad_43"] + joint_edges = loaded.model.per_skeleton_joint_edges["berkeley_mhad_43"] # MeTRAbs exposes these as tf.Tensor objects; just verify we # can pull a shape out. assert joint_names.shape[0] == 43 diff --git a/tests/integration/test_joint_names_drift.py b/tests/integration/test_joint_names_drift.py index 30df5ba..ca4e392 100644 --- a/tests/integration/test_joint_names_drift.py +++ b/tests/integration/test_joint_names_drift.py @@ -50,8 +50,8 @@ def test_joint_names_match_pinned_model(metrabs_model_cache_dir: Path) -> None: commit that bumps the model pin in :mod:`neuropose._model`. 2. Cross-check any CLI or docs that embed hardcoded joint names. 
""" - model = load_metrabs_model(cache_dir=metrabs_model_cache_dir) - tensor = model.per_skeleton_joint_names["berkeley_mhad_43"] + loaded = load_metrabs_model(cache_dir=metrabs_model_cache_dir) + tensor = loaded.model.per_skeleton_joint_names["berkeley_mhad_43"] model_names = tuple(tensor.numpy().astype(str).tolist()) assert model_names == JOINT_NAMES, ( "JOINT_NAMES drift detected — the hardcoded tuple in " diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 96d82ee..f3027f9 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -683,9 +683,15 @@ def stub_estimator_with_metrics(monkeypatch: pytest.MonkeyPatch): "poses2d": np.array([[[0.0, 0.0], [1.0, 1.0]]]), } - def fake_loader(cache_dir: Path | None = None) -> object: + from neuropose._model import LoadedModel + + def fake_loader(cache_dir: Path | None = None) -> LoadedModel: del cache_dir - return RecordingFake() + return LoadedModel( + model=RecordingFake(), + sha256="smoke_sha", + filename="metrabs_smoke.tar.gz", + ) monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) diff --git a/tests/unit/test_estimator.py b/tests/unit/test_estimator.py index e3c8a04..dbef196 100644 --- a/tests/unit/test_estimator.py +++ b/tests/unit/test_estimator.py @@ -70,17 +70,21 @@ class TestModelGuard: network: the loader is monkeypatched to return a sentinel, and we assert it ends up as the estimator's model. 
""" + from neuropose._model import LoadedModel + sentinel = object() called_with: list[Path | None] = [] - def fake_loader(cache_dir: Path | None = None) -> object: + def fake_loader(cache_dir: Path | None = None) -> LoadedModel: called_with.append(cache_dir) - return sentinel + return LoadedModel(model=sentinel, sha256="deadbeef", filename="fake.tar.gz") monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) estimator = Estimator() estimator.load_model(cache_dir=Path("/tmp/fake-cache")) assert estimator.model is sentinel + assert estimator.model_sha256 == "deadbeef" + assert estimator.model_filename == "fake.tar.gz" assert called_with == [Path("/tmp/fake-cache")] def test_load_model_is_idempotent_when_already_loaded( @@ -278,9 +282,15 @@ class TestPerformanceMetrics: "poses2d": np.array([[[0.0, 0.0]]]), } - def fake_loader(cache_dir: Path | None = None) -> object: + from neuropose._model import LoadedModel + + def fake_loader(cache_dir: Path | None = None) -> LoadedModel: del cache_dir - return Recorder() + return LoadedModel( + model=Recorder(), + sha256="fake_sha", + filename="metrabs_fake.tar.gz", + ) monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) estimator = Estimator() @@ -312,6 +322,88 @@ class TestPerformanceMetrics: assert result.metrics.tensorflow_version not in {"", "unknown"} +class TestProvenance: + """Provenance attachment to VideoPredictions. + + Covers the two relevant paths: the injected-model path (no SHA + known → ``provenance=None`` on output) and the ``load_model`` path + (SHA is known → full ``Provenance`` populated and attached). 
+ """ + + def test_injected_model_produces_no_provenance( + self, + synthetic_video: Path, + fake_metrabs_model, + ) -> None: + estimator = Estimator(model=fake_metrabs_model) + result = estimator.process_video(synthetic_video) + assert result.predictions.provenance is None + assert estimator.model_sha256 is None + assert estimator.model_filename is None + + def test_loaded_model_populates_provenance( + self, + synthetic_video: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + import numpy as np + + from neuropose._model import LoadedModel + + class Recorder: + def detect_poses(self, image, **kwargs): + del image, kwargs + return { + "boxes": np.array([[0.0, 0.0, 1.0, 1.0, 0.9]]), + "poses3d": np.array([[[0.0, 0.0, 0.0]]]), + "poses2d": np.array([[[0.0, 0.0]]]), + } + + def fake_loader(cache_dir: Path | None = None) -> LoadedModel: + del cache_dir + return LoadedModel( + model=Recorder(), + sha256="e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + filename="metrabs_stub.tar.gz", + ) + + monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) + estimator = Estimator() + estimator.load_model() + result = estimator.process_video(synthetic_video) + + prov = result.predictions.provenance + assert prov is not None + assert prov.model_sha256.startswith("e3b0c44") + assert prov.model_filename == "metrabs_stub.tar.gz" + assert prov.numpy_version == np.__version__ + assert prov.python_version.count(".") == 2 # MAJOR.MINOR.MICRO + # neuropose_version should match the package's __version__ + from neuropose import __version__ as pkg_version + + assert prov.neuropose_version == pkg_version + # tensorflow_version should also be real (TF is in dev deps). 
+ assert prov.tensorflow_version not in {"", "unknown"} + + def test_model_sha256_and_filename_properties_after_load( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + from neuropose._model import LoadedModel + + def fake_loader(cache_dir: Path | None = None) -> LoadedModel: + del cache_dir + return LoadedModel(model=object(), sha256="abcd", filename="x.tar.gz") + + monkeypatch.setattr("neuropose.estimator.load_metrabs_model", fake_loader) + estimator = Estimator() + assert estimator.model_sha256 is None + assert estimator.model_filename is None + estimator.load_model() + assert estimator.model_sha256 == "abcd" + assert estimator.model_filename == "x.tar.gz" + + class TestErrors: def test_missing_video( self, diff --git a/tests/unit/test_io.py b/tests/unit/test_io.py index 339f23a..3c995e9 100644 --- a/tests/unit/test_io.py +++ b/tests/unit/test_io.py @@ -22,6 +22,7 @@ from neuropose.io import ( JointPairDistanceExtractor, JointSpeedExtractor, PerformanceMetrics, + Provenance, Segment, Segmentation, SegmentationConfig, @@ -278,6 +279,102 @@ class TestPerformanceMetricsModel: m.total_seconds = 2.0 +def _minimal_provenance() -> Provenance: + return Provenance( + model_sha256="a" * 64, + model_filename="metrabs_fake.tar.gz", + tensorflow_version="2.18.1", + numpy_version="2.0.2", + neuropose_version="0.1.0.dev0", + python_version="3.11.14", + ) + + +class TestProvenanceModel: + """Schema-level behaviour of :class:`neuropose.io.Provenance`.""" + + def test_roundtrip_through_json(self) -> None: + p = Provenance( + model_sha256="a" * 64, + model_filename="metrabs_fake.tar.gz", + tensorflow_version="2.18.1", + tensorflow_metal_version="1.2.0", + numpy_version="2.0.2", + neuropose_version="0.1.0.dev0", + python_version="3.11.14", + seed=42, + deterministic=True, + analysis_config={"step": "dtw", "nan_policy": "propagate"}, + ) + rehydrated = Provenance.model_validate(p.model_dump(mode="json")) + assert rehydrated == p + + def 
test_optional_fields_default_to_none_and_false(self) -> None: + p = _minimal_provenance() + assert p.tensorflow_metal_version is None + assert p.seed is None + assert p.deterministic is False + assert p.analysis_config is None + + def test_is_frozen(self) -> None: + p = _minimal_provenance() + with pytest.raises(ValidationError): + p.model_sha256 = "different" + + def test_extra_fields_forbidden(self) -> None: + # Construct via model_validate so pyright doesn't have to prove the + # keyword doesn't exist on the class at static-type time. + with pytest.raises(ValidationError): + Provenance.model_validate( + { + "model_sha256": "x" * 64, + "model_filename": "x.tar.gz", + "tensorflow_version": "2.18", + "numpy_version": "2.0", + "neuropose_version": "0.1", + "python_version": "3.11.14", + "unknown_field": "bogus", + } + ) + + +class TestVideoPredictionsProvenance: + """``provenance`` field on :class:`VideoPredictions` round-trips.""" + + def test_default_is_none(self) -> None: + vp = VideoPredictions( + metadata=VideoMetadata(frame_count=0, fps=30.0, width=32, height=32), + frames={}, + ) + assert vp.provenance is None + + def test_roundtrip_with_provenance(self, tmp_path: Path) -> None: + prov = Provenance( + model_sha256="f" * 64, + model_filename="metrabs.tar.gz", + tensorflow_version="2.18.1", + numpy_version="2.0.2", + neuropose_version="0.1.0.dev0", + python_version="3.11.14", + ) + vp = VideoPredictions( + metadata=VideoMetadata(frame_count=1, fps=30.0, width=32, height=32), + frames={ + "frame_000000": FramePrediction( + boxes=[[0.0, 0.0, 32.0, 32.0, 0.9]], + poses3d=[[[1.0, 2.0, 3.0]]], + poses2d=[[[10.0, 20.0]]], + ) + }, + provenance=prov, + ) + path = tmp_path / "vp.json" + save_video_predictions(path, vp) + loaded = load_video_predictions(path) + assert loaded == vp + assert loaded.provenance == prov + + class TestBenchmarkResultPersistence: def test_roundtrip_to_disk(self, tmp_path: Path) -> None: result = BenchmarkResult(