From 3720911dac56d0e01f30176966c10b49b3d12baf Mon Sep 17 00:00:00 2001 From: Levi Neuwirth Date: Mon, 13 Apr 2026 18:08:05 -0400 Subject: [PATCH] deployment --- .github/workflows/docs.yml | 72 ++++++++++++ docs/api/config.md | 3 + docs/api/estimator.md | 3 + docs/api/interfacer.md | 3 + docs/api/io.md | 3 + docs/api/visualize.md | 3 + docs/architecture.md | 198 +++++++++++++++++++++++++++++++ docs/deployment.md | 145 +++++++++++++++++++++++ docs/development.md | 167 ++++++++++++++++++++++++++ docs/getting-started.md | 235 +++++++++++++++++++++++++++++++++++++ docs/index.md | 83 +++++++++++++ mkdocs.yml | 99 ++++++++++++++++ pyproject.toml | 2 + 13 files changed, 1016 insertions(+) create mode 100644 .github/workflows/docs.yml create mode 100644 docs/api/config.md create mode 100644 docs/api/estimator.md create mode 100644 docs/api/interfacer.md create mode 100644 docs/api/io.md create mode 100644 docs/api/visualize.md create mode 100644 docs/architecture.md create mode 100644 docs/deployment.md create mode 100644 docs/development.md create mode 100644 docs/getting-started.md create mode 100644 docs/index.md create mode 100644 mkdocs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..5946683 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,72 @@ +# --------------------------------------------------------------------------- +# NeuroPose docs build. +# +# Builds the mkdocs-material site on every push to main and on every PR +# that touches docs/ or mkdocs.yml, and uploads the rendered site as a +# workflow artifact for review. +# +# Deployment to GitHub Pages is intentionally NOT wired up yet: the repo +# is private until the data-handling policy (docs/data-policy.md) lands +# and is reviewed, and GH Pages for private repos requires a paid plan. +# When the repo flips public, add a deploy job that uploads the artifact +# via ``actions/deploy-pages@v4``. 
+# --------------------------------------------------------------------------- +name: Docs + +on: + push: + branches: [main] + paths: + - "docs/**" + - "mkdocs.yml" + - "src/neuropose/**" # API reference reflects source docstrings + - ".github/workflows/docs.yml" + pull_request: + branches: [main] + paths: + - "docs/**" + - "mkdocs.yml" + - "src/neuropose/**" + - ".github/workflows/docs.yml" + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: docs-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + UV_VERSION: "0.9.16" + PYTHON_VERSION: "3.11" + +jobs: + build: + name: Build (mkdocs --strict) + runs-on: ubuntu-24.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + version: ${{ env.UV_VERSION }} + python-version: ${{ env.PYTHON_VERSION }} + enable-cache: true + + - name: Install project + dev dependencies + # Docs build needs the project importable so mkdocstrings can + # introspect the source modules for the API reference pages. 
+ run: uv sync --group dev + + - name: Build site (strict) + run: uv run mkdocs build --strict + + - name: Upload rendered site as artifact + uses: actions/upload-artifact@v4 + with: + name: neuropose-docs-${{ github.sha }} + path: site/ + retention-days: 14 diff --git a/docs/api/config.md b/docs/api/config.md new file mode 100644 index 0000000..759b3e3 --- /dev/null +++ b/docs/api/config.md @@ -0,0 +1,3 @@ +# `neuropose.config` + +::: neuropose.config diff --git a/docs/api/estimator.md b/docs/api/estimator.md new file mode 100644 index 0000000..fe0f094 --- /dev/null +++ b/docs/api/estimator.md @@ -0,0 +1,3 @@ +# `neuropose.estimator` + +::: neuropose.estimator diff --git a/docs/api/interfacer.md b/docs/api/interfacer.md new file mode 100644 index 0000000..0e6ab5c --- /dev/null +++ b/docs/api/interfacer.md @@ -0,0 +1,3 @@ +# `neuropose.interfacer` + +::: neuropose.interfacer diff --git a/docs/api/io.md b/docs/api/io.md new file mode 100644 index 0000000..fa5fe0e --- /dev/null +++ b/docs/api/io.md @@ -0,0 +1,3 @@ +# `neuropose.io` + +::: neuropose.io diff --git a/docs/api/visualize.md b/docs/api/visualize.md new file mode 100644 index 0000000..b307971 --- /dev/null +++ b/docs/api/visualize.md @@ -0,0 +1,3 @@ +# `neuropose.visualize` + +::: neuropose.visualize diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..c153bd3 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,198 @@ +# Architecture + +This page describes how NeuroPose is structured and why. It is the +document to read if you are about to modify the estimator, the daemon, +or the output schema, and want to understand the constraints the +existing design is trying to honour. 
+ +## Component overview + +NeuroPose is a three-stage pipeline: + +```text +┌───────────────────┐ ┌──────────────────┐ ┌───────────────────┐ +│ interfacer │ │ estimator │ │ analyzer │ +│ (daemon) │────▶│ (inference) │────▶│ (post-process) │ +│ │ │ │ │ │ +│ watches filesystem│ │ MeTRAbs wrapper │ │ DTW, features, │ +│ manages job state │ │ per-video worker │ │ classification │ +└───────────────────┘ └──────────────────┘ └───────────────────┘ + │ │ │ + ▼ ▼ ▼ + status.json + VideoPredictions analysis results + job directories (validated schema) (pending commit 10) +``` + +Each stage is a separate module with one job, and the contracts between +them are defined by validated pydantic schemas in +[`neuropose.io`](api/io.md). + +### estimator + +**Role:** pure inference library. Given a video path and a MeTRAbs +model, produces a validated `VideoPredictions` object. + +**Does NOT handle:** job directories, status files, polling, locking, +signal handling, visualization, or anywhere-to-save decisions. It is a +library, not a daemon. + +The estimator streams frames directly from OpenCV into the model — no +intermediate write-to-disk-then-read-back-as-PNG round trip like the +previous prototype had. `process_video()` returns a typed +`ProcessVideoResult` containing the predictions and does not touch the +filesystem unless the caller explicitly asks it to save the result. + +See [`neuropose.estimator`](api/estimator.md) for the API reference. + +### interfacer + +**Role:** job-lifecycle daemon. Watches `input_dir` for new job +subdirectories, dispatches each to an injected `Estimator`, and manages +the persistent `status.json` that tracks every job's lifecycle. + +**Owns:** the `input_dir → output_dir → failed_dir` transitions, the +single-instance lock, signal handling, and crash recovery. + +**Does NOT handle:** inference — that is the estimator's job, which is +injected via the constructor so tests can supply a fake. 
+ +Key guarantees: + +- **Single instance.** An exclusive `fcntl.flock` on + `data_dir/.neuropose.lock` blocks a second daemon from running against + the same data directory. The lock is released automatically on + process exit, even SIGKILL. +- **Crash recovery.** On startup, any status entries left in + `processing` state are marked failed with an "interrupted" error and + their inputs quarantined. The operator decides whether to retry by + moving them back to `input_dir`. +- **Graceful shutdown.** SIGINT and SIGTERM request an orderly stop. + The current job finishes before the loop exits. +- **Structured errors.** Every failed job records a short + `": "` in its status entry so operators have + a grep target without digging through logs. + +See [`neuropose.interfacer`](api/interfacer.md) for the API reference. + +### analyzer (pending commit 10) + +**Role:** post-processing. Takes a `results.json` and produces analysis +output (DTW comparisons, joint-angle features, classification). Each +piece is a pure function of the predictions, so the module is a set of +testable utilities rather than a daemon. + +Pending the commit-10 rewrite. The previous prototype's `analyzer.py` +was non-functional (it had imports that could not resolve and +infinite-recursion bugs) and is not being ported forward. + +## Data flow + +```text + ┌──────────────────────────┐ + │ $XDG_DATA_HOME/neuropose/│ + └──────────────────────────┘ + │ + ┌─────────────┼─────────────┐ + ▼ ▼ ▼ + jobs/in/ jobs/out/ jobs/failed/ + │ ▲ ▲ + │ discovered │ on success │ on failure + │ │ │ + └─────────▶ process_job ────┘ + │ + ▼ + status.json (atomic) +``` + +1. The operator drops a video (or several) into + `data_dir/in//`. +2. The daemon detects the new job directory on its next poll. +3. For each video in the job, the estimator runs inference and returns + a `VideoPredictions` object. +4. The daemon aggregates per-video predictions into a `JobResults` + object and writes it to `data_dir/out//results.json`. 
+5. The status entry is updated to `completed`, with the path to + `results.json` recorded. +6. On catastrophic failure (no videos, decode error, model crash), the + job's input directory is moved to `data_dir/failed//` and + the status entry is updated to `failed` with an error message. + +All filesystem writes that affect application state (status file, job +results) go through atomic tmp-file-then-rename helpers in +[`neuropose.io`](api/io.md), so a crash mid-write cannot leave a +truncated file behind. + +## Runtime directory layout + +The daemon operates within a single base `data_dir`: + +```text +$data_dir/ +├── .neuropose.lock # fcntl lock file; contains owner PID +├── in/ +│ ├── job_001/ # operator-created +│ │ ├── video_01.mp4 +│ │ └── video_02.mp4 +│ └── job_002/ +│ └── trial.mov +├── out/ +│ ├── status.json # persistent lifecycle state +│ ├── job_001/ +│ │ └── results.json # aggregated JobResults +│ └── job_002/ +│ └── results.json +└── failed/ + └── job_003/ # quarantined inputs + └── broken_video.mov +``` + +`data_dir` defaults to `$XDG_DATA_HOME/neuropose/jobs` and **is never +inside the repository.** This is deliberate: the previous prototype kept +job directories under `backend/neuropose/in/`, which is exactly how +subject-identifying data ended up on the same tree as `git add`. The +current design makes it mechanically difficult for subject data to +leak into source control. + +Model weights are cached separately at `$XDG_DATA_HOME/neuropose/models/`. + +## Design principles + +A few choices run through every module and are worth knowing if you +plan to extend the package: + +**Immutable schemas.** `FramePrediction` and `VideoMetadata` are +frozen pydantic models. The previous prototype had a bug where its +visualizer mutated `poses3d` in place via a numpy view, invisibly +corrupting the data if you visualized before saving. The frozen schema +makes that class of bug impossible. 
+ +**Validate at the boundary.** Every load/save helper in `neuropose.io` +validates on entry. Malformed files fail at load time with a pydantic +validation error, not three call sites later as an `AttributeError` on +a missing key. + +**Library / daemon separation.** The estimator is pure library — give +it a video and a model, get back validated predictions. The daemon is +the wrapper that adds filesystem semantics. This makes the estimator +trivially testable (inject a fake model, inject any video) and lets +downstream users embed it in other pipelines without inheriting the +daemon's lifecycle. + +**Dependency injection.** The `Interfacer` takes its `Estimator` as a +constructor argument. Tests inject fakes; production wires the real +thing. There is no singleton model state. + +**No implicit config discovery.** Configuration is loaded explicitly +via `--config` or environment variables. The previous prototype's +`load_config('config.yaml')` was a relative path footgun — it worked +only when the daemon was launched from a specific directory. The new +`Settings` class refuses to guess. + +**Atomic writes for all stateful files.** Status file, job results, +predictions — every write goes through a tmp-file-then-rename so a +crash mid-write cannot corrupt state. + +**Fail fast, fail specifically.** Each module defines a small hierarchy +of typed exceptions (`EstimatorError`, `InterfacerError`, etc.). +Exception types carry semantic meaning; callers can distinguish +recoverable failures from programmer errors. diff --git a/docs/deployment.md b/docs/deployment.md new file mode 100644 index 0000000..3b46885 --- /dev/null +++ b/docs/deployment.md @@ -0,0 +1,145 @@ +# Deployment + +This page covers running NeuroPose in production — on a research +server, in a container, or as a managed system service. The target +audience is whoever is actually setting up the pipeline for a study. + +!!! 
warning "Data handling policy" + Before deploying NeuroPose against subject data, read the (pending) + `docs/data-policy.md` — it describes the IRB constraints on + retention, sharing, and derived-data handling. If you are reading + this before the data policy has landed, **pause and ask the project + lead** before proceeding. + +## Choosing a deployment mode + +| Mode | Use when | Notes | +|---|---|---| +| Local (bare) | Developer machine, one-off experiments | Fastest feedback loop. Use `neuropose process`. | +| Systemd service | Single-host lab server | Recommended for study runs. Auto-restart, log capture, clean shutdown. | +| Docker | Shared infra, CI pipelines, reproducible runs | Image build is pending commit 12. | +| Kubernetes | Multi-study labs with shared GPU pools | Not currently supported; would layer on top of the Docker image. | + +## Local (bare-metal) + +For one-off processing, the CLI is enough: + +```bash +neuropose --config ./config.yaml process path/to/video.mp4 +``` + +For batch mode, run the daemon in a `tmux` or `screen` session: + +```bash +tmux new -s neuropose +neuropose --config ./config.yaml --verbose watch +# Ctrl-B D to detach +``` + +## Systemd user service + +A systemd *user* unit (not a root-privileged one) is the right way to +run the daemon on a research server where the researcher owns the job +queue. 
+ +Create `~/.config/systemd/user/neuropose.service`: + +```ini +[Unit] +Description=NeuroPose job daemon +After=network-online.target + +[Service] +Type=simple +WorkingDirectory=%h/neuropose +Environment=XDG_DATA_HOME=%h/.local/share +ExecStart=%h/neuropose/.venv/bin/neuropose --config %h/neuropose/config.yaml watch +Restart=on-failure +RestartSec=10 + +[Install] +WantedBy=default.target +``` + +Enable it: + +```bash +systemctl --user daemon-reload +systemctl --user enable --now neuropose.service +journalctl --user -u neuropose.service -f +``` + +The interfacer's `fcntl`-based lock file prevents a second daemon from +starting if systemd restarts it before the first instance has fully +released the lock. + +## Docker + +*Pending commit 12.* The plan is to ship two Dockerfiles: + +- `Dockerfile` — CPU base, suitable for small studies. +- `Dockerfile.gpu` — CUDA base derived from `tensorflow/tensorflow:-gpu`. + +Both images will have the `neuropose` command as their `ENTRYPOINT` so +they can be invoked as: + +```bash +docker run --rm \ + -v /srv/neuropose:/data \ + -e NEUROPOSE_DATA_DIR=/data/jobs \ + -e NEUROPOSE_MODEL_CACHE_DIR=/data/models \ + ghcr.io/.../neuropose:latest \ + watch +``` + +## GPU considerations + +- NeuroPose delegates device selection to TensorFlow via the + `device` field in `Settings` (`"/CPU:0"` or `"/GPU:0"`). No multi-GPU + dispatch yet — a single daemon instance uses a single device. +- If you need to run inference on multiple GPUs in parallel, run one + daemon per GPU with distinct `data_dir` values and divide jobs + between them. The fcntl lock is keyed on the data directory, so + separate daemons on separate data dirs do not conflict. +- The first call to `Estimator.process_video` triggers MeTRAbs model + load, which in turn initializes the TensorFlow GPU runtime. Expect + a one-time startup delay of several seconds. + +## Log management + +The daemon writes to stdlib `logging`. Under systemd, logs land in the +user journal. 
For other deployment modes, redirect stdout/stderr to +your log collector of choice — NeuroPose writes one line per event with +a structured `%(asctime)s %(levelname)-8s %(name)s: %(message)s` +format, which any log aggregator can parse. + +Log verbosity is controlled via the CLI: + +```bash +neuropose --verbose watch # DEBUG +neuropose watch # INFO (default) +neuropose --quiet watch # WARNING +``` + +## Monitoring + +The canonical state of the daemon lives in +`$data_dir/out/status.json`, which is a JSON object keyed by job name. +A tiny Prometheus exporter or a nightly cron that tails the file is +enough to alert on stuck jobs. A richer monitoring story is out of +scope for v0.1. + +## Backups and retention + +Two things are worth backing up: + +1. `$data_dir/out/*/results.json` — the aggregated predictions for each + job. These are the outputs of the research process. +2. `$data_dir/out/status.json` — the daemon's record of which jobs ran + when, which failed, and why. + +**Do not back up `$data_dir/in/` or `$data_dir/failed/` indiscriminately.** +These contain source video files that may be IRB-protected subject data, +and your backup store may not be covered by the same data-handling +agreement as the primary server. Consult the (pending) +`docs/data-policy.md` before designing a retention plan. diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 0000000..21d90c2 --- /dev/null +++ b/docs/development.md @@ -0,0 +1,167 @@ +# Development + +This page is for contributors working on NeuroPose itself. + +## Environment setup + +NeuroPose uses [`uv`](https://github.com/astral-sh/uv) for dependency +management and Python 3.11. 
After cloning the repository: + +```bash +uv venv --python 3.11 +source .venv/bin/activate +uv sync --group dev +uv run pre-commit install +``` + +`uv sync --group dev` installs the project in editable mode alongside +the full dev dependency set (pytest, ruff, pyright, pre-commit, +mkdocs-material, and mkdocstrings). + +`pre-commit install` wires the git hooks declared in +`.pre-commit-config.yaml` into your local repo so every commit is +linted, formatted, and scanned for secrets before it lands. + +## Running tests + +Unit tests are fast and do not require the MeTRAbs model or TensorFlow +inference: + +```bash +uv run pytest +``` + +Integration tests that require a downloaded model are marked with +`@pytest.mark.slow` and are skipped by default. Run them with: + +```bash +uv run pytest -m slow +``` + +Run a specific test file or test class: + +```bash +uv run pytest tests/unit/test_estimator.py +uv run pytest tests/unit/test_estimator.py::TestProcessVideo +uv run pytest -k "frame_count" +``` + +The autouse `_isolate_environment` fixture in `tests/conftest.py` points +`$HOME` and `$XDG_DATA_HOME` at a per-test temp directory, so no test +can accidentally write to your real home directory. It also clears any +`NEUROPOSE_*` variables from your shell so test outcomes do not depend +on who is running them. + +## Linting and formatting + +NeuroPose uses [`ruff`](https://docs.astral.sh/ruff/) for both lint and +format. Configuration lives in `pyproject.toml` under `[tool.ruff]`. + +```bash +uv run ruff check . # Lint +uv run ruff check --fix . # Lint + auto-fix +uv run ruff format . # Format (equivalent to black) +uv run ruff format --check . # Verify formatted +``` + +The selected lint rules are deliberately broad — pycodestyle, pyflakes, +isort, bugbear, pyupgrade, simplify, ruff-specific, pep8-naming, +comprehensions, pathlib, pytest-style, tidy-imports, numpy-specific, +and pydocstyle (numpy convention). 
The rationale is "lint noise early +rather than cruft late": we would rather annoy a contributor with a +style fix than let a real bug slip through because the linter was lax. + +## Type checking + +NeuroPose uses [`pyright`](https://github.com/microsoft/pyright) in +`standard` mode (not `strict` — the TensorFlow / OpenCV / scikit-learn +stubs would generate thousands of false positives under strict). The +plan is to tighten toward strict after the MeTRAbs stack is pinned in +commit 11. + +```bash +uv run pyright +``` + +## Documentation + +Documentation is built with [MkDocs](https://www.mkdocs.org/) and the +[Material theme](https://squidfunk.github.io/mkdocs-material/). API +reference pages are auto-generated from the source docstrings by +[mkdocstrings](https://mkdocstrings.github.io/). + +Live preview at `http://localhost:8000`: + +```bash +uv run mkdocs serve +``` + +Strict build (the same one CI runs): + +```bash +uv run mkdocs build --strict +``` + +`--strict` promotes every warning (broken internal link, missing nav +entry, unparseable docstring) to an error, so broken docs fail the +build instead of silently producing a broken site. + +Adding a new module means: + +1. Write it with numpy-style docstrings (the plugin's + `docstring_style: numpy` setting). +2. Add a stub page under `docs/api/` containing a single `:::` directive: + ```markdown + ::: neuropose.your_module + ``` +3. Add a nav entry in `mkdocs.yml` under `API Reference`. 
+ +## Project structure + +```text +neuropose/ +├── src/neuropose/ # The package itself +│ ├── config.py # pydantic-settings Settings class +│ ├── estimator.py # per-video MeTRAbs worker +│ ├── interfacer.py # filesystem-polling daemon +│ ├── visualize.py # matplotlib overlay rendering +│ ├── io.py # prediction schema + atomic save/load +│ ├── cli.py # typer CLI entrypoint +│ ├── _model.py # MeTRAbs loader (stub pending commit 11) +│ └── analyzer/ # post-processing (pending commit 10) +├── tests/ +│ ├── conftest.py # isolated env + synthetic video fixtures +│ ├── unit/ # fast, no model download +│ └── integration/ # marked slow, downloads the model +├── docs/ # this documentation +├── .github/workflows/ # CI + docs workflows +├── pyproject.toml # package metadata, deps, tool configs +└── mkdocs.yml # docs site configuration +``` + +## Commit hygiene + +- **Small commits.** Each commit should do one thing and leave the repo + in a green-CI state. +- **Descriptive commit messages.** The body should explain *why*, not + restate the diff. References to audit sections or issue numbers are + welcome. +- **No force-push on `main`.** Use a feature branch and open a merge + request on the primary forge. `main` is protected; the CI checks + must pass before merging. +- **No `git commit --no-verify`.** If a pre-commit hook fails, fix the + underlying issue rather than skipping the hook. The hooks exist + because the previous prototype was the poster child for what happens + when hygiene slips. + +## Release process + +*To be documented when the first tagged release is cut.* The short +version of the plan: + +1. Bump `version` in `pyproject.toml` and `__version__` in + `src/neuropose/__init__.py`. +2. Update `CHANGELOG.md`. +3. Tag the commit (`git tag v0.1.0`). +4. Push the tag. A release workflow builds the wheel + sdist and + uploads to PyPI once we claim the name. 
diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 0000000..c59db0b --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,235 @@ +# Getting Started + +This page walks through installing NeuroPose, running your first pose +estimation, and understanding the output. It targets researchers who are +comfortable on a Linux command line but may not have used the package +before. + +!!! info "Model loader status" + The MeTRAbs model loader is pending the commit-11 rewrite, during + which the upstream model URL and TensorFlow version will be pinned. + Until it lands, the `neuropose watch` and `neuropose process` + commands will exit with a clear "pending commit 11" message. The + Python API still works if you inject a model manually — see the + *Python API* section below for the current workaround. + +## Prerequisites + +- Linux (Ubuntu 22.04+ or equivalent) +- Python 3.11 +- [`uv`](https://github.com/astral-sh/uv) for dependency management +- CUDA-capable GPU (optional, recommended for long videos) +- Internet access on first run (for the model download, once the loader + lands) + +## Installation + +Clone the repository and install in editable mode: + +```bash +git clone https://git.levineuwirth.org/neuwirth/neuropose.git +cd neuropose +uv venv --python 3.11 +source .venv/bin/activate +uv sync --group dev +``` + +`uv sync --group dev` installs the runtime dependencies (pydantic, typer, +OpenCV, TensorFlow, matplotlib) plus the dev tooling (pytest, ruff, +pyright, pre-commit, mkdocs-material). The first run will download +TensorFlow, which is roughly 600 MB; subsequent runs hit the uv cache. + +Confirm the CLI is installed: + +```bash +neuropose --version +# neuropose 0.1.0.dev0 +``` + +## Configuration + +NeuroPose reads configuration from one of three sources, in order of +decreasing precedence: + +1. A YAML file passed via `--config`. +2. Environment variables prefixed with `NEUROPOSE_` (e.g. + `NEUROPOSE_DEVICE=/GPU:0`). +3. 
Built-in defaults. + +The default runtime data directory is `$XDG_DATA_HOME/neuropose/jobs` +(typically `~/.local/share/neuropose/jobs`). Runtime data never lives +inside the repository. + +A complete example config: + +```yaml title="config.yaml" +# TensorFlow device string. "/CPU:0" or "/GPU:N". +device: "/GPU:0" + +# Base directory for job inputs, outputs, and failed quarantine. +data_dir: "/srv/neuropose/jobs" + +# Where the MeTRAbs model is cached after download. +model_cache_dir: "/srv/neuropose/models" + +# How often the interfacer daemon scans the input directory. +poll_interval_seconds: 10 + +# Horizontal field-of-view passed to MeTRAbs. Override per call if you +# know the camera intrinsics; otherwise MeTRAbs's 55° default is fine. +default_fov_degrees: 55.0 +``` + +See the [`neuropose.config`](api/config.md) API reference for the full +list of fields and their validation rules. + +## Processing a single video + +The `process` subcommand is the quickest way to run the estimator on one +video: + +```bash +neuropose process path/to/video.mp4 +``` + +By default this writes `_predictions.json` in the current +working directory. Override with `--output`: + +```bash +neuropose process path/to/video.mp4 --output /srv/results/trial_01.json +``` + +## Running the daemon + +For batch processing, use the `watch` subcommand. Point a config at a +data directory, drop videos into job subdirectories under `data_dir/in/`, +and the daemon processes each one in order. + +```bash +# 1. Prepare the data directory +neuropose --config ./config.yaml watch & + +# 2. In another shell, add a job +mkdir -p /srv/neuropose/jobs/in/trial_01 +cp video_01.mp4 video_02.mp4 /srv/neuropose/jobs/in/trial_01/ + +# 3. The daemon will pick it up within poll_interval_seconds +# and write /srv/neuropose/jobs/out/trial_01/results.json +``` + +The daemon writes a persistent `status.json` tracking every job's +lifecycle. 
On startup, any jobs left in the `processing` state from a +previous crash are marked failed and their inputs are moved to +`data_dir/failed/` for operator review. See the +[`neuropose.interfacer`](api/interfacer.md) API reference for the full +lifecycle contract. + +Stop the daemon with `Ctrl-C` or `kill -TERM `. The current job +finishes before the loop exits. + +## Output schema + +Each processed video produces a JSON file with the following shape: + +```json +{ + "metadata": { + "frame_count": 180, + "fps": 30.0, + "width": 1920, + "height": 1080 + }, + "frames": { + "frame_000000": { + "boxes": [[10.2, 20.5, 200.0, 400.0, 0.97]], + "poses3d": [[[x, y, z], ...]], + "poses2d": [[[x, y], ...]] + }, + "frame_000001": { ... } + } +} +``` + +Key details: + +- **Frame identifiers** are `frame_000000`, `frame_000001`, ... + (six-digit zero-padded). These are identifiers, not filenames — no + PNG files exist on disk. +- **`boxes`** are `[x, y, width, height, confidence]` in pixels. +- **`poses3d`** are `[x, y, z]` in millimetres, per the MeTRAbs + convention. +- **`poses2d`** are `[x, y]` in pixels. +- **`metadata`** carries the source video's frame count, fps, and + resolution. This is essential for reproducibility — downstream + analysis can convert frame indices to real time without needing the + original video file. + +Use [`neuropose.io.load_video_predictions`](api/io.md) to read the JSON +back into a validated `VideoPredictions` object. + +## Python API + +For scripting, debugging, or integrating NeuroPose into a larger +pipeline, you can use the `Estimator` class directly. This is also the +current workaround for the pending model loader: + +```python +from neuropose.estimator import Estimator +from neuropose.io import save_video_predictions +from pathlib import Path + +# Load the MeTRAbs model however you like — e.g. via tensorflow_hub once +# you know the canonical URL. Until commit 11 pins it, you'll need to +# load it yourself here. 
+import tensorflow_hub as tfhub +model = tfhub.load("...") # TODO: pin upstream URL + +estimator = Estimator(model=model, device="/GPU:0") +result = estimator.process_video(Path("trial_01.mp4")) + +print(f"Processed {result.frame_count} frames") +save_video_predictions(Path("trial_01_predictions.json"), result.predictions) +``` + +You can also wire up a progress callback for long videos: + +```python +from rich.progress import Progress + +with Progress() as progress: + task = progress.add_task("Processing", total=None) + result = estimator.process_video( + Path("trial_01.mp4"), + progress=lambda processed, total_hint: progress.update(task, completed=processed), + ) +``` + +## Visualization + +To generate per-frame overlay images (2D skeleton on the source frame +plus a 3D scatter plot), use `neuropose.visualize`: + +```python +from neuropose.visualize import visualize_predictions + +visualize_predictions( + video_path=Path("trial_01.mp4"), + predictions=result.predictions, + output_dir=Path("trial_01_viz/"), + frame_indices=[0, 30, 60, 90], # pick a handful of frames for spot-checking +) +``` + +Visualization is a separate module to keep the estimator's import graph +free of matplotlib. Matplotlib's `Agg` backend is set inside the +function, so importing `neuropose.visualize` has no global side effects. + +## Troubleshooting + +| Problem | Resolution | +|---|---| +| `error: pending commit 11` from `neuropose watch` or `process` | The model loader is not yet implemented. Use the Python API with a manually-loaded model. | +| `AlreadyRunningError` from the daemon | Another NeuroPose daemon already holds the lock file. Check `data_dir/.neuropose.lock` for the PID. | +| `VideoDecodeError` on valid-looking video | The file may be corrupted or in a codec OpenCV was built without. Try re-encoding with `ffmpeg -i in.mov -c:v libx264 out.mp4`. 
| +| Jobs stuck in `processing` state on startup | The daemon now recovers these automatically — they'll be marked failed and quarantined to `data_dir/failed/` on the next run. | +| Daemon not detecting a new job | Check that the job is inside a **subdirectory** of `data_dir/in/`, not directly in `data_dir/in/`. Empty subdirectories are silently skipped (the daemon assumes you are still copying files). | diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..1c244bc --- /dev/null +++ b/docs/index.md @@ -0,0 +1,83 @@ +# NeuroPose + +3D human pose estimation pipeline for clinical movement research, built on +[MeTRAbs](https://github.com/isarandi/metrabs). Developed by the Shu Lab at +Brown University. + +!!! warning "Pre-alpha software" + NeuroPose is under active development at version `0.1.0.dev0`. APIs, + schemas, and the command-line interface may change without notice + between commits until the first tagged release. This is research + software and **must not** be used for clinical decision-making. + +## What NeuroPose does + +NeuroPose takes a video (or a directory of videos organised into "jobs"), +runs the MeTRAbs 3D pose-estimation model on every frame, and produces a +validated JSON output containing per-frame 3D and 2D joint positions and +the original video's metadata (frame count, fps, resolution). The output +schema is designed to be loaded back into Python, numpy, or any downstream +analysis pipeline without ambiguity. + +Three core components: + +- **`neuropose.estimator`** — the per-video inference worker. Streams + frames from an input video, runs MeTRAbs on each one, and returns a + validated `VideoPredictions` object. No filesystem or job-queue + semantics. +- **`neuropose.interfacer`** — a filesystem-polling daemon that watches an + input directory for new job subdirectories, dispatches each to the + estimator, and manages the status-file lifecycle. 
+- **`neuropose.analyzer`** — a post-processing subpackage for motion + analysis and classification (FastDTW, joint-angle features, sktime). + *(Pending the rewrite in commit 10.)* + +## Where to go next + +
+ +- :material-rocket-launch: **[Getting Started](getting-started.md)** — + install, run your first job, understand the output. + +- :material-cube-outline: **[Architecture](architecture.md)** — how the + pieces fit together and why. + +- :material-api: **[API Reference](api/config.md)** — auto-generated from + the source docstrings. + +- :material-tools: **[Development](development.md)** — contributing, + testing, and the release workflow. + +- :material-server: **[Deployment](deployment.md)** — running the daemon + in production. + +
+ +## Intended use + +NeuroPose is built for: + +- Clinical gait and movement-assessment research +- Biomechanics work using standard RGB video +- Research reproducibility — the output schema carries enough metadata + (frame count, fps, resolution) to recover real time from frame indices + without needing access to the original video. + +It is **not** intended for: + +- Clinical diagnosis or treatment decisions. +- General-purpose motion capture outside the research use cases actively + supported by the Shu Lab. + +## Citing NeuroPose + +If you use NeuroPose in academic work, please cite it using the metadata +in [`CITATION.cff`](https://git.levineuwirth.org/neuwirth/neuropose/src/branch/main/CITATION.cff). +A DOI and a manuscript citation will be added once the first paper is +submitted. + +## License and attribution + +NeuroPose is distributed under the MIT License. It builds on MeTRAbs +(Copyright © 2020 István Sárándi), also distributed under MIT. Full +attribution lives in [`AUTHORS.md`](https://git.levineuwirth.org/neuwirth/neuropose/src/branch/main/AUTHORS.md). diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..71966cd --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,99 @@ +# --------------------------------------------------------------------------- +# NeuroPose documentation site configuration. +# +# Local preview: uv run mkdocs serve +# Strict build: uv run mkdocs build --strict (run by .github/workflows/docs.yml) +# +# The API Reference pages are generated by mkdocstrings from the module +# docstrings; adding a new module means adding a stub file under docs/api/ +# and a nav entry below. +# --------------------------------------------------------------------------- + +site_name: NeuroPose +site_description: 3D human pose estimation pipeline for clinical movement research. 
+site_url: https://levineuwirth.github.io/neuropose/ # TODO(review): confirm — Pages deploy is not wired up yet (see .github/workflows/docs.yml) and the repo lives on git.levineuwirth.org, not github.io
+repo_url: https://git.levineuwirth.org/neuwirth/neuropose
+repo_name: neuwirth/neuropose
+edit_uri: _edit/main/docs/
+copyright: Copyright © 2026 The NeuroPose Authors
+
+theme:
+  name: material
+  features:
+    - navigation.tabs
+    - navigation.top
+    - navigation.tracking
+    - content.code.copy
+    - content.code.annotate
+    - content.action.edit
+    - search.suggest
+    - search.highlight
+    - toc.follow
+  palette:
+    - media: "(prefers-color-scheme: light)"
+      scheme: default
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/toggle-switch
+        name: Switch to dark mode
+    - media: "(prefers-color-scheme: dark)"
+      scheme: slate
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/toggle-switch-off-outline
+        name: Switch to light mode
+  icon:
+    repo: fontawesome/brands/git-alt
+
+plugins:
+  - search
+  - mkdocstrings:
+      handlers:
+        python:
+          options:
+            show_source: true
+            show_root_heading: true
+            show_root_toc_entry: false
+            show_category_heading: true
+            show_signature_annotations: true
+            separate_signature: true
+            docstring_style: numpy
+            docstring_section_style: table
+            members_order: source
+            filters:
+              - "!^_"
+
+markdown_extensions: # NOTE(review): docs/index.md uses :material-*: icon shortcodes, but pymdownx.emoji (with the material.extensions.emoji index/generator) is not enabled, so they render as literal text — add it when regenerating this patch
+  - admonition
+  - attr_list
+  - def_list
+  - footnotes
+  - md_in_html
+  - tables
+  - toc:
+      permalink: true
+  - pymdownx.details
+  - pymdownx.highlight:
+      anchor_linenums: true
+      line_spans: __span
+      pygments_lang_class: true
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.superfences
+  - pymdownx.tabbed:
+      alternate_style: true
+
+nav:
+  - Home: index.md
+  - Getting Started: getting-started.md
+  - Architecture: architecture.md
+  - API Reference:
+      - neuropose.config: api/config.md
+      - neuropose.estimator: api/estimator.md
+      - neuropose.interfacer: api/interfacer.md
+      - neuropose.io: api/io.md
+      - neuropose.visualize: api/visualize.md
+  - Development: development.md
+  - Deployment: deployment.md
diff --git a/pyproject.toml b/pyproject.toml
index eed8119..3950a8a
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,8 @@ dev = [ "ruff>=0.8", "pyright>=1.1.390", "pre-commit>=4.0", + "mkdocs-material>=9.5", + "mkdocstrings[python]>=0.26", ] # ---------------------------------------------------------------------------