Robustness fixes

2026-05-03 20:56:46 -04:00 · 2026-05-03 20:56:46 -04:00 · d22f48ae08
parent 1996db37b2
commit d22f48ae08
6 changed files with 749 additions and 56 deletions
--- a/crates/levcs-cli/src/repo_cmds.rs
+++ b/crates/levcs-cli/src/repo_cmds.rs
@ -783,6 +783,16 @@ pub fn branch(args: BranchArgs) -> Result<()> {
            _ => bail!("branch tip is not a commit"),
        };
        repo.checkout_tree(tree_id, &repo.workdir)?;
        // Refresh the index from the new tree. Without this the index
        // keeps the previous branch's blob hashes — invisible to most
        // workflows because the next `commit` rebuilds the index from
        // the working tree, but visible to anything that compares
        // index-vs-workdir (e.g., the merge command's dirty-tree
        // precondition, which would otherwise false-positive on every
        // branch switch).
        let mut idx = Index::new();
        rebuild_index_from_tree(&repo, tree_id, "", &mut idx)?;
        repo.write_index(&idx)?;
        eprintln!("switched to branch {name}");
    }
    if let Some(name) = args.delete {
@ -806,6 +816,51 @@ pub fn merge(args: MergeArgs) -> Result<()> {
    merge_run(args)
 }
 /// Lists every tracked path whose on-disk state no longer matches the index.
 ///
 /// A path is reported when it is tracked in the index and any of the
 /// following holds:
 /// - the working-tree walk did not yield it (tracked but missing on disk),
 /// - its contents cannot be read, or
 /// - the blob id of its current bytes differs from the indexed hash.
 ///
 /// `merge` (fast-forward and three-way alike) uses this as a precondition:
 /// a non-empty result means the merge would overwrite uncommitted work, so
 /// the caller should refuse to proceed. The returned paths are sorted.
 fn dirty_tracked_paths(repo: &Repository) -> Result<Vec<String>> {
    let index = repo.read_index()?;
    // Workdir-relative, forward-slash paths of everything the walk finds.
    let on_disk = repo
        .walk_workdir()?
        .into_iter()
        .map(|abs| -> Result<String> {
            let rel = abs.strip_prefix(&repo.workdir)?;
            Ok(rel.to_string_lossy().replace('\\', "/"))
        })
        .collect::<Result<HashSet<String>>>()?;
    let mut dirty: Vec<String> = (&index.entries)
        .into_iter()
        .filter(|entry| entry.flags.is_tracked())
        .filter(|entry| {
            // A tracked file removed from the working tree without a
            // `levcs commit` counts as dirty: the merge would resurrect it
            // (or compute against stale on-disk state).
            if !on_disk.contains(&entry.path) {
                return true;
            }
            match fs::read(repo.workdir.join(&entry.path)) {
                Ok(bytes) => Blob::new(bytes).object_id() != entry.blob_hash,
                // Unreadable is treated the same as modified.
                Err(_) => true,
            }
        })
        .map(|entry| entry.path.clone())
        .collect();
    dirty.sort();
    Ok(dirty)
 }
 fn merge_run(args: MergeArgs) -> Result<()> {
    let branch_name = args
        .branch
@ -815,6 +870,29 @@ fn merge_run(args: MergeArgs) -> Result<()> {
    if repo.levcs_dir.join("MERGE_HEAD").exists() {
        bail!("a merge is already in progress; run `levcs merge --abort` to cancel");
    }
    // Refuse to start a merge when tracked files have uncommitted changes —
    // both the fast-forward and three-way paths overwrite the working
    // tree, and silently clobbering local edits is the kind of bug that
    // costs users hours of work. Mirrors git's `Your local changes to
    // the following files would be overwritten by merge` precondition.
    let dirty = dirty_tracked_paths(&repo)?;
    if !dirty.is_empty() {
        let listing = dirty
            .iter()
            .take(10)
            .map(|p| format!("  {p}"))
            .collect::<Vec<_>>()
            .join("\n");
        let more = if dirty.len() > 10 {
            format!("\n  ... and {} more", dirty.len() - 10)
        } else {
            String::new()
        };
        bail!(
            "uncommitted changes to tracked files would be overwritten by merge:\n{listing}{more}\n\
             commit them (or revert to HEAD) before merging — see `levcs status`."
        );
    }
    let head = repo
        .refs
        .resolve_head()?
@ -1001,6 +1079,21 @@ fn merge_run(args: MergeArgs) -> Result<()> {
        record.files.push(fr);
    }
    // Repo-side policy ceiling: every handler reference in the record must
    // be permitted by `.levcs/merge.toml` (§6.6). This used to run *after*
    // we applied the merge to the working tree, which left the user's
    // files clobbered when the policy check then bailed. Validate up
    // front, before any disk write, so a rejected merge leaves the
    // working tree exactly as we found it.
    let allowed = load_merge_policy_allowed(&repo);
    let bad = validate_record_against_policy(&record, &allowed);
    if !bad.is_empty() {
        bail!(
            "merge produced records referencing handlers not in repository policy: {}",
            bad.join(", ")
        );
    }
    // Apply to working tree.
    for (path, bytes) in &merged_files {
        let abs = repo.workdir.join(path);
@ -1039,19 +1132,6 @@ fn merge_run(args: MergeArgs) -> Result<()> {
    }
    repo.write_index(&idx)?;
    // Repo-side policy ceiling: every handler reference in the record must
    // be permitted by `.levcs/merge.toml` (§6.6). Catch this before we
    // touch the working tree's merge state so an aborted policy-violating
    // merge leaves nothing to clean up.
    let allowed = load_merge_policy_allowed(&repo);
    let bad = validate_record_against_policy(&record, &allowed);
    if !bad.is_empty() {
        bail!(
            "merge produced records referencing handlers not in repository policy: {}",
            bad.join(", ")
        );
    }
    // Persist merge state and the in-progress merge-record.
    fs::write(repo.levcs_dir.join("MERGE_HEAD"), theirs_id.to_hex())?;
    fs::write(repo.levcs_dir.join("MERGE_BASE"), base_id.to_hex())?;
--- a/crates/levcs-cli/tests/merge.rs
+++ b/crates/levcs-cli/tests/merge.rs
@ -417,6 +417,102 @@ fn merge_local_toml_promotion_is_rejected() {
    assert!(e.contains("promote"), "error must say 'promote': {e}");
 }
 /// Regression test for the dirty-tree merge precondition: `levcs merge`
 /// must refuse to run, and must leave the disk alone, while a tracked file
 /// carries uncommitted edits. Before the guard existed those edits were
 /// silently overwritten.
 #[test]
 fn merge_refuses_when_workdir_has_uncommitted_changes() {
    let (work, xdg) = init_repo();
    let a_txt = work.join("a.txt");
    // Runs a levcs subcommand that is expected to exit with status 0.
    let ok = |args: &[&str]| assert_eq!(run(args, &work, &xdg).0, 0);
    // Base commit.
    std::fs::write(&a_txt, b"original\n").unwrap();
    ok(&["track", "--all"]);
    ok(&["commit", "-m", "base"]);
    // A `feat` branch with a divergent change, so a real merge would run.
    ok(&["branch", "--create", "feat"]);
    ok(&["branch", "--switch", "feat"]);
    std::fs::write(work.join("b.txt"), b"feat side\n").unwrap();
    ok(&["track", "--all"]);
    ok(&["commit", "-m", "feat add"]);
    // Back on main: edit `a.txt` and deliberately leave it uncommitted.
    ok(&["branch", "--switch", "main"]);
    std::fs::write(&a_txt, b"local-uncommitted-edit\n").unwrap();
    // The merge has to fail, and the error has to name the dirty file.
    let (code, _o, e) = run(&["merge", "feat"], &work, &xdg);
    assert_ne!(code, 0, "merge must refuse on dirty tree: stderr={e}");
    assert!(
        e.contains("uncommitted changes") && e.contains("a.txt"),
        "error must explain refusal and name the file: {e}"
    );
    // The local edit survives untouched and no merge state was created.
    let bytes = std::fs::read(&a_txt).unwrap();
    assert_eq!(
        bytes, b"local-uncommitted-edit\n",
        "user's uncommitted edit must be preserved when merge is refused"
    );
    for state_file in [".levcs/MERGE_HEAD", ".levcs/merge-record"] {
        assert!(!work.join(state_file).exists());
    }
 }
 /// A merge whose record violates the repo's `[policy].allowed_handlers`
 /// must fail BEFORE anything is written to the working tree. The policy
 /// check used to run after `fs::write`, so a rejected merge still left
 /// half-merged content on disk and stale blob hashes in the index.
 #[test]
 fn policy_violation_rejects_merge_before_writing_working_tree() {
    let (work, xdg) = init_repo();
    let a_txt = work.join("a.txt");
    // Runs a levcs subcommand that is expected to exit with status 0.
    let ok = |args: &[&str]| assert_eq!(run(args, &work, &xdg).0, 0);
    // Base commit, then diverging edits to `a.txt` on main and on feat.
    std::fs::write(&a_txt, b"original\n").unwrap();
    ok(&["track", "--all"]);
    ok(&["commit", "-m", "base"]);
    ok(&["branch", "--create", "feat"]);
    std::fs::write(&a_txt, b"main side\n").unwrap();
    ok(&["commit", "-m", "main"]);
    ok(&["branch", "--switch", "feat"]);
    std::fs::write(&a_txt, b"feat side\n").unwrap();
    ok(&["commit", "-m", "feat"]);
    // Back on main, install a policy whose allow-list contains only a
    // handler name that does not exist. Every handler the engine records
    // is therefore outside the list, so the policy check is expected to
    // flag the merge even though the engine produced a complete plan.
    // What we verify is that none of that plan's writes landed.
    ok(&["branch", "--switch", "main"]);
    let main_a = std::fs::read(&a_txt).unwrap();
    std::fs::write(
        work.join(".levcs/merge.toml"),
        b"[policy]\nallowed_handlers = [\"this-handler-does-not-exist\"]\n",
    )
    .unwrap();
    let (code, _, e) = run(&["merge", "feat"], &work, &xdg);
    assert_ne!(code, 0, "merge must fail under restrictive policy");
    assert!(
        e.contains("not in repository policy"),
        "error must explain policy mismatch: {e}"
    );
    // Working tree untouched: `a.txt` still holds main's content rather
    // than a partial merge result, and no merge state was persisted.
    let after = std::fs::read(&a_txt).unwrap();
    assert_eq!(
        after, main_a,
        "policy-rejected merge must not modify the working tree"
    );
    assert!(
        !work.join(".levcs/MERGE_HEAD").exists(),
        "policy-rejected merge must not leave MERGE_HEAD behind"
    );
    assert!(
        !work.join(".levcs/merge-record").exists(),
        "policy-rejected merge must not persist a merge-record"
    );
 }
 #[test]
 fn explain_dumps_merge_record() {
    let (work, xdg) = init_repo();
--- a/crates/levcs-identity/src/verify.rs
+++ b/crates/levcs-identity/src/verify.rs
@ -11,6 +11,7 @@
 //! remote-fetching shim.
 use std::collections::HashMap;
 use std::sync::Arc;
 use thiserror::Error;
@ -186,16 +187,86 @@ fn verify_authority_step(
 /// Verifies the authority chain starting at `start` all the way back to
 /// genesis and returns an owned copy of the genesis authority body.
 ///
 /// This is the one-shot form: it builds a fresh `ChainVerifier`, uses it
 /// for a single walk and discards it, so nothing is remembered between
 /// calls. Code that verifies many tips in a loop (mirror sync, a push
 /// handler iterating commits) should hold a `ChainVerifier` and call it
 /// directly; otherwise verifying N commits that cite the same authority
 /// costs O(N × chain_depth).
 pub fn verify_authority_chain<S: ObjectSource>(
    src: &S,
    start: ObjectId,
 ) -> Verification<AuthorityBody> {
    let genesis = ChainVerifier::new().verify_chain(src, start)?;
    // The verifier hands back a shared `Arc`; callers of this function
    // expect a plain body, so copy it out.
    Ok(genesis.as_ref().clone())
 }
 /// Caches authority chains that have been fully verified back to genesis.
 /// Hand the same verifier to a sequence of `verify_chain` / `verify_commit`
 /// / `verify_release` calls and the per-call chain-walk cost drops from
 /// O(chain depth) to O(1) once the requested authority has been seen.
 ///
 /// The cache is keyed by authority id, which is a BLAKE3 of the signed
 /// object — collisions are infeasible — so a hit is sound: the underlying
 /// bytes are guaranteed identical to whatever produced the original
 /// success. Insertions are all-or-nothing per call: a walk that fails
 /// midway leaves the cache untouched.
 ///
 /// Not internally synchronized (the verifying methods take `&mut self`).
 /// Callers that share a verifier across threads should wrap it in
 /// `Mutex<_>`.
 #[derive(Default)]
 pub struct ChainVerifier {
    /// Maps any id along a verified chain → the chain's genesis body.
    /// The same `Arc<AuthorityBody>` is shared across every entry that
    /// belongs to one chain, so the body itself is stored once per
    /// distinct chain; the map still holds one (id, pointer) entry per
    /// cached authority.
    verified: HashMap<ObjectId, Arc<AuthorityBody>>,
 }
 impl ChainVerifier {
    pub fn new() -> Self {
        Self::default()
    }
    /// Verify the authority chain rooted at `start` back to genesis.
    /// On a cache hit (any id along a previously-verified chain), returns
    /// the cached genesis body in O(1). On a cache miss, walks the chain,
    /// verifies each step, and on full success records every walked id
    /// against a single shared genesis body.
    pub fn verify_chain<S: ObjectSource>(
        &mut self,
        src: &S,
        start: ObjectId,
    ) -> Verification<Arc<AuthorityBody>> {
        if let Some(g) = self.verified.get(&start) {
            return Ok(g.clone());
        }
        // Walk back, accumulating the path. We don't insert anything into
        // the cache until the entire walk succeeds — a partial walk that
        // errors out must not leave half-trusted ids cached.
        let mut walked: Vec<ObjectId> = Vec::new();
        let mut cur_id = start;
        let mut cur_signed = read_signed(src, cur_id)?;
        let mut cur_body = AuthorityBody::parse(&cur_signed.body)
            .map_err(|e| VerifyError::Authority(e.to_string()))?;
-    while !cur_body.previous_authority.is_zero() {
+        let genesis: Arc<AuthorityBody> = loop {
            walked.push(cur_id);
            if cur_body.previous_authority.is_zero() {
                break Arc::new(verify_genesis(&cur_signed)?);
            }
            let prev_id = cur_body.previous_authority;
            // Cache hit on the predecessor: we still need to verify the
            // step from cur → prev (because the *step's* signature isn't
            // covered by prev being known-good), but the rest of the
            // chain back to genesis is already trusted.
            if let Some(g) = self.verified.get(&prev_id).cloned() {
                let prev_signed = read_signed(src, prev_id)?;
                let prev_body = AuthorityBody::parse(&prev_signed.body)
                    .map_err(|e| VerifyError::Authority(e.to_string()))?;
                verify_authority_step(&cur_signed, &cur_body, &prev_body, prev_id)?;
                break g;
            }
            let prev_signed = read_signed(src, prev_id)?;
            let prev_body = AuthorityBody::parse(&prev_signed.body)
                .map_err(|e| VerifyError::Authority(e.to_string()))?;
@ -203,10 +274,38 @@ pub fn verify_authority_chain<S: ObjectSource>(
            cur_signed = prev_signed;
            cur_body = prev_body;
            cur_id = prev_id;
        };
        for id in walked {
            self.verified.insert(id, genesis.clone());
        }
        Ok(genesis)
    }
    /// Cache-aware variant of the free function `verify_commit`. Identical
    /// semantics; the only behavioural difference is that authority chains
    /// visited in prior calls on this verifier don't get re-walked.
    ///
    /// `src` supplies the raw objects, `commit_id` names the commit to
    /// check, and `target_ref` is forwarded unchanged to the shared
    /// implementation (`verify_commit_inner`), with `self` as the chain
    /// cache.
    pub fn verify_commit<S: ObjectSource>(
        &mut self,
        src: &S,
        commit_id: ObjectId,
        target_ref: Option<&str>,
    ) -> Verification<()> {
        verify_commit_inner(src, commit_id, target_ref, self)
    }
    /// Cache-aware variant of the free function `verify_release`: forwards
    /// to the shared implementation (`verify_release_inner`) with `self`
    /// as the chain cache, so authority chains already verified through
    /// this verifier are not walked again.
    pub fn verify_release<S: ObjectSource>(
        &mut self,
        src: &S,
        release_id: ObjectId,
    ) -> Verification<()> {
        verify_release_inner(src, release_id, self)
    }
    /// Number of authority ids currently held in the cache. Compiled for
    /// tests only, where it is used to observe what a verification cached.
    #[cfg(test)]
    pub(crate) fn cache_size(&self) -> usize {
        self.verified.len()
    }
    let _ = cur_id;
    let body = verify_genesis(&cur_signed)?;
    Ok(body)
 }
 /// Verify a successor authority object against `A_old`, given the signer key
@ -303,10 +402,23 @@ pub fn role_for_commit(
 /// Runs the full §3.6 commit verification algorithm for `commit_id`.
 ///
 /// `target_ref` is the ref the commit is being applied to and is used for
 /// the protected-branch role checks; pass `None` when no ref applies
 /// (e.g., during walking).
 ///
 /// A throwaway `ChainVerifier` is created for every call, so the authority
 /// chain is walked from scratch each time. To share that work across many
 /// commits, such as a mirror sync verifying a branch's worth of tips,
 /// call `ChainVerifier::verify_commit` on one long-lived verifier instead.
 pub fn verify_commit<S: ObjectSource>(
    src: &S,
    commit_id: ObjectId,
    target_ref: Option<&str>,
 ) -> Verification<()> {
    let mut one_shot = ChainVerifier::new();
    verify_commit_inner(src, commit_id, target_ref, &mut one_shot)
 }
 fn verify_commit_inner<S: ObjectSource>(
    src: &S,
    commit_id: ObjectId,
    target_ref: Option<&str>,
    verifier: &mut ChainVerifier,
 ) -> Verification<()> {
    let bytes = src.read_raw(commit_id)?;
    let actual = blake3::hash(&bytes);
@ -356,7 +468,7 @@ pub fn verify_commit<S: ObjectSource>(
    let auth_signed = read_signed(src, commit.authority)?;
    let auth_body = AuthorityBody::parse(&auth_signed.body)
        .map_err(|e| VerifyError::Authority(e.to_string()))?;
-    let _ = verify_authority_chain(src, commit.authority)?;
+    let _ = verifier.verify_chain(src, commit.authority)?;
    let member = auth_body
        .find_member(&pk)
        .ok_or_else(|| VerifyError::Commit {
@ -425,6 +537,14 @@ pub fn verify_commit<S: ObjectSource>(
 /// against the authority body. A release with no listed members signing
 /// it is rejected.
 pub fn verify_release<S: ObjectSource>(src: &S, release_id: ObjectId) -> Verification<()> {
    // One-shot form: the chain cache lives only for this call.
    let mut one_shot = ChainVerifier::new();
    verify_release_inner(src, release_id, &mut one_shot)
 }
 fn verify_release_inner<S: ObjectSource>(
    src: &S,
    release_id: ObjectId,
    verifier: &mut ChainVerifier,
 ) -> Verification<()> {
    let bytes = src.read_raw(release_id)?;
    let actual = blake3::hash(&bytes);
    if *actual.as_bytes() != release_id.0 {
@ -455,7 +575,7 @@ pub fn verify_release<S: ObjectSource>(src: &S, release_id: ObjectId) -> Verific
            hash: release_id.to_hex(),
            kind: e.to_string(),
        })?;
-    let _ = verify_authority_chain(src, release.authority)?;
+    let _ = verifier.verify_chain(src, release.authority)?;
    let auth_signed = read_signed(src, release.authority)?;
    let auth_body = AuthorityBody::parse(&auth_signed.body)
        .map_err(|e| VerifyError::Authority(e.to_string()))?;
@ -731,4 +851,172 @@ mod tests {
        signed.body[0] ^= 0xFF; // tamper
        assert!(verify_signed_object(&signed).is_err());
    }
    /// Test double: an `ObjectSource` that forwards to a `MemorySource`
    /// and tallies how many times `read_raw` is called. The call count is
    /// how the tests observe a chain-cache hit, since a hit performs no
    /// reads at all.
    struct CountingSource<'a> {
        inner: &'a MemorySource,
        reads: std::cell::Cell<usize>,
    }
    impl<'a> CountingSource<'a> {
        /// Wraps `inner` with the read counter at zero.
        fn new(inner: &'a MemorySource) -> Self {
            let reads = std::cell::Cell::new(0);
            Self { inner, reads }
        }
        /// Total `read_raw` calls made through this wrapper so far.
        fn reads(&self) -> usize {
            self.reads.get()
        }
    }
    impl<'a> ObjectSource for CountingSource<'a> {
        fn read_raw(&self, id: ObjectId) -> Verification<Vec<u8>> {
            // Count the call first, then delegate; failed reads count too.
            let so_far = self.reads.get();
            self.reads.set(so_far + 1);
            self.inner.read_raw(id)
        }
    }
    /// Builds an authority chain of `length` links for the cache tests.
    ///
    /// Link 0 is a genesis authority with `alice` as its only member (an
    /// Owner); every later link copies the previous link's members and
    /// policy, points `previous_authority` at it, bumps `version`, and is
    /// signed by `alice`. Returns the backing `MemorySource` together with
    /// the authority ids in chain order (genesis first, tip last).
    ///
    /// Panics if `length` is zero.
    fn build_chain(length: usize, alice: &SecretKey) -> (MemorySource, Vec<ObjectId>) {
        assert!(length >= 1);
        let owner_pk = alice.public();
        let now = 1_700_000_000_000_000;
        // An object's id is the BLAKE3 hash of its serialized signed form.
        let id_of = |raw: &[u8]| ObjectId(*blake3::hash(raw).as_bytes());
        let mut root = AuthorityBody {
            schema_version: 1,
            repo_id: ObjectId([0u8; 32]),
            previous_authority: ObjectId([0u8; 32]),
            version: 1,
            created_micros: now,
            members: vec![crate::authority::MemberEntry {
                key: owner_pk,
                handle: "alice".into(),
                role: Role::Owner,
                added_micros: now,
                added_by: owner_pk,
            }],
            policy: vec![crate::authority::PolicyEntry {
                key: "public_read".into(),
                value: vec![0x01],
            }],
        };
        root.normalize().unwrap();
        root.assign_genesis_repo_id().unwrap();
        let root_bytes = sign_authority(&root, alice).unwrap().serialize();
        let root_id = id_of(root_bytes.as_slice());
        let mut store: HashMap<ObjectId, Vec<u8>> = HashMap::new();
        store.insert(root_id, root_bytes);
        let mut ids = vec![root_id];
        // `parent_*` always describe the most recently stored link.
        let mut parent_id = root_id;
        let mut parent_body = root;
        for v in 2..=length as u32 {
            let mut next = AuthorityBody {
                schema_version: parent_body.schema_version,
                repo_id: parent_body.repo_id,
                previous_authority: parent_id,
                version: v,
                created_micros: now + v as i64 * 1_000_000,
                members: parent_body.members.clone(),
                policy: parent_body.policy.clone(),
            };
            next.normalize().unwrap();
            let next_bytes = sign_authority(&next, alice).unwrap().serialize();
            let next_id = id_of(next_bytes.as_slice());
            store.insert(next_id, next_bytes);
            ids.push(next_id);
            parent_id = next_id;
            parent_body = next;
        }
        (MemorySource(store), ids)
    }
    /// The first verification of a length-N chain from its tip has to read
    /// every authority object at least once; repeating it through the
    /// same `ChainVerifier` must perform no reads at all. That is the
    /// user-visible win: O(N²) → O(N) total work for N commits citing the
    /// same chain.
    #[test]
    fn chain_verifier_caches_walked_authorities() {
        let alice = SecretKey::generate();
        let (mem, ids) = build_chain(5, &alice);
        let tip = *ids.last().unwrap();
        let src = CountingSource::new(&mem);
        let mut verifier = ChainVerifier::new();
        verifier.verify_chain(&src, tip).unwrap();
        let first_pass_reads = src.reads();
        assert!(
            first_pass_reads >= 5,
            "first verification must read every authority at least once; got {first_pass_reads}"
        );
        // Every walked authority is now cached (one entry per id), all
        // pointing at the same shared genesis body.
        assert_eq!(verifier.cache_size(), ids.len());
        // Second pass over the same tip: the read counter must not move.
        verifier.verify_chain(&src, tip).unwrap();
        assert_eq!(
            src.reads(),
            first_pass_reads,
            "cached re-verification must perform zero reads"
        );
    }
    /// After the tip has been verified, asking for one of its *ancestors*
    /// must also be a cache hit. This is the practical case during a sync
    /// where one tip points at `A_n` and a second tip points at `A_{n-1}`.
    #[test]
    fn chain_verifier_serves_ancestors_from_cache() {
        let alice = SecretKey::generate();
        let (mem, ids) = build_chain(4, &alice);
        let tip = *ids.last().unwrap();
        let ancestor = ids[1]; // A_2, the second link in the chain
        let src = CountingSource::new(&mem);
        let mut verifier = ChainVerifier::new();
        verifier.verify_chain(&src, tip).unwrap();
        let after_tip = src.reads();
        verifier.verify_chain(&src, ancestor).unwrap();
        assert_eq!(
            src.reads(),
            after_tip,
            "ancestor verification must hit cache, not re-walk"
        );
    }
    /// A failing verification must NOT pollute the cache. The test builds
    /// a chain, corrupts the stored bytes of the most recent authority,
    /// and checks that (a) verification of that tip fails, (b) the cache
    /// stays empty, and (c) a follow-up lookup of a still-valid ancestor
    /// really walks the chain (performs reads) rather than being served a
    /// phantom hit, and caches exactly what it walked.
    #[test]
    fn chain_verifier_does_not_cache_on_failure() {
        let alice = SecretKey::generate();
        let (mut mem, ids) = build_chain(3, &alice);
        let tip_id = *ids.last().unwrap();
        // Flip the byte at offset 12 of the tip's stored bytes in place.
        // The object is still filed under its old id, so its content no
        // longer matches what that id names.
        // NOTE(review): which check trips (hash mismatch, parse error,
        // signature, repo_id derivation) depends on what lives at that
        // offset; the test only relies on the walk failing.
        mem.0.get_mut(&tip_id).unwrap()[12] ^= 0xFF;
        let src = CountingSource::new(&mem);
        let mut verifier = ChainVerifier::new();
        // (a) the corrupted tip is rejected.
        assert!(
            verifier.verify_chain(&src, tip_id).is_err(),
            "corrupted tip must fail verification"
        );
        // (b) nothing from the failed walk was cached.
        assert_eq!(
            verifier.cache_size(),
            0,
            "failed verification must leave the cache empty"
        );
        // (c) the untouched predecessor still verifies, and does so by
        // reading objects; only the two ids it walked (itself and
        // genesis) end up cached.
        let before = src.reads();
        verifier
            .verify_chain(&src, ids[1])
            .expect("uncorrupted ancestor must still verify");
        assert!(
            src.reads() > before,
            "lookup after a failure must walk the chain, not hit a phantom cache entry"
        );
        assert_eq!(
            verifier.cache_size(),
            2,
            "only the ids walked by the successful verification may be cached"
        );
    }
 }
--- a/crates/levcs-instance/src/lib.rs
+++ b/crates/levcs-instance/src/lib.rs
@ -39,7 +39,7 @@ use levcs_identity::verify::{
 };
 use levcs_merge::engine::check_handler_allowed;
 use levcs_merge::record::MergeRecord;
-use levcs_protocol::auth::{verify_request, AuthRequest, DEFAULT_CLOCK_SKEW};
+use levcs_protocol::auth::{verify_request, AuthRequest, DEFAULT_CLOCK_SKEW, NONCE_TTL_SECS};
 use levcs_protocol::wire::{InfoResponse, InstanceInfo, RefList};
 use levcs_protocol::Pack;
@ -149,24 +149,66 @@ impl AppState {
    }
 }
 /// Replay-protection cache for §5.3 request nonces.
 ///
 /// `verify_request` already rejects timestamps outside ±`DEFAULT_CLOCK_SKEW`,
 /// so a nonce only needs to be remembered while its parent timestamp is
 /// still within the skew window — anything older is rejected for skew
 /// before the cache is even consulted. We use `NONCE_TTL_SECS` (the
 /// protocol-level constant) as the retention horizon, which is wider than
 /// the skew window so that a small clock difference between client and
 /// server can't open a replay window between the two checks.
 ///
 /// The earlier implementation was a `HashSet` that called `clear()` once
 /// it grew past a count cap. That was a real replay vulnerability: an
 /// attacker who captured a recent signed request could replay it the
 /// instant the cache wiped, regardless of how long the original was
 /// supposed to remain "seen." The TTL approach below is bounded in
 /// memory by the rate of accepted requests times the TTL — at typical
 /// federation load that's a few thousand entries, kilobytes of state.
 /// How many inserts to accept before sweeping expired entries. Eviction
 /// is O(len), so amortizing keeps the per-call cost O(1) average. Stale
 /// entries that sit in the map a little longer cost nothing — they
 /// would just match the TTL skew check upstream and be rejected anyway.
 const NONCE_EVICT_BATCH: usize = 1024;
 #[derive(Default)]
 pub struct NonceCache {
-    /// Maps nonce → expiry epoch (in micros).
+    /// `nonce → request timestamp (micros since epoch)`. We index by
-    seen: HashSet<[u8; 16]>,
+    /// timestamp rather than insertion time so a delayed request whose
    /// own clock is slightly behind ours can't sneak past TTL eviction.
    seen: HashMap<[u8; 16], i64>,
    inserts_since_evict: usize,
 }
 impl NonceCache {
-    pub fn check_and_insert(&mut self, nonce: [u8; 16]) -> bool {
+    /// Check whether `nonce` (carried with `request_ts_micros`) has been
-        if self.seen.contains(&nonce) {
+    /// seen, and if not, record it. `now_micros` is the verifier's notion
-            false
+    /// of the current time, used to evict stale entries periodically.
-        } else {
+    /// Returns `true` if the nonce was *new* (request should proceed),
-            self.seen.insert(nonce);
+    /// `false` if it was a replay.
-            // Cap memory by clearing periodically.
+    pub fn check_and_insert(
-            if self.seen.len() > 100_000 {
+        &mut self,
-                self.seen.clear();
+        nonce: [u8; 16],
        request_ts_micros: i64,
        now_micros: i64,
    ) -> bool {
        self.inserts_since_evict += 1;
        if self.inserts_since_evict >= NONCE_EVICT_BATCH {
            let cutoff = now_micros - NONCE_TTL_SECS * 1_000_000;
            self.seen.retain(|_, ts| *ts >= cutoff);
            self.inserts_since_evict = 0;
        }
        if self.seen.contains_key(&nonce) {
            return false;
        }
        self.seen.insert(nonce, request_ts_micros);
        true
    }
    #[cfg(test)]
    pub fn len(&self) -> usize {
        self.seen.len()
    }
 }
@ -201,6 +243,18 @@ struct ApiError(StatusCode, String);
 impl IntoResponse for ApiError {
    fn into_response(self) -> Response {
        // Surface every error response in the server log before sending it
        // to the client. Without this, 5xx and auth failures would vanish
        // — the client sees the body but nothing reaches the operator.
        // 5xx is a server-side bug worth `error!`; 4xx is the caller's
        // problem (bad signature, malformed pack, conflict) and lands at
        // `warn!` so it's still grep-able but doesn't blow up alerts.
        let level_5xx = self.0.is_server_error();
        if level_5xx {
            tracing::error!(status = %self.0, error = %self.1, "request failed");
        } else {
            tracing::warn!(status = %self.0, error = %self.1, "request rejected");
        }
        (self.0, self.1).into_response()
    }
 }
@ -444,7 +498,7 @@ fn verify_request_against(
    let auth = verify_request(&req, key, ts, nonce, sig, now, DEFAULT_CLOCK_SKEW)
        .map_err(|e| err(StatusCode::UNAUTHORIZED, e.to_string()))?;
    let mut cache = s.nonce_cache.lock().unwrap();
-    if !cache.check_and_insert(auth.nonce) {
+    if !cache.check_and_insert(auth.nonce, auth.timestamp_micros, now) {
        return Err(err(StatusCode::UNAUTHORIZED, "replayed nonce"));
    }
    Ok(AuthCheck { key: auth.key })
@ -875,3 +929,95 @@ fn find_merge_record(
 // Allow `verify_authority_chain` to use ObjectStore directly.
 // NOTE(review): the body is empty and no caller is visible in this part of
 // the file; presumably this exists only so the `VerifySource` name stays
 // referenced — confirm before removing. `dead_code` is allowed because the
 // function is not expected to be called.
 #[allow(dead_code)]
 fn _vs(_: &dyn VerifySource) {}
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Microseconds in one second; the cache API takes microsecond timestamps.
    const MICROS_PER_SEC: i64 = 1_000_000;
    /// Convert whole seconds into microseconds.
    fn micros_from_secs(secs: i64) -> i64 {
        secs * MICROS_PER_SEC
    }
    /// Build a 16-byte nonce whose first four bytes are `seq` (little
    /// endian) and whose final byte is `tag`; all other bytes are zero.
    fn numbered_nonce(seq: u32, tag: u8) -> [u8; 16] {
        let mut nonce = [0u8; 16];
        nonce[..4].copy_from_slice(&seq.to_le_bytes());
        nonce[15] = tag;
        nonce
    }
    /// Presenting the same nonce twice inside the TTL window must fail the
    /// second time. This is the baseline anti-replay guarantee; the cache
    /// satisfied it before the TTL rewrite as well, so passing here is
    /// necessary but not sufficient.
    #[test]
    fn nonce_replay_within_ttl_is_rejected() {
        let request_ts = micros_from_secs(1_700_000_000);
        let first_seen = request_ts + micros_from_secs(1);
        // One minute after the first sighting: still inside the TTL.
        let replay_at = first_seen + micros_from_secs(60);
        let nonce = [0x42u8; 16];
        let mut cache = NonceCache::default();
        let accepted = cache.check_and_insert(nonce, request_ts, first_seen);
        assert!(accepted);
        let replay_accepted = cache.check_and_insert(nonce, request_ts, replay_at);
        assert!(!replay_accepted);
    }
    /// An entry older than `NONCE_TTL_SECS` must be dropped by the next
    /// eviction sweep; that release is what keeps memory bounded.
    /// (`verify_request` rejects out-of-skew timestamps before the cache
    /// is consulted, so accepting the same nonce bytes again afterwards
    /// is safe.)
    ///
    /// The test inserts `NONCE_EVICT_BATCH` distinct nonces at a "now"
    /// beyond the TTL to force a sweep, then checks that the original
    /// nonce is accepted again.
    #[test]
    fn nonce_evicted_after_ttl_expires() {
        let mut cache = NonceCache::default();
        let expired_nonce = [0x42u8; 16];
        let inserted_at = micros_from_secs(1_700_000_000);
        assert!(cache.check_and_insert(expired_nonce, inserted_at, inserted_at));
        // Jump past the TTL, then push enough fresh nonces to trigger a
        // sweep. The 0xFF tag keeps them distinct from `expired_nonce`.
        let after_ttl = inserted_at + micros_from_secs(NONCE_TTL_SECS + 1);
        let filler = (0..(NONCE_EVICT_BATCH as u32)).map(|seq| numbered_nonce(seq, 0xFF));
        for fresh in filler {
            assert!(cache.check_and_insert(fresh, after_ttl, after_ttl));
        }
        // The expired entry has been swept, so the same bytes with a
        // fresh timestamp are admitted.
        assert!(cache.check_and_insert(expired_nonce, after_ttl, after_ttl));
    }
    /// Regression test for the original replay hole: the previous
    /// implementation called `seen.clear()` once the map grew past 100k
    /// entries, forgetting every recent nonce at once and letting any
    /// captured request still inside the 5-minute clock-skew window be
    /// replayed.
    ///
    /// The test records a "victim" nonce, floods the cache through
    /// several eviction passes with distinct nonces dated inside the same
    /// TTL window, and then replays the victim. Time-bounded eviction
    /// must still reject it because its timestamp has not aged out.
    /// Under the old count-bounded `clear()` the replay would have been
    /// accepted once the map was wiped, which is exactly what the final
    /// assertion guards against. The flood size is a small multiple of
    /// `NONCE_EVICT_BATCH`; the property only needs the eviction path to
    /// run, not any particular count.
    #[test]
    fn nonce_cache_does_not_drop_fresh_entries_under_load() {
        let mut cache = NonceCache::default();
        let base_ts = micros_from_secs(1_700_000_000);
        // First eight bytes all ones; flood nonces leave bytes 4..8 zero,
        // so they can never collide with the victim.
        let mut victim = [0u8; 16];
        victim[..8].copy_from_slice(&u64::MAX.to_le_bytes());
        // The request an attacker is assumed to have captured.
        assert!(cache.check_and_insert(victim, base_ts, base_ts));
        // Four batches of distinct nonces, one millisecond apart, all
        // still within the TTL so eviction has nothing to remove.
        let flood: u32 = (NONCE_EVICT_BATCH as u32) * 4;
        for seq in 0..flood {
            let tick = base_ts + i64::from(seq) * 1_000;
            assert!(cache.check_and_insert(numbered_nonce(seq, 0), tick, tick));
        }
        let replay_now = base_ts + micros_from_secs(60);
        let replay_accepted = cache.check_and_insert(victim, base_ts, replay_now);
        assert!(
            !replay_accepted,
            "replay of fresh nonce must be rejected even when cache is large"
        );
    }
 }
--- a/crates/levcs-instance/src/mirror.rs
+++ b/crates/levcs-instance/src/mirror.rs
@ -17,7 +17,7 @@ use std::time::Duration;
 use levcs_client::{Client, ClientError};
 use levcs_core::{ObjectId, ObjectStore, Refs, Repository};
-use levcs_identity::verify::{verify_authority_chain, verify_commit, verify_release};
+use levcs_identity::verify::ChainVerifier;
 use thiserror::Error;
 use crate::{InstanceConfig, MirrorConfig};
@ -122,22 +122,25 @@ pub fn sync_mirror(
        store.write_raw(&ent.bytes)?;
    }
    // Share one chain-verification cache across every per-tip check below.
    // Without this, each tip independently walks its authority chain back
    // to genesis — repeating identical work for every tip that cites the
    // same authority. With the cache the second tip onwards is O(1).
    let mut verifier = ChainVerifier::new();
    // Verify the authority chain on the announced current authority.
-    // Every commit / release we're about to advance to must chain back to
+    // This populates the cache with the entire chain so the per-tip
-    // a member that is rooted in `genesis_authority` — the chain walk
+    // checks below get cache hits.
    // checks that, so doing it once here covers all the per-tip checks
    // below. (verify_commit re-walks the chain internally; that is
    // redundant but cheap and keeps the per-tip checks self-contained.)
    if !info.current_authority.is_empty() {
        let cur_auth = parse_hash(&info.current_authority)?;
-        verify_authority_chain(&store, cur_auth)?;
+        verifier.verify_chain(&store, cur_auth)?;
    }
    // Per-branch verification — fully checks signature, author membership,
    // and authority chain. If verification fails on any tip we abort
    // before touching local refs, so a bad source can never poison us.
    for (name, id) in &want_branches {
-        verify_commit(&store, *id, Some(&format!("refs/branches/{name}")))?;
+        verifier.verify_commit(&store, *id, Some(&format!("refs/branches/{name}")))?;
    }
    // Per-release verification: check the signed object itself and its
    // authority. The release object's full schema check (predecessor /
@ -146,7 +149,7 @@ pub fn sync_mirror(
    // the signing key is actually a member of the chain rooted in our
    // local genesis.
    for (_, id) in &want_releases {
-        verify_release(&store, *id)?;
+        verifier.verify_release(&store, *id)?;
    }
    // All checks passed — advance local refs. We do branches first, then
--- a/crates/levcs-protocol/src/pack.rs
+++ b/crates/levcs-protocol/src/pack.rs
@ -52,6 +52,19 @@ pub const COMPRESSION_THRESHOLD: usize = 256;
 /// 3 is libzstd's default — a sensible balance of throughput and ratio.
 pub const COMPRESSION_LEVEL: i32 = 3;
 /// Default ceiling on a single object's *uncompressed* size when decoding
 /// a pack. The recorded `size` field is read straight off the wire and is
 /// otherwise used as the destination capacity for decompression — a
 /// hostile peer can declare `size = 1 TiB` against a tiny zstd frame and
 /// trigger a multi-gigabyte allocation before any data has been
 /// validated. Capping `size` at decode time short-circuits that.
 ///
 /// 256 MiB is generous for normal repository content (source files, even
 /// large binaries) while remaining well below practical RAM limits on a
 /// modest VPS. Callers that genuinely need to move larger blobs should
 /// use `Pack::decode_prefix_with_limit` and pick their own ceiling.
 pub const DEFAULT_MAX_OBJECT_BYTES: usize = 256 * 1024 * 1024;
 #[derive(Debug, Error)]
 pub enum PackError {
    #[error("malformed pack: {0}")]
@ -171,11 +184,22 @@ impl Pack {
        Ok(pack)
    }
-    /// Decode a pack from the start of `bytes`. Returns the pack and the
+    /// Decode a pack from the start of `bytes` using the default per-object
    /// size ceiling (`DEFAULT_MAX_OBJECT_BYTES`). Returns the pack and the
    /// number of bytes consumed; trailing bytes are not an error. This is
    /// used by the push wire format, which appends a manifest after the
    /// pack.
    pub fn decode_prefix(bytes: &[u8]) -> Result<(Self, usize), PackError> {
        // Delegate to the limit-taking variant with the default per-object
        // ceiling, so both entry points share a single decoding path.
        Self::decode_prefix_with_limit(bytes, DEFAULT_MAX_OBJECT_BYTES)
    }
    /// Like `decode_prefix`, but lets the caller pick the per-object size
    /// ceiling. Any entry whose recorded `size` exceeds `max_object_bytes`
    /// is rejected before any allocation or decompression takes place.
    pub fn decode_prefix_with_limit(
        bytes: &[u8],
        max_object_bytes: usize,
    ) -> Result<(Self, usize), PackError> {
        if bytes.len() < 16 {
            return Err(PackError::Malformed("header truncated".into()));
        }
@ -206,8 +230,20 @@ impl Pack {
            }
            let object_type = bytes[p];
            p += 1;
-            let size = LittleEndian::read_u64(&bytes[p..p + 8]) as usize;
+            let size_u64 = LittleEndian::read_u64(&bytes[p..p + 8]);
            p += 8;
            // Reject implausibly-large `size` declarations *before* we
            // touch the data. zstd's `decompress(_, size)` allocates the
            // declared size up front, so leaving this unbounded is a
            // memory-exhaustion vector. We also reject `size > usize::MAX`
            // explicitly on 32-bit targets where the cast below would
            // truncate.
            if size_u64 > max_object_bytes as u64 {
                return Err(PackError::Malformed(format!(
                    "entry size {size_u64} exceeds limit {max_object_bytes}"
                )));
            }
            let size = size_u64 as usize;
            let flags = bytes[p];
            p += 1;
            let unknown = flags & !(FLAG_ZSTD | FLAG_DELTA);
@ -555,6 +591,50 @@ mod tests {
        }
    }
    #[test]
    fn pack_rejects_oversized_object_declaration() {
        // Hand-build a pack whose only entry declares an uncompressed
        // size of 1 TiB. Decoding must fail before anything is allocated,
        // no matter how little data follows on the wire: a hostile peer
        // could otherwise pair the declaration with a tiny zstd frame to
        // provoke a multi-gigabyte allocation in `zstd::bulk::decompress`.
        let declared_size: u64 = 1u64 << 40; // 1 TiB
        let mut wire = Vec::new();
        // Header: magic, version, entry count.
        wire.extend_from_slice(&PACK_MAGIC);
        wire.extend_from_slice(&PACK_VERSION.to_le_bytes());
        wire.extend_from_slice(&1u64.to_le_bytes());
        // Entry: type, declared size, flags (0 = raw). No body follows;
        // the size check should fire before the decoder looks for one.
        wire.push(1);
        wire.extend_from_slice(&declared_size.to_le_bytes());
        wire.push(0);
        let rejection = Pack::decode(&wire).unwrap_err();
        match rejection {
            PackError::Malformed(msg) => assert!(
                msg.contains("exceeds limit"),
                "error must mention size limit: {msg}"
            ),
        }
    }
    #[test]
    fn pack_decode_with_limit_admits_objects_under_caller_ceiling() {
        // A caller-supplied ceiling may be lower than the default; any
        // entry at or below it must still decode. Encode one 1 KiB raw
        // entry and decode it under a 4 KiB limit.
        let body = vec![0xABu8; 1024];
        let mut source = Pack::new();
        source.push(1, body.clone());
        let wire = source.encode();
        let (decoded, _consumed) = Pack::decode_prefix_with_limit(&wire, 4096).unwrap();
        assert_eq!(decoded.entries.len(), 1);
        assert_eq!(decoded.entries[0].bytes, body);
        // The same bytes under a 512-byte limit must be refused.
        let rejection = Pack::decode_prefix_with_limit(&wire, 512).unwrap_err();
        match rejection {
            PackError::Malformed(msg) => assert!(msg.contains("exceeds limit")),
        }
    }
    #[test]
    fn pack_rejects_delta_with_truncated_base_hash() {
        let mut bytes = Vec::new();