Robustness fixes
This commit is contained in:
parent
1996db37b2
commit
d22f48ae08
|
|
@ -783,6 +783,16 @@ pub fn branch(args: BranchArgs) -> Result<()> {
|
||||||
_ => bail!("branch tip is not a commit"),
|
_ => bail!("branch tip is not a commit"),
|
||||||
};
|
};
|
||||||
repo.checkout_tree(tree_id, &repo.workdir)?;
|
repo.checkout_tree(tree_id, &repo.workdir)?;
|
||||||
|
// Refresh the index from the new tree. Without this the index
|
||||||
|
// keeps the previous branch's blob hashes — invisible to most
|
||||||
|
// workflows because the next `commit` rebuilds the index from
|
||||||
|
// the working tree, but visible to anything that compares
|
||||||
|
// index-vs-workdir (e.g., the merge command's dirty-tree
|
||||||
|
// precondition, which would otherwise false-positive on every
|
||||||
|
// branch switch).
|
||||||
|
let mut idx = Index::new();
|
||||||
|
rebuild_index_from_tree(&repo, tree_id, "", &mut idx)?;
|
||||||
|
repo.write_index(&idx)?;
|
||||||
eprintln!("switched to branch {name}");
|
eprintln!("switched to branch {name}");
|
||||||
}
|
}
|
||||||
if let Some(name) = args.delete {
|
if let Some(name) = args.delete {
|
||||||
|
|
@ -806,6 +816,51 @@ pub fn merge(args: MergeArgs) -> Result<()> {
|
||||||
merge_run(args)
|
merge_run(args)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the list of tracked paths whose working-tree contents differ from
|
||||||
|
/// the index, including paths that are tracked but missing from disk. Used
|
||||||
|
/// as a precondition for any operation that overwrites the working tree
|
||||||
|
/// (currently: `merge`, both fast-forward and three-way). Callers should
|
||||||
|
/// refuse to proceed when the returned list is non-empty so users don't
|
||||||
|
/// silently lose uncommitted work.
|
||||||
|
fn dirty_tracked_paths(repo: &Repository) -> Result<Vec<String>> {
|
||||||
|
let idx = repo.read_index()?;
|
||||||
|
let mut workdir_set: HashSet<String> = HashSet::new();
|
||||||
|
for path in repo.walk_workdir()? {
|
||||||
|
let rel = path
|
||||||
|
.strip_prefix(&repo.workdir)?
|
||||||
|
.to_string_lossy()
|
||||||
|
.replace('\\', "/");
|
||||||
|
workdir_set.insert(rel);
|
||||||
|
}
|
||||||
|
let mut dirty = Vec::new();
|
||||||
|
for entry in &idx.entries {
|
||||||
|
if !entry.flags.is_tracked() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let abs = repo.workdir.join(&entry.path);
|
||||||
|
if !workdir_set.contains(&entry.path) {
|
||||||
|
// Tracked file removed from working tree without `levcs commit`
|
||||||
|
// — counts as dirty for merge purposes since the merge would
|
||||||
|
// resurrect it (or compute against stale on-disk state).
|
||||||
|
dirty.push(entry.path.clone());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let bytes = match fs::read(&abs) {
|
||||||
|
Ok(b) => b,
|
||||||
|
Err(_) => {
|
||||||
|
dirty.push(entry.path.clone());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let id = Blob::new(bytes).object_id();
|
||||||
|
if id != entry.blob_hash {
|
||||||
|
dirty.push(entry.path.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dirty.sort();
|
||||||
|
Ok(dirty)
|
||||||
|
}
|
||||||
|
|
||||||
fn merge_run(args: MergeArgs) -> Result<()> {
|
fn merge_run(args: MergeArgs) -> Result<()> {
|
||||||
let branch_name = args
|
let branch_name = args
|
||||||
.branch
|
.branch
|
||||||
|
|
@ -815,6 +870,29 @@ fn merge_run(args: MergeArgs) -> Result<()> {
|
||||||
if repo.levcs_dir.join("MERGE_HEAD").exists() {
|
if repo.levcs_dir.join("MERGE_HEAD").exists() {
|
||||||
bail!("a merge is already in progress; run `levcs merge --abort` to cancel");
|
bail!("a merge is already in progress; run `levcs merge --abort` to cancel");
|
||||||
}
|
}
|
||||||
|
// Refuse to start a merge when tracked files have uncommitted changes —
|
||||||
|
// both the fast-forward and three-way paths overwrite the working
|
||||||
|
// tree, and silently clobbering local edits is the kind of bug that
|
||||||
|
// costs users hours of work. Mirrors git's `Your local changes to
|
||||||
|
// the following files would be overwritten by merge` precondition.
|
||||||
|
let dirty = dirty_tracked_paths(&repo)?;
|
||||||
|
if !dirty.is_empty() {
|
||||||
|
let listing = dirty
|
||||||
|
.iter()
|
||||||
|
.take(10)
|
||||||
|
.map(|p| format!(" {p}"))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("\n");
|
||||||
|
let more = if dirty.len() > 10 {
|
||||||
|
format!("\n ... and {} more", dirty.len() - 10)
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
bail!(
|
||||||
|
"uncommitted changes to tracked files would be overwritten by merge:\n{listing}{more}\n\
|
||||||
|
commit them (or revert to HEAD) before merging — see `levcs status`."
|
||||||
|
);
|
||||||
|
}
|
||||||
let head = repo
|
let head = repo
|
||||||
.refs
|
.refs
|
||||||
.resolve_head()?
|
.resolve_head()?
|
||||||
|
|
@ -1001,6 +1079,21 @@ fn merge_run(args: MergeArgs) -> Result<()> {
|
||||||
record.files.push(fr);
|
record.files.push(fr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Repo-side policy ceiling: every handler reference in the record must
|
||||||
|
// be permitted by `.levcs/merge.toml` (§6.6). This used to run *after*
|
||||||
|
// we applied the merge to the working tree, which left the user's
|
||||||
|
// files clobbered when the policy check then bailed. Validate up
|
||||||
|
// front, before any disk write, so a rejected merge leaves the
|
||||||
|
// working tree exactly as we found it.
|
||||||
|
let allowed = load_merge_policy_allowed(&repo);
|
||||||
|
let bad = validate_record_against_policy(&record, &allowed);
|
||||||
|
if !bad.is_empty() {
|
||||||
|
bail!(
|
||||||
|
"merge produced records referencing handlers not in repository policy: {}",
|
||||||
|
bad.join(", ")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Apply to working tree.
|
// Apply to working tree.
|
||||||
for (path, bytes) in &merged_files {
|
for (path, bytes) in &merged_files {
|
||||||
let abs = repo.workdir.join(path);
|
let abs = repo.workdir.join(path);
|
||||||
|
|
@ -1039,19 +1132,6 @@ fn merge_run(args: MergeArgs) -> Result<()> {
|
||||||
}
|
}
|
||||||
repo.write_index(&idx)?;
|
repo.write_index(&idx)?;
|
||||||
|
|
||||||
// Repo-side policy ceiling: every handler reference in the record must
|
|
||||||
// be permitted by `.levcs/merge.toml` (§6.6). Catch this before we
|
|
||||||
// touch the working tree's merge state so an aborted policy-violating
|
|
||||||
// merge leaves nothing to clean up.
|
|
||||||
let allowed = load_merge_policy_allowed(&repo);
|
|
||||||
let bad = validate_record_against_policy(&record, &allowed);
|
|
||||||
if !bad.is_empty() {
|
|
||||||
bail!(
|
|
||||||
"merge produced records referencing handlers not in repository policy: {}",
|
|
||||||
bad.join(", ")
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Persist merge state and the in-progress merge-record.
|
// Persist merge state and the in-progress merge-record.
|
||||||
fs::write(repo.levcs_dir.join("MERGE_HEAD"), theirs_id.to_hex())?;
|
fs::write(repo.levcs_dir.join("MERGE_HEAD"), theirs_id.to_hex())?;
|
||||||
fs::write(repo.levcs_dir.join("MERGE_BASE"), base_id.to_hex())?;
|
fs::write(repo.levcs_dir.join("MERGE_BASE"), base_id.to_hex())?;
|
||||||
|
|
|
||||||
|
|
@ -417,6 +417,102 @@ fn merge_local_toml_promotion_is_rejected() {
|
||||||
assert!(e.contains("promote"), "error must say 'promote': {e}");
|
assert!(e.contains("promote"), "error must say 'promote': {e}");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Regression test for the dirty-tree merge precondition. Before this
|
||||||
|
/// guard, `levcs merge` would silently overwrite uncommitted edits to
|
||||||
|
/// tracked files — clobbering work the user hadn't yet committed.
|
||||||
|
#[test]
|
||||||
|
fn merge_refuses_when_workdir_has_uncommitted_changes() {
|
||||||
|
let (work, xdg) = init_repo();
|
||||||
|
std::fs::write(work.join("a.txt"), b"original\n").unwrap();
|
||||||
|
assert_eq!(run(&["track", "--all"], &work, &xdg).0, 0);
|
||||||
|
assert_eq!(run(&["commit", "-m", "base"], &work, &xdg).0, 0);
|
||||||
|
|
||||||
|
// Create feat branch with a divergent change so a real merge would run.
|
||||||
|
assert_eq!(run(&["branch", "--create", "feat"], &work, &xdg).0, 0);
|
||||||
|
assert_eq!(run(&["branch", "--switch", "feat"], &work, &xdg).0, 0);
|
||||||
|
std::fs::write(work.join("b.txt"), b"feat side\n").unwrap();
|
||||||
|
assert_eq!(run(&["track", "--all"], &work, &xdg).0, 0);
|
||||||
|
assert_eq!(run(&["commit", "-m", "feat add"], &work, &xdg).0, 0);
|
||||||
|
|
||||||
|
// Back on main, dirty `a.txt` *without committing*.
|
||||||
|
assert_eq!(run(&["branch", "--switch", "main"], &work, &xdg).0, 0);
|
||||||
|
std::fs::write(work.join("a.txt"), b"local-uncommitted-edit\n").unwrap();
|
||||||
|
|
||||||
|
// Merge must refuse and name the dirty file in the error.
|
||||||
|
let (code, _o, e) = run(&["merge", "feat"], &work, &xdg);
|
||||||
|
assert_ne!(code, 0, "merge must refuse on dirty tree: stderr={e}");
|
||||||
|
assert!(
|
||||||
|
e.contains("uncommitted changes") && e.contains("a.txt"),
|
||||||
|
"error must explain refusal and name the file: {e}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Critically: the local edit must NOT have been overwritten, and no
|
||||||
|
// merge state should have been created.
|
||||||
|
let bytes = std::fs::read(work.join("a.txt")).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
bytes, b"local-uncommitted-edit\n",
|
||||||
|
"user's uncommitted edit must be preserved when merge is refused"
|
||||||
|
);
|
||||||
|
assert!(!work.join(".levcs/MERGE_HEAD").exists());
|
||||||
|
assert!(!work.join(".levcs/merge-record").exists());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// When the merge engine produces a record that violates the repo's
|
||||||
|
/// `[policy].allowed_handlers`, the merge must fail BEFORE touching the
|
||||||
|
/// working tree. Previously the policy check ran after `fs::write`, so
|
||||||
|
/// a rejected merge still left half-merged content on disk and stale
|
||||||
|
/// blob hashes in the index.
|
||||||
|
#[test]
|
||||||
|
fn policy_violation_rejects_merge_before_writing_working_tree() {
|
||||||
|
let (work, xdg) = init_repo();
|
||||||
|
std::fs::write(work.join("a.txt"), b"original\n").unwrap();
|
||||||
|
assert_eq!(run(&["track", "--all"], &work, &xdg).0, 0);
|
||||||
|
assert_eq!(run(&["commit", "-m", "base"], &work, &xdg).0, 0);
|
||||||
|
|
||||||
|
assert_eq!(run(&["branch", "--create", "feat"], &work, &xdg).0, 0);
|
||||||
|
std::fs::write(work.join("a.txt"), b"main side\n").unwrap();
|
||||||
|
assert_eq!(run(&["commit", "-m", "main"], &work, &xdg).0, 0);
|
||||||
|
assert_eq!(run(&["branch", "--switch", "feat"], &work, &xdg).0, 0);
|
||||||
|
std::fs::write(work.join("a.txt"), b"feat side\n").unwrap();
|
||||||
|
assert_eq!(run(&["commit", "-m", "feat"], &work, &xdg).0, 0);
|
||||||
|
|
||||||
|
// Switch to main and write a policy that forbids EVERYTHING — every
|
||||||
|
// file the engine touches will be flagged. (An empty allow-list with
|
||||||
|
// a non-empty handlers field on every record entry guarantees a
|
||||||
|
// mismatch; we want to verify that even though the engine produced
|
||||||
|
// a complete merge plan, the writes never landed.)
|
||||||
|
assert_eq!(run(&["branch", "--switch", "main"], &work, &xdg).0, 0);
|
||||||
|
let main_a = std::fs::read(work.join("a.txt")).unwrap();
|
||||||
|
std::fs::write(
|
||||||
|
work.join(".levcs/merge.toml"),
|
||||||
|
b"[policy]\nallowed_handlers = [\"this-handler-does-not-exist\"]\n",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let (code, _, e) = run(&["merge", "feat"], &work, &xdg);
|
||||||
|
assert_ne!(code, 0, "merge must fail under restrictive policy");
|
||||||
|
assert!(
|
||||||
|
e.contains("not in repository policy"),
|
||||||
|
"error must explain policy mismatch: {e}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Working tree must be untouched: `a.txt` still holds main's content,
|
||||||
|
// not a partial merge result. And no merge state was committed.
|
||||||
|
let after = std::fs::read(work.join("a.txt")).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
after, main_a,
|
||||||
|
"policy-rejected merge must not modify the working tree"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
!work.join(".levcs/MERGE_HEAD").exists(),
|
||||||
|
"policy-rejected merge must not leave MERGE_HEAD behind"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
!work.join(".levcs/merge-record").exists(),
|
||||||
|
"policy-rejected merge must not persist a merge-record"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn explain_dumps_merge_record() {
|
fn explain_dumps_merge_record() {
|
||||||
let (work, xdg) = init_repo();
|
let (work, xdg) = init_repo();
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@
|
||||||
//! remote-fetching shim.
|
//! remote-fetching shim.
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
|
|
@ -186,16 +187,86 @@ fn verify_authority_step(
|
||||||
|
|
||||||
/// Walk an authority back to genesis, verifying each step. Returns the body
|
/// Walk an authority back to genesis, verifying each step. Returns the body
|
||||||
/// of the genesis authority on success.
|
/// of the genesis authority on success.
|
||||||
|
///
|
||||||
|
/// Standalone calls allocate a fresh verifier each time. Callers that
|
||||||
|
/// verify many tips in a loop (mirror sync, push handler iterating
|
||||||
|
/// commits) should use `ChainVerifier` directly so the chain walk can be
|
||||||
|
/// shared — without it, verifying N commits that all cite the same
|
||||||
|
/// authority is O(N × chain_depth).
|
||||||
pub fn verify_authority_chain<S: ObjectSource>(
|
pub fn verify_authority_chain<S: ObjectSource>(
|
||||||
src: &S,
|
src: &S,
|
||||||
start: ObjectId,
|
start: ObjectId,
|
||||||
) -> Verification<AuthorityBody> {
|
) -> Verification<AuthorityBody> {
|
||||||
|
let mut v = ChainVerifier::new();
|
||||||
|
let body = v.verify_chain(src, start)?;
|
||||||
|
Ok((*body).clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Caches authority chains that have been fully verified back to genesis.
|
||||||
|
/// Hand the same verifier to a sequence of `verify_chain` / `verify_commit`
|
||||||
|
/// / `verify_release` calls and the per-call cost drops from O(chain
|
||||||
|
/// depth) to O(1) once an ancestor has been seen.
|
||||||
|
///
|
||||||
|
/// The cache is keyed by authority id, which is a BLAKE3 of the signed
|
||||||
|
/// object — collisions are infeasible — so a hit is sound: the underlying
|
||||||
|
/// bytes are guaranteed identical to whatever produced the original
|
||||||
|
/// success. Insertions are atomic per call: a partial walk that fails
|
||||||
|
/// midway leaves the cache untouched.
|
||||||
|
///
|
||||||
|
/// Not internally synchronized. Callers that share a verifier across
|
||||||
|
/// threads should wrap it in `Mutex<_>`.
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct ChainVerifier {
|
||||||
|
/// Maps any id along a verified chain → the chain's genesis body.
|
||||||
|
/// The same `Arc<AuthorityBody>` is shared across every entry that
|
||||||
|
/// belongs to one chain, so memory cost scales with the number of
|
||||||
|
/// distinct chains, not with the number of authorities.
|
||||||
|
verified: HashMap<ObjectId, Arc<AuthorityBody>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ChainVerifier {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verify the authority chain rooted at `start` back to genesis.
|
||||||
|
/// On a cache hit (any id along a previously-verified chain), returns
|
||||||
|
/// the cached genesis body in O(1). On a cache miss, walks the chain,
|
||||||
|
/// verifies each step, and on full success records every walked id
|
||||||
|
/// against a single shared genesis body.
|
||||||
|
pub fn verify_chain<S: ObjectSource>(
|
||||||
|
&mut self,
|
||||||
|
src: &S,
|
||||||
|
start: ObjectId,
|
||||||
|
) -> Verification<Arc<AuthorityBody>> {
|
||||||
|
if let Some(g) = self.verified.get(&start) {
|
||||||
|
return Ok(g.clone());
|
||||||
|
}
|
||||||
|
// Walk back, accumulating the path. We don't insert anything into
|
||||||
|
// the cache until the entire walk succeeds — a partial walk that
|
||||||
|
// errors out must not leave half-trusted ids cached.
|
||||||
|
let mut walked: Vec<ObjectId> = Vec::new();
|
||||||
let mut cur_id = start;
|
let mut cur_id = start;
|
||||||
let mut cur_signed = read_signed(src, cur_id)?;
|
let mut cur_signed = read_signed(src, cur_id)?;
|
||||||
let mut cur_body = AuthorityBody::parse(&cur_signed.body)
|
let mut cur_body = AuthorityBody::parse(&cur_signed.body)
|
||||||
.map_err(|e| VerifyError::Authority(e.to_string()))?;
|
.map_err(|e| VerifyError::Authority(e.to_string()))?;
|
||||||
while !cur_body.previous_authority.is_zero() {
|
let genesis: Arc<AuthorityBody> = loop {
|
||||||
|
walked.push(cur_id);
|
||||||
|
if cur_body.previous_authority.is_zero() {
|
||||||
|
break Arc::new(verify_genesis(&cur_signed)?);
|
||||||
|
}
|
||||||
let prev_id = cur_body.previous_authority;
|
let prev_id = cur_body.previous_authority;
|
||||||
|
// Cache hit on the predecessor: we still need to verify the
|
||||||
|
// step from cur → prev (because the *step's* signature isn't
|
||||||
|
// covered by prev being known-good), but the rest of the
|
||||||
|
// chain back to genesis is already trusted.
|
||||||
|
if let Some(g) = self.verified.get(&prev_id).cloned() {
|
||||||
|
let prev_signed = read_signed(src, prev_id)?;
|
||||||
|
let prev_body = AuthorityBody::parse(&prev_signed.body)
|
||||||
|
.map_err(|e| VerifyError::Authority(e.to_string()))?;
|
||||||
|
verify_authority_step(&cur_signed, &cur_body, &prev_body, prev_id)?;
|
||||||
|
break g;
|
||||||
|
}
|
||||||
let prev_signed = read_signed(src, prev_id)?;
|
let prev_signed = read_signed(src, prev_id)?;
|
||||||
let prev_body = AuthorityBody::parse(&prev_signed.body)
|
let prev_body = AuthorityBody::parse(&prev_signed.body)
|
||||||
.map_err(|e| VerifyError::Authority(e.to_string()))?;
|
.map_err(|e| VerifyError::Authority(e.to_string()))?;
|
||||||
|
|
@ -203,10 +274,38 @@ pub fn verify_authority_chain<S: ObjectSource>(
|
||||||
cur_signed = prev_signed;
|
cur_signed = prev_signed;
|
||||||
cur_body = prev_body;
|
cur_body = prev_body;
|
||||||
cur_id = prev_id;
|
cur_id = prev_id;
|
||||||
|
};
|
||||||
|
for id in walked {
|
||||||
|
self.verified.insert(id, genesis.clone());
|
||||||
|
}
|
||||||
|
Ok(genesis)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cache-aware variant of `verify_commit`. Identical semantics; the
|
||||||
|
/// only behavioural difference is that authority chains visited in
|
||||||
|
/// prior calls don't get re-walked.
|
||||||
|
pub fn verify_commit<S: ObjectSource>(
|
||||||
|
&mut self,
|
||||||
|
src: &S,
|
||||||
|
commit_id: ObjectId,
|
||||||
|
target_ref: Option<&str>,
|
||||||
|
) -> Verification<()> {
|
||||||
|
verify_commit_inner(src, commit_id, target_ref, self)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cache-aware variant of `verify_release`.
|
||||||
|
pub fn verify_release<S: ObjectSource>(
|
||||||
|
&mut self,
|
||||||
|
src: &S,
|
||||||
|
release_id: ObjectId,
|
||||||
|
) -> Verification<()> {
|
||||||
|
verify_release_inner(src, release_id, self)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub(crate) fn cache_size(&self) -> usize {
|
||||||
|
self.verified.len()
|
||||||
}
|
}
|
||||||
let _ = cur_id;
|
|
||||||
let body = verify_genesis(&cur_signed)?;
|
|
||||||
Ok(body)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Verify a successor authority object against `A_old`, given the signer key
|
/// Verify a successor authority object against `A_old`, given the signer key
|
||||||
|
|
@ -303,10 +402,23 @@ pub fn role_for_commit(
|
||||||
/// Full commit verification per §3.6 algorithm. `target_ref` is the ref the
|
/// Full commit verification per §3.6 algorithm. `target_ref` is the ref the
|
||||||
/// commit is being applied to (used for protected-branch role checks); pass
|
/// commit is being applied to (used for protected-branch role checks); pass
|
||||||
/// `None` if not applicable (e.g., during walking).
|
/// `None` if not applicable (e.g., during walking).
|
||||||
|
///
|
||||||
|
/// Each call walks the authority chain from scratch. To share that work
|
||||||
|
/// across many commits — e.g., during a mirror sync that verifies a
|
||||||
|
/// branch's worth of tips — use `ChainVerifier::verify_commit` instead.
|
||||||
pub fn verify_commit<S: ObjectSource>(
|
pub fn verify_commit<S: ObjectSource>(
|
||||||
src: &S,
|
src: &S,
|
||||||
commit_id: ObjectId,
|
commit_id: ObjectId,
|
||||||
target_ref: Option<&str>,
|
target_ref: Option<&str>,
|
||||||
|
) -> Verification<()> {
|
||||||
|
verify_commit_inner(src, commit_id, target_ref, &mut ChainVerifier::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn verify_commit_inner<S: ObjectSource>(
|
||||||
|
src: &S,
|
||||||
|
commit_id: ObjectId,
|
||||||
|
target_ref: Option<&str>,
|
||||||
|
verifier: &mut ChainVerifier,
|
||||||
) -> Verification<()> {
|
) -> Verification<()> {
|
||||||
let bytes = src.read_raw(commit_id)?;
|
let bytes = src.read_raw(commit_id)?;
|
||||||
let actual = blake3::hash(&bytes);
|
let actual = blake3::hash(&bytes);
|
||||||
|
|
@ -356,7 +468,7 @@ pub fn verify_commit<S: ObjectSource>(
|
||||||
let auth_signed = read_signed(src, commit.authority)?;
|
let auth_signed = read_signed(src, commit.authority)?;
|
||||||
let auth_body = AuthorityBody::parse(&auth_signed.body)
|
let auth_body = AuthorityBody::parse(&auth_signed.body)
|
||||||
.map_err(|e| VerifyError::Authority(e.to_string()))?;
|
.map_err(|e| VerifyError::Authority(e.to_string()))?;
|
||||||
let _ = verify_authority_chain(src, commit.authority)?;
|
let _ = verifier.verify_chain(src, commit.authority)?;
|
||||||
let member = auth_body
|
let member = auth_body
|
||||||
.find_member(&pk)
|
.find_member(&pk)
|
||||||
.ok_or_else(|| VerifyError::Commit {
|
.ok_or_else(|| VerifyError::Commit {
|
||||||
|
|
@ -425,6 +537,14 @@ pub fn verify_commit<S: ObjectSource>(
|
||||||
/// against the authority body. A release with no listed members signing
|
/// against the authority body. A release with no listed members signing
|
||||||
/// it is rejected.
|
/// it is rejected.
|
||||||
pub fn verify_release<S: ObjectSource>(src: &S, release_id: ObjectId) -> Verification<()> {
|
pub fn verify_release<S: ObjectSource>(src: &S, release_id: ObjectId) -> Verification<()> {
|
||||||
|
verify_release_inner(src, release_id, &mut ChainVerifier::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn verify_release_inner<S: ObjectSource>(
|
||||||
|
src: &S,
|
||||||
|
release_id: ObjectId,
|
||||||
|
verifier: &mut ChainVerifier,
|
||||||
|
) -> Verification<()> {
|
||||||
let bytes = src.read_raw(release_id)?;
|
let bytes = src.read_raw(release_id)?;
|
||||||
let actual = blake3::hash(&bytes);
|
let actual = blake3::hash(&bytes);
|
||||||
if *actual.as_bytes() != release_id.0 {
|
if *actual.as_bytes() != release_id.0 {
|
||||||
|
|
@ -455,7 +575,7 @@ pub fn verify_release<S: ObjectSource>(src: &S, release_id: ObjectId) -> Verific
|
||||||
hash: release_id.to_hex(),
|
hash: release_id.to_hex(),
|
||||||
kind: e.to_string(),
|
kind: e.to_string(),
|
||||||
})?;
|
})?;
|
||||||
let _ = verify_authority_chain(src, release.authority)?;
|
let _ = verifier.verify_chain(src, release.authority)?;
|
||||||
let auth_signed = read_signed(src, release.authority)?;
|
let auth_signed = read_signed(src, release.authority)?;
|
||||||
let auth_body = AuthorityBody::parse(&auth_signed.body)
|
let auth_body = AuthorityBody::parse(&auth_signed.body)
|
||||||
.map_err(|e| VerifyError::Authority(e.to_string()))?;
|
.map_err(|e| VerifyError::Authority(e.to_string()))?;
|
||||||
|
|
@ -731,4 +851,172 @@ mod tests {
|
||||||
signed.body[0] ^= 0xFF; // tamper
|
signed.body[0] ^= 0xFF; // tamper
|
||||||
assert!(verify_signed_object(&signed).is_err());
|
assert!(verify_signed_object(&signed).is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `ObjectSource` wrapper that counts each `read_raw` call. Lets us
|
||||||
|
/// assert exactly how many bytes the verifier actually fetched, which
|
||||||
|
/// is the only direct way to observe a chain-cache hit.
|
||||||
|
struct CountingSource<'a> {
|
||||||
|
inner: &'a MemorySource,
|
||||||
|
reads: std::cell::Cell<usize>,
|
||||||
|
}
|
||||||
|
impl<'a> CountingSource<'a> {
|
||||||
|
fn new(inner: &'a MemorySource) -> Self {
|
||||||
|
Self {
|
||||||
|
inner,
|
||||||
|
reads: std::cell::Cell::new(0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fn reads(&self) -> usize {
|
||||||
|
self.reads.get()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'a> ObjectSource for CountingSource<'a> {
|
||||||
|
fn read_raw(&self, id: ObjectId) -> Verification<Vec<u8>> {
|
||||||
|
self.reads.set(self.reads.get() + 1);
|
||||||
|
self.inner.read_raw(id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a chain of `length` authorities — genesis at index 0, each
|
||||||
|
/// later entry a properly-signed successor of the previous — backed
|
||||||
|
/// by `alice` (an Owner) for the whole walk. Returns the
|
||||||
|
/// `MemorySource` plus the list of authority ids in order.
|
||||||
|
fn build_chain(length: usize, alice: &SecretKey) -> (MemorySource, Vec<ObjectId>) {
|
||||||
|
assert!(length >= 1);
|
||||||
|
let alice_pk = alice.public();
|
||||||
|
let now = 1_700_000_000_000_000;
|
||||||
|
let mut genesis = AuthorityBody {
|
||||||
|
schema_version: 1,
|
||||||
|
repo_id: ObjectId([0u8; 32]),
|
||||||
|
previous_authority: ObjectId([0u8; 32]),
|
||||||
|
version: 1,
|
||||||
|
created_micros: now,
|
||||||
|
members: vec![crate::authority::MemberEntry {
|
||||||
|
key: alice_pk,
|
||||||
|
handle: "alice".into(),
|
||||||
|
role: Role::Owner,
|
||||||
|
added_micros: now,
|
||||||
|
added_by: alice_pk,
|
||||||
|
}],
|
||||||
|
policy: vec![crate::authority::PolicyEntry {
|
||||||
|
key: "public_read".into(),
|
||||||
|
value: vec![0x01],
|
||||||
|
}],
|
||||||
|
};
|
||||||
|
genesis.normalize().unwrap();
|
||||||
|
genesis.assign_genesis_repo_id().unwrap();
|
||||||
|
let genesis_signed = sign_authority(&genesis, alice).unwrap();
|
||||||
|
let genesis_id = ObjectId(*blake3::hash(&genesis_signed.serialize()).as_bytes());
|
||||||
|
|
||||||
|
let mut store: HashMap<ObjectId, Vec<u8>> = HashMap::new();
|
||||||
|
let mut ids = vec![genesis_id];
|
||||||
|
store.insert(genesis_id, genesis_signed.serialize());
|
||||||
|
|
||||||
|
let mut prev_id = genesis_id;
|
||||||
|
let mut prev_body = genesis;
|
||||||
|
for v in 2..=length as u32 {
|
||||||
|
let mut body = AuthorityBody {
|
||||||
|
schema_version: prev_body.schema_version,
|
||||||
|
repo_id: prev_body.repo_id,
|
||||||
|
previous_authority: prev_id,
|
||||||
|
version: v,
|
||||||
|
created_micros: now + v as i64 * 1_000_000,
|
||||||
|
members: prev_body.members.clone(),
|
||||||
|
policy: prev_body.policy.clone(),
|
||||||
|
};
|
||||||
|
body.normalize().unwrap();
|
||||||
|
let signed = sign_authority(&body, alice).unwrap();
|
||||||
|
let id = ObjectId(*blake3::hash(&signed.serialize()).as_bytes());
|
||||||
|
store.insert(id, signed.serialize());
|
||||||
|
ids.push(id);
|
||||||
|
prev_id = id;
|
||||||
|
prev_body = body;
|
||||||
|
}
|
||||||
|
(MemorySource(store), ids)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verifying a chain of length N from the tip touches every
|
||||||
|
/// authority object exactly once. A second verification of the same
|
||||||
|
/// tip — using the same `ChainVerifier` — must perform zero reads.
|
||||||
|
/// This is the user-visible win: O(N²) → O(N) total work for N
|
||||||
|
/// commits citing the same chain.
|
||||||
|
#[test]
|
||||||
|
fn chain_verifier_caches_walked_authorities() {
|
||||||
|
let alice = SecretKey::generate();
|
||||||
|
let (mem, ids) = build_chain(5, &alice);
|
||||||
|
let src = CountingSource::new(&mem);
|
||||||
|
let mut verifier = ChainVerifier::new();
|
||||||
|
|
||||||
|
let _ = verifier.verify_chain(&src, *ids.last().unwrap()).unwrap();
|
||||||
|
let first_pass_reads = src.reads();
|
||||||
|
assert!(
|
||||||
|
first_pass_reads >= 5,
|
||||||
|
"first verification must read every authority at least once; got {first_pass_reads}"
|
||||||
|
);
|
||||||
|
// Every walked authority should now sit in the cache pointing
|
||||||
|
// at the same shared genesis body.
|
||||||
|
assert_eq!(verifier.cache_size(), ids.len());
|
||||||
|
|
||||||
|
let before = src.reads();
|
||||||
|
let _ = verifier.verify_chain(&src, *ids.last().unwrap()).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
src.reads(),
|
||||||
|
before,
|
||||||
|
"cached re-verification must perform zero reads"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verifying an *ancestor* after the tip must also be a cache hit —
|
||||||
|
/// this is the practical case during a sync where one tip points at
|
||||||
|
/// `A_n` and a second tip points at `A_{n-1}`.
|
||||||
|
#[test]
|
||||||
|
fn chain_verifier_serves_ancestors_from_cache() {
|
||||||
|
let alice = SecretKey::generate();
|
||||||
|
let (mem, ids) = build_chain(4, &alice);
|
||||||
|
let src = CountingSource::new(&mem);
|
||||||
|
let mut verifier = ChainVerifier::new();
|
||||||
|
let _ = verifier.verify_chain(&src, *ids.last().unwrap()).unwrap();
|
||||||
|
let after_tip = src.reads();
|
||||||
|
// Ask for A_2 — should be a hit.
|
||||||
|
let _ = verifier.verify_chain(&src, ids[1]).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
src.reads(),
|
||||||
|
after_tip,
|
||||||
|
"ancestor verification must hit cache, not re-walk"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A failing verification must NOT pollute the cache. The test
|
||||||
|
/// rebuilds a chain, corrupts the bytes of the most recent
|
||||||
|
/// authority, and checks that (a) verification fails, (b) the
|
||||||
|
/// cache stays empty, and (c) a follow-up valid lookup still walks
|
||||||
|
/// the chain rather than serving a phantom hit.
|
||||||
|
#[test]
|
||||||
|
fn chain_verifier_does_not_cache_on_failure() {
|
||||||
|
let alice = SecretKey::generate();
|
||||||
|
let (mut mem, ids) = build_chain(3, &alice);
|
||||||
|
let tip_id = *ids.last().unwrap();
|
||||||
|
// Mutate the byte at offset 12 of the tip's signed bytes — that
|
||||||
|
// sits inside the authority body, so the recorded BLAKE3 hash
|
||||||
|
// will no longer match the id that names it. The verifier reads
|
||||||
|
// by id and re-hashes the bytes; mismatched bytes propagate as
|
||||||
|
// a `repo_id derivation invalid` or a parse-time error depending
|
||||||
|
// on the offset, but in either case the chain walk fails.
|
||||||
|
let bytes = mem.0.get(&tip_id).unwrap().clone();
|
||||||
|
let mut bad = bytes.clone();
|
||||||
|
bad[12] ^= 0xFF;
|
||||||
|
mem.0.insert(tip_id, bad);
|
||||||
|
|
||||||
|
let src = CountingSource::new(&mem);
|
||||||
|
let mut verifier = ChainVerifier::new();
|
||||||
|
assert!(
|
||||||
|
verifier.verify_chain(&src, tip_id).is_err(),
|
||||||
|
"corrupted tip must fail verification"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
verifier.cache_size(),
|
||||||
|
0,
|
||||||
|
"failed verification must leave the cache empty"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ use levcs_identity::verify::{
|
||||||
};
|
};
|
||||||
use levcs_merge::engine::check_handler_allowed;
|
use levcs_merge::engine::check_handler_allowed;
|
||||||
use levcs_merge::record::MergeRecord;
|
use levcs_merge::record::MergeRecord;
|
||||||
use levcs_protocol::auth::{verify_request, AuthRequest, DEFAULT_CLOCK_SKEW};
|
use levcs_protocol::auth::{verify_request, AuthRequest, DEFAULT_CLOCK_SKEW, NONCE_TTL_SECS};
|
||||||
use levcs_protocol::wire::{InfoResponse, InstanceInfo, RefList};
|
use levcs_protocol::wire::{InfoResponse, InstanceInfo, RefList};
|
||||||
use levcs_protocol::Pack;
|
use levcs_protocol::Pack;
|
||||||
|
|
||||||
|
|
@ -149,24 +149,66 @@ impl AppState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Replay-protection cache for §5.3 request nonces.
|
||||||
|
///
|
||||||
|
/// `verify_request` already rejects timestamps outside ±`DEFAULT_CLOCK_SKEW`,
|
||||||
|
/// so a nonce only needs to be remembered while its parent timestamp is
|
||||||
|
/// still within the skew window — anything older is rejected for skew
|
||||||
|
/// before the cache is even consulted. We use `NONCE_TTL_SECS` (the
|
||||||
|
/// protocol-level constant) as the retention horizon, which is wider than
|
||||||
|
/// the skew window so that a small clock difference between client and
|
||||||
|
/// server can't open a replay window between the two checks.
|
||||||
|
///
|
||||||
|
/// The earlier implementation was a `HashSet` that called `clear()` once
|
||||||
|
/// it grew past a count cap. That was a real replay vulnerability: an
|
||||||
|
/// attacker who captured a recent signed request could replay it the
|
||||||
|
/// instant the cache wiped, regardless of how long the original was
|
||||||
|
/// supposed to remain "seen." The TTL approach below is bounded in
|
||||||
|
/// memory by the rate of accepted requests times the TTL — at typical
|
||||||
|
/// federation load that's a few thousand entries, kilobytes of state.
|
||||||
|
/// How many inserts to accept before sweeping expired entries. Eviction
|
||||||
|
/// is O(len), so amortizing keeps the per-call cost O(1) average. Stale
|
||||||
|
/// entries that sit in the map a little longer cost nothing — they
|
||||||
|
/// would just match the TTL skew check upstream and be rejected anyway.
|
||||||
|
const NONCE_EVICT_BATCH: usize = 1024;
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct NonceCache {
|
pub struct NonceCache {
|
||||||
/// Maps nonce → expiry epoch (in micros).
|
/// `nonce → request timestamp (micros since epoch)`. We index by
|
||||||
seen: HashSet<[u8; 16]>,
|
/// timestamp rather than insertion time so a delayed request whose
|
||||||
|
/// own clock is slightly behind ours can't sneak past TTL eviction.
|
||||||
|
seen: HashMap<[u8; 16], i64>,
|
||||||
|
inserts_since_evict: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NonceCache {
|
impl NonceCache {
|
||||||
pub fn check_and_insert(&mut self, nonce: [u8; 16]) -> bool {
|
/// Check whether `nonce` (carried with `request_ts_micros`) has been
|
||||||
if self.seen.contains(&nonce) {
|
/// seen, and if not, record it. `now_micros` is the verifier's notion
|
||||||
false
|
/// of the current time, used to evict stale entries periodically.
|
||||||
} else {
|
/// Returns `true` if the nonce was *new* (request should proceed),
|
||||||
self.seen.insert(nonce);
|
/// `false` if it was a replay.
|
||||||
// Cap memory by clearing periodically.
|
pub fn check_and_insert(
|
||||||
if self.seen.len() > 100_000 {
|
&mut self,
|
||||||
self.seen.clear();
|
nonce: [u8; 16],
|
||||||
|
request_ts_micros: i64,
|
||||||
|
now_micros: i64,
|
||||||
|
) -> bool {
|
||||||
|
self.inserts_since_evict += 1;
|
||||||
|
if self.inserts_since_evict >= NONCE_EVICT_BATCH {
|
||||||
|
let cutoff = now_micros - NONCE_TTL_SECS * 1_000_000;
|
||||||
|
self.seen.retain(|_, ts| *ts >= cutoff);
|
||||||
|
self.inserts_since_evict = 0;
|
||||||
}
|
}
|
||||||
|
if self.seen.contains_key(&nonce) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.seen.insert(nonce, request_ts_micros);
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.seen.len()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -201,6 +243,18 @@ struct ApiError(StatusCode, String);
|
||||||
|
|
||||||
impl IntoResponse for ApiError {
|
impl IntoResponse for ApiError {
|
||||||
fn into_response(self) -> Response {
|
fn into_response(self) -> Response {
|
||||||
|
// Surface every error response in the server log before sending it
|
||||||
|
// to the client. Without this, 5xx and auth failures would vanish
|
||||||
|
// — the client sees the body but nothing reaches the operator.
|
||||||
|
// 5xx is a server-side bug worth `error!`; 4xx is the caller's
|
||||||
|
// problem (bad signature, malformed pack, conflict) and lands at
|
||||||
|
// `warn!` so it's still grep-able but doesn't blow up alerts.
|
||||||
|
let level_5xx = self.0.is_server_error();
|
||||||
|
if level_5xx {
|
||||||
|
tracing::error!(status = %self.0, error = %self.1, "request failed");
|
||||||
|
} else {
|
||||||
|
tracing::warn!(status = %self.0, error = %self.1, "request rejected");
|
||||||
|
}
|
||||||
(self.0, self.1).into_response()
|
(self.0, self.1).into_response()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -444,7 +498,7 @@ fn verify_request_against(
|
||||||
let auth = verify_request(&req, key, ts, nonce, sig, now, DEFAULT_CLOCK_SKEW)
|
let auth = verify_request(&req, key, ts, nonce, sig, now, DEFAULT_CLOCK_SKEW)
|
||||||
.map_err(|e| err(StatusCode::UNAUTHORIZED, e.to_string()))?;
|
.map_err(|e| err(StatusCode::UNAUTHORIZED, e.to_string()))?;
|
||||||
let mut cache = s.nonce_cache.lock().unwrap();
|
let mut cache = s.nonce_cache.lock().unwrap();
|
||||||
if !cache.check_and_insert(auth.nonce) {
|
if !cache.check_and_insert(auth.nonce, auth.timestamp_micros, now) {
|
||||||
return Err(err(StatusCode::UNAUTHORIZED, "replayed nonce"));
|
return Err(err(StatusCode::UNAUTHORIZED, "replayed nonce"));
|
||||||
}
|
}
|
||||||
Ok(AuthCheck { key: auth.key })
|
Ok(AuthCheck { key: auth.key })
|
||||||
|
|
@ -875,3 +929,95 @@ fn find_merge_record(
|
||||||
// Allow `verify_authority_chain` to use ObjectStore directly.
|
// Allow `verify_authority_chain` to use ObjectStore directly.
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
fn _vs(_: &dyn VerifySource) {}
|
fn _vs(_: &dyn VerifySource) {}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
fn micros_from_secs(s: i64) -> i64 {
|
||||||
|
s * 1_000_000
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Re-inserting the same nonce within the TTL window must be rejected.
|
||||||
|
/// This is the core anti-replay invariant; before the TTL rewrite the
|
||||||
|
/// cache also satisfied this property, so a green test here is the
|
||||||
|
/// floor, not the ceiling.
|
||||||
|
#[test]
|
||||||
|
fn nonce_replay_within_ttl_is_rejected() {
|
||||||
|
let mut cache = NonceCache::default();
|
||||||
|
let nonce = [0x42u8; 16];
|
||||||
|
let ts = micros_from_secs(1_700_000_000);
|
||||||
|
let now = ts + micros_from_secs(1);
|
||||||
|
assert!(cache.check_and_insert(nonce, ts, now));
|
||||||
|
// Same nonce, slightly later "now": still within TTL, must reject.
|
||||||
|
assert!(!cache.check_and_insert(nonce, ts, now + micros_from_secs(60)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Once a nonce ages past `NONCE_TTL_SECS` it must be evicted from
|
||||||
|
/// the cache; what bounds memory growth is precisely this release.
|
||||||
|
/// (`verify_request` will reject the timestamp for skew long before
|
||||||
|
/// the cache ever sees a stale request again, so re-accepting the
|
||||||
|
/// nonce bytes is safe.)
|
||||||
|
///
|
||||||
|
/// We drive `NONCE_EVICT_BATCH` distinct inserts at a fresh timestamp
|
||||||
|
/// to trigger one full eviction pass, then assert the original
|
||||||
|
/// (now-stale) entry has been swept.
|
||||||
|
#[test]
|
||||||
|
fn nonce_evicted_after_ttl_expires() {
|
||||||
|
let mut cache = NonceCache::default();
|
||||||
|
let stale = [0x42u8; 16];
|
||||||
|
let stale_ts = micros_from_secs(1_700_000_000);
|
||||||
|
assert!(cache.check_and_insert(stale, stale_ts, stale_ts));
|
||||||
|
// Fast-forward "now" past the TTL window and force an eviction
|
||||||
|
// sweep by inserting a batch of fresh nonces.
|
||||||
|
let later = stale_ts + micros_from_secs(NONCE_TTL_SECS + 1);
|
||||||
|
for i in 0..(NONCE_EVICT_BATCH as u32) {
|
||||||
|
let mut n = [0u8; 16];
|
||||||
|
n[..4].copy_from_slice(&i.to_le_bytes());
|
||||||
|
n[15] = 0xFF; // disambiguate from `stale`
|
||||||
|
assert!(cache.check_and_insert(n, later, later));
|
||||||
|
}
|
||||||
|
// The stale entry is gone; same-nonce-bytes with a fresh
|
||||||
|
// timestamp are allowed.
|
||||||
|
assert!(cache.check_and_insert(stale, later, later));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Regression test for the original CVE-shaped bug: the previous
|
||||||
|
/// implementation called `seen.clear()` once it grew past 100k
|
||||||
|
/// entries, which dropped every recently-seen nonce in one step and
|
||||||
|
/// allowed any captured request still within the 5-minute clock-skew
|
||||||
|
/// window to be replayed.
|
||||||
|
///
|
||||||
|
/// Here we (a) drive the cache through several eviction passes with
|
||||||
|
/// junk-but-fresh nonces, then (b) try to replay a still-fresh nonce
|
||||||
|
/// inserted at the start. With time-bounded eviction the replay
|
||||||
|
/// must be rejected, because the original entry's timestamp is still
|
||||||
|
/// inside the TTL window. With the old count-bounded `clear()`, this
|
||||||
|
/// test would erroneously succeed (the replay would be accepted).
|
||||||
|
/// The flood size is intentionally a small multiple of
|
||||||
|
/// `NONCE_EVICT_BATCH` — the property doesn't depend on the exact
|
||||||
|
/// count, just on triggering the eviction path.
|
||||||
|
#[test]
|
||||||
|
fn nonce_cache_does_not_drop_fresh_entries_under_load() {
|
||||||
|
let mut cache = NonceCache::default();
|
||||||
|
let base_ts = micros_from_secs(1_700_000_000);
|
||||||
|
let mut victim = [0u8; 16];
|
||||||
|
victim[..8].copy_from_slice(&u64::MAX.to_le_bytes());
|
||||||
|
// Insert the "captured" request first.
|
||||||
|
assert!(cache.check_and_insert(victim, base_ts, base_ts));
|
||||||
|
// Flood with NONCE_EVICT_BATCH * 4 fresh-but-distinct nonces, all
|
||||||
|
// dated within the same TTL window so eviction can't help us.
|
||||||
|
let flood: u32 = (NONCE_EVICT_BATCH as u32) * 4;
|
||||||
|
for i in 0..flood {
|
||||||
|
let mut n = [0u8; 16];
|
||||||
|
n[..4].copy_from_slice(&i.to_le_bytes());
|
||||||
|
let now = base_ts + (i as i64) * 1_000;
|
||||||
|
assert!(cache.check_and_insert(n, now, now));
|
||||||
|
}
|
||||||
|
let replay_now = base_ts + micros_from_secs(60);
|
||||||
|
assert!(
|
||||||
|
!cache.check_and_insert(victim, base_ts, replay_now),
|
||||||
|
"replay of fresh nonce must be rejected even when cache is large"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ use std::time::Duration;
|
||||||
|
|
||||||
use levcs_client::{Client, ClientError};
|
use levcs_client::{Client, ClientError};
|
||||||
use levcs_core::{ObjectId, ObjectStore, Refs, Repository};
|
use levcs_core::{ObjectId, ObjectStore, Refs, Repository};
|
||||||
use levcs_identity::verify::{verify_authority_chain, verify_commit, verify_release};
|
use levcs_identity::verify::ChainVerifier;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use crate::{InstanceConfig, MirrorConfig};
|
use crate::{InstanceConfig, MirrorConfig};
|
||||||
|
|
@ -122,22 +122,25 @@ pub fn sync_mirror(
|
||||||
store.write_raw(&ent.bytes)?;
|
store.write_raw(&ent.bytes)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Share one chain-verification cache across every per-tip check below.
|
||||||
|
// Without this, each tip independently walks its authority chain back
|
||||||
|
// to genesis — repeating identical work for every tip that cites the
|
||||||
|
// same authority. With the cache the second tip onwards is O(1).
|
||||||
|
let mut verifier = ChainVerifier::new();
|
||||||
|
|
||||||
// Verify the authority chain on the announced current authority.
|
// Verify the authority chain on the announced current authority.
|
||||||
// Every commit / release we're about to advance to must chain back to
|
// This populates the cache with the entire chain so the per-tip
|
||||||
// a member that is rooted in `genesis_authority` — the chain walk
|
// checks below get cache hits.
|
||||||
// checks that, so doing it once here covers all the per-tip checks
|
|
||||||
// below. (verify_commit re-walks the chain internally; that is
|
|
||||||
// redundant but cheap and keeps the per-tip checks self-contained.)
|
|
||||||
if !info.current_authority.is_empty() {
|
if !info.current_authority.is_empty() {
|
||||||
let cur_auth = parse_hash(&info.current_authority)?;
|
let cur_auth = parse_hash(&info.current_authority)?;
|
||||||
verify_authority_chain(&store, cur_auth)?;
|
verifier.verify_chain(&store, cur_auth)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Per-branch verification — fully checks signature, author membership,
|
// Per-branch verification — fully checks signature, author membership,
|
||||||
// and authority chain. If verification fails on any tip we abort
|
// and authority chain. If verification fails on any tip we abort
|
||||||
// before touching local refs, so a bad source can never poison us.
|
// before touching local refs, so a bad source can never poison us.
|
||||||
for (name, id) in &want_branches {
|
for (name, id) in &want_branches {
|
||||||
verify_commit(&store, *id, Some(&format!("refs/branches/{name}")))?;
|
verifier.verify_commit(&store, *id, Some(&format!("refs/branches/{name}")))?;
|
||||||
}
|
}
|
||||||
// Per-release verification: check the signed object itself and its
|
// Per-release verification: check the signed object itself and its
|
||||||
// authority. The release object's full schema check (predecessor /
|
// authority. The release object's full schema check (predecessor /
|
||||||
|
|
@ -146,7 +149,7 @@ pub fn sync_mirror(
|
||||||
// the signing key is actually a member of the chain rooted in our
|
// the signing key is actually a member of the chain rooted in our
|
||||||
// local genesis.
|
// local genesis.
|
||||||
for (_, id) in &want_releases {
|
for (_, id) in &want_releases {
|
||||||
verify_release(&store, *id)?;
|
verifier.verify_release(&store, *id)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// All checks passed — advance local refs. We do branches first, then
|
// All checks passed — advance local refs. We do branches first, then
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,19 @@ pub const COMPRESSION_THRESHOLD: usize = 256;
|
||||||
/// 3 is libzstd's default — a sensible balance of throughput and ratio.
|
/// 3 is libzstd's default — a sensible balance of throughput and ratio.
|
||||||
pub const COMPRESSION_LEVEL: i32 = 3;
|
pub const COMPRESSION_LEVEL: i32 = 3;
|
||||||
|
|
||||||
|
/// Default ceiling on a single object's *uncompressed* size when decoding
|
||||||
|
/// a pack. The recorded `size` field is read straight off the wire and is
|
||||||
|
/// otherwise used as the destination capacity for decompression — a
|
||||||
|
/// hostile peer can declare `size = 1 TiB` against a tiny zstd frame and
|
||||||
|
/// trigger a multi-gigabyte allocation before any data has been
|
||||||
|
/// validated. Capping `size` at decode time short-circuits that.
|
||||||
|
///
|
||||||
|
/// 256 MiB is generous for normal repository content (source files, even
|
||||||
|
/// large binaries) while remaining well below practical RAM limits on a
|
||||||
|
/// modest VPS. Callers that genuinely need to move larger blobs should
|
||||||
|
/// use `Pack::decode_prefix_with_limit` and pick their own ceiling.
|
||||||
|
pub const DEFAULT_MAX_OBJECT_BYTES: usize = 256 * 1024 * 1024;
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum PackError {
|
pub enum PackError {
|
||||||
#[error("malformed pack: {0}")]
|
#[error("malformed pack: {0}")]
|
||||||
|
|
@ -171,11 +184,22 @@ impl Pack {
|
||||||
Ok(pack)
|
Ok(pack)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decode a pack from the start of `bytes`. Returns the pack and the
|
/// Decode a pack from the start of `bytes` using the default per-object
|
||||||
|
/// size ceiling (`DEFAULT_MAX_OBJECT_BYTES`). Returns the pack and the
|
||||||
/// number of bytes consumed; trailing bytes are not an error. This is
|
/// number of bytes consumed; trailing bytes are not an error. This is
|
||||||
/// used by the push wire format, which appends a manifest after the
|
/// used by the push wire format, which appends a manifest after the
|
||||||
/// pack.
|
/// pack.
|
||||||
pub fn decode_prefix(bytes: &[u8]) -> Result<(Self, usize), PackError> {
|
pub fn decode_prefix(bytes: &[u8]) -> Result<(Self, usize), PackError> {
|
||||||
|
Self::decode_prefix_with_limit(bytes, DEFAULT_MAX_OBJECT_BYTES)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Like `decode_prefix`, but lets the caller pick the per-object size
|
||||||
|
/// ceiling. Any entry whose recorded `size` exceeds `max_object_bytes`
|
||||||
|
/// is rejected before any allocation or decompression takes place.
|
||||||
|
pub fn decode_prefix_with_limit(
|
||||||
|
bytes: &[u8],
|
||||||
|
max_object_bytes: usize,
|
||||||
|
) -> Result<(Self, usize), PackError> {
|
||||||
if bytes.len() < 16 {
|
if bytes.len() < 16 {
|
||||||
return Err(PackError::Malformed("header truncated".into()));
|
return Err(PackError::Malformed("header truncated".into()));
|
||||||
}
|
}
|
||||||
|
|
@ -206,8 +230,20 @@ impl Pack {
|
||||||
}
|
}
|
||||||
let object_type = bytes[p];
|
let object_type = bytes[p];
|
||||||
p += 1;
|
p += 1;
|
||||||
let size = LittleEndian::read_u64(&bytes[p..p + 8]) as usize;
|
let size_u64 = LittleEndian::read_u64(&bytes[p..p + 8]);
|
||||||
p += 8;
|
p += 8;
|
||||||
|
// Reject implausibly-large `size` declarations *before* we
|
||||||
|
// touch the data. zstd's `decompress(_, size)` allocates the
|
||||||
|
// declared size up front, so leaving this unbounded is a
|
||||||
|
// memory-exhaustion vector. We also reject `size > usize::MAX`
|
||||||
|
// explicitly on 32-bit targets where the cast below would
|
||||||
|
// truncate.
|
||||||
|
if size_u64 > max_object_bytes as u64 {
|
||||||
|
return Err(PackError::Malformed(format!(
|
||||||
|
"entry size {size_u64} exceeds limit {max_object_bytes}"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
let size = size_u64 as usize;
|
||||||
let flags = bytes[p];
|
let flags = bytes[p];
|
||||||
p += 1;
|
p += 1;
|
||||||
let unknown = flags & !(FLAG_ZSTD | FLAG_DELTA);
|
let unknown = flags & !(FLAG_ZSTD | FLAG_DELTA);
|
||||||
|
|
@ -555,6 +591,50 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn pack_rejects_oversized_object_declaration() {
|
||||||
|
// Hand-craft a pack whose single entry declares a uncompressed
|
||||||
|
// size of 1 TiB. The decoder must reject this before allocating
|
||||||
|
// anything, regardless of how much data actually follows on the
|
||||||
|
// wire — a hostile peer can pair this with a tiny zstd frame to
|
||||||
|
// trigger a multi-gigabyte allocation in `zstd::bulk::decompress`.
|
||||||
|
let mut bytes = Vec::new();
|
||||||
|
bytes.extend_from_slice(&PACK_MAGIC);
|
||||||
|
bytes.extend_from_slice(&PACK_VERSION.to_le_bytes());
|
||||||
|
bytes.extend_from_slice(&1u64.to_le_bytes());
|
||||||
|
bytes.push(1); // type
|
||||||
|
bytes.extend_from_slice(&(1u64 << 40).to_le_bytes()); // 1 TiB
|
||||||
|
bytes.push(0); // flags: raw
|
||||||
|
// (no body needed — the size check should fire before we look)
|
||||||
|
let err = Pack::decode(&bytes).unwrap_err();
|
||||||
|
match err {
|
||||||
|
PackError::Malformed(s) => assert!(
|
||||||
|
s.contains("exceeds limit"),
|
||||||
|
"error must mention size limit: {s}"
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn pack_decode_with_limit_admits_objects_under_caller_ceiling() {
|
||||||
|
// The custom-limit decoder should accept any entry up to its
|
||||||
|
// configured ceiling, even when smaller than the default. Build
|
||||||
|
// a 1 KiB raw entry, then decode with a 4 KiB limit.
|
||||||
|
let payload = vec![0xABu8; 1024];
|
||||||
|
let mut pk = Pack::new();
|
||||||
|
pk.push(1, payload.clone());
|
||||||
|
let encoded = pk.encode();
|
||||||
|
let (pk2, _) = Pack::decode_prefix_with_limit(&encoded, 4096).unwrap();
|
||||||
|
assert_eq!(pk2.entries.len(), 1);
|
||||||
|
assert_eq!(pk2.entries[0].bytes, payload);
|
||||||
|
|
||||||
|
// Same encoded pack, decoded with a 512-byte limit, must reject.
|
||||||
|
let err = Pack::decode_prefix_with_limit(&encoded, 512).unwrap_err();
|
||||||
|
match err {
|
||||||
|
PackError::Malformed(s) => assert!(s.contains("exceeds limit")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn pack_rejects_delta_with_truncated_base_hash() {
|
fn pack_rejects_delta_with_truncated_base_hash() {
|
||||||
let mut bytes = Vec::new();
|
let mut bytes = Vec::new();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue