LeVCS/crates/levcs-merge/tests/conformance.rs

270 lines
10 KiB
Rust

//! Conformance corpus runner (§8.2).
//!
//! Walks `tests/corpus/` and runs every scenario through the default
//! cascade engine. Each scenario lives in its own directory:
//!
//! ```text
//! tests/corpus/<NNN-scenario-name>/
//!     manifest.toml    # what to assert about the merge result
//!     base.<ext>       # base / common ancestor input
//!     ours.<ext>       # left side input
//!     theirs.<ext>     # right side input
//!     expected.<ext>   # (optional) exact expected merged output
//! ```
//!
//! The corpus is the canonical conformance fixture. Any independent
//! implementation of LeVCS that produces the same outcome on every
//! scenario here is conformant per §8.2; in particular every scenario
//! tagged `git_false_conflict = true` is a case where naive Git
//! produces a spurious conflict and LeVCS MUST resolve correctly.
//!
//! The runner deliberately does no expensive setup — it is one
//! integration test that iterates the corpus, so adding a scenario
//! does not change the binary count or build graph.
use std::path::{Path, PathBuf};
use levcs_merge::engine::CascadeEngine;
use levcs_merge::handler::MergeStatus;
use serde::Deserialize;
/// Deserialized form of a scenario's `manifest.toml`.
///
/// One manifest describes one merge scenario: which logical path to
/// hand to the engine, where to find the three input files, and what
/// to assert about the result.
#[derive(Debug, Deserialize)]
struct Manifest {
/// Human-readable summary of what this scenario tests. Required.
/// Also used as the `[...]` prefix of every failure message the
/// runner produces for this scenario.
description: String,
/// Logical path used for handler routing — only the extension and
/// basename matter (the engine doesn't read from disk for this).
/// Passed verbatim to the engine's `merge_file`.
path: String,
/// File extension used to find base/ours/theirs files. Defaults to
/// the extension of `path`. The scenario fails if neither this field
/// nor `path` yields an extension.
#[serde(default)]
input_ext: Option<String>,
/// True iff naive Git would produce a false conflict on this
/// scenario. Only used to compute corpus statistics; not asserted.
#[serde(default)]
git_false_conflict: bool,
/// Assertions to make about the merge result.
expected: Expected,
}
/// The `[expected]` table of a manifest: what the runner asserts about
/// the merge result.
///
/// `status` and `handler` are always checked. The remaining fields are
/// optional and each applies to only one status; fields belonging to
/// the other status are not consulted.
#[derive(Debug, Deserialize)]
struct Expected {
/// "merged" or "conflict". (NotApplicable is never a final cascade
/// outcome — the engine always falls through to textual.)
/// Any other value, or a value that disagrees with the actual
/// outcome, fails the scenario with a status-mismatch error.
status: String,
/// Required handler name (e.g., "json", "tree-sitter:rust",
/// "textual"). Asserts the cascade routed correctly. Compared for
/// exact equality with the handler reported by the engine.
handler: String,
/// Merged-only: substrings every one of which must appear in the
/// merged output (checked against a lossy UTF-8 decoding of it).
/// Useful when ordering is implementation-defined (object key
/// ordering, etc.) but specific content must be present.
#[serde(default)]
contains: Vec<String>,
/// Merged-only: substrings none of which may appear. Used to check
/// that drop-and-modify resolutions actually drop, or that conflict
/// markers are absent on a merged outcome.
#[serde(default)]
not_contains: Vec<String>,
/// Merged-only: if present, the merged output must equal this file's
/// bytes exactly. The name is resolved relative to the scenario
/// directory. (`expected.<ext>` next to base/ours/theirs.)
#[serde(default)]
content_file: Option<String>,
/// Conflict-only: the exact number of conflict regions. None means
/// "any nonzero count".
#[serde(default)]
conflict_count: Option<usize>,
/// Conflict-only: for every substring listed here, at least one
/// region's description must contain it. (A single region may
/// satisfy several entries; regions matching none are allowed.)
/// Used to assert that the cascade landed on the expected diagnosis
/// (e.g., "modify-vs-delete", "concurrent edits").
#[serde(default)]
region_descriptions_contain: Vec<String>,
/// Merged-only: for every substring listed here, at least one
/// note's message must contain it. Used to assert that, e.g.,
/// recursive descent fired.
#[serde(default)]
notes_contain: Vec<String>,
}
fn corpus_root() -> PathBuf {
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/corpus")
}
/// Returns every scenario directory under the corpus root, sorted by
/// path.
///
/// A scenario is an immediate subdirectory of the corpus root that
/// contains a `manifest.toml` file; every other entry is skipped.
///
/// # Panics
///
/// Panics if the corpus root is not a directory, cannot be listed, or
/// yields an entry that cannot be read. Failing loudly is deliberate:
/// silently dropping an unreadable entry would let a scenario vanish
/// from the conformance run without anyone noticing.
fn collect_scenarios() -> Vec<PathBuf> {
    let root = corpus_root();
    assert!(
        root.is_dir(),
        "corpus directory missing: {}",
        root.display()
    );
    let mut out: Vec<PathBuf> = std::fs::read_dir(&root)
        .expect("read corpus dir")
        .map(|ent| ent.expect("read corpus dir entry").path())
        .filter(|p| p.is_dir() && p.join("manifest.toml").is_file())
        .collect();
    // Directory iteration order is platform-dependent; sort so the run
    // (and its failure report) is deterministic.
    out.sort();
    out
}
/// Reports whether `text` appears to carry merge conflict markers.
///
/// The labelled opening/closing markers are distinctive enough to be
/// matched as substrings. The bare `=======` separator is only counted
/// when it is a whole line (ignoring trailing whitespace): a plain
/// substring test would also fire on banner comments, underlined
/// headings, table rules, and any other legitimate run of `=`.
fn merged_text_has_conflict_markers(text: &str) -> bool {
    text.contains("<<<<<<< ours")
        || text.contains(">>>>>>> theirs")
        || text.lines().any(|line| line.trim_end() == "=======")
}

/// Runs the single scenario stored in `dir` and checks the merge result
/// against the scenario's manifest.
///
/// Steps:
/// 1. Parse `dir/manifest.toml`.
/// 2. Read `base.<ext>`, `ours.<ext>` and `theirs.<ext>` from `dir`,
///    where `<ext>` is `input_ext` or, failing that, the extension of
///    the manifest's `path`.
/// 3. Merge them with the default cascade engine under the manifest's
///    logical `path`.
/// 4. Assert the handler name, then the status-specific expectations.
///    Expectation fields that belong to the other status are ignored.
///
/// # Errors
///
/// Returns a human-readable message describing the first failed read,
/// parse, or assertion. Assertion messages are prefixed with the
/// manifest's `description` in square brackets.
fn run_scenario(dir: &Path) -> Result<(), String> {
    let manifest_text = std::fs::read_to_string(dir.join("manifest.toml"))
        .map_err(|e| format!("read manifest: {e}"))?;
    let manifest: Manifest =
        toml::from_str(&manifest_text).map_err(|e| format!("parse manifest: {e}"))?;

    // Borrow the extension instead of cloning it: both candidates live
    // inside `manifest`, which outlives every use below.
    let ext: &str = manifest
        .input_ext
        .as_deref()
        .or_else(|| {
            Path::new(&manifest.path)
                .extension()
                .and_then(|e| e.to_str())
        })
        .ok_or_else(|| "manifest.path has no extension and input_ext is unset".to_string())?;

    let base = std::fs::read(dir.join(format!("base.{ext}")))
        .map_err(|e| format!("read base.{ext}: {e}"))?;
    let ours = std::fs::read(dir.join(format!("ours.{ext}")))
        .map_err(|e| format!("read ours.{ext}: {e}"))?;
    let theirs = std::fs::read(dir.join(format!("theirs.{ext}")))
        .map_err(|e| format!("read theirs.{ext}: {e}"))?;

    let engine = CascadeEngine::default();
    let result = engine.merge_file(Path::new(&manifest.path), &base, &ours, &theirs);

    // Routing is asserted first: a wrong handler makes every later
    // check meaningless.
    if result.handler != manifest.expected.handler {
        return Err(format!(
            "[{}] expected handler {:?}, got {:?}",
            manifest.description, manifest.expected.handler, result.handler
        ));
    }

    match (manifest.expected.status.as_str(), &result.status) {
        ("merged", MergeStatus::Merged { content, notes }) => {
            // Substring checks run on a lossy decoding so that
            // non-UTF-8 output still yields a readable failure message.
            let s = String::from_utf8_lossy(content);
            for needle in &manifest.expected.contains {
                if !s.contains(needle) {
                    return Err(format!(
                        "[{}] merged content missing substring {:?}\n--- output ---\n{s}\n",
                        manifest.description, needle
                    ));
                }
            }
            for needle in &manifest.expected.not_contains {
                if s.contains(needle) {
                    return Err(format!(
                        "[{}] merged content unexpectedly contains {:?}\n--- output ---\n{s}\n",
                        manifest.description, needle
                    ));
                }
            }
            // The exact-match check compares raw bytes, not the lossy
            // decoding.
            if let Some(content_file) = &manifest.expected.content_file {
                let want = std::fs::read(dir.join(content_file))
                    .map_err(|e| format!("read {content_file}: {e}"))?;
                if want != *content {
                    return Err(format!(
                        "[{}] merged content does not match {content_file}\n--- want ---\n{}\n--- got ---\n{s}\n",
                        manifest.description,
                        String::from_utf8_lossy(&want)
                    ));
                }
            }
            // Every listed substring must show up in at least one note.
            for needle in &manifest.expected.notes_contain {
                if !notes.iter().any(|n| n.message.contains(needle)) {
                    let messages: Vec<&str> = notes.iter().map(|n| n.message.as_str()).collect();
                    return Err(format!(
                        "[{}] expected a note containing {:?}, saw {:?}",
                        manifest.description, needle, messages
                    ));
                }
            }
            // Sanity check: a merged outcome must not carry conflict markers.
            // A handler that wrote markers but reported Merged would silently
            // smuggle conflicts past CI.
            if merged_text_has_conflict_markers(&s) {
                return Err(format!(
                    "[{}] merged outcome contains conflict markers — handler {:?} is buggy\n--- output ---\n{s}\n",
                    manifest.description, result.handler
                ));
            }
            Ok(())
        }
        ("conflict", MergeStatus::Conflict { regions, .. }) => {
            // An explicit count must match exactly; otherwise any
            // nonzero number of regions is accepted.
            if let Some(want) = manifest.expected.conflict_count {
                if regions.len() != want {
                    return Err(format!(
                        "[{}] expected {} conflict region(s), got {}",
                        manifest.description,
                        want,
                        regions.len()
                    ));
                }
            } else if regions.is_empty() {
                return Err(format!(
                    "[{}] expected at least one conflict region, got zero",
                    manifest.description
                ));
            }
            // Every listed substring must show up in at least one
            // region description.
            for needle in &manifest.expected.region_descriptions_contain {
                if !regions.iter().any(|r| r.description.contains(needle)) {
                    let descs: Vec<&str> = regions.iter().map(|r| r.description.as_str()).collect();
                    return Err(format!(
                        "[{}] expected a region with description containing {:?}, saw {:?}",
                        manifest.description, needle, descs
                    ));
                }
            }
            Ok(())
        }
        // Covers both a status mismatch and an unrecognized status
        // string in the manifest.
        (want, got) => Err(format!(
            "[{}] expected status {want:?}, got {got:?}",
            manifest.description
        )),
    }
}
/// Guards against an accidentally empty or truncated corpus: the suite
/// must discover at least ten scenarios.
#[test]
fn corpus_is_non_empty() {
    let found = collect_scenarios().len();
    assert!(
        found >= 10,
        "conformance corpus is too thin: {} scenario(s) found at {}",
        found,
        corpus_root().display()
    );
}
/// Runs every scenario in the corpus and fails with a combined report
/// if any of them does not meet its manifest's expectations.
///
/// Also prints a one-line summary to stderr with the total scenario
/// count and how many are flagged `git_false_conflict`.
#[test]
fn corpus_runs_clean() {
    let scenarios = collect_scenarios();

    // Manifests are parsed a second time here purely for the summary
    // line. Unreadable or malformed manifests simply do not count;
    // `run_scenario` reports them as failures.
    let git_false_conflict_count = scenarios
        .iter()
        .filter(|dir| {
            std::fs::read_to_string(dir.join("manifest.toml"))
                .ok()
                .and_then(|text| toml::from_str::<Manifest>(&text).ok())
                .map_or(false, |manifest| manifest.git_false_conflict)
        })
        .count();

    // Collect every failure rather than stopping at the first, so one
    // run shows the full picture.
    let failures: Vec<String> = scenarios
        .iter()
        .filter_map(|dir| {
            run_scenario(dir)
                .err()
                .map(|e| format!("\n in {}:\n {e}", dir.display()))
        })
        .collect();

    eprintln!(
        "conformance: {} scenario(s) total, {} flagged as git-false-conflict",
        scenarios.len(),
        git_false_conflict_count
    );
    assert!(
        failures.is_empty(),
        "conformance failures:{}",
        failures.join("")
    );
}