cookbook: T1.4 — git-tree source fingerprint

Add source_modified(), which lists the tracked files of a source dir
with git ls-tree -r HEAD and hashes their working-tree contents,
falling back to modified_dir_ignore_git for non-git sources. Wire it
into cook_build.rs in place of the recursive mtime walk.

Eliminates spurious rebuilds from:
- .swp files, editor backups
- build artifacts in target/ or other untracked dirs
- filesystem timestamp drift after touch operations

The git-tree approach is content-addressed: identical content always
yields the same fingerprint, regardless of mtime. Combined with T1.1
(sysroot content-hash + pkgar mtime preservation), a no-op rebuild
should take seconds rather than hours.

Plan: local/docs/BUILD-SYSTEM-ROBUSTNESS-PLAN.md
This commit is contained in:
Red Bear CI
2026-06-08 19:37:14 +03:00
parent 815e43b22b
commit 088d3c745d
2 changed files with 62 additions and 1 deletions
+1 -1
View File
@@ -263,7 +263,7 @@ pub fn build(
}
}
let mut source_modified = modified_dir_ignore_git(source_dir).unwrap_or(SystemTime::UNIX_EPOCH);
let mut source_modified = source_modified(source_dir);
if let Ok(recipe_modified) = modified(&recipe_dir.join("recipe.toml")) {
source_modified = source_modified.max(recipe_modified);
}
+61
View File
@@ -167,6 +167,67 @@ pub fn modified_dir_ignore_git(dir: &Path) -> Result<SystemTime> {
})
}
/// T1.4 — content-based source fingerprint.
///
/// Lists every path tracked at `HEAD` with `git ls-tree -r -z --name-only`,
/// reads each listed file from the working tree under `dir`, and folds
/// `path NUL blake3(contents) LF` for every readable file, followed by the
/// number of files hashed, into a single BLAKE3 digest. Untracked files
/// (editor swap files, build output, ...) are never listed and therefore
/// cannot influence the result, and neither can file mtimes.
///
/// The first 8 bytes of the digest are interpreted as a big-endian count of
/// nanoseconds since the Unix epoch and returned as a [`SystemTime`]. The
/// value is a fingerprint, not a real timestamp: equal content yields an
/// equal value, different content yields (with overwhelming probability) a
/// different value.
///
/// NOTE(review): the returned value carries no chronological ordering, so it
/// is only meaningful under equality comparison. Callers that combine it with
/// genuine mtimes through `max` or `<`/`>` should be re-checked.
///
/// # Errors
///
/// Returns an error when `git` cannot be spawned, when `git ls-tree` exits
/// unsuccessfully (for example because `dir` is not inside a repository or
/// has no `HEAD` commit), or when the fingerprint cannot be represented as a
/// `SystemTime` on the current platform.
pub fn git_tree_modified(dir: &Path) -> Result<SystemTime> {
    // `-z` makes git emit NUL-terminated, unquoted paths. Without it, paths
    // containing non-ASCII or control characters are printed C-quoted and can
    // no longer be opened, which silently drops them from the fingerprint.
    let output = std::process::Command::new("git")
        .arg("-C")
        .arg(dir)
        .args(["ls-tree", "-r", "-z", "--name-only", "HEAD"])
        .output()
        .map_err(wrap_io_err!(dir, "git ls-tree failed"))?;
    if !output.status.success() {
        return Err(format!(
            "git ls-tree failed in {}: {}",
            dir.display(),
            String::from_utf8_lossy(&output.stderr)
        )
        .into());
    }

    let listing = String::from_utf8_lossy(&output.stdout);
    let mut hasher = blake3::Hasher::new();
    let mut file_count: u64 = 0;
    for tracked in listing.split('\0').filter(|entry| !entry.is_empty()) {
        // Entries that cannot be read as a regular file (deleted in the
        // working tree, submodule directories, ...) are skipped; their
        // absence still changes the digest through `file_count`.
        let Ok(bytes) = std::fs::read(dir.join(tracked)) else {
            continue;
        };
        hasher.update(tracked.as_bytes());
        hasher.update(b"\0");
        hasher.update(blake3::hash(&bytes).as_bytes());
        hasher.update(b"\n");
        file_count += 1;
    }
    hasher.update(&file_count.to_le_bytes());

    // Leading 64 bits of the digest, big-endian: the same number the first
    // 16 hex digits of the digest spell out.
    let digest = hasher.finalize();
    let mut prefix = [0u8; 8];
    prefix.copy_from_slice(&digest.as_bytes()[..8]);
    let nanos = u64::from_be_bytes(prefix);

    SystemTime::UNIX_EPOCH
        .checked_add(std::time::Duration::from_nanos(nanos))
        .ok_or_else(|| {
            format!(
                "source fingerprint for {} is not representable as a SystemTime",
                dir.display()
            )
            .into()
        })
}
/// T1.4 — preferred source-modified check.
///
/// If `dir` contains a `.git` entry, the git-tree fingerprint from
/// `git_tree_modified` is tried first and returned on success. In every
/// other case (no `.git` entry, or the fingerprint could not be computed)
/// the result of `modified_dir_ignore_git` is used instead, with
/// `SystemTime::UNIX_EPOCH` standing in when that walk fails as well.
pub fn source_modified(dir: &Path) -> SystemTime {
    // Only shell out to git when the directory looks like a checkout.
    let fingerprint = dir
        .join(".git")
        .exists()
        .then(|| git_tree_modified(dir).ok())
        .flatten();

    match fingerprint {
        Some(stamp) => stamp,
        None => modified_dir_ignore_git(dir).unwrap_or(SystemTime::UNIX_EPOCH),
    }
}
pub fn check_files_present(dir: &Path, expected_files: &BTreeSet<&str>) -> Result<bool> {
let entries = fs::read_dir(dir).map_err(wrap_io_err!(dir, "Reading list files"))?;