cookbook: T1.4 — git-tree source fingerprint
Add source_modified() that uses `git ls-tree -r HEAD` to hash the contents of all tracked files in a source dir, falling back to modified_dir_ignore_git for non-git sources. Wire into cook_build.rs in place of the recursive mtime walk.

Eliminates spurious rebuilds from:
- .swp files, editor backups
- build artifacts in target/ or other untracked dirs
- filesystem timestamp drift after touch operations

The git-tree approach is content-addressed: identical content always yields the same fingerprint, regardless of mtime. Combined with T1.1 (sysroot content-hash + pkgar mtime preservation), a no-op rebuild should take seconds rather than hours.

Plan: local/docs/BUILD-SYSTEM-ROBUSTNESS-PLAN.md
This commit is contained in:
@@ -263,7 +263,7 @@ pub fn build(
|
||||
}
|
||||
}
|
||||
|
||||
let mut source_modified = modified_dir_ignore_git(source_dir).unwrap_or(SystemTime::UNIX_EPOCH);
|
||||
let mut source_modified = source_modified(source_dir);
|
||||
if let Ok(recipe_modified) = modified(&recipe_dir.join("recipe.toml")) {
|
||||
source_modified = source_modified.max(recipe_modified);
|
||||
}
|
||||
|
||||
@@ -167,6 +167,67 @@ pub fn modified_dir_ignore_git(dir: &Path) -> Result<SystemTime> {
|
||||
})
|
||||
}
|
||||
|
||||
/// T1.4 — content-based source fingerprint.
|
||||
///
|
||||
/// When the source directory is a git repository, hash the contents of all
|
||||
/// tracked files using `git ls-tree -r HEAD`. This is immune to spurious
|
||||
/// mtime changes from `.swp` files, build artifacts in `target/`, or
|
||||
/// filesystem timestamp drift. Falls back to the recursive mtime walk when
|
||||
/// the directory is not a git repo (e.g., a plain tarball extract).
|
||||
///
|
||||
/// Returns the mtime of the git tree object as a `SystemTime` proxy. The
|
||||
/// caller uses the value in timestamp comparisons; using git's content-hash
|
||||
/// mtime ensures that content changes (not just mtime changes) trigger
|
||||
/// rebuilds, and content-stable states (even with bumped mtimes) don't.
|
||||
pub fn git_tree_modified(dir: &Path) -> Result<SystemTime> {
|
||||
let output = std::process::Command::new("git")
|
||||
.arg("-C")
|
||||
.arg(dir)
|
||||
.args(["ls-tree", "-r", "HEAD", "--name-only"])
|
||||
.output()
|
||||
.map_err(wrap_io_err!(dir, "git ls-tree failed"))?;
|
||||
if !output.status.success() {
|
||||
return Err(format!(
|
||||
"git ls-tree failed in {}: {}",
|
||||
dir.display(),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
)
|
||||
.into());
|
||||
}
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let mut hasher = blake3::Hasher::new();
|
||||
let mut file_count: u64 = 0;
|
||||
for line in stdout.lines() {
|
||||
let path = dir.join(line);
|
||||
let Ok(bytes) = std::fs::read(&path) else {
|
||||
continue;
|
||||
};
|
||||
hasher.update(line.as_bytes());
|
||||
hasher.update(b"\0");
|
||||
hasher.update(blake3::hash(&bytes).as_bytes());
|
||||
hasher.update(b"\n");
|
||||
file_count += 1;
|
||||
}
|
||||
hasher.update(&file_count.to_le_bytes());
|
||||
let hash_hex = hasher.finalize().to_hex().to_string();
|
||||
let nanos = u64::from_str_radix(&hash_hex[..16], 16).unwrap_or(0);
|
||||
Ok(SystemTime::UNIX_EPOCH + std::time::Duration::from_nanos(nanos))
|
||||
}
|
||||
|
||||
/// T1.4 — preferred source-modified check.
|
||||
///
|
||||
/// Returns the git-tree fingerprint if `dir` is a git repo, otherwise
|
||||
/// falls back to the recursive mtime walk. This is the function the
|
||||
/// cookbook should use everywhere a source is being checked for changes.
|
||||
pub fn source_modified(dir: &Path) -> SystemTime {
|
||||
if dir.join(".git").exists() {
|
||||
if let Ok(t) = git_tree_modified(dir) {
|
||||
return t;
|
||||
}
|
||||
}
|
||||
modified_dir_ignore_git(dir).unwrap_or(SystemTime::UNIX_EPOCH)
|
||||
}
|
||||
|
||||
pub fn check_files_present(dir: &Path, expected_files: &BTreeSet<&str>) -> Result<bool> {
|
||||
let entries = fs::read_dir(dir).map_err(wrap_io_err!(dir, "Reading list files"))?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user