diff --git a/src/cook/cook_build.rs b/src/cook/cook_build.rs index d7fd8f18a6..faf055aa0a 100644 --- a/src/cook/cook_build.rs +++ b/src/cook/cook_build.rs @@ -263,7 +263,7 @@ pub fn build( } } - let mut source_modified = modified_dir_ignore_git(source_dir).unwrap_or(SystemTime::UNIX_EPOCH); + let mut source_modified = source_modified(source_dir); if let Ok(recipe_modified) = modified(&recipe_dir.join("recipe.toml")) { source_modified = source_modified.max(recipe_modified); } diff --git a/src/cook/fs.rs b/src/cook/fs.rs index f53b623c27..493348dda6 100644 --- a/src/cook/fs.rs +++ b/src/cook/fs.rs @@ -167,6 +167,67 @@ pub fn modified_dir_ignore_git(dir: &Path) -> Result { }) } +/// T1.4 — content-based source fingerprint. +/// +/// Lists the paths tracked at `HEAD` with `git ls-tree -r HEAD` and hashes +/// each path together with the BLAKE3 digest of its working-tree contents; +/// entries that cannot be read are skipped. Untracked files (for example +/// `.swp` files or untracked build artifacts in `target/`) and mtime-only +/// changes do not affect the result. Returns an error when `git ls-tree` +/// cannot be run or exits unsuccessfully; the fallback to the recursive +/// mtime walk lives in `source_modified`, not in this function. +/// +/// Returns `UNIX_EPOCH` plus the first 64 bits of the final digest +/// interpreted as nanoseconds. This is a fingerprint packed into a +/// `SystemTime`, not a real modification time: it changes when tracked +/// content changes, but its ordering relative to real timestamps is +/// arbitrary. +/// NOTE(review): the caller combines this value with real mtimes via +/// `max`; confirm that an ordering comparison is what is intended. 
+pub fn git_tree_modified(dir: &Path) -> Result { + let output = std::process::Command::new("git") + .arg("-C") + .arg(dir) + .args(["ls-tree", "-r", "HEAD", "--name-only"]) + .output() + .map_err(wrap_io_err!(dir, "git ls-tree failed"))?; + if !output.status.success() { + return Err(format!( + "git ls-tree failed in {}: {}", + dir.display(), + String::from_utf8_lossy(&output.stderr) + ) + .into()); + } + let stdout = String::from_utf8_lossy(&output.stdout); + let mut hasher = blake3::Hasher::new(); + let mut file_count: u64 = 0; + for line in stdout.lines() { + let path = dir.join(line); + let Ok(bytes) = std::fs::read(&path) else { + continue; + }; + hasher.update(line.as_bytes()); + hasher.update(b"\0"); + hasher.update(blake3::hash(&bytes).as_bytes()); + hasher.update(b"\n"); + file_count += 1; + } + hasher.update(&file_count.to_le_bytes()); + let hash_hex = hasher.finalize().to_hex().to_string(); + let nanos = u64::from_str_radix(&hash_hex[..16], 16).unwrap_or(0); + Ok(SystemTime::UNIX_EPOCH + std::time::Duration::from_nanos(nanos)) +} + +/// T1.4 — preferred source-modified check. +/// +/// Returns the git-tree fingerprint if `dir` is a git repo, otherwise +/// falls back to the recursive mtime walk. This is the function the +/// cookbook should use everywhere a source is being checked for changes. +pub fn source_modified(dir: &Path) -> SystemTime { + if dir.join(".git").exists() { + if let Ok(t) = git_tree_modified(dir) { + return t; + } + } + modified_dir_ignore_git(dir).unwrap_or(SystemTime::UNIX_EPOCH) +} + pub fn check_files_present(dir: &Path, expected_files: &BTreeSet<&str>) -> Result { let entries = fs::read_dir(dir).map_err(wrap_io_err!(dir, "Reading list files"))?;