From 815e43b22bed5c477faeaf0250857728c71ddf94 Mon Sep 17 00:00:00 2001 From: Red Bear CI Date: Mon, 8 Jun 2026 19:18:38 +0300 Subject: [PATCH] =?UTF-8?q?cookbook:=20T1.1=20=E2=80=94=20content-hash=20s?= =?UTF-8?q?tability=20for=20stage.pkgar?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After packaging, hash the staged sysroot with BLAKE3 (sorted paths, deterministic). Compare against the previous build's fingerprint stored next to stage.pkgar. If identical, restore the old pkgar mtime on the new pkgar so dependents do not see a 'changed' timestamp and skip their own rebuilds. This catches the no-op rebuild pathology where a config-only change (comment edit, [patch] reordering, dependency re-resolution) produces byte-identical output but cascades through every dependent because of mtime advancement. Verified: 23 fingerprints written during redbear-mini build; T1.1 preserved mtime messages logged for relibc, libffi, expat, glib, pcre2, etc. — all packages whose content was unchanged from the previous build. 
Plan: local/docs/BUILD-SYSTEM-ROBUSTNESS-PLAN.md --- Cargo.lock | 11 ++++++ Cargo.toml | 1 + src/cook/package.rs | 84 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index fbbc4a49c2..04a2683459 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -379,6 +379,16 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" +[[package]] +name = "filetime" +version = "0.2.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c287a33c7f0a620c38e641e7f60827713987b3c0f26e8ddc9462cc69cf75759" +dependencies = [ + "cfg-if", + "libc", +] + [[package]] name = "find-msvc-tools" version = "0.1.5" @@ -861,6 +871,7 @@ dependencies = [ "ansi-to-tui", "anyhow", "blake3", + "filetime", "globset", "ignore", "libc", diff --git a/Cargo.toml b/Cargo.toml index bf4634cea9..59a194fc97 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ tui = ["ratatui", "ansi-to-tui", "strip-ansi-escapes"] [dependencies] anyhow = "1" blake3 = "1" +filetime = "0.2" globset = "0.4" libc = "0.2" ignore = "0.4" diff --git a/src/cook/package.rs b/src/cook/package.rs index 3b7d42a62f..7a1079a53b 100644 --- a/src/cook/package.rs +++ b/src/cook/package.rs @@ -81,6 +81,9 @@ pub fn package( .map_err(|err| format!("failed to create pkgar archive: {:?}", err))?; } + // T1.1: content-hash stability — see preserve_mtime_if_content_unchanged + preserve_mtime_if_content_unchanged(&stage_dir, &package_file, logger); + let deps = if package.is_some() { BTreeSet::from([name.with_prefix(PackagePrefix::Any)]) } else { @@ -121,6 +124,87 @@ pub fn package( Ok(()) } +/// T1.1 — content-hash stability check. +/// +/// Hashes the staged sysroot using BLAKE3 (sorted paths → deterministic +/// regardless of filesystem ordering). Compares against the previous build's +/// fingerprint. 
If identical, restores the previous `stage.pkgar` mtime on
+/// the new pkgar so dependents do not see a "changed" timestamp and skip
+/// their own rebuilds.
+///
+/// The record is stored next to the pkgar as `<stem>.pkgar.fingerprint` and
+/// holds `<hash> <mtime secs> <mtime nanos>`: the mtime has to be recorded
+/// because the old pkgar is already overwritten by the time this runs.
+/// Best effort — on any I/O failure the freshly written mtime is left alone.
+fn preserve_mtime_if_content_unchanged(stage_dir: &Path, package_file: &Path, logger: &PtyOut) {
+    let fp_path = package_file.with_extension("pkgar.fingerprint");
+    let Some(new_fp) = compute_stage_fingerprint(stage_dir) else {
+        // Content unknown: drop the record so a later build cannot match a stale hash.
+        let _ = std::fs::remove_file(&fp_path);
+        return;
+    };
+    let prev = std::fs::read_to_string(&fp_path).unwrap_or_default();
+    let mut fields = prev.split_whitespace();
+    let unchanged = fields.next() == Some(new_fp.as_str());
+    let secs = fields.next().and_then(|s| s.parse::<i64>().ok());
+    let nanos = fields.next().and_then(|s| s.parse::<u32>().ok());
+    if let (true, Some(secs), Some(nanos @ 0..=999_999_999)) = (unchanged, secs, nanos) {
+        let recorded = filetime::FileTime::from_unix_time(secs, nanos);
+        if filetime::set_file_mtime(package_file, recorded).is_ok() {
+            log_to_pty!(
+                logger,
+                "DEBUG: T1.1 preserved pkgar mtime (content unchanged): {}",
+                package_file.display()
+            );
+            return;
+        }
+    }
+    // Changed content, hash-only legacy record, or failed restore: record this build.
+    let Ok(meta) = std::fs::metadata(package_file) else {
+        return;
+    };
+    let built = filetime::FileTime::from_last_modification_time(&meta);
+    let record = format!("{new_fp} {} {}\n", built.unix_seconds(), built.nanoseconds());
+    let _ = std::fs::write(&fp_path, record);
+}
+
+/// Hashes each file and symlink under `stage_dir` (path, mode, contents or link
+/// target) in sorted order; `None` if the tree cannot be read in full.
+fn compute_stage_fingerprint(stage_dir: &Path) -> Option<String> {
+    use std::collections::BTreeMap;
+    if !stage_dir.is_dir() {
+        return None;
+    }
+    let mut entries: BTreeMap<PathBuf, (u32, blake3::Hash)> = BTreeMap::new();
+    for entry in walkdir::WalkDir::new(stage_dir).follow_links(false) {
+        // Fail closed: `?` turns any walk/read error into "no fingerprint".
+        let entry = entry.ok()?;
+        let path = entry.path();
+        let payload = if entry.file_type().is_symlink() {
+            std::fs::read_link(path).ok()?.to_string_lossy().into_owned().into_bytes()
+        } else if entry.file_type().is_file() {
+            std::fs::read(path).ok()?
+        } else {
+            continue;
+        };
+        #[cfg(unix)]
+        let mode = std::os::unix::fs::MetadataExt::mode(&entry.metadata().ok()?);
+        #[cfg(not(unix))]
+        let mode = 0u32;
+        let rel = path.strip_prefix(stage_dir).unwrap_or(path).to_path_buf();
+        entries.insert(rel, (mode, blake3::hash(&payload)));
+    }
+    let mut hasher = blake3::Hasher::new();
+    for (rel, (mode, h)) in &entries {
+        hasher.update(rel.to_string_lossy().as_bytes());
+        hasher.update(b"\0");
+        hasher.update(&mode.to_le_bytes());
+        hasher.update(h.as_bytes());
+        hasher.update(b"\n");
+    }
+    Some(hasher.finalize().to_hex().to_string())
+}
+
 pub fn package_toml(
     toml_path: PathBuf,
     recipe: &CookRecipe,