#!/usr/bin/env python3 """Validate the idempotency of every external patch in local/patches/. Per AGENTS.md "NO OVERLAY-STYLE PATCHES — AMENDED 2026" Rule 2, big external projects use the cookbook's `cookbook_apply_patches` helper which checks `git apply --reverse --check` to skip already-applied patches. If a patch's reverse check fails (because the upstream source drifted from the patch's expected state), the helper tries to JSON SCHEMA (with --json): Top-level: patches: [PatchEntry, ...] one per patch in local/patches/ total: int len(patches) errors: int count of all_errors across all entries skipped: int count of entries that were --no-fetch Per-entry: component: str e.g. "mesa", "libdrm" patch: str filename, e.g. "01-foo.patch" status: "ok" | "fail" | "skipped" errors: [str, ...] empty unless status == "fail" Exit code: 0 if errors == 0, else 1. With --no-fetch, all entries are "skipped" and the exit code is still 0, so the make lint-patches target chains should treat skipped_count == total as a soft failure. apply the patch forward, which fails too because some hunks no longer apply. The result is a confusing cook failure. This script catches that class of bug at lint time. For every [0-9]*.patch under local/patches//, it: 1. Clones the upstream repo at the pinned rev into a temp dir 2. Applies the patch 3. Verifies `git apply --reverse --check` succeeds on the result (i.e. the patch is fully reversible — idempotency invariant) 4. Re-applies the patch 5. Verifies the source is byte-identical to step 2's result (i.e. the patch is idempotent — applying it twice = applying it once) 6. Verifies the result is reproducible: re-clone, re-apply, byte-equal If any check fails, the script exits non-zero and prints which patches are non-idempotent. CI or `make lint` should run this on every PR. Usage: ./local/scripts/audit-patch-idempotency.py [--component ] [--verbose] """ import argparse import re import shutil import subprocess import sys import tempfile import tomllib from pathlib import Path PROJECT_ROOT = Path(__file__).resolve().parents[2] PATCHES_ROOT = PROJECT_ROOT / "local" / "patches" SOURCE_ROOT = PROJECT_ROOT / "local" / "sources" RECIPES_ROOT = PROJECT_ROOT / "local" / "recipes" MAINLINE_RECIPES = PROJECT_ROOT / "recipes" PATCH_NAME_RE = re.compile(r"^\d+-[A-Za-z0-9_.-]+\.patch$") NUM_PREFIX_RE = re.compile(r"^(\d+)-") def run(cmd, **kwargs): """Run a subprocess, returning (returncode, stdout, stderr).""" proc = subprocess.run( cmd, capture_output=True, text=True, check=False, **kwargs, ) return proc.returncode, proc.stdout, proc.stderr def collect_patches(component_filter=None): """Yield (component, patch_path) for every external patch.""" if not PATCHES_ROOT.is_dir(): return for component_dir in sorted(PATCHES_ROOT.iterdir()): if not component_dir.is_dir(): continue if component_filter and component_dir.name != component_filter: continue for patch_path in sorted(component_dir.iterdir()): if patch_path.is_file() and PATCH_NAME_RE.match(patch_path.name): yield component_dir.name, patch_path def resolve_upstream(component) -> "tuple[str | None, str | None] | tuple[str, str | None, Path]": """Return (url, rev) for a component by reading its mainline recipe. The component is matched by the recipe.toml's parent directory name (e.g. recipes/libs/mesa/recipe.toml matches component="mesa"), not the category. This means multiple categories with the same package name (e.g. recipes/wip/demos/mesa-demos) won't accidentally match. """ candidates: list[tuple[str, str, Path]] = [] for recipes_root in (RECIPES_ROOT, MAINLINE_RECIPES): if not recipes_root.is_dir(): continue for recipe_toml in recipes_root.rglob("recipe.toml"): if "source" in recipe_toml.parts or "target" in recipe_toml.parts: continue if recipe_toml.parent.name != component: continue try: with open(recipe_toml, "rb") as f: data = tomllib.load(f) except (OSError, tomllib.TOMLDecodeError): continue source = data.get("source") or {} if "git" in source: # Either pinned rev or branch tip — both are valid # upstream reference points for a patch's "from" state. if "rev" in source: rev = str(source["rev"]) elif "branch" in source: # Branch resolution requires a network call to # the upstream's `git ls-remote`. Patches that # track a branch should ideally pin a rev for # reproducibility; warn but proceed. rev = f"refs/heads/{source['branch']}" else: continue candidates.append((source["git"], rev, recipe_toml)) elif "tar" in source: return ("tar", source.get("tar"), recipe_toml) if not candidates: return None, None if len(candidates) > 1: candidates.sort(key=lambda c: "local" in str(c[2])) url, rev, _ = candidates[0] return url, rev def clone_source(url, rev, target): """Clone the upstream repo at the pinned rev into target/.""" if target.exists(): shutil.rmtree(target) target.mkdir(parents=True) rc, out, err = run( ["git", "clone", "--quiet", "--no-checkout", url, str(target)], ) if rc != 0: return False, f"clone failed: {err.strip()}" rc, out, err = run( ["git", "-C", str(target), "checkout", "--quiet", rev], ) if rc != 0: return False, f"checkout {rev} failed: {err.strip()}" return True, None def apply_patch(source_dir, patch_path): """Apply patch in source_dir. Return (ok, error_msg).""" rc, out, err = run( ["git", "-C", str(source_dir), "apply", "--whitespace=nowarn", str(patch_path)], ) if rc != 0: return False, (err or out).strip() return True, None def check_reverse(source_dir, patch_path): """git apply --reverse --check. Returns (ok, error_msg).""" rc, out, err = run( ["git", "-C", str(source_dir), "apply", "--reverse", "--check", str(patch_path)], ) if rc != 0: return False, (err or out).strip() return True, None def diff_trees(a, b): """Return a unified diff between two source dirs, excluding .git/. The .git/ directory has timestamps and refs that always differ between clones, so we exclude it. The actual source tree is the signal we care about. """ proc = subprocess.run( ["diff", "-ruN", "--exclude=.git", "--exclude=*.pyc", "--exclude=__pycache__", str(a), str(b)], capture_output=True, text=True, check=False, ) return proc.stdout def audit_one(component, patch_path, verbose=False): """Audit a single patch. Return a list of error strings (empty = OK).""" errors: list[str] = [] upstream = resolve_upstream(component) if isinstance(upstream, tuple) and len(upstream) == 3 and upstream[0] == "tar": return [f"{component}/{patch_path.name}: tar-based source, " f"manual audit required"] if not upstream or upstream[0] is None: return [f"{component}/{patch_path.name}: no upstream recipe found " f"in local/recipes/ or recipes/"] url, rev = upstream[0], upstream[1] if url is None or rev is None: return [f"{component}/{patch_path.name}: could not resolve upstream " f"git URL or rev for component {component!r}"] url = str(url) rev = str(rev) # Phase 1: clone, apply, verify reverse + idempotency with tempfile.TemporaryDirectory(prefix="audit-patch-") as tmp: tmp_path = Path(tmp) work = tmp_path / "work" work2 = tmp_path / "work2" if verbose: print(f" cloning {url} @ {rev[:12]}...") ok, err = clone_source(url, rev, work) if not ok: return [f"{component}/{patch_path.name}: clone failed: {err}"] # Apply once ok, err = apply_patch(work, patch_path) if not err: patch_applied_ok = True else: patch_applied_ok = False errors.append(f"{component}/{patch_path.name}: apply failed: {err}") if patch_applied_ok: # Reverse check (idempotency invariant) ok, rev_err = check_reverse(work, patch_path) if not ok: err_msg = rev_err or "unknown error" errors.append( f"{component}/{patch_path.name}: --reverse --check FAILED — " f"patch is not idempotent. Cookbook's cookbook_apply_patches " f"will fail on a re-cook. Underlying error: {err_msg[:500]}" ) # Idempotency: apply twice = apply once ok, err = apply_patch(work, patch_path) if not err: # The patch is now applied twice (or rather, applied when # already applied, which might fail). The cookbook's # --reverse --check is meant to skip this case. If the # second apply succeeded, the patch is non-idempotent # (applying twice is meaningful). If it failed, check # that the second failure is the expected "already # applied" error. errors.append( f"{component}/{patch_path.name}: second apply SUCCEEDED — " f"patch is not idempotent. Re-applying after a fresh " f"cook will apply it twice. Cookbook should skip via " f"--reverse --check; verify the helper still works." ) else: # Expected: second apply fails. Confirm the working tree # is byte-identical to the first apply. if verbose: print(f" re-cloning to verify reproducibility...") ok, err = clone_source(url, rev, work2) if not ok: errors.append( f"{component}/{patch_path.name}: re-clone failed: {err}" ) else: ok, err = apply_patch(work2, patch_path) if err: errors.append( f"{component}/{patch_path.name}: " f"reproducibility — second apply failed: {err}" ) else: diff_out = diff_trees(work, work2) if diff_out: errors.append( f"{component}/{patch_path.name}: non-reproducible — " f"second apply produces a different tree:\n" f"{diff_out[:1000]}" ) return errors def main(): parser = argparse.ArgumentParser( description=( "Validate the idempotency of every external patch in " "local/patches/." ) ) parser.add_argument( "--component", help="Audit only the given component (default: all)", ) parser.add_argument( "--verbose", "-v", action="store_true", help="Print progress as patches are checked", ) parser.add_argument( "--no-fetch", action="store_true", help="Skip fetching upstream (useful when network is unavailable)", ) parser.add_argument( "--json", action="store_true", help="Emit a machine-readable JSON summary on stdout " "(use for CI hooks or `make lint` integration).", ) args = parser.parse_args() patches = list(collect_patches(args.component)) if not patches: if args.json: import json print(json.dumps({"patches": [], "errors": 0, "skipped": 0})) else: print(f"No patches found{' for component ' + args.component if args.component else ''}.", file=sys.stderr) return 0 if not args.json: print(f"Auditing {len(patches)} patch(es)...") all_errors = [] skipped = 0 json_results = [] for component, patch_path in patches: entry = { "component": component, "patch": patch_path.name, "status": "ok", "errors": [], } if args.verbose and not args.json: print(f"[{component}/{patch_path.name}]") if args.no_fetch: entry["status"] = "skipped" if not args.json: print(f" {component}/{patch_path.name}: SKIPPED (--no-fetch)") skipped += 1 json_results.append(entry) continue errors = audit_one(component, patch_path, verbose=args.verbose and not args.json) if errors: entry["status"] = "fail" entry["errors"] = list(errors) for e in errors: if not args.json: print(f" FAIL: {e}") all_errors.extend(errors) elif args.verbose and not args.json: print(f" OK") json_results.append(entry) if args.json: import json print(json.dumps({ "patches": json_results, "total": len(patches), "errors": len(all_errors), "skipped": skipped, }, indent=2)) if skipped == len(patches): return 2 return 0 if not all_errors else 1 if all_errors: print() print(f"FAILED: {len(all_errors)} error(s) across {len(patches)} patch(es).") print() print("Common fixes:") print(" 1. Patch hunks reference content that no longer exists in") print(" the upstream source. Re-generate the patch from a fresh") print(" checkout: git diff > local/patches//NN-...patch") print(" 2. Patch is order-dependent with a sibling. The cookbook") print(" applies them in lexical order — make sure NN-prefix order") print(" matches the actual dependency order.") print(" 3. Patch has whitespace conflicts with the upstream source.") print(" Try regenerating with `git diff --ignore-all-space`.") return 1 if skipped == len(patches): print() print(f"All {len(patches)} patch(es) SKIPPED (--no-fetch). " "No audit was performed; the count of 0 errors is not a " "pass, just an absence of network-dependent checks.") return 2 print(f"All {len(patches)} patch(es) are idempotent and reproducible.") return 0 if __name__ == "__main__": sys.exit(main())