From 44d434e36905ef81505ff4420b5f4a7f1858688c Mon Sep 17 00:00:00 2001 From: vasilito Date: Mon, 29 Jun 2026 19:34:35 +0300 Subject: [PATCH] build: detect working-tree dirtiness in stale-source check The stale-build check in build-redbear.sh compared HEAD commit hashes against a stored fingerprint, which silently ignored uncommitted changes in local/sources/{relibc,kernel,base,bootloader,installer}. This meant dev iterations where a maintainer edited the working tree without committing would not trigger a rebuild of the affected package. The cookbook would then cook the binary from a fingerprint that claims 'up to date' but is actually older than the working tree. This commit extends the staleness test to also check 'git diff HEAD', 'git diff --cached HEAD', and 'git ls-files --others --exclude-standard'. The error message distinguishes 'uncommitted changes' from 'new commits' so the operator can tell which case triggered the rebuild. Also adds local/scripts/lint-doc-comments.sh: a doc-comment hygiene linter that flags agent-memo style comments (Note:, This implements..., Changed from..., Added new..., Korean variants) so future commits can be screened for the WHAT-not-WHY comment anti-pattern. 
--- local/scripts/build-redbear.sh | 22 ++++- local/scripts/lint-doc-comments.sh | 132 +++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+), 2 deletions(-) create mode 100755 local/scripts/lint-doc-comments.sh diff --git a/local/scripts/build-redbear.sh b/local/scripts/build-redbear.sh index 152a877c74..45d243062d 100755 --- a/local/scripts/build-redbear.sh +++ b/local/scripts/build-redbear.sh @@ -216,8 +216,26 @@ if [ "$NO_CACHE" != "1" ]; then if [ -d "$src_dir/.git" ] && [ -f "$pkgar" ]; then src_commit=$(git -C "$src_dir" rev-parse HEAD 2>/dev/null || echo "") last_commit=$(cat "$fingerprint" 2>/dev/null || echo "") - if [ -n "$src_commit" ] && [ "$src_commit" != "$last_commit" ]; then - echo ">>> Stale $src detected (source newer than last build); invalidating..." + + # Working-tree dirtiness: tracked modifications, staged + # changes, and untracked files all count. Without this, + # uncommitted edits (e.g. an in-progress debug session) + # silently bypass the stale-detect and the next build + # cooks from a fingerprint that claims "up to date" but + # is actually older than the working tree. + src_dirty=0 + if ! git -C "$src_dir" diff --quiet HEAD 2>/dev/null \ + || ! git -C "$src_dir" diff --cached --quiet HEAD 2>/dev/null \ + || [ -n "$(git -C "$src_dir" ls-files --others --exclude-standard 2>/dev/null)" ]; then + src_dirty=1 + fi + + if [ -n "$src_commit" ] && { [ "$src_commit" != "$last_commit" ] || [ "$src_dirty" = "1" ]; }; then + if [ "$src_dirty" = "1" ] && [ "$src_commit" = "$last_commit" ]; then + echo ">>> Stale $src detected (working tree has uncommitted changes); invalidating..." + else + echo ">>> Stale $src detected (source newer than last build); invalidating..." 
+ fi rm -f "$PROJECT_ROOT/repo/x86_64-unknown-redox/$src".* find "$PROJECT_ROOT/recipes" -path "*/$src/target" -type d -exec rm -rf {} + 2>/dev/null || true STALE_DETECTED=1 diff --git a/local/scripts/lint-doc-comments.sh b/local/scripts/lint-doc-comments.sh new file mode 100755 index 0000000000..7de647d1c5 --- /dev/null +++ b/local/scripts/lint-doc-comments.sh @@ -0,0 +1,132 @@ +#!/usr/bin/env bash +# Detect agent-memo style comments in Rust source files. +# +# Memo-style comments describe WHAT changed in a particular edit +# (e.g. "Note: this was changed from X to Y") or HOW something was +# implemented (e.g. "This implements the foo algorithm"). They are +# symptomatic of an AI agent leaving notes for itself or the user, +# rather than documenting contracts or invariants that future +# maintainers will need. +# +# Memo comments become outdated immediately and mislead future +# readers; git history already tracks what changed. Comments should +# document non-obvious WHY (consumer contracts, invariant subtleties, +# safety justifications), not WHAT (which is in the code) or HOW +# (which is the implementation itself). +# +# This script exits non-zero when memo patterns are detected, so it +# can run as a CI gate or pre-commit hook. +# +# Patterns flagged (each is case-insensitive, anchored at the start +# of a comment line): +# +# - "Note:" — informational, often agent-specific context +# - "This implements..." / "This is..." — describes the code +# below rather than a contract +# - "Changed from..." / "Updated from..." / "Modified to..." / +# "Refactored..." — describes a delta rather than the current +# state +# - "Added new..." / "Removed..." 
— describes a delta
+#   - "TODO:" / "FIXME:" are NOT flagged (no pattern below matches them)
+#
+# Excludes:
+#   - Lines containing URLs (often reference docs, not memos)
+#   - `//!` inner doc comments (the `!` keeps the scan regex from matching)
+#   - `SAFETY:` comments (no pattern below begins with SAFETY)
+#   - NOT excluded by the scan loop: `#[cfg(test)] mod tests {}` blocks
+#
+# Usage:
+#   lint-doc-comments.sh [PATH...]
+#   # default: scan all .rs files in the local/sources/ tree
+
+set -uo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+if [ $# -eq 0 ]; then
+    PATHS=("$PROJECT_ROOT/local/sources")
+else
+    PATHS=("$@")
+fi
+
+# Patterns to flag, matched at the START of a comment's text. The scan grep
+# supplies the `^\s*//` anchor; a `^` here could never match in an ERE.
+PATTERNS=(
+    '[Nn]ote:[[:space:]]'
+    '[Tt]his (implements|is|was|does|handles|provides)'
+    '[Cc]hanged from'
+    '[Uu]pdated from'
+    '[Mm]odified to'
+    '[Rr]efactored'
+    '[Aa]dded (new|the|this|a) '
+    '[Rr]emoved (the|this|an?|old) '
+    '여기서 변경됨'
+    '구현함'
+    '추가함'
+    '수정됨'
+)
+
+# Build a single alternation regex for grep -E.
+PATTERN_RE=$(printf '%s|' "${PATTERNS[@]}")
+PATTERN_RE="${PATTERN_RE%|}"
+
+VIOLATIONS=0
+FILES_SCANNED=0
+FILES_WITH_VIOLATIONS=0
+
+# Find all .rs files under the specified paths.
+while IFS= read -r -d '' file; do
+    # Skip generated files and the target/ directory.
+    case "$file" in
+        */target/*|*/.git/*) continue ;;
+    esac
+    FILES_SCANNED=$((FILES_SCANNED + 1))
+
+    # Collect matching lines with their line numbers.
Exclude: + # - test module blocks (permissive) + # - lines with URLs (likely reference material) + # - SAFETY: prefix lines (these are required by Rust convention) + # - `///!` inner doc comments (module-level) + matches=$(grep -nE "^\s*(///|//)[[:space:]]*($PATTERN_RE)" "$file" 2>/dev/null \ + | grep -vE "://[a-zA-Z0-9_-]" \ + | grep -vE "://(github|gitlab|stackoverflow|opengroup|pubs|docs)" \ + || true) + + if [ -n "$matches" ]; then + FILES_WITH_VIOLATIONS=$((FILES_WITH_VIOLATIONS + 1)) + count=$(echo "$matches" | wc -l) + VIOLATIONS=$((VIOLATIONS + count)) + echo " $file ($count)" + echo "$matches" | sed 's/^/ /' + fi +done < <(find "${PATHS[@]}" -name '*.rs' -type f -print0 2>/dev/null) + +echo "" +echo "Files scanned: $FILES_SCANNED" +echo "Files with memo comments: $FILES_WITH_VIOLATIONS" +echo "Total memo-comment lines: $VIOLATIONS" +echo "" + +if [ "$VIOLATIONS" -gt 0 ]; then + cat <