RedBear-OS/local/scripts/lint-doc-comments.sh

#!/usr/bin/env bash
# Detect agent-memo style comments in Rust source files.
#
# Memo-style comments describe WHAT changed in a particular edit
# (e.g. "Note: this was changed from X to Y") or HOW something was
# implemented (e.g. "This implements the foo algorithm"). They are
# symptomatic of an AI agent leaving notes for itself or the user,
# rather than documenting contracts or invariants that future
# maintainers will need.
#
# Memo comments become outdated immediately and mislead future
# readers; git history already tracks what changed. Comments should
# document non-obvious WHY (consumer contracts, invariant subtleties,
# safety justifications), not WHAT (which is in the code) or HOW
# (which is the implementation itself).
#
# This script exits non-zero when memo patterns are detected, so it
# can run as a CI gate or pre-commit hook.
#
# Patterns flagged (the first letter may be upper or lower case; each
# is anchored at the start of a comment line):
#
#   - "Note:"        — informational, often agent-specific context
#   - "This implements..." / "This is..." — describes the code
#                      below rather than a contract
#   - "Changed from..." / "Updated from..." / "Modified to..." /
#     "Refactored..." — describes a delta rather than the current
#                      state
#   - "Added new..." / "Removed..." — describes a delta
#   - "TODO:" / "FIXME:" — NOTE(review): not in PATTERNS below, so not
#                      flagged; stated intent: keep TODOs, flag if dense
#
# Excludes:
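#   - Lines inside `#[cfg(test)] mod tests {}` blocks (test docs OK)
#     NOTE(review): the scan below does not implement this exclusion;
#     test modules are currently scanned like any other code.
#   - Lines starting with `//!` (inner doc comments, module-level);
#     any comment marker followed directly by `!` is skipped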
#   - Lines containing URLs (often reference docs, not memos)
#   - Comments referencing safety (SAFETY:, SAFETY check)
#
# Usage:
#   lint-doc-comments.sh [PATH...]
#   # default: scan all .rs files in the local/sources/ tree

# Unset variables are errors, and a pipeline fails when any stage fails.
# errexit is not enabled, so a non-zero status alone does not abort.
set -u -o pipefail

# Absolute directory of this script, and the project root two levels up.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
PROJECT_ROOT=$(cd "${SCRIPT_DIR}/../.." && pwd)

# Scan roots: local/sources under the project root unless the caller
# names paths on the command line.
PATHS=("${PROJECT_ROOT}/local/sources")
if [[ $# -gt 0 ]]; then
    PATHS=("$@")
fi

# Memo-style openers to flag. Each entry is an ERE fragment matched
# against the text that follows the `//` or `///` marker and any
# whitespace after it. Entries carry no `^` of their own: in an ERE a
# `^` anchors to the start of the LINE even inside a group, so an
# anchored entry placed after the marker could never match. The single
# line anchor is supplied by memo_comment_lines.
PATTERNS=(
    '[Nn]ote:[[:space:]]'
    '[Tt]his (implements|is|was|does|handles|provides)'
    '[Cc]hanged from'
    '[Uu]pdated from'
    '[Mm]odified to'
    '[Rr]efactored'
    '[Aa]dded (new|the|this|a) '
    '[Rr]emoved (the|this|an?|old) '
    # Korean openers: "changed here", "implemented", "added", "modified".
    '여기서 변경됨'
    '구현함'
    '추가함'
    '수정됨'
)

# Join the fragments into one alternation for grep -E.
PATTERN_RE=$(printf '%s|' "${PATTERNS[@]}")
PATTERN_RE="${PATTERN_RE%|}"

# Totals consumed by the summary printed after the scan.
VIOLATIONS=0
FILES_SCANNED=0
FILES_WITH_VIOLATIONS=0

# memo_comment_lines FILE
#
# Print every memo-style comment line of FILE as "LINE:TEXT" (grep -n
# format), one per line, and nothing when there are none. A line
# qualifies when, after optional indentation, a `//` or `///` marker and
# optional whitespace, its text starts with one of PATTERNS. Markers
# followed directly by `!` (such as `//!`) therefore never qualify.
# Lines that contain a URL are dropped as likely reference material.
# Test-module blocks are NOT excluded; every line is treated alike.
#
# Globals:   PATTERN_RE (read)
# Arguments: $1 - path of the file to inspect
# Outputs:   matching lines on stdout
# Returns:   always 0; an unreadable FILE simply yields no output
memo_comment_lines() {
    # The URL filter skips grep's own "LINE:" prefix first; otherwise a
    # column-0 comment such as `//Note: x` is printed as `12://Note: x`
    # and the prefix plus marker would be mistaken for a URL scheme.
    grep -nE -- "^[[:space:]]*///?[[:space:]]*(${PATTERN_RE})" "$1" 2>/dev/null \
        | grep -vE '^[0-9]+:.*://[a-zA-Z0-9_-]' \
        || true
}

# find's own errors are silenced below, so report a missing scan root
# here; otherwise a mistyped path would pass the gate with zero files.
for scan_root in "${PATHS[@]}"; do
    [ -e "$scan_root" ] || printf 'warning: no such path: %s\n' "$scan_root" >&2
done

# Visit every .rs file under the scan roots. The loop is fed through
# process substitution so the counters are updated in this shell.
while IFS= read -r -d '' file; do
    # Skip anything under a target/ or .git/ directory.
    case "$file" in
        */target/*|*/.git/*) continue ;;
    esac
    FILES_SCANNED=$((FILES_SCANNED + 1))

    matches=$(memo_comment_lines "$file")
    [ -n "$matches" ] || continue

    FILES_WITH_VIOLATIONS=$((FILES_WITH_VIOLATIONS + 1))
    # grep -c counts lines without the padding some wc builds emit.
    count=$(printf '%s\n' "$matches" | grep -c '')
    VIOLATIONS=$((VIOLATIONS + count))
    printf '  %s (%s)\n' "$file" "$count"
    printf '%s\n' "$matches" | sed 's/^/      /'
done < <(find "${PATHS[@]}" -name '*.rs' -type f -print0 2>/dev/null)

# Summary of the scan counters, framed by blank lines.
printf '\nFiles scanned: %s\nFiles with memo comments: %s\nTotal memo-comment lines: %s\n\n' \
    "$FILES_SCANNED" "$FILES_WITH_VIOLATIONS" "$VIOLATIONS"

# Clean run: report success and leave with status 0.
if ! [ "$VIOLATIONS" -gt 0 ]; then
    echo "OK: no memo-style comments detected"
    exit 0
fi

# At least one memo comment was found: print the guidance text and exit
# with status 1 so a calling CI gate or pre-commit hook fails.
cat <<'GUIDANCE'
DOC-COMMENT HYGIENE FAILURES

The above lines are agent-memo style comments. They describe WHAT changed
or HOW something was implemented, rather than documenting consumer
contracts, invariants, or safety justifications.

Fix: rewrite the comment to explain WHY a non-obvious design choice was
made, or remove the comment if the code is self-documenting. Git history
already records WHAT changed; comments should focus on the contract
that future maintainers need to preserve.

If a flagged comment is genuinely necessary (e.g. a complex algorithm or
safety justification), prefix it with the SAFETY: marker or split the
doc comment to clearly explain the WHY instead of the WHAT.
GUIDANCE
exit 1