From 081ed10a8bbbc4a88a589512902f984e03f0dd42 Mon Sep 17 00:00:00 2001 From: Admin Pupkin Date: Sun, 17 May 2026 13:57:37 +0300 Subject: [PATCH] fix: x2APIC ICR format + build system durability docs - Fix LocalX2Apic handler: use local_apic.x2 to select correct ICR format (<<32 for x2APIC, <<56 for xAPIC) instead of hardcoded <<32 - Promote x2APIC/xAPIC detection from debug! to info! for bootlog - Document build system durability in AGENTS.md: cardinal rule, two-layer architecture, correct workflow, anti-patterns --- AGENTS.md | 104 +++++++++- .../kernel/P20-x2apic-icr-mode-fix.patch | 64 ++++++ recipes/core/kernel/recipe.toml | 1 + .../kernel/source/src/acpi/madt/arch/x86.rs | 22 +- .../source/src/acpi/madt/arch/x86.rs.orig | 162 +++++++++++++++ .../source/src/acpi/madt/arch/x86.rs.rej | 194 ++++++++++++++++++ .../src/arch/x86_shared/device/local_apic.rs | 4 +- .../source/src/arch/x86_shared/idt.rs.rej | 11 + .../kernel/source/src/context/switch.rs.rej | 87 ++++++++ 9 files changed, 643 insertions(+), 6 deletions(-) create mode 100644 local/patches/kernel/P20-x2apic-icr-mode-fix.patch create mode 100644 recipes/core/kernel/source/src/acpi/madt/arch/x86.rs.orig create mode 100644 recipes/core/kernel/source/src/acpi/madt/arch/x86.rs.rej create mode 100644 recipes/core/kernel/source/src/arch/x86_shared/idt.rs.rej create mode 100644 recipes/core/kernel/source/src/context/switch.rs.rej diff --git a/AGENTS.md b/AGENTS.md index ef1975320b..9d3cad69b3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,7 +19,109 @@ human-initiated operations. Durable Red Bear state belongs in `local/patches/`, The current baseline is **Red Bear OS 0.1.0** (Redox snapshot at build-system commit `f55acba68`). All recipe sources are pinned and archived in `sources/redbear-0.1.0/`. -## NO SILENT UPSTREAM PULLS — OFFLINE-FIRST POLICY +## BUILD SYSTEM DURABILITY — THE CARDINAL RULE + +**Never edit files under `recipes/*/source/` directly. 
Those trees are ephemeral — they are +destroyed and regenerated on every `repo fetch`, `repo cook`, `make clean`, and +`make distclean`. Any edit made there will be silently lost on the next build.** + +This is the #1 mistake AI agents and new contributors make. It has caused repeated work loss +in this project. The rule is: + +| What you want to do | Where to do it | +|---|---| +| Change a kernel source file | Create or update a patch in `local/patches/kernel/` | +| Change an init or daemon source file | Create or update a patch in `local/patches/base/` | +| Change relibc | Create or update a patch in `local/patches/relibc/` | +| Change a driver | Create or update a patch in `local/patches/base/` or `local/patches/<package>/` | +| Add a new package | Create a recipe in `local/recipes/<category>/<name>/` | +| Change build config | Edit `config/redbear-*.toml` | +| Add documentation | Write to `local/docs/` | + +### How the build system works + +``` +repo cook + ├── repo fetch + │ ├── Clone/fetch upstream source → recipes/<category>/<name>/source/ + │ ├── Apply patches from recipe.toml → patches are read from local/patches/<package>/ + │ └── Source tree is now fully patched and ready for build + ├── Cargo/cmake/configure build + └── Stage artifacts into sysroot +``` + +The `source/` directory is a disposable working copy. It is produced at the start of every +build by cloning the upstream source + applying patches sequentially. The recipe's +`patches = [...]` list in `recipe.toml` controls which patches are applied. 
+ +### Two-layer architecture + +``` +Layer 1: Ephemeral (destroyed on clean/fetch/rebuild) + recipes/<category>/<name>/source/ ← working tree, cloned + patched + build/ ← build outputs + target/ ← cargo target dir + +Layer 2: Durable (survives clean/fetch/rebuild/release provisioning) + local/patches/<package>/ ← .patch files — the actual source code changes + local/recipes/<category>/<name>/ ← custom recipe directories + config/redbear-*.toml ← Red Bear OS build configs + local/docs/ ← planning and integration docs + recipes/<category>/<name>/recipe.toml ← the patches list (git-tracked) +``` + +### The correct workflow for any source change + +1. **Prototype the change** in `recipes/<category>/<name>/source/` to validate it compiles (scratch edit only — it is lost on the next fetch until captured as a patch) +2. **Generate a patch**: `cd recipes/<category>/<name>/source && git diff > ../../../../local/patches/<package>/my-fix.patch` +3. **Wire the patch**: add the patch to the recipe's `recipe.toml` `patches = [...]` list, in the same form as the existing entries (the kernel recipe uses paths relative to the recipe directory, e.g. `"../../../local/patches/kernel/my-fix.patch"`) +4. **Validate**: `./target/release/repo validate-patches <package>` +5. **Rebuild**: `./target/release/repo cook <package>` +6. **Commit**: `git add local/patches/ recipes/<category>/<name>/recipe.toml && git commit` + +### Common anti-patterns + +| Anti-pattern | Why it fails | +|---|---| +| Editing `source/` files then running `make all` | `make all` calls `repo fetch` which regenerates `source/` — edits are lost | +| Creating a patch but not wiring it into `recipe.toml` | Patch file exists but is never applied — build uses unpatched source | +| Editing `recipe.toml` patches list without creating the actual `.patch` file | Build fails with "missing patch" error | +| Expecting `source/` changes to survive `make clean` | `make clean` deletes `source/` directories | +| Running `repo cook` without `--allow-protected` for core packages | Protected recipes (kernel, relibc, base) are offline-only by default | + +### Patch file location convention + +- `local/patches/base/` — for the `base` package (init, daemon, all drivers) +- `local/patches/kernel/` — for the kernel +- `local/patches/relibc/` — for relibc +- `local/patches/installer/` — for the installer +- 
`local/patches/bootloader/` — for the bootloader +- `local/patches/<package>/` — for any other patched package + +### Recipe patch wiring + +Each recipe's `recipe.toml` lists the patches to apply from `local/patches/<package>/` (NOTE: the kernel recipe writes each entry as a path relative to the recipe directory, e.g. `"../../../local/patches/kernel/P20-x2apic-icr-mode-fix.patch"` — match the form already used in the recipe you are editing): + +```toml +[source] +git = "https://gitlab.redox-os.org/redox-os/base.git" +rev = "463f76b96..." +patches = [ + "P0-daemon-fix-init-notify-unwrap.patch", # applied first + "P9-init-scheduler-completed.patch", # applied second + # ... more patches +] +``` + +Patches are applied in listed order. Dependencies between patches must be respected (a patch +that defines a type must come before a patch that uses it). + +### Kernel-specific notes + +The kernel source at `recipes/core/kernel/source/` is a separate git worktree (rev `866dfad`). +The kernel recipe is at `recipes/core/kernel/recipe.toml` and patches are at +`local/patches/kernel/`. The same durability rules apply — all kernel changes must be +in `local/patches/kernel/*.patch`, never in the `source/` tree directly. **Red Bear OS is offline-first by default. 
No script, build target, or tool may silently pull from any upstream repository without explicit user instruction.** diff --git a/local/patches/kernel/P20-x2apic-icr-mode-fix.patch b/local/patches/kernel/P20-x2apic-icr-mode-fix.patch new file mode 100644 index 0000000000..692aeff855 --- /dev/null +++ b/local/patches/kernel/P20-x2apic-icr-mode-fix.patch @@ -0,0 +1,64 @@ +diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs +--- a/src/acpi/madt/arch/x86.rs ++++ b/src/acpi/madt/arch/x86.rs +@@ -444,28 +444,40 @@ + // Send INIT IPI (Assert) + { + let mut icr = 0x4500u64; +- icr |= u64::from(apic_id) << 32; ++ if local_apic.x2 { ++ icr |= u64::from(apic_id) << 32; ++ } else { ++ icr |= u64::from(apic_id as u8) << 56; ++ } + local_apic.set_icr(icr); + } +- ++ + // Intel SDM Vol 3A §8.4.4: wait 10ms after INIT + early_udelay(10_000); +- ++ + // Send START IPI #1 + { + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; + let mut icr = 0x0600u64 | ap_segment as u64; +- icr |= u64::from(apic_id) << 32; ++ if local_apic.x2 { ++ icr |= u64::from(apic_id) << 32; ++ } else { ++ icr |= u64::from(apic_id as u8) << 56; ++ } + local_apic.set_icr(icr); + } +- ++ + // Intel SDM: wait 200µs between SIPIs + early_udelay(200); +- ++ + // Send START IPI #2 (recommended for compatibility) + { + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; + let mut icr = 0x0600u64 | ap_segment as u64; +- icr |= u64::from(apic_id) << 32; ++ if local_apic.x2 { ++ icr |= u64::from(apic_id) << 32; ++ } else { ++ icr |= u64::from(apic_id as u8) << 56; ++ } + local_apic.set_icr(icr); + } +diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs +--- a/src/arch/x86_shared/device/local_apic.rs ++++ b/src/arch/x86_shared/device/local_apic.rs +@@ -60,7 +60,7 @@ + + if !self.x2 { +- debug!("Detected xAPIC at {:#x}", physaddr.data()); ++ info!("Detected xAPIC at {:#x}", physaddr.data()); + self.address = map_device_memory(physaddr, 4096).data(); + } else { +- 
debug!("Detected x2APIC"); ++ info!("Detected x2APIC"); + } diff --git a/recipes/core/kernel/recipe.toml b/recipes/core/kernel/recipe.toml index ed293261f3..122849a46d 100644 --- a/recipes/core/kernel/recipe.toml +++ b/recipes/core/kernel/recipe.toml @@ -34,6 +34,7 @@ patches = [ "../../../local/patches/kernel/P17-3-sched-affinity.patch", "../../../local/patches/kernel/P17-3-syscall-dispatch.patch", "../../../local/patches/kernel/P19-2-irq-debug.patch", + "../../../local/patches/kernel/P20-x2apic-icr-mode-fix.patch", ] [build] diff --git a/recipes/core/kernel/source/src/acpi/madt/arch/x86.rs b/recipes/core/kernel/source/src/acpi/madt/arch/x86.rs index 489f0fc952..a4d5a98b23 100644 --- a/recipes/core/kernel/source/src/acpi/madt/arch/x86.rs +++ b/recipes/core/kernel/source/src/acpi/madt/arch/x86.rs @@ -383,6 +383,8 @@ pub(super) fn init(madt: Madt) { } RmmA::invalidate_all(); + } else { + debug!("KERNEL AP: LAPIC CPU {} disabled in MADT, skipping", u32::from(ap_local_apic.id)); } } else if let MadtEntry::LocalX2Apic(ap_x2apic) = madt_entry { let apic_id = ap_x2apic.x2apic_id; @@ -444,7 +446,11 @@ pub(super) fn init(madt: Madt) { // Send INIT IPI (Assert) { let mut icr = 0x4500u64; - icr |= u64::from(apic_id) << 32; + if local_apic.x2 { + icr |= u64::from(apic_id) << 32; + } else { + icr |= u64::from(apic_id as u8) << 56; + } local_apic.set_icr(icr); } @@ -455,7 +461,11 @@ pub(super) fn init(madt: Madt) { { let ap_segment = (TRAMPOLINE >> 12) & 0xFF; let mut icr = 0x0600u64 | ap_segment as u64; - icr |= u64::from(apic_id) << 32; + if local_apic.x2 { + icr |= u64::from(apic_id) << 32; + } else { + icr |= u64::from(apic_id as u8) << 56; + } local_apic.set_icr(icr); } @@ -466,7 +476,11 @@ pub(super) fn init(madt: Madt) { { let ap_segment = (TRAMPOLINE >> 12) & 0xFF; let mut icr = 0x0600u64 | ap_segment as u64; - icr |= u64::from(apic_id) << 32; + if local_apic.x2 { + icr |= u64::from(apic_id) << 32; + } else { + icr |= u64::from(apic_id as u8) << 56; + } 
local_apic.set_icr(icr); } @@ -520,6 +534,8 @@ pub(super) fn init(madt: Madt) { } RmmA::invalidate_all(); + } else { + debug!("KERNEL AP: x2APIC CPU {} disabled in MADT (flags={:#x}), skipping", apic_id, flags); } } else if let MadtEntry::LocalApicNmi(nmi) = madt_entry { let target_apic = nmi.processor; diff --git a/recipes/core/kernel/source/src/acpi/madt/arch/x86.rs.orig b/recipes/core/kernel/source/src/acpi/madt/arch/x86.rs.orig new file mode 100644 index 0000000000..637f96ac0e --- /dev/null +++ b/recipes/core/kernel/source/src/acpi/madt/arch/x86.rs.orig @@ -0,0 +1,162 @@ +use core::{ + hint, + sync::atomic::{AtomicU8, Ordering}, +}; + +use crate::{ + arch::{ + device::local_apic::the_local_apic, + start::{kstart_ap, KernelArgsAp}, + }, + cpu_set::LogicalCpuId, + memory::{ + allocate_p2frame, Frame, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch, + VirtualAddress, PAGE_SIZE, + }, + startup::AP_READY, +}; + +use super::{Madt, MadtEntry}; + +const TRAMPOLINE: usize = 0x8000; +static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline")); + +pub(super) fn init(madt: Madt) { + let local_apic = unsafe { the_local_apic() }; + let me = local_apic.id(); + + if local_apic.x2 { + debug!(" X2APIC {}", me.get()); + } else { + debug!(" XAPIC {}: {:>08X}", me.get(), local_apic.address); + } + + if cfg!(not(feature = "multi_core")) { + return; + } + + // Map trampoline + let trampoline_frame = Frame::containing(PhysicalAddress::new(TRAMPOLINE)); + let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE)); + let (result, page_table_physaddr) = unsafe { + //TODO: do not have writable and executable! 
+ let mut mapper = KernelMapper::lock_rw(); + + let result = mapper + .map_phys( + trampoline_page.start_address(), + trampoline_frame.base(), + PageFlags::new().execute(true).write(true), + ) + .expect("failed to map trampoline"); + + (result, mapper.table().phys().data()) + }; + result.flush(); + + // Write trampoline, make sure TRAMPOLINE page is free for use + for (i, val) in TRAMPOLINE_DATA.iter().enumerate() { + unsafe { + (*((TRAMPOLINE as *mut u8).add(i) as *const AtomicU8)).store(*val, Ordering::SeqCst); + } + } + + unsafe { + let preliminary_cpu_count = madt.iter().filter(|e| matches!(e, MadtEntry::LocalApic(entry) if u32::from(entry.id) == me.get() || entry.flags & 1 == 1)).count(); + crate::profiling::allocate(preliminary_cpu_count as u32); + } + + for madt_entry in madt.iter() { + debug!(" {:x?}", madt_entry); + if let MadtEntry::LocalApic(ap_local_apic) = madt_entry { + if u32::from(ap_local_apic.id) == me.get() { + debug!(" This is my local APIC"); + } else if ap_local_apic.flags & 1 == 1 { + let cpu_id = LogicalCpuId::next(); + + // Allocate a stack + let stack_start = RmmA::phys_to_virt( + allocate_p2frame(4) + .expect("no more frames in acpi stack_start") + .base(), + ) + .data(); + let stack_end = stack_start + (PAGE_SIZE << 4); + + let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end); + + let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id); + + let args = KernelArgsAp { + stack_end: stack_end as *mut u8, + cpu_id, + pcr_ptr, + idt_ptr, + }; + + let ap_ready = (TRAMPOLINE + 8) as *mut u64; + let ap_args_ptr = unsafe { ap_ready.add(1) }; + let ap_page_table = unsafe { ap_ready.add(2) }; + let ap_code = unsafe { ap_ready.add(3) }; + + // Set the ap_ready to 0, volatile + unsafe { + ap_ready.write(0); + ap_args_ptr.write(&args as *const _ as u64); + ap_page_table.write(page_table_physaddr as u64); + #[expect(clippy::fn_to_numeric_cast)] + ap_code.write(kstart_ap as u64); + + // Ensure all trampoline writes are visible 
to the AP before + // it starts executing. asm!("") is only a compiler barrier; + // fence(SeqCst) is a full hardware memory barrier. + core::sync::atomic::fence(Ordering::SeqCst); + }; + AP_READY.store(false, Ordering::SeqCst); + + // Send INIT IPI + { + let mut icr = 0x4500; + if local_apic.x2 { + icr |= u64::from(ap_local_apic.id) << 32; + } else { + icr |= u64::from(ap_local_apic.id) << 56; + } + local_apic.set_icr(icr); + } + + // Send START IPI + { + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; + let mut icr = 0x4600 | ap_segment as u64; + + if local_apic.x2 { + icr |= u64::from(ap_local_apic.id) << 32; + } else { + icr |= u64::from(ap_local_apic.id) << 56; + } + + local_apic.set_icr(icr); + } + + // Wait for trampoline ready + while unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } == 0 { + hint::spin_loop(); + } + while !AP_READY.load(Ordering::SeqCst) { + hint::spin_loop(); + } + + RmmA::invalidate_all(); + } + } + } + + // Unmap trampoline + let (_frame, _, flush) = unsafe { + KernelMapper::lock_rw() + .unmap_phys(trampoline_page.start_address()) + .expect("failed to unmap trampoline page") + }; + flush.flush(); +} diff --git a/recipes/core/kernel/source/src/acpi/madt/arch/x86.rs.rej b/recipes/core/kernel/source/src/acpi/madt/arch/x86.rs.rej new file mode 100644 index 0000000000..76763f99cb --- /dev/null +++ b/recipes/core/kernel/source/src/acpi/madt/arch/x86.rs.rej @@ -0,0 +1,194 @@ +--- src/acpi/madt/arch/x86.rs ++++ src/acpi/madt/arch/x86.rs +@@ -20,6 +22,7 @@ + + use super::{Madt, MadtEntry}; + ++use alloc::collections::BTreeSet; + use alloc::vec::Vec; + + /// Maximum number of APIC→CPU mappings we track for NUMA topology. +@@ -47,6 +50,67 @@ + const TRAMPOLINE: usize = 0x8000; + static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline")); + ++/// Estimate TSC frequency in MHz from CPUID. ++/// ++/// Tries CPUID leaf 0x16 (Processor Frequency Information) first, ++/// then CPUID leaf 0x15 (TSC/Core Crystal Clock Ratio). 
++/// Returns None if frequency cannot be determined. ++fn tsc_freq_mhz_cpuid() -> Option { ++ let max_leaf = unsafe { core::arch::x86_64::__cpuid(0).eax as u32 }; ++ ++ // CPUID leaf 0x16: EAX = Core Base Frequency in MHz (Intel) ++ if max_leaf >= 0x16 { ++ let mhz = unsafe { core::arch::x86_64::__cpuid(0x16) }.eax as u64; ++ if mhz > 0 { ++ return Some(mhz); ++ } ++ } ++ ++ // CPUID leaf 0x15: EAX = denominator, EBX = numerator, ECX = crystal Hz ++ if max_leaf >= 0x15 { ++ let res = unsafe { core::arch::x86_64::__cpuid(0x15) }; ++ let denom = res.eax as u64; ++ let numer = res.ebx as u64; ++ let crystal_hz = res.ecx as u64; ++ if denom > 0 && numer > 0 && crystal_hz > 0 { ++ // TSC freq = crystal_hz * numer / denom ++ let tsc_hz = crystal_hz * numer / denom; ++ return Some(tsc_hz / 1_000_000); // Hz → MHz ++ } ++ } ++ ++ None ++} ++ ++/// Early-boot microsecond delay using the Time Stamp Counter. ++/// ++/// Uses CPUID-based TSC frequency estimation when available. ++/// Falls back to a conservative spin loop calibrated for the ++/// minimum expected CPU speed (1 GHz). ++/// ++/// # Safety ++/// Must only be called after the BSP TSC is running (always true ++/// after CPU reset on x86). ++fn early_udelay(us: u64) { ++ if let Some(mhz) = tsc_freq_mhz_cpuid() { ++ // TSC-based delay: precise on invariant TSC (all modern x86). ++ // MHz = cycles per µs. ++ let target = unsafe { rdtsc() } + us * mhz; ++ while unsafe { rdtsc() } < target { ++ hint::spin_loop(); ++ } ++ } else { ++ // Fallback: conservative spin loop. ++ // spin_loop() (PAUSE) is ~40 cycles on modern Intel, ~1 on AMD. ++ // At 1 GHz minimum: 1000 cycles/µs ÷ 40 cycles/iter = 25 iters/µs. ++ // Use 50 iters/µs for safety margin on slower/variable CPUs. 
++ let iters = us.saturating_mul(50); ++ for _ in 0..iters { ++ hint::spin_loop(); ++ } ++ } ++} ++ + fn current_x2apic_processor_uid(madt: &Madt, apic_id: u32) -> Option { + madt.iter().find_map(|entry| match entry { + MadtEntry::LocalX2Apic(x2apic) if x2apic.x2apic_id == apic_id => Some(x2apic.processor_uid), +@@ -235,20 +329,53 @@ + local_apic.set_icr(icr); + } + +- // Send START IPI ++ // Intel SDM Vol 3A §8.4.4: wait 10ms after INIT deassert ++ // before sending first SIPI. Modern CPUs may need less, ++ // but 10ms is the safe specification-compliant value. ++ early_udelay(10_000); ++ ++ // Send START IPI #1 + { + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; +- let mut icr = 0x4600 | ap_segment as u64; +- ++ // ICR: Delivery Mode=StartUp(110), Vector=ap_segment ++ // Note: bit 14 (Level) must be 0 for SIPI per Intel SDM. ++ let mut icr = 0x0600 | ap_segment as u64; + if local_apic.x2 { + icr |= u64::from(ap_local_apic.id) << 32; + } else { + icr |= u64::from(ap_local_apic.id) << 56; + } ++ local_apic.set_icr(icr); ++ } ++ ++ // Intel SDM: wait 200µs between SIPIs ++ early_udelay(200); + ++ // Send START IPI #2 (recommended for compatibility) ++ { ++ let ap_segment = (TRAMPOLINE >> 12) & 0xFF; ++ let mut icr = 0x0600 | ap_segment as u64; ++ if local_apic.x2 { ++ icr |= u64::from(ap_local_apic.id) << 32; ++ } else { ++ icr |= u64::from(ap_local_apic.id) << 56; ++ } + local_apic.set_icr(icr); + } + ++ // Wait briefly for SIPI to be accepted ++ early_udelay(200); ++ ++ // Check ESR for delivery errors after SIPI sequence. ++ // Bit 5 = Send Accept Error, Bit 6 = Send Illegal Vector. 
++ let esr_val = unsafe { local_apic.esr() }; ++ if esr_val != 0 { ++ println!( ++ "KERNEL AP: CPU {} SIPI delivery error (ESR={:#x}), continuing", ++ ap_local_apic.id, esr_val ++ ); ++ } ++ + // Wait for trampoline ready with timeout + let mut trampoline_ready = false; + for _ in 0..AP_SPIN_LIMIT { +@@ -343,34 +470,50 @@ + } + AP_READY.store(false, Ordering::SeqCst); + ++ // Clear APIC Error Status Register before starting AP. ++ unsafe { local_apic.esr(); } ++ ++ // Send INIT IPI (Assert) + { + let mut icr = 0x4500u64; + icr |= u64::from(apic_id) << 32; + local_apic.set_icr(icr); + } + +- for _ in 0..100_000 { +- hint::spin_loop(); +- } ++ // Intel SDM Vol 3A §8.4.4: wait 10ms after INIT ++ early_udelay(10_000); + ++ // Send START IPI #1 + { + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; +- let mut icr = 0x4600u64 | ap_segment as u64; ++ let mut icr = 0x0600u64 | ap_segment as u64; + icr |= u64::from(apic_id) << 32; + local_apic.set_icr(icr); + } + +- for _ in 0..2_000_000 { +- hint::spin_loop(); +- } ++ // Intel SDM: wait 200µs between SIPIs ++ early_udelay(200); + ++ // Send START IPI #2 (recommended for compatibility) + { + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; +- let mut icr = 0x4600u64 | ap_segment as u64; ++ let mut icr = 0x0600u64 | ap_segment as u64; + icr |= u64::from(apic_id) << 32; + local_apic.set_icr(icr); + } + ++ // Wait briefly for SIPI acceptance ++ early_udelay(200); ++ ++ // Check ESR for delivery errors. 
++ let esr_val = unsafe { local_apic.esr() }; ++ if esr_val != 0 { ++ println!( ++ "KERNEL AP: CPU {} SIPI delivery error (ESR={:#x}), continuing", ++ apic_id, esr_val ++ ); ++ } ++ + let mut trampoline_ready = false; + for _ in 0..AP_SPIN_LIMIT { + if unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } != 0 { diff --git a/recipes/core/kernel/source/src/arch/x86_shared/device/local_apic.rs b/recipes/core/kernel/source/src/arch/x86_shared/device/local_apic.rs index b300e6fea5..87c5a31ff3 100644 --- a/recipes/core/kernel/source/src/arch/x86_shared/device/local_apic.rs +++ b/recipes/core/kernel/source/src/arch/x86_shared/device/local_apic.rs @@ -59,10 +59,10 @@ impl LocalApic { .is_some_and(|feature_info| feature_info.has_x2apic()); if !self.x2 { - debug!("Detected xAPIC at {:#x}", physaddr.data()); + info!("Detected xAPIC at {:#x}", physaddr.data()); self.address = map_device_memory(physaddr, 4096).data(); } else { - debug!("Detected x2APIC"); + info!("Detected x2APIC"); } self.init_ap(); diff --git a/recipes/core/kernel/source/src/arch/x86_shared/idt.rs.rej b/recipes/core/kernel/source/src/arch/x86_shared/idt.rs.rej new file mode 100644 index 0000000000..fa130181dc --- /dev/null +++ b/recipes/core/kernel/source/src/arch/x86_shared/idt.rs.rej @@ -0,0 +1,11 @@ +--- src/arch/x86_shared/idt.rs ++++ src/arch/x86_shared/idt.rs +@@ -110,6 +110,8 @@ + } + + pub fn available_irqs_iter(cpu_id: LogicalCpuId) -> impl Iterator + 'static { ++ let count = (32..=254).filter(|&index| !is_reserved(cpu_id, index)).count(); ++ info!("available_irqs_iter: cpu_id={} count={}", cpu_id.get(), count); + (32..=254).filter(move |&index| !is_reserved(cpu_id, index)) + } + diff --git a/recipes/core/kernel/source/src/context/switch.rs.rej b/recipes/core/kernel/source/src/context/switch.rs.rej new file mode 100644 index 0000000000..4dab10c9a5 --- /dev/null +++ b/recipes/core/kernel/source/src/context/switch.rs.rej @@ -0,0 +1,87 @@ +--- src/context/switch.rs ++++ src/context/switch.rs +@@ 
-361,6 +361,7 @@ + } + + /// This is the scheduler function which currently utilises Deficit Weighted Round Robin Scheduler ++/// with NUMA-aware context selection preference. + fn select_next_context( + token: &mut CleanLockToken, + percpu: &PercpuBlock, +@@ -386,6 +387,10 @@ + let total_contexts: usize = contexts_list.iter().map(|q| q.len()).sum(); + let mut skipped_contexts = 0; + ++ // NUMA-aware selection: remember cross-node fallback candidate. ++ let my_numa_node = percpu.numa_node.get(); ++ let mut cross_node_fallback: Option<(usize, ArcContextLockWriteGuard)> = None; ++ + 'priority: loop { + i = (i + 1) % 40; + total_iters += 1; +@@ -450,11 +455,44 @@ + // Is this context runnable on this CPU? + let sw = unsafe { update_runnable(&mut next_context_guard, cpu_id, switch_time) }; + if let UpdateResult::CanSwitch = sw { +- // Cache the new context's priority for MCS lock priority donation. +- percpu.current_prio.set(next_context_guard.prio); +- next_context_guard_opt = Some(next_context_guard); +- balance[i] -= SCHED_PRIO_TO_WEIGHT[20]; +- break 'priority; ++ // NUMA-aware selection: check if this context's last CPU was on the same node. 
++ let same_node = if my_numa_node != u8::MAX { ++ next_context_guard.cpu_id ++ .map(|cid| { ++ crate::percpu::get_for_cpu(cid) ++ .map(|p| p.numa_node.get() == my_numa_node) ++ .unwrap_or(false) ++ }) ++ .unwrap_or(true) // New context (no last CPU) — treat as same node ++ } else { ++ true // No NUMA info — treat all as same node ++ }; ++ ++ if same_node { ++ // Cache-warm: select immediately ++ percpu.current_prio.set(next_context_guard.prio); ++ next_context_guard_opt = Some(next_context_guard); ++ balance[i] -= SCHED_PRIO_TO_WEIGHT[20]; ++ break 'priority; ++ } else { ++ // Cross-node candidate: save as fallback, keep scanning for same-node ++ if cross_node_fallback.is_none() { ++ // Cache the priority and balance for later ++ cross_node_fallback = ++ Some((next_context_guard.prio, next_context_guard)); ++ balance[i] -= SCHED_PRIO_TO_WEIGHT[20]; ++ // Don't break — keep looking for a same-node context ++ continue; ++ } else { ++ // Already have a cross-node fallback; push this one back ++ contexts.push_back(next_context_ref); ++ skipped_contexts += 1; ++ if skipped_contexts >= total_contexts { ++ break 'priority; ++ } ++ continue; ++ } ++ } + } else { + if matches!(sw, UpdateResult::Blocked) { + idle_contexts(token.token()).push_back(next_context_ref); +@@ -469,6 +507,15 @@ + } + } + } ++ ++ // If we found a cross-node fallback but no same-node context, use it ++ if next_context_guard_opt.is_none() { ++ if let Some((prio, guard)) = cross_node_fallback { ++ percpu.current_prio.set(prio); ++ next_context_guard_opt = Some(guard); ++ } ++ } ++ + percpu.balance.set(balance); + percpu.last_queue.set(i); +