fix: boot process improvements — dependency cycle, INIT_NOTIFY, probing loop, and log spam fixes

- Fix P15-8-init-cycle-detection.patch: replace visiting+error with seen+silent-skip
  to eliminate 11 false-positive 'dependency cycle detected' errors on shared deps
- Fix P0-daemon-fix-init-notify-unwrap.patch: remove eprintln! for missing
  INIT_NOTIFY (expected for oneshot_async services, ~7 daemons affected)
- Fix driver-manager hotplug loop: add PERMANENTLY_SKIPPED static set shared
  between hotplug handler and DriverConfig::probe() to stop infinite re-probing
  of Fatal/NotSupported/deferred-exhausted device+driver pairs (e.g. ided)
- Fix driver-manager log_timeline: suppress repeated EPIPE/ENOENT errors with
  AtomicI32 dedup and AtomicBool one-shot guards for boot timeline JSON
- Add driver-manager SIGTERM handler, ACPI bus registration, --status mode,
  driver reap loop, graceful shutdown, and reduced deferred retries (30→3)
This commit is contained in:
2026-05-17 12:34:02 +03:00
parent 7914626765
commit cee25393d8
4002 changed files with 574970 additions and 1680003 deletions
@@ -0,0 +1,414 @@
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -70,6 +70,9 @@ mod log;
/// Memory management
mod memory;
+/// NUMA topology
+mod numa;
+
/// Panic
mod panic;
diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs
--- a/src/acpi/madt/arch/x86.rs
+++ b/src/acpi/madt/arch/x86.rs
@@ -18,6 +18,29 @@
use super::{Madt, MadtEntry};
+use alloc::vec::Vec;
+
+/// Maximum number of APIC→CPU mappings we track for NUMA topology.
+/// `record_apic_mapping` ignores any mapping recorded once this many are stored.
+const MAX_APIC_MAPPINGS: usize = 256;
+
+/// One recorded pairing of an APIC ID with the logical CPU ID it was assigned.
+struct ApicMapping {
+ apic_id: u32,
+ cpu_id: LogicalCpuId,
+}
+
+/// Filler used to initialize every slot of `APIC_MAPPINGS`
+/// (`apic_id == u32::MAX`, logical CPU 0).
+const UNINIT_MAPPING: ApicMapping = ApicMapping { apic_id: u32::MAX, cpu_id: LogicalCpuId::new(0) };
+
+/// Fixed-capacity mapping table; only the first `APIC_MAPPING_COUNT` slots hold recorded data.
+static mut APIC_MAPPINGS: [ApicMapping; MAX_APIC_MAPPINGS] = [UNINIT_MAPPING; MAX_APIC_MAPPINGS];
+/// Number of slots of `APIC_MAPPINGS` that have been filled by `record_apic_mapping`.
+static mut APIC_MAPPING_COUNT: usize = 0;
+
+/// Appends an APIC ID → logical CPU pairing to `APIC_MAPPINGS`.
+///
+/// When the table already holds `MAX_APIC_MAPPINGS` entries the pairing is
+/// dropped without any report.
+///
+/// # Safety
+///
+/// Reads and writes the `APIC_MAPPINGS` / `APIC_MAPPING_COUNT` `static mut`s
+/// with no synchronization, so the caller must guarantee that nothing else
+/// accesses them concurrently.
+unsafe fn record_apic_mapping(apic_id: u32, cpu_id: LogicalCpuId) {
+    let slot = APIC_MAPPING_COUNT;
+    if slot >= MAX_APIC_MAPPINGS {
+        // Table full: nothing is recorded.
+        return;
+    }
+    APIC_MAPPINGS[slot] = ApicMapping { apic_id, cpu_id };
+    APIC_MAPPING_COUNT = slot + 1;
+}
+
const AP_SPIN_LIMIT: u32 = 1_000_000;
const TRAMPOLINE: usize = 0x8000;
static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline"));
@@ -61,6 +82,10 @@ pub(super) fn init(madt: Madt) {
}
if cfg!(not(feature = "multi_core")) {
+ unsafe {
+ record_apic_mapping(me.get(), LogicalCpuId::new(0));
+ }
+ crate::numa::init_default();
return;
}
@@ -216,6 +241,17 @@ pub(super) fn init(madt: Madt) {
crate::CPU_COUNT.fetch_add(1, Ordering::Relaxed);
+ // Record APIC→CPU mapping for NUMA topology.
+ unsafe {
+ record_apic_mapping(u32::from(ap_local_apic.id), cpu_id);
+ }
+ // Set NUMA node from SRAT data.
+ if let Some(percpu) = crate::percpu::get_for_cpu(cpu_id) {
+ if let Some(node) = crate::acpi::srat::numa_node_for_apic(u32::from(ap_local_apic.id)) {
+ percpu.numa_node.set(node);
+ }
+ }
+
RmmA::invalidate_all();
}
} else if let MadtEntry::LocalX2Apic(ap_x2apic) = madt_entry {
@@ -325,6 +361,18 @@ pub(super) fn init(madt: Madt) {
}
crate::CPU_COUNT.fetch_add(1, Ordering::Relaxed);
+
+ // Record APIC→CPU mapping for NUMA topology.
+ unsafe {
+ record_apic_mapping(apic_id, cpu_id);
+ }
+ // Set NUMA node from SRAT data.
+ if let Some(percpu) = crate::percpu::get_for_cpu(cpu_id) {
+ if let Some(node) = crate::acpi::srat::numa_node_for_apic(apic_id) {
+ percpu.numa_node.set(node);
+ }
+ }
+
RmmA::invalidate_all();
}
} else if let MadtEntry::LocalApicNmi(nmi) = madt_entry {
@@ -342,6 +390,20 @@ pub(super) fn init(madt: Madt) {
}
}
+ // Initialize NUMA topology from APIC→CPU mappings and SRAT.
+ {
+ let mappings = unsafe { &APIC_MAPPINGS[..APIC_MAPPING_COUNT] };
+ let mappings_ref: Vec<(u32, LogicalCpuId)> = mappings
+ .iter()
+ .map(|m| (m.apic_id, m.cpu_id))
+ .collect();
+ crate::numa::init_from_srat(&mappings_ref);
+ }
+ // Set BSP's NUMA node from SRAT.
+ if let Some(node) = crate::acpi::srat::numa_node_for_apic(me.get()) {
+ crate::percpu::PercpuBlock::current().numa_node.set(node);
+ }
+
// Unmap trampoline
if let Some((_frame, _, flush)) = unsafe {
KernelMapper::lock_rw()
diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs
--- a/src/acpi/mod.rs
+++ b/src/acpi/mod.rs
@@ -20,6 +20,8 @@ mod rxsdt;
pub mod sdt;
#[cfg(target_arch = "aarch64")]
mod spcr;
+pub mod slit;
+pub mod srat;
mod xsdt;
unsafe fn map_linearly(addr: PhysicalAddress, len: usize, mapper: &mut crate::memory::PageMapper) {
@@ -163,7 +165,14 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
// TODO: Enumerate processors in userspace, and then provide an ACPI-independent interface
// to initialize enumerated processors to userspace?
+ // Parse SRAT BEFORE MADT so NUMA node mapping is available
+ // when APs are started and PercpuBlocks are created.
+ srat::init();
+
Madt::init();
+
+ // Parse SLIT after MADT for the NUMA distance matrix.
+ slit::init();
//TODO: support this on any arch
// SPCR must be initialized after MADT for interrupt controllers
#[cfg(target_arch = "aarch64")]
diff --git a/src/acpi/slit.rs b/src/acpi/slit.rs
--- /dev/null
+++ b/src/acpi/slit.rs
@@ -0,0 +1,45 @@
+//! SLIT (System Locality Information Table) parser.
+//!
+//! Parses the NUMA distance matrix for scheduler NUMA-aware work stealing.
+
+use super::sdt::Sdt;
+use crate::acpi::find_sdt;
+
+/// Largest number of localities the stored matrix can hold; `init` ignores
+/// tables that report more.
+const MAX_NODES: usize = 8;
+
+/// Distance matrix indexed as `[from][to]`. Every cell starts at 10, the same
+/// value `distance` returns when no table is loaded.
+static mut SLIT_MATRIX: [[u8; MAX_NODES]; MAX_NODES] = [[10u8; MAX_NODES]; MAX_NODES];
+/// Number of localities copied from the table by `init` (0 until then).
+static mut SLIT_NUM_NODES: usize = 0;
+/// Set to `true` by `init` once the matrix has been copied.
+static mut SLIT_AVAILABLE: bool = false;
+
+/// Returns `true` once `init` has loaded a distance matrix.
+pub fn is_available() -> bool {
+    let loaded = unsafe { SLIT_AVAILABLE };
+    loaded
+}
+/// Returns the number of localities recorded by `init` (0 if no table was loaded).
+pub fn num_nodes() -> usize {
+    let nodes = unsafe { SLIT_NUM_NODES };
+    nodes
+}
+
+/// Looks up the stored distance from locality `from` to locality `to`.
+///
+/// Returns 10 when no table has been loaded or when either index is
+/// `MAX_NODES` or larger; otherwise returns the matrix cell `[from][to]`.
+pub fn distance(from: u8, to: u8) -> u8 {
+    const FALLBACK: u8 = 10;
+    let loaded = unsafe { SLIT_AVAILABLE };
+    let row = usize::from(from);
+    let col = usize::from(to);
+    if !loaded || row >= MAX_NODES || col >= MAX_NODES {
+        return FALLBACK;
+    }
+    unsafe { SLIT_MATRIX[row][col] }
+}
+
+/// Returns `true` when `distance(node1, node2)` is at most 20.
+// NOTE(review): 20 is presumably the cut-off for "same package" — confirm against the scheduler's use.
+pub fn same_socket(node1: u8, node2: u8) -> bool {
+    let dist = distance(node1, node2);
+    dist <= 20
+}
+
+/// Locates the SLIT and copies its distance matrix into `SLIT_MATRIX`.
+///
+/// The table body starts with a 64-bit locality count followed by a
+/// `count * count` byte matrix. The function returns without changing any
+/// state when the table is missing, duplicated, has the wrong signature, is
+/// too short for the header or the matrix, or reports zero or more than
+/// `MAX_NODES` localities. On success it sets `SLIT_NUM_NODES` and
+/// `SLIT_AVAILABLE`.
+pub fn init() {
+    let sdt = match find_sdt("SLIT").as_slice() {
+        [] => return,
+        [x] => *x,
+        xs => { println!("SLIT: {} tables found, expected 1", xs.len()); return; }
+    };
+    if &sdt.signature != b"SLIT" { return; }
+    let data_addr = sdt.data_address();
+    let data_len = sdt.data_len();
+    if data_len < 8 { return; }
+    // SAFETY: at least 8 bytes are readable at `data_addr` (checked above).
+    // The body follows the SDT header, so the address is not guaranteed to be
+    // 8-byte aligned; a plain `*const u64` dereference would be undefined
+    // behaviour, hence `read_unaligned`.
+    let num_nodes = unsafe { core::ptr::read_unaligned(data_addr as *const u64) };
+    // Range-check the full 64-bit value before narrowing, so a huge count
+    // cannot wrap into the accepted range on 32-bit targets.
+    if num_nodes == 0 || num_nodes > MAX_NODES as u64 { println!("SLIT: {num_nodes} nodes (max {MAX_NODES}), ignoring"); return; }
+    let num_nodes = num_nodes as usize;
+    let matrix_start = 8;
+    let matrix_size = num_nodes * num_nodes;
+    if data_len < matrix_start + matrix_size { println!("SLIT: matrix truncated ({data_len} < {})", matrix_start + matrix_size); return; }
+    // SAFETY: `matrix_start + matrix_size <= data_len` was checked above and
+    // `u8` has no alignment requirement.
+    let entries = unsafe { core::slice::from_raw_parts((data_addr + matrix_start) as *const u8, matrix_size) };
+    // SAFETY: NOTE(review): assumes `init` runs once, before any reader of
+    // the SLIT statics can run concurrently — confirm against the ACPI init path.
+    let matrix = unsafe { &mut *core::ptr::addr_of_mut!(SLIT_MATRIX) };
+    // The table stores rows back to back; copy each into the leading
+    // `num_nodes` columns of the fixed-size matrix.
+    for (row, src) in matrix.iter_mut().zip(entries.chunks_exact(num_nodes)) {
+        row[..num_nodes].copy_from_slice(src);
+    }
+    unsafe { SLIT_NUM_NODES = num_nodes; SLIT_AVAILABLE = true; }
+    debug!("SLIT: {} nodes, distance matrix loaded", num_nodes);
+}
diff --git a/src/acpi/srat.rs b/src/acpi/srat.rs
--- /dev/null
+++ b/src/acpi/srat.rs
@@ -0,0 +1,102 @@
+//! SRAT (System Resource Affinity Table) parser.
+//!
+//! Parses CPU-to-NUMA-node and memory-to-NUMA-node affinity information.
+//! Called before MADT init so that NUMA data is available during AP startup.
+
+use super::sdt::Sdt;
+use crate::acpi::find_sdt;
+
+/// Capacity of the CPU affinity table; `init` drops entries beyond this.
+const MAX_CPU_ENTRIES: usize = 256;
+/// Capacity of the memory affinity table; `init` drops entries beyond this.
+const MAX_MEM_ENTRIES: usize = 64;
+
+/// CPU affinity record kept after parsing: APIC ID, its node, and the entry's enabled flag.
+#[derive(Clone, Copy)]
+struct SratCpuEntry { apic_id: u32, node: u8, enabled: bool }
+
+/// Memory affinity record kept after parsing: node, 64-bit base and length, and the enabled flag.
+#[derive(Clone, Copy)]
+struct SratMemEntry { node: u8, base: u64, length: u64, enabled: bool }
+
+/// Filler for unused CPU slots (disabled, `apic_id == u32::MAX`).
+const CPU_NONE: SratCpuEntry = SratCpuEntry { apic_id: u32::MAX, node: 0, enabled: false };
+/// Filler for unused memory slots (disabled, zero range).
+const MEM_NONE: SratMemEntry = SratMemEntry { node: 0, base: 0, length: 0, enabled: false };
+
+/// Parsed CPU entries; only the first `SRAT_CPU_COUNT` slots hold parsed data.
+static mut SRAT_CPU_ENTRIES: [SratCpuEntry; MAX_CPU_ENTRIES] = [CPU_NONE; MAX_CPU_ENTRIES];
+/// Parsed memory entries; only the first `SRAT_MEM_COUNT` slots hold parsed data.
+static mut SRAT_MEM_ENTRIES: [SratMemEntry; MAX_MEM_ENTRIES] = [MEM_NONE; MAX_MEM_ENTRIES];
+/// Number of CPU entries stored by `init`.
+static mut SRAT_CPU_COUNT: usize = 0;
+/// Number of memory entries stored by `init`.
+static mut SRAT_MEM_COUNT: usize = 0;
+/// Set to `true` by `init` after the table has been walked.
+static mut SRAT_AVAILABLE: bool = false;
+
+/// Returns `true` once `init` has parsed an SRAT.
+pub fn is_available() -> bool {
+    let parsed = unsafe { SRAT_AVAILABLE };
+    parsed
+}
+
+/// Returns the node of the first enabled SRAT CPU entry whose APIC ID equals
+/// `apic_id`.
+///
+/// Returns `None` when no SRAT has been parsed or when no enabled entry matches.
+pub fn numa_node_for_apic(apic_id: u32) -> Option<u8> {
+    let parsed = unsafe { SRAT_AVAILABLE };
+    if !parsed {
+        return None;
+    }
+    let used = unsafe { SRAT_CPU_COUNT };
+    let table = unsafe { &SRAT_CPU_ENTRIES };
+    table
+        .iter()
+        .take(used)
+        .find(|entry| entry.apic_id == apic_id && entry.enabled)
+        .map(|entry| entry.node)
+}
+
+/// Returns the highest node number among enabled SRAT CPU entries, plus one.
+///
+/// Returns 1 when no SRAT has been parsed or when no CPU entry is enabled.
+pub fn numa_node_count() -> usize {
+    let parsed = unsafe { SRAT_AVAILABLE };
+    if !parsed {
+        return 1;
+    }
+    let used = unsafe { SRAT_CPU_COUNT };
+    let table = unsafe { &SRAT_CPU_ENTRIES };
+    let highest = table
+        .iter()
+        .take(used)
+        .filter(|entry| entry.enabled)
+        .map(|entry| entry.node)
+        .max()
+        .unwrap_or(0);
+    usize::from(highest) + 1
+}
+
+/// Body of an SRAT entry of type 0x0, overlaid by `init` on the bytes that
+/// follow the entry's 2-byte type/length prefix (14 bytes). The proximity
+/// domain is split: low byte first, upper three bytes after the SAPIC EID.
+#[repr(C, packed)]
+struct SratLocalApic { _proximity_lo: u8, apic_id: u8, flags: u32, _local_sapic_eid: u8, _proximity_hi: [u8; 3], _clock_domain: u32 }
+
+/// Body of an SRAT entry of type 0x1, overlaid the same way (38 bytes).
+/// Base and length are each stored as separate low/high 32-bit halves.
+#[repr(C, packed)]
+struct SratMemoryAffinity { proximity_domain: u32, _reserved1: u16, base_address_lo: u32, base_address_hi: u32, length_lo: u32, length_hi: u32, _reserved2: u32, flags: u32, _reserved3: u64 }
+
+/// Body of an SRAT entry of type 0x2, overlaid the same way (22 bytes).
+#[repr(C, packed)]
+struct SratLocalX2Apic { _reserved: u16, proximity_domain: u32, x2apic_id: u32, flags: u32, _clock_domain: u32, _reserved2: u32 }
+
+/// Narrows a 32-bit SRAT proximity domain to the `u8` node number stored in
+/// the parsed tables.
+///
+/// Returns `None` for domains of `u8::MAX` and above: larger values do not
+/// fit, and `u8::MAX` itself is the "unknown" marker used for
+/// `PercpuBlock::numa_node`.
+fn node_from_domain(domain: u32) -> Option<u8> {
+    if domain >= u8::MAX as u32 { None } else { Some(domain as u8) }
+}
+
+/// Locates the SRAT and records its CPU and memory affinity entries.
+///
+/// Walks the variable-length entries that follow the 12-byte table-specific
+/// header, storing type 0x0 / 0x2 entries in `SRAT_CPU_ENTRIES` and type 0x1
+/// entries in `SRAT_MEM_ENTRIES`. Entries beyond the table capacities are
+/// dropped. Entries whose proximity domain cannot be represented as a node
+/// number (see `node_from_domain`) are skipped and counted, instead of being
+/// truncated onto an unrelated low-numbered node. Returns early, leaving
+/// `SRAT_AVAILABLE` false, when the table is missing, duplicated, has the
+/// wrong signature or is too short.
+pub fn init() {
+    let sdt = match find_sdt("SRAT").as_slice() {
+        [] => return,
+        [x] => *x,
+        xs => { println!("SRAT: {} tables found, expected 1", xs.len()); return; }
+    };
+    if &sdt.signature != b"SRAT" { return; }
+    let data_addr = sdt.data_address();
+    let data_len = sdt.data_len();
+    if data_len < 12 { println!("SRAT: table too short ({data_len} bytes)"); return; }
+    let mut offset: usize = 12;
+    // SAFETY: NOTE(review): assumes `init` runs once, before any reader of
+    // the SRAT statics can run concurrently — confirm against the ACPI init path.
+    let cpu_entries = unsafe { &mut *core::ptr::addr_of_mut!(SRAT_CPU_ENTRIES) };
+    let mem_entries = unsafe { &mut *core::ptr::addr_of_mut!(SRAT_MEM_ENTRIES) };
+    let mut cpu_count: usize = 0;
+    let mut mem_count: usize = 0;
+    let mut skipped: usize = 0;
+    while offset + 2 <= data_len {
+        // SAFETY: `offset + 2 <= data_len`, so the type and length bytes lie inside the table.
+        let entry_type = unsafe { *((data_addr + offset) as *const u8) };
+        let entry_len = unsafe { *((data_addr + offset + 1) as *const u8) } as usize;
+        // A length below 2 would never advance; a length past the end is a truncated table.
+        if entry_len < 2 || offset + entry_len > data_len { break; }
+        let entry_data = data_addr + offset + 2;
+        match entry_type {
+            0x0 if entry_len >= core::mem::size_of::<SratLocalApic>() + 2 => {
+                // SAFETY: the guard proves the entry covers the whole struct; it is packed (align 1).
+                let e = unsafe { &*(entry_data as *const SratLocalApic) };
+                let enabled = (e.flags & 1) == 1;
+                let domain = u32::from_le_bytes([e._proximity_lo, e._proximity_hi[0], e._proximity_hi[1], e._proximity_hi[2]]);
+                match node_from_domain(domain) {
+                    Some(node) => {
+                        if cpu_count < MAX_CPU_ENTRIES { cpu_entries[cpu_count] = SratCpuEntry { apic_id: e.apic_id as u32, node, enabled }; cpu_count += 1; }
+                    }
+                    None => skipped += 1,
+                }
+            }
+            0x1 if entry_len >= core::mem::size_of::<SratMemoryAffinity>() + 2 => {
+                // SAFETY: as above.
+                let e = unsafe { &*(entry_data as *const SratMemoryAffinity) };
+                let enabled = (e.flags & 1) == 1;
+                let base = (e.base_address_hi as u64) << 32 | e.base_address_lo as u64;
+                let length = (e.length_hi as u64) << 32 | e.length_lo as u64;
+                match node_from_domain(e.proximity_domain) {
+                    Some(node) => {
+                        if mem_count < MAX_MEM_ENTRIES { mem_entries[mem_count] = SratMemEntry { node, base, length, enabled }; mem_count += 1; }
+                    }
+                    None => skipped += 1,
+                }
+            }
+            0x2 if entry_len >= core::mem::size_of::<SratLocalX2Apic>() + 2 => {
+                // SAFETY: as above.
+                let e = unsafe { &*(entry_data as *const SratLocalX2Apic) };
+                let enabled = (e.flags & 1) == 1;
+                match node_from_domain(e.proximity_domain) {
+                    Some(node) => {
+                        if cpu_count < MAX_CPU_ENTRIES { cpu_entries[cpu_count] = SratCpuEntry { apic_id: e.x2apic_id, node, enabled }; cpu_count += 1; }
+                    }
+                    None => skipped += 1,
+                }
+            }
+            // Other entry types, and known types that are too short, are stepped over.
+            _ => {}
+        }
+        offset += entry_len;
+    }
+    if skipped > 0 { println!("SRAT: {skipped} entries skipped (proximity domain does not fit in u8)"); }
+    unsafe { SRAT_CPU_COUNT = cpu_count; SRAT_MEM_COUNT = mem_count; SRAT_AVAILABLE = true; }
+    debug!("SRAT: {} CPU entries, {} memory entries", cpu_count, mem_count);
+}
diff --git a/src/numa.rs b/src/numa.rs
--- a/src/numa.rs
+++ b/src/numa.rs
@@ -1,13 +1,15 @@
/// NUMA topology hints for the kernel scheduler.
-/// NUMA discovery (SRAT/SLIT parsing) is performed by a userspace daemon
-/// (numad) via /scheme/acpi/, then pushed to the kernel via scheme:numa.
-/// The kernel stores a lightweight copy for O(1) scheduling lookups.
+///
+/// NUMA discovery (SRAT/SLIT parsing) is performed during kernel ACPI init
+/// (`acpi::init()`). The kernel stores a lightweight copy for O(1) scheduling
+/// lookups. If no SRAT is found, `init_default()` creates a single-node topology.
+use crate::acpi::srat;
use crate::cpu_set::{LogicalCpuId, LogicalCpuSet};
use core::sync::atomic::{AtomicBool, Ordering};
const MAX_NUMA_NODES: usize = 8;
-#[derive(Clone, Debug)]
+#[derive(Debug)]
pub struct NumaHint {
pub node_id: u8,
pub cpus: LogicalCpuSet,
@@ -21,17 +23,12 @@
impl NumaTopology {
pub const fn new() -> Self {
const NONE: Option<NumaHint> = None;
- Self {
- nodes: [NONE; MAX_NUMA_NODES],
- initialized: AtomicBool::new(false),
- }
+ Self { nodes: [NONE; MAX_NUMA_NODES], initialized: AtomicBool::new(false) }
}
pub fn node_for_cpu(&self, cpu: LogicalCpuId) -> Option<u8> {
for node in self.nodes.iter().flatten() {
- if node.cpus.contains(cpu) {
- return Some(node.node_id);
- }
+ if node.cpus.contains(cpu) { return Some(node.node_id); }
}
None
}
@@ -43,20 +40,42 @@
static mut NUMA_TOPOLOGY: NumaTopology = NumaTopology::new();
-pub fn topology() -> &'static NumaTopology {
- unsafe { &NUMA_TOPOLOGY }
+/// Returns a shared reference to the global `NUMA_TOPOLOGY` static.
+pub fn topology() -> &'static NumaTopology { unsafe { &NUMA_TOPOLOGY } }
+
+/// Initialize NUMA topology from SRAT data parsed during ACPI init.
+///
+/// `apic_ids` pairs each APIC ID with the logical CPU ID assigned to it.
+/// Only the first call to this function or `init_default` has any effect;
+/// later calls return immediately.
+pub fn init_from_srat(apic_ids: &[(u32, LogicalCpuId)]) {
+ let topo = topology();
+ // `swap` returns the previous value: `true` means the topology was already initialized.
+ if topo.initialized.swap(true, Ordering::AcqRel) { return; }
+ // No SRAT was parsed: fall back to the single-node topology.
+ if !srat::is_available() { init_default_inner(); return; }
+ unsafe {
+ let topo_mut = &mut *core::ptr::addr_of_mut!(NUMA_TOPOLOGY);
+ for &(apic_id, cpu_id) in apic_ids {
+ if let Some(node) = srat::numa_node_for_apic(apic_id) {
+ let idx = node as usize;
+ // CPUs whose node number is MAX_NUMA_NODES or above are left out of the topology.
+ if idx < MAX_NUMA_NODES {
+ // Create the node's hint on first use, then add this CPU to its set.
+ topo_mut.nodes[idx].get_or_insert_with(|| NumaHint { node_id: node, cpus: LogicalCpuSet::empty() }).cpus.atomic_set(cpu_id);
+ }
+ }
+ }
+ // No CPU could be placed on any node: use one node 0 holding every CPU.
+ if topo_mut.nodes.iter().all(|n| n.is_none()) {
+ topo_mut.nodes[0] = Some(NumaHint { node_id: 0, cpus: LogicalCpuSet::all() });
+ }
+ }
+ let node_count = topology().nodes.iter().filter(|n| n.is_some()).count();
+ debug!("NUMA: {node_count} node(s) from SRAT");
}
+/// Fallback: single-node topology.
pub fn init_default() {
let topo = topology();
- if topo.initialized.swap(true, Ordering::AcqRel) {
- return;
- }
+ if topo.initialized.swap(true, Ordering::AcqRel) { return; }
+ init_default_inner();
+}
+
+fn init_default_inner() {
unsafe {
let topo_mut = &mut *core::ptr::addr_of_mut!(NUMA_TOPOLOGY);
- topo_mut.nodes[0] = Some(NumaHint {
- node_id: 0,
- cpus: LogicalCpuSet::all(),
- });
+ topo_mut.nodes[0] = Some(NumaHint { node_id: 0, cpus: LogicalCpuSet::all() });
}
+ debug!("NUMA: single-node topology (no SRAT)");
}
diff --git a/src/percpu.rs b/src/percpu.rs
--- a/src/percpu.rs
+++ b/src/percpu.rs
@@ -62,6 +62,10 @@ pub struct PercpuBlock {
/// from the spin loop. Default 39 (lowest priority).
pub current_prio: Cell<usize>,
+ /// NUMA proximity domain for this CPU. Set during ACPI init from SRAT.
+ /// `u8::MAX` means unknown (no SRAT or APIC ID not listed).
+ pub numa_node: Cell<u8>,
+
// TODO: Put mailbox queues here, e.g. for TLB shootdown? Just be sure to 128-byte align it
// first to avoid cache invalidation.
pub profiling: Option<&'static crate::profiling::RingBuffer>,
@@ -354,6 +358,7 @@ impl PercpuBlock {
tlb_flush_count: AtomicU32::new(0),
pi_donated_prio: AtomicU32::new(u32::MAX),
current_prio: Cell::new(39),
+ numa_node: Cell::new(u8::MAX),
ptrace_flags: Cell::new(PtraceFlags::empty()),
ptrace_session: RefCell::new(None),
inside_syscall: Cell::new(false),