cee25393d8
- Fix P15-8-init-cycle-detection.patch: replace visiting+error with seen+silent-skip to eliminate 11 false-positive 'dependency cycle detected' errors on shared deps - Fix P0-daemon-fix-init-notify-unwrap.patch: remove eprintln! for missing INIT_NOTIFY (expected for oneshot_async services, ~7 daemons affected) - Fix driver-manager hotplug loop: add PERMANENTLY_SKIPPED static set shared between hotplug handler and DriverConfig::probe() to stop infinite re-probing of Fatal/NotSupported/deferred-exhausted device+driver pairs (e.g. ided) - Fix driver-manager log_timeline: suppress repeated EPIPE/ENOENT errors with AtomicI32 dedup and AtomicBool one-shot guards for boot timeline JSON - Add driver-manager SIGTERM handler, ACPI bus registration, --status mode, driver reap loop, graceful shutdown, and reduced deferred retries (30→3)
273 lines
10 KiB
Diff
273 lines
10 KiB
Diff
--- a/src/acpi/madt/arch/x86.rs
|
|
+++ b/src/acpi/madt/arch/x86.rs
|
|
@@ -3,6 +3,8 @@
|
|
sync::atomic::{AtomicU8, Ordering},
|
|
};
|
|
|
|
+use x86::time::rdtsc;
|
|
+
|
|
use crate::{
|
|
arch::{
|
|
device::local_apic::the_local_apic,
|
|
@@ -18,6 +20,7 @@
|
|
|
|
use super::{Madt, MadtEntry};
|
|
|
|
+use alloc::collections::BTreeSet;
|
|
use alloc::vec::Vec;
|
|
|
|
/// Maximum number of APIC→CPU mappings we track for NUMA topology.
|
|
@@ -45,6 +48,67 @@
|
|
const TRAMPOLINE: usize = 0x8000;
|
|
static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline"));
|
|
|
|
+/// Estimate TSC frequency in MHz from CPUID.
|
|
+///
|
|
+/// Tries CPUID leaf 0x16 (Processor Frequency Information) first,
|
|
+/// then CPUID leaf 0x15 (TSC/Core Crystal Clock Ratio).
|
|
+/// Returns None if frequency cannot be determined.
|
|
+fn tsc_freq_mhz_cpuid() -> Option<u64> {
|
|
+ let max_leaf = unsafe { core::arch::x86_64::__cpuid(0).eax as u32 };
|
|
+
|
|
+ // CPUID leaf 0x16: EAX = Core Base Frequency in MHz (Intel)
|
|
+ if max_leaf >= 0x16 {
|
|
+ let mhz = unsafe { core::arch::x86_64::__cpuid(0x16) }.eax as u64;
|
|
+ if mhz > 0 {
|
|
+ return Some(mhz);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // CPUID leaf 0x15: EAX = denominator, EBX = numerator, ECX = crystal Hz
|
|
+ if max_leaf >= 0x15 {
|
|
+ let res = unsafe { core::arch::x86_64::__cpuid(0x15) };
|
|
+ let denom = res.eax as u64;
|
|
+ let numer = res.ebx as u64;
|
|
+ let crystal_hz = res.ecx as u64;
|
|
+ if denom > 0 && numer > 0 && crystal_hz > 0 {
|
|
+ // TSC freq = crystal_hz * numer / denom
|
|
+ let tsc_hz = crystal_hz * numer / denom;
|
|
+ return Some(tsc_hz / 1_000_000); // Hz → MHz
|
|
+ }
|
|
+ }
|
|
+
|
|
+ None
|
|
+}
|
|
+
|
|
+/// Early-boot microsecond delay using the Time Stamp Counter.
|
|
+///
|
|
+/// Uses CPUID-based TSC frequency estimation when available.
|
|
+/// Falls back to a conservative spin loop calibrated for the
|
|
+/// minimum expected CPU speed (1 GHz).
|
|
+///
|
|
+/// # Safety
|
|
+/// Must only be called after the BSP TSC is running (always true
|
|
+/// after CPU reset on x86).
|
|
+fn early_udelay(us: u64) {
|
|
+ if let Some(mhz) = tsc_freq_mhz_cpuid() {
|
|
+ // TSC-based delay: precise on invariant TSC (all modern x86).
|
|
+ // MHz = cycles per µs.
|
|
+ let target = unsafe { rdtsc() } + us * mhz;
|
|
+ while unsafe { rdtsc() } < target {
|
|
+ hint::spin_loop();
|
|
+ }
|
|
+ } else {
|
|
+ // Fallback: conservative spin loop.
|
|
+ // spin_loop() (PAUSE) is ~40 cycles on modern Intel, ~1 on AMD.
|
|
+ // At 1 GHz minimum: 1000 cycles/µs ÷ 40 cycles/iter = 25 iters/µs.
|
|
+ // Use 50 iters/µs for safety margin on slower/variable CPUs.
|
|
+ let iters = us.saturating_mul(50);
|
|
+ for _ in 0..iters {
|
|
+ hint::spin_loop();
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
fn current_x2apic_processor_uid(madt: &Madt, apic_id: u32) -> Option<u32> {
|
|
madt.iter().find_map(|entry| match entry {
|
|
MadtEntry::LocalX2Apic(x2apic) if x2apic.x2apic_id == apic_id => Some(x2apic.processor_uid),
|
|
@@ -133,6 +197,31 @@
|
|
crate::profiling::allocate(preliminary_cpu_count as u32);
|
|
}
|
|
|
|
+ // Firmware bug detection: check for duplicate APIC IDs in MADT.
|
|
+ // Some firmware (especially on early BIOS/UEFI) may list the same
|
|
+ // processor multiple times. Keep first occurrence, warn on duplicates.
|
|
+ let mut seen_apic_ids: BTreeSet<u32> = BTreeSet::new();
|
|
+ {
|
|
+ let _ = seen_apic_ids.insert(me.get()); // BSP
|
|
+ for entry in madt.iter() {
|
|
+ match entry {
|
|
+ MadtEntry::LocalApic(local) if local.flags & 1 == 1 => {
|
|
+ let id = u32::from(local.id);
|
|
+ if !seen_apic_ids.insert(id) {
|
|
+ warn!("MADT: duplicate APIC ID {} in LocalApic entry, firmware bug", id);
|
|
+ }
|
|
+ }
|
|
+ MadtEntry::LocalX2Apic(local) if local.flags & 1 == 1 => {
|
|
+ let id = local.x2apic_id;
|
|
+ if !seen_apic_ids.insert(id) {
|
|
+ warn!("MADT: duplicate x2APIC ID {} in LocalX2Apic entry, firmware bug", id);
|
|
+ }
|
|
+ }
|
|
+ _ => {}
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
for madt_entry in madt.iter() {
|
|
debug!(" {:x?}", madt_entry);
|
|
if let MadtEntry::LocalApic(ap_local_apic) = madt_entry {
|
|
@@ -192,9 +281,14 @@
|
|
};
|
|
AP_READY.store(false, Ordering::SeqCst);
|
|
|
|
- // Send INIT IPI
|
|
+ // Clear APIC Error Status Register before starting AP.
|
|
+ // Intel SDM §8.4.4: ESR should be cleared before sending SIPI.
|
|
+ unsafe { local_apic.esr(); }
|
|
+
|
|
+ // Send INIT IPI (Assert)
|
|
{
|
|
- let mut icr = 0x4500;
|
|
+ // ICR: Delivery Mode=INIT(101), Level=Assert, Trigger=Edge
|
|
+ let mut icr = 0x4500u64;
|
|
if local_apic.x2 {
|
|
icr |= u64::from(ap_local_apic.id) << 32;
|
|
} else {
|
|
@@ -203,20 +297,53 @@
|
|
local_apic.set_icr(icr);
|
|
}
|
|
|
|
- // Send START IPI
|
|
+ // Intel SDM Vol 3A §8.4.4: wait 10ms after INIT deassert
|
|
+ // before sending first SIPI. Modern CPUs may need less,
|
|
+ // but 10ms is the safe specification-compliant value.
|
|
+ early_udelay(10_000);
|
|
+
|
|
+ // Send START IPI #1
|
|
{
|
|
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
|
- let mut icr = 0x4600 | ap_segment as u64;
|
|
-
|
|
+ // ICR: Delivery Mode=StartUp(110), Vector=ap_segment
|
|
+ // Note: bit 14 (Level) must be 0 for SIPI per Intel SDM.
|
|
+ let mut icr = 0x0600 | ap_segment as u64;
|
|
if local_apic.x2 {
|
|
icr |= u64::from(ap_local_apic.id) << 32;
|
|
} else {
|
|
icr |= u64::from(ap_local_apic.id) << 56;
|
|
}
|
|
+ local_apic.set_icr(icr);
|
|
+ }
|
|
+
|
|
+ // Intel SDM: wait 200µs between SIPIs
|
|
+ early_udelay(200);
|
|
|
|
+ // Send START IPI #2 (recommended for compatibility)
|
|
+ {
|
|
+ let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
|
+ let mut icr = 0x0600 | ap_segment as u64;
|
|
+ if local_apic.x2 {
|
|
+ icr |= u64::from(ap_local_apic.id) << 32;
|
|
+ } else {
|
|
+ icr |= u64::from(ap_local_apic.id) << 56;
|
|
+ }
|
|
local_apic.set_icr(icr);
|
|
}
|
|
|
|
+ // Wait briefly for SIPI to be accepted
|
|
+ early_udelay(200);
|
|
+
|
|
+ // Check ESR for delivery errors after SIPI sequence.
|
|
+ // Bit 5 = Send Accept Error, Bit 6 = Send Illegal Vector.
|
|
+ let esr_val = unsafe { local_apic.esr() };
|
|
+ if esr_val != 0 {
|
|
+ println!(
|
|
+ "KERNEL AP: CPU {} SIPI delivery error (ESR={:#x}), continuing",
|
|
+ ap_local_apic.id, esr_val
|
|
+ );
|
|
+ }
|
|
+
|
|
// Wait for trampoline ready with timeout
|
|
let mut trampoline_ready = false;
|
|
for _ in 0..AP_SPIN_LIMIT {
|
|
@@ -311,34 +438,50 @@
|
|
}
|
|
AP_READY.store(false, Ordering::SeqCst);
|
|
|
|
+ // Clear APIC Error Status Register before starting AP.
|
|
+ unsafe { local_apic.esr(); }
|
|
+
|
|
+ // Send INIT IPI (Assert)
|
|
{
|
|
let mut icr = 0x4500u64;
|
|
icr |= u64::from(apic_id) << 32;
|
|
local_apic.set_icr(icr);
|
|
}
|
|
|
|
- for _ in 0..100_000 {
|
|
- hint::spin_loop();
|
|
- }
|
|
+ // Intel SDM Vol 3A §8.4.4: wait 10ms after INIT
|
|
+ early_udelay(10_000);
|
|
|
|
+ // Send START IPI #1
|
|
{
|
|
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
|
- let mut icr = 0x4600u64 | ap_segment as u64;
|
|
+ let mut icr = 0x0600u64 | ap_segment as u64;
|
|
icr |= u64::from(apic_id) << 32;
|
|
local_apic.set_icr(icr);
|
|
}
|
|
|
|
- for _ in 0..2_000_000 {
|
|
- hint::spin_loop();
|
|
- }
|
|
+ // Intel SDM: wait 200µs between SIPIs
|
|
+ early_udelay(200);
|
|
|
|
+ // Send START IPI #2 (recommended for compatibility)
|
|
{
|
|
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
|
- let mut icr = 0x4600u64 | ap_segment as u64;
|
|
+ let mut icr = 0x0600u64 | ap_segment as u64;
|
|
icr |= u64::from(apic_id) << 32;
|
|
local_apic.set_icr(icr);
|
|
}
|
|
|
|
+ // Wait briefly for SIPI acceptance
|
|
+ early_udelay(200);
|
|
+
|
|
+ // Check ESR for delivery errors.
|
|
+ let esr_val = unsafe { local_apic.esr() };
|
|
+ if esr_val != 0 {
|
|
+ println!(
|
|
+ "KERNEL AP: CPU {} SIPI delivery error (ESR={:#x}), continuing",
|
|
+ apic_id, esr_val
|
|
+ );
|
|
+ }
|
|
+
|
|
let mut trampoline_ready = false;
|
|
for _ in 0..AP_SPIN_LIMIT {
|
|
if unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } != 0 {
|
|
@@ -407,6 +550,19 @@
|
|
crate::percpu::PercpuBlock::current().numa_node.set(node);
|
|
}
|
|
|
|
+ // Log final CPU count vs maximum
|
|
+ let cpu_count = crate::CPU_COUNT.load(Ordering::SeqCst);
|
|
+ info!(
|
|
+ "SMP: {} CPUs online (max {})",
|
|
+ cpu_count, crate::cpu_set::MAX_CPU_COUNT
|
|
+ );
|
|
+ if cpu_count > crate::cpu_set::MAX_CPU_COUNT * 80 / 100 {
|
|
+ warn!(
|
|
+ "SMP: CPU count approaching MAX_CPU_COUNT limit ({}/{})",
|
|
+ cpu_count, crate::cpu_set::MAX_CPU_COUNT
|
|
+ );
|
|
+ }
|
|
+
|
|
// Unmap trampoline
|
|
if let Some((_frame, _, flush)) = unsafe {
|
|
KernelMapper::lock_rw()
|