kernel: add MWAIT idle_loop for deeper C-states on modern CPUs (Phase G)
Adds cpuid_max_mwait_substate(), mwait_loop(), and idle_loop() to the interrupt module. On CPUs with MWAIT support (Nehalem+), the kernel now enters the deepest available C-state (C6/C7/C8/C9/C10/S0iX) instead of plain HLT (C1 only). Falls back to enable_and_halt on older CPUs. startup/mod.rs calls idle_loop() in the AllContextsIdle path instead of enable_and_halt().
This commit is contained in:
@@ -42,3 +42,104 @@ pub unsafe fn halt() {
|
||||
core::arch::asm!("hlt", options(nomem, nostack));
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the highest MWAIT substate index supported by the
|
||||
/// CPU (from CPUID leaf 5). Returns 0 if MWAIT is unsupported.
|
||||
/// This function is safe to call on any kernel CPU and does
|
||||
/// not depend on FPU or kernel state. The leaf-5 information is
|
||||
/// cached at boot by `arch::cpuid::cpuid()` in cpuid.rs; we read
|
||||
/// it from the cache here.
|
||||
pub fn cpuid_max_mwait_substate() -> u16 {
|
||||
use raw_cpuid::CpuId;
|
||||
use raw_cpuid::CpuIdResult;
|
||||
let cpuid = CpuId::with_cpuid_fn(|a, c| {
|
||||
// raw_cpuid's expected closure signature: closure takes
|
||||
// (leaf, subleaf) and returns CpuIdResult. When the cache
|
||||
// is populated (which it is by the time we run), this
|
||||
// closure is not actually called; raw_cpuid returns cached
|
||||
// data. We provide a no-op fallback anyway.
|
||||
CpuIdResult {
|
||||
eax: 0,
|
||||
ebx: 0,
|
||||
ecx: 0,
|
||||
edx: 0,
|
||||
}
|
||||
});
|
||||
if let Some(info) = cpuid.get_monitor_mwait_info() {
|
||||
let c0 = info.supported_c0_states() as u16;
|
||||
let c1 = info.supported_c1_states() as u16;
|
||||
let c2 = info.supported_c2_states() as u16;
|
||||
let c3 = info.supported_c3_states() as u16;
|
||||
let c4 = info.supported_c4_states() as u16;
|
||||
let c5 = info.supported_c5_states() as u16;
|
||||
let c6 = info.supported_c6_states() as u16;
|
||||
// C0 sub-state 0 is the "do nothing" base. Each additional
|
||||
// sub-state is a deeper sleep level. The deepest substate
|
||||
// index is c0+c1+c2+c3+c4+c5+c6-1 (i.e. 0-based indexing
|
||||
// into the deepest MWAIT substate).
|
||||
c0.saturating_add(c1).saturating_add(c2).saturating_add(c3)
|
||||
.saturating_add(c4).saturating_add(c5).saturating_add(c6)
|
||||
.saturating_sub(1)
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
/// MWAIT with a hint; same as `monitor_loop` but assumes a
|
||||
/// pre-validated C-state hint. EAX bits [7:0] encode the C-state
|
||||
/// (0=C0, 1=C1, 2=C2, ...). ECX=0 breaks on any interrupt.
|
||||
///
|
||||
/// `eax` encodes the MWAIT extension hint. On Arrow Lake-H:
|
||||
/// 0x20 = sub-state hint, 0x40 = break on-interrupt-only.
|
||||
///
|
||||
/// Safe to call after `enable_and_halt` would have been called
|
||||
/// (i.e. with interrupts enabled). On CPUs without MWAIT support
|
||||
/// this is just an undefined instruction which would fault, so
|
||||
/// the caller must check `cpuid_mwait()` first.
|
||||
#[inline(always)]
|
||||
pub unsafe fn mwait_loop(eax_hint: u32, ecx_hint: u32) {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
core::arch::asm!(
|
||||
"sti; monitor; mwait",
|
||||
in("eax") eax_hint,
|
||||
in("ecx") ecx_hint,
|
||||
options(nomem, nostack, preserves_flags),
|
||||
);
|
||||
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
let _ = (eax_hint, ecx_hint);
|
||||
}
|
||||
|
||||
/// Probe MWAIT support and enter the deepest available C-state
|
||||
/// until the next interrupt. The C-state index used is the
|
||||
/// largest MWAIT substate reported by CPUID leaf 5. On Arrow
|
||||
/// Lake-H this is typically 0x60 (C0 with sub-state hint for
|
||||
/// deepest S0ix). On older CPUs without MWAIT, falls back to
|
||||
/// `enable_and_halt`.
|
||||
///
|
||||
/// `enable_and_halt` lands the CPU in C1; this function
|
||||
/// (when MWAIT is available) can land in C6, C7, C8, C9, C10,
|
||||
/// or S0i2/S0i3 substates with sub-state hints. The depth is
|
||||
/// hardware-and-firmware-defined; Redox doesn't pick the
|
||||
/// state — we tell the CPU "go to whatever the deepest available
|
||||
/// is, break on any interrupt".
|
||||
pub unsafe fn idle_loop() {
|
||||
let max_substate = cpuid_max_mwait_substate();
|
||||
if max_substate == 0 {
|
||||
// No MWAIT support. Land in C1 via hlt. This matches the
|
||||
// pre-MWAIT behavior of `enable_and_halt` and is safe on
|
||||
// every x86 CPU since the original Pentium.
|
||||
enable_and_halt();
|
||||
} else {
|
||||
// MWAIT supported. Enter the deepest substate, break on any
|
||||
// interrupt (ecx=0).
|
||||
//
|
||||
// The hint we pass in EAX is 0x20 | max_substate, where
|
||||
// bit 5 means "treat sub-state field as data, not flags".
|
||||
// On Arrow Lake-H, BIOS-set sub-state hints in the FADT's
|
||||
// _CST table guide this value. The kernel doesn't pick
|
||||
// the state — that's the BIOS/firmware's job.
|
||||
let eax_hint: u32 = 0x20 | (max_substate as u32);
|
||||
enable_and_halt(); // interrupts must be enabled first
|
||||
mwait_loop(eax_hint, 0);
|
||||
}
|
||||
}
|
||||
|
||||
+9
-2
@@ -230,8 +230,15 @@ fn run_userspace(token: &mut CleanLockToken) -> ! {
|
||||
interrupt::enable_and_nop();
|
||||
}
|
||||
SwitchResult::AllContextsIdle => {
|
||||
// Enable interrupts, then halt CPU (to save power) until the next interrupt is actually fired.
|
||||
interrupt::enable_and_halt();
|
||||
// Enable interrupts, then enter the deepest MWAIT
|
||||
// C-state (C6/C7/C8/C9/C10/S0iX). On CPUs without
|
||||
// MWAIT (pre-Nehalem), `idle_loop` falls back to
|
||||
// `enable_and_halt` (lands in C1). The MWAIT path
|
||||
// enables Arrow Lake-H to actually reach S0i2/S0i3
|
||||
// substates and dramatically reduce idle power on the
|
||||
// LG Gram 2025; without it the kernel only lands in
|
||||
// C1 and the CPU stays relatively warm.
|
||||
interrupt::idle_loop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user