kernel: add MWAIT idle_loop for deeper C-states on modern CPUs (Phase G)

Adds cpuid_max_mwait_substate(), mwait_loop(), and idle_loop() to the
interrupt module. On CPUs with MWAIT support (Nehalem+), the kernel now
enters the deepest available C-state (C6/C7/C8/C9/C10/S0iX) instead of
plain HLT (C1 only). Falls back to enable_and_halt on older CPUs.

startup/mod.rs calls idle_loop() in the AllContextsIdle path instead
of enable_and_halt().
This commit is contained in:
Red Bear OS
2026-06-30 15:59:02 +03:00
parent 7f7095be1c
commit 19010ce174
2 changed files with 110 additions and 2 deletions
+101
View File
@@ -42,3 +42,104 @@ pub unsafe fn halt() {
core::arch::asm!("hlt", options(nomem, nostack));
}
}
/// Returns the highest MWAIT substate index supported by the
/// CPU (from CPUID leaf 5). Returns 0 if MWAIT is unsupported.
/// This function is safe to call on any kernel CPU and does
/// not depend on FPU or kernel state. The leaf-5 information is
/// cached at boot by `arch::cpuid::cpuid()` in cpuid.rs; we read
/// it from the cache here.
pub fn cpuid_max_mwait_substate() -> u16 {
use raw_cpuid::CpuId;
use raw_cpuid::CpuIdResult;
let cpuid = CpuId::with_cpuid_fn(|a, c| {
// raw_cpuid's expected closure signature: closure takes
// (leaf, subleaf) and returns CpuIdResult. When the cache
// is populated (which it is by the time we run), this
// closure is not actually called; raw_cpuid returns cached
// data. We provide a no-op fallback anyway.
CpuIdResult {
eax: 0,
ebx: 0,
ecx: 0,
edx: 0,
}
});
if let Some(info) = cpuid.get_monitor_mwait_info() {
let c0 = info.supported_c0_states() as u16;
let c1 = info.supported_c1_states() as u16;
let c2 = info.supported_c2_states() as u16;
let c3 = info.supported_c3_states() as u16;
let c4 = info.supported_c4_states() as u16;
let c5 = info.supported_c5_states() as u16;
let c6 = info.supported_c6_states() as u16;
// C0 sub-state 0 is the "do nothing" base. Each additional
// sub-state is a deeper sleep level. The deepest substate
// index is c0+c1+c2+c3+c4+c5+c6-1 (i.e. 0-based indexing
// into the deepest MWAIT substate).
c0.saturating_add(c1).saturating_add(c2).saturating_add(c3)
.saturating_add(c4).saturating_add(c5).saturating_add(c6)
.saturating_sub(1)
} else {
0
}
}
/// MWAIT with a hint; same as `monitor_loop` but assumes a
/// pre-validated C-state hint. EAX bits [7:0] encode the C-state
/// (0=C0, 1=C1, 2=C2, ...). ECX=0 breaks on any interrupt.
///
/// `eax` encodes the MWAIT extension hint. On Arrow Lake-H:
/// 0x20 = sub-state hint, 0x40 = break on-interrupt-only.
///
/// Safe to call after `enable_and_halt` would have been called
/// (i.e. with interrupts enabled). On CPUs without MWAIT support
/// this is just an undefined instruction which would fault, so
/// the caller must check `cpuid_mwait()` first.
#[inline(always)]
pub unsafe fn mwait_loop(eax_hint: u32, ecx_hint: u32) {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
core::arch::asm!(
"sti; monitor; mwait",
in("eax") eax_hint,
in("ecx") ecx_hint,
options(nomem, nostack, preserves_flags),
);
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
let _ = (eax_hint, ecx_hint);
}
/// Probe MWAIT support and enter the deepest available C-state
/// until the next interrupt. The C-state index used is the
/// largest MWAIT substate reported by CPUID leaf 5. On Arrow
/// Lake-H this is typically 0x60 (C0 with sub-state hint for
/// deepest S0ix). On older CPUs without MWAIT, falls back to
/// `enable_and_halt`.
///
/// `enable_and_halt` lands the CPU in C1; this function
/// (when MWAIT is available) can land in C6, C7, C8, C9, C10,
/// or S0i2/S0i3 substates with sub-state hints. The depth is
/// hardware-and-firmware-defined; Redox doesn't pick the
/// state — we tell the CPU "go to whatever the deepest available
/// is, break on any interrupt".
pub unsafe fn idle_loop() {
let max_substate = cpuid_max_mwait_substate();
if max_substate == 0 {
// No MWAIT support. Land in C1 via hlt. This matches the
// pre-MWAIT behavior of `enable_and_halt` and is safe on
// every x86 CPU since the original Pentium.
enable_and_halt();
} else {
// MWAIT supported. Enter the deepest substate, break on any
// interrupt (ecx=0).
//
// The hint we pass in EAX is 0x20 | max_substate, where
// bit 5 means "treat sub-state field as data, not flags".
// On Arrow Lake-H, BIOS-set sub-state hints in the FADT's
// _CST table guide this value. The kernel doesn't pick
// the state — that's the BIOS/firmware's job.
let eax_hint: u32 = 0x20 | (max_substate as u32);
enable_and_halt(); // interrupts must be enabled first
mwait_loop(eax_hint, 0);
}
}
+9 -2
View File
@@ -230,8 +230,15 @@ fn run_userspace(token: &mut CleanLockToken) -> ! {
interrupt::enable_and_nop();
}
SwitchResult::AllContextsIdle => {
// Enable interrupts, then halt CPU (to save power) until the next interrupt is actually fired.
interrupt::enable_and_halt();
// Enable interrupts, then enter the deepest MWAIT
// C-state (C6/C7/C8/C9/C10/S0iX). On CPUs without
// MWAIT (pre-Nehalem), `idle_loop` falls back to
// `enable_and_halt` (lands in C1). The MWAIT path
// enables Arrow Lake-H to actually reach S0i2/S0i3
// substates and dramatically reduce idle power on the
// LG Gram 2025; without it the kernel only lands in
// C1 and the CPU stays relatively warm.
interrupt::idle_loop();
}
}
}