diff --git a/src/arch/x86_shared/interrupt/mod.rs b/src/arch/x86_shared/interrupt/mod.rs
index 172bad3ba9..794d5354b6 100644
--- a/src/arch/x86_shared/interrupt/mod.rs
+++ b/src/arch/x86_shared/interrupt/mod.rs
@@ -42,3 +42,154 @@ pub unsafe fn halt() {
         core::arch::asm!("hlt", options(nomem, nostack));
     }
 }
+
+/// Returns the MWAIT hint (the EAX value) for the deepest idle state this CPU
+/// enumerates in CPUID leaf 5, or 0 when plain `hlt` should be used instead.
+///
+/// The hint uses the encoding MWAIT expects: bits 7:4 hold the target C-state
+/// minus one (0 = C1, 1 = C2, ...) and bits 3:0 hold the sub-state index.
+/// CPUID.05H:EDX reports one nibble per C-state with the number of sub-states;
+/// the deepest state is the highest C-state with a non-zero count, and its
+/// deepest sub-state is `count - 1`. These C-state numbers are the ones MWAIT
+/// defines and need not match ACPI C-state numbers.
+///
+/// 0 is returned when MONITOR/MWAIT is not advertised (CPUID.01H:ECX bit 3),
+/// when leaf 5 is absent, or when the only enumerated state is C1 sub-state 0:
+/// its hint is 0 and it is no deeper than what `hlt` already reaches.
+///
+/// CPUID is executed on every call; nothing is cached here.
+// NOTE(review): if the kernel already has a shared CPUID accessor, prefer it
+// over the local closure below so CPUID is issued from a single place.
+pub fn cpuid_max_mwait_substate() -> u16 {
+    use raw_cpuid::{CpuId, CpuIdResult};
+
+    let cpuid = CpuId::with_cpuid_fn(|leaf, subleaf| {
+        #[cfg(target_arch = "x86")]
+        use core::arch::x86::__cpuid_count;
+        #[cfg(target_arch = "x86_64")]
+        use core::arch::x86_64::__cpuid_count;
+
+        // SAFETY: assumes the CPU implements CPUID (TODO confirm against the
+        // kernel's minimum supported CPU). Some toolchains declare this
+        // intrinsic as a safe fn; the allowance keeps the block warning-free.
+        #[allow(unused_unsafe)]
+        let regs = unsafe { __cpuid_count(leaf, subleaf) };
+        CpuIdResult {
+            eax: regs.eax,
+            ebx: regs.ebx,
+            ecx: regs.ecx,
+            edx: regs.edx,
+        }
+    });
+
+    // Leaf 5 can be present while MONITOR/MWAIT itself is disabled or hidden
+    // (firmware setting, hypervisor), so the feature bit is checked first.
+    let has_mwait = cpuid
+        .get_feature_info()
+        .is_some_and(|features| features.has_monitor_mwait());
+    if !has_mwait {
+        return 0;
+    }
+    let Some(info) = cpuid.get_monitor_mwait_info() else {
+        return 0;
+    };
+
+    // Sub-state counts for MWAIT C1..=C7; slot `i` maps to hint nibble `i`.
+    let substates = [
+        info.supported_c1_states(),
+        info.supported_c2_states(),
+        info.supported_c3_states(),
+        info.supported_c4_states(),
+        info.supported_c5_states(),
+        info.supported_c6_states(),
+        info.supported_c7_states(),
+    ];
+    substates
+        .iter()
+        .enumerate()
+        .rev()
+        .find(|&(_, &count)| count > 0)
+        // `slot` is at most 6, so the cast cannot truncate.
+        .map_or(0, |(slot, &count)| ((slot as u16) << 4) | (count - 1))
+}
+
+/// Arms MONITOR, enables interrupts and waits in MWAIT until an interrupt.
+///
+/// `eax_hint` is the MWAIT hint: bits 7:4 select the target C-state minus one
+/// and bits 3:0 the sub-state, as produced by `cpuid_max_mwait_substate`.
+/// `ecx_hint` carries the MWAIT extensions; 0 requests none, so the wait ends
+/// on any interrupt that can be delivered once `sti` has taken effect.
+///
+/// MONITOR and MWAIT use the same registers for different things: MONITOR
+/// takes a linear address in EAX/RAX with extensions in ECX and hints in EDX,
+/// while MWAIT takes the hint in EAX. They are therefore issued from separate
+/// `asm!` blocks. MONITOR is armed on a private byte that nothing writes, so
+/// only interrupts (and other hardware break events) end the wait. `sti` sits
+/// directly before `mwait` so its one-instruction interrupt shadow closes the
+/// window in which a wake-up interrupt could be taken before the wait starts.
+///
+/// # Safety
+///
+/// - The CPU must support MONITOR/MWAIT, otherwise the instructions raise
+///   `#UD`; a non-zero result of `cpuid_max_mwait_substate` establishes this.
+/// - Call with interrupts disabled; if they are already enabled, an interrupt
+///   may be handled between arming and waiting, delaying the wake-up.
+/// - Interrupts are enabled when this function returns.
+#[inline(always)]
+pub unsafe fn mwait_loop(eax_hint: u32, ecx_hint: u32) {
+    // Never written, so arming MONITOR on it adds no store-triggered wake-ups.
+    static MONITOR_TARGET: u8 = 0;
+    let monitor_addr = core::ptr::addr_of!(MONITOR_TARGET) as usize;
+
+    // SAFETY: the caller guarantees MONITOR/MWAIT support, and the address
+    // of a static stays valid for the whole program. `nomem` is deliberately
+    // omitted: interrupt handlers run while the CPU waits inside the second
+    // block, so the compiler must not assume memory is unchanged across it.
+    unsafe {
+        core::arch::asm!(
+            "monitor",
+            // "eax" names the A register; the full pointer width is loaded.
+            in("eax") monitor_addr,
+            in("ecx") 0u32,
+            in("edx") 0u32,
+            options(nostack, preserves_flags),
+        );
+        core::arch::asm!(
+            "sti",
+            "mwait",
+            in("eax") eax_hint,
+            in("ecx") ecx_hint,
+            options(nostack, preserves_flags),
+        );
+    }
+}
+
+/// Enables interrupts and idles the CPU until the next interrupt, using MWAIT
+/// with the deepest enumerated hint when available and `hlt` otherwise.
+///
+/// The hint comes from `cpuid_max_mwait_substate`; 0 means MWAIT is missing
+/// or offers nothing beyond C1, so `enable_and_halt` is used. Exactly one
+/// wait is performed per call: after a wake-up control returns to the caller.
+/// Which physical power state a hint maps to is decided by the hardware.
+///
+/// # Safety
+///
+/// Enables interrupts and stalls the CPU; the caller must be in a context
+/// where both are acceptable, and should have interrupts disabled on entry
+/// (see `mwait_loop`).
+// NOTE(review): deep C-states can stop the local APIC timer on CPUs without
+// ARAT (CPUID.06H:EAX bit 2) and add wake-up latency; confirm the kernel's
+// timer source keeps running before relying on this path.
+pub unsafe fn idle_loop() {
+    let hint = cpuid_max_mwait_substate();
+    // SAFETY: the caller's contract is forwarded unchanged; a non-zero hint is
+    // only produced when CPUID advertises MONITOR/MWAIT.
+    unsafe {
+        if hint == 0 {
+            enable_and_halt();
+        } else {
+            // ECX = 0: no MWAIT extensions requested.
+            mwait_loop(u32::from(hint), 0);
+        }
+    }
+}
diff --git a/src/startup/mod.rs b/src/startup/mod.rs
index 73972e4272..1992c7fbbf 100644
--- a/src/startup/mod.rs
+++ b/src/startup/mod.rs
@@ -230,8 +230,11 @@ fn run_userspace(token: &mut CleanLockToken) -> ! {
                     interrupt::enable_and_nop();
                 }
                 SwitchResult::AllContextsIdle => {
-                    // Enable interrupts, then halt CPU (to save power) until the next interrupt is actually fired.
-                    interrupt::enable_and_halt();
+                    // Enable interrupts, then idle the CPU (to save power)
+                    // until the next interrupt fires. `idle_loop` uses MWAIT
+                    // with the deepest hint CPUID enumerates and falls back
+                    // to `enable_and_halt` when MWAIT is unavailable.
+                    interrupt::idle_loop();
                 }
             }
         }