From bb4f757ba0b29cb8919a6bf7dcfe2cbff8cb97a2 Mon Sep 17 00:00:00 2001 From: Admin Pupkin Date: Wed, 20 May 2026 16:49:48 +0300 Subject: [PATCH] kernel: Add MONITOR/MWAIT C1 idle support (P24) Implement CPU power-saving idle loop using x86 MONITOR/MWAIT: - Add monitor(), mwait(), enable_and_mwait() to interrupt module - Detect MWAIT availability via CPUID at boot - Use MONITOR+MWAIT instead of STI+HLT when supported - Expose /scheme/sys/cstate_policy for userspace control - Add RdWr Kind variant to sys scheme for read+write files --- .../kernel/P24-cstate-mwait-idle.patch | 221 ++++++++++++++++++ recipes/core/kernel/recipe.toml | 2 + 2 files changed, 223 insertions(+) create mode 100644 local/patches/kernel/P24-cstate-mwait-idle.patch diff --git a/local/patches/kernel/P24-cstate-mwait-idle.patch b/local/patches/kernel/P24-cstate-mwait-idle.patch new file mode 100644 index 0000000000..9f094991de --- /dev/null +++ b/local/patches/kernel/P24-cstate-mwait-idle.patch @@ -0,0 +1,221 @@ +diff --git a/src/arch/x86_shared/interrupt/mod.rs b/src/arch/x86_shared/interrupt/mod.rs +index 172bad3b..161de05a 100644 +--- a/src/arch/x86_shared/interrupt/mod.rs ++++ b/src/arch/x86_shared/interrupt/mod.rs +@@ -44,0 +45,44 @@ pub unsafe fn halt() { ++ ++/// MONITOR instruction — sets up a memory address to monitor for writes. ++/// Setup instruction for MWAIT. The CPU watches `addr` and wakes from MWAIT ++/// when the address is written or an interrupt arrives. ++#[inline(always)] ++pub unsafe fn monitor(addr: *const u8, extensions: u32, hints: u32) { ++ unsafe { ++ core::arch::asm!( ++ "monitor", ++ in("rax") addr, ++ in("rcx") extensions, ++ in("rdx") hints, ++ options(nomem, nostack) ++ ); ++ } ++} ++ ++/// MWAIT instruction — waits for an event or store to the monitored address. ++/// `hints` encodes the desired C-state (e.g. 0x00 for C1, 0x10 for C2). 
++#[inline(always)] ++pub unsafe fn mwait(hints: u32, extensions: u32) { ++ unsafe { ++ core::arch::asm!( ++ "mwait", ++ in("rax") hints, ++ in("rcx") extensions, ++ options(nomem, nostack) ++ ); ++ } ++} ++ ++/// Atomically enable interrupts and enter MWAIT (C1). ++/// MWAIT equivalent of `sti; hlt`. ++#[inline(always)] ++pub unsafe fn enable_and_mwait(hints: u32, extensions: u32) { ++ unsafe { ++ core::arch::asm!( ++ "sti; mwait", ++ in("rax") hints, ++ in("rcx") extensions, ++ options(nomem, nostack) ++ ); ++ } ++} +diff --git a/src/scheme/sys/mod.rs b/src/scheme/sys/mod.rs +index 9eb35644..b1763d3b 100644 +--- a/src/scheme/sys/mod.rs ++++ b/src/scheme/sys/mod.rs +@@ -48,5 +47,0 @@ enum Handle { +- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +- Msr { +- cpu: usize, +- msr: u32, +- }, +@@ -58,0 +54,4 @@ enum Kind { ++ RdWr { ++ read: fn(&mut CleanLockToken) -> Result<Vec<u8>>, ++ write: fn(&[u8], &mut CleanLockToken) -> Result<usize>, ++ }, +@@ -65,0 +65 @@ impl Kind { ++ Kind::RdWr { read, .. 
} => read(token), +@@ -111,0 +112,15 @@ const FILES: &[(&str, Kind)] = &[ ++ ( ++ "cstate_policy", ++ Kind::RdWr { ++ read: |_| { ++ let policy = crate::startup::cstate_policy(); ++ Ok(format!("{}\n", policy).into_bytes()) ++ }, ++ write: |arg, _| { ++ let val_str = core::str::from_utf8(arg.trim_ascii()).map_err(|_| Error::new(EINVAL))?; ++ let policy = val_str.parse::<u8>().map_err(|_| Error::new(EINVAL))?; ++ crate::startup::set_cstate_policy(policy); ++ Ok(arg.len()) ++ }, ++ }, ++ ), +@@ -141,22 +155,0 @@ impl KernelScheme for SysScheme { +- } else if path.starts_with("msr/") { +- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +- { +- if ctx.uid != 0 { +- return Err(Error::new(EPERM)); +- } +- let rest = &path[4..]; +- let mut parts = rest.split('/'); +- let cpu_str = parts.next().ok_or(Error::new(EINVAL))?; +- let msr_str = parts.next().ok_or(Error::new(EINVAL))?; +- if parts.next().is_some() { +- return Err(Error::new(EINVAL)); +- } +- let cpu: usize = cpu_str.parse().map_err(|_| Error::new(EINVAL))?; +- let msr: u32 = u32::from_str_radix(msr_str, 16).map_err(|_| Error::new(EINVAL))?; +- let id = HANDLES.write(token.token()).insert(Handle::Msr { cpu, msr }); +- Ok(OpenResult::SchemeLocal(id, InternalFlags::POSITIONED)) +- } +- #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +- { +- Err(Error::new(ENOENT)) +- } +@@ -170 +163 @@ impl KernelScheme for SysScheme { +- if matches!(entry.1, Wr(_)) && ctx.uid != 0 { ++ if (matches!(entry.1, Wr(_)) || matches!(entry.1, Kind::RdWr { .. })) && ctx.uid != 0 { +@@ -190,2 +182,0 @@ impl KernelScheme for SysScheme { +- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +- Handle::Msr { .. 
} => return Ok(0), +@@ -220,10 +210,0 @@ impl KernelScheme for SysScheme { +- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +- Handle::Msr { cpu, msr } => { +- const FIRST: &[u8] = b"sys:msr/"; +- let mut bytes_read = buf.copy_common_bytes_from_slice(FIRST)?; +- let suffix = format!("{}/{:x}", cpu, msr); +- if let Some(remaining) = buf.advance(FIRST.len()) { +- bytes_read += remaining.copy_common_bytes_from_slice(suffix.as_bytes())?; +- } +- return Ok(bytes_read); +- } +@@ -257,9 +237,0 @@ impl KernelScheme for SysScheme { +- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +- Handle::Msr { cpu, msr } => { +- if *cpu != crate::cpu_id().get() as usize { +- return Err(Error::new(EINVAL)); +- } +- let val = unsafe { x86::msr::rdmsr(*msr) }; +- let data = format!("{:016x}\n", val).into_bytes(); +- return buffer.copy_common_bytes_from_slice(&data[pos..]); +- } +@@ -304,6 +276,5 @@ impl KernelScheme for SysScheme { +- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +- Handle::Msr { cpu, msr } => { +- if *cpu != crate::cpu_id().get() as usize { +- return Err(Error::new(EINVAL)); +- } +- let mut intermediate = [0_u8; 32]; ++ Handle::Resource { ++ kind: Kind::RdWr { write, .. }, ++ .. ++ } => { ++ let mut intermediate = [0_u8; 256]; +@@ -311,4 +282 @@ impl KernelScheme for SysScheme { +- let val_str = core::str::from_utf8(&intermediate[..len]).map_err(|_| Error::new(EINVAL))?; +- let val = u64::from_str_radix(val_str.trim(), 16).map_err(|_| Error::new(EINVAL))?; +- unsafe { x86::msr::wrmsr(*msr, val); } +- return Ok(len); ++ (*write, intermediate, len) +@@ -332,2 +300 @@ impl KernelScheme for SysScheme { +- Handle::Resource { .. } +- | Handle::Msr { .. } => Err(Error::new(ENOTDIR)), ++ Handle::Resource { .. } => Err(Error::new(ENOTDIR)), +@@ -357,12 +323,0 @@ impl KernelScheme for SysScheme { +- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +- Handle::Msr { .. 
} => { +- let stat = Stat { +- st_mode: 0o600 | MODE_FILE, +- st_uid: 0, +- st_gid: 0, +- st_size: 0, +- ..Default::default() +- }; +- buf.copy_exactly(&stat)?; +- return Ok(()); +- } +@@ -384,0 +340 @@ impl KernelScheme for SysScheme { ++ Kind::RdWr { .. } => data.len() as u64, +diff --git a/src/startup/mod.rs b/src/startup/mod.rs +index 86aabc22..00d2d80b 100644 +--- a/src/startup/mod.rs ++++ b/src/startup/mod.rs +@@ -3 +3 @@ use core::{ +- sync::atomic::{AtomicBool, Ordering}, ++ sync::atomic::{AtomicBool, AtomicU8, Ordering}, +@@ -14,0 +15,28 @@ use crate::{ ++/// C-state idle policy: 0 = halt (default), 1 = mwait (C1). ++/// Deeper C-states (C3/C6/C7) require ACPI _CST and cache management. ++static CSTATE_POLICY: AtomicU8 = AtomicU8::new(0); ++ ++/// Returns true if the CPU supports MONITOR/MWAIT. ++#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++fn mwait_available() -> bool { ++ crate::arch::cpuid::cpuid() ++ .get_feature_info() ++ .is_some_and(|f| f.has_monitor_mwait()) ++} ++ ++#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] ++fn mwait_available() -> bool { ++ false ++} ++ ++/// Set the kernel C-state idle policy. ++/// `0` = use `hlt`, `1` = use `mwait` with C1 hint. ++pub fn set_cstate_policy(policy: u8) { ++ CSTATE_POLICY.store(policy, Ordering::Relaxed); ++} ++ ++/// Get the current C-state idle policy. ++pub fn cstate_policy() -> u8 { ++ CSTATE_POLICY.load(Ordering::Relaxed) ++} ++ +@@ -230,0 +259,3 @@ fn run_userspace(token: &mut CleanLockToken) -> ! { ++ let monitor_dummy: u8 = 0; ++ // Only the CPUID result is cached here; the policy is re-read at the idle decision so writes to cstate_policy take effect. ++ let mwait_supported = mwait_available(); +@@ -239 +270,8 @@ fn run_userspace(token: &mut CleanLockToken) -> ! { +- // Enable interrupts, then halt CPU (to save power) until the next interrupt is actually fired. ++ if mwait_supported && cstate_policy() >= 1 { ++ // MONITOR+MWAIT provides the same interrupt-driven wake ++ // semantics as STI+HLT but with lower power draw on ++ // CPUs that support it. 
++ interrupt::monitor(&monitor_dummy, 0, 0); ++ interrupt::enable_and_mwait(0, 0); ++ } else { ++ // Fallback: policy selects HLT, or the CPU lacks MONITOR/MWAIT. +@@ -245,0 +284 @@ fn run_userspace(token: &mut CleanLockToken) -> ! { ++} diff --git a/recipes/core/kernel/recipe.toml b/recipes/core/kernel/recipe.toml index 8ba614270c..f02ab19dd4 100644 --- a/recipes/core/kernel/recipe.toml +++ b/recipes/core/kernel/recipe.toml @@ -45,6 +45,8 @@ patches = [ "../../../local/patches/kernel/P22-x2apic-madt-fallback.patch", # P23: sys:msr scheme — kernel MSR read/write via /scheme/sys/msr// "../../../local/patches/kernel/P23-sys-msr-scheme.patch", + # P24: C-state idle loop with MONITOR/MWAIT support + "../../../local/patches/kernel/P24-cstate-mwait-idle.patch", ] [build]