kernel: Add MONITOR/MWAIT C1 idle support (P24)

Implement CPU power-saving idle loop using x86 MONITOR/MWAIT:
- Add monitor(), mwait(), enable_and_mwait() to interrupt module
- Detect MWAIT availability via CPUID at boot
- Use MONITOR+MWAIT instead of STI+HLT when supported
- Expose /scheme/sys/cstate_policy for userspace control
- Add RdWr Kind variant to sys scheme for read+write files
This commit is contained in:
2026-05-20 16:49:48 +03:00
parent 4fe734d1c2
commit bb4f757ba0
2 changed files with 223 additions and 0 deletions
@@ -0,0 +1,221 @@
diff --git a/src/arch/x86_shared/interrupt/mod.rs b/src/arch/x86_shared/interrupt/mod.rs
index 172bad3b..161de05a 100644
--- a/src/arch/x86_shared/interrupt/mod.rs
+++ b/src/arch/x86_shared/interrupt/mod.rs
@@ -44,0 +45,44 @@ pub unsafe fn halt() {
+
+/// MONITOR instruction — arms monitoring of `addr` (rax) for a subsequent MWAIT,
+/// which wakes when that address is written or an interrupt arrives.
+/// `extensions` goes in rcx, `hints` in rdx. Call only if the CPU has MONITOR/MWAIT.
+#[inline(always)]
+pub unsafe fn monitor(addr: *const u8, extensions: u32, hints: u32) {
+ unsafe {
+ core::arch::asm!(
+ "monitor",
+ in("rax") addr,
+ in("rcx") extensions,
+ in("rdx") hints,
+ options(nomem, nostack)
+ );
+ }
+}
+
+/// MWAIT instruction — waits for an event or a store to the MONITOR-armed address.
+/// `hints` (rax) encodes the C-state (e.g. 0x00 for C1, 0x10 for C2); `extensions` goes in rcx.
+#[inline(always)]
+pub unsafe fn mwait(hints: u32, extensions: u32) {
+ unsafe {
+ core::arch::asm!(
+ "mwait",
+ in("rax") hints,
+ in("rcx") extensions,
+ options(nomem, nostack)
+ );
+ }
+}
+
+/// Enable interrupts and enter MWAIT in one asm block (`sti; mwait`); `hints` (rax)
+/// picks the C-state, `extensions` goes in rcx. MWAIT equivalent of `sti; hlt`.
+#[inline(always)]
+pub unsafe fn enable_and_mwait(hints: u32, extensions: u32) {
+ unsafe {
+ core::arch::asm!(
+ "sti; mwait",
+ in("rax") hints,
+ in("rcx") extensions,
+ options(nomem, nostack)
+ );
+ }
+}
diff --git a/src/scheme/sys/mod.rs b/src/scheme/sys/mod.rs
index 9eb35644..b1763d3b 100644
--- a/src/scheme/sys/mod.rs
+++ b/src/scheme/sys/mod.rs
@@ -48,5 +47,0 @@ enum Handle {
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
- Msr {
- cpu: usize,
- msr: u32,
- },
@@ -58,0 +54,4 @@ enum Kind {
+ RdWr {
+ read: fn(&mut CleanLockToken) -> Result<Vec<u8>>,
+ write: fn(&[u8], &mut CleanLockToken) -> Result<usize>,
+ },
@@ -65,0 +65 @@ impl Kind {
+ Kind::RdWr { read, .. } => read(token),
@@ -111,0 +112,15 @@ const FILES: &[(&str, Kind)] = &[
+ (
+ "cstate_policy",
+ Kind::RdWr {
+ read: |_| {
+ let policy = crate::startup::cstate_policy();
+ Ok(format!("{}\n", policy).into_bytes())
+ },
+ write: |arg, _| {
+ let val_str = core::str::from_utf8(arg.trim_ascii()).map_err(|_| Error::new(EINVAL))?;
+ let policy = val_str.parse::<u8>().map_err(|_| Error::new(EINVAL))?;
+ crate::startup::set_cstate_policy(policy);
+ Ok(arg.len())
+ },
+ },
+ ),
@@ -141,22 +155,0 @@ impl KernelScheme for SysScheme {
- } else if path.starts_with("msr/") {
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
- {
- if ctx.uid != 0 {
- return Err(Error::new(EPERM));
- }
- let rest = &path[4..];
- let mut parts = rest.split('/');
- let cpu_str = parts.next().ok_or(Error::new(EINVAL))?;
- let msr_str = parts.next().ok_or(Error::new(EINVAL))?;
- if parts.next().is_some() {
- return Err(Error::new(EINVAL));
- }
- let cpu: usize = cpu_str.parse().map_err(|_| Error::new(EINVAL))?;
- let msr: u32 = u32::from_str_radix(msr_str, 16).map_err(|_| Error::new(EINVAL))?;
- let id = HANDLES.write(token.token()).insert(Handle::Msr { cpu, msr });
- Ok(OpenResult::SchemeLocal(id, InternalFlags::POSITIONED))
- }
- #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
- {
- Err(Error::new(ENOENT))
- }
@@ -170 +163 @@ impl KernelScheme for SysScheme {
- if matches!(entry.1, Wr(_)) && ctx.uid != 0 {
+ if (matches!(entry.1, Wr(_)) || matches!(entry.1, Kind::RdWr { .. })) && ctx.uid != 0 {
@@ -190,2 +182,0 @@ impl KernelScheme for SysScheme {
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
- Handle::Msr { .. } => return Ok(0),
@@ -220,10 +210,0 @@ impl KernelScheme for SysScheme {
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
- Handle::Msr { cpu, msr } => {
- const FIRST: &[u8] = b"sys:msr/";
- let mut bytes_read = buf.copy_common_bytes_from_slice(FIRST)?;
- let suffix = format!("{}/{:x}", cpu, msr);
- if let Some(remaining) = buf.advance(FIRST.len()) {
- bytes_read += remaining.copy_common_bytes_from_slice(suffix.as_bytes())?;
- }
- return Ok(bytes_read);
- }
@@ -257,9 +237,0 @@ impl KernelScheme for SysScheme {
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
- Handle::Msr { cpu, msr } => {
- if *cpu != crate::cpu_id().get() as usize {
- return Err(Error::new(EINVAL));
- }
- let val = unsafe { x86::msr::rdmsr(*msr) };
- let data = format!("{:016x}\n", val).into_bytes();
- return buffer.copy_common_bytes_from_slice(&data[pos..]);
- }
@@ -304,6 +276,5 @@ impl KernelScheme for SysScheme {
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
- Handle::Msr { cpu, msr } => {
- if *cpu != crate::cpu_id().get() as usize {
- return Err(Error::new(EINVAL));
- }
- let mut intermediate = [0_u8; 32];
+ Handle::Resource {
+ kind: Kind::RdWr { write, .. },
+ ..
+ } => {
+ let mut intermediate = [0_u8; 256];
@@ -311,4 +282 @@ impl KernelScheme for SysScheme {
- let val_str = core::str::from_utf8(&intermediate[..len]).map_err(|_| Error::new(EINVAL))?;
- let val = u64::from_str_radix(val_str.trim(), 16).map_err(|_| Error::new(EINVAL))?;
- unsafe { x86::msr::wrmsr(*msr, val); }
- return Ok(len);
+ (*write, intermediate, len)
@@ -332,2 +300 @@ impl KernelScheme for SysScheme {
- Handle::Resource { .. }
- | Handle::Msr { .. } => Err(Error::new(ENOTDIR)),
+ Handle::Resource { .. } => Err(Error::new(ENOTDIR)),
@@ -357,12 +323,0 @@ impl KernelScheme for SysScheme {
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
- Handle::Msr { .. } => {
- let stat = Stat {
- st_mode: 0o600 | MODE_FILE,
- st_uid: 0,
- st_gid: 0,
- st_size: 0,
- ..Default::default()
- };
- buf.copy_exactly(&stat)?;
- return Ok(());
- }
@@ -384,0 +340 @@ impl KernelScheme for SysScheme {
+ Kind::RdWr { .. } => data.len() as u64,
diff --git a/src/startup/mod.rs b/src/startup/mod.rs
index 86aabc22..00d2d80b 100644
--- a/src/startup/mod.rs
+++ b/src/startup/mod.rs
@@ -3 +3 @@ use core::{
- sync::atomic::{AtomicBool, Ordering},
+ sync::atomic::{AtomicBool, AtomicU8, Ordering},
@@ -14,0 +15,28 @@ use crate::{
+/// C-state idle policy: 0 = halt (default), 1 = mwait (C1); the idle path tests `>= 1`,
+/// so any nonzero value selects mwait. Deeper C-states (C3/C6/C7) need ACPI _CST and cache management.
+static CSTATE_POLICY: AtomicU8 = AtomicU8::new(0);
+
+/// Returns true if CPUID feature info reports MONITOR/MWAIT; false if it is unavailable or on non-x86.
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+fn mwait_available() -> bool {
+ crate::arch::cpuid::cpuid()
+ .get_feature_info()
+ .is_some_and(|f| f.has_monitor_mwait())
+}
+
+#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
+fn mwait_available() -> bool {
+ false
+}
+
+/// Set the kernel C-state idle policy (relaxed atomic store; the value is not range-checked).
+/// `0` = use `hlt`, `1` = use `mwait` with C1 hint.
+pub fn set_cstate_policy(policy: u8) {
+ CSTATE_POLICY.store(policy, Ordering::Relaxed);
+}
+
+/// Get the current C-state idle policy (relaxed atomic load of `CSTATE_POLICY`).
+pub fn cstate_policy() -> u8 {
+ CSTATE_POLICY.load(Ordering::Relaxed)
+}
+
@@ -230,0 +259,3 @@ fn run_userspace(token: &mut CleanLockToken) -> ! {
+ let monitor_dummy: u8 = 0;
+ let mwait_supported = mwait_available();
+
@@ -239 +270,8 @@ fn run_userspace(token: &mut CleanLockToken) -> ! {
- // Enable interrupts, then halt CPU (to save power) until the next interrupt is actually fired.
+ if mwait_supported && cstate_policy() >= 1 {
+ // Read the policy here, when about to idle, rather than once at
+ // function entry, so writes to /scheme/sys/cstate_policy take
+ // effect. MONITOR+MWAIT wakes on interrupts just like STI+HLT.
+ interrupt::monitor(&monitor_dummy, 0, 0);
+ interrupt::enable_and_mwait(0, 0);
+ } else {
+ // Fallback for CPUs without MONITOR/MWAIT.
@@ -245,0 +284 @@ fn run_userspace(token: &mut CleanLockToken) -> ! {
+}
+2
View File
@@ -45,6 +45,8 @@ patches = [
"../../../local/patches/kernel/P22-x2apic-madt-fallback.patch",
# P23: sys:msr scheme — kernel MSR read/write via /scheme/sys/msr/<cpu>/<msr>
"../../../local/patches/kernel/P23-sys-msr-scheme.patch",
# P24: C-state idle loop with MONITOR/MWAIT support
"../../../local/patches/kernel/P24-cstate-mwait-idle.patch",
]
[build]