kernel: Add MONITOR/MWAIT C1 idle support (P24)
Implement a CPU power-saving idle loop using x86 MONITOR/MWAIT:
- Add monitor(), mwait(), enable_and_mwait() to interrupt module
- Detect MWAIT availability via CPUID at boot
- Use MONITOR+MWAIT instead of STI+HLT when supported
- Expose /scheme/sys/cstate_policy for userspace control
- Add RdWr Kind variant to sys scheme for read+write files
This commit is contained in:
@@ -0,0 +1,221 @@
|
||||
diff --git a/src/arch/x86_shared/interrupt/mod.rs b/src/arch/x86_shared/interrupt/mod.rs
|
||||
index 172bad3b..161de05a 100644
|
||||
--- a/src/arch/x86_shared/interrupt/mod.rs
|
||||
+++ b/src/arch/x86_shared/interrupt/mod.rs
|
||||
@@ -44,0 +45,44 @@ pub unsafe fn halt() {
|
||||
+
|
||||
+/// MONITOR instruction — sets up a memory address to monitor for writes.
|
||||
+/// Setup instruction for MWAIT. The CPU watches `addr` and wakes from MWAIT
|
||||
+/// when the address is written or an interrupt arrives.
|
||||
+#[inline(always)]
|
||||
+pub unsafe fn monitor(addr: *const u8, extensions: u32, hints: u32) {
|
||||
+ unsafe {
|
||||
+ core::arch::asm!(
|
||||
+ "monitor",
|
||||
+ in("rax") addr,
|
||||
+ in("rcx") extensions,
|
||||
+ in("rdx") hints,
|
||||
+ options(nomem, nostack)
|
||||
+ );
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/// MWAIT instruction — waits for an event or store to the monitored address.
|
||||
+/// `hints` encodes the desired C-state (e.g. 0x00 for C1, 0x10 for C2).
|
||||
+#[inline(always)]
|
||||
+pub unsafe fn mwait(hints: u32, extensions: u32) {
|
||||
+ unsafe {
|
||||
+ core::arch::asm!(
|
||||
+ "mwait",
|
||||
+ in("rax") hints,
|
||||
+ in("rcx") extensions,
|
||||
+ options(nomem, nostack)
|
||||
+ );
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/// Atomically enable interrupts and enter MWAIT with the C-state given by `hints`.
|
||||
+/// MWAIT equivalent of `sti; hlt`.
|
||||
+#[inline(always)]
|
||||
+pub unsafe fn enable_and_mwait(hints: u32, extensions: u32) {
|
||||
+ unsafe {
|
||||
+ core::arch::asm!(
|
||||
+ "sti; mwait",
|
||||
+ in("rax") hints,
|
||||
+ in("rcx") extensions,
|
||||
+ options(nomem, nostack)
|
||||
+ );
|
||||
+ }
|
||||
+}
|
||||
diff --git a/src/scheme/sys/mod.rs b/src/scheme/sys/mod.rs
|
||||
index 9eb35644..b1763d3b 100644
|
||||
--- a/src/scheme/sys/mod.rs
|
||||
+++ b/src/scheme/sys/mod.rs
|
||||
@@ -48,5 +47,0 @@ enum Handle {
|
||||
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
- Msr {
|
||||
- cpu: usize,
|
||||
- msr: u32,
|
||||
- },
|
||||
@@ -58,0 +54,4 @@ enum Kind {
|
||||
+ RdWr {
|
||||
+ read: fn(&mut CleanLockToken) -> Result<Vec<u8>>,
|
||||
+ write: fn(&[u8], &mut CleanLockToken) -> Result<usize>,
|
||||
+ },
|
||||
@@ -65,0 +65 @@ impl Kind {
|
||||
+ Kind::RdWr { read, .. } => read(token),
|
||||
@@ -111,0 +112,15 @@ const FILES: &[(&str, Kind)] = &[
|
||||
+ (
|
||||
+ "cstate_policy",
|
||||
+ Kind::RdWr {
|
||||
+ read: |_| {
|
||||
+ let policy = crate::startup::cstate_policy();
|
||||
+ Ok(format!("{}\n", policy).into_bytes())
|
||||
+ },
|
||||
+ write: |arg, _| {
|
||||
+ let val_str = core::str::from_utf8(arg.trim_ascii()).map_err(|_| Error::new(EINVAL))?;
|
||||
+ let policy = val_str.parse::<u8>().map_err(|_| Error::new(EINVAL))?;
|
||||
+ crate::startup::set_cstate_policy(policy);
|
||||
+ Ok(arg.len())
|
||||
+ },
|
||||
+ },
|
||||
+ ),
|
||||
@@ -141,22 +155,0 @@ impl KernelScheme for SysScheme {
|
||||
- } else if path.starts_with("msr/") {
|
||||
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
- {
|
||||
- if ctx.uid != 0 {
|
||||
- return Err(Error::new(EPERM));
|
||||
- }
|
||||
- let rest = &path[4..];
|
||||
- let mut parts = rest.split('/');
|
||||
- let cpu_str = parts.next().ok_or(Error::new(EINVAL))?;
|
||||
- let msr_str = parts.next().ok_or(Error::new(EINVAL))?;
|
||||
- if parts.next().is_some() {
|
||||
- return Err(Error::new(EINVAL));
|
||||
- }
|
||||
- let cpu: usize = cpu_str.parse().map_err(|_| Error::new(EINVAL))?;
|
||||
- let msr: u32 = u32::from_str_radix(msr_str, 16).map_err(|_| Error::new(EINVAL))?;
|
||||
- let id = HANDLES.write(token.token()).insert(Handle::Msr { cpu, msr });
|
||||
- Ok(OpenResult::SchemeLocal(id, InternalFlags::POSITIONED))
|
||||
- }
|
||||
- #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
- {
|
||||
- Err(Error::new(ENOENT))
|
||||
- }
|
||||
@@ -170 +163 @@ impl KernelScheme for SysScheme {
|
||||
- if matches!(entry.1, Wr(_)) && ctx.uid != 0 {
|
||||
+ if (matches!(entry.1, Wr(_)) || matches!(entry.1, Kind::RdWr { .. })) && ctx.uid != 0 {
|
||||
@@ -190,2 +182,0 @@ impl KernelScheme for SysScheme {
|
||||
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
- Handle::Msr { .. } => return Ok(0),
|
||||
@@ -220,10 +210,0 @@ impl KernelScheme for SysScheme {
|
||||
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
- Handle::Msr { cpu, msr } => {
|
||||
- const FIRST: &[u8] = b"sys:msr/";
|
||||
- let mut bytes_read = buf.copy_common_bytes_from_slice(FIRST)?;
|
||||
- let suffix = format!("{}/{:x}", cpu, msr);
|
||||
- if let Some(remaining) = buf.advance(FIRST.len()) {
|
||||
- bytes_read += remaining.copy_common_bytes_from_slice(suffix.as_bytes())?;
|
||||
- }
|
||||
- return Ok(bytes_read);
|
||||
- }
|
||||
@@ -257,9 +237,0 @@ impl KernelScheme for SysScheme {
|
||||
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
- Handle::Msr { cpu, msr } => {
|
||||
- if *cpu != crate::cpu_id().get() as usize {
|
||||
- return Err(Error::new(EINVAL));
|
||||
- }
|
||||
- let val = unsafe { x86::msr::rdmsr(*msr) };
|
||||
- let data = format!("{:016x}\n", val).into_bytes();
|
||||
- return buffer.copy_common_bytes_from_slice(&data[pos..]);
|
||||
- }
|
||||
@@ -304,6 +276,5 @@ impl KernelScheme for SysScheme {
|
||||
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
- Handle::Msr { cpu, msr } => {
|
||||
- if *cpu != crate::cpu_id().get() as usize {
|
||||
- return Err(Error::new(EINVAL));
|
||||
- }
|
||||
- let mut intermediate = [0_u8; 32];
|
||||
+ Handle::Resource {
|
||||
+ kind: Kind::RdWr { write, .. },
|
||||
+ ..
|
||||
+ } => {
|
||||
+ let mut intermediate = [0_u8; 256];
|
||||
@@ -311,4 +282 @@ impl KernelScheme for SysScheme {
|
||||
- let val_str = core::str::from_utf8(&intermediate[..len]).map_err(|_| Error::new(EINVAL))?;
|
||||
- let val = u64::from_str_radix(val_str.trim(), 16).map_err(|_| Error::new(EINVAL))?;
|
||||
- unsafe { x86::msr::wrmsr(*msr, val); }
|
||||
- return Ok(len);
|
||||
+ (*write, intermediate, len)
|
||||
@@ -332,2 +300 @@ impl KernelScheme for SysScheme {
|
||||
- Handle::Resource { .. }
|
||||
- | Handle::Msr { .. } => Err(Error::new(ENOTDIR)),
|
||||
+ Handle::Resource { .. } => Err(Error::new(ENOTDIR)),
|
||||
@@ -357,12 +323,0 @@ impl KernelScheme for SysScheme {
|
||||
- #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
- Handle::Msr { .. } => {
|
||||
- let stat = Stat {
|
||||
- st_mode: 0o600 | MODE_FILE,
|
||||
- st_uid: 0,
|
||||
- st_gid: 0,
|
||||
- st_size: 0,
|
||||
- ..Default::default()
|
||||
- };
|
||||
- buf.copy_exactly(&stat)?;
|
||||
- return Ok(());
|
||||
- }
|
||||
@@ -384,0 +340 @@ impl KernelScheme for SysScheme {
|
||||
+ Kind::RdWr { .. } => data.len() as u64,
|
||||
diff --git a/src/startup/mod.rs b/src/startup/mod.rs
|
||||
index 86aabc22..00d2d80b 100644
|
||||
--- a/src/startup/mod.rs
|
||||
+++ b/src/startup/mod.rs
|
||||
@@ -3 +3 @@ use core::{
|
||||
- sync::atomic::{AtomicBool, Ordering},
|
||||
+ sync::atomic::{AtomicBool, AtomicU8, Ordering},
|
||||
@@ -14,0 +15,28 @@ use crate::{
|
||||
+/// C-state idle policy: 0 = halt (default), 1 = mwait (C1).
|
||||
+/// Deeper C-states (C3/C6/C7) require ACPI _CST and cache management.
|
||||
+static CSTATE_POLICY: AtomicU8 = AtomicU8::new(0);
|
||||
+
|
||||
+/// Returns true if the CPU supports MONITOR/MWAIT.
|
||||
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
+fn mwait_available() -> bool {
|
||||
+ crate::arch::cpuid::cpuid()
|
||||
+ .get_feature_info()
|
||||
+ .is_some_and(|f| f.has_monitor_mwait())
|
||||
+}
|
||||
+
|
||||
+#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
|
||||
+fn mwait_available() -> bool {
|
||||
+ false
|
||||
+}
|
||||
+
|
||||
+/// Set the kernel C-state idle policy.
|
||||
+/// `0` = use `hlt`, `1` = use `mwait` with C1 hint.
|
||||
+pub fn set_cstate_policy(policy: u8) {
|
||||
+ CSTATE_POLICY.store(policy, Ordering::Relaxed);
|
||||
+}
|
||||
+
|
||||
+/// Get the current C-state idle policy.
|
||||
+pub fn cstate_policy() -> u8 {
|
||||
+ CSTATE_POLICY.load(Ordering::Relaxed)
|
||||
+}
|
||||
+
|
||||
@@ -230,0 +259,3 @@ fn run_userspace(token: &mut CleanLockToken) -> ! {
|
||||
+ let monitor_dummy: u8 = 0;
|
||||
+ let use_mwait = mwait_available() && cstate_policy() >= 1;
|
||||
+
|
||||
@@ -239 +270,8 @@ fn run_userspace(token: &mut CleanLockToken) -> ! {
|
||||
- // Enable interrupts, then halt CPU (to save power) until the next interrupt is actually fired.
|
||||
+ if use_mwait {
|
||||
+ // MONITOR+MWAIT provides the same interrupt-driven wake
|
||||
+ // semantics as STI+HLT but with lower power draw on
|
||||
+ // CPUs that support it.
|
||||
+ interrupt::monitor(&monitor_dummy, 0, 0);
|
||||
+ interrupt::enable_and_mwait(0, 0);
|
||||
+ } else {
|
||||
+ // Fallback for CPUs without MONITOR/MWAIT.
|
||||
@@ -245,0 +284 @@ fn run_userspace(token: &mut CleanLockToken) -> ! {
|
||||
+}
|
||||
Reference in New Issue
Block a user