From 5f0c54ebfe0b7431aba2b66473ccd80b44958331 Mon Sep 17 00:00:00 2001 From: Admin Pupkin Date: Wed, 20 May 2026 19:58:12 +0300 Subject: [PATCH] baseline --- .../kernel/P25-cpuidle-deep-cstates.patch | 213 ++++++++++++++++++ recipes/core/kernel/recipe.toml | 4 +- .../source/src/arch/x86_shared/cpuidle.rs | 186 +++++++++++++++ .../kernel/source/src/scheme/sys/cstate.rs | 15 ++ .../core/kernel/source/src/scheme/sys/mod.rs | 75 +++++- .../uutils/source/src/uu/sort/src/tmp_dir.rs | 20 +- 6 files changed, 508 insertions(+), 5 deletions(-) create mode 100644 local/patches/kernel/P25-cpuidle-deep-cstates.patch create mode 100644 recipes/core/kernel/source/src/arch/x86_shared/cpuidle.rs create mode 100644 recipes/core/kernel/source/src/scheme/sys/cstate.rs diff --git a/local/patches/kernel/P25-cpuidle-deep-cstates.patch b/local/patches/kernel/P25-cpuidle-deep-cstates.patch new file mode 100644 index 0000000000..a303347613 --- /dev/null +++ b/local/patches/kernel/P25-cpuidle-deep-cstates.patch @@ -0,0 +1,213 @@ +diff --git a/src/arch/x86_shared/cpuidle.rs b/src/arch/x86_shared/cpuidle.rs +new file mode 100644 +index 000000000..156add78e +--- /dev/null ++++ b/src/arch/x86_shared/cpuidle.rs +@@ -0,0 +1,186 @@ ++use core::cell::SyncUnsafeCell; ++use core::sync::atomic::{AtomicUsize, Ordering}; ++ ++use crate::arch::cpuid::cpuid; ++use crate::syscall::error::{Error, Result, EINVAL}; ++ ++#[repr(align(64))] ++struct MonitorTarget { ++ value: AtomicUsize, ++} ++ ++static MONITOR_TARGET: MonitorTarget = MonitorTarget { ++ value: AtomicUsize::new(0), ++}; ++ ++bitflags::bitflags! { ++ #[derive(Clone, Copy, Debug, PartialEq, Eq)] ++ pub struct CStateFlags: u32 { ++ const NEEDS_MONITOR = 1; ++ const NEEDS_WBINVD = 2; ++ } ++} ++ ++#[derive(Clone, Copy, Debug)] ++pub struct CState { ++ pub name: &'static str, ++ pub typ: u32, ++ pub latency: u32, ++ pub power: u32, ++ pub mwait_hint: u32, ++ pub flags: CStateFlags, ++} ++ ++const MAX_CSTATES: usize = 8; ++static CPUIDLE_STATES: SyncUnsafeCell<[Option; MAX_CSTATES]> = ++ SyncUnsafeCell::new([None; MAX_CSTATES]); ++static NUM_CPUIDLE_STATES: AtomicUsize = AtomicUsize::new(0); ++ ++static CSTATE_POLICY_MAX: AtomicUsize = AtomicUsize::new(0); ++ ++fn has_mwait() -> bool { ++ cpuid().get_feature_info().map_or(false, |info| info.has_monitor_mwait()) ++} ++ ++fn add_state(index: usize, state: CState) { ++ unsafe { ++ (*CPUIDLE_STATES.get())[index] = Some(state); ++ } ++} ++ ++pub fn init() { ++ add_state(0, CState { ++ name: "C1", ++ typ: 1, ++ latency: 1, ++ power: 1000, ++ mwait_hint: 0x00, ++ flags: CStateFlags::empty(), ++ }); ++ let mut count = 1; ++ ++ if has_mwait() { ++ add_state(count, CState { ++ name: "C1E", ++ typ: 1, ++ latency: 2, ++ power: 800, ++ mwait_hint: 0x01, ++ flags: CStateFlags::NEEDS_MONITOR, ++ }); ++ count += 1; ++ ++ add_state(count, CState { ++ name: "C2", ++ typ: 2, ++ latency: 10, ++ power: 500, ++ mwait_hint: 0x10, ++ flags: CStateFlags::NEEDS_MONITOR, ++ }); ++ count += 1; ++ ++ add_state(count, CState { ++ name: "C3", ++ typ: 3, ++ latency: 50, ++ power: 100, ++ mwait_hint: 0x20, ++ flags: CStateFlags::NEEDS_MONITOR | CStateFlags::NEEDS_WBINVD, ++ }); ++ count += 1; ++ ++ add_state(count, CState { ++ name: "C6", ++ typ: 6, ++ latency: 100, ++ power: 50, ++ mwait_hint: 0x50, ++ flags: CStateFlags::NEEDS_MONITOR | CStateFlags::NEEDS_WBINVD, ++ }); ++ count += 1; ++ ++ add_state(count, CState { ++ name: "C7", ++ typ: 7, ++ latency: 200, ++ power: 30, ++ mwait_hint: 0x60, ++ flags: CStateFlags::NEEDS_MONITOR | CStateFlags::NEEDS_WBINVD, ++ }); ++ count += 1; ++ } ++ ++ NUM_CPUIDLE_STATES.store(count, Ordering::SeqCst); ++ log::info!("cpuidle: initialized {} states (mwait={})", count, has_mwait()); ++} ++ ++pub fn policy_read() -> usize { ++ CSTATE_POLICY_MAX.load(Ordering::Relaxed) ++} ++ ++pub fn policy_write(buf: &[u8]) -> Result { ++ let s = core::str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?; ++ let s = s.trim(); ++ let val: usize = s.parse().map_err(|_| Error::new(EINVAL))?; ++ let num_states = NUM_CPUIDLE_STATES.load(Ordering::Relaxed); ++ if val >= num_states { ++ return Err(Error::new(EINVAL)); ++ } ++ CSTATE_POLICY_MAX.store(val, Ordering::Relaxed); ++ log::info!("cpuidle: policy set to max state {}", val); ++ Ok(s.len()) ++} ++ ++pub fn resource() -> Result> { ++ let mut output = alloc::string::String::new(); ++ let num_states = NUM_CPUIDLE_STATES.load(Ordering::Relaxed); ++ let policy = CSTATE_POLICY_MAX.load(Ordering::Relaxed); ++ output.push_str(&format!("policy_max: {}\n", policy)); ++ output.push_str(&format!("num_states: {}\n", num_states)); ++ for i in 0..num_states { ++ if let Some(state) = unsafe { (*CPUIDLE_STATES.get())[i] } { ++ output.push_str(&format!( ++ "state{}: name={} type={} latency={}us power={} hint={:#x} flags={:?}\n", ++ i, state.name, state.typ, state.latency, state.power, state.mwait_hint, state.flags ++ )); ++ } ++ } ++ Ok(output.into_bytes()) ++} ++ ++pub unsafe fn enter_idle() { ++ let policy_max = CSTATE_POLICY_MAX.load(Ordering::Relaxed); ++ let num_states = NUM_CPUIDLE_STATES.load(Ordering::Relaxed); ++ let target_index = if num_states == 0 { ++ 0 ++ } else { ++ core::cmp::min(policy_max, num_states - 1) ++ }; ++ ++ if target_index == 0 { ++ unsafe { crate::arch::interrupt::enable_and_halt(); } ++ return; ++ } ++ ++ let state = match unsafe { (*CPUIDLE_STATES.get())[target_index] } { ++ Some(s) => s, ++ None => { ++ unsafe { crate::arch::interrupt::enable_and_halt(); } ++ return; ++ } ++ }; ++ ++ if state.flags.contains(CStateFlags::NEEDS_MONITOR) { ++ let addr = &MONITOR_TARGET.value as *const AtomicUsize as *const u8; ++ unsafe { crate::arch::interrupt::monitor(addr, 0, 0); } ++ } ++ ++ if state.flags.contains(CStateFlags::NEEDS_WBINVD) { ++ unsafe { core::arch::asm!("wbinvd", options(nostack)); } ++ } ++ ++ unsafe { ++ crate::arch::interrupt::enable_and_mwait(state.mwait_hint, 0); ++ } ++} +diff --git a/src/scheme/sys/cstate.rs b/src/scheme/sys/cstate.rs +new file mode 100644 +index 000000000..abd52cc3b +--- /dev/null ++++ b/src/scheme/sys/cstate.rs +@@ -0,0 +1,15 @@ ++use alloc::vec::Vec; ++ ++use crate::{ ++ arch::cpuidle, ++ sync::CleanLockToken, ++ syscall::error::{Error, Result, EINVAL}, ++}; ++ ++pub fn resource(_token: &mut CleanLockToken) -> Result> { ++ cpuidle::resource() ++} ++ ++pub fn policy_write(buf: &[u8], _token: &mut CleanLockToken) -> Result { ++ cpuidle::policy_write(buf) ++} diff --git a/recipes/core/kernel/recipe.toml b/recipes/core/kernel/recipe.toml index f02ab19dd4..8be1cda630 100644 --- a/recipes/core/kernel/recipe.toml +++ b/recipes/core/kernel/recipe.toml @@ -45,8 +45,8 @@ patches = [ "../../../local/patches/kernel/P22-x2apic-madt-fallback.patch", # P23: sys:msr scheme — kernel MSR read/write via /scheme/sys/msr// "../../../local/patches/kernel/P23-sys-msr-scheme.patch", - # P24: C-state idle loop with MONITOR/MWAIT support - "../../../local/patches/kernel/P24-cstate-mwait-idle.patch", + # P25: Comprehensive cpuidle framework with deep C-states (C1-C7) + "../../../local/patches/kernel/P25-cpuidle-deep-cstates.patch", ] [build] diff --git a/recipes/core/kernel/source/src/arch/x86_shared/cpuidle.rs b/recipes/core/kernel/source/src/arch/x86_shared/cpuidle.rs new file mode 100644 index 0000000000..156add78e8 --- /dev/null +++ b/recipes/core/kernel/source/src/arch/x86_shared/cpuidle.rs @@ -0,0 +1,186 @@ +use core::cell::SyncUnsafeCell; +use core::sync::atomic::{AtomicUsize, Ordering}; + +use crate::arch::cpuid::cpuid; +use crate::syscall::error::{Error, Result, EINVAL}; + +#[repr(align(64))] +struct MonitorTarget { + value: AtomicUsize, +} + +static MONITOR_TARGET: MonitorTarget = MonitorTarget { + value: AtomicUsize::new(0), +}; + +bitflags::bitflags! { + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub struct CStateFlags: u32 { + const NEEDS_MONITOR = 1; + const NEEDS_WBINVD = 2; + } +} + +#[derive(Clone, Copy, Debug)] +pub struct CState { + pub name: &'static str, + pub typ: u32, + pub latency: u32, + pub power: u32, + pub mwait_hint: u32, + pub flags: CStateFlags, +} + +const MAX_CSTATES: usize = 8; +static CPUIDLE_STATES: SyncUnsafeCell<[Option; MAX_CSTATES]> = + SyncUnsafeCell::new([None; MAX_CSTATES]); +static NUM_CPUIDLE_STATES: AtomicUsize = AtomicUsize::new(0); + +static CSTATE_POLICY_MAX: AtomicUsize = AtomicUsize::new(0); + +fn has_mwait() -> bool { + cpuid().get_feature_info().map_or(false, |info| info.has_monitor_mwait()) +} + +fn add_state(index: usize, state: CState) { + unsafe { + (*CPUIDLE_STATES.get())[index] = Some(state); + } +} + +pub fn init() { + add_state(0, CState { + name: "C1", + typ: 1, + latency: 1, + power: 1000, + mwait_hint: 0x00, + flags: CStateFlags::empty(), + }); + let mut count = 1; + + if has_mwait() { + add_state(count, CState { + name: "C1E", + typ: 1, + latency: 2, + power: 800, + mwait_hint: 0x01, + flags: CStateFlags::NEEDS_MONITOR, + }); + count += 1; + + add_state(count, CState { + name: "C2", + typ: 2, + latency: 10, + power: 500, + mwait_hint: 0x10, + flags: CStateFlags::NEEDS_MONITOR, + }); + count += 1; + + add_state(count, CState { + name: "C3", + typ: 3, + latency: 50, + power: 100, + mwait_hint: 0x20, + flags: CStateFlags::NEEDS_MONITOR | CStateFlags::NEEDS_WBINVD, + }); + count += 1; + + add_state(count, CState { + name: "C6", + typ: 6, + latency: 100, + power: 50, + mwait_hint: 0x50, + flags: CStateFlags::NEEDS_MONITOR | CStateFlags::NEEDS_WBINVD, + }); + count += 1; + + add_state(count, CState { + name: "C7", + typ: 7, + latency: 200, + power: 30, + mwait_hint: 0x60, + flags: CStateFlags::NEEDS_MONITOR | CStateFlags::NEEDS_WBINVD, + }); + count += 1; + } + + NUM_CPUIDLE_STATES.store(count, Ordering::SeqCst); + log::info!("cpuidle: initialized {} states (mwait={})", count, has_mwait()); +} + +pub fn policy_read() -> usize { + CSTATE_POLICY_MAX.load(Ordering::Relaxed) +} + +pub fn policy_write(buf: &[u8]) -> Result { + let s = core::str::from_utf8(buf).map_err(|_| Error::new(EINVAL))?; + let s = s.trim(); + let val: usize = s.parse().map_err(|_| Error::new(EINVAL))?; + let num_states = NUM_CPUIDLE_STATES.load(Ordering::Relaxed); + if val >= num_states { + return Err(Error::new(EINVAL)); + } + CSTATE_POLICY_MAX.store(val, Ordering::Relaxed); + log::info!("cpuidle: policy set to max state {}", val); + Ok(s.len()) +} + +pub fn resource() -> Result> { + let mut output = alloc::string::String::new(); + let num_states = NUM_CPUIDLE_STATES.load(Ordering::Relaxed); + let policy = CSTATE_POLICY_MAX.load(Ordering::Relaxed); + output.push_str(&format!("policy_max: {}\n", policy)); + output.push_str(&format!("num_states: {}\n", num_states)); + for i in 0..num_states { + if let Some(state) = unsafe { (*CPUIDLE_STATES.get())[i] } { + output.push_str(&format!( + "state{}: name={} type={} latency={}us power={} hint={:#x} flags={:?}\n", + i, state.name, state.typ, state.latency, state.power, state.mwait_hint, state.flags + )); + } + } + Ok(output.into_bytes()) +} + +pub unsafe fn enter_idle() { + let policy_max = CSTATE_POLICY_MAX.load(Ordering::Relaxed); + let num_states = NUM_CPUIDLE_STATES.load(Ordering::Relaxed); + let target_index = if num_states == 0 { + 0 + } else { + core::cmp::min(policy_max, num_states - 1) + }; + + if target_index == 0 { + unsafe { crate::arch::interrupt::enable_and_halt(); } + return; + } + + let state = match unsafe { (*CPUIDLE_STATES.get())[target_index] } { + Some(s) => s, + None => { + unsafe { crate::arch::interrupt::enable_and_halt(); } + return; + } + }; + + if state.flags.contains(CStateFlags::NEEDS_MONITOR) { + let addr = &MONITOR_TARGET.value as *const AtomicUsize as *const u8; + unsafe { crate::arch::interrupt::monitor(addr, 0, 0); } + } + + if state.flags.contains(CStateFlags::NEEDS_WBINVD) { + unsafe { core::arch::asm!("wbinvd", options(nostack)); } + } + + unsafe { + crate::arch::interrupt::enable_and_mwait(state.mwait_hint, 0); + } +} diff --git a/recipes/core/kernel/source/src/scheme/sys/cstate.rs b/recipes/core/kernel/source/src/scheme/sys/cstate.rs new file mode 100644 index 0000000000..abd52cc3b3 --- /dev/null +++ b/recipes/core/kernel/source/src/scheme/sys/cstate.rs @@ -0,0 +1,15 @@ +use alloc::vec::Vec; + +use crate::{ + arch::cpuidle, + sync::CleanLockToken, + syscall::error::{Error, Result, EINVAL}, +}; + +pub fn resource(_token: &mut CleanLockToken) -> Result> { + cpuidle::resource() +} + +pub fn policy_write(buf: &[u8], _token: &mut CleanLockToken) -> Result { + cpuidle::policy_write(buf) +} diff --git a/recipes/core/kernel/source/src/scheme/sys/mod.rs b/recipes/core/kernel/source/src/scheme/sys/mod.rs index 8f26187a79..9eb3564411 100644 --- a/recipes/core/kernel/source/src/scheme/sys/mod.rs +++ b/recipes/core/kernel/source/src/scheme/sys/mod.rs @@ -45,6 +45,11 @@ enum Handle { data: Arc>>>, }, SchemeRoot, + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + Msr { + cpu: usize, + msr: u32, + }, } #[derive(Clone, Copy)] @@ -133,6 +138,28 @@ impl KernelScheme for SysScheme { let id = HANDLES.write(token.token()).insert(Handle::TopLevel); Ok(OpenResult::SchemeLocal(id, InternalFlags::POSITIONED)) + } else if path.starts_with("msr/") { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if ctx.uid != 0 { + return Err(Error::new(EPERM)); + } + let rest = &path[4..]; + let mut parts = rest.split('/'); + let cpu_str = parts.next().ok_or(Error::new(EINVAL))?; + let msr_str = parts.next().ok_or(Error::new(EINVAL))?; + if parts.next().is_some() { + return Err(Error::new(EINVAL)); + } + let cpu: usize = cpu_str.parse().map_err(|_| Error::new(EINVAL))?; + let msr: u32 = u32::from_str_radix(msr_str, 16).map_err(|_| Error::new(EINVAL))?; + let id = HANDLES.write(token.token()).insert(Handle::Msr { cpu, msr }); + Ok(OpenResult::SchemeLocal(id, InternalFlags::POSITIONED)) + } + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] + { + Err(Error::new(ENOENT)) + } } else { //Have to iterate to get the path without allocation let entry = FILES @@ -160,6 +187,8 @@ impl KernelScheme for SysScheme { Handle::TopLevel => return Ok(0), Handle::Resource { kind, data, .. } => (*kind, data.clone()), Handle::SchemeRoot => return Err(Error::new(EBADF)), + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + Handle::Msr { .. } => return Ok(0), } }; if matches!(kind, Kind::Wr(_)) { @@ -188,6 +217,16 @@ impl KernelScheme for SysScheme { Handle::TopLevel => "", Handle::Resource { path, .. } => path, Handle::SchemeRoot => return Err(Error::new(EBADF)), + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + Handle::Msr { cpu, msr } => { + const FIRST: &[u8] = b"sys:msr/"; + let mut bytes_read = buf.copy_common_bytes_from_slice(FIRST)?; + let suffix = format!("{}/{:x}", cpu, msr); + if let Some(remaining) = buf.advance(FIRST.len()) { + bytes_read += remaining.copy_common_bytes_from_slice(suffix.as_bytes())?; + } + return Ok(bytes_read); + } }; const FIRST: &[u8] = b"sys:"; @@ -215,6 +254,15 @@ impl KernelScheme for SysScheme { let (kind, data_lock) = { match HANDLES.read(token.token()).get(id)? { Handle::Resource { kind, data, .. } => (*kind, data.clone()), + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + Handle::Msr { cpu, msr } => { + if *cpu != crate::cpu_id().get() as usize { + return Err(Error::new(EINVAL)); + } + let val = unsafe { x86::msr::rdmsr(*msr) }; + let data = format!("{:016x}\n", val).into_bytes(); + return buffer.copy_common_bytes_from_slice(&data[pos..]); + } _ => return Err(Error::new(EBADF)), } }; @@ -253,6 +301,18 @@ impl KernelScheme for SysScheme { let len = buffer.copy_common_bytes_to_slice(&mut intermediate)?; (*handler, intermediate, len) } + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + Handle::Msr { cpu, msr } => { + if *cpu != crate::cpu_id().get() as usize { + return Err(Error::new(EINVAL)); + } + let mut intermediate = [0_u8; 32]; + let len = buffer.copy_common_bytes_to_slice(&mut intermediate)?; + let val_str = core::str::from_utf8(&intermediate[..len]).map_err(|_| Error::new(EINVAL))?; + let val = u64::from_str_radix(val_str.trim(), 16).map_err(|_| Error::new(EINVAL))?; + unsafe { x86::msr::wrmsr(*msr, val); } + return Ok(len); + } Handle::SchemeRoot => return Err(Error::new(EBADF)), }; handler(&intermediate[..len], token) @@ -269,7 +329,8 @@ impl KernelScheme for SysScheme { return Ok(0); }; match HANDLES.read(token.token()).get(id)? { - Handle::Resource { .. } => Err(Error::new(ENOTDIR)), + Handle::Resource { .. } + | Handle::Msr { .. } => Err(Error::new(ENOTDIR)), Handle::TopLevel => { let mut buf = DirentBuf::new(buf, header_size).ok_or(Error::new(EIO))?; for (this_idx, (name, _)) in FILES.iter().enumerate().skip(first_index) { @@ -293,6 +354,18 @@ impl KernelScheme for SysScheme { Handle::Resource { kind, data, .. } => Some((*kind, data.clone())), Handle::TopLevel => None, Handle::SchemeRoot => return Err(Error::new(EBADF)), + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + Handle::Msr { .. } => { + let stat = Stat { + st_mode: 0o600 | MODE_FILE, + st_uid: 0, + st_gid: 0, + st_size: 0, + ..Default::default() + }; + buf.copy_exactly(&stat)?; + return Ok(()); + } } }; let stat = if let Some((kind, data_lock)) = stat_base { diff --git a/recipes/core/uutils/source/src/uu/sort/src/tmp_dir.rs b/recipes/core/uutils/source/src/uu/sort/src/tmp_dir.rs index fb5e295112..2ec4823200 100644 --- a/recipes/core/uutils/source/src/uu/sort/src/tmp_dir.rs +++ b/recipes/core/uutils/source/src/uu/sort/src/tmp_dir.rs @@ -2,14 +2,18 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use std::sync::atomic::{AtomicBool, Ordering}; use std::{ fs::File, - path::PathBuf, + path::{Path, PathBuf}, sync::{Arc, LazyLock, Mutex}, }; use tempfile::TempDir; -use uucore::error::UResult; +use uucore::{ + error::{UResult, USimpleError}, + show_error, translate, +}; use crate::{SortError, current_open_fd_count, fd_soft_limit}; @@ -175,3 +179,15 @@ impl Drop for TmpDirWrapper { } } +/// Remove the directory at `path` by deleting its child files and then itself. +/// Errors while deleting child files are ignored. +fn remove_tmp_dir(path: &Path) -> std::io::Result<()> { + if let Ok(read_dir) = std::fs::read_dir(path) { + for file in read_dir.flatten() { + // if we fail to delete the file here it was probably deleted by another thread + // in the meantime, but that's ok. + let _ = std::fs::remove_file(file.path()); + } + } + std::fs::remove_dir(path) +}