diff --git a/local/recipes/system/cpufreqd/source/src/main.rs b/local/recipes/system/cpufreqd/source/src/main.rs index 800b3c42e1..b6d2615297 100644 --- a/local/recipes/system/cpufreqd/source/src/main.rs +++ b/local/recipes/system/cpufreqd/source/src/main.rs @@ -4,7 +4,18 @@ use std::io::{Read, Write}; use std::time::{Duration, Instant}; use log::{info, warn, LevelFilter}; -const IA32_PERF_CTL: u32 = 0x199; +// MSR addresses — see Intel SDM Vol 3B §14 +const IA32_PERF_CTL: u32 = 0x199; // legacy P-state +const IA32_HWP_REQUEST: u32 = 0x774; // HWP control +const IA32_HWP_CAPABILITIES: u32 = 0x771; // HWP range +const IA32_PM_ENABLE: u32 = 0x770; // HWP enable bit + +// EPP values for IA32_HWP_REQUEST[31:24] +const EPP_PERFORMANCE: u64 = 0x00; +const EPP_BALANCE_PERFORMANCE: u64 = 0x80; +const EPP_BALANCE_POWER: u64 = 0xC0; +const EPP_POWERSAVE: u64 = 0xFF; + const POLL_MS: u64 = 100; const SAMPLE_WINDOW: usize = 10; const STATE_WRITE_INTERVAL_S: u64 = 1; @@ -21,18 +32,39 @@ impl log::Log for StderrLogger { fn flush(&self) {} } -#[derive(Clone)] -struct PState { freq_khz: u32, power_mw: u32, latency_us: u32, ctl: u64 } +/// HWP = Hardware P-states (Intel Speed Shift). +/// Arrow Lake-H always has HWP enabled by BIOS. Legacy IA32_PERF_CTL +/// writes are ignored when HWP is active. We detect HWP via MSR 0x770 +/// bit 0 and use IA32_HWP_REQUEST (0x774) with EPP hints. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum PstateMode { LegacyPerfCtl, Hwp } +#[derive(Clone)] +struct PState { + freq_khz: u32, + power_mw: u32, + latency_us: u32, + ctl: u64, +} + +#[derive(Clone)] struct CpuInfo { - id: u32, pstates: Vec<PState>, current_idx: usize, - load_history: [f64; SAMPLE_WINDOW], load_idx: usize, throttle: bool, - msr_errors: u32, msr_suppressed: bool, + id: u32, + pstates: Vec<PState>, + current_idx: usize, + load_history: [f64; SAMPLE_WINDOW], + load_idx: usize, + throttle: bool, + msr_errors: u32, + msr_suppressed: bool, + mode: PstateMode, + hwp_min: u8, // Lowest Performance, from MSR 0x771[31:24] + hwp_max: u8, // Highest Performance, from MSR 0x771[7:0] + hwp_guaranteed: u8, // Guaranteed Performance, from MSR 0x771[15:8] + hwp_efficient: u8, // Most Efficient Performance, from MSR 0x771[23:16] } fn detect_cpus() -> Vec<u32> { - // Redox exposes the CPU count via the sys:cpu scheme file, not via a - // /dev/cpu/ directory (kernel/src/scheme/sys/cpu.rs). if let Ok(data) = fs::read_to_string("/scheme/sys/cpu") { for line in data.lines() { if let Some(rest) = line.strip_prefix("CPUs: ") { @@ -54,6 +86,36 @@ fn detect_cpus() -> Vec<u32> { v } +fn read_msr_u32(cpu: u32, msr: u32) -> Option<u32> { + let path = format!("/scheme/sys/msr/{}/0x{:x}", cpu, msr); + let mut f = fs::File::open(&path).ok()?; + let mut buf = [0u8; 8]; + f.read_exact(&mut buf).ok()?; + Some(u32::from_le_bytes(buf[..4].try_into().ok()?)) +} + +fn detect_pstate_mode(cpu: u32) -> PstateMode { + // IA32_PM_ENABLE bit 0 == HWP_ENABLE + match read_msr_u32(cpu, IA32_PM_ENABLE) { + Some(pm) if (pm & 1) != 0 => PstateMode::Hwp, + _ => PstateMode::LegacyPerfCtl, + } +} + +fn read_hwp_capabilities(cpu: u32) -> (u8, u8, u8, u8) { + // IA32_HWP_CAPABILITIES layout (Vol 3B §14.4.3): + // [7:0] Highest Performance + // [15:8] Guaranteed Performance + // [23:16] Most Efficient Performance + // [31:24] Lowest Performance + let cap = read_msr_u32(cpu, IA32_HWP_CAPABILITIES).unwrap_or(0); + let max = (cap & 0xFF) as u8; + let guaranteed = ((cap >> 8) & 0xFF) as u8; + let efficient = ((cap >> 16) 
& 0xFF) as u8; + let min = ((cap >> 24) & 0xFF) as u8; + (min, max, guaranteed, efficient) +} + fn read_acpi_pss(cpu: u32) -> Vec<PState> { let path = format!("/scheme/acpi/processor/CPU{}/pss", cpu); if let Ok(d) = fs::read_to_string(&path) { @@ -77,14 +139,34 @@ fn read_acpi_pss(cpu: u32) -> Vec<PState> { } fn write_msr(cpu: u32, msr: u32, val: u64) -> bool { - // Redox exposes MSR as /scheme/sys/msr/{cpu}/{msr_hex}, not the - // Linux-style /dev/cpu/{cpu}/msr. The scheme requires - // CAP_SYS_MSR, which cpufreqd receives because it runs as root. let path = format!("/scheme/sys/msr/{}/0x{:x}", cpu, msr); fs::OpenOptions::new().write(true).open(&path).ok() .map(|mut f| f.write_all(&val.to_ne_bytes()).is_ok()).unwrap_or(false) } +/// Map a P-state index to IA32_HWP_REQUEST value. +/// IA32_HWP_REQUEST layout (Vol 3B §14.4.4): +/// [7:0] Minimum Performance +/// [15:8] Maximum Performance +/// [23:16] Desired Performance +/// [31:24] Energy-Performance Preference +/// [41:32] Activity Window (set to 0 = auto) +/// [42] Package Control +fn hwp_request_for(idx: usize, ci: &CpuInfo) -> u64 { + let m = ci.pstates.len().saturating_sub(1).max(1); + // Map index 0 (lowest perf, "powersave") to lowest HWP performance + // and index m (highest perf, "performance") to highest HWP performance. 
+ let frac = (idx as f64 / m as f64).min(1.0); + let range = ci.hwp_max.saturating_sub(ci.hwp_min) as f64; + let desired = ci.hwp_min as f64 + range * frac; + // EPP follows the same map: performance=0, powersave=255 + let epp = ((1.0 - frac) * 255.0) as u64; + (ci.hwp_min as u64) + | ((desired as u64) << 8) + | ((desired as u64) << 16) + | (epp << 24) +} + fn measure_load(cpu: u32, prev: &mut (u64, u64)) -> f64 { if let Ok(d) = fs::read_to_string(format!("/scheme/sys/cpu/{}/stat", cpu)) { let p: Vec<u64> = d.split_whitespace().filter_map(|s| s.parse().ok()).collect(); @@ -111,6 +193,39 @@ fn choose_pstate(g: Governor, ci: &CpuInfo) -> usize { } } +fn apply_pstate(ci: &mut CpuInfo, idx: usize) { + match ci.mode { + PstateMode::Hwp => { + let val = hwp_request_for(idx, ci); + if write_msr(ci.id, IA32_HWP_REQUEST, val) { + ci.current_idx = idx; + ci.msr_errors = 0; + ci.msr_suppressed = false; + } else { + ci.msr_errors += 1; + if !ci.msr_suppressed { + warn!("CPU{}: HWP write failed ({}/{})", ci.id, ci.msr_errors, MSR_ERROR_SUPPRESS_COUNT); + if ci.msr_errors >= MSR_ERROR_SUPPRESS_COUNT { ci.msr_suppressed = true; } + } + } + } + PstateMode::LegacyPerfCtl => { + let ct = ci.pstates[idx].ctl; + if write_msr(ci.id, IA32_PERF_CTL, ct) { + ci.current_idx = idx; + ci.msr_errors = 0; + ci.msr_suppressed = false; + } else { + ci.msr_errors += 1; + if !ci.msr_suppressed { + warn!("CPU{}: MSR write failed ({}/{})", ci.id, ci.msr_errors, MSR_ERROR_SUPPRESS_COUNT); + if ci.msr_errors >= MSR_ERROR_SUPPRESS_COUNT { ci.msr_suppressed = true; } + } + } + } + } +} + struct ThermalCache { data: bool, last_check: Instant } impl ThermalCache { fn new() -> Self { Self { data: false, last_check: Instant::now() - Duration::from_secs(10) } } @@ -129,7 +244,11 @@ fn write_scheme_state(governor: Governor, cpus: &[CpuInfo]) { for ci in cpus { if ci.pstates.is_empty() { continue; } let p = &ci.pstates[ci.current_idx.min(ci.pstates.len() - 1)]; - out.push_str(&format!("CPU{}: {} kHz, {} mW, load={:.1}%\n", 
ci.id, p.freq_khz, p.power_mw, avg_load(ci) * 100.0)); + let mode_s = match ci.mode { + PstateMode::Hwp => "HWP", + PstateMode::LegacyPerfCtl => "legacy", + }; + out.push_str(&format!("CPU{} [{}]: {} kHz, {} mW, load={:.1}%\n", ci.id, mode_s, p.freq_khz, p.power_mw, avg_load(ci) * 100.0)); } let _ = fs::write("/scheme/cpufreq/state", out); } @@ -145,14 +264,37 @@ fn main() { let cpus = detect_cpus(); info!("detected {} CPU(s), governor={:?}", cpus.len(), governor); let mut ci: Vec<CpuInfo> = cpus.iter().map(|&id| { + let mode = detect_pstate_mode(id); + let (hwp_min, hwp_max, hwp_guaranteed, hwp_efficient) = if mode == PstateMode::Hwp { + read_hwp_capabilities(id) + } else { + (0, 0, 0, 0) + }; + if mode == PstateMode::Hwp { + info!("CPU{}: HWP active (range {}-{}, efficient {}, guaranteed {})", id, hwp_min, hwp_max, hwp_efficient, hwp_guaranteed); + } else { + info!("CPU{}: legacy P-states (HWP not enabled)", id); + } let ps = read_acpi_pss(id); info!("CPU{}: {} P-states ({} - {} kHz)", id, ps.len(), ps.first().map_or(0, |p| p.freq_khz), ps.last().map_or(0, |p| p.freq_khz)); - CpuInfo { id, pstates: ps, current_idx: 0, load_history: [0.0; SAMPLE_WINDOW], load_idx: 0, throttle: false, msr_errors: 0, msr_suppressed: false } + CpuInfo { id, pstates: ps, current_idx: 0, load_history: [0.0; SAMPLE_WINDOW], load_idx: 0, throttle: false, msr_errors: 0, msr_suppressed: false, mode, hwp_min, hwp_max, hwp_guaranteed, hwp_efficient } }).collect(); let mut prev: Vec<(u64, u64)> = vec![(0, 0); cpus.len()]; let mut thermal = ThermalCache::new(); let mut last_state_write = Instant::now(); - for c in &ci { if !c.pstates.is_empty() { write_msr(c.id, IA32_PERF_CTL, c.pstates[0].ctl); } } + // Set initial P-state. For HWP we leave MSR 0x774 as BIOS-set + // (defaults to performance) and just let the governor pick a + // starting index. For legacy, write the lowest P-state's IA32_PERF_CTL. 
+ for c in &ci { + if !c.pstates.is_empty() { + match c.mode { + PstateMode::Hwp => {} // HWP starts at the BIOS default + PstateMode::LegacyPerfCtl => { + let _ = write_msr(c.id, IA32_PERF_CTL, c.pstates[0].ctl); + } + } + } + } loop { std::thread::sleep(Duration::from_millis(POLL_MS)); let tt = thermal.get(); @@ -162,13 +304,23 @@ fn main() { c.load_history[c.load_idx] = l; c.load_idx = (c.load_idx + 1) % SAMPLE_WINDOW; c.throttle = tt; let n = choose_pstate(governor, c); if n != c.current_idx && n < c.pstates.len() { - let ct = c.pstates[n].ctl; - if write_msr(c.id, IA32_PERF_CTL, ct) { - info!("CPU{}: P{}→P{} ({}→{} kHz, load={:.0}%)", c.id, c.current_idx, n, c.pstates[c.current_idx].freq_khz, c.pstates[n].freq_khz, l * 100.0); - c.current_idx = n; c.msr_errors = 0; c.msr_suppressed = false; - } else { - c.msr_errors += 1; - if !c.msr_suppressed { warn!("CPU{}: MSR write failed ({}/{})", c.id, c.msr_errors, MSR_ERROR_SUPPRESS_COUNT); if c.msr_errors >= MSR_ERROR_SUPPRESS_COUNT { c.msr_suppressed = true; } } + let prev_idx = c.current_idx; + let prev_freq = c.pstates[prev_idx].freq_khz; + let next_freq = c.pstates[n].freq_khz; + let l_pct = l * 100.0; + apply_pstate(c, n); + // apply_pstate advances current_idx only when the MSR write succeeded + // (it warns on failure itself), so log the transition only on success. + if c.current_idx == n { + match c.mode { + PstateMode::Hwp => { + let desired = (hwp_request_for(n, c) >> 16) & 0xFF; + info!("CPU{}: HWP desired→{} ({}→{} kHz, load={:.0}%)", c.id, desired, prev_freq, next_freq, l_pct); + } + PstateMode::LegacyPerfCtl => { + info!("CPU{}: P{}→P{} ({}→{} kHz, load={:.0}%)", c.id, prev_idx, n, prev_freq, next_freq, l_pct); + } + } } } }