cpufreqd: add HWP (Hardware P-states / Intel Speed Shift) detection
Phase G.2 of the ACPI/Arrow Lake port. The LG Gram 2025 (Core Ultra 7
255H, Arrow Lake-H) uses Intel HWP for P-state control — legacy
IA32_PERF_CTL writes are silently ignored when HWP is active.
The previous cpufreqd always wrote IA32_PERF_CTL (MSR 0x199), which
on Arrow Lake-H had zero effect. We now:
1. Detect HWP at startup by reading IA32_PM_ENABLE (MSR 0x770) bit 0
2. If HWP is active:
a. Read IA32_HWP_CAPABILITIES (MSR 0x771) for the
min/max/guaranteed/efficient performance range
b. Translate the governor's P-state index into the HWP
"Desired Performance" field + EPP hint
c. Write IA32_HWP_REQUEST (MSR 0x774) instead of IA32_PERF_CTL
3. If HWP is not active, fall back to the legacy IA32_PERF_CTL path
(preserves backward compatibility for older CPUs)
The kernel's new /scheme/sys/msr/ scheme (Phase G.1) provides the
in-memory storage backing the MSR reads/writes. On the real LG Gram
2025 hardware, the kernel's MSR scheme will be wired to the actual
hardware MSRs (Phase G+ work); the cpufreqd interface is unchanged.
IA32_HWP_REQUEST layout (Intel SDM Vol 3B §14.4.4):
[7:0] Minimum Performance
[15:8] Maximum Performance
[23:16] Desired Performance
[31:24] Energy-Performance Preference (EPP)
[41:32] Activity Window (0 = auto)
[42] Package Control
EPP is derived from the same index as desired perf but runs in the
opposite direction: 0 = performance, 255 = power-save. We map the
linear P-state index to both "Desired Performance" and EPP so the
hardware receives one consistent hint covering the performance level
and the energy preference it implies.
Includes:
- PstateMode enum (LegacyPerfCtl | Hwp) for runtime dispatch, matched per CPU
- detect_pstate_mode() reads MSR 0x770
- read_hwp_capabilities() reads MSR 0x771, returns (min, max,
guaranteed, efficient) bytes
- hwp_request_for() maps P-state index to IA32_HWP_REQUEST u64
- apply_pstate() dispatches to the right MSR based on ci.mode
- The /scheme/cpufreq/state output now tags each CPU with [HWP] or
[legacy] for observability
Hardware test plan: on the LG Gram 2025, "performance" governor
should pin IA32_HWP_REQUEST.Desired = hwp_max with EPP=0; "powersave"
should pin it to hwp_min with EPP=255; "ondemand" should ramp
between. Reading IA32_PERF_STATUS (MSR 0x198) via /scheme/sys/msr
should reflect the new operating point within ~1ms.
This commit is contained in:
@@ -4,7 +4,18 @@ use std::io::{Read, Write};
|
||||
use std::time::{Duration, Instant};
|
||||
use log::{info, warn, LevelFilter};
|
||||
|
||||
const IA32_PERF_CTL: u32 = 0x199;
|
||||
// MSR addresses — see Intel SDM Vol 3B §14
|
||||
const IA32_PERF_CTL: u32 = 0x199; // legacy P-state
|
||||
const IA32_HWP_REQUEST: u32 = 0x774; // HWP control
|
||||
const IA32_HWP_CAPABILITIES: u32 = 0x771; // HWP range
|
||||
const IA32_PM_ENABLE: u32 = 0x770; // HWP enable bit
|
||||
|
||||
// EPP values for IA32_HWP_REQUEST[31:24]
|
||||
const EPP_PERFORMANCE: u64 = 0x00;
|
||||
const EPP_BALANCE_PERFORMANCE: u64 = 0x80;
|
||||
const EPP_BALANCE_POWER: u64 = 0xC0;
|
||||
const EPP_POWERSAVE: u64 = 0xFF;
|
||||
|
||||
const POLL_MS: u64 = 100;
|
||||
const SAMPLE_WINDOW: usize = 10;
|
||||
const STATE_WRITE_INTERVAL_S: u64 = 1;
|
||||
@@ -21,18 +32,39 @@ impl log::Log for StderrLogger {
|
||||
fn flush(&self) {}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct PState { freq_khz: u32, power_mw: u32, latency_us: u32, ctl: u64 }
|
||||
/// Mechanism used to request a performance level from a CPU.
///
/// HWP stands for Hardware P-states (Intel Speed Shift). Arrow Lake-H always
/// has HWP enabled by BIOS, and legacy IA32_PERF_CTL writes are ignored while
/// HWP is active. HWP is detected through bit 0 of MSR 0x770, and requests
/// then go to IA32_HWP_REQUEST (0x774) together with an EPP hint.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum PstateMode {
    /// Select P-states by writing IA32_PERF_CTL.
    LegacyPerfCtl,
    /// Request performance levels by writing IA32_HWP_REQUEST.
    Hwp,
}
|
||||
|
||||
/// One entry of a CPU's P-state table.
#[derive(Clone)]
struct PState {
    /// Frequency of this state; printed as kHz in logs and the state output.
    freq_khz: u32,
    /// Power figure of this state; printed as mW in the state output.
    power_mw: u32,
    /// Presumably the transition latency in microseconds — TODO confirm
    /// against the code that fills the table.
    latency_us: u32,
    /// Raw value written to IA32_PERF_CTL when this state is selected on the
    /// legacy path.
    ctl: u64,
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct CpuInfo {
|
||||
id: u32, pstates: Vec<PState>, current_idx: usize,
|
||||
load_history: [f64; SAMPLE_WINDOW], load_idx: usize, throttle: bool,
|
||||
msr_errors: u32, msr_suppressed: bool,
|
||||
id: u32,
|
||||
pstates: Vec<PState>,
|
||||
current_idx: usize,
|
||||
load_history: [f64; SAMPLE_WINDOW],
|
||||
load_idx: usize,
|
||||
throttle: bool,
|
||||
msr_errors: u32,
|
||||
msr_suppressed: bool,
|
||||
mode: PstateMode,
|
||||
hwp_min: u8, // from MSR 0x771[15:8]
|
||||
hwp_max: u8, // from MSR 0x771[7:0]
|
||||
hwp_guaranteed: u8, // from MSR 0x771[23:16]
|
||||
hwp_efficient: u8, // from MSR 0x771[31:24]
|
||||
}
|
||||
|
||||
fn detect_cpus() -> Vec<u32> {
|
||||
// Redox exposes the CPU count via the sys:cpu scheme file, not via a
|
||||
// /dev/cpu/ directory (kernel/src/scheme/sys/cpu.rs).
|
||||
if let Ok(data) = fs::read_to_string("/scheme/sys/cpu") {
|
||||
for line in data.lines() {
|
||||
if let Some(rest) = line.strip_prefix("CPUs: ") {
|
||||
@@ -54,6 +86,36 @@ fn detect_cpus() -> Vec<u32> {
|
||||
v
|
||||
}
|
||||
|
||||
/// Read the low 32 bits of MSR `msr` on CPU `cpu` through the MSR scheme.
///
/// Opens `/scheme/sys/msr/{cpu}/0x{msr:x}`, reads exactly eight bytes and
/// decodes the first four as a little-endian `u32`. Returns `None` when the
/// file cannot be opened or yields fewer than eight bytes.
fn read_msr_u32(cpu: u32, msr: u32) -> Option<u32> {
    let mut raw = [0u8; 8];
    fs::File::open(format!("/scheme/sys/msr/{}/0x{:x}", cpu, msr))
        .and_then(|mut file| file.read_exact(&mut raw))
        .ok()?;
    // Only the low half of the 64-bit register is returned.
    let [b0, b1, b2, b3, ..] = raw;
    Some(u32::from_le_bytes([b0, b1, b2, b3]))
}
|
||||
|
||||
fn detect_pstate_mode(cpu: u32) -> PstateMode {
|
||||
// IA32_PM_ENABLE bit 0 == HWP_ENABLE
|
||||
match read_msr_u32(cpu, IA32_PM_ENABLE) {
|
||||
Some(pm) if (pm & 1) != 0 => PstateMode::Hwp,
|
||||
_ => PstateMode::LegacyPerfCtl,
|
||||
}
|
||||
}
|
||||
|
||||
/// Read the HWP performance range of CPU `cpu`.
///
/// Decodes the low 32 bits of IA32_HWP_CAPABILITIES (Vol 3B §14.4.3):
///   [7:0]   Highest Performance
///   [15:8]  Guaranteed Performance
///   [23:16] Most Efficient Performance
///   [31:24] Lowest Performance
///
/// Returns `(min, max, guaranteed, efficient)`, i.e. (lowest, highest,
/// guaranteed, most efficient). If the MSR cannot be read, every value is 0.
fn read_hwp_capabilities(cpu: u32) -> (u8, u8, u8, u8) {
    let raw = read_msr_u32(cpu, IA32_HWP_CAPABILITIES).unwrap_or(0);
    // Little-endian byte order matches the field order, lowest byte first.
    let [highest, guaranteed, most_efficient, lowest] = raw.to_le_bytes();
    (lowest, highest, guaranteed, most_efficient)
}
|
||||
|
||||
fn read_acpi_pss(cpu: u32) -> Vec<PState> {
|
||||
let path = format!("/scheme/acpi/processor/CPU{}/pss", cpu);
|
||||
if let Ok(d) = fs::read_to_string(&path) {
|
||||
@@ -77,14 +139,34 @@ fn read_acpi_pss(cpu: u32) -> Vec<PState> {
|
||||
}
|
||||
|
||||
/// Write `val` to MSR `msr` on CPU `cpu`; returns `true` only if the whole
/// value was written.
///
/// Redox exposes MSRs as /scheme/sys/msr/{cpu}/{msr_hex} rather than the
/// Linux-style /dev/cpu/{cpu}/msr. The scheme requires CAP_SYS_MSR, which
/// cpufreqd receives because it runs as root. The value is sent as eight
/// bytes in native byte order. A failed open or write yields `false`.
fn write_msr(cpu: u32, msr: u32, val: u64) -> bool {
    let path = format!("/scheme/sys/msr/{}/0x{:x}", cpu, msr);
    match fs::OpenOptions::new().write(true).open(&path) {
        Ok(mut file) => file.write_all(&val.to_ne_bytes()).is_ok(),
        Err(_) => false,
    }
}
|
||||
|
||||
/// Map a P-state index to IA32_HWP_REQUEST value.
|
||||
/// IA32_HWP_REQUEST layout (Vol 3B §14.4.4):
|
||||
/// [7:0] Minimum Performance
|
||||
/// [15:8] Maximum Performance
|
||||
/// [23:16] Desired Performance
|
||||
/// [31:24] Energy-Performance Preference
|
||||
/// [42:32] Activity Window (set to 0 = auto)
|
||||
/// [42] Package Control
|
||||
fn hwp_request_for(idx: usize, ci: &CpuInfo) -> u64 {
|
||||
let m = ci.pstates.len().saturating_sub(1).max(1);
|
||||
// Map index 0 (lowest perf, "powersave") to lowest HWP performance
|
||||
// and index m (highest perf, "performance") to highest HWP performance.
|
||||
let frac = idx as f64 / m as f64;
|
||||
let range = ci.hwp_max.saturating_sub(ci.hwp_min) as f64;
|
||||
let desired = ci.hwp_min as f64 + range * frac;
|
||||
// EPP follows the same map: performance=0, powersave=255
|
||||
let epp = ((1.0 - frac) * 255.0) as u64;
|
||||
(ci.hwp_min as u64)
|
||||
| ((desired as u64) << 8)
|
||||
| ((desired as u64) << 16)
|
||||
| (epp << 24)
|
||||
}
|
||||
|
||||
fn measure_load(cpu: u32, prev: &mut (u64, u64)) -> f64 {
|
||||
if let Ok(d) = fs::read_to_string(format!("/scheme/sys/cpu/{}/stat", cpu)) {
|
||||
let p: Vec<u64> = d.split_whitespace().filter_map(|s| s.parse().ok()).collect();
|
||||
@@ -111,6 +193,39 @@ fn choose_pstate(g: Governor, ci: &CpuInfo) -> usize {
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_pstate(ci: &mut CpuInfo, idx: usize) {
|
||||
match ci.mode {
|
||||
PstateMode::Hwp => {
|
||||
let val = hwp_request_for(idx, ci);
|
||||
if write_msr(ci.id, IA32_HWP_REQUEST, val) {
|
||||
ci.current_idx = idx;
|
||||
ci.msr_errors = 0;
|
||||
ci.msr_suppressed = false;
|
||||
} else {
|
||||
ci.msr_errors += 1;
|
||||
if !ci.msr_suppressed {
|
||||
warn!("CPU{}: HWP write failed ({}/{})", ci.id, ci.msr_errors, MSR_ERROR_SUPPRESS_COUNT);
|
||||
if ci.msr_errors >= MSR_ERROR_SUPPRESS_COUNT { ci.msr_suppressed = true; }
|
||||
}
|
||||
}
|
||||
}
|
||||
PstateMode::LegacyPerfCtl => {
|
||||
let ct = ci.pstates[idx].ctl;
|
||||
if write_msr(ci.id, IA32_PERF_CTL, ct) {
|
||||
ci.current_idx = idx;
|
||||
ci.msr_errors = 0;
|
||||
ci.msr_suppressed = false;
|
||||
} else {
|
||||
ci.msr_errors += 1;
|
||||
if !ci.msr_suppressed {
|
||||
warn!("CPU{}: MSR write failed ({}/{})", ci.id, ci.msr_errors, MSR_ERROR_SUPPRESS_COUNT);
|
||||
if ci.msr_errors >= MSR_ERROR_SUPPRESS_COUNT { ci.msr_suppressed = true; }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ThermalCache { data: bool, last_check: Instant }
|
||||
impl ThermalCache {
|
||||
fn new() -> Self { Self { data: false, last_check: Instant::now() - Duration::from_secs(10) } }
|
||||
@@ -129,7 +244,11 @@ fn write_scheme_state(governor: Governor, cpus: &[CpuInfo]) {
|
||||
for ci in cpus {
|
||||
if ci.pstates.is_empty() { continue; }
|
||||
let p = &ci.pstates[ci.current_idx.min(ci.pstates.len() - 1)];
|
||||
out.push_str(&format!("CPU{}: {} kHz, {} mW, load={:.1}%\n", ci.id, p.freq_khz, p.power_mw, avg_load(ci) * 100.0));
|
||||
let mode_s = match ci.mode {
|
||||
PstateMode::Hwp => "HWP",
|
||||
PstateMode::LegacyPerfCtl => "legacy",
|
||||
};
|
||||
out.push_str(&format!("CPU{} [{}]: {} kHz, {} mW, load={:.1}%\n", ci.id, mode_s, p.freq_khz, p.power_mw, avg_load(ci) * 100.0));
|
||||
}
|
||||
let _ = fs::write("/scheme/cpufreq/state", out);
|
||||
}
|
||||
@@ -145,14 +264,37 @@ fn main() {
|
||||
let cpus = detect_cpus();
|
||||
info!("detected {} CPU(s), governor={:?}", cpus.len(), governor);
|
||||
let mut ci: Vec<CpuInfo> = cpus.iter().map(|&id| {
|
||||
let mode = detect_pstate_mode(id);
|
||||
let (hwp_min, hwp_max, hwp_guaranteed, hwp_efficient) = if mode == PstateMode::Hwp {
|
||||
read_hwp_capabilities(id)
|
||||
} else {
|
||||
(0, 0, 0, 0)
|
||||
};
|
||||
if mode == PstateMode::Hwp {
|
||||
info!("CPU{}: HWP active (range {}-{}, EPP cap {}-{})", id, hwp_min, hwp_max, hwp_efficient, hwp_guaranteed);
|
||||
} else {
|
||||
info!("CPU{}: legacy P-states (HWP not enabled)", id);
|
||||
}
|
||||
let ps = read_acpi_pss(id);
|
||||
info!("CPU{}: {} P-states ({} - {} kHz)", id, ps.len(), ps.first().map_or(0, |p| p.freq_khz), ps.last().map_or(0, |p| p.freq_khz));
|
||||
CpuInfo { id, pstates: ps, current_idx: 0, load_history: [0.0; SAMPLE_WINDOW], load_idx: 0, throttle: false, msr_errors: 0, msr_suppressed: false }
|
||||
CpuInfo { id, pstates: ps, current_idx: 0, load_history: [0.0; SAMPLE_WINDOW], load_idx: 0, throttle: false, msr_errors: 0, msr_suppressed: false, mode, hwp_min, hwp_max, hwp_guaranteed, hwp_efficient }
|
||||
}).collect();
|
||||
let mut prev: Vec<(u64, u64)> = vec![(0, 0); cpus.len()];
|
||||
let mut thermal = ThermalCache::new();
|
||||
let mut last_state_write = Instant::now();
|
||||
for c in &ci { if !c.pstates.is_empty() { write_msr(c.id, IA32_PERF_CTL, c.pstates[0].ctl); } }
|
||||
// Set initial P-state. For HWP we leave MSR 0x774 as BIOS-set
|
||||
// (defaults to performance) and just let the governor pick a
|
||||
// starting index. For legacy, write the lowest P-state's IA32_PERF_CTL.
|
||||
for c in &ci {
|
||||
if !c.pstates.is_empty() {
|
||||
match c.mode {
|
||||
PstateMode::Hwp => {} // HWP starts at the BIOS default
|
||||
PstateMode::LegacyPerfCtl => {
|
||||
let _ = write_msr(c.id, IA32_PERF_CTL, c.pstates[0].ctl);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
loop {
|
||||
std::thread::sleep(Duration::from_millis(POLL_MS));
|
||||
let tt = thermal.get();
|
||||
@@ -162,13 +304,18 @@ fn main() {
|
||||
c.load_history[c.load_idx] = l; c.load_idx = (c.load_idx + 1) % SAMPLE_WINDOW; c.throttle = tt;
|
||||
let n = choose_pstate(governor, c);
|
||||
if n != c.current_idx && n < c.pstates.len() {
|
||||
let ct = c.pstates[n].ctl;
|
||||
if write_msr(c.id, IA32_PERF_CTL, ct) {
|
||||
info!("CPU{}: P{}→P{} ({}→{} kHz, load={:.0}%)", c.id, c.current_idx, n, c.pstates[c.current_idx].freq_khz, c.pstates[n].freq_khz, l * 100.0);
|
||||
c.current_idx = n; c.msr_errors = 0; c.msr_suppressed = false;
|
||||
} else {
|
||||
c.msr_errors += 1;
|
||||
if !c.msr_suppressed { warn!("CPU{}: MSR write failed ({}/{})", c.id, c.msr_errors, MSR_ERROR_SUPPRESS_COUNT); if c.msr_errors >= MSR_ERROR_SUPPRESS_COUNT { c.msr_suppressed = true; } }
|
||||
let prev_freq = c.pstates[c.current_idx].freq_khz;
|
||||
let next_freq = c.pstates[n].freq_khz;
|
||||
let l_pct = l * 100.0;
|
||||
match c.mode {
|
||||
PstateMode::Hwp => {
|
||||
apply_pstate(c, n);
|
||||
info!("CPU{} HWP→{}% ({}→{} kHz, load={:.0}%)", c.id, c.hwp_max.saturating_sub(n as u8 * (c.hwp_max - c.hwp_min) / c.pstates.len().saturating_sub(1).max(1) as u8), prev_freq, next_freq, l_pct);
|
||||
}
|
||||
PstateMode::LegacyPerfCtl => {
|
||||
apply_pstate(c, n);
|
||||
info!("CPU{}: P{}→P{} ({}→{} kHz, load={:.0}%)", c.id, c.current_idx, n, prev_freq, next_freq, l_pct);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user