cpufreqd: add HWP (Hardware P-states / Intel Speed Shift) detection

Phase G.2 of the ACPI/Arrow Lake port. The LG Gram 2025 (Core Ultra 7
255H, Arrow Lake-H) uses Intel HWP for P-state control — legacy
IA32_PERF_CTL writes are silently ignored when HWP is active.

The previous cpufreqd always wrote IA32_PERF_CTL (MSR 0x199), which
on Arrow Lake-H had zero effect. We now:

1. Detect HWP at startup by reading IA32_PM_ENABLE (MSR 0x770) bit 0
2. If HWP is active:
   a. Read IA32_HWP_CAPABILITIES (MSR 0x771) for the
      min/max/guaranteed/efficient performance range
   b. Translate the governor's P-state index into the HWP
      "Desired Performance" field + EPP hint
   c. Write IA32_HWP_REQUEST (MSR 0x774) instead of IA32_PERF_CTL
3. If HWP is not active, fall back to the legacy IA32_PERF_CTL path
   (preserves backward compatibility for older CPUs)

The kernel's new /scheme/sys/msr/ scheme (Phase G.1) provides the
in-memory storage backing the MSR reads/writes. On the real LG Gram
2025 hardware, the kernel's MSR scheme will be wired to the actual
hardware MSRs (Phase G+ work); the cpufreqd interface is unchanged.

HWP layout (Intel SDM Vol 3B §14.4.4):
  [7:0]    Minimum Performance
  [15:8]   Maximum Performance
  [23:16] Desired Performance
  [31:24] Energy-Performance Preference (EPP)
  [41:32] Activity Window (0 = auto)
  [42]    Package Control

EPP runs in the opposite direction to desired performance: 0 =
performance, 255 = power-save. We derive both "Desired Performance"
and EPP from the same linear P-state index, so the hardware receives
one consistent hint: a higher desired performance level is always
paired with a more performance-biased EPP.

Includes:
- PstateMode enum (LegacyPerfCtl | Hwp), detected per CPU at startup and
  matched on at runtime to pick the MSR path
- detect_pstate_mode() reads MSR 0x770
- read_hwp_capabilities() reads MSR 0x771, returns (min, max,
  guaranteed, efficient) bytes
- hwp_request_for() maps P-state index to IA32_HWP_REQUEST u64
- apply_pstate() dispatches to the right MSR based on ci.mode
- The /scheme/cpufreq/state output now tags each CPU with [HWP] or
  [legacy] for observability

Hardware test plan: on the LG Gram 2025, "performance" governor
should pin IA32_HWP_REQUEST.Desired = hwp_max with EPP=0; "powersave"
should pin it to hwp_min with EPP=255; "ondemand" should ramp
between. Reading IA32_PERF_STATUS (MSR 0x198) via /scheme/sys/msr
should reflect the new operating point within ~1ms.
This commit is contained in:
Red Bear OS
2026-06-30 12:53:57 +03:00
committed by vasilito
parent 7a24b854c3
commit d24d0e2174
+168 -21
View File
@@ -4,7 +4,18 @@ use std::io::{Read, Write};
use std::time::{Duration, Instant};
use log::{info, warn, LevelFilter};
const IA32_PERF_CTL: u32 = 0x199;
// MSR addresses — see Intel SDM Vol 3B §14
const IA32_PERF_CTL: u32 = 0x199; // legacy P-state control; written by apply_pstate() in LegacyPerfCtl mode
const IA32_HWP_REQUEST: u32 = 0x774; // HWP control; written by apply_pstate() in Hwp mode
const IA32_HWP_CAPABILITIES: u32 = 0x771; // HWP range; decoded by read_hwp_capabilities()
const IA32_PM_ENABLE: u32 = 0x770; // HWP enable bit (bit 0); tested by detect_pstate_mode()
// EPP values for IA32_HWP_REQUEST[31:24]
// (0x00 favours performance, 0xFF favours power saving).
// NOTE(review): nothing in the visible code references these four constants;
// hwp_request_for() derives EPP linearly from the P-state index instead.
const EPP_PERFORMANCE: u64 = 0x00;
const EPP_BALANCE_PERFORMANCE: u64 = 0x80;
const EPP_BALANCE_POWER: u64 = 0xC0;
const EPP_POWERSAVE: u64 = 0xFF;
const POLL_MS: u64 = 100; // main-loop sleep per iteration, in milliseconds
const SAMPLE_WINDOW: usize = 10; // number of slots in CpuInfo::load_history
const STATE_WRITE_INTERVAL_S: u64 = 1; // presumably seconds between /scheme/cpufreq/state writes — usage not visible here, confirm
@@ -21,18 +32,39 @@ impl log::Log for StderrLogger {
fn flush(&self) {}
}
#[derive(Clone)]
struct PState { freq_khz: u32, power_mw: u32, latency_us: u32, ctl: u64 }
/// Which MSR interface is used to request a performance level on a CPU.
///
/// HWP stands for Hardware P-states (Intel Speed Shift). Per the original
/// author's note, Arrow Lake-H always has HWP enabled by the BIOS, and legacy
/// IA32_PERF_CTL writes are ignored while HWP is active. The mode is detected
/// from bit 0 of MSR 0x770 (see `detect_pstate_mode`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum PstateMode {
    /// Classic P-state control: write the ACPI `ctl` value to IA32_PERF_CTL.
    LegacyPerfCtl,
    /// Hardware P-states: write IA32_HWP_REQUEST (0x774) with an EPP hint.
    Hwp,
}
/// One performance state of a CPU, as produced by `read_acpi_pss`.
#[derive(Clone)]
struct PState {
    /// Operating frequency (kHz, going by the field name and the state log).
    freq_khz: u32,
    /// Power draw (mW, going by the field name and the state log).
    power_mw: u32,
    /// Transition latency (presumably microseconds — not used in the visible code).
    latency_us: u32,
    /// Raw value written to IA32_PERF_CTL when this state is selected in legacy mode.
    ctl: u64,
}
/// Per-CPU governor state: the available P-states, the currently applied
/// index, a ring buffer of recent load samples, MSR write-error bookkeeping,
/// and the detected P-state mode with its HWP capability range.
#[derive(Clone)]
struct CpuInfo {
// NOTE(review): the next three lines repeat fields that are declared again
// one-per-line below; they look like the pre-change side of the diff this
// listing was captured from — confirm only one set exists in the real file.
id: u32, pstates: Vec<PState>, current_idx: usize,
load_history: [f64; SAMPLE_WINDOW], load_idx: usize, throttle: bool,
msr_errors: u32, msr_suppressed: bool,
id: u32,
pstates: Vec<PState>,
current_idx: usize, // index into `pstates` of the last successfully applied state
load_history: [f64; SAMPLE_WINDOW], // ring buffer of recent load samples
load_idx: usize, // next slot of `load_history` to overwrite
throttle: bool, // copy of the thermal flag sampled in the main loop
msr_errors: u32, // consecutive failed MSR writes; reset to 0 on success
msr_suppressed: bool, // true once failure warnings have been silenced
mode: PstateMode,
// The four hwp_* values are decoded by read_hwp_capabilities() from
// IA32_HWP_CAPABILITIES (MSR 0x771); they are all 0 in legacy mode.
hwp_min: u8, // Lowest Performance, MSR 0x771[31:24]
hwp_max: u8, // Highest Performance, MSR 0x771[7:0]
hwp_guaranteed: u8, // Guaranteed Performance, MSR 0x771[15:8]
hwp_efficient: u8, // Most Efficient Performance, MSR 0x771[23:16]
}
fn detect_cpus() -> Vec<u32> {
// Redox exposes the CPU count via the sys:cpu scheme file, not via a
// /dev/cpu/ directory (kernel/src/scheme/sys/cpu.rs).
if let Ok(data) = fs::read_to_string("/scheme/sys/cpu") {
for line in data.lines() {
if let Some(rest) = line.strip_prefix("CPUs: ") {
@@ -54,6 +86,36 @@ fn detect_cpus() -> Vec<u32> {
v
}
/// Read the low 32 bits of `msr` on `cpu` through the MSR scheme.
///
/// Opens `/scheme/sys/msr/{cpu}/0x{msr:x}`, reads exactly 8 bytes and decodes
/// the first four as a little-endian `u32`. Returns `None` if the file cannot
/// be opened or fewer than 8 bytes are available.
fn read_msr_u32(cpu: u32, msr: u32) -> Option<u32> {
    let mut raw = [0u8; 8];
    fs::File::open(format!("/scheme/sys/msr/{}/0x{:x}", cpu, msr))
        .and_then(|mut file| file.read_exact(&mut raw))
        .ok()?;
    // Only the low dword is of interest to the callers; the upper four bytes
    // are read (to consume the full 64-bit register) and then dropped.
    Some(u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]))
}
/// Decide which P-state interface `cpu` uses.
///
/// Returns `PstateMode::Hwp` when IA32_PM_ENABLE could be read and its bit 0
/// (HWP_ENABLE) is set; any read failure or a clear bit selects
/// `PstateMode::LegacyPerfCtl`.
fn detect_pstate_mode(cpu: u32) -> PstateMode {
    let hwp_enabled = read_msr_u32(cpu, IA32_PM_ENABLE).map_or(false, |pm| pm & 1 == 1);
    if hwp_enabled {
        PstateMode::Hwp
    } else {
        PstateMode::LegacyPerfCtl
    }
}
fn read_hwp_capabilities(cpu: u32) -> (u8, u8, u8, u8) {
// IA32_HWP_CAPABILITIES layout (Vol 3B §14.4.3):
// [7:0] Highest Performance
// [15:8] Guaranteed Performance
// [23:16] Most Efficient Performance
// [31:24] Lowest Performance
let cap = read_msr_u32(cpu, IA32_HWP_CAPABILITIES).unwrap_or(0);
let max = (cap & 0xFF) as u8;
let guaranteed = ((cap >> 8) & 0xFF) as u8;
let efficient = ((cap >> 16) & 0xFF) as u8;
let min = ((cap >> 24) & 0xFF) as u8;
(min, max, guaranteed, efficient)
}
fn read_acpi_pss(cpu: u32) -> Vec<PState> {
let path = format!("/scheme/acpi/processor/CPU{}/pss", cpu);
if let Ok(d) = fs::read_to_string(&path) {
@@ -77,14 +139,34 @@ fn read_acpi_pss(cpu: u32) -> Vec<PState> {
}
/// Write the 64-bit value `val` to `msr` on `cpu` through the MSR scheme.
///
/// Returns `true` only if the scheme file could be opened for writing and all
/// eight bytes were written; any failure yields `false` (the caller decides
/// how to report it).
fn write_msr(cpu: u32, msr: u32, val: u64) -> bool {
    // Redox exposes MSR as /scheme/sys/msr/{cpu}/{msr_hex}, not the
    // Linux-style /dev/cpu/{cpu}/msr. The scheme requires
    // CAP_SYS_MSR, which cpufreqd receives because it runs as root.
    let path = format!("/scheme/sys/msr/{}/0x{:x}", cpu, msr);
    match fs::OpenOptions::new().write(true).open(&path) {
        // Little-endian on purpose: read_msr_u32() decodes with
        // from_le_bytes, so both directions now state the same byte order
        // instead of relying on the host's native order.
        Ok(mut file) => file.write_all(&val.to_le_bytes()).is_ok(),
        Err(_) => false,
    }
}
/// Map a P-state index to an IA32_HWP_REQUEST value.
///
/// IA32_HWP_REQUEST layout (Vol 3B §14.4.4):
///   [7:0]   Minimum Performance
///   [15:8]  Maximum Performance
///   [23:16] Desired Performance
///   [31:24] Energy-Performance Preference
///   [41:32] Activity Window (left at 0 = auto)
///   [42]    Package Control (left at 0)
///
/// `idx` is scaled linearly over `0..=pstates.len()-1`: index 0 maps to
/// `ci.hwp_min` with EPP 255 (power-save), the last index maps to
/// `ci.hwp_max` with EPP 0 (performance). The Maximum field is set to the
/// same level as Desired, and Minimum is always `ci.hwp_min`.
///
/// An `idx` beyond the last P-state is clamped to the last one, and every
/// field is masked to 8 bits, so a bad index can never spill into a
/// neighbouring field. If `hwp_max < hwp_min` the range collapses to
/// `hwp_min`.
fn hwp_request_for(idx: usize, ci: &CpuInfo) -> u64 {
    // Highest valid index; at least 1 so an empty or single-entry table
    // does not divide by zero.
    let top = ci.pstates.len().saturating_sub(1).max(1);
    // Fraction of the way from lowest to highest performance, capped at 1.0
    // so an out-of-range index cannot push `desired` above hwp_max.
    let frac = (idx as f64 / top as f64).min(1.0);
    let span = f64::from(ci.hwp_max.saturating_sub(ci.hwp_min));
    let desired = ((f64::from(ci.hwp_min) + span * frac) as u64) & 0xFF;
    // EPP runs the other way round: performance = 0, powersave = 255.
    let epp = (((1.0 - frac) * 255.0) as u64) & 0xFF;
    u64::from(ci.hwp_min) | (desired << 8) | (desired << 16) | (epp << 24)
}
fn measure_load(cpu: u32, prev: &mut (u64, u64)) -> f64 {
if let Ok(d) = fs::read_to_string(format!("/scheme/sys/cpu/{}/stat", cpu)) {
let p: Vec<u64> = d.split_whitespace().filter_map(|s| s.parse().ok()).collect();
@@ -111,6 +193,39 @@ fn choose_pstate(g: Governor, ci: &CpuInfo) -> usize {
}
}
/// Request P-state `idx` on the CPU described by `ci`, using the MSR that
/// matches `ci.mode`.
///
/// * HWP mode writes `hwp_request_for(idx, ci)` to IA32_HWP_REQUEST.
/// * Legacy mode writes `ci.pstates[idx].ctl` to IA32_PERF_CTL; this indexing
///   panics if `idx` is out of range, so callers must bounds-check first.
///
/// On success `current_idx` becomes `idx` and the error bookkeeping is reset.
/// On failure `msr_errors` is incremented and a warning is logged, until the
/// count reaches `MSR_ERROR_SUPPRESS_COUNT`, after which further warnings are
/// suppressed.
fn apply_pstate(ci: &mut CpuInfo, idx: usize) {
    // Pick the register and payload first; everything after is shared.
    let (target_msr, payload) = match ci.mode {
        PstateMode::Hwp => (IA32_HWP_REQUEST, hwp_request_for(idx, ci)),
        PstateMode::LegacyPerfCtl => (IA32_PERF_CTL, ci.pstates[idx].ctl),
    };

    if write_msr(ci.id, target_msr, payload) {
        ci.current_idx = idx;
        ci.msr_errors = 0;
        ci.msr_suppressed = false;
        return;
    }

    ci.msr_errors += 1;
    if ci.msr_suppressed {
        return;
    }
    match ci.mode {
        PstateMode::Hwp => {
            warn!("CPU{}: HWP write failed ({}/{})", ci.id, ci.msr_errors, MSR_ERROR_SUPPRESS_COUNT)
        }
        PstateMode::LegacyPerfCtl => {
            warn!("CPU{}: MSR write failed ({}/{})", ci.id, ci.msr_errors, MSR_ERROR_SUPPRESS_COUNT)
        }
    }
    if ci.msr_errors >= MSR_ERROR_SUPPRESS_COUNT {
        ci.msr_suppressed = true;
    }
}
struct ThermalCache { data: bool, last_check: Instant }
impl ThermalCache {
fn new() -> Self { Self { data: false, last_check: Instant::now() - Duration::from_secs(10) } }
@@ -129,7 +244,11 @@ fn write_scheme_state(governor: Governor, cpus: &[CpuInfo]) {
for ci in cpus {
if ci.pstates.is_empty() { continue; }
let p = &ci.pstates[ci.current_idx.min(ci.pstates.len() - 1)];
out.push_str(&format!("CPU{}: {} kHz, {} mW, load={:.1}%\n", ci.id, p.freq_khz, p.power_mw, avg_load(ci) * 100.0));
let mode_s = match ci.mode {
PstateMode::Hwp => "HWP",
PstateMode::LegacyPerfCtl => "legacy",
};
out.push_str(&format!("CPU{} [{}]: {} kHz, {} mW, load={:.1}%\n", ci.id, mode_s, p.freq_khz, p.power_mw, avg_load(ci) * 100.0));
}
let _ = fs::write("/scheme/cpufreq/state", out);
}
@@ -145,14 +264,37 @@ fn main() {
let cpus = detect_cpus();
info!("detected {} CPU(s), governor={:?}", cpus.len(), governor);
let mut ci: Vec<CpuInfo> = cpus.iter().map(|&id| {
let mode = detect_pstate_mode(id);
let (hwp_min, hwp_max, hwp_guaranteed, hwp_efficient) = if mode == PstateMode::Hwp {
read_hwp_capabilities(id)
} else {
(0, 0, 0, 0)
};
if mode == PstateMode::Hwp {
info!("CPU{}: HWP active (range {}-{}, EPP cap {}-{})", id, hwp_min, hwp_max, hwp_efficient, hwp_guaranteed);
} else {
info!("CPU{}: legacy P-states (HWP not enabled)", id);
}
let ps = read_acpi_pss(id);
info!("CPU{}: {} P-states ({} - {} kHz)", id, ps.len(), ps.first().map_or(0, |p| p.freq_khz), ps.last().map_or(0, |p| p.freq_khz));
CpuInfo { id, pstates: ps, current_idx: 0, load_history: [0.0; SAMPLE_WINDOW], load_idx: 0, throttle: false, msr_errors: 0, msr_suppressed: false }
CpuInfo { id, pstates: ps, current_idx: 0, load_history: [0.0; SAMPLE_WINDOW], load_idx: 0, throttle: false, msr_errors: 0, msr_suppressed: false, mode, hwp_min, hwp_max, hwp_guaranteed, hwp_efficient }
}).collect();
let mut prev: Vec<(u64, u64)> = vec![(0, 0); cpus.len()];
let mut thermal = ThermalCache::new();
let mut last_state_write = Instant::now();
for c in &ci { if !c.pstates.is_empty() { write_msr(c.id, IA32_PERF_CTL, c.pstates[0].ctl); } }
// Set initial P-state. For HWP we leave MSR 0x774 as BIOS-set
// (defaults to performance) and just let the governor pick a
// starting index. For legacy, write the lowest P-state's IA32_PERF_CTL.
for c in &ci {
if !c.pstates.is_empty() {
match c.mode {
PstateMode::Hwp => {} // HWP starts at the BIOS default
PstateMode::LegacyPerfCtl => {
let _ = write_msr(c.id, IA32_PERF_CTL, c.pstates[0].ctl);
}
}
}
}
loop {
std::thread::sleep(Duration::from_millis(POLL_MS));
let tt = thermal.get();
@@ -162,13 +304,18 @@ fn main() {
c.load_history[c.load_idx] = l; c.load_idx = (c.load_idx + 1) % SAMPLE_WINDOW; c.throttle = tt;
let n = choose_pstate(governor, c);
if n != c.current_idx && n < c.pstates.len() {
let ct = c.pstates[n].ctl;
if write_msr(c.id, IA32_PERF_CTL, ct) {
info!("CPU{}: P{}→P{} ({}→{} kHz, load={:.0}%)", c.id, c.current_idx, n, c.pstates[c.current_idx].freq_khz, c.pstates[n].freq_khz, l * 100.0);
c.current_idx = n; c.msr_errors = 0; c.msr_suppressed = false;
} else {
c.msr_errors += 1;
if !c.msr_suppressed { warn!("CPU{}: MSR write failed ({}/{})", c.id, c.msr_errors, MSR_ERROR_SUPPRESS_COUNT); if c.msr_errors >= MSR_ERROR_SUPPRESS_COUNT { c.msr_suppressed = true; } }
let prev_freq = c.pstates[c.current_idx].freq_khz;
let next_freq = c.pstates[n].freq_khz;
let l_pct = l * 100.0;
match c.mode {
PstateMode::Hwp => {
apply_pstate(c, n);
info!("CPU{} HWP→{}% ({}{} kHz, load={:.0}%)", c.id, c.hwp_max.saturating_sub(n as u8 * (c.hwp_max - c.hwp_min) / c.pstates.len().saturating_sub(1).max(1) as u8), prev_freq, next_freq, l_pct);
}
PstateMode::LegacyPerfCtl => {
apply_pstate(c, n);
info!("CPU{}: P{}→P{} ({}→{} kHz, load={:.0}%)", c.id, c.current_idx, n, prev_freq, next_freq, l_pct);
}
}
}
}