fix: coretempd MSR probe for QEMU default machine type
On QEMU's default i440FX machine type, rdmsr on unsupported MSRs (0x19c IA32_THERM_STATUS, 0x1a2 IA32_TEMPERATURE_TARGET) causes a kernel #GP that kills the process. Same pattern as cpufreqd: spawn a child with --probe-msr to test readability before the main loop. If probe fails, disable all MSR reads and report all CPUs as Unknown.
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -17,6 +19,8 @@ const IA32_THERM_STATUS: u32 = 0x19c;
|
||||
const IA32_TEMPERATURE_TARGET: u32 = 0x1a2;
|
||||
const AMD_TCTL: u32 = 0xc0010293;
|
||||
|
||||
static MSR_AVAILABLE: AtomicBool = AtomicBool::new(true);
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
enum Vendor {
|
||||
Intel,
|
||||
@@ -25,12 +29,29 @@ enum Vendor {
|
||||
}
|
||||
|
||||
fn read_msr(cpu: u32, msr: u32) -> Option<u64> {
|
||||
if !MSR_AVAILABLE.load(Ordering::Relaxed) {
|
||||
return None;
|
||||
}
|
||||
let path = format!("/scheme/sys/msr/{}/{:x}", cpu, msr);
|
||||
fs::read_to_string(&path)
|
||||
.ok()
|
||||
.and_then(|s| u64::from_str_radix(s.trim(), 16).ok())
|
||||
}
|
||||
|
||||
/// Tests whether MSR reads are usable by re-running the current executable
/// as a child process with the `--probe-msr` argument.
///
/// Returns `true` only if the path of the current executable can be
/// resolved, the child can be spawned and waited on, and it exits with a
/// success status. Every failure along the way yields `false`.
///
/// NOTE(review): the child's probe in `main` reads only `IA32_THERM_STATUS`
/// on CPU 0, and a failed probe makes `read_msr` return `None` for every
/// register, including `AMD_TCTL`. Confirm this is intended on AMD hosts.
fn probe_msr_available() -> bool {
    env::current_exe()
        .and_then(|self_path| Command::new(self_path).arg("--probe-msr").status())
        .map(|exit| exit.success())
        .unwrap_or(false)
}
|
||||
|
||||
fn detect_vendor(cpu: u32) -> Vendor {
|
||||
if read_msr(cpu, IA32_THERM_STATUS).is_some() {
|
||||
Vendor::Intel
|
||||
@@ -275,6 +296,15 @@ fn notify_scheme_ready(
|
||||
fn run_daemon() -> Result<(), String> {
|
||||
let notify_fd = init_notify_fd();
|
||||
|
||||
// Probe MSR availability before attempting any reads.
|
||||
// On QEMU's default machine type, rdmsr on unsupported MSRs (0x19c, 0x1a2)
|
||||
// causes a kernel #GP that kills the process. By spawning a child to test
|
||||
// the read first, we detect this safely and degrade to Unknown vendor.
|
||||
if !probe_msr_available() {
|
||||
MSR_AVAILABLE.store(false, Ordering::Relaxed);
|
||||
eprintln!("[INFO] coretempd: MSR reads unavailable — reporting all CPUs as Unknown");
|
||||
}
|
||||
|
||||
let cpus = detect_cpus();
|
||||
eprintln!("[INFO] coretempd: detected {} CPU(s)", cpus.len());
|
||||
|
||||
@@ -345,6 +375,13 @@ fn run_daemon() -> Result<(), String> {
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// Probe mode: child process tests MSR read and exits with status.
|
||||
let args: Vec<String> = env::args().collect();
|
||||
if args.len() >= 2 && args[1] == "--probe-msr" {
|
||||
let probe_ok = read_msr(0, IA32_THERM_STATUS).is_some();
|
||||
std::process::exit(if probe_ok { 0 } else { 1 });
|
||||
}
|
||||
|
||||
if let Err(e) = run_daemon() {
|
||||
eprintln!("[ERROR] coretempd: {e}");
|
||||
std::process::exit(1);
|
||||
|
||||
Reference in New Issue
Block a user