From f95576841d84ec6ed798061e9006e555a45eb0c6 Mon Sep 17 00:00:00 2001 From: vasilito Date: Thu, 2 Jul 2026 23:27:16 +0300 Subject: [PATCH] =?UTF-8?q?kernel:=20Tier=203=20=E2=80=94=20C-state=20trac?= =?UTF-8?q?king=20and=20CPU=20topology?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit C-state tracking: - Add hlt_entries and mwait_entries counters to CpuStats - Record C-state entries in idle_loop (HLT vs MWAIT path) - Expose per-CPU C-state entry counts via /scheme/sys/cstates CPU topology: - New cpu_topology module: CPUID-based SMT/core detection (AMD leaf 0x8000001E, fallback to Intel leaf 1) - Record APIC ID and derive core_id/thread_id during LAPIC init - Expose topology via /scheme/sys/topology Both new sys scheme resources are readable by any userspace process and integrate with the existing percpu stats infrastructure. --- src/arch/x86_shared/device/local_apic.rs | 8 +- src/arch/x86_shared/interrupt/mod.rs | 16 +--- src/cpu_stats.rs | 18 ++++ src/cpu_topology.rs | 109 +++++++++++++++++++++++ src/main.rs | 3 + src/scheme/sys/cstates.rs | 36 ++++++++ src/scheme/sys/mod.rs | 4 + src/scheme/sys/topology.rs | 31 +++++++ 8 files changed, 212 insertions(+), 13 deletions(-) create mode 100644 src/cpu_topology.rs create mode 100644 src/scheme/sys/cstates.rs create mode 100644 src/scheme/sys/topology.rs diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs index 93a1372d9a..c6631ac4f6 100644 --- a/src/arch/x86_shared/device/local_apic.rs +++ b/src/arch/x86_shared/device/local_apic.rs @@ -94,10 +94,16 @@ impl LocalApic { self.setup_error_int(); //self.setup_timer(); + let apic_id = self.id(); PercpuBlock::current() .misc_arch_info .apic_id_opt - .set(Some(self.id())); + .set(Some(apic_id)); + + crate::cpu_topology::record_cpu( + PercpuBlock::current().cpu_id, + apic_id.get(), + ); } } diff --git a/src/arch/x86_shared/interrupt/mod.rs b/src/arch/x86_shared/interrupt/mod.rs index
ad1ba0367f..df33ef5b5a 100644 --- a/src/arch/x86_shared/interrupt/mod.rs +++ b/src/arch/x86_shared/interrupt/mod.rs @@ -137,22 +137,14 @@ pub unsafe fn mwait_loop(eax_hint: u32, ecx_hint: u32) { /// is, break on any interrupt". pub unsafe fn idle_loop() { let max_substate = cpuid_max_mwait_substate(); + let percpu = crate::percpu::PercpuBlock::current(); if max_substate == 0 { - // No MWAIT support. Land in C1 via hlt. This matches the - // pre-MWAIT behavior of `enable_and_halt` and is safe on - // every x86 CPU since the original Pentium. + percpu.stats.record_hlt_entry(); enable_and_halt(); } else { - // MWAIT supported. Enter the deepest substate, break on any - // interrupt (ecx=0). - // - // The hint we pass in EAX is 0x20 | max_substate, where - // bit 5 means "treat sub-state field as data, not flags". - // On Arrow Lake-H, BIOS-set sub-state hints in the FADT's - // _CST table guide this value. The kernel doesn't pick - // the state — that's the BIOS/firmware's job. let eax_hint: u32 = 0x20 | (max_substate as u32); - enable_and_halt(); // interrupts must be enabled first + percpu.stats.record_mwait_entry(); + enable_and_halt(); mwait_loop(eax_hint, 0); } } diff --git a/src/cpu_stats.rs b/src/cpu_stats.rs index ec9642a8d8..1c678e1d5f 100644 --- a/src/cpu_stats.rs +++ b/src/cpu_stats.rs @@ -36,6 +36,8 @@ pub struct CpuStats { state: AtomicU8, pub context_switches: AtomicU64, pub steals: AtomicU64, + hlt_entries: AtomicU64, + mwait_entries: AtomicU64, } impl CpuStats { @@ -49,6 +51,8 @@ impl CpuStats { state: AtomicU8::new(0), context_switches: AtomicU64::new(0), steals: AtomicU64::new(0), + hlt_entries: AtomicU64::new(0), + mwait_entries: AtomicU64::new(0), } } } @@ -61,6 +65,8 @@ pub struct CpuStatsData { pub irq: u64, pub context_switches: u64, pub steals: u64, + pub hlt_entries: u64, + pub mwait_entries: u64, } impl CpuStats { @@ -103,6 +109,16 @@ impl CpuStats { IRQ_COUNT[irq as usize].fetch_add(1, Ordering::Relaxed); self.irq.fetch_add(1, 
Ordering::Relaxed); } + + #[inline] + pub fn record_hlt_entry(&self) { + self.hlt_entries.fetch_add(1, Ordering::Relaxed); + } + + #[inline] + pub fn record_mwait_entry(&self) { + self.mwait_entries.fetch_add(1, Ordering::Relaxed); + } } impl fmt::Display for CpuStatsData { @@ -125,6 +141,8 @@ impl From<&CpuStats> for CpuStatsData { irq: val.irq.load(Ordering::Relaxed), context_switches: val.context_switches.load(Ordering::Relaxed), steals: val.steals.load(Ordering::Relaxed), + hlt_entries: val.hlt_entries.load(Ordering::Relaxed), + mwait_entries: val.mwait_entries.load(Ordering::Relaxed), } } } diff --git a/src/cpu_topology.rs b/src/cpu_topology.rs new file mode 100644 index 0000000000..e9ef6a7d0c --- /dev/null +++ b/src/cpu_topology.rs @@ -0,0 +1,109 @@ +use alloc::vec::Vec; +use crate::cpu_set::{LogicalCpuId, MAX_CPU_COUNT}; +use core::sync::atomic::{AtomicU32, Ordering}; + +#[derive(Debug, Clone, Copy)] +pub struct CpuTopologyEntry { + pub apic_id: u32, + pub core_id: u32, + pub thread_id: u32, + pub threads_per_core: u32, +} + +static APIC_IDS: [AtomicU32; MAX_CPU_COUNT as usize] = + [const { AtomicU32::new(u32::MAX) }; MAX_CPU_COUNT as usize]; + +static CORE_IDS: [AtomicU32; MAX_CPU_COUNT as usize] = + [const { AtomicU32::new(u32::MAX) }; MAX_CPU_COUNT as usize]; + +static THREADS_PER_CORE: AtomicU32 = AtomicU32::new(0); + +pub fn record_cpu(cpu_id: LogicalCpuId, apic_id: u32) { + let idx = cpu_id.get() as usize; + if idx >= MAX_CPU_COUNT as usize { + return; + } + + let (core_id, thread_id, threads_per_core) = detect_topology(apic_id); + let core_id_combined = (core_id << 16) | (thread_id & 0xFFFF); + + APIC_IDS[idx].store(apic_id, Ordering::Release); + CORE_IDS[idx].store(core_id_combined, Ordering::Release); + + let prev = THREADS_PER_CORE.load(Ordering::Acquire); + if prev == 0 || threads_per_core > prev { + THREADS_PER_CORE.store(threads_per_core, Ordering::Release); + } +} + +pub fn get_entry(cpu_id: LogicalCpuId) -> Option<CpuTopologyEntry> { + let idx = cpu_id.get() as
usize; + if idx >= MAX_CPU_COUNT as usize { + return None; + } + + let apic_id = APIC_IDS[idx].load(Ordering::Acquire); + if apic_id == u32::MAX { + return None; + } + + let core_combined = CORE_IDS[idx].load(Ordering::Acquire); + if core_combined == u32::MAX { + return None; + } + + let core_id = core_combined >> 16; + let thread_id = core_combined & 0xFFFF; + let threads_per_core = THREADS_PER_CORE.load(Ordering::Acquire).max(1); + + Some(CpuTopologyEntry { + apic_id, + core_id, + thread_id, + threads_per_core, + }) +} + +pub fn all_entries() -> Vec<(LogicalCpuId, CpuTopologyEntry)> { + let count = crate::cpu_count(); + let mut result = Vec::new(); + for i in 0..count { + let id = LogicalCpuId::new(i); + if let Some(entry) = get_entry(id) { + result.push((id, entry)); + } + } + result +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn detect_topology(initial_apic_id: u32) -> (u32, u32, u32) { + use raw_cpuid::CpuId; + + let cpuid = CpuId::new(); + + if let Some(topo) = cpuid.get_processor_topology_info() { + let threads_per_core = topo.threads_per_core() as u32; + let core_id = topo.core_id() as u32; + if threads_per_core > 0 { + let thread_id = initial_apic_id % threads_per_core; + return (core_id, thread_id, threads_per_core); + } + } + + if let Some(feature_info) = cpuid.get_feature_info() { + let threads_per_core = feature_info.max_logical_processor_ids() as u32; + if threads_per_core > 1 { + let core_id = initial_apic_id / threads_per_core; + let thread_id = initial_apic_id % threads_per_core; + return (core_id, thread_id, threads_per_core); + } + } + + (initial_apic_id, 0, 1) +} + +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +fn detect_topology(initial_apic_id: u32) -> (u32, u32, u32) { + (initial_apic_id, 0, 1) +} diff --git a/src/main.rs b/src/main.rs index 75c68c371f..398246647d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -51,6 +51,9 @@ mod cpu_set; /// Stats for the CPUs mod cpu_stats; +/// CPU topology detection 
(SMT/core mapping) +mod cpu_topology; + /// Context management mod context; diff --git a/src/scheme/sys/cstates.rs b/src/scheme/sys/cstates.rs new file mode 100644 index 0000000000..128caabb02 --- /dev/null +++ b/src/scheme/sys/cstates.rs @@ -0,0 +1,36 @@ +use core::fmt::Write as _; + +use crate::{ + percpu::get_all_stats, + sync::CleanLockToken, + syscall::error::Result, +}; +use alloc::{string::String, vec::Vec}; + +pub fn resource(_token: &mut CleanLockToken) -> Result<Vec<u8>> { + let stats = get_all_stats(); + let mut out = String::new(); + let mut total_hlt: u64 = 0; + let mut total_mwait: u64 = 0; + + for (id, stat) in &stats { + let _ = writeln!( + &mut out, + "cpu{} hlt_entries {} mwait_entries {}", + id.get(), + stat.hlt_entries, + stat.mwait_entries, + ); + + total_hlt += stat.hlt_entries; + total_mwait += stat.mwait_entries; + } + + let _ = writeln!( + &mut out, + "total hlt_entries {} mwait_entries {}", + total_hlt, total_mwait, + ); + + Ok(out.into_bytes()) +} diff --git a/src/scheme/sys/mod.rs b/src/scheme/sys/mod.rs index 5c97ab665c..486c82506c 100644 --- a/src/scheme/sys/mod.rs +++ b/src/scheme/sys/mod.rs @@ -28,6 +28,7 @@ use super::{CallerCtx, HandleMap, KernelScheme, OpenResult, StrOrBytes}; mod block; mod context; mod cpu; +mod cstates; mod exe; mod fdstat; mod iostat; @@ -37,6 +38,7 @@ mod msr; mod sched; mod stat; mod syscall; +mod topology; mod uname; /// Extract the (cpu<<32 | msr) u64 handle stored in an MSR fd's @@ -104,6 +106,8 @@ const FILES: &[(&str, Kind)] = &[ ("block", Rd(block::resource)), ("context", Rd(context::resource)), ("cpu", Rd(cpu::resource)), + ("cstates", Rd(cstates::resource)), + ("topology", Rd(topology::resource)), #[cfg(feature = "sys_fdstat")] ("fdstat", Rd(fdstat::resource)), ("exe", Rd(exe::resource)), diff --git a/src/scheme/sys/topology.rs b/src/scheme/sys/topology.rs new file mode 100644 index 0000000000..f4fe450771 --- /dev/null +++ b/src/scheme/sys/topology.rs @@ -0,0 +1,31 @@ +use core::fmt::Write as _; + +use
crate::{ + cpu_topology, + sync::CleanLockToken, + syscall::error::Result, +}; +use alloc::{string::String, vec::Vec}; + +pub fn resource(_token: &mut CleanLockToken) -> Result<Vec<u8>> { + let entries = cpu_topology::all_entries(); + let mut out = String::new(); + + for (id, entry) in &entries { + let _ = writeln!( + &mut out, + "cpu{} apic_id {} core_id {} thread_id {} threads_per_core {}", + id.get(), + entry.apic_id, + entry.core_id, + entry.thread_id, + entry.threads_per_core, + ); + } + + if entries.is_empty() { + let _ = writeln!(&mut out, "(topology not yet detected)"); + } + + Ok(out.into_bytes()) +}