3 Commits

Author SHA1 Message Date
vasilito f95576841d kernel: Tier 3 — C-state tracking and CPU topology
C-state tracking:
- Add hlt_entries and mwait_entries counters to CpuStats
- Record C-state entries in idle_loop (HLT vs MWAIT path)
- Expose per-CPU C-state entry counts via /scheme/sys/cstates

CPU topology:
- New cpu_topology module: CPUID-based SMT/core detection
  (AMD leaf 0x8000001E, fallback to Intel leaf 1)
- Record APIC ID and derive core_id/thread_id during LAPIC init
- Expose topology via /scheme/sys/topology

Both new sys scheme resources are readable by any userspace process
and integrate with the existing percpu stats infrastructure.
2026-07-02 23:27:16 +03:00
vasilito 88b661fb18 lapic: fix spurious interrupt vector from 0x00 to 0xFF
The LAPIC Spurious Interrupt Vector Register (SVR) was set to 0x100,
meaning bit 8 (APIC enable) was set but the vector field was 0x00.
Vector 0 is the divide-error exception handler, so any LAPIC spurious
interrupt was misrouted to divide_by_zero — a latent bug.

Fix: set SVR to 0x1FF (vector 0xFF + enable bit). Vector 255 is the
conventional spurious vector per Intel SDM Vol 3 §10.9.

Add a dedicated lapic_spurious IDT handler at vector 0xFF that
increments a counter and returns WITHOUT sending EOI (spurious
interrupts must not be EOId per the spec). Override the generic
interrupt stub at position 255 in the IDT and reserve bit 255.

Also fix spurious_irq_resource to report LAPIC spurious count for
both PIC and APIC modes instead of returning a placeholder string.
2026-07-02 23:09:07 +03:00
vasilito c46d3a90eb fix: handle early-boot exceptions in excp_handler gracefully
excp_handler() called context::current() unconditionally, which panics
with 'not inside of context' when no context exists yet (before
context::init() runs in kmain/kmain_ap). On bare metal, a page fault
during BSP's start() — e.g. ACPI table access or device MMIO — caused
page_fault_handler() to return Err, falling through to excp_handler(),
which then panicked at context::current() instead of reporting the
actual fault.

Replace context::current() with context::try_current(). When None,
log the exception details (kind, code, faulting address) and panic
with a descriptive message. This turns an uninformative cascading
panic into a diagnostic one that reveals the real faulting address.
2026-07-02 22:33:33 +03:00
13 changed files with 267 additions and 28 deletions
+23 -3
View File
@@ -69,21 +69,41 @@ impl LocalApic {
}
}
/// LAPIC Spurious Interrupt Vector.
///
/// Must not overlap with the exception vectors (0..=31) or the generic IRQ
/// vectors (32..=254). Vector 255 is the conventional choice per Intel SDM
/// Vol 3 § 10.9 ("Spurious Interrupt").
///
/// The previous value was 0x00 (divide-error exception), meaning any LAPIC
/// spurious interrupt was misrouted to the divide-by-zero handler — a
/// latent bug that causes undefined behaviour on real hardware.
const SPURIOUS_VECTOR: u32 = 0xFF;
/// Spurious Interrupt Vector Register value: vector | bit 8 (APIC enable).
const SVR_VALUE: u32 = Self::SPURIOUS_VECTOR | 0x100;
unsafe fn init_ap(&mut self) {
unsafe {
if self.x2 {
wrmsr(IA32_APIC_BASE, rdmsr(IA32_APIC_BASE) | (1 << 10));
wrmsr(IA32_X2APIC_SIVR, 0x100);
wrmsr(IA32_X2APIC_SIVR, u64::from(Self::SVR_VALUE));
} else {
self.write(0xF0, 0x100);
self.write(0xF0, Self::SVR_VALUE);
}
self.setup_error_int();
//self.setup_timer();
let apic_id = self.id();
PercpuBlock::current()
.misc_arch_info
.apic_id_opt
.set(Some(self.id()));
.set(Some(apic_id));
crate::cpu_topology::record_cpu(
PercpuBlock::current().cpu_id,
apic_id.get(),
);
}
}
+5
View File
@@ -208,6 +208,10 @@ fn init_generic(cpu_id: LogicalCpuId, idt: &mut Idt, backup_stack_end: usize) {
});
}
// Override vector 0xFF (255) with the LAPIC spurious interrupt handler.
// The generic stub loop above fills 32..=255, so this must come after.
current_idt[0xFF].set_func(irq::lapic_spurious);
// reserve bits 31:0, i.e. the first 32 interrupts, which are reserved for exceptions
*current_reservations[0].get_mut() |= 0x0000_0000_FFFF_FFFF;
@@ -251,6 +255,7 @@ fn init_generic(cpu_id: LogicalCpuId, idt: &mut Idt, backup_stack_end: usize) {
idt.set_reserved_mut(IpiKind::Switch as u8, true);
idt.set_reserved_mut(IpiKind::Tlb as u8, true);
idt.set_reserved_mut(IpiKind::Pit as u8, true);
idt.set_reserved_mut(0xFF, true);
#[cfg(target_arch = "x86")]
{
+17 -11
View File
@@ -26,6 +26,7 @@ pub enum IrqMethod {
static SPURIOUS_COUNT_IRQ7: AtomicUsize = AtomicUsize::new(0);
static SPURIOUS_COUNT_IRQ15: AtomicUsize = AtomicUsize::new(0);
static SPURIOUS_COUNT_LAPIC: AtomicUsize = AtomicUsize::new(0);
pub fn spurious_count_irq7() -> usize {
SPURIOUS_COUNT_IRQ7.load(Ordering::Relaxed)
@@ -33,20 +34,21 @@ pub fn spurious_count_irq7() -> usize {
pub fn spurious_count_irq15() -> usize {
SPURIOUS_COUNT_IRQ15.load(Ordering::Relaxed)
}
/// Returns the current value of `SPURIOUS_COUNT_LAPIC`, the counter that the
/// `lapic_spurious` interrupt handler increments. The load is relaxed, so the
/// value is a statistic, not a synchronisation point.
pub fn spurious_count_lapic() -> usize {
SPURIOUS_COUNT_LAPIC.load(Ordering::Relaxed)
}
pub fn spurious_count() -> usize {
spurious_count_irq7() + spurious_count_irq15()
spurious_count_irq7() + spurious_count_irq15() + spurious_count_lapic()
}
pub fn spurious_irq_resource(_token: &mut CleanLockToken) -> syscall::Result<Vec<u8>> {
match irq_method() {
IrqMethod::Apic => Ok(Vec::from(&b"(not implemented for APIC yet)"[..])),
IrqMethod::Pic => Ok(format!(
"{}\tIRQ7\n{}\tIRQ15\n{}\ttotal\n",
spurious_count_irq7(),
spurious_count_irq15(),
spurious_count()
)
.into_bytes()),
}
Ok(format!(
"{}\tIRQ7\n{}\tIRQ15\n{}\tLAPIC\n{}\ttotal\n",
spurious_count_irq7(),
spurious_count_irq15(),
spurious_count_lapic(),
spurious_count()
)
.into_bytes())
}
static IRQ_METHOD: AtomicUsize = AtomicUsize::new(IrqMethod::Pic as usize);
@@ -320,6 +322,10 @@ interrupt!(lapic_error, || {
unsafe { lapic_eoi() };
});
// LAPIC spurious-interrupt handler (installed at IDT vector 0xFF). It only
// bumps the spurious counter. Unlike `lapic_error` it deliberately does not
// call `lapic_eoi()`: a spurious interrupt must not be acknowledged with EOI.
interrupt!(lapic_spurious, || {
SPURIOUS_COUNT_LAPIC.fetch_add(1, Ordering::Relaxed);
});
interrupt_error!(generic_irq, |_stack, code| {
let mut token = unsafe { CleanLockToken::new() };
+4 -12
View File
@@ -137,22 +137,14 @@ pub unsafe fn mwait_loop(eax_hint: u32, ecx_hint: u32) {
/// is, break on any interrupt".
pub unsafe fn idle_loop() {
let max_substate = cpuid_max_mwait_substate();
let percpu = crate::percpu::PercpuBlock::current();
if max_substate == 0 {
// No MWAIT support. Land in C1 via hlt. This matches the
// pre-MWAIT behavior of `enable_and_halt` and is safe on
// every x86 CPU since the original Pentium.
percpu.stats.record_hlt_entry();
enable_and_halt();
} else {
// MWAIT supported. Enter the deepest substate, break on any
// interrupt (ecx=0).
//
// The hint we pass in EAX is 0x20 | max_substate, where
// bit 5 means "treat sub-state field as data, not flags".
// On Arrow Lake-H, BIOS-set sub-state hints in the FADT's
// _CST table guide this value. The kernel doesn't pick
// the state — that's the BIOS/firmware's job.
let eax_hint: u32 = 0x20 | (max_substate as u32);
enable_and_halt(); // interrupts must be enabled first
percpu.stats.record_mwait_entry();
enable_and_halt();
mwait_loop(eax_hint, 0);
}
}
+13 -1
View File
@@ -74,7 +74,19 @@ pub fn signal_handler(token: &mut CleanLockToken) {
pub fn excp_handler(excp: syscall::Exception) {
let mut token = unsafe { CleanLockToken::new() };
let current = context::current();
let Some(current) = context::try_current() else {
let kind = excp.kind;
let code = excp.code;
let address = excp.address;
info!(
"excp_handler: no current context (early boot), CPU {}, kind {}, code {}, address {:#x}",
crate::cpu_id(),
kind,
code,
address
);
panic!("unhandled exception during early boot (no context)");
};
let context = current.write(token.token());
+18
View File
@@ -36,6 +36,8 @@ pub struct CpuStats {
state: AtomicU8,
pub context_switches: AtomicU64,
pub steals: AtomicU64,
hlt_entries: AtomicU64,
mwait_entries: AtomicU64,
}
impl CpuStats {
@@ -49,6 +51,8 @@ impl CpuStats {
state: AtomicU8::new(0),
context_switches: AtomicU64::new(0),
steals: AtomicU64::new(0),
hlt_entries: AtomicU64::new(0),
mwait_entries: AtomicU64::new(0),
}
}
}
@@ -61,6 +65,8 @@ pub struct CpuStatsData {
pub irq: u64,
pub context_switches: u64,
pub steals: u64,
pub hlt_entries: u64,
pub mwait_entries: u64,
}
impl CpuStats {
@@ -103,6 +109,16 @@ impl CpuStats {
IRQ_COUNT[irq as usize].fetch_add(1, Ordering::Relaxed);
self.irq.fetch_add(1, Ordering::Relaxed);
}
/// Counts one idle entry taken through the HLT path (called by `idle_loop`
/// when MWAIT is unavailable). Relaxed ordering: this is a statistic only.
#[inline]
pub fn record_hlt_entry(&self) {
self.hlt_entries.fetch_add(1, Ordering::Relaxed);
}
/// Counts one idle entry taken through the MWAIT path (called by `idle_loop`
/// when MWAIT is available). Relaxed ordering: this is a statistic only.
#[inline]
pub fn record_mwait_entry(&self) {
self.mwait_entries.fetch_add(1, Ordering::Relaxed);
}
}
impl fmt::Display for CpuStatsData {
@@ -125,6 +141,8 @@ impl From<&CpuStats> for CpuStatsData {
irq: val.irq.load(Ordering::Relaxed),
context_switches: val.context_switches.load(Ordering::Relaxed),
steals: val.steals.load(Ordering::Relaxed),
hlt_entries: val.hlt_entries.load(Ordering::Relaxed),
mwait_entries: val.mwait_entries.load(Ordering::Relaxed),
}
}
}
+109
View File
@@ -0,0 +1,109 @@
use alloc::vec::Vec;
use crate::cpu_set::{LogicalCpuId, MAX_CPU_COUNT};
use core::sync::atomic::{AtomicU32, Ordering};
/// Topology information recorded for one logical CPU.
///
/// Plain data: entries compare by value, which lets callers deduplicate or
/// diff snapshots.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CpuTopologyEntry {
    /// Local APIC ID the CPU reported when it was recorded.
    pub apic_id: u32,
    /// Core identifier derived during detection (stored with 16-bit precision).
    pub core_id: u32,
    /// Hardware-thread index within the core (stored with 16-bit precision).
    pub thread_id: u32,
    /// Largest threads-per-core value seen across all recorded CPUs, at least 1.
    pub threads_per_core: u32,
}
/// APIC ID per logical CPU, indexed by logical CPU id. `u32::MAX` marks a
/// slot that `record_cpu` has not filled yet; `get_entry` returns `None` for it.
static APIC_IDS: [AtomicU32; MAX_CPU_COUNT as usize] =
[const { AtomicU32::new(u32::MAX) }; MAX_CPU_COUNT as usize];
/// Packed topology word per logical CPU: `core_id` in the upper 16 bits and
/// `thread_id` in the lower 16 bits. `u32::MAX` again means "not recorded".
static CORE_IDS: [AtomicU32; MAX_CPU_COUNT as usize] =
[const { AtomicU32::new(u32::MAX) }; MAX_CPU_COUNT as usize];
/// Largest threads-per-core value reported by any recorded CPU; 0 until the
/// first `record_cpu` call (`get_entry` clamps it to at least 1).
static THREADS_PER_CORE: AtomicU32 = AtomicU32::new(0);
/// Record the topology of the logical CPU `cpu_id`, whose local APIC ID is
/// `apic_id`.
///
/// Detection queries CPUID on the calling CPU, so this must run on the CPU
/// being described (LAPIC init calls it that way). Logical CPU ids at or
/// beyond `MAX_CPU_COUNT` are silently ignored.
pub fn record_cpu(cpu_id: LogicalCpuId, apic_id: u32) {
    let idx = cpu_id.get() as usize;
    if idx >= MAX_CPU_COUNT as usize {
        return;
    }

    let (core_id, thread_id, threads_per_core) = detect_topology(apic_id);

    // Pack both ids into one word, 16 bits each; wider values are truncated.
    let packed = ((core_id & 0xFFFF) << 16) | (thread_id & 0xFFFF);

    // Several CPUs may run this at the same time during bring-up. Keep the
    // running maximum with one atomic read-modify-write; a separate load and
    // store could let a smaller value overwrite a larger one.
    THREADS_PER_CORE.fetch_max(threads_per_core, Ordering::AcqRel);

    // Publish the APIC id last: `get_entry` checks it first as the "slot is
    // filled" marker, and its Acquire load then also observes the stores above.
    CORE_IDS[idx].store(packed, Ordering::Release);
    APIC_IDS[idx].store(apic_id, Ordering::Release);
}
/// Look up the recorded topology for `cpu_id`.
///
/// Returns `None` when the id is outside the per-CPU tables or when either
/// table still holds the `u32::MAX` "not recorded" sentinel for that CPU.
pub fn get_entry(cpu_id: LogicalCpuId) -> Option<CpuTopologyEntry> {
    let slot = cpu_id.get() as usize;

    // Both tables have `MAX_CPU_COUNT` slots, so a failed `get` is exactly the
    // out-of-range case.
    let apic_id = match APIC_IDS.get(slot)?.load(Ordering::Acquire) {
        u32::MAX => return None,
        recorded => recorded,
    };
    let packed = match CORE_IDS.get(slot)?.load(Ordering::Acquire) {
        u32::MAX => return None,
        recorded => recorded,
    };

    // A count of zero means nothing has been recorded globally; report 1.
    let threads_per_core = match THREADS_PER_CORE.load(Ordering::Acquire) {
        0 => 1,
        seen => seen,
    };

    Some(CpuTopologyEntry {
        apic_id,
        // Upper half of the packed word is the core, lower half the thread.
        core_id: packed >> 16,
        thread_id: packed & 0xFFFF,
        threads_per_core,
    })
}
/// Collect the topology entries of every logical CPU below `crate::cpu_count()`
/// that has been recorded, in ascending logical-id order. CPUs without an
/// entry are skipped.
pub fn all_entries() -> Vec<(LogicalCpuId, CpuTopologyEntry)> {
    (0..crate::cpu_count())
        .filter_map(|raw| {
            let cpu = LogicalCpuId::new(raw);
            get_entry(cpu).map(|topo| (cpu, topo))
        })
        .collect()
}
/// Derive `(core_id, thread_id, threads_per_core)` for the calling CPU from
/// CPUID, given the APIC ID it reported.
///
/// Sources are tried from most to least precise:
/// 1. the AMD processor-topology leaf (`get_processor_topology_info`);
/// 2. the SMT level of the extended topology enumeration leaf, which gives the
///    exact number of APIC-ID bits used for the thread index;
/// 3. the legacy leaf-1 logical-processor count, only when the HTT flag marks
///    that field as valid.
///
/// If none applies the CPU is reported as a single-threaded core whose core id
/// equals its APIC ID.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn detect_topology(initial_apic_id: u32) -> (u32, u32, u32) {
    use raw_cpuid::{CpuId, TopologyType};

    let cpuid = CpuId::new();

    if let Some(topo) = cpuid.get_processor_topology_info() {
        let threads_per_core = topo.threads_per_core() as u32;
        let core_id = topo.core_id() as u32;
        if threads_per_core > 0 {
            let thread_id = initial_apic_id % threads_per_core;
            return (core_id, thread_id, threads_per_core);
        }
    }

    if let Some(levels) = cpuid.get_extended_topology_info() {
        for level in levels {
            if !matches!(level.level_type(), TopologyType::SMT) {
                continue;
            }
            // The SMT level reports how many low APIC-ID bits select the
            // hardware thread; everything above them identifies the core.
            // The shift is a 5-bit field, so it is always below 32.
            let shift = level.shift_right_for_next_apic_id();
            let thread_mask = (1u32 << shift) - 1;
            let threads_per_core = (level.processors() as u32).max(1);
            return (
                initial_apic_id >> shift,
                initial_apic_id & thread_mask,
                threads_per_core,
            );
        }
    }

    if let Some(feature_info) = cpuid.get_feature_info() {
        // NOTE(review): this field counts addressable logical-processor IDs
        // per physical package, not per core, so on multi-core parts it
        // over-estimates. It is kept only as a last resort for CPUs without
        // the leaves above, and is meaningful only when HTT is set.
        let threads_per_core = feature_info.max_logical_processor_ids() as u32;
        if feature_info.has_htt() && threads_per_core > 1 {
            let core_id = initial_apic_id / threads_per_core;
            let thread_id = initial_apic_id % threads_per_core;
            return (core_id, thread_id, threads_per_core);
        }
    }

    (initial_apic_id, 0, 1)
}

/// Non-x86 fallback: no CPUID is available, so every CPU is reported as a
/// single-threaded core whose core id equals the id passed in.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
fn detect_topology(initial_apic_id: u32) -> (u32, u32, u32) {
    (initial_apic_id, 0, 1)
}
+1
View File
@@ -0,0 +1 @@
#![cfg_attr(not(test), no_std)]
+5
View File
@@ -51,6 +51,9 @@ mod cpu_set;
/// Stats for the CPUs
mod cpu_stats;
/// CPU topology detection (SMT/core mapping)
mod cpu_topology;
/// Context management
mod context;
@@ -70,6 +73,8 @@ mod log;
/// Memory management
mod memory;
mod numa;
/// Panic
mod panic;
+1 -1
View File
@@ -7,7 +7,7 @@ use core::sync::atomic::{AtomicBool, Ordering};
const MAX_NUMA_NODES: usize = 8;
#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct NumaHint {
pub node_id: u8,
pub cpus: LogicalCpuSet,
+36
View File
@@ -0,0 +1,36 @@
use core::fmt::Write as _;
use crate::{
percpu::get_all_stats,
sync::CleanLockToken,
syscall::error::Result,
};
use alloc::{string::String, vec::Vec};
/// Render the `cstates` sys-scheme file: one line per CPU with its HLT and
/// MWAIT idle-entry counters, followed by a `total` line summing both.
pub fn resource(_token: &mut CleanLockToken) -> Result<Vec<u8>> {
    let per_cpu = get_all_stats();
    let mut report = String::new();
    let (mut hlt_sum, mut mwait_sum) = (0u64, 0u64);

    for (cpu, counters) in &per_cpu {
        let (hlt, mwait) = (counters.hlt_entries, counters.mwait_entries);
        // Formatting into a `String` does not fail, so the result is dropped.
        writeln!(
            report,
            "cpu{} hlt_entries {} mwait_entries {}",
            cpu.get(),
            hlt,
            mwait,
        )
        .ok();
        hlt_sum += hlt;
        mwait_sum += mwait;
    }

    writeln!(
        report,
        "total hlt_entries {} mwait_entries {}",
        hlt_sum, mwait_sum,
    )
    .ok();

    Ok(report.into_bytes())
}
+4
View File
@@ -28,6 +28,7 @@ use super::{CallerCtx, HandleMap, KernelScheme, OpenResult, StrOrBytes};
mod block;
mod context;
mod cpu;
mod cstates;
mod exe;
mod fdstat;
mod iostat;
@@ -37,6 +38,7 @@ mod msr;
mod sched;
mod stat;
mod syscall;
mod topology;
mod uname;
/// Extract the (cpu<<32 | msr) u64 handle stored in an MSR fd's
@@ -104,6 +106,8 @@ const FILES: &[(&str, Kind)] = &[
("block", Rd(block::resource)),
("context", Rd(context::resource)),
("cpu", Rd(cpu::resource)),
("cstates", Rd(cstates::resource)),
("topology", Rd(topology::resource)),
#[cfg(feature = "sys_fdstat")]
("fdstat", Rd(fdstat::resource)),
("exe", Rd(exe::resource)),
+31
View File
@@ -0,0 +1,31 @@
use core::fmt::Write as _;
use crate::{
cpu_topology,
sync::CleanLockToken,
syscall::error::Result,
};
use alloc::{string::String, vec::Vec};
/// Render the `topology` sys-scheme file: one line per recorded CPU with its
/// APIC id, core id, thread id and threads-per-core value, or a single
/// placeholder line when no CPU has been recorded yet.
pub fn resource(_token: &mut CleanLockToken) -> Result<Vec<u8>> {
    let known = cpu_topology::all_entries();
    let mut text = String::new();

    if known.is_empty() {
        // Formatting into a `String` does not fail, so the result is dropped.
        writeln!(text, "(topology not yet detected)").ok();
        return Ok(text.into_bytes());
    }

    for (cpu, topo) in &known {
        writeln!(
            text,
            "cpu{} apic_id {} core_id {} thread_id {} threads_per_core {}",
            cpu.get(),
            topo.apic_id,
            topo.core_id,
            topo.thread_id,
            topo.threads_per_core,
        )
        .ok();
    }

    Ok(text.into_bytes())
}