Tier 2: per-CPU sched stats, NUMA-aware scheduling, NUMA init at boot

- CpuStats: add context_switches and steals AtomicU64 counters,
  remove redundant per_cpu field from CpuStatsData
- context/switch.rs: increment per-CPU switches at context switch,
  increment steals at work-steal; add NUMA vruntime bonus (1/8 for
  exact-CPU match, 1/16 for same-node)
- context/mod.rs: least_loaded_cpu() now NUMA-aware, prefers same-node
  CPUs (accepts <=1 extra queued context vs cross-node best)
- scheme/sys/sched.rs: new kernel handler exposing per-CPU scheduler
  stats (switches, steals, queue_depth) via /scheme/sys/sched
- startup/mod.rs: call numa::init_default() during boot (was dead code)
This commit is contained in:
2026-07-02 21:40:20 +03:00
parent e812356cf0
commit c6a5b7a1ad
6 changed files with 83 additions and 13 deletions
+13 -1
View File
@@ -127,8 +127,11 @@ pub fn run_contexts(token: LockToken<'_, L0>) -> MutexGuard<'_, L1, RunContextDa
fn least_loaded_cpu() -> LogicalCpuId {
let current_cpu = crate::cpu_id();
let topo = crate::numa::topology();
let mut best_cpu = current_cpu;
let mut best_depth = usize::MAX;
let mut best_local_cpu = current_cpu;
let mut best_local_depth = usize::MAX;
for raw_id in 0..crate::cpu_count() {
let cpu_id = LogicalCpuId::new(raw_id);
@@ -144,9 +147,18 @@ fn least_loaded_cpu() -> LogicalCpuId {
best_depth = depth;
best_cpu = cpu_id;
}
if topo.same_node(current_cpu, cpu_id) && depth < best_local_depth {
best_local_depth = depth;
best_local_cpu = cpu_id;
}
}
best_cpu
if best_local_depth < usize::MAX && best_local_depth <= best_depth + 1 {
best_local_cpu
} else {
best_cpu
}
}
pub fn init(token: &mut CleanLockToken) {
+12
View File
@@ -162,6 +162,7 @@ pub fn switch(token: &mut CleanLockToken) -> SwitchResult {
let percpu = PercpuBlock::current();
cpu_stats::add_context_switch();
percpu.stats.context_switches.fetch_add(1, Ordering::Relaxed);
//set PIT Interrupt counter to 0, giving each process same amount of PIT ticks
percpu.switch_internals.pit_ticks.set(0);
@@ -424,6 +425,9 @@ fn steal_work(
if let UpdateResult::CanSwitch = sw {
assign_context_to_cpu(&mut context_guard, cpu_id);
SCHED_STEAL_COUNT.fetch_add(1, Ordering::Relaxed);
if let Some(thief) = get_percpu_block(cpu_id) {
thief.stats.steals.fetch_add(1, Ordering::Relaxed);
}
return Some(context_guard);
}
@@ -695,6 +699,10 @@ fn pick_next_from_queues(
let mut vruntime = guard.vruntime;
if guard.last_cpu == Some(cpu_id) {
vruntime = vruntime.saturating_sub(vruntime / 8);
} else if let Some(last_cpu) = guard.last_cpu {
if crate::numa::topology().same_node(cpu_id, last_cpu) {
vruntime = vruntime.saturating_sub(vruntime / 16);
}
}
drop(guard);
if vruntime < min_vruntime {
@@ -870,6 +878,10 @@ fn pick_next_from_global_queues(
let mut vruntime = guard.vruntime;
if guard.last_cpu == Some(cpu_id) {
vruntime = vruntime.saturating_sub(vruntime / 8);
} else if let Some(last_cpu) = guard.last_cpu {
if crate::numa::topology().same_node(cpu_id, last_cpu) {
vruntime = vruntime.saturating_sub(vruntime / 16);
}
}
drop(guard);
if vruntime < min_vruntime {
+8 -11
View File
@@ -28,18 +28,14 @@ pub enum CpuState {
/// Statistics for the CPUs.
#[derive(Debug, Default)]
pub struct CpuStats {
/// Number of ticks spent on userspace contexts
user: AtomicU64,
/// Number of ticks spent on Niced userspace contexts
nice: AtomicU64,
/// Number of ticks spent on kernel contexts
kernel: AtomicU64,
/// Number of ticks spent idle
idle: AtomicU64,
/// Number of times the CPU handled an interrupt
irq: AtomicU64,
/// Current state of the CPU
state: AtomicU8,
pub context_switches: AtomicU64,
pub steals: AtomicU64,
}
impl CpuStats {
@@ -51,21 +47,20 @@ impl CpuStats {
idle: AtomicU64::new(0),
irq: AtomicU64::new(0),
state: AtomicU8::new(0),
context_switches: AtomicU64::new(0),
steals: AtomicU64::new(0),
}
}
}
pub struct CpuStatsData {
/// Number of ticks spent on userspace contexts
pub user: u64,
/// Number of ticks spent on Niced userspace contexts
pub nice: u64,
/// Number of ticks spent on kernel contexts
pub kernel: u64,
/// Number of ticks spent idle
pub idle: u64,
/// Number of times the CPU handled an interrupt
pub irq: u64,
pub context_switches: u64,
pub steals: u64,
}
impl CpuStats {
@@ -128,6 +123,8 @@ impl From<&CpuStats> for CpuStatsData {
kernel: val.kernel.load(Ordering::Relaxed),
idle: val.idle.load(Ordering::Relaxed),
irq: val.irq.load(Ordering::Relaxed),
context_switches: val.context_switches.load(Ordering::Relaxed),
steals: val.steals.load(Ordering::Relaxed),
}
}
}
+2
View File
@@ -34,6 +34,7 @@ mod iostat;
mod irq;
mod log;
mod msr;
mod sched;
mod stat;
mod syscall;
mod uname;
@@ -115,6 +116,7 @@ const FILES: &[(&str, Kind)] = &[
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
("spurious_irq", Rd(interrupt::irq::spurious_irq_resource)),
("stat", Rd(stat::resource)),
("sched", Rd(sched::resource)),
// Disabled because the debugger is inherently unsafe and probably will break the system.
/*
("trigger_debugger", Rd(|token| unsafe {
+46
View File
@@ -0,0 +1,46 @@
use core::fmt::Write as _;
use crate::{
percpu::{get_all_stats, get_percpu_block},
sync::CleanLockToken,
syscall::error::Result,
};
use alloc::{string::String, vec::Vec};
/// Builds the text served for the `sched` entry: one line per CPU with its
/// context-switch count, steal count and current run-queue depth, followed
/// by a final line with the switch and steal totals across all CPUs.
///
/// A CPU for which `get_percpu_block` returns `None` is reported with a
/// queue depth of 0.
pub fn resource(_token: &mut CleanLockToken) -> Result<Vec<u8>> {
    let all_stats = get_all_stats();
    let mut report = String::new();
    let mut switches_sum: u64 = 0;
    let mut steals_sum: u64 = 0;

    for (cpu, counters) in &all_stats {
        let queued = match get_percpu_block(*cpu) {
            Some(block) => {
                // Hold the scheduler lock only for as long as the queues are read.
                block.sched.take_lock();
                // SAFETY: the lock taken just above is held until `release_lock`
                // below. NOTE(review): presumably that is the precondition of
                // `queues()` -- confirm against its contract.
                let depth: usize =
                    unsafe { block.sched.queues().iter().map(|queue| queue.len()).sum() };
                block.sched.release_lock();
                depth
            }
            None => 0,
        };

        switches_sum += counters.context_switches;
        steals_sum += counters.steals;

        // The formatting result is intentionally discarded.
        let _ = writeln!(
            &mut report,
            "cpu{} switches {} steals {} queue_depth {}",
            cpu.get(),
            counters.context_switches,
            counters.steals,
            queued,
        );
    }

    let _ = writeln!(
        &mut report,
        "total switches {} steals {}",
        switches_sum, steals_sum,
    );

    Ok(report.into_bytes())
}
+2 -1
View File
@@ -159,7 +159,8 @@ pub(crate) fn kmain(bootstrap: Bootstrap) -> ! {
//Initialize the first context, stored in kernel/src/context/mod.rs
context::init(&mut token);
//Initialize NUMA (numa::init_default), then global schemes, such as `acpi:`.
crate::numa::init_default();
scheme::init_globals();
debug!("BSP: {} CPUs", crate::cpu_count());