Tier 2: per-CPU sched stats, NUMA-aware scheduling, init numa
- CpuStats: add context_switches and steals AtomicU64 counters, remove redundant per_cpu field from CpuStatsData
- context/switch.rs: increment per-CPU switches at context switch, increment steals at work-steal; add NUMA vruntime bonus (1/8 for exact-CPU match, 1/16 for same-node)
- context/mod.rs: least_loaded_cpu() now NUMA-aware, prefers same-node CPUs (accepts <=1 extra queued context vs cross-node best)
- scheme/sys/sched.rs: new kernel handler exposing per-CPU scheduler stats (switches, steals, queue_depth) via /scheme/sys/sched
- startup/mod.rs: call numa::init_default() during boot (was dead code)
This commit is contained in:
+13
-1
@@ -127,8 +127,11 @@ pub fn run_contexts(token: LockToken<'_, L0>) -> MutexGuard<'_, L1, RunContextDa
|
||||
|
||||
fn least_loaded_cpu() -> LogicalCpuId {
|
||||
let current_cpu = crate::cpu_id();
|
||||
let topo = crate::numa::topology();
|
||||
let mut best_cpu = current_cpu;
|
||||
let mut best_depth = usize::MAX;
|
||||
let mut best_local_cpu = current_cpu;
|
||||
let mut best_local_depth = usize::MAX;
|
||||
|
||||
for raw_id in 0..crate::cpu_count() {
|
||||
let cpu_id = LogicalCpuId::new(raw_id);
|
||||
@@ -144,9 +147,18 @@ fn least_loaded_cpu() -> LogicalCpuId {
|
||||
best_depth = depth;
|
||||
best_cpu = cpu_id;
|
||||
}
|
||||
|
||||
if topo.same_node(current_cpu, cpu_id) && depth < best_local_depth {
|
||||
best_local_depth = depth;
|
||||
best_local_cpu = cpu_id;
|
||||
}
|
||||
}
|
||||
|
||||
best_cpu
|
||||
if best_local_depth < usize::MAX && best_local_depth <= best_depth + 1 {
|
||||
best_local_cpu
|
||||
} else {
|
||||
best_cpu
|
||||
}
|
||||
}
|
||||
|
||||
pub fn init(token: &mut CleanLockToken) {
|
||||
|
||||
@@ -162,6 +162,7 @@ pub fn switch(token: &mut CleanLockToken) -> SwitchResult {
|
||||
|
||||
let percpu = PercpuBlock::current();
|
||||
cpu_stats::add_context_switch();
|
||||
percpu.stats.context_switches.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
//set PIT Interrupt counter to 0, giving each process same amount of PIT ticks
|
||||
percpu.switch_internals.pit_ticks.set(0);
|
||||
@@ -424,6 +425,9 @@ fn steal_work(
|
||||
if let UpdateResult::CanSwitch = sw {
|
||||
assign_context_to_cpu(&mut context_guard, cpu_id);
|
||||
SCHED_STEAL_COUNT.fetch_add(1, Ordering::Relaxed);
|
||||
if let Some(thief) = get_percpu_block(cpu_id) {
|
||||
thief.stats.steals.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
return Some(context_guard);
|
||||
}
|
||||
|
||||
@@ -695,6 +699,10 @@ fn pick_next_from_queues(
|
||||
let mut vruntime = guard.vruntime;
|
||||
if guard.last_cpu == Some(cpu_id) {
|
||||
vruntime = vruntime.saturating_sub(vruntime / 8);
|
||||
} else if let Some(last_cpu) = guard.last_cpu {
|
||||
if crate::numa::topology().same_node(cpu_id, last_cpu) {
|
||||
vruntime = vruntime.saturating_sub(vruntime / 16);
|
||||
}
|
||||
}
|
||||
drop(guard);
|
||||
if vruntime < min_vruntime {
|
||||
@@ -870,6 +878,10 @@ fn pick_next_from_global_queues(
|
||||
let mut vruntime = guard.vruntime;
|
||||
if guard.last_cpu == Some(cpu_id) {
|
||||
vruntime = vruntime.saturating_sub(vruntime / 8);
|
||||
} else if let Some(last_cpu) = guard.last_cpu {
|
||||
if crate::numa::topology().same_node(cpu_id, last_cpu) {
|
||||
vruntime = vruntime.saturating_sub(vruntime / 16);
|
||||
}
|
||||
}
|
||||
drop(guard);
|
||||
if vruntime < min_vruntime {
|
||||
|
||||
+8
-11
@@ -28,18 +28,14 @@ pub enum CpuState {
|
||||
/// Statistics for the CPUs.
#[derive(Debug, Default)]
pub struct CpuStats {
    /// Number of ticks spent on userspace contexts
    user: AtomicU64,
    /// Number of ticks spent on Niced userspace contexts
    nice: AtomicU64,
    /// Number of ticks spent on kernel contexts
    kernel: AtomicU64,
    /// Number of ticks spent idle
    idle: AtomicU64,
    /// Number of times the CPU handled an interrupt
    irq: AtomicU64,
    /// Current state of the CPU
    state: AtomicU8,
    /// Number of context switches performed on this CPU
    pub context_switches: AtomicU64,
    /// Number of contexts this CPU took over through work stealing
    pub steals: AtomicU64,
}
|
||||
|
||||
impl CpuStats {
|
||||
@@ -51,21 +47,20 @@ impl CpuStats {
|
||||
idle: AtomicU64::new(0),
|
||||
irq: AtomicU64::new(0),
|
||||
state: AtomicU8::new(0),
|
||||
context_switches: AtomicU64::new(0),
|
||||
steals: AtomicU64::new(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Plain-integer copy of the counters held in [`CpuStats`].
pub struct CpuStatsData {
    /// Number of ticks spent on userspace contexts
    pub user: u64,
    /// Number of ticks spent on Niced userspace contexts
    pub nice: u64,
    /// Number of ticks spent on kernel contexts
    pub kernel: u64,
    /// Number of ticks spent idle
    pub idle: u64,
    /// Number of times the CPU handled an interrupt
    pub irq: u64,
    /// Number of context switches performed on the CPU
    pub context_switches: u64,
    /// Number of contexts the CPU took over through work stealing
    pub steals: u64,
}
|
||||
|
||||
impl CpuStats {
|
||||
@@ -128,6 +123,8 @@ impl From<&CpuStats> for CpuStatsData {
|
||||
kernel: val.kernel.load(Ordering::Relaxed),
|
||||
idle: val.idle.load(Ordering::Relaxed),
|
||||
irq: val.irq.load(Ordering::Relaxed),
|
||||
context_switches: val.context_switches.load(Ordering::Relaxed),
|
||||
steals: val.steals.load(Ordering::Relaxed),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ mod iostat;
|
||||
mod irq;
|
||||
mod log;
|
||||
mod msr;
|
||||
mod sched;
|
||||
mod stat;
|
||||
mod syscall;
|
||||
mod uname;
|
||||
@@ -115,6 +116,7 @@ const FILES: &[(&str, Kind)] = &[
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
("spurious_irq", Rd(interrupt::irq::spurious_irq_resource)),
|
||||
("stat", Rd(stat::resource)),
|
||||
("sched", Rd(sched::resource)),
|
||||
// Disabled because the debugger is inherently unsafe and probably will break the system.
|
||||
/*
|
||||
("trigger_debugger", Rd(|token| unsafe {
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
use core::fmt::Write as _;
|
||||
|
||||
use crate::{
|
||||
percpu::{get_all_stats, get_percpu_block},
|
||||
sync::CleanLockToken,
|
||||
syscall::error::Result,
|
||||
};
|
||||
use alloc::{string::String, vec::Vec};
|
||||
|
||||
pub fn resource(_token: &mut CleanLockToken) -> Result<Vec<u8>> {
|
||||
let stats = get_all_stats();
|
||||
let mut out = String::new();
|
||||
let mut total_switches: u64 = 0;
|
||||
let mut total_steals: u64 = 0;
|
||||
|
||||
for (id, stat) in &stats {
|
||||
let queue_depth = get_percpu_block(*id)
|
||||
.map(|p| {
|
||||
p.sched.take_lock();
|
||||
let d = unsafe { p.sched.queues().iter().map(|q| q.len()).sum::<usize>() };
|
||||
p.sched.release_lock();
|
||||
d
|
||||
})
|
||||
.unwrap_or(0);
|
||||
|
||||
let _ = writeln!(
|
||||
&mut out,
|
||||
"cpu{} switches {} steals {} queue_depth {}",
|
||||
id.get(),
|
||||
stat.context_switches,
|
||||
stat.steals,
|
||||
queue_depth,
|
||||
);
|
||||
|
||||
total_switches += stat.context_switches;
|
||||
total_steals += stat.steals;
|
||||
}
|
||||
|
||||
let _ = writeln!(
|
||||
&mut out,
|
||||
"total switches {} steals {}",
|
||||
total_switches, total_steals,
|
||||
);
|
||||
|
||||
Ok(out.into_bytes())
|
||||
}
|
||||
+2
-1
@@ -159,7 +159,8 @@ pub(crate) fn kmain(bootstrap: Bootstrap) -> ! {
|
||||
//Initialize the first context, stored in kernel/src/context/mod.rs
|
||||
context::init(&mut token);
|
||||
|
||||
//Initialize global schemes, such as `acpi:`.
|
||||
crate::numa::init_default();
|
||||
|
||||
scheme::init_globals();
|
||||
|
||||
debug!("BSP: {} CPUs", crate::cpu_count());
|
||||
|
||||
Reference in New Issue
Block a user