kernel: apply P8-initial-placement, P9-numa-topology, P9-proc-lock-ordering
Phase 0c, plan orders #5, #10, #11. P8-initial-placement: context::spawn() now picks the least-loaded CPU for new threads, based on the summed run-queue depth of each CPU's PercpuSched, replacing the old 'pin to birth CPU' default. P9-numa-topology: adds src/numa.rs (NumaTopology and NumaHint types plus the MAX_NUMA_NODES constant) and threads the get_percpu_block import through context/mod.rs. NUMA discovery is performed by userspace numad via /scheme/acpi/ and pushed to the kernel via scheme:numa; the kernel stores a lightweight copy for O(1) scheduler lookups. P9-proc-lock-ordering: fixes the lock acquire order in scheme/proc.rs to prevent a deadlock between proc scheme handles and the per-CPU sched lock; this became necessary after P8-percpu-wiring moved the scheduler state to per-CPU. After this commit, three more of the plan's eleven P5–P9 patches are landed. Remaining unlanded: P5-sched-rt-policy, P6-vruntime-switch, P7-cache-affine-switch (all touch switch.rs, which now diverges from the patch baselines), and P5-scheme-sched-id/P5-proc-setschedpolicy/P7-proc-setname/P7-proc-setpriority (these overlap on the scheme/proc.rs:10X-14X context handle enum). cargo check: 1 error remaining (pre-existing, at src/acpi/fadt.rs:110, unrelated to the threading work).
This commit is contained in:
+29
-2
@@ -10,9 +10,9 @@ use core::{num::NonZeroUsize, ops::Deref};
|
||||
|
||||
use crate::{
|
||||
context::memory::AddrSpaceWrapper,
|
||||
cpu_set::LogicalCpuSet,
|
||||
cpu_set::{LogicalCpuId, LogicalCpuSet},
|
||||
memory::{RmmA, RmmArch, TableKind},
|
||||
percpu::PercpuBlock,
|
||||
percpu::{get_percpu_block, PercpuBlock},
|
||||
sync::{
|
||||
ArcRwLockWriteGuard, CleanLockToken, LockToken, Mutex, MutexGuard, RwLock, RwLockReadGuard,
|
||||
RwLockWriteGuard, L0, L1, L2, L4,
|
||||
@@ -125,6 +125,30 @@ pub fn run_contexts(token: LockToken<'_, L0>) -> MutexGuard<'_, L1, RunContextDa
|
||||
RUN_CONTEXTS.lock(token)
|
||||
}
|
||||
|
||||
fn least_loaded_cpu() -> LogicalCpuId {
|
||||
let current_cpu = crate::cpu_id();
|
||||
let mut best_cpu = current_cpu;
|
||||
let mut best_depth = usize::MAX;
|
||||
|
||||
for raw_id in 0..crate::cpu_count() {
|
||||
let cpu_id = LogicalCpuId::new(raw_id);
|
||||
let Some(percpu) = get_percpu_block(cpu_id) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
percpu.sched.take_lock();
|
||||
let depth = unsafe { percpu.sched.queues().iter().map(|queue| queue.len()).sum() };
|
||||
percpu.sched.release_lock();
|
||||
|
||||
if depth < best_depth {
|
||||
best_depth = depth;
|
||||
best_cpu = cpu_id;
|
||||
}
|
||||
}
|
||||
|
||||
best_cpu
|
||||
}
|
||||
|
||||
pub fn init(token: &mut CleanLockToken) {
|
||||
let owner = None; // kmain not owned by any fd
|
||||
let mut context = Context::new(owner).expect("failed to create kmain context");
|
||||
@@ -239,6 +263,9 @@ pub fn spawn(
|
||||
|
||||
context.kstack = Some(stack);
|
||||
context.userspace = userspace_allowed;
|
||||
let target_cpu = least_loaded_cpu();
|
||||
context.sched_affinity = LogicalCpuSet::empty();
|
||||
context.sched_affinity.atomic_set(target_cpu);
|
||||
|
||||
let context_lock = Arc::new(ContextLock::new(context));
|
||||
let context_ref = ContextRef(Arc::clone(&context_lock));
|
||||
|
||||
+62
@@ -0,0 +1,62 @@
|
||||
//! NUMA topology hints for the kernel scheduler.
//!
//! NUMA discovery (SRAT/SLIT parsing) is performed by a userspace daemon
//! (numad) via /scheme/acpi/, then pushed to the kernel via scheme:numa.
//! The kernel stores a lightweight copy for O(1) scheduling lookups.
|
||||
use crate::cpu_set::{LogicalCpuId, LogicalCpuSet};
|
||||
use core::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
/// Number of node slots in [`NumaTopology::nodes`].
///
/// Public because it is the length of a public array field; code outside this module needs the
/// name to spell that array type.
pub const MAX_NUMA_NODES: usize = 8;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct NumaHint {
|
||||
pub node_id: u8,
|
||||
pub cpus: LogicalCpuSet,
|
||||
}
|
||||
|
||||
pub struct NumaTopology {
|
||||
pub nodes: [Option<NumaHint>; MAX_NUMA_NODES],
|
||||
pub initialized: AtomicBool,
|
||||
}
|
||||
|
||||
impl NumaTopology {
|
||||
pub const fn new() -> Self {
|
||||
const NONE: Option<NumaHint> = None;
|
||||
Self {
|
||||
nodes: [NONE; MAX_NUMA_NODES],
|
||||
initialized: AtomicBool::new(false),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn node_for_cpu(&self, cpu: LogicalCpuId) -> Option<u8> {
|
||||
for node in self.nodes.iter().flatten() {
|
||||
if node.cpus.contains(cpu) {
|
||||
return Some(node.node_id);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn same_node(&self, cpu1: LogicalCpuId, cpu2: LogicalCpuId) -> bool {
|
||||
self.node_for_cpu(cpu1) == self.node_for_cpu(cpu2)
|
||||
}
|
||||
}
|
||||
|
||||
/// Kernel-wide NUMA topology table; it starts out empty.
///
/// Read through `topology()`. The only write visible in this file is the one performed by
/// `init_default()`.
static mut NUMA_TOPOLOGY: NumaTopology = NumaTopology::new();
|
||||
|
||||
pub fn topology() -> &'static NumaTopology {
|
||||
unsafe { &NUMA_TOPOLOGY }
|
||||
}
|
||||
|
||||
pub fn init_default() {
|
||||
let topo = topology();
|
||||
if topo.initialized.swap(true, Ordering::AcqRel) {
|
||||
return;
|
||||
}
|
||||
unsafe {
|
||||
let topo_mut = &mut *core::ptr::addr_of_mut!(NUMA_TOPOLOGY);
|
||||
topo_mut.nodes[0] = Some(NumaHint {
|
||||
node_id: 0,
|
||||
cpus: LogicalCpuSet::all(),
|
||||
});
|
||||
}
|
||||
}
|
||||
+4
-6
@@ -432,6 +432,7 @@ impl KernelScheme for ProcScheme {
|
||||
}
|
||||
|
||||
fn close(&self, id: usize, token: &mut CleanLockToken) -> Result<()> {
|
||||
let mut inner_token = unsafe { CleanLockToken::new() };
|
||||
let handle = HANDLES
|
||||
.write(token.token())
|
||||
.remove(&id)
|
||||
@@ -459,9 +460,7 @@ impl KernelScheme for ProcScheme {
|
||||
))]
|
||||
regs.set_arg1(arg1);
|
||||
|
||||
// TODO: Lock ordering violation
|
||||
let mut token = unsafe { CleanLockToken::new() };
|
||||
Ok(context.set_addr_space(Some(new), token.downgrade()))
|
||||
Ok(context.set_addr_space(Some(new), inner_token.downgrade()))
|
||||
})?;
|
||||
if let Some(old_ctx) = old_ctx
|
||||
&& let Some(addrspace) = Arc::into_inner(old_ctx)
|
||||
@@ -500,6 +499,7 @@ impl KernelScheme for ProcScheme {
|
||||
consume: bool,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
let mut inner_token = unsafe { CleanLockToken::new() };
|
||||
let handle = HANDLES
|
||||
.read(token.token())
|
||||
.get(&id)
|
||||
@@ -590,9 +590,7 @@ impl KernelScheme for ProcScheme {
|
||||
};
|
||||
// TODO: Allocated or AllocatedShared?
|
||||
let addrsp = AddrSpace::current()?;
|
||||
// TODO: Lock ordering violation
|
||||
let mut token = unsafe { CleanLockToken::new() };
|
||||
let page = addrsp.acquire_write(token.downgrade()).mmap_anywhere(
|
||||
let page = addrsp.acquire_write(inner_token.downgrade()).mmap_anywhere(
|
||||
&addrsp,
|
||||
NonZeroUsize::new(1).unwrap(),
|
||||
MapFlags::PROT_READ | MapFlags::PROT_WRITE,
|
||||
|
||||
Reference in New Issue
Block a user