feat: P0-P6 kernel scheduler + relibc threading comprehensive implementation

P0-P2: Barrier SMP, sigmask/pthread_kill races, robust mutexes, RT scheduling, POSIX sched API
P3: PerCpuSched struct, per-CPU wiring, work stealing, load balancing, initial placement
P4: 64-shard futex table, REQUEUE, PI futexes (LOCK_PI/UNLOCK_PI/TRYLOCK_PI), robust futexes, vruntime tracking, min-vruntime SCHED_OTHER selection
P5: setpriority/getpriority, pthread_setaffinity_np, pthread_setname_np, pthread_setschedparam (Redox)
P6: Cache-affine scheduling (last_cpu + vruntime bonus), NUMA topology kernel hints + numad userspace daemon

Stability fixes: make_consistent stores 0 (dead TID fix), cond.rs error propagation, SPIN_COUNT adaptive spinning, Sys::open &str fix, PI futex CAS race, proc.rs lock ordering, barrier destroy

Patches: 33 kernel + 58 relibc patches, all tracked in recipes
Docs: KERNEL-SCHEDULER-MULTITHREAD-IMPROVEMENT-PLAN.md updated, SCHEDULER-REVIEW-FINAL.md created
Architecture: NUMA topology parsing stays userspace (numad daemon), kernel stores lightweight NumaTopology hints
This commit is contained in:
2026-04-30 18:21:48 +01:00
parent 55d00c3a24
commit 34360e1e4f
70 changed files with 15268 additions and 10 deletions
@@ -0,0 +1,180 @@
diff --git a/src/context/context.rs b/src/context/context.rs
index c97c516..a0814fa 100644
--- a/src/context/context.rs
+++ b/src/context/context.rs
@@ -18,7 +18,8 @@ use crate::{
cpu_stats,
ipi::{ipi, IpiKind, IpiTarget},
memory::{
- allocate_p2frame, deallocate_p2frame, Enomem, Frame, RaiiFrame, RmmA, RmmArch, PAGE_SIZE,
+ allocate_p2frame, deallocate_p2frame, Enomem, Frame, PhysicalAddress, RaiiFrame, RmmA,
+ RmmArch, PAGE_SIZE,
},
percpu::PercpuBlock,
scheme::{CallerCtx, FileHandle, SchemeId},
@@ -62,6 +63,38 @@ impl Status {
}
}
+pub const SCHED_PRIORITY_LEVELS: usize = 40;
+pub const DEFAULT_SCHED_OTHER_PRIORITY: usize = 20;
+pub const DEFAULT_SCHED_RR_QUANTUM: u128 = 100_000_000;
+
+#[repr(u8)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum SchedPolicy {
+ Fifo = 0,
+ RoundRobin = 1,
+ Other = 2,
+}
+
+impl SchedPolicy {
+ pub fn try_from_raw(raw: u8) -> Option<Self> {
+ match raw {
+ 0 => Some(Self::Fifo),
+ 1 => Some(Self::RoundRobin),
+ 2 => Some(Self::Other),
+ _ => None,
+ }
+ }
+}
+
+pub fn rt_priority_to_kernel_prio(rt_priority: u8) -> usize {
+ (SCHED_PRIORITY_LEVELS - 1)
+ .saturating_sub((usize::from(rt_priority.min(99)) * (SCHED_PRIORITY_LEVELS - 1)) / 99)
+}
+
+fn clamp_sched_other_prio(prio: usize) -> usize {
+ prio.min(SCHED_PRIORITY_LEVELS - 1)
+}
+
#[derive(Clone, Debug)]
pub enum HardBlockedReason {
/// "SIGSTOP", only procmgr is allowed to switch contexts this state
@@ -140,6 +173,20 @@ pub struct Context {
pub fmap_ret: Option<Frame>,
/// Priority
pub prio: usize,
+ pub sched_policy: SchedPolicy,
+ pub sched_rt_priority: u8,
+ pub sched_rr_ticks_consumed: u32,
+ pub sched_static_prio: usize,
+ pub sched_rr_quantum: u128,
+ /// Virtual runtime for SCHED_OTHER fair scheduling.
+ /// CPU-bound threads accumulate vruntime faster; I/O-bound stay lower.
+ pub vruntime: u128,
+ #[allow(dead_code)]
+ pub futex_pi_boost: bool,
+ #[allow(dead_code)]
+ pub futex_pi_original_prio: usize,
+ #[allow(dead_code)]
+ pub futex_pi_waiters: Vec<PhysicalAddress>,
// TODO: id can reappear after wraparound?
pub owner_proc_id: Option<NonZeroUsize>,
@@ -148,6 +195,8 @@ pub struct Context {
pub euid: u32,
pub egid: u32,
pub pid: usize,
+ /// Supplementary group IDs for access control decisions.
+ pub groups: Vec<u32>,
// See [`PreemptGuard`]
//
@@ -197,13 +246,23 @@ impl Context {
files: Arc::new(RwLock::new(FdTbl::new())),
userspace: false,
fmap_ret: None,
- prio: 20,
+ prio: DEFAULT_SCHED_OTHER_PRIORITY,
+ sched_policy: SchedPolicy::Other,
+ sched_rt_priority: 0,
+ sched_rr_ticks_consumed: 0,
+ sched_static_prio: DEFAULT_SCHED_OTHER_PRIORITY,
+ sched_rr_quantum: DEFAULT_SCHED_RR_QUANTUM,
+ vruntime: 0u128,
+ futex_pi_boost: false,
+ futex_pi_original_prio: DEFAULT_SCHED_OTHER_PRIORITY,
+ futex_pi_waiters: Vec::new(),
being_sigkilled: false,
owner_proc_id,
euid: 0,
egid: 0,
pid: 0,
+ groups: Vec::new(),
#[cfg(feature = "syscall_debug")]
syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(),
@@ -218,11 +277,47 @@ impl Context {
self.preempt_locks == 0
}
+ fn base_sched_prio(&self) -> usize {
+ match self.sched_policy {
+ SchedPolicy::Other => clamp_sched_other_prio(self.sched_static_prio),
+ SchedPolicy::Fifo | SchedPolicy::RoundRobin => {
+ rt_priority_to_kernel_prio(self.sched_rt_priority)
+ }
+ }
+ }
+
+ fn apply_sched_prio(&mut self) {
+ let base_prio = self.base_sched_prio();
+ if self.futex_pi_boost {
+ self.futex_pi_original_prio = base_prio;
+ self.prio = self.prio.min(base_prio);
+ } else {
+ self.futex_pi_original_prio = base_prio;
+ self.prio = base_prio;
+ }
+ }
+
+ pub fn set_sched_other_prio(&mut self, prio: usize) {
+ self.sched_static_prio = clamp_sched_other_prio(prio);
+ self.apply_sched_prio();
+ }
+
+ pub fn set_sched_policy(&mut self, sched_policy: SchedPolicy, rt_priority: u8) {
+ self.sched_policy = sched_policy;
+ self.sched_rt_priority = match sched_policy {
+ SchedPolicy::Other => 0,
+ SchedPolicy::Fifo | SchedPolicy::RoundRobin => rt_priority.min(99),
+ };
+ self.sched_rr_ticks_consumed = 0;
+ self.apply_sched_prio();
+ }
+
/// Block the context, and return true if it was runnable before being blocked
pub fn block(&mut self, reason: &'static str) -> bool {
if self.status.is_runnable() {
self.status = Status::Blocked;
self.status_reason = reason;
+ self.sched_rr_ticks_consumed = 0;
true
} else {
false
@@ -232,6 +327,7 @@ impl Context {
pub fn hard_block(&mut self, reason: HardBlockedReason) -> bool {
if self.status.is_runnable() {
self.status = Status::HardBlocked { reason };
+ self.sched_rr_ticks_consumed = 0;
true
} else {
@@ -261,6 +357,7 @@ impl Context {
if self.status.is_soft_blocked() {
self.status = Status::Runnable;
self.status_reason = "";
+ self.sched_rr_ticks_consumed = 0;
true
} else {
@@ -479,6 +576,7 @@ impl Context {
uid: self.euid,
gid: self.egid,
pid: self.pid,
+ groups: self.groups.clone(),
}
}
}