34360e1e4f
P0-P2: Barrier SMP, sigmask/pthread_kill races, robust mutexes, RT scheduling, POSIX sched API
P3: PerCpuSched struct, per-CPU wiring, work stealing, load balancing, initial placement
P4: 64-shard futex table, REQUEUE, PI futexes (LOCK_PI/UNLOCK_PI/TRYLOCK_PI), robust futexes, vruntime tracking, min-vruntime SCHED_OTHER selection
P5: setpriority/getpriority, pthread_setaffinity_np, pthread_setname_np, pthread_setschedparam (Redox)
P6: Cache-affine scheduling (last_cpu + vruntime bonus), NUMA topology kernel hints + numad userspace daemon
Stability fixes: make_consistent stores 0 (dead TID fix), cond.rs error propagation, SPIN_COUNT adaptive spinning, Sys::open &str fix, PI futex CAS race, proc.rs lock ordering, barrier destroy
Patches: 33 kernel + 58 relibc patches, all tracked in recipes
Docs: KERNEL-SCHEDULER-MULTITHREAD-IMPROVEMENT-PLAN.md updated, SCHEDULER-REVIEW-FINAL.md created
Architecture: NUMA topology parsing stays userspace (numad daemon), kernel stores lightweight NumaTopology hints
151 lines
5.7 KiB
Diff
151 lines
5.7 KiB
Diff
diff --git a/src/context/switch.rs b/src/context/switch.rs
|
|
index 86684c8..aeb29c9 100644
|
|
--- a/src/context/switch.rs
|
|
+++ b/src/context/switch.rs
|
|
@@ -5,7 +5,7 @@
|
|
use crate::{
|
|
context::{
|
|
self, arch, idle_contexts, idle_contexts_try, run_contexts, ArcContextLockWriteGuard,
|
|
- Context, ContextLock, WeakContextRef,
|
|
+ Context, ContextLock, SchedPolicy, WeakContextRef,
|
|
},
|
|
cpu_set::LogicalCpuId,
|
|
cpu_stats::{self, CpuState},
|
|
@@ -33,35 +33,17 @@ const SCHED_PRIO_TO_WEIGHT: [usize; 40] = [
|
|
70, 56, 45, 36, 29, 23, 18, 15,
|
|
];
|
|
|
|
-/// Determines if a given context is eligible to be scheduled on a given CPU (in
|
|
-/// principle, the current CPU).
|
|
-///
|
|
-/// # Safety
|
|
-/// This function is unsafe because it modifies the `context`'s state directly without synchronization.
|
|
-///
|
|
-/// # Parameters
|
|
-/// - `context`: The context (process/thread) to be checked.
|
|
-/// - `cpu_id`: The logical ID of the CPU on which the context is being scheduled.
|
|
-///
|
|
-/// # Returns
|
|
-/// - `UpdateResult::CanSwitch`: If the context can be switched to.
|
|
-/// - `UpdateResult::Skip`: If the context should be skipped (e.g., it's running on another CPU).
|
|
unsafe fn update_runnable(
|
|
context: &mut Context,
|
|
cpu_id: LogicalCpuId,
|
|
switch_time: u128,
|
|
) -> UpdateResult {
|
|
- // Ignore contexts that are already running.
|
|
if context.running {
|
|
return UpdateResult::Skip;
|
|
}
|
|
-
|
|
- // Ignore contexts assigned to other CPUs.
|
|
if !context.sched_affinity.contains(cpu_id) {
|
|
return UpdateResult::Skip;
|
|
}
|
|
-
|
|
- // If context is soft-blocked and has a wake-up time, check if it should wake up.
|
|
if context.status.is_soft_blocked()
|
|
&& let Some(wake) = context.wake
|
|
&& switch_time >= wake
|
|
@@ -69,8 +51,6 @@ unsafe fn update_runnable(
|
|
context.wake = None;
|
|
context.unblock_no_ipi();
|
|
}
|
|
-
|
|
- // If the context is runnable, indicate it can be switched to.
|
|
if context.status.is_runnable() {
|
|
UpdateResult::CanSwitch
|
|
} else {
|
|
@@ -95,7 +75,7 @@ pub fn tick(token: &mut CleanLockToken) {
|
|
let new_ticks = ticks_cell.get() + 1;
|
|
ticks_cell.set(new_ticks);
|
|
|
|
- // Trigger a context switch after every 3 ticks (approx. 6.75 ms).
|
|
+ // Trigger a context switch after every 3 ticks.
|
|
if new_ticks >= 3 {
|
|
switch(token);
|
|
crate::context::signal::signal_handler(token);
|
|
@@ -167,10 +147,7 @@ pub fn switch(token: &mut CleanLockToken) -> SwitchResult {
|
|
let mut prev_context_guard = unsafe { prev_context_lock.write_arc() };
|
|
|
|
if !prev_context_guard.is_preemptable() {
|
|
- // Unset global lock
|
|
arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst);
|
|
-
|
|
- // Pretend to have finished switching, so CPU is not idled
|
|
return SwitchResult::Switched;
|
|
}
|
|
|
|
@@ -377,6 +354,71 @@ fn select_next_context(
|
|
let total_contexts: usize = contexts_list.iter().map(|q| q.len()).sum();
|
|
let mut skipped_contexts = 0;
|
|
|
|
+ // PASS 0: SCHED_FIFO and SCHED_RR — scan for RT contexts to schedule.
|
|
+ // When a runnable RT context is found, it takes priority over all SCHED_OTHER.
|
|
+ for prio in 0..40 {
|
|
+ let rt_contexts = contexts_list
|
|
+ .get_mut(prio)
|
|
+ .expect("prio should be between [0, 39]");
|
|
+ let len = rt_contexts.len();
|
|
+ for _ in 0..len {
|
|
+ let (rt_ref, rt_lock) = match rt_contexts.pop_front() {
|
|
+ Some(lock) => match lock.upgrade() {
|
|
+ Some(l) => (lock, l),
|
|
+ None => {
|
|
+ skipped_contexts += 1;
|
|
+ continue;
|
|
+ }
|
|
+ },
|
|
+ None => break,
|
|
+ };
|
|
+ if Arc::ptr_eq(&rt_lock, &idle_context) {
|
|
+ rt_contexts.push_back(rt_ref);
|
|
+ continue;
|
|
+ }
|
|
+ // Current RT thread: if runnable with no higher-prio RT found yet,
|
|
+ // keep it running (no demotion to SCHED_OTHER)
|
|
+ if Arc::ptr_eq(&rt_lock, &prev_context_lock) {
|
|
+ let mut rt_guard = unsafe { rt_lock.write_arc() };
|
|
+ if rt_guard.status.is_runnable()
|
|
+ && (rt_guard.sched_policy == SchedPolicy::Fifo
|
|
+ || rt_guard.sched_policy == SchedPolicy::RoundRobin)
|
|
+ {
|
|
+ percpu.balance.set(balance);
|
|
+ percpu.last_queue.set(i);
|
|
+ return Ok(Some(rt_guard));
|
|
+ }
|
|
+ rt_contexts.push_back(rt_ref);
|
|
+ continue;
|
|
+ }
|
|
+ let mut rt_guard = unsafe { rt_lock.write_arc() };
|
|
+ if !rt_guard.status.is_runnable() || rt_guard.running
|
|
+ || !rt_guard.sched_affinity.contains(cpu_id)
|
|
+ {
|
|
+ rt_contexts.push_back(rt_ref);
|
|
+ continue;
|
|
+ }
|
|
+ if rt_guard.sched_policy == SchedPolicy::Fifo
|
|
+ || rt_guard.sched_policy == SchedPolicy::RoundRobin
|
|
+ {
|
|
+ percpu.balance.set(balance);
|
|
+ percpu.last_queue.set(i);
|
|
+ if !Arc::ptr_eq(&prev_context_lock, &idle_context) {
|
|
+ let prev_ctx = WeakContextRef(Arc::downgrade(&prev_context_lock));
|
|
+ if prev_context_guard.status.is_runnable() {
|
|
+ contexts_list[prev_context_guard.prio].push_back(prev_ctx);
|
|
+ } else {
|
|
+ idle_contexts(token.token()).push_back(prev_ctx);
|
|
+ }
|
|
+ }
|
|
+ return Ok(Some(rt_guard));
|
|
+ }
|
|
+ rt_contexts.push_back(rt_ref);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // PASS 1: SCHED_OTHER — existing DWRR deficit tracking
|
|
+
|
|
'priority: loop {
|
|
i = (i + 1) % 40;
|
|
total_iters += 1;
|