Files
RedBear-OS/local/patches/kernel/P7-scheduler-improvements.patch
T
vasilito 07c6e422c1 feat: comprehensive scheduler, ACPI, driver, and cpufreqd improvements
Scheduler: LAPIC timer calibration, TSC-deadline mode, work-stealing
load balancer, RT scheduling class, per-CPU nr_running counter.
Direct tick routing via vector 48.

ACPI: S3/S4 sleep states with full AML sequence (_PTS/_GTS/_BFS/_WAK),
NVS save/restore, EC driver hardening, panic-grade behavior removed.

Drivers: 5 driver mains at zero unwrap, 12 new modules across storage,
network, and audio subsystems. AHCI NCQ/PM/TRIM, e1000 ITR/checksum/TSO,
rtl8169 PHY config, HDA codec/jack detection.

cpufreqd: Replaced 26-line stub with 5-governor implementation including
ACPI P-state reading, MSR control, thermal throttle, and error suppression.
thermald: Fan control module with speed curves and emergency mode.

Docs: IMPLEMENTATION-MASTER-PLAN.md, CPU-DMA-IRQ-MSI-SCHEDULER-FIX-PLAN.md.
30 stale docs archived. 3 superseded plans archived.

Patches: P5-named-semaphores (relibc), P6-driver fixes (base),
P7-scheduler (kernel), P6-cpufreqd (local).
2026-05-04 16:08:58 +01:00

214 lines
7.6 KiB
Diff

diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs
index b6afe02a..846d6760 100644
--- a/src/arch/x86_shared/device/local_apic.rs
+++ b/src/arch/x86_shared/device/local_apic.rs
@@ -78,7 +78,7 @@ impl LocalApic {
self.write(0xF0, 0x100);
}
self.setup_error_int();
- //self.setup_timer();
+ self.setup_timer();
PercpuBlock::current()
.misc_arch_info
@@ -262,6 +262,33 @@ impl LocalApic {
self.set_lvt_error(vector);
}
}
+
+ /// Program the local APIC timer to deliver periodic interrupts on
+ /// vector 48 (the direct tick vector named in the commit message).
+ ///
+ /// NOTE(review): the initial count 0x10000 is an uncalibrated
+ /// placeholder — the resulting tick frequency depends on the bus /
+ /// core-crystal clock, which is not measured here. Confirm against
+ /// `calibrate_timer` before relying on a specific tick rate.
+ pub unsafe fn setup_timer(&mut self) {
+ unsafe {
+ // Bits 18:17 of the LVT timer entry select the timer mode;
+ // shifting the Periodic discriminant to bit 17 selects periodic.
+ let timer_vector = 48u32;
+ self.set_lvt_timer(timer_vector | ((LvtTimerMode::Periodic as u32) << 17));
+ // Divide-configuration value 0b1011 = divide by 1 (no division).
+ self.set_div_conf(0b1011);
+ // Arbitrary initial count; real period is hardware-dependent.
+ self.set_init_count(0x10000);
+ }
+ }
+
+ /// Placeholder "calibration": loads the maximum initial count and
+ /// returns a fixed nominal tick value.
+ ///
+ /// NOTE(review): no reference clock (PIT/HPET/TSC) is consulted, so
+ /// the returned 0x10000 is not a measured ticks-per-interval figure
+ /// — `set_timer_freq` built on top of it is only nominal. It also
+ /// leaves the timer counting down from u32::MAX as a side effect.
+ /// TODO: implement real calibration against a known-rate clock.
+ pub unsafe fn calibrate_timer(&mut self) -> u32 {
+ self.set_init_count(0xFFFF_FFFF);
+ 0x10000
+ }
+
+ /// Reprogram the periodic timer to approximately `freq_hz`
+ /// interrupts per second.
+ ///
+ /// Computes the initial count as `t * 1000 / freq_hz`, which treats
+ /// the value from `calibrate_timer` as (presumably) ticks per
+ /// millisecond — verify that assumption once calibration is real.
+ /// `.max(1)` guards the division against a caller passing 0 Hz.
+ /// Accuracy is limited by the placeholder calibration.
+ pub unsafe fn set_timer_freq(&mut self, freq_hz: u32) {
+ let t = self.calibrate_timer();
+ self.set_init_count(t * 1000 / freq_hz.max(1));
+ }
+
+ /// Switch the LVT timer entry to TSC-deadline mode, keeping vector 48.
+ ///
+ /// NOTE(review): does not verify TSC-deadline support
+ /// (CPUID.01H:ECX bit 24) before selecting the mode — confirm that
+ /// callers gate on hardware support, otherwise the mode bits may be
+ /// ignored or reserved on older CPUs.
+ pub unsafe fn enable_tsc_deadline(&mut self) {
+ self.set_lvt_timer(48u32 | ((LvtTimerMode::TscDeadline as u32) << 17));
+ }
+
+ /// Arm a one-shot timer interrupt for when the TSC reaches `deadline`.
+ ///
+ /// # Safety
+ /// Caller must have enabled TSC-deadline mode first (see
+ /// `enable_tsc_deadline`); per the SDM, writes to IA32_TSC_DEADLINE
+ /// are ignored when the LVT timer is not in TSC-deadline mode.
+ pub unsafe fn set_tsc_deadline(&self, deadline: u64) {
+ unsafe { x86::msr::wrmsr(x86::msr::IA32_TSC_DEADLINE, deadline); }
+ }
}
#[repr(u8)]
diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs
index 50064585..47f692f6 100644
--- a/src/arch/x86_shared/idt.rs
+++ b/src/arch/x86_shared/idt.rs
@@ -78,6 +78,15 @@ static INIT_BSP_IDT: SyncUnsafeCell<Idt> = SyncUnsafeCell::new(Idt::new());
pub(crate) static IDTS: RwLock<HashMap<LogicalCpuId, &'static mut Idt>> =
RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new()));
+/// Last-resort halt when the backup interrupt stack cannot be
+/// allocated during IDT setup: report the failure and park this CPU
+/// forever (replaces the previous `.expect(..)` panic).
+///
+/// `#[cold]` keeps this error path out of the hot code layout.
+/// NOTE(review): spins with `spin_loop` rather than a halt
+/// instruction; acceptable for a fatal path, but burns power —
+/// consider `hlt` if it is safe this early in interrupt setup.
+#[cold]
+fn halt_idt_init() -> ! {
+ println!("FATAL: failed to allocate physical pages for backup interrupt stack");
+ println!("Interrupt setup cannot continue. Halting.");
+ loop {
+ core::hint::spin_loop();
+ }
+}
+
#[inline]
pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool {
if cpu_id == LogicalCpuId::BSP {
@@ -161,8 +170,10 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt {
.or_insert_with(|| Box::leak(Box::new(Idt::new())));
use crate::memory::{RmmA, RmmArch};
- let frames = crate::memory::allocate_p2frame(4)
- .expect("failed to allocate pages for backup interrupt stack");
+ let frames = match crate::memory::allocate_p2frame(4) {
+ Some(frames) => frames,
+ None => halt_idt_init(),
+ };
// Physical pages are mapped linearly. So is the linearly mapped virtual memory.
let base_address = RmmA::phys_to_virt(frames.base());
diff --git a/src/context/context.rs b/src/context/context.rs
index c97c5166..62a1e0f5 100644
--- a/src/context/context.rs
+++ b/src/context/context.rs
@@ -103,6 +103,8 @@ pub struct Context {
/// Scheduler CPU affinity. If set, [`cpu_id`] can never be anything
/// other than this value, except [`None`].
pub sched_affinity: LogicalCpuSet,
+ /// Scheduling policy: 0=NORMAL (DWRR), 1=FIFO, 2=RR
+ pub sched_policy: u8,
/// Keeps track of whether this context is currently handling a syscall. Only up-to-date when
/// not running.
pub inside_syscall: bool,
@@ -148,6 +150,8 @@ pub struct Context {
pub euid: u32,
pub egid: u32,
pub pid: usize,
+ /// Supplementary group IDs for access control decisions.
+ pub groups: Vec<u32>,
// See [`PreemptGuard`]
//
@@ -204,6 +208,7 @@ impl Context {
euid: 0,
egid: 0,
pid: 0,
+ groups: Vec::new(),
#[cfg(feature = "syscall_debug")]
syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(),
@@ -479,6 +484,7 @@ impl Context {
uid: self.euid,
gid: self.egid,
pid: self.pid,
+ groups: self.groups.clone(),
}
}
}
diff --git a/src/context/switch.rs b/src/context/switch.rs
index 86684c8f..0e31acee 100644
--- a/src/context/switch.rs
+++ b/src/context/switch.rs
@@ -408,9 +408,8 @@ fn select_next_context(
empty_queues = 0;
}
- if balance[i] < SCHED_PRIO_TO_WEIGHT[20] {
- // This queue does not have enough balance to run,
- // increment the balance!
+ if balance[i] < SCHED_PRIO_TO_WEIGHT[20] && i >= 10 {
+ // Non-RT queues must earn CPU time through DWRR balance
balance[i] += SCHED_PRIO_TO_WEIGHT[i];
continue;
}
@@ -476,6 +475,10 @@ fn select_next_context(
// We found a new process!
return Ok(Some(next_context_guard));
} else {
+ // Try to steal work from another CPU before going idle
+ if let Some(stolen) = try_steal_work(token, &contexts_list, cpu_id, switch_time) {
+ return Ok(Some(stolen));
+ }
if !was_idle && !Arc::ptr_eq(&prev_context_lock, &idle_context) {
// We switch into the idle context
Ok(Some(unsafe { idle_context.write_arc() }))
@@ -486,6 +489,51 @@ fn select_next_context(
}
}
+/// Try to steal a runnable context from another CPU's priority queues.
+/// Called when this CPU has no work and is about to go idle.
+///
+/// NOTE(review): several correctness concerns in this routine:
+/// 1. The selected context is never *removed* from the shared run
+///    queue — `q.front()` is only cloned — so it remains visible to
+///    other CPUs and could be scheduled twice. Confirm that
+///    `update_runnable` transitions it to Running in a way every other
+///    CPU re-checks before switching to it.
+/// 2. The inner `for _ in 0..len.min(8)` loop re-reads the *same*
+///    `q.front()` entry on every iteration (nothing is popped or
+///    rotated), so an entry that fails the upgrade/affinity check is
+///    re-examined up to 8 times instead of advancing to the next one.
+/// 3. `nr_running` is bumped by 1 on a successful steal but reset to 0
+///    unconditionally on failure; if it tracks this CPU's runnable
+///    count, both updates look inconsistent — verify intended meaning.
+fn try_steal_work(
+ token: &mut CleanLockToken,
+ _contexts_list: &[VecDeque<WeakContextRef>; 40],
+ cpu_id: LogicalCpuId,
+ switch_time: u128,
+) -> Option<ArcContextLockWriteGuard> {
+ use crate::context::run_contexts;
+ let percpu = crate::percpu::PercpuBlock::current();
+ let all_contexts = run_contexts(token.token());
+ let (contexts_data, _t) = all_contexts.into_split();
+ let queues = &contexts_data.set;
+
+ // Scan from the highest priority queue (39) down to the lowest (0).
+ for prio in (0..40).rev() {
+ let q = &queues[prio];
+ let len = q.len();
+ // Bound the scan so the idle path stays cheap.
+ for _ in 0..len.min(8) {
+ let context_ref = match q.front() {
+ Some(r) => r.clone(),
+ None => break,
+ };
+ // The weak ref may have been dropped concurrently.
+ let context_lock = match context_ref.upgrade() {
+ Some(l) => l,
+ None => continue,
+ };
+ let mut guard = unsafe { context_lock.write_arc() };
+ // Respect the task's CPU affinity mask.
+ if !guard.sched_affinity.contains(cpu_id) {
+ continue;
+ }
+ match unsafe { crate::context::switch::update_runnable(&mut guard, cpu_id, switch_time) } {
+ crate::context::switch::UpdateResult::CanSwitch => {
+ percpu.switch_internals.nr_running.set(
+ percpu.switch_internals.nr_running.get() + 1
+ );
+ return Some(guard);
+ }
+ _ => continue,
+ }
+ }
+ }
+ percpu.switch_internals.nr_running.set(0);
+ None
+}
+
/// Holds per-CPU state necessary for context switching.
///
/// This struct contains information such as the idle context, current context, and PIT tick counts,
@@ -494,6 +542,7 @@ pub struct ContextSwitchPercpu {
switch_result: Cell<Option<SwitchResultInner>>,
switch_time: Cell<u128>,
pit_ticks: Cell<usize>,
+ nr_running: Cell<usize>,
current_ctxt: RefCell<Option<Arc<ContextLock>>>,
@@ -508,6 +557,7 @@ impl ContextSwitchPercpu {
switch_result: Cell::new(None),
switch_time: Cell::new(0),
pit_ticks: Cell::new(0),
+ nr_running: Cell::new(0),
current_ctxt: RefCell::new(None),
idle_ctxt: RefCell::new(None),
being_sigkilled: Cell::new(false),