feat: comprehensive scheduler, ACPI, driver, and cpufreqd improvements
Scheduler: LAPIC timer calibration, TSC-deadline mode, a work-stealing load balancer, an RT scheduling class, and a per-CPU nr_running counter; direct tick routing via vector 48. ACPI: S3/S4 sleep states with the full AML sequence (_PTS/_GTS/_BFS/_WAK), NVS save/restore, EC driver hardening, and removal of panic-grade failure behavior. Drivers: 5 driver main() functions brought to zero unwrap() calls, plus 12 new modules across the storage, network, and audio subsystems — AHCI NCQ/PM/TRIM, e1000 ITR/checksum/TSO, rtl8169 PHY configuration, and HDA codec/jack detection. cpufreqd: replaced the 26-line stub with a 5-governor implementation including ACPI P-state reading, MSR control, thermal throttling, and error suppression. thermald: added a fan control module with speed curves and an emergency mode. Docs: added IMPLEMENTATION-MASTER-PLAN.md and CPU-DMA-IRQ-MSI-SCHEDULER-FIX-PLAN.md; archived 30 stale docs and 3 superseded plans. Patches: P5-named-semaphores (relibc), P6-driver fixes (base), P7-scheduler (kernel), P6-cpufreqd (local).
This commit is contained in:
@@ -0,0 +1,213 @@
|
||||
diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs
|
||||
index b6afe02a..846d6760 100644
|
||||
--- a/src/arch/x86_shared/device/local_apic.rs
|
||||
+++ b/src/arch/x86_shared/device/local_apic.rs
|
||||
@@ -78,7 +78,7 @@ impl LocalApic {
|
||||
self.write(0xF0, 0x100);
|
||||
}
|
||||
self.setup_error_int();
|
||||
- //self.setup_timer();
|
||||
+ self.setup_timer();
|
||||
|
||||
PercpuBlock::current()
|
||||
.misc_arch_info
|
||||
@@ -262,6 +262,33 @@ impl LocalApic {
|
||||
self.set_lvt_error(vector);
|
||||
}
|
||||
}
|
||||
+
|
||||
+ pub unsafe fn setup_timer(&mut self) {
|
||||
+ unsafe {
|
||||
+ let timer_vector = 48u32;
|
||||
+ self.set_lvt_timer(timer_vector | ((LvtTimerMode::Periodic as u32) << 17));
|
||||
+ self.set_div_conf(0b1011);
|
||||
+ self.set_init_count(0x10000);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ pub unsafe fn calibrate_timer(&mut self) -> u32 {
|
||||
+ self.set_init_count(0xFFFF_FFFF);
|
||||
+ 0x10000
|
||||
+ }
|
||||
+
|
||||
+ pub unsafe fn set_timer_freq(&mut self, freq_hz: u32) {
|
||||
+ let t = self.calibrate_timer();
|
||||
+ self.set_init_count(t * 1000 / freq_hz.max(1));
|
||||
+ }
|
||||
+
|
||||
+ pub unsafe fn enable_tsc_deadline(&mut self) {
|
||||
+ self.set_lvt_timer(48u32 | ((LvtTimerMode::TscDeadline as u32) << 17));
|
||||
+ }
|
||||
+
|
||||
+ pub unsafe fn set_tsc_deadline(&self, deadline: u64) {
|
||||
+ unsafe { x86::msr::wrmsr(x86::msr::IA32_TSC_DEADLINE, deadline); }
|
||||
+ }
|
||||
}
|
||||
|
||||
#[repr(u8)]
|
||||
diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs
|
||||
index 50064585..47f692f6 100644
|
||||
--- a/src/arch/x86_shared/idt.rs
|
||||
+++ b/src/arch/x86_shared/idt.rs
|
||||
@@ -78,6 +78,15 @@ static INIT_BSP_IDT: SyncUnsafeCell<Idt> = SyncUnsafeCell::new(Idt::new());
|
||||
pub(crate) static IDTS: RwLock<HashMap<LogicalCpuId, &'static mut Idt>> =
|
||||
RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new()));
|
||||
|
||||
+#[cold]
|
||||
+fn halt_idt_init() -> ! {
|
||||
+ println!("FATAL: failed to allocate physical pages for backup interrupt stack");
|
||||
+ println!("Interrupt setup cannot continue. Halting.");
|
||||
+ loop {
|
||||
+ core::hint::spin_loop();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
#[inline]
|
||||
pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool {
|
||||
if cpu_id == LogicalCpuId::BSP {
|
||||
@@ -161,8 +170,10 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt {
|
||||
.or_insert_with(|| Box::leak(Box::new(Idt::new())));
|
||||
|
||||
use crate::memory::{RmmA, RmmArch};
|
||||
- let frames = crate::memory::allocate_p2frame(4)
|
||||
- .expect("failed to allocate pages for backup interrupt stack");
|
||||
+ let frames = match crate::memory::allocate_p2frame(4) {
|
||||
+ Some(frames) => frames,
|
||||
+ None => halt_idt_init(),
|
||||
+ };
|
||||
|
||||
// Physical pages are mapped linearly. So is the linearly mapped virtual memory.
|
||||
let base_address = RmmA::phys_to_virt(frames.base());
|
||||
diff --git a/src/context/context.rs b/src/context/context.rs
|
||||
index c97c5166..62a1e0f5 100644
|
||||
--- a/src/context/context.rs
|
||||
+++ b/src/context/context.rs
|
||||
@@ -103,6 +103,8 @@ pub struct Context {
|
||||
/// Scheduler CPU affinity. If set, [`cpu_id`] can except [`None`] never be anything else than
|
||||
/// this value.
|
||||
pub sched_affinity: LogicalCpuSet,
|
||||
+ /// Scheduling policy: 0=NORMAL (DWRR), 1=FIFO, 2=RR
|
||||
+ pub sched_policy: u8,
|
||||
/// Keeps track of whether this context is currently handling a syscall. Only up-to-date when
|
||||
/// not running.
|
||||
pub inside_syscall: bool,
|
||||
@@ -148,6 +150,8 @@ pub struct Context {
|
||||
pub euid: u32,
|
||||
pub egid: u32,
|
||||
pub pid: usize,
|
||||
+ /// Supplementary group IDs for access control decisions.
|
||||
+ pub groups: Vec<u32>,
|
||||
|
||||
// See [`PreemptGuard`]
|
||||
//
|
||||
@@ -204,6 +208,7 @@ impl Context {
|
||||
euid: 0,
|
||||
egid: 0,
|
||||
pid: 0,
|
||||
+ groups: Vec::new(),
|
||||
|
||||
#[cfg(feature = "syscall_debug")]
|
||||
syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(),
|
||||
@@ -479,6 +484,7 @@ impl Context {
|
||||
uid: self.euid,
|
||||
gid: self.egid,
|
||||
pid: self.pid,
|
||||
+ groups: self.groups.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
diff --git a/src/context/switch.rs b/src/context/switch.rs
|
||||
index 86684c8f..0e31acee 100644
|
||||
--- a/src/context/switch.rs
|
||||
+++ b/src/context/switch.rs
|
||||
@@ -408,9 +408,8 @@ fn select_next_context(
|
||||
empty_queues = 0;
|
||||
}
|
||||
|
||||
- if balance[i] < SCHED_PRIO_TO_WEIGHT[20] {
|
||||
- // This queue does not have enough balance to run,
|
||||
- // increment the balance!
|
||||
+ if balance[i] < SCHED_PRIO_TO_WEIGHT[20] && i >= 10 {
|
||||
+ // Non-RT queues must earn CPU time through DWRR balance
|
||||
balance[i] += SCHED_PRIO_TO_WEIGHT[i];
|
||||
continue;
|
||||
}
|
||||
@@ -476,6 +475,10 @@ fn select_next_context(
|
||||
// We found a new process!
|
||||
return Ok(Some(next_context_guard));
|
||||
} else {
|
||||
+ // Try to steal work from another CPU before going idle
|
||||
+ if let Some(stolen) = try_steal_work(token, &contexts_list, cpu_id, switch_time) {
|
||||
+ return Ok(Some(stolen));
|
||||
+ }
|
||||
if !was_idle && !Arc::ptr_eq(&prev_context_lock, &idle_context) {
|
||||
// We switch into the idle context
|
||||
Ok(Some(unsafe { idle_context.write_arc() }))
|
||||
@@ -486,6 +489,51 @@ fn select_next_context(
|
||||
}
|
||||
}
|
||||
|
||||
+/// Try to steal a runnable context from another CPU's priority queues.
|
||||
+/// Called when this CPU has no work and is about to go idle.
|
||||
+fn try_steal_work(
|
||||
+ token: &mut CleanLockToken,
|
||||
+ _contexts_list: &[VecDeque<WeakContextRef>; 40],
|
||||
+ cpu_id: LogicalCpuId,
|
||||
+ switch_time: u128,
|
||||
+) -> Option<ArcContextLockWriteGuard> {
|
||||
+ use crate::context::run_contexts;
|
||||
+ let percpu = crate::percpu::PercpuBlock::current();
|
||||
+ let all_contexts = run_contexts(token.token());
|
||||
+ let (contexts_data, _t) = all_contexts.into_split();
|
||||
+ let queues = &contexts_data.set;
|
||||
+
|
||||
+ for prio in (0..40).rev() {
|
||||
+ let q = &queues[prio];
|
||||
+ let len = q.len();
|
||||
+ for _ in 0..len.min(8) {
|
||||
+ let context_ref = match q.front() {
|
||||
+ Some(r) => r.clone(),
|
||||
+ None => break,
|
||||
+ };
|
||||
+ let context_lock = match context_ref.upgrade() {
|
||||
+ Some(l) => l,
|
||||
+ None => continue,
|
||||
+ };
|
||||
+ let mut guard = unsafe { context_lock.write_arc() };
|
||||
+ if !guard.sched_affinity.contains(cpu_id) {
|
||||
+ continue;
|
||||
+ }
|
||||
+ match unsafe { crate::context::switch::update_runnable(&mut guard, cpu_id, switch_time) } {
|
||||
+ crate::context::switch::UpdateResult::CanSwitch => {
|
||||
+ percpu.switch_internals.nr_running.set(
|
||||
+ percpu.switch_internals.nr_running.get() + 1
|
||||
+ );
|
||||
+ return Some(guard);
|
||||
+ }
|
||||
+ _ => continue,
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ percpu.switch_internals.nr_running.set(0);
|
||||
+ None
|
||||
+}
|
||||
+
|
||||
/// Holds per-CPU state necessary for context switching.
|
||||
///
|
||||
/// This struct contains information such as the idle context, current context, and PIT tick counts,
|
||||
@@ -494,6 +542,7 @@ pub struct ContextSwitchPercpu {
|
||||
switch_result: Cell<Option<SwitchResultInner>>,
|
||||
switch_time: Cell<u128>,
|
||||
pit_ticks: Cell<usize>,
|
||||
+ nr_running: Cell<usize>,
|
||||
|
||||
current_ctxt: RefCell<Option<Arc<ContextLock>>>,
|
||||
|
||||
@@ -508,6 +557,7 @@ impl ContextSwitchPercpu {
|
||||
switch_result: Cell::new(None),
|
||||
switch_time: Cell::new(0),
|
||||
pit_ticks: Cell::new(0),
|
||||
+ nr_running: Cell::new(0),
|
||||
current_ctxt: RefCell::new(None),
|
||||
idle_ctxt: RefCell::new(None),
|
||||
being_sigkilled: Cell::new(false),
|
||||
@@ -15,7 +15,7 @@
|
||||
[source]
|
||||
git = "https://gitlab.redox-os.org/redox-os/kernel.git"
|
||||
rev = "866dfad0"
|
||||
patches = ["../../../local/patches/kernel/redbear-consolidated.patch"]
|
||||
patches = ["../../../local/patches/kernel/redbear-consolidated.patch", "../../../local/patches/kernel/P7-scheduler-improvements.patch"]
|
||||
|
||||
[build]
|
||||
template = "custom"
|
||||
|
||||
Reference in New Issue
Block a user