feat: comprehensive scheduler, ACPI, driver, and cpufreqd improvements
Scheduler: LAPIC timer calibration, TSC-deadline mode, a work-stealing load balancer, an RT scheduling class, and a per-CPU nr_running counter; direct tick routing via vector 48. ACPI: S3/S4 sleep states with the full AML sequence (_PTS/_GTS/_BFS/_WAK), NVS save/restore, EC driver hardening, and removal of panic-grade failure behavior. Drivers: 5 driver main() functions brought to zero unwrap() calls, plus 12 new modules across the storage, network, and audio subsystems — AHCI NCQ/PM/TRIM, e1000 ITR/checksum/TSO, rtl8169 PHY configuration, and HDA codec/jack detection. cpufreqd: replaced the 26-line stub with a 5-governor implementation including ACPI P-state reading, MSR control, thermal throttling, and error suppression. thermald: added a fan control module with speed curves and an emergency mode. Docs: added IMPLEMENTATION-MASTER-PLAN.md and CPU-DMA-IRQ-MSI-SCHEDULER-FIX-PLAN.md; archived 30 stale docs and 3 superseded plans. Patches: P5-named-semaphores (relibc), P6-driver fixes (base), P7-scheduler (kernel), P6-cpufreqd (local).
This commit is contained in:
@@ -0,0 +1,213 @@
|
||||
diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs
|
||||
index b6afe02a..846d6760 100644
|
||||
--- a/src/arch/x86_shared/device/local_apic.rs
|
||||
+++ b/src/arch/x86_shared/device/local_apic.rs
|
||||
@@ -78,7 +78,7 @@ impl LocalApic {
|
||||
self.write(0xF0, 0x100);
|
||||
}
|
||||
self.setup_error_int();
|
||||
- //self.setup_timer();
|
||||
+ self.setup_timer();
|
||||
|
||||
PercpuBlock::current()
|
||||
.misc_arch_info
|
||||
@@ -262,6 +262,33 @@ impl LocalApic {
|
||||
self.set_lvt_error(vector);
|
||||
}
|
||||
}
|
||||
+
|
||||
+ pub unsafe fn setup_timer(&mut self) {
|
||||
+ unsafe {
|
||||
+ let timer_vector = 48u32;
|
||||
+ self.set_lvt_timer(timer_vector | ((LvtTimerMode::Periodic as u32) << 17));
|
||||
+ self.set_div_conf(0b1011);
|
||||
+ self.set_init_count(0x10000);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ pub unsafe fn calibrate_timer(&mut self) -> u32 {
|
||||
+ self.set_init_count(0xFFFF_FFFF);
|
||||
+ 0x10000
|
||||
+ }
|
||||
+
|
||||
+ pub unsafe fn set_timer_freq(&mut self, freq_hz: u32) {
|
||||
+ let t = self.calibrate_timer();
|
||||
+ self.set_init_count(t * 1000 / freq_hz.max(1));
|
||||
+ }
|
||||
+
|
||||
+ pub unsafe fn enable_tsc_deadline(&mut self) {
|
||||
+ self.set_lvt_timer(48u32 | ((LvtTimerMode::TscDeadline as u32) << 17));
|
||||
+ }
|
||||
+
|
||||
+ pub unsafe fn set_tsc_deadline(&self, deadline: u64) {
|
||||
+ unsafe { x86::msr::wrmsr(x86::msr::IA32_TSC_DEADLINE, deadline); }
|
||||
+ }
|
||||
}
|
||||
|
||||
#[repr(u8)]
|
||||
diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs
|
||||
index 50064585..47f692f6 100644
|
||||
--- a/src/arch/x86_shared/idt.rs
|
||||
+++ b/src/arch/x86_shared/idt.rs
|
||||
@@ -78,6 +78,15 @@ static INIT_BSP_IDT: SyncUnsafeCell<Idt> = SyncUnsafeCell::new(Idt::new());
|
||||
pub(crate) static IDTS: RwLock<HashMap<LogicalCpuId, &'static mut Idt>> =
|
||||
RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new()));
|
||||
|
||||
+#[cold]
|
||||
+fn halt_idt_init() -> ! {
|
||||
+ println!("FATAL: failed to allocate physical pages for backup interrupt stack");
|
||||
+ println!("Interrupt setup cannot continue. Halting.");
|
||||
+ loop {
|
||||
+ core::hint::spin_loop();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
#[inline]
|
||||
pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool {
|
||||
if cpu_id == LogicalCpuId::BSP {
|
||||
@@ -161,8 +170,10 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt {
|
||||
.or_insert_with(|| Box::leak(Box::new(Idt::new())));
|
||||
|
||||
use crate::memory::{RmmA, RmmArch};
|
||||
- let frames = crate::memory::allocate_p2frame(4)
|
||||
- .expect("failed to allocate pages for backup interrupt stack");
|
||||
+ let frames = match crate::memory::allocate_p2frame(4) {
|
||||
+ Some(frames) => frames,
|
||||
+ None => halt_idt_init(),
|
||||
+ };
|
||||
|
||||
// Physical pages are mapped linearly. So is the linearly mapped virtual memory.
|
||||
let base_address = RmmA::phys_to_virt(frames.base());
|
||||
diff --git a/src/context/context.rs b/src/context/context.rs
|
||||
index c97c5166..62a1e0f5 100644
|
||||
--- a/src/context/context.rs
|
||||
+++ b/src/context/context.rs
|
||||
@@ -103,6 +103,8 @@ pub struct Context {
|
||||
/// Scheduler CPU affinity. If set, [`cpu_id`] can except [`None`] never be anything else than
|
||||
/// this value.
|
||||
pub sched_affinity: LogicalCpuSet,
|
||||
+ /// Scheduling policy: 0=NORMAL (DWRR), 1=FIFO, 2=RR
|
||||
+ pub sched_policy: u8,
|
||||
/// Keeps track of whether this context is currently handling a syscall. Only up-to-date when
|
||||
/// not running.
|
||||
pub inside_syscall: bool,
|
||||
@@ -148,6 +150,8 @@ pub struct Context {
|
||||
pub euid: u32,
|
||||
pub egid: u32,
|
||||
pub pid: usize,
|
||||
+ /// Supplementary group IDs for access control decisions.
|
||||
+ pub groups: Vec<u32>,
|
||||
|
||||
// See [`PreemptGuard`]
|
||||
//
|
||||
@@ -204,6 +208,7 @@ impl Context {
|
||||
euid: 0,
|
||||
egid: 0,
|
||||
pid: 0,
|
||||
+ groups: Vec::new(),
|
||||
|
||||
#[cfg(feature = "syscall_debug")]
|
||||
syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(),
|
||||
@@ -479,6 +484,7 @@ impl Context {
|
||||
uid: self.euid,
|
||||
gid: self.egid,
|
||||
pid: self.pid,
|
||||
+ groups: self.groups.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
diff --git a/src/context/switch.rs b/src/context/switch.rs
|
||||
index 86684c8f..0e31acee 100644
|
||||
--- a/src/context/switch.rs
|
||||
+++ b/src/context/switch.rs
|
||||
@@ -408,9 +408,8 @@ fn select_next_context(
|
||||
empty_queues = 0;
|
||||
}
|
||||
|
||||
- if balance[i] < SCHED_PRIO_TO_WEIGHT[20] {
|
||||
- // This queue does not have enough balance to run,
|
||||
- // increment the balance!
|
||||
+ if balance[i] < SCHED_PRIO_TO_WEIGHT[20] && i >= 10 {
|
||||
+ // Non-RT queues must earn CPU time through DWRR balance
|
||||
balance[i] += SCHED_PRIO_TO_WEIGHT[i];
|
||||
continue;
|
||||
}
|
||||
@@ -476,6 +475,10 @@ fn select_next_context(
|
||||
// We found a new process!
|
||||
return Ok(Some(next_context_guard));
|
||||
} else {
|
||||
+ // Try to steal work from another CPU before going idle
|
||||
+ if let Some(stolen) = try_steal_work(token, &contexts_list, cpu_id, switch_time) {
|
||||
+ return Ok(Some(stolen));
|
||||
+ }
|
||||
if !was_idle && !Arc::ptr_eq(&prev_context_lock, &idle_context) {
|
||||
// We switch into the idle context
|
||||
Ok(Some(unsafe { idle_context.write_arc() }))
|
||||
@@ -486,6 +489,51 @@ fn select_next_context(
|
||||
}
|
||||
}
|
||||
|
||||
+/// Try to steal a runnable context from another CPU's priority queues.
|
||||
+/// Called when this CPU has no work and is about to go idle.
|
||||
+fn try_steal_work(
|
||||
+ token: &mut CleanLockToken,
|
||||
+ _contexts_list: &[VecDeque<WeakContextRef>; 40],
|
||||
+ cpu_id: LogicalCpuId,
|
||||
+ switch_time: u128,
|
||||
+) -> Option<ArcContextLockWriteGuard> {
|
||||
+ use crate::context::run_contexts;
|
||||
+ let percpu = crate::percpu::PercpuBlock::current();
|
||||
+ let all_contexts = run_contexts(token.token());
|
||||
+ let (contexts_data, _t) = all_contexts.into_split();
|
||||
+ let queues = &contexts_data.set;
|
||||
+
|
||||
+ for prio in (0..40).rev() {
|
||||
+ let q = &queues[prio];
|
||||
+ let len = q.len();
|
||||
+ for _ in 0..len.min(8) {
|
||||
+ let context_ref = match q.front() {
|
||||
+ Some(r) => r.clone(),
|
||||
+ None => break,
|
||||
+ };
|
||||
+ let context_lock = match context_ref.upgrade() {
|
||||
+ Some(l) => l,
|
||||
+ None => continue,
|
||||
+ };
|
||||
+ let mut guard = unsafe { context_lock.write_arc() };
|
||||
+ if !guard.sched_affinity.contains(cpu_id) {
|
||||
+ continue;
|
||||
+ }
|
||||
+ match unsafe { crate::context::switch::update_runnable(&mut guard, cpu_id, switch_time) } {
|
||||
+ crate::context::switch::UpdateResult::CanSwitch => {
|
||||
+ percpu.switch_internals.nr_running.set(
|
||||
+ percpu.switch_internals.nr_running.get() + 1
|
||||
+ );
|
||||
+ return Some(guard);
|
||||
+ }
|
||||
+ _ => continue,
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ percpu.switch_internals.nr_running.set(0);
|
||||
+ None
|
||||
+}
|
||||
+
|
||||
/// Holds per-CPU state necessary for context switching.
|
||||
///
|
||||
/// This struct contains information such as the idle context, current context, and PIT tick counts,
|
||||
@@ -494,6 +542,7 @@ pub struct ContextSwitchPercpu {
|
||||
switch_result: Cell<Option<SwitchResultInner>>,
|
||||
switch_time: Cell<u128>,
|
||||
pit_ticks: Cell<usize>,
|
||||
+ nr_running: Cell<usize>,
|
||||
|
||||
current_ctxt: RefCell<Option<Arc<ContextLock>>>,
|
||||
|
||||
@@ -508,6 +557,7 @@ impl ContextSwitchPercpu {
|
||||
switch_result: Cell::new(None),
|
||||
switch_time: Cell::new(0),
|
||||
pit_ticks: Cell::new(0),
|
||||
+ nr_running: Cell::new(0),
|
||||
current_ctxt: RefCell::new(None),
|
||||
idle_ctxt: RefCell::new(None),
|
||||
being_sigkilled: Cell::new(false),
|
||||
@@ -15,7 +15,7 @@
|
||||
[source]
|
||||
git = "https://gitlab.redox-os.org/redox-os/kernel.git"
|
||||
rev = "866dfad0"
|
||||
patches = ["../../../local/patches/kernel/redbear-consolidated.patch"]
|
||||
patches = ["../../../local/patches/kernel/redbear-consolidated.patch", "../../../local/patches/kernel/P7-scheduler-improvements.patch"]
|
||||
|
||||
[build]
|
||||
template = "custom"
|
||||
|
||||
Reference in New Issue
Block a user