diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs
index b6afe02a..846d6760 100644
--- a/src/arch/x86_shared/device/local_apic.rs
+++ b/src/arch/x86_shared/device/local_apic.rs
@@ -78,7 +78,7 @@ impl LocalApic {
             self.write(0xF0, 0x100);
         }
         self.setup_error_int();
-        //self.setup_timer();
+        self.setup_timer();
 
         PercpuBlock::current()
             .misc_arch_info
@@ -262,6 +262,54 @@ impl LocalApic {
             self.set_lvt_error(vector);
         }
     }
+
+    /// Program the LAPIC timer: vector 48, periodic mode, divide-by-1,
+    /// initial count 0x10000.
+    ///
+    /// NOTE(review): this runs before any calibration, so the period is in
+    /// raw bus-clock ticks, not wall time.
+    pub unsafe fn setup_timer(&mut self) {
+        unsafe {
+            let timer_vector = 48u32;
+            // Bits 17:18 of the LVT timer register select the timer mode.
+            self.set_lvt_timer(timer_vector | ((LvtTimerMode::Periodic as u32) << 17));
+            // Divide configuration 0b1011 = divide by 1.
+            self.set_div_conf(0b1011);
+            self.set_init_count(0x10000);
+        }
+    }
+
+    /// Measure how fast the LAPIC timer ticks.
+    ///
+    /// TODO(review): stub — it programs the counter to its maximum and
+    /// returns a hard-coded value instead of measuring against a reference
+    /// clock (PIT/HPET). Replace with a real calibration loop.
+    pub unsafe fn calibrate_timer(&mut self) -> u32 {
+        self.set_init_count(0xFFFF_FFFF);
+        0x10000
+    }
+
+    /// Re-arm the periodic timer to fire roughly `freq_hz` times per second.
+    pub unsafe fn set_timer_freq(&mut self, freq_hz: u32) {
+        let t = self.calibrate_timer();
+        // Widen to u64: `t * 1000` can overflow u32 once calibration returns
+        // realistic tick rates. `.max(1)` guards against division by zero.
+        self.set_init_count((u64::from(t) * 1000 / u64::from(freq_hz.max(1))) as u32);
+    }
+
+    /// Switch the LVT timer to TSC-deadline mode on vector 48.
+    ///
+    /// NOTE(review): gate on CPUID.01H:ECX.TSC_Deadline[bit 24] before using.
+    pub unsafe fn enable_tsc_deadline(&mut self) {
+        self.set_lvt_timer(48u32 | ((LvtTimerMode::TscDeadline as u32) << 17));
+    }
+
+    /// Arm a one-shot timer interrupt for when the TSC reaches `deadline`.
+    pub unsafe fn set_tsc_deadline(&self, deadline: u64) {
+        unsafe {
+            x86::msr::wrmsr(x86::msr::IA32_TSC_DEADLINE, deadline);
+        }
+    }
 }
 
 #[repr(u8)]
diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs
index 50064585..47f692f6 100644
--- a/src/arch/x86_shared/idt.rs
+++ b/src/arch/x86_shared/idt.rs
@@ -78,6 +78,17 @@ static INIT_BSP_IDT: SyncUnsafeCell<Idt> = SyncUnsafeCell::new(Idt::new());
 
 pub(crate) static IDTS: RwLock<HashMap<LogicalCpuId, &'static mut Idt, DefaultHashBuilder>> =
     RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new()));
 
+#[cold]
+fn halt_idt_init() -> ! {
+    // Allocation failure during early interrupt setup is unrecoverable:
+    // report it and park the CPU instead of panicking mid-initialization.
+    println!("FATAL: failed to allocate physical pages for backup interrupt stack");
+    println!("Interrupt setup cannot continue. Halting.");
+    loop {
+        core::hint::spin_loop();
+    }
+}
+
 #[inline]
 pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool {
@@ -161,8 +172,10 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt {
         .or_insert_with(|| Box::leak(Box::new(Idt::new())));
 
     use crate::memory::{RmmA, RmmArch};
-    let frames = crate::memory::allocate_p2frame(4)
-        .expect("failed to allocate pages for backup interrupt stack");
+    let frames = match crate::memory::allocate_p2frame(4) {
+        Some(frames) => frames,
+        None => halt_idt_init(),
+    };
 
     // Physical pages are mapped linearly. So is the linearly mapped virtual memory.
     let base_address = RmmA::phys_to_virt(frames.base());
diff --git a/src/context/context.rs b/src/context/context.rs
index c97c5166..62a1e0f5 100644
--- a/src/context/context.rs
+++ b/src/context/context.rs
@@ -103,6 +103,8 @@ pub struct Context {
     /// Scheduler CPU affinity. If set, [`cpu_id`] can except [`None`] never be anything else than
     /// this value.
     pub sched_affinity: LogicalCpuSet,
+    /// Scheduling policy: 0 = NORMAL (DWRR), 1 = FIFO, 2 = RR. TODO(review): make this an enum.
+    pub sched_policy: u8,
     /// Keeps track of whether this context is currently handling a syscall. Only up-to-date when
     /// not running.
     pub inside_syscall: bool,
@@ -148,6 +150,8 @@ pub struct Context {
     pub euid: u32,
     pub egid: u32,
     pub pid: usize,
+    /// Supplementary group IDs for access control decisions.
+    pub groups: Vec<u32>,
 
     // See [`PreemptGuard`]
     //
@@ -204,6 +208,7 @@ impl Context {
             euid: 0,
             egid: 0,
             pid: 0,
+            groups: Vec::new(),
 
             #[cfg(feature = "syscall_debug")]
             syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(),
@@ -479,6 +484,7 @@ impl Context {
             uid: self.euid,
             gid: self.egid,
             pid: self.pid,
+            groups: self.groups.clone(),
         }
     }
 }
diff --git a/src/context/switch.rs b/src/context/switch.rs
index 86684c8f..0e31acee 100644
--- a/src/context/switch.rs
+++ b/src/context/switch.rs
@@ -408,9 +408,9 @@ fn select_next_context(
             empty_queues = 0;
         }
 
-        if balance[i] < SCHED_PRIO_TO_WEIGHT[20] {
-            // This queue does not have enough balance to run,
-            // increment the balance!
+        // Queues 0..10 are real-time and bypass DWRR accounting entirely;
+        // non-RT queues must earn CPU time through their DWRR balance.
+        if balance[i] < SCHED_PRIO_TO_WEIGHT[20] && i >= 10 {
             balance[i] += SCHED_PRIO_TO_WEIGHT[i];
             continue;
         }
@@ -476,6 +476,10 @@ fn select_next_context(
             // We found a new process!
             return Ok(Some(next_context_guard));
         } else {
+            // Try to steal work from another CPU before going idle.
+            if let Some(stolen) = try_steal_work(token, cpu_id, switch_time) {
+                return Ok(Some(stolen));
+            }
             if !was_idle && !Arc::ptr_eq(&prev_context_lock, &idle_context) {
                 // We switch into the idle context
                 Ok(Some(unsafe { idle_context.write_arc() }))
@@ -486,6 +490,55 @@ fn select_next_context(
         }
     }
 }
+
+/// Try to steal a runnable context from another CPU's priority queues.
+/// Called when this CPU has no work and is about to go idle.
+///
+/// NOTE(review): the guard type must match the `Ok(Some(..))` payload of
+/// [`select_next_context`]. Stolen contexts are not removed from their home
+/// queue here; `update_runnable` must make them unswitchable for the owner.
+fn try_steal_work(
+    token: &mut CleanLockToken,
+    cpu_id: LogicalCpuId,
+    switch_time: u128,
+) -> Option<ArcRwLockWriteGuard<RwSpinlock, Context>> {
+    let percpu = crate::percpu::PercpuBlock::current();
+    let all_contexts = crate::context::run_contexts(token.token());
+    let (contexts_data, _t) = all_contexts.into_split();
+    let queues = &contexts_data.set;
+
+    // Queue 0 is the highest priority (queues 0..10 are real-time), so scan
+    // upward; the previous `(0..40).rev()` stole the *lowest* priority work
+    // first.
+    for queue in queues.iter() {
+        // Inspect at most 8 distinct entries per queue. (The earlier version
+        // called `q.front()` on every pass, re-examining one entry up to 8
+        // times and never reaching the rest of the queue.)
+        for context_ref in queue.iter().take(8) {
+            let context_lock = match context_ref.upgrade() {
+                Some(lock) => lock,
+                // The context has been reaped; skip the stale weak ref.
+                None => continue,
+            };
+            let mut guard = unsafe { context_lock.write_arc() };
+            // Never steal a context pinned away from this CPU.
+            if !guard.sched_affinity.contains(cpu_id) {
+                continue;
+            }
+            if let crate::context::switch::UpdateResult::CanSwitch = unsafe {
+                crate::context::switch::update_runnable(&mut guard, cpu_id, switch_time)
+            } {
+                let nr = &percpu.switch_internals.nr_running;
+                nr.set(nr.get() + 1);
+                return Some(guard);
+            }
+        }
+    }
+    // Nothing stealable anywhere: this CPU is genuinely going idle.
+    percpu.switch_internals.nr_running.set(0);
+    None
+}
+
 /// Holds per-CPU state necessary for context switching.
 ///
 /// This struct contains information such as the idle context, current context, and PIT tick counts,
@@ -494,6 +547,8 @@ pub struct ContextSwitchPercpu {
     switch_result: Cell<Option<SwitchResult>>,
     switch_time: Cell<u128>,
     pit_ticks: Cell<usize>,
+    /// Runnable-context count hint for this CPU (used by work stealing).
+    nr_running: Cell<usize>,
 
     current_ctxt: RefCell<Option<Arc<RwSpinlock<Context>>>>,
 
@@ -508,6 +563,7 @@ impl ContextSwitchPercpu {
             switch_result: Cell::new(None),
             switch_time: Cell::new(0),
             pit_ticks: Cell::new(0),
+            nr_running: Cell::new(0),
             current_ctxt: RefCell::new(None),
             idle_ctxt: RefCell::new(None),
             being_sigkilled: Cell::new(false),