diff --git a/src/context/arch/aarch64.rs b/src/context/arch/aarch64.rs index 33dc83a..b8f8ac9 100644 --- a/src/context/arch/aarch64.rs +++ b/src/context/arch/aarch64.rs @@ -4,16 +4,10 @@ use crate::{ percpu::PercpuBlock, syscall::FloatRegisters, }; -use core::{mem::offset_of, ptr, sync::atomic::AtomicBool}; +use core::{mem::offset_of, ptr}; use spin::Once; use syscall::{EnvRegisters, Result}; -/// This must be used by the kernel to ensure that context switches are done atomically -/// Compare and exchange this to true when beginning a context switch on any CPU -/// The `Context::switch_to` function will set it back to false, allowing other CPU's to switch -/// This must be done, as no locks can be held on the stack during switch -pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); - // 512 bytes for registers, extra bytes for fpcr and fpsr pub const KFX_ALIGN: usize = 16; diff --git a/src/context/arch/riscv64.rs b/src/context/arch/riscv64.rs index 4bd843e..fe63639 100644 --- a/src/context/arch/riscv64.rs +++ b/src/context/arch/riscv64.rs @@ -2,13 +2,11 @@ use crate::{ arch::interrupt::InterruptStack, context::context::Kstack, memory::RmmA, percpu::PercpuBlock, syscall::FloatRegisters, }; -use core::{mem::offset_of, sync::atomic::AtomicBool}; +use core::mem::offset_of; use rmm::{Arch, VirtualAddress}; use spin::Once; use syscall::{error::*, EnvRegisters}; -pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); - pub const KFX_ALIGN: usize = 16; #[derive(Clone, Debug, Default)] diff --git a/src/context/arch/x86.rs b/src/context/arch/x86.rs index 2862d35..dc01f6e 100644 --- a/src/context/arch/x86.rs +++ b/src/context/arch/x86.rs @@ -1,4 +1,4 @@ -use core::{mem::offset_of, sync::atomic::AtomicBool}; +use core::mem::offset_of; use rmm::{Arch, VirtualAddress}; use spin::Once; use syscall::{error::*, EnvRegisters}; @@ -14,12 +14,6 @@ use crate::{ syscall::FloatRegisters, }; -/// This must be used by the kernel to ensure that context switches are done atomically -/// Compare and exchange this to true when beginning a context switch on any CPU -/// The `Context::switch_to` function will set it back to false, allowing other CPU's to switch -/// This must be done, as no locks can be held on the stack during switch -pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); - const ST_RESERVED: u128 = 0xFFFF_FFFF_FFFF_0000_0000_0000_0000_0000; pub const KFX_ALIGN: usize = 16; diff --git a/src/context/arch/x86_64.rs b/src/context/arch/x86_64.rs index 6758c9f..574d373 100644 --- a/src/context/arch/x86_64.rs +++ b/src/context/arch/x86_64.rs @@ -1,6 +1,5 @@ use core::{ ptr::{addr_of, addr_of_mut}, - sync::atomic::AtomicBool, }; use crate::syscall::FloatRegisters; @@ -12,12 +11,6 @@ use spin::Once; use syscall::{error::*, EnvRegisters}; use x86::msr; -/// This must be used by the kernel to ensure that context switches are done atomically -/// Compare and exchange this to true when beginning a context switch on any CPU -/// The `Context::switch_to` function will set it back to false, allowing other CPU's to switch -/// This must be done, as no locks can be held on the stack during switch -pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false); - const ST_RESERVED: u128 = 0xFFFF_FFFF_FFFF_0000_0000_0000_0000_0000; #[cfg(cpu_feature_never = "xsave")] diff --git a/src/context/switch.rs b/src/context/switch.rs index 86684c8..d509984 100644 --- a/src/context/switch.rs +++ b/src/context/switch.rs @@ -15,8 +15,7 @@ use crate::{ use alloc::{sync::Arc, vec::Vec}; use core::{ cell::{Cell, RefCell}, - hint, mem, - sync::atomic::Ordering, + mem, }; use syscall::PtraceFlags; @@ -120,7 +119,10 @@ pub unsafe extern "C" fn switch_finish_hook() { crate::arch::stop::emergency_reset(); } } - arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst); + PercpuBlock::current() + .switch_internals + .in_context_switch + .set(false); crate::percpu::switch_arch_hook(); } } @@ -150,16 +152,15 @@ pub fn switch(token: &mut CleanLockToken) -> SwitchResult { //set PIT Interrupt counter to 0, giving each process same amount of PIT ticks percpu.switch_internals.pit_ticks.set(0); - // Acquire the global lock to ensure exclusive access during context switch and avoid - // issues that would be caused by the unsafe operations below - // TODO: Better memory orderings? - while arch::CONTEXT_SWITCH_LOCK - .compare_exchange_weak(false, true, Ordering::SeqCst, Ordering::Relaxed) - .is_err() - { - hint::spin_loop(); - percpu.maybe_handle_tlb_shootdown(); - } + // Acquire the per-CPU context switch flag. Each CPU can only be in one context + // switch at a time. The per-context write locks provide cross-CPU safety; this + // flag catches re-entrant switches on the same CPU (a kernel bug). + debug_assert!( + !percpu.switch_internals.in_context_switch.get(), + "context switch re-entry on CPU {}", + percpu.cpu_id + ); + percpu.switch_internals.in_context_switch.set(true); // Lock the previous context. let prev_context_lock = crate::context::current(); @@ -167,8 +168,8 @@ pub fn switch(token: &mut CleanLockToken) -> SwitchResult { let mut prev_context_guard = unsafe { prev_context_lock.write_arc() }; if !prev_context_guard.is_preemptable() { - // Unset global lock - arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst); + // Unset per-CPU context switch flag + percpu.switch_internals.in_context_switch.set(false); // Pretend to have finished switching, so CPU is not idled return SwitchResult::Switched; @@ -292,8 +293,8 @@ pub fn switch(token: &mut CleanLockToken) -> SwitchResult { SwitchResult::Switched } _ => { - // No target was found, unset global lock and return - arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst); + // No target was found, unset per-CPU context switch flag and return + percpu.switch_internals.in_context_switch.set(false); percpu.stats.set_state(cpu_stats::CpuState::Idle); @@ -494,6 +495,9 @@ pub struct ContextSwitchPercpu { switch_result: Cell>, switch_time: Cell, pit_ticks: Cell, + /// Per-CPU context switch flag. Set to true during a context switch on this CPU. + /// Replaced the global CONTEXT_SWITCH_LOCK to eliminate cross-CPU serialization. + in_context_switch: Cell, current_ctxt: RefCell>>, @@ -508,6 +512,7 @@ impl ContextSwitchPercpu { switch_result: Cell::new(None), switch_time: Cell::new(0), pit_ticks: Cell::new(0), + in_context_switch: Cell::new(false), current_ctxt: RefCell::new(None), idle_ctxt: RefCell::new(None), being_sigkilled: Cell::new(false),