Files
RedBear-OS/local/patches/kernel/P12-range-tlb-flush.patch
T
vasilito cee25393d8 fix: boot process improvements — dependency cycle, INIT_NOTIFY, probing loop, and log spam fixes
- Fix P15-8-init-cycle-detection.patch: replace visiting+error with seen+silent-skip
  to eliminate 11 false-positive 'dependency cycle detected' errors on shared deps
- Fix P0-daemon-fix-init-notify-unwrap.patch: remove eprintln! for missing
  INIT_NOTIFY (expected for oneshot_async services, ~7 daemons affected)
- Fix driver-manager hotplug loop: add PERMANENTLY_SKIPPED static set shared
  between hotplug handler and DriverConfig::probe() to stop infinite re-probing
  of Fatal/NotSupported/deferred-exhausted device+driver pairs (e.g. ided)
- Fix driver-manager log_timeline: suppress repeated EPIPE/ENOENT errors with
  AtomicI32 dedup and AtomicBool one-shot guards for boot timeline JSON
- Add driver-manager SIGTERM handler, ACPI bus registration, --status mode,
  driver reap loop, graceful shutdown, and reduced deferred retries (30→3)
2026-05-17 12:34:02 +03:00

196 lines
7.6 KiB
Diff

--- a/src/percpu.rs
+++ b/src/percpu.rs
@@ -5,9 +5,13 @@
use core::{
cell::{Cell, RefCell},
hint,
- sync::atomic::{AtomicBool, AtomicPtr, Ordering},
+ sync::atomic::{AtomicBool, AtomicPtr, AtomicU32, AtomicU64, Ordering},
};
+/// Maximum number of pages to flush individually using INVLPG before falling
+/// back to a full TLB flush (CR3 reload).
+const TLB_RANGE_THRESHOLD: u32 = 32;
+
use rmm::Arch;
use syscall::PtraceFlags;
@@ -41,6 +45,23 @@
/// Counts how many times the scheduler MCS lock acquisition was contended.
pub mcs_contention_count: Cell<u64>,
+ /// TLB shootdown range: start virtual address (page-aligned).
+ /// Set to 0 for a full flush. Only valid when `wants_tlb_shootdown` is true.
+ pub tlb_flush_start: AtomicU64,
+ /// TLB shootdown range: number of pages to invalidate.
+ pub tlb_flush_count: AtomicU32,
+
+ /// Priority inheritance donation. When another CPU is blocked waiting on a
+ /// lock this CPU holds, the blocked CPU may donate its priority here.
+ /// `u32::MAX` means no donation; otherwise it's a priority level (0-39).
+ pub pi_donated_prio: AtomicU32,
+
+ /// Cached priority of the currently-running context on this CPU.
+ /// Set by the scheduler when selecting a new context. Read by the MCS
+ /// lock during priority donation — avoids acquiring the context RwLock
+ /// from the spin loop. Default 39 (lowest priority).
+ pub current_prio: Cell<usize>,
+
// TODO: Put mailbox queues here, e.g. for TLB shootdown? Just be sure to 128-byte align it
// first to avoid cache invalidation.
pub profiling: Option<&'static crate::profiling::RingBuffer>,
@@ -64,6 +85,15 @@
ALL_PERCPU_BLOCKS[id.get() as usize].store(block, Ordering::Release)
}
+/// Look up the `PercpuBlock` registered for the logical CPU `id`.
+/// Returns `None` while that CPU's slot in `ALL_PERCPU_BLOCKS` holds a null pointer.
+pub fn get_for_cpu(id: LogicalCpuId) -> Option<&'static PercpuBlock> {
+    let slot = &ALL_PERCPU_BLOCKS[id.get() as usize];
+    // SAFETY: NOTE(review) — relies on every non-null pointer stored in the table
+    // staying valid for `'static`; confirm against the code that registers blocks.
+    unsafe { slot.load(Ordering::Acquire).as_ref() }
+}
+
pub fn get_all_stats() -> Vec<(LogicalCpuId, CpuStatsData)> {
let mut res = ALL_PERCPU_BLOCKS
.iter()
@@ -108,6 +138,9 @@
core::hint::spin_loop();
}
}
+ // Full flush — clear range info
+ percpublock.tlb_flush_start.store(0, Ordering::Relaxed);
+ percpublock.tlb_flush_count.store(0, Ordering::Relaxed);
crate::ipi::ipi_single(crate::ipi::IpiKind::Tlb, percpublock);
} else {
@@ -138,20 +171,114 @@
hint::spin_loop();
}
}
+ // Full flush — clear range info
+ percpublock.tlb_flush_start.store(0, Ordering::Relaxed);
+ percpublock.tlb_flush_count.store(0, Ordering::Relaxed);
}
// Single broadcast IPI to all other CPUs using destination shorthand
crate::ipi::ipi(crate::ipi::IpiKind::Tlb, crate::ipi::IpiTarget::Other);
}
}
+
+/// Range-based TLB shootdown IPI: asks other CPUs to invalidate `count` pages
+/// starting at `start` (rounded down to a 4 KiB boundary). `target` picks one CPU,
+/// which must not be the caller (asserted); `None` targets every other CPU.
+/// Requests of 1..=TLB_RANGE_THRESHOLD pages are published as a range; all other
+/// counts request a full flush. No-op unless the `multi_core` feature is enabled.
+pub fn shootdown_tlb_ipi_range(target: Option<LogicalCpuId>, start: usize, count: usize) {
+    if cfg!(not(feature = "multi_core")) {
+        return;
+    }
+
+    // Compare at full `usize` width: the old `count as u32` wrapped huge counts
+    // (e.g. 2^32 + 1) into the per-page path, leaving most of the range stale.
+    let use_range = count > 0 && count <= TLB_RANGE_THRESHOLD as usize;
+    let (range_start, range_count) = if use_range {
+        (start as u64 & !0xFFF, count as u32)
+    } else {
+        (0, 0)
+    };
+    // NOTE(review): the range is stored after `wants_tlb_shootdown` is already set, so
+    // a target that polls the flag early can still read the previous range — confirm.
+    let set_range = |percpublock: &PercpuBlock| {
+        percpublock.tlb_flush_start.store(range_start, Ordering::Release);
+        percpublock.tlb_flush_count.store(range_count, Ordering::Release);
+    };
+
+    if let Some(target) = target {
+        let my_percpublock = PercpuBlock::current();
+        assert_ne!(target, my_percpublock.cpu_id);
+
+        let slot = &ALL_PERCPU_BLOCKS[target.get() as usize];
+        let Some(percpublock) = (unsafe { slot.load(Ordering::Acquire).as_ref() }) else {
+            return;
+        };
+        // Claim the target's request flag. While it is taken, keep servicing our own
+        // pending shootdown (presumably so CPUs waiting on each other still progress).
+        #[expect(clippy::bool_comparison)]
+        while percpublock.wants_tlb_shootdown.swap(true, Ordering::Release) == true {
+            while percpublock.wants_tlb_shootdown.load(Ordering::Relaxed) == true {
+                my_percpublock.maybe_handle_tlb_shootdown();
+                hint::spin_loop();
+            }
+        }
+        set_range(percpublock);
+        crate::ipi::ipi_single(crate::ipi::IpiKind::Tlb, percpublock);
+    } else {
+        let my_percpublock = PercpuBlock::current();
+        for id in 0..crate::cpu_count() {
+            let target_id = LogicalCpuId::new(id);
+            if target_id == my_percpublock.cpu_id {
+                continue;
+            }
+            let slot = &ALL_PERCPU_BLOCKS[id as usize];
+            let Some(percpublock) = (unsafe { slot.load(Ordering::Acquire).as_ref() }) else {
+                continue;
+            };
+            #[expect(clippy::bool_comparison)]
+            while percpublock.wants_tlb_shootdown.swap(true, Ordering::Release) == true {
+                while percpublock.wants_tlb_shootdown.load(Ordering::Relaxed) == true {
+                    my_percpublock.maybe_handle_tlb_shootdown();
+                    hint::spin_loop();
+                }
+            }
+            set_range(percpublock);
+        }
+        // One broadcast IPI (IpiTarget::Other) once every registered CPU has its request set.
+        crate::ipi::ipi(crate::ipi::IpiKind::Tlb, crate::ipi::IpiTarget::Other);
+    }
+}
impl PercpuBlock {
+    /// Effective scheduling priority for `context_prio`, with any priority donated
+    /// to this CPU applied. Lower numbers are higher priority (0-39 range).
+    pub fn effective_prio(&self, context_prio: usize) -> usize {
+        let donated = self.pi_donated_prio.load(Ordering::Relaxed);
+        // A donation only wins when strictly more urgent than the context's own priority.
+        if donated >= context_prio as u32 {
+            return context_prio;
+        }
+        donated as usize
+    }
+
pub fn maybe_handle_tlb_shootdown(&self) {
#[expect(clippy::bool_comparison)]
if self.wants_tlb_shootdown.swap(false, Ordering::Relaxed) == false {
return;
}
- // TODO: Finer-grained flush
- crate::memory::RmmA::invalidate_all();
+ let start = self.tlb_flush_start.load(Ordering::Acquire);
+ let count = self.tlb_flush_count.load(Ordering::Acquire);
+
+ if start != 0 && count > 0 && count <= TLB_RANGE_THRESHOLD {
+ // Range-based flush using INVLPG per page — cheaper than full CR3 reload.
+ for i in 0..count {
+ let addr = start + (i as u64) * 4096;
+ crate::memory::RmmA::invalidate(rmm::VirtualAddress::new(addr as usize));
+ }
+ } else {
+ // Full TLB flush (CR3 reload) for large ranges or global shootdowns.
+ crate::memory::RmmA::invalidate_all();
+ }
if let Some(addrsp) = &*self.current_addrsp.borrow() {
addrsp.tlb_ack.fetch_add(1, Ordering::Release);
@@ -223,6 +350,10 @@
last_queue: Cell::new(39),
mcs_sched_node: McsNode::new(),
mcs_contention_count: Cell::new(0),
+ tlb_flush_start: AtomicU64::new(0),
+ tlb_flush_count: AtomicU32::new(0),
+ pi_donated_prio: AtomicU32::new(u32::MAX),
+ current_prio: Cell::new(39),
ptrace_flags: Cell::new(PtraceFlags::empty()),
ptrace_session: RefCell::new(None),
inside_syscall: Cell::new(false),