cee25393d8
- Fix P15-8-init-cycle-detection.patch: replace visiting+error with seen+silent-skip to eliminate 11 false-positive 'dependency cycle detected' errors on shared deps
- Fix P0-daemon-fix-init-notify-unwrap.patch: remove eprintln! for missing INIT_NOTIFY (expected for oneshot_async services, ~7 daemons affected)
- Fix driver-manager hotplug loop: add PERMANENTLY_SKIPPED static set shared between hotplug handler and DriverConfig::probe() to stop infinite re-probing of Fatal/NotSupported/deferred-exhausted device+driver pairs (e.g. ided)
- Fix driver-manager log_timeline: suppress repeated EPIPE/ENOENT errors with AtomicI32 dedup and AtomicBool one-shot guards for boot timeline JSON
- Add driver-manager SIGTERM handler, ACPI bus registration, --status mode, driver reap loop, graceful shutdown, and reduced deferred retries (30→3)
122 lines
5.2 KiB
Diff
122 lines
5.2 KiB
Diff
--- a/src/sync/mcs.rs
|
|
+++ b/src/sync/mcs.rs
|
|
@@ -3,12 +3,21 @@
|
|
//! Each waiter spins on its own local `locked` flag instead of a shared lock
|
|
//! word, eliminating cache-line bouncing under contention. FIFO ordering
|
|
//! guarantees fairness. O(1) cache-line transfers on unlock.
|
|
+//!
|
|
+//! Supports transitive priority inheritance: when CPU A waits on a lock held
|
|
+//! by CPU B, and CPU B waits on a lock held by CPU C, A's priority is
|
|
+//! propagated through the chain to C (up to MAX_PI_CHAIN_DEPTH hops).
|
|
|
|
use core::sync::atomic::{AtomicBool, AtomicPtr, AtomicU32, Ordering};
|
|
use core::{hint, ptr};
|
|
|
|
use crate::percpu::PercpuBlock;
|
|
|
|
+/// Maximum depth for transitive priority inheritance chain following.
|
|
+/// Prevents infinite loops from theoretical lock cycles and bounds latency.
|
|
+/// Linux's rt-mutex chain walk defaults to a limit of 1024 (max_lock_depth); 8 is conservative for a microkernel with fewer nesting levels.
|
|
+const MAX_PI_CHAIN_DEPTH: u32 = 8;
|
|
+
|
|
/// A node in the MCS lock queue.
|
|
pub struct McsNode {
|
|
pub next: AtomicPtr<McsNode>,
|
|
@@ -55,17 +64,23 @@
|
|
(*prev).next.store((node as *const McsNode).cast_mut(), Ordering::Release);
|
|
}
|
|
let percpu = PercpuBlock::current();
|
|
+ // Record which lock we're spinning on (for transitive PI chain following)
|
|
+ percpu.waiting_on_lock.store(
|
|
+ (self as *const McsRawLock).cast_mut(),
|
|
+ Ordering::Release,
|
|
+ );
|
|
let mut donated = false;
|
|
while node.locked.load(Ordering::Acquire) {
|
|
percpu.maybe_handle_tlb_shootdown();
|
|
- // Donate priority to the lock holder once per acquisition
|
|
+ // Donate priority to the lock holder (transitively) once per acquisition
|
|
if !donated {
|
|
self.maybe_donate_priority(percpu);
|
|
donated = true;
|
|
}
|
|
hint::spin_loop();
|
|
}
|
|
- // We now hold the lock
|
|
+ // Clear waiting_on_lock before proceeding — we now hold the lock
|
|
+ percpu.waiting_on_lock.store(ptr::null_mut(), Ordering::Release);
|
|
self.holder_cpu.store(percpu.cpu_id.get(), Ordering::Release);
|
|
true
|
|
}
|
|
@@ -120,27 +135,54 @@
|
|
ok
|
|
}
|
|
|
|
- /// Donate current CPU's context priority to the lock holder's CPU.
|
|
+ /// Donate current CPU's context priority to the lock holder's CPU,
|
|
+ /// following the PI chain transitively (A→B→C).
|
|
+ ///
|
|
/// Reads priority from PercpuBlock::current_prio (cached by the scheduler)
|
|
/// to avoid acquiring any lock in the MCS spin loop.
|
|
+ ///
|
|
+ /// Chain following: if the holder is itself waiting on another lock,
|
|
+ /// we propagate our priority to that lock's holder too, up to
|
|
+ /// MAX_PI_CHAIN_DEPTH hops.
|
|
fn maybe_donate_priority(&self, my_percpu: &PercpuBlock) {
|
|
- let holder_cpu_id = self.holder_cpu.load(Ordering::Relaxed);
|
|
- if holder_cpu_id == u32::MAX {
|
|
- return;
|
|
- }
|
|
- // Read our own priority from the per-CPU cache (set by scheduler,
|
|
- // no lock required).
|
|
- let my_prio = my_percpu.current_prio.get();
|
|
- // Look up holder's PercpuBlock
|
|
- let holder_percpu = crate::percpu::get_for_cpu(
|
|
- crate::cpu_set::LogicalCpuId::new(holder_cpu_id),
|
|
- );
|
|
- if let Some(holder) = holder_percpu {
|
|
+ let my_prio = my_percpu.current_prio.get() as u32;
|
|
+ let mut current_holder_cpu = self.holder_cpu.load(Ordering::Relaxed);
|
|
+
|
|
+ for _ in 0..MAX_PI_CHAIN_DEPTH {
|
|
+ if current_holder_cpu == u32::MAX {
|
|
+ return;
|
|
+ }
|
|
+ let holder_percpu = crate::percpu::get_for_cpu(
|
|
+ crate::cpu_set::LogicalCpuId::new(current_holder_cpu),
|
|
+ );
|
|
+ let Some(holder) = holder_percpu else {
|
|
+ return;
|
|
+ };
|
|
+
|
|
+ // Donate if our priority is higher (lower number) than current donation
|
|
let current_donated = holder.pi_donated_prio.load(Ordering::Relaxed);
|
|
- // Donate if our priority is higher (lower number)
|
|
- if (my_prio as u32) < current_donated {
|
|
- holder.pi_donated_prio.store(my_prio as u32, Ordering::Release);
|
|
+ if my_prio < current_donated {
|
|
+ holder.pi_donated_prio.store(my_prio, Ordering::Release);
|
|
+ }
|
|
+
|
|
+ // Follow the chain: is this holder also waiting on another lock?
|
|
+ let next_lock_ptr = holder.waiting_on_lock.load(Ordering::Relaxed);
|
|
+ if next_lock_ptr.is_null() {
|
|
+ return;
|
|
+ }
|
|
+ // SAFETY: Assumes the pointed-to McsRawLock is a long-lived struct field
|
|
+ // (e.g., part of the run queue) that outlives this read. The holder
|
|
+ // published the pointer before spinning in acquire() and may have stopped
|
|
+ // waiting since. We only read holder_cpu (an atomic u32), no mutable access.
|
|
+ let next_holder_cpu =
|
|
+ unsafe { (*next_lock_ptr).holder_cpu.load(Ordering::Relaxed) };
|
|
+
|
|
+ // Self-loop check only: stop if the next lock's holder is the CPU we just visited; longer cycles are cut off by MAX_PI_CHAIN_DEPTH
|
|
+ if next_holder_cpu == current_holder_cpu {
|
|
+ return;
|
|
}
|
|
+ current_holder_cpu = next_holder_cpu;
|
|
}
|
|
+ // Chain depth exhausted — stop to bound latency
|
|
}
|
|
}
|