Files
RedBear-OS/local/patches/base/P18-9-msi-allocation-resilience.patch
T
vasilito cee25393d8 fix: boot process improvements — dependency cycle, INIT_NOTIFY, probing loop, and log spam fixes
- Fix P15-8-init-cycle-detection.patch: replace visiting+error with seen+silent-skip
  to eliminate 11 false-positive 'dependency cycle detected' errors on shared deps
- Fix P0-daemon-fix-init-notify-unwrap.patch: remove eprintln! for missing
  INIT_NOTIFY (expected for oneshot_async services, ~7 daemons affected)
- Fix driver-manager hotplug loop: add PERMANENTLY_SKIPPED static set shared
  between hotplug handler and DriverConfig::probe() to stop infinite re-probing
  of Fatal/NotSupported/deferred-exhausted device+driver pairs (e.g. ided)
- Fix driver-manager log_timeline: suppress repeated EPIPE/ENOENT errors with
  AtomicI32 dedup and AtomicBool one-shot guards for boot timeline JSON
- Add driver-manager SIGTERM handler, ACPI bus registration, --status mode,
  driver reap loop, graceful shutdown, and reduced deferred retries (30→3)
2026-05-17 12:34:02 +03:00

308 lines
11 KiB
Diff

--- a/drivers/pcid/src/driver_interface/irq_helpers.rs
+++ b/drivers/pcid/src/driver_interface/irq_helpers.rs
@@ -118,7 +118,7 @@
let mut handles = Vec::with_capacity(usize::from(count));
let mut index = 0;
- let mut first = None;
+ let mut first_aligned: Option<u8> = None;
while let Some(number) = available_irq_numbers.next() {
let number = number?;
@@ -127,8 +127,8 @@
if number % u8::from(alignment) != 0 {
continue;
}
- let first = *first.get_or_insert(number);
- let irq_number = first + index;
+ let base = *first_aligned.get_or_insert(number);
+ let irq_number = base + index;
// From the point where the range is aligned, we can start to advance until `count` IRQs
// have been allocated.
@@ -141,6 +141,15 @@
match File::create(format!("/scheme/irq/cpu-{:02x}/{}", cpu_id, irq_number)) {
Ok(handle) => handle,
+ // Vector already allocated by another process; release any partial range and
+ // restart the search from the next aligned position.
+ Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
+ drop(handles.drain(..));
+ first_aligned = None;
+ index = 0;
+ continue;
+ }
+
// return early if the entire range couldn't be allocated
Err(err) if err.kind() == io::ErrorKind::NotFound => break,
@@ -152,7 +161,7 @@
if handles.is_empty() {
return Ok(None);
}
- let first = match first {
+ let first = match first_aligned {
Some(f) => f,
None => return Ok(None),
};
@@ -180,40 +189,60 @@
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-pub fn allocate_single_interrupt_vector_for_msi(cpu_id: usize) -> (MsiAddrAndData, File) {
+pub fn allocate_single_interrupt_vector_for_msi(cpu_id: usize) -> Option<(MsiAddrAndData, File)> {
use crate::driver_interface::msi::x86 as x86_msix;
// FIXME for cpu_id >255 we need to use the IOMMU to use IRQ remapping
- let lapic_id = u8::try_from(cpu_id).expect("CPU id couldn't fit inside u8");
+ let lapic_id = match u8::try_from(cpu_id) {
+ Ok(id) => id,
+ Err(_) => {
+ log::warn!("cpu_id {} too large for MSI address format", cpu_id);
+ return None;
+ }
+ };
let rh = false;
let dm = false;
let addr = x86_msix::message_address(lapic_id, rh, dm);
- let (vector, interrupt_handle) = allocate_single_interrupt_vector(cpu_id)
- .expect("failed to allocate interrupt vector")
- .expect("no interrupt vectors left");
+ let (vector, interrupt_handle) = match allocate_single_interrupt_vector(cpu_id) {
+ Ok(Some(result)) => result,
+ Ok(None) => {
+ log::warn!("no interrupt vectors available for MSI on CPU {}", cpu_id);
+ return None;
+ }
+ Err(err) => {
+ log::warn!("failed to allocate interrupt vector for MSI on CPU {}: {}", cpu_id, err);
+ return None;
+ }
+ };
let msg_data = x86_msix::message_data_edge_triggered(x86_msix::DeliveryMode::Fixed, vector);
- (
+ Some((
MsiAddrAndData {
addr,
data: msg_data,
},
interrupt_handle,
- )
+ ))
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn allocate_first_msi_interrupt_on_bsp(
pcid_handle: &mut crate::driver_interface::PciFunctionHandle,
-) -> File {
+) -> Option<File> {
use crate::driver_interface::{MsiSetFeatureInfo, PciFeature, SetFeatureInfo};
// TODO: Allow allocation of up to 32 vectors.
- let destination_id = read_bsp_apic_id().expect("failed to read BSP apic id");
+ let destination_id = match read_bsp_apic_id() {
+ Ok(id) => id,
+ Err(err) => {
+ log::warn!("failed to read BSP APIC ID: {}", err);
+ return None;
+ }
+ };
let (msg_addr_and_data, interrupt_handle) =
- allocate_single_interrupt_vector_for_msi(destination_id);
+ allocate_single_interrupt_vector_for_msi(destination_id)?;
let set_feature_info = MsiSetFeatureInfo {
multi_message_enable: Some(0),
@@ -225,7 +254,7 @@
pcid_handle.enable_feature(PciFeature::Msi);
log::debug!("Enabled MSI");
- interrupt_handle
+ Some(interrupt_handle)
}
pub struct InterruptVector {
@@ -275,6 +304,7 @@
let has_msi = features.iter().any(|feature| feature.is_msi());
let has_msix = features.iter().any(|feature| feature.is_msix());
+ // Try MSI-X first, then MSI, then fall back to legacy INTx#.
if has_msix {
let msix_info = match pcid_handle.feature_info(super::PciFeature::MsiX) {
super::PciFeatureInfo::MsiX(msix) => msix,
@@ -282,28 +312,45 @@
};
let mut info = unsafe { msix_info.map_and_mask_all(pcid_handle) };
- pcid_handle.enable_feature(crate::driver_interface::PciFeature::MsiX);
-
let entry = info.table_entry_pointer(0);
let bsp_cpu_id = read_bsp_apic_id()
.unwrap_or_else(|err| panic!("{driver}: failed to read BSP APIC ID: {err}"));
- let (msg_addr_and_data, irq_handle) = allocate_single_interrupt_vector_for_msi(bsp_cpu_id);
- entry.write_addr_and_data(msg_addr_and_data);
- entry.unmask();
- InterruptVector {
- irq_handle,
- vector: 0,
- kind: InterruptVectorKind::MsiX { table_entry: entry },
+ if let Some((msg_addr_and_data, irq_handle)) =
+ allocate_single_interrupt_vector_for_msi(bsp_cpu_id)
+ {
+ // Vector allocated: enable MSI-X and configure the table entry.
+ pcid_handle.enable_feature(crate::driver_interface::PciFeature::MsiX);
+ entry.write_addr_and_data(msg_addr_and_data);
+ entry.unmask();
+
+ return InterruptVector {
+ irq_handle,
+ vector: 0,
+ kind: InterruptVectorKind::MsiX { table_entry: entry },
+ };
}
- } else if has_msi {
- InterruptVector {
- irq_handle: allocate_first_msi_interrupt_on_bsp(pcid_handle),
- vector: 0,
- kind: InterruptVectorKind::Msi,
+
+ // MSI-X vector allocation failed. MSI-X was never enabled in config space, so the driver
+ // falls through to MSI and then legacy INTx#. Mapped BARs are released when `info` drops.
+ log::warn!("{driver}: MSI-X vector allocation failed, falling back");
+ }
+
+ if has_msi {
+ if let Some(irq_handle) = allocate_first_msi_interrupt_on_bsp(pcid_handle) {
+ return InterruptVector {
+ irq_handle,
+ vector: 0,
+ kind: InterruptVectorKind::Msi,
+ };
}
- } else if let Some(irq) = pcid_handle.config().func.legacy_interrupt_line {
+
+ // MSI allocation failed; fall back to legacy.
+ log::warn!("{driver}: MSI allocation failed, falling back to legacy");
+ }
+
+ if let Some(irq) = pcid_handle.config().func.legacy_interrupt_line {
// INTx# pin based interrupts.
InterruptVector {
irq_handle: irq.irq_handle(driver),
--- a/drivers/virtio-core/src/transport.rs
+++ b/drivers/virtio-core/src/transport.rs
@@ -19,6 +19,8 @@
SyscallError(#[from] libredox::error::Error),
#[error("the device is incapable of {0:?}")]
InCapable(CfgType),
+ #[error("MSI/MSI-X vector allocation failed")]
+ MsiAllocationFailed,
}
/// Returns the queue part sizes in bytes.
--- a/drivers/virtio-core/src/arch/x86.rs
+++ b/drivers/virtio-core/src/arch/x86.rs
@@ -23,7 +23,8 @@
let destination_id = read_bsp_apic_id().expect("virtio_core: `read_bsp_apic_id()` failed");
let (msg_addr_and_data, interrupt_handle) =
- allocate_single_interrupt_vector_for_msi(destination_id);
+ allocate_single_interrupt_vector_for_msi(destination_id)
+ .ok_or(Error::MsiAllocationFailed)?;
table_entry_pointer.write_addr_and_data(msg_addr_and_data);
table_entry_pointer.unmask();
--- a/drivers/net/virtio-netd/src/main.rs
+++ b/drivers/net/virtio-netd/src/main.rs
@@ -31,8 +31,13 @@
}
fn daemon_runner(daemon: daemon::Daemon, pcid_handle: PciFunctionHandle) -> ! {
- deamon(daemon, pcid_handle).unwrap();
- unreachable!();
+ match deamon(daemon, pcid_handle) {
+ Ok(()) => unreachable!(),
+ Err(err) => {
+ log::error!("virtio-netd: fatal error: {err}");
+ std::process::exit(1);
+ }
+ }
}
fn deamon(
--- a/drivers/storage/virtio-blkd/src/main.rs
+++ b/drivers/storage/virtio-blkd/src/main.rs
@@ -103,8 +103,13 @@
}
fn daemon_runner(redox_daemon: daemon::Daemon, pcid_handle: PciFunctionHandle) -> ! {
- daemon(redox_daemon, pcid_handle).unwrap();
- unreachable!();
+ match daemon(redox_daemon, pcid_handle) {
+ Ok(()) => unreachable!(),
+ Err(err) => {
+ log::error!("virtio-blkd: fatal error: {err}");
+ std::process::exit(1);
+ }
+ }
}
fn daemon(daemon: daemon::Daemon, mut pcid_handle: PciFunctionHandle) -> anyhow::Result<()> {
--- a/drivers/usb/xhcid/src/main.rs
+++ b/drivers/usb/xhcid/src/main.rs
@@ -76,22 +76,32 @@
let table_entry_pointer = info.table_entry_pointer(k);
let destination_id = read_bsp_apic_id().expect("xhcid: failed to read BSP apic id");
- let (msg_addr_and_data, interrupt_handle) =
- allocate_single_interrupt_vector_for_msi(destination_id);
- table_entry_pointer.write_addr_and_data(msg_addr_and_data);
- table_entry_pointer.unmask();
- (Some(interrupt_handle), InterruptMethod::Msi)
+ if let Some((msg_addr_and_data, interrupt_handle)) =
+ allocate_single_interrupt_vector_for_msi(destination_id)
+ {
+ table_entry_pointer.write_addr_and_data(msg_addr_and_data);
+ table_entry_pointer.unmask();
+
+ pcid_handle.enable_feature(PciFeature::MsiX);
+ log::debug!("Enabled MSI-X");
+
+ return (Some(interrupt_handle), InterruptMethod::Msi);
+ }
+
+ // MSI-X allocation failed; fall through to MSI or legacy.
+ log::warn!("xhcid: MSI-X vector allocation failed, falling back");
};
+ }
- pcid_handle.enable_feature(PciFeature::MsiX);
- log::debug!("Enabled MSI-X");
+ if has_msi {
+ if let Some(interrupt_handle) = allocate_first_msi_interrupt_on_bsp(pcid_handle) {
+ return (Some(interrupt_handle), InterruptMethod::Msi);
+ }
+ log::warn!("xhcid: MSI allocation failed, falling back to legacy");
+ }
- method
- } else if has_msi {
- let interrupt_handle = allocate_first_msi_interrupt_on_bsp(pcid_handle);
- (Some(interrupt_handle), InterruptMethod::Msi)
- } else if let Some(irq) = pci_config.func.legacy_interrupt_line {
+ if let Some(irq) = pci_config.func.legacy_interrupt_line {
log::debug!("Legacy IRQ {}", irq);
// legacy INTx# interrupt pins.