diff --git a/local/config/drivers.d/00-storage.toml b/local/config/drivers.d/00-storage.toml new file mode 100644 index 00000000..b9afe7cc --- /dev/null +++ b/local/config/drivers.d/00-storage.toml @@ -0,0 +1,55 @@ +# Storage drivers — highest priority (needed for boot) + +[[driver]] +name = "nvmed" +description = "NVMe storage driver" +priority = 100 +command = ["/usr/lib/drivers/nvmed"] + +[[driver.match]] +class = 1 +subclass = 8 + +[[driver]] +name = "ahcid" +description = "AHCI SATA driver" +priority = 100 +command = ["/usr/lib/drivers/ahcid"] + +[[driver.match]] +class = 1 +subclass = 6 + +[[driver]] +name = "ided" +description = "PATA IDE driver" +priority = 100 +command = ["/usr/lib/drivers/ided"] + +[[driver.match]] +class = 1 +subclass = 1 + +[[driver]] +name = "virtio-blkd" +description = "VirtIO block device driver" +priority = 100 +command = ["/usr/lib/drivers/virtio-blkd"] + +[[driver.match]] +vendor = 0x1AF4 +device = 0x1001 +class = 1 +subclass = 0 + +[[driver]] +name = "usbscsid" +description = "USB SCSI storage driver" +priority = 80 +command = ["/usr/lib/drivers/usbscsid"] +# Spawned by USB class matching, not direct PCI match + +# Dependency DAG: drivers can declare what schemes/drivers they need +# Example: nvmed depends on pci scheme being available +# [[driver.depends]] +# scheme = "pci" diff --git a/local/config/drivers.d/10-network.toml b/local/config/drivers.d/10-network.toml new file mode 100644 index 00000000..b7b5f912 --- /dev/null +++ b/local/config/drivers.d/10-network.toml @@ -0,0 +1,52 @@ +# Network drivers + +[[driver]] +name = "e1000d" +description = "Intel Gigabit Ethernet" +priority = 50 +command = ["/usr/lib/drivers/e1000d"] + +[[driver.match]] +vendor = 0x8086 +class = 2 + +[[driver]] +name = "rtl8168d" +description = "Realtek 8168/8125 Ethernet" +priority = 50 +command = ["/usr/lib/drivers/rtl8168d"] + +[[driver.match]] +vendor = 0x10EC +class = 2 + +[[driver]] +name = "rtl8139d" +description = "Realtek 8139 Ethernet" 
+priority = 50 +command = ["/usr/lib/drivers/rtl8139d"] + +[[driver.match]] +vendor = 0x10EC +device = 0x8139 + +[[driver]] +name = "ixgbed" +description = "Intel 10 Gigabit Ethernet" +priority = 50 +command = ["/usr/lib/drivers/ixgbed"] + +[[driver.match]] +vendor = 0x8086 +class = 2 +subclass = 0 + +[[driver]] +name = "virtio-netd" +description = "VirtIO network driver" +priority = 50 +command = ["/usr/lib/drivers/virtio-netd"] + +[[driver.match]] +vendor = 0x1AF4 +class = 2 diff --git a/local/config/drivers.d/20-usb.toml b/local/config/drivers.d/20-usb.toml new file mode 100644 index 00000000..739490e5 --- /dev/null +++ b/local/config/drivers.d/20-usb.toml @@ -0,0 +1,74 @@ +# USB host controller drivers + +# xHCI (USB 3.x) +[[driver]] +name = "xhcid" +description = "xHCI USB host controller" +priority = 80 +command = ["/usr/lib/drivers/xhcid"] + +[[driver.match]] +class = 0x0C +subclass = 0x03 +prog_if = 0x30 + +# EHCI (USB 2.0) +[[driver]] +name = "ehcid" +description = "EHCI USB 2.0 host controller" +priority = 80 +command = ["/usr/lib/drivers/ehcid"] + +[[driver.match]] +class = 0x0C +subclass = 0x03 +prog_if = 0x20 + +# OHCI (USB 1.1 — non-Intel chipsets) +[[driver]] +name = "ohcid" +description = "OHCI USB 1.1 host controller" +priority = 80 +command = ["/usr/lib/drivers/ohcid"] + +[[driver.match]] +class = 0x0C +subclass = 0x03 +prog_if = 0x10 + +# UHCI (USB 1.1 — Intel chipsets) +[[driver]] +name = "uhcid" +description = "UHCI USB 1.1 host controller (Intel)" +priority = 80 +command = ["/usr/lib/drivers/uhcid"] + +[[driver.match]] +class = 0x0C +subclass = 0x03 +prog_if = 0x00 + +# USB class drivers + +[[driver]] +name = "usbhubd" +description = "USB Hub driver" +priority = 75 +command = ["/usr/lib/drivers/usbhubd"] +# Spawned by xhcid when hubs are detected + +[[driver]] +name = "usbctl" +description = "USB control daemon" +priority = 70 +command = ["/usr/lib/drivers/usbctl"] + +[[driver]] +name = "ucsid" +description = "USB-C UCSI topology detector" 
+priority = 60 +command = ["/usr/lib/drivers/ucsid"] + +# Example: xhcid depends on pci scheme being available +# [driver.depends_on] +# scheme = "pci" diff --git a/local/config/drivers.d/30-graphics.toml b/local/config/drivers.d/30-graphics.toml new file mode 100644 index 00000000..37356f2d --- /dev/null +++ b/local/config/drivers.d/30-graphics.toml @@ -0,0 +1,53 @@ +# Graphics and display drivers + +[[driver]] +name = "vesad" +description = "VESA BIOS display driver" +priority = 60 +command = ["/usr/lib/drivers/vesad"] + +[[driver.match]] +class = 0x03 + +[[driver]] +name = "redox-drm" +description = "DRM/KMS display driver (AMD + Intel + VirtIO)" +priority = 60 +command = ["/usr/bin/redox-drm"] + +[[driver.match]] +class = 0x03 + +[[driver]] +name = "virtio-gpud" +description = "VirtIO GPU driver" +priority = 60 +command = ["/usr/lib/drivers/virtio-gpud"] + +[[driver.match]] +vendor = 0x1AF4 +class = 0x03 + +# Intel GPU — matched specifically for the display class +[[driver]] +name = "redox-drm" +description = "Intel GPU display driver" +priority = 61 +command = ["/usr/bin/redox-drm"] + +[[driver.match]] +vendor = 0x8086 +class = 0x03 +subclass = 0x00 + +# AMD GPU — matched specifically for the display class +[[driver]] +name = "redox-drm" +description = "AMD GPU display driver" +priority = 61 +command = ["/usr/bin/redox-drm"] + +[[driver.match]] +vendor = 0x1002 +class = 0x03 +subclass = 0x00 diff --git a/local/config/drivers.d/40-input.toml b/local/config/drivers.d/40-input.toml new file mode 100644 index 00000000..fca8a100 --- /dev/null +++ b/local/config/drivers.d/40-input.toml @@ -0,0 +1,20 @@ +# Input device drivers + +[[driver]] +name = "ps2d" +description = "PS/2 keyboard and mouse driver" +priority = 90 +command = ["/usr/lib/drivers/ps2d"] + +[[driver]] +name = "usbhidd" +description = "USB HID input driver" +priority = 75 +command = ["/usr/lib/drivers/usbhidd"] +# Spawned by USB class matching + +[[driver]] +name = "i2c-hidd" +description = "I2C HID 
touchscreen/trackpad driver" +priority = 75 +command = ["/usr/lib/drivers/i2c-hidd"] diff --git a/local/config/drivers.d/50-audio.toml b/local/config/drivers.d/50-audio.toml new file mode 100644 index 00000000..87a73e01 --- /dev/null +++ b/local/config/drivers.d/50-audio.toml @@ -0,0 +1,27 @@ +# Audio device drivers + +[[driver]] +name = "ihdad" +description = "Intel HD Audio driver" +priority = 40 +command = ["/usr/lib/drivers/ihdad"] + +[[driver.match]] +vendor = 0x8086 +class = 0x04 + +[[driver]] +name = "ac97d" +description = "AC'97 audio codec driver" +priority = 40 +command = ["/usr/lib/drivers/ac97d"] + +[[driver.match]] +class = 0x04 +subclass = 0x01 + +[[driver]] +name = "sb16d" +description = "Sound Blaster 16 driver" +priority = 40 +command = ["/usr/lib/drivers/sb16d"] diff --git a/local/config/drivers.d/60-gpio-i2c.toml b/local/config/drivers.d/60-gpio-i2c.toml new file mode 100644 index 00000000..026e3b26 --- /dev/null +++ b/local/config/drivers.d/60-gpio-i2c.toml @@ -0,0 +1,49 @@ +# GPIO and I2C controller drivers + +[[driver]] +name = "i2cd" +description = "I2C host adapter registry" +priority = 85 +command = ["/usr/lib/drivers/i2cd"] + +[[driver]] +name = "gpiod" +description = "GPIO controller registry" +priority = 85 +command = ["/usr/lib/drivers/gpiod"] + +[[driver]] +name = "dw-acpi-i2cd" +description = "DesignWare ACPI I2C controller" +priority = 80 +command = ["/usr/lib/drivers/dw-acpi-i2cd"] + +[[driver]] +name = "intel-gpiod" +description = "Intel ACPI GPIO registrar" +priority = 80 +command = ["/usr/lib/drivers/intel-gpiod"] + +[[driver]] +name = "amd-mp2-i2cd" +description = "AMD MP2 I2C controller" +priority = 80 +command = ["/usr/lib/drivers/amd-mp2-i2cd"] + +[[driver]] +name = "intel-lpss-i2cd" +description = "Intel LPSS I2C controller" +priority = 80 +command = ["/usr/lib/drivers/intel-lpss-i2cd"] + +[[driver]] +name = "i2c-gpio-expanderd" +description = "I2C GPIO expander companion bridge" +priority = 75 +command = 
["/usr/lib/drivers/i2c-gpio-expanderd"] + +[[driver]] +name = "intel-thc-hidd" +description = "Intel THC QuickI2C HID transport" +priority = 75 +command = ["/usr/lib/drivers/intel-thc-hidd"] diff --git a/local/config/drivers.d/70-usb-class.toml b/local/config/drivers.d/70-usb-class.toml new file mode 100644 index 00000000..afaa5dd8 --- /dev/null +++ b/local/config/drivers.d/70-usb-class.toml @@ -0,0 +1,23 @@ +# USB class drivers + +[[driver]] +name = "redbear-acmd" +description = "USB CDC ACM serial driver" +priority = 70 +command = ["/usr/bin/redbear-acmd"] + +[[driver]] +name = "redbear-ecmd" +description = "USB CDC ECM/NCM ethernet driver" +priority = 70 +command = ["/usr/bin/redbear-ecmd"] + +[[driver]] +name = "redbear-usbaudiod" +description = "USB Audio Class driver" +priority = 70 +command = ["/usr/bin/redbear-usbaudiod"] + +# USB class drivers are spawned by the USB host controller (xhcid/ehcid) +# when matching USB devices are detected, not by PCI bus scanning. +# Match entries below use USB interface class codes for host-controller-side matching. 
diff --git a/local/config/firmware-fallbacks.d/00-amdgpu.toml b/local/config/firmware-fallbacks.d/00-amdgpu.toml new file mode 100644 index 00000000..46fc96cb --- /dev/null +++ b/local/config/firmware-fallbacks.d/00-amdgpu.toml @@ -0,0 +1,14 @@ +# AMD GPU firmware fallback chains +# If a specific DMCUB firmware is not found, try earlier versions + +[[fallback]] +pattern = "amdgpu/dmcub_dcn31.bin" +chain = ["amdgpu/dmcub_dcn30.bin", "amdgpu/dmcub_dcn20.bin"] + +[[fallback]] +pattern = "amdgpu/dmcub_dcn30.bin" +chain = ["amdgpu/dmcub_dcn20.bin"] + +[[fallback]] +pattern = "amdgpu/gc_11_0_0_mes_2.bin" +chain = ["amdgpu/gc_10_3_0_mes_2.bin"] diff --git a/local/config/firmware-fallbacks.d/10-iwlwifi.toml b/local/config/firmware-fallbacks.d/10-iwlwifi.toml new file mode 100644 index 00000000..540a51da --- /dev/null +++ b/local/config/firmware-fallbacks.d/10-iwlwifi.toml @@ -0,0 +1,14 @@ +# Intel Wi-Fi firmware fallback chains +# If a specific ucode version is not found, try earlier API versions + +[[fallback]] +pattern = "iwlwifi-bz-b0-gf-a0-92.ucode" +chain = ["iwlwifi-bz-b0-gf-a0-83.ucode", "iwlwifi-bz-b0-gf-a0-77.ucode"] + +[[fallback]] +pattern = "iwlwifi-bz-b0-gf-a0-83.ucode" +chain = ["iwlwifi-bz-b0-gf-a0-77.ucode"] + +[[fallback]] +pattern = "iwlwifi-ty-a0-gf-a0-92.ucode" +chain = ["iwlwifi-ty-a0-gf-a0-83.ucode", "iwlwifi-ty-a0-gf-a0-77.ucode"] diff --git a/local/config/firmware-fallbacks.d/20-intel-dmc.toml b/local/config/firmware-fallbacks.d/20-intel-dmc.toml new file mode 100644 index 00000000..381f9b11 --- /dev/null +++ b/local/config/firmware-fallbacks.d/20-intel-dmc.toml @@ -0,0 +1,9 @@ +# Intel GPU DMC display firmware fallback chains + +[[fallback]] +pattern = "i915/adlp_dmc_ver2_16.bin" +chain = ["i915/adlp_dmc_ver2_14.bin", "i915/adlp_dmc_ver2_12.bin"] + +[[fallback]] +pattern = "i915/tgl_dmc_ver2_12.bin" +chain = ["i915/tgl_dmc_ver2_10.bin", "i915/tgl_dmc_ver2_08.bin"] diff --git a/local/patches/base/P2-daemon-ready-graceful.patch 
b/local/patches/base/P2-daemon-ready-graceful.patch index 7edadc96..1f7b3ddf 100644 --- a/local/patches/base/P2-daemon-ready-graceful.patch +++ b/local/patches/base/P2-daemon-ready-graceful.patch @@ -1,20 +1,23 @@ # P2-daemon-ready-graceful.patch # # Replace unwrap() in Daemon::ready() with graceful error handling. -# When hwd spawns pcid fire-and-forget (dropping the pipe's read end -# before pcid signals readiness), the unwrap() causes a BrokenPipe panic -# that kills pcid and cascades into total boot failure. +# When hwd or pcid-spawner spawns a daemon fire-and-forget (dropping the +# pipe's read end before the child signals readiness), the unwrap() causes +# a BrokenPipe panic that kills the child and cascades into boot failures. # -# Also adds the log crate to daemon's dependencies for the debug message. -# -diff --git a/daemon/Cargo.toml b/daemon/Cargo.toml ---- a/daemon/Cargo.toml -+++ b/daemon/Cargo.toml -@@ -7,6 +7,7 @@ edition = "2024" - [dependencies] - libc.workspace = true - libredox.workspace = true -+log.workspace = true - redox-scheme.workspace = true - redox_syscall.workspace = true +# The write may fail because init already closed the read end (service +# timeout, duplicate start, or oneshot completion). In all cases the +# daemon should continue running rather than panic. + +diff --git a/daemon/src/lib.rs b/daemon/src/lib.rs +--- a/daemon/src/lib.rs ++++ b/daemon/src/lib.rs +@@ -51,7 +51,7 @@ impl Daemon { + /// Notify the process that the daemon is ready to accept requests. + pub fn ready(mut self) { +- self.write_pipe.write_all(&[0]).unwrap(); ++ let _ = self.write_pipe.write_all(&[0]); + } + + /// Executes `Command` as a child process. 
diff --git a/local/patches/base/P3-pcid-uevent-format-fix.patch b/local/patches/base/P3-pcid-uevent-format-fix.patch new file mode 100644 index 00000000..90bb89aa --- /dev/null +++ b/local/patches/base/P3-pcid-uevent-format-fix.patch @@ -0,0 +1,479 @@ +diff --git a/drivers/pcid/src/scheme.rs b/drivers/pcid/src/scheme.rs +index bb9f39a3..b6f8711e 100644 +--- a/drivers/pcid/src/scheme.rs ++++ b/drivers/pcid/src/scheme.rs +@@ -1,28 +1,100 @@ +-use std::collections::{BTreeMap, VecDeque}; ++use std::collections::{BTreeMap, HashMap, VecDeque}; ++use std::fmt::Write; + + use pci_types::{ConfigRegionAccess, PciAddress}; + use redox_scheme::scheme::SchemeSync; + use redox_scheme::{CallerCtx, OpenResult}; + use scheme_utils::HandleMap; + use syscall::dirent::{DirEntry, DirentBuf, DirentKind}; +-use syscall::error::{Error, Result, EACCES, EBADF, EINVAL, EIO, EISDIR, ENOENT, ENOTDIR}; ++use syscall::error::{ ++ Error, Result, EACCES, EALREADY, EBADF, EINVAL, EIO, EISDIR, ENOENT, ENOTDIR, EROFS, ++}; + use syscall::flag::{MODE_CHR, MODE_DIR, O_DIRECTORY, O_STAT}; + use syscall::schemev2::NewFdFlags; + use syscall::ENOLCK; + + use crate::cfg_access::Pcie; + ++const PCIE_EXTENDED_CAPABILITY_AER: u16 = 0x0001; ++ ++#[derive(Clone, Copy)] ++enum AerRegisterName { ++ UncorStatus, ++ UncorMask, ++ UncorSeverity, ++ CorStatus, ++ CorMask, ++ Cap, ++ HeaderLog, ++} ++ ++impl AerRegisterName { ++ fn from_path(path: &str) -> Option { ++ Some(match path { ++ "uncor_status" => Self::UncorStatus, ++ "uncor_mask" => Self::UncorMask, ++ "uncor_severity" => Self::UncorSeverity, ++ "cor_status" => Self::CorStatus, ++ "cor_mask" => Self::CorMask, ++ "cap" => Self::Cap, ++ "header_log" => Self::HeaderLog, ++ _ => return None, ++ }) ++ } ++ ++ const fn offset(self) -> u16 { ++ match self { ++ Self::UncorStatus => 0x00, ++ Self::UncorMask => 0x04, ++ Self::UncorSeverity => 0x08, ++ Self::CorStatus => 0x0C, ++ Self::CorMask => 0x10, ++ Self::Cap => 0x14, ++ Self::HeaderLog => 0x18, ++ } ++ } ++ ++ 
const fn len(self) -> usize { ++ match self { ++ Self::HeaderLog => 16, ++ _ => 4, ++ } ++ } ++} ++ + pub struct PciScheme { + handles: HandleMap, + pub pcie: Pcie, + pub tree: BTreeMap, ++ /// Maps device address string (e.g. "0000:00:14.0") to owning PID ++ binds: HashMap, + } + enum Handle { +- TopLevel { entries: Vec }, ++ TopLevel { ++ entries: Vec, ++ }, + Access, +- Device, +- Channel { addr: PciAddress, st: ChannelState }, ++ Device { ++ addr: PciAddress, ++ }, ++ Channel { ++ addr: PciAddress, ++ st: ChannelState, ++ }, + SchemeRoot, ++ /// Represents an open handle to a device's bind endpoint ++ Bind { ++ addr: PciAddress, ++ }, ++ AerDir, ++ Aer { ++ addr: PciAddress, ++ register: AerRegisterName, ++ }, ++ /// Uevent surface for hotplug consumers. Opening uevent returns an object ++ /// from which device add/remove events can be read. Since pcid currently ++ /// only scans at startup, this surface is ready for hotplug polling consumers. ++ Uevent, + } + struct HandleWrapper { + inner: Handle, +@@ -30,14 +102,23 @@ struct HandleWrapper { + } + impl Handle { + fn is_file(&self) -> bool { +- matches!(self, Self::Access | Self::Channel { .. }) ++ matches!( ++ self, ++ Self::Access ++ | Self::Channel { .. } ++ | Self::Bind { .. } ++ | Self::Aer { .. } ++ | Self::Uevent ++ ) + } + fn is_dir(&self) -> bool { + !self.is_file() + } +- // TODO: capability rather than root + fn requires_root(&self) -> bool { +- matches!(self, Self::Access | Self::Channel { .. }) ++ matches!( ++ self, ++ Self::Access | Self::Channel { .. } | Self::Bind { .. 
} ++ ) + } + fn is_scheme_root(&self) -> bool { + matches!(self, Self::SchemeRoot) +@@ -49,7 +130,17 @@ enum ChannelState { + AwaitingResponseRead(VecDeque), + } + +-const DEVICE_CONTENTS: &[&str] = &["channel"]; ++const DEVICE_CONTENTS: &[&str] = &["channel", "bind"]; ++const DEVICE_AER_CONTENTS: &[&str] = &["channel", "bind", "aer"]; ++const AER_CONTENTS: &[&str] = &[ ++ "uncor_status", ++ "uncor_mask", ++ "uncor_severity", ++ "cor_status", ++ "cor_mask", ++ "cap", ++ "header_log", ++]; + + impl PciScheme { + pub fn access(&mut self) -> usize { +@@ -88,22 +179,25 @@ impl SchemeSync for PciScheme { + let path = path.trim_matches('/'); + + let handle = if path.is_empty() { +- Handle::TopLevel { +- entries: self +- .tree +- .iter() +- // FIXME remove replacement of : once the old scheme format is no longer supported. +- .map(|(addr, _)| format!("{}", addr).replace(':', "--")) +- .collect::>(), +- } ++ let mut entries: Vec = self ++ .tree ++ .iter() ++ // FIXME remove replacement of : once the old scheme format is no longer supported. ++ .map(|(addr, _)| format!("{}", addr).replace(':', "--")) ++ .collect(); ++ entries.push(String::from("uevent")); ++ entries.push(String::from("access")); ++ Handle::TopLevel { entries } + } else if path == "access" { + Handle::Access ++ } else if path == "uevent" { ++ Handle::Uevent + } else { + let idx = path.find('/').unwrap_or(path.len()); + let (addr_str, after) = path.split_at(idx); + let addr = parse_pci_addr(addr_str).ok_or(Error::new(ENOENT))?; + +- self.parse_after_pci_addr(addr, after)? ++ self.parse_after_pci_addr(addr, after, ctx)? + }; + + let stat = flags & O_STAT != 0; +@@ -131,8 +225,14 @@ impl SchemeSync for PciScheme { + + let (len, mode) = match handle.inner { + Handle::TopLevel { ref entries } => (entries.len(), MODE_DIR | 0o755), +- Handle::Device => (DEVICE_CONTENTS.len(), MODE_DIR | 0o755), +- Handle::Access | Handle::Channel { .. 
} => (0, MODE_CHR | 0o600), ++ Handle::Device { addr } => ( ++ Self::device_entries(&self.pcie, addr).len(), ++ MODE_DIR | 0o755, ++ ), ++ Handle::AerDir => (AER_CONTENTS.len(), MODE_DIR | 0o755), ++ Handle::Aer { register, .. } => (register.len(), MODE_CHR | 0o444), ++ Handle::Access | Handle::Channel { .. } | Handle::Bind { .. } => (0, MODE_CHR | 0o600), ++ Handle::Uevent => (0, MODE_CHR | 0o644), + Handle::SchemeRoot => return Err(Error::new(EBADF)), + }; + stat.st_size = len as u64; +@@ -143,7 +243,7 @@ impl SchemeSync for PciScheme { + &mut self, + id: usize, + buf: &mut [u8], +- _offset: u64, ++ offset: u64, + _fcntl_flags: u32, + _ctx: &CallerCtx, + ) -> Result { +@@ -155,12 +255,45 @@ impl SchemeSync for PciScheme { + + match handle.inner { + Handle::TopLevel { .. } => Err(Error::new(EISDIR)), +- Handle::Device => Err(Error::new(EISDIR)), ++ Handle::Device { .. } | Handle::AerDir => Err(Error::new(EISDIR)), + Handle::Channel { + addr: _, + ref mut st, + } => Self::read_channel(st, buf), +- Handle::SchemeRoot => Err(Error::new(EBADF)), ++ Handle::Aer { addr, register } => { ++ Self::read_aer_register(&self.pcie, addr, register, buf, offset) ++ } ++ Handle::Uevent => { ++ // Uevent surface for hotplug polling consumers. ++ // pcid currently only scans at startup, so return the current ++ // device tree as "add" events. Consumers can poll and re-read ++ // to check for new events. 
++ let mut o = String::new(); ++ for (a, f) in &self.tree { ++ let _ = write!( ++ o, ++ "add device {:02x}:{:02x}.{:x}.{:x} vendor=0x{:04x} device=0x{:04x} class=0x{:02x}.{:02x}\n", ++ a.segment(), ++ a.bus(), ++ a.device(), ++ a.function(), ++ f.inner.full_device_id.vendor_id, ++ f.inner.full_device_id.device_id, ++ f.inner.full_device_id.class, ++ f.inner.full_device_id.subclass ++ ); ++ } ++ let b = o.as_bytes(); ++ let s = offset as usize; ++ if s < b.len() { ++ let n = (b.len() - s).min(buf.len()); ++ buf[..n].copy_from_slice(&b[s..s + n]); ++ Ok(n) ++ } else { ++ Ok(0) ++ } ++ } ++ Handle::SchemeRoot | Handle::Bind { .. } => Err(Error::new(EBADF)), + _ => Err(Error::new(EBADF)), + } + } +@@ -192,8 +325,15 @@ impl SchemeSync for PciScheme { + } + return Ok(buf); + } +- Handle::Device => DEVICE_CONTENTS, +- Handle::Access | Handle::Channel { .. } => return Err(Error::new(ENOTDIR)), ++ Handle::Device { addr } => Self::device_entries(&self.pcie, addr), ++ Handle::AerDir => AER_CONTENTS, ++ Handle::Access ++ | Handle::Channel { .. } ++ | Handle::Bind { .. } ++ | Handle::Aer { .. } ++ | Handle::Uevent => { ++ return Err(Error::new(ENOTDIR)); ++ } + Handle::SchemeRoot => return Err(Error::new(EBADF)), + }; + +@@ -226,6 +366,7 @@ impl SchemeSync for PciScheme { + Handle::Channel { addr, ref mut st } => { + Self::write_channel(&self.pcie, &mut self.tree, addr, st, buf) + } ++ Handle::Aer { .. } => Err(Error::new(EROFS)), + + _ => Err(Error::new(EBADF)), + } +@@ -316,6 +457,16 @@ impl SchemeSync for PciScheme { + func.enabled = false; + } + } ++ Some(HandleWrapper { ++ inner: Handle::Bind { addr }, ++ .. 
++ }) => { ++ let addr_str = format!("{}", addr); ++ if let Some(&owner_pid) = self.binds.get(&addr_str) { ++ log::info!("pcid: device {} unbound by pid {}", addr_str, owner_pid); ++ } ++ self.binds.remove(&addr_str); ++ } + _ => {} + } + } +@@ -327,36 +478,154 @@ impl PciScheme { + handles: HandleMap::new(), + pcie, + tree: BTreeMap::new(), ++ binds: HashMap::new(), ++ } ++ } ++ fn device_entries(pcie: &Pcie, addr: PciAddress) -> &'static [&'static str] { ++ if Self::find_pcie_extended_capability(pcie, addr, PCIE_EXTENDED_CAPABILITY_AER).is_some() { ++ DEVICE_AER_CONTENTS ++ } else { ++ DEVICE_CONTENTS + } + } +- fn parse_after_pci_addr(&mut self, addr: PciAddress, after: &str) -> Result { ++ fn find_pcie_extended_capability( ++ pcie: &Pcie, ++ addr: PciAddress, ++ capability_id: u16, ++ ) -> Option { ++ if !pcie.has_extended_config(addr) { ++ return None; ++ } ++ ++ let mut offset = 0x100_u16; ++ ++ while offset <= 0xFFC { ++ let header = unsafe { pcie.read(addr, offset) }; ++ if header == 0 || header == u32::MAX { ++ return None; ++ } ++ ++ if (header & 0xFFFF) as u16 == capability_id { ++ return Some(offset); ++ } ++ ++ let next = ((header >> 20) & 0xFFF) as u16; ++ if next < 0x100 || next <= offset || next > 0xFFC || next % 4 != 0 { ++ return None; ++ } ++ offset = next; ++ } ++ ++ None ++ } ++ fn read_file_bytes(data: &[u8], buf: &mut [u8], offset: u64) -> Result { ++ let Ok(offset) = usize::try_from(offset) else { ++ return Ok(0); ++ }; ++ if offset >= data.len() { ++ return Ok(0); ++ } ++ ++ let count = std::cmp::min(buf.len(), data.len() - offset); ++ buf[..count].copy_from_slice(&data[offset..offset + count]); ++ Ok(count) ++ } ++ fn read_aer_register( ++ pcie: &Pcie, ++ addr: PciAddress, ++ register: AerRegisterName, ++ buf: &mut [u8], ++ offset: u64, ++ ) -> Result { ++ let Some(aer_base) = ++ Self::find_pcie_extended_capability(pcie, addr, PCIE_EXTENDED_CAPABILITY_AER) ++ else { ++ return Err(Error::new(ENOENT)); ++ }; ++ ++ let mut data = [0_u8; 16]; 
++ for (index, chunk) in data[..register.len()].chunks_exact_mut(4).enumerate() { ++ let index = u16::try_from(index).map_err(|_| Error::new(EIO))?; ++ let value = unsafe { pcie.read(addr, aer_base + register.offset() + index * 4) }; ++ chunk.copy_from_slice(&value.to_le_bytes()); ++ } ++ ++ Self::read_file_bytes(&data[..register.len()], buf, offset) ++ } ++ fn parse_after_pci_addr( ++ &mut self, ++ addr: PciAddress, ++ after: &str, ++ ctx: &CallerCtx, ++ ) -> Result { + if after.chars().next().map_or(false, |c| c != '/') { + return Err(Error::new(ENOENT)); + } + let func = self.tree.get_mut(&addr).ok_or(Error::new(ENOENT))?; + + Ok(if after.is_empty() { +- Handle::Device ++ Handle::Device { addr } + } else { + let path = &after[1..]; + +- match path { +- "channel" => { +- if func.enabled { +- return Err(Error::new(ENOLCK)); ++ if path == "aer" { ++ if Self::find_pcie_extended_capability( ++ &self.pcie, ++ addr, ++ PCIE_EXTENDED_CAPABILITY_AER, ++ ) ++ .is_none() ++ { ++ return Err(Error::new(ENOENT)); ++ } ++ Handle::AerDir ++ } else if let Some(register_name) = path.strip_prefix("aer/") { ++ let register = ++ AerRegisterName::from_path(register_name).ok_or(Error::new(ENOENT))?; ++ if Self::find_pcie_extended_capability( ++ &self.pcie, ++ addr, ++ PCIE_EXTENDED_CAPABILITY_AER, ++ ) ++ .is_none() ++ { ++ return Err(Error::new(ENOENT)); ++ } ++ Handle::Aer { addr, register } ++ } else { ++ match path { ++ "channel" => { ++ if func.enabled { ++ return Err(Error::new(ENOLCK)); ++ } ++ func.inner.legacy_interrupt_line = crate::enable_function( ++ &self.pcie, ++ &mut func.endpoint_header, ++ &mut func.capabilities, ++ ); ++ func.enabled = true; ++ Handle::Channel { ++ addr, ++ st: ChannelState::AwaitingData, ++ } + } +- func.inner.legacy_interrupt_line = crate::enable_function( +- &self.pcie, +- &mut func.endpoint_header, +- &mut func.capabilities, +- ); +- func.enabled = true; +- Handle::Channel { +- addr, +- st: ChannelState::AwaitingData, ++ "bind" => { ++ let 
addr_str = format!("{}", addr); ++ if let Some(&owner_pid) = self.binds.get(&addr_str) { ++ log::info!( ++ "pcid: device {} already bound by pid {}", ++ addr_str, ++ owner_pid ++ ); ++ return Err(Error::new(EALREADY)); ++ } ++ let caller_pid = u32::try_from(ctx.pid).map_err(|_| Error::new(EINVAL))?; ++ self.binds.insert(addr_str.clone(), caller_pid); ++ log::info!("pcid: device {} bound by pid {}", addr_str, caller_pid); ++ Handle::Bind { addr } + } ++ _ => return Err(Error::new(ENOENT)), + } +- _ => return Err(Error::new(ENOENT)), + } + }) + } diff --git a/local/patches/base/P5-init-daemon-panic-hardening.patch b/local/patches/base/P5-init-daemon-panic-hardening.patch new file mode 100644 index 00000000..efcd5d81 --- /dev/null +++ b/local/patches/base/P5-init-daemon-panic-hardening.patch @@ -0,0 +1,685 @@ +diff --git a/drivers/acpid/src/main.rs b/drivers/acpid/src/main.rs +index 059254b3..a3f5f996 100644 +--- a/drivers/acpid/src/main.rs ++++ b/drivers/acpid/src/main.rs +@@ -3,6 +3,7 @@ use std::fs::File; + use std::mem; + use std::ops::ControlFlow; + use std::os::unix::io::AsRawFd; ++use std::process; + use std::sync::Arc; + + use ::acpi::aml::op_region::{RegionHandler, RegionSpace}; +@@ -17,6 +18,58 @@ mod ec; + + mod scheme; + ++fn parse_physaddrs(sdt: &self::acpi::Sdt) -> Vec { ++ match &sdt.signature { ++ b"RSDT" => { ++ let chunks = sdt.data().chunks_exact(mem::size_of::()); ++ if !chunks.remainder().is_empty() { ++ eprintln!( ++ "acpid: malformed RSDT length {}: expected 4-byte entries", ++ sdt.data().len() ++ ); ++ process::exit(1); ++ } ++ ++ chunks ++ .map(|chunk| match <[u8; mem::size_of::()]>::try_from(chunk) { ++ Ok(bytes) => u32::from_le_bytes(bytes) as u64, ++ Err(_) => { ++ eprintln!("acpid: failed to decode RSDT physical address entry"); ++ process::exit(1); ++ } ++ }) ++ .collect() ++ } ++ b"XSDT" => { ++ let chunks = sdt.data().chunks_exact(mem::size_of::()); ++ if !chunks.remainder().is_empty() { ++ eprintln!( ++ "acpid: malformed XSDT length 
{}: expected 8-byte entries", ++ sdt.data().len() ++ ); ++ process::exit(1); ++ } ++ ++ chunks ++ .map(|chunk| match <[u8; mem::size_of::()]>::try_from(chunk) { ++ Ok(bytes) => u64::from_le_bytes(bytes), ++ Err(_) => { ++ eprintln!("acpid: failed to decode XSDT physical address entry"); ++ process::exit(1); ++ } ++ }) ++ .collect() ++ } ++ signature => { ++ eprintln!( ++ "acpid: expected [RX]SDT from kernel, got {:?}", ++ String::from_utf8_lossy(signature) ++ ); ++ process::exit(1); ++ } ++ } ++} ++ + fn daemon(daemon: daemon::Daemon) -> ! { + common::setup_logging( + "misc", +@@ -29,7 +82,10 @@ fn daemon(daemon: daemon::Daemon) -> ! { + log::info!("acpid start"); + + let rxsdt_raw_data: Arc<[u8]> = std::fs::read("/scheme/kernel.acpi/rxsdt") +- .expect("acpid: failed to read `/scheme/kernel.acpi/rxsdt`") ++ .unwrap_or_else(|err| { ++ eprintln!("acpid: failed to read `/scheme/kernel.acpi/rxsdt`: {err}"); ++ process::exit(1); ++ }) + .into(); + + if rxsdt_raw_data.is_empty() { +@@ -38,84 +94,84 @@ fn daemon(daemon: daemon::Daemon) -> ! { + std::process::exit(0); + } + +- let sdt = self::acpi::Sdt::new(rxsdt_raw_data).expect("acpid: failed to parse [RX]SDT"); +- +- let mut thirty_two_bit; +- let mut sixty_four_bit; +- +- let physaddrs_iter = match &sdt.signature { +- b"RSDT" => { +- thirty_two_bit = sdt +- .data() +- .chunks(mem::size_of::()) +- // TODO: With const generics, the compiler has some way of doing this for static sizes. 
+- .map(|chunk| <[u8; mem::size_of::()]>::try_from(chunk).unwrap()) +- .map(|chunk| u32::from_le_bytes(chunk)) +- .map(u64::from); +- +- &mut thirty_two_bit as &mut dyn Iterator +- } +- b"XSDT" => { +- sixty_four_bit = sdt +- .data() +- .chunks(mem::size_of::()) +- .map(|chunk| <[u8; mem::size_of::()]>::try_from(chunk).unwrap()) +- .map(|chunk| u64::from_le_bytes(chunk)); +- +- &mut sixty_four_bit as &mut dyn Iterator +- } +- _ => panic!("acpid: expected [RX]SDT from kernel to be either of those"), +- }; ++ let sdt = self::acpi::Sdt::new(rxsdt_raw_data).unwrap_or_else(|err| { ++ eprintln!("acpid: failed to parse [RX]SDT: {err}"); ++ process::exit(1); ++ }); ++ let physaddrs = parse_physaddrs(&sdt); + + let region_handlers: Vec<(RegionSpace, Box)> = vec![ + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + (RegionSpace::EmbeddedControl, Box::new(ec::Ec::new())), + ]; +- let acpi_context = self::acpi::AcpiContext::init(physaddrs_iter, region_handlers); ++ let acpi_context = self::acpi::AcpiContext::init(physaddrs.into_iter(), region_handlers); + + // TODO: I/O permission bitmap? 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +- common::acquire_port_io_rights().expect("acpid: failed to set I/O privilege level to Ring 3"); ++ common::acquire_port_io_rights().unwrap_or_else(|err| { ++ eprintln!("acpid: failed to set I/O privilege level to Ring 3: {err}"); ++ process::exit(1); ++ }); + + let shutdown_pipe = File::open("/scheme/kernel.acpi/kstop") +- .expect("acpid: failed to open `/scheme/kernel.acpi/kstop`"); +- +- let mut event_queue = RawEventQueue::new().expect("acpid: failed to create event queue"); +- let socket = Socket::nonblock().expect("acpid: failed to create disk scheme"); ++ .unwrap_or_else(|err| { ++ eprintln!("acpid: failed to open `/scheme/kernel.acpi/kstop`: {err}"); ++ process::exit(1); ++ }); ++ ++ let mut event_queue = RawEventQueue::new().unwrap_or_else(|err| { ++ eprintln!("acpid: failed to create event queue: {err}"); ++ process::exit(1); ++ }); ++ let socket = Socket::nonblock().unwrap_or_else(|err| { ++ eprintln!("acpid: failed to create disk scheme: {err}"); ++ process::exit(1); ++ }); + + let mut scheme = self::scheme::AcpiScheme::new(&acpi_context, &socket); + let mut handler = Blocking::new(&socket, 16); + + event_queue + .subscribe(shutdown_pipe.as_raw_fd() as usize, 0, EventFlags::READ) +- .expect("acpid: failed to register shutdown pipe for event queue"); ++ .unwrap_or_else(|err| { ++ eprintln!("acpid: failed to register shutdown pipe for event queue: {err}"); ++ process::exit(1); ++ }); + event_queue + .subscribe(socket.inner().raw(), 1, EventFlags::READ) +- .expect("acpid: failed to register scheme socket for event queue"); ++ .unwrap_or_else(|err| { ++ eprintln!("acpid: failed to register scheme socket for event queue: {err}"); ++ process::exit(1); ++ }); + + register_sync_scheme(&socket, "acpi", &mut scheme) +- .expect("acpid: failed to register acpi scheme to namespace"); ++ .unwrap_or_else(|err| { ++ eprintln!("acpid: failed to register acpi scheme to namespace: {err}"); ++ process::exit(1); ++ 
}); + + daemon.ready(); + +- libredox::call::setrens(0, 0).expect("acpid: failed to enter null namespace"); ++ libredox::call::setrens(0, 0).unwrap_or_else(|err| { ++ eprintln!("acpid: failed to enter null namespace: {err}"); ++ process::exit(1); ++ }); + + let mut mounted = true; + while mounted { +- let Some(event) = event_queue +- .next() +- .transpose() +- .expect("acpid: failed to read event file") +- else { ++ let Some(event) = event_queue.next().transpose().unwrap_or_else(|err| { ++ eprintln!("acpid: failed to read event file: {err}"); ++ process::exit(1); ++ }) else { + break; + }; + + if event.fd == socket.inner().raw() { + loop { +- match handler +- .process_requests_nonblocking(&mut scheme) +- .expect("acpid: failed to process requests") +- { ++ match handler.process_requests_nonblocking(&mut scheme).unwrap_or_else(|err| { ++ eprintln!("acpid: failed to process requests: {err}"); ++ process::exit(1); ++ }) { + ControlFlow::Continue(()) => {} + ControlFlow::Break(()) => break, + } +@@ -134,7 +190,8 @@ fn daemon(daemon: daemon::Daemon) -> ! 
{ + + acpi_context.set_global_s_state(5); + +- unreachable!("System should have shut down before this is entered"); ++ eprintln!("acpid: system did not shut down after requesting S5"); ++ process::exit(1); + } + + fn main() { +diff --git a/drivers/pcid/src/main.rs b/drivers/pcid/src/main.rs +index 61cd9a78..18ee18ab 100644 +--- a/drivers/pcid/src/main.rs ++++ b/drivers/pcid/src/main.rs +@@ -3,6 +3,7 @@ + #![feature(non_exhaustive_omitted_patterns_lint)] + + use std::collections::BTreeMap; ++use std::process; + + use log::{debug, info, trace, warn}; + use pci_types::capability::PciCapability; +@@ -42,7 +43,16 @@ fn handle_parsed_header( + continue; + } + match endpoint_header.bar(i, pcie) { +- Some(TyBar::Io { port }) => bars[i as usize] = PciBar::Port(port.try_into().unwrap()), ++ Some(TyBar::Io { port }) => match port.try_into() { ++ Ok(port) => bars[i as usize] = PciBar::Port(port), ++ Err(_) => { ++ warn!( ++ "pcid: skipping invalid I/O BAR port {port:#x} on {}", ++ endpoint_header.header().address() ++ ); ++ bars[i as usize] = PciBar::None; ++ } ++ }, + Some(TyBar::Memory32 { + address, + size, +@@ -251,7 +261,10 @@ fn daemon(daemon: daemon::Daemon) -> ! { + info!("PCI SG-BS:DV.F VEND:DEVI CL.SC.IN.RV"); + + let mut scheme = scheme::PciScheme::new(pcie); +- let socket = redox_scheme::Socket::create().expect("failed to open pci scheme socket"); ++ let socket = redox_scheme::Socket::create().unwrap_or_else(|err| { ++ eprintln!("pcid: failed to open pci scheme socket: {err}"); ++ process::exit(1); ++ }); + let handler = Blocking::new(&socket, 16); + + { +@@ -259,17 +272,27 @@ fn daemon(daemon: daemon::Daemon) -> ! 
{ + Ok(register_pci) => { + let access_id = scheme.access(); + +- let access_fd = socket +- .create_this_scheme_fd(0, access_id, syscall::O_RDWR, 0) +- .expect("failed to issue this resource"); +- let access_bytes = access_fd.to_ne_bytes(); +- let _ = register_pci +- .call_wo( +- &access_bytes, +- syscall::CallFlags::WRITE | syscall::CallFlags::FD, +- &[], +- ) +- .expect("failed to send pci_fd to acpid"); ++ match socket.create_this_scheme_fd(0, access_id, syscall::O_RDWR, 0) { ++ Ok(access_fd) => { ++ let access_bytes = access_fd.to_ne_bytes(); ++ if let Err(err) = register_pci.call_wo( ++ &access_bytes, ++ syscall::CallFlags::WRITE | syscall::CallFlags::FD, ++ &[], ++ ) { ++ warn!( ++ "pcid: failed to send pci_fd to acpid (error: {}). Running without ACPI integration.", ++ err ++ ); ++ } ++ } ++ Err(err) => { ++ warn!( ++ "pcid: failed to issue acpid registration resource (error: {}). Running without ACPI integration.", ++ err ++ ); ++ } ++ } + } + Err(err) => { + if err.errno() == libredox::errno::ENODEV { +@@ -305,13 +328,20 @@ fn daemon(daemon: daemon::Daemon) -> ! 
{ + debug!("Enumeration complete, now starting pci scheme"); + + register_sync_scheme(&socket, "pci", &mut scheme) +- .expect("failed to register pci scheme to namespace"); ++ .unwrap_or_else(|err| { ++ eprintln!("pcid: failed to register pci scheme to namespace: {err}"); ++ process::exit(1); ++ }); + + let _ = daemon.ready(); + +- handler +- .process_requests_blocking(scheme) +- .expect("pcid: failed to process requests"); ++ match handler.process_requests_blocking(scheme) { ++ Ok(never) => match never {}, ++ Err(err) => { ++ eprintln!("pcid: failed to process requests: {err}"); ++ process::exit(1); ++ } ++ } + } + + fn scan_device( +@@ -323,6 +353,7 @@ fn scan_device( + ) { + for func_num in 0..8 { + let header = TyPciHeader::new(PciAddress::new(0, bus_num, dev_num, func_num)); ++ let header_address = header.address(); + + let (vendor_id, device_id) = header.id(pcie); + if vendor_id == 0xffff && device_id == 0xffff { +@@ -344,21 +375,40 @@ fn scan_device( + revision, + }; + +- info!("PCI {} {}", header.address(), full_device_id.display()); ++ info!("PCI {} {}", header_address, full_device_id.display()); + + let has_multiple_functions = header.has_multiple_functions(pcie); + + match header.header_type(pcie) { + HeaderType::Endpoint => { ++ let endpoint_header = match EndpointHeader::from_header(header, pcie) { ++ Some(endpoint_header) => endpoint_header, ++ None => { ++ warn!( ++ "pcid: failed to parse endpoint header for {}", ++ header_address, ++ ); ++ continue; ++ } ++ }; + handle_parsed_header( + pcie, + tree, +- EndpointHeader::from_header(header, pcie).unwrap(), ++ endpoint_header, + full_device_id, + ); + } + HeaderType::PciPciBridge => { +- let bridge_header = PciPciBridgeHeader::from_header(header, pcie).unwrap(); ++ let bridge_header = match PciPciBridgeHeader::from_header(header, pcie) { ++ Some(bridge_header) => bridge_header, ++ None => { ++ warn!( ++ "pcid: failed to parse bridge header for {}", ++ header_address, ++ ); ++ continue; ++ } ++ }; + 
bus_nums.push(bridge_header.secondary_bus_number(pcie)); + } + ty => { +diff --git a/init/src/main.rs b/init/src/main.rs +index 5682cf44..cd270a6e 100644 +--- a/init/src/main.rs ++++ b/init/src/main.rs +@@ -1,6 +1,7 @@ + use std::collections::BTreeMap; + use std::ffi::OsString; + use std::path::Path; ++use std::time::Duration; + use std::{env, fs, io}; + + use libredox::flag::{O_RDONLY, O_WRONLY}; +@@ -166,19 +167,36 @@ fn main() { + } + }; + for entry in entries { ++ let Some(unit_name) = entry.file_name().and_then(|name| name.to_str()) else { ++ eprintln!( ++ "init: skipping config entry with invalid filename: {}", ++ entry.display() ++ ); ++ continue; ++ }; + scheduler.schedule_start_and_report_errors( + &mut unit_store, +- UnitId(entry.file_name().unwrap().to_str().unwrap().to_owned()), ++ UnitId(unit_name.to_owned()), + ); + } + }; + + scheduler.step(&mut unit_store, &mut init_config); + +- libredox::call::setrens(0, 0).expect("init: failed to enter null namespace"); ++ if let Err(err) = libredox::call::setrens(0, 0) { ++ eprintln!("init: failed to enter null namespace: {err}"); ++ std::process::exit(1); ++ } + + loop { + let mut status = 0; +- libredox::call::waitpid(0, &mut status, 0).unwrap(); ++ match libredox::call::waitpid(0, &mut status, 0) { ++ Ok(_) => {} ++ Err(err) if err.errno() == libredox::errno::EINTR => continue, ++ Err(err) => { ++ eprintln!("init: waitpid failed: {err}"); ++ std::thread::sleep(Duration::from_millis(100)); ++ } ++ } + } + } +diff --git a/init/src/scheduler.rs b/init/src/scheduler.rs +index d42a4e57..3b8d10b0 100644 +--- a/init/src/scheduler.rs ++++ b/init/src/scheduler.rs +@@ -43,7 +43,10 @@ impl Scheduler { + ) { + let loaded_units = unit_store.load_units(unit_id.clone(), errors); + for unit_id in loaded_units { +- if !unit_store.unit(&unit_id).conditions_met() { ++ if unit_store ++ .try_unit(&unit_id) ++ .is_ok_and(|unit| !unit.conditions_met()) ++ { + continue; + } + +@@ -62,7 +65,10 @@ impl Scheduler { + + match job.kind { 
+ JobKind::Start => { +- let unit = unit_store.unit_mut(&job.unit); ++ let Ok(unit) = unit_store.try_unit_mut(&job.unit) else { ++ eprintln!("init: unit {} not found in store, skipping", job.unit.0); ++ continue 'a; ++ }; + + for dep in &unit.info.requires_weak { + for pending_job in &self.pending { +diff --git a/init/src/service.rs b/init/src/service.rs +index ed0023e9..827ae275 100644 +--- a/init/src/service.rs ++++ b/init/src/service.rs +@@ -3,13 +3,24 @@ use std::ffi::OsString; + use std::io::Read; + use std::os::fd::{AsRawFd, OwnedFd}; + use std::os::unix::process::CommandExt; +-use std::process::Command; ++use std::process::{Child, Command}; + use std::{env, io}; + + use serde::Deserialize; + + use crate::script::subst_env; + ++fn terminate_child(child: &mut Child, command: &str) { ++ if let Err(err) = child.kill() { ++ if err.kind() != io::ErrorKind::InvalidInput { ++ eprintln!("init: failed to terminate {command}: {err}"); ++ } ++ } ++ if let Err(err) = child.wait() { ++ eprintln!("init: failed to reap {command}: {err}"); ++ } ++} ++ + #[derive(Clone, Debug, Deserialize)] + #[serde(deny_unknown_fields)] + pub struct Service { +@@ -37,7 +48,8 @@ pub enum ServiceType { + impl Service { + pub fn spawn(&self, base_envs: &BTreeMap) { + let mut command = Command::new(&self.cmd); +- command.args(self.args.iter().map(|arg| subst_env(arg))); ++ let resolved_args: Vec = self.args.iter().map(|arg| subst_env(arg)).collect(); ++ command.args(&resolved_args); + command.env_clear(); + for env in &self.inherit_envs { + if let Some(value) = env::var_os(env) { +@@ -45,14 +57,25 @@ impl Service { + } + } + command.envs(base_envs).envs(&self.envs); ++ let command_display = if resolved_args.is_empty() { ++ self.cmd.clone() ++ } else { ++ format!("{} {}", self.cmd, resolved_args.join(" ")) ++ }; + +- let (mut read_pipe, write_pipe) = io::pipe().unwrap(); ++ let (mut read_pipe, write_pipe) = match io::pipe().map_err(|err| { ++ eprintln!("init: failed to create readiness pipe for 
{command_display}: {err}"); ++ err ++ }) { ++ Ok(pair) => pair, ++ Err(_) => return, ++ }; + unsafe { pass_fd(&mut command, "INIT_NOTIFY", write_pipe.into()) }; + + let mut child = match command.spawn() { + Ok(child) => child, + Err(err) => { +- eprintln!("init: failed to execute {:?}: {}", command, err); ++ eprintln!("init: failed to execute {command_display}: {err}"); + return; + } + }; +@@ -61,10 +84,10 @@ impl Service { + ServiceType::Notify => match read_pipe.read_exact(&mut [0]) { + Ok(()) => {} + Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => { +- eprintln!("init: {command:?} exited without notifying readiness"); ++ eprintln!("init: {command_display} exited without notifying readiness"); + } + Err(err) => { +- eprintln!("init: failed to wait for {command:?}: {err}"); ++ eprintln!("init: failed to wait for {command_display}: {err}"); + } + }, + ServiceType::Scheme(scheme) => { +@@ -80,7 +103,7 @@ impl Service { + errno: syscall::EINTR, + }) => continue, + Ok(0) => { +- eprintln!("init: {command:?} exited without notifying readiness"); ++ eprintln!("init: {command_display} exited without notifying readiness"); + return; + } + Ok(1) => break, +@@ -89,26 +112,40 @@ impl Service { + return; + } + Err(err) => { +- eprintln!("init: failed to wait for {command:?}: {err}"); ++ eprintln!("init: failed to wait for {command_display}: {err}"); + return; + } + } + } + +- let current_namespace_fd = libredox::call::getns().expect("TODO"); +- libredox::call::register_scheme_to_ns(current_namespace_fd, scheme, new_fd) +- .expect("TODO"); ++ let current_namespace_fd = match libredox::call::getns() { ++ Ok(fd) => fd, ++ Err(err) => { ++ eprintln!("init: failed to get current namespace for {command_display}: {err}"); ++ terminate_child(&mut child, &command_display); ++ return; ++ } ++ }; ++ if let Err(err) = ++ libredox::call::register_scheme_to_ns(current_namespace_fd, scheme, new_fd) ++ { ++ eprintln!( ++ "init: failed to register scheme {scheme:?} for 
{command_display}: {err}" ++ ); ++ terminate_child(&mut child, &command_display); ++ return; ++ } + } + ServiceType::Oneshot => { + drop(read_pipe); + match child.wait() { + Ok(exit_status) => { + if !exit_status.success() { +- eprintln!("init: {command:?} failed with {exit_status}"); ++ eprintln!("init: {command_display} failed with {exit_status}"); + } + } + Err(err) => { +- eprintln!("init: failed to wait for {:?}: {}", command, err) ++ eprintln!("init: failed to wait for {command_display}: {err}") + } + } + } +diff --git a/init/src/unit.rs b/init/src/unit.rs +index 98053cb2..bd998394 100644 +--- a/init/src/unit.rs ++++ b/init/src/unit.rs +@@ -23,8 +23,14 @@ impl UnitStore { + } + + pub fn set_runtime_target(&mut self, unit_id: UnitId) { +- assert!(self.runtime_target.is_none()); +- assert!(self.units.contains_key(&unit_id)); ++ if self.runtime_target.is_some() { ++ eprintln!("init: runtime target already set, ignoring {}", unit_id.0); ++ return; ++ } ++ if !self.units.contains_key(&unit_id) { ++ eprintln!("init: runtime target {} not found in unit store", unit_id.0); ++ return; ++ } + self.runtime_target = Some(unit_id); + } + +@@ -85,8 +91,15 @@ impl UnitStore { + let unit = self.load_single_unit(unit_id, errors); + if let Some(unit) = unit { + loaded_units.push(unit.clone()); +- for dep in &self.unit(&unit).info.requires_weak { +- pending_units.push(dep.clone()); ++ match self.try_unit(&unit) { ++ Ok(unit) => { ++ for dep in &unit.info.requires_weak { ++ pending_units.push(dep.clone()); ++ } ++ } ++ Err(err) => { ++ errors.push(err); ++ } + } + } + } +@@ -94,12 +107,34 @@ impl UnitStore { + loaded_units + } + ++ pub fn try_unit(&self, unit: &UnitId) -> Result<&Unit, String> { ++ self.units ++ .get(unit) ++ .ok_or_else(|| format!("unit {} not found in store", unit.0)) ++ } ++ ++ // Keep the legacy infallible accessors for compatibility while scheduler/load paths ++ // use the fallible helpers to avoid panicking on missing units. 
++ #[allow(dead_code)] + pub fn unit(&self, unit: &UnitId) -> &Unit { +- self.units.get(unit).unwrap() ++ self.try_unit(unit).unwrap_or_else(|err| { ++ eprintln!("init: {err}"); ++ std::process::exit(1); ++ }) ++ } ++ ++ pub fn try_unit_mut(&mut self, unit: &UnitId) -> Result<&mut Unit, String> { ++ self.units ++ .get_mut(unit) ++ .ok_or_else(|| format!("unit {} not found in store", unit.0)) + } + ++ #[allow(dead_code)] + pub fn unit_mut(&mut self, unit: &UnitId) -> &mut Unit { +- self.units.get_mut(unit).unwrap() ++ self.try_unit_mut(unit).unwrap_or_else(|err| { ++ eprintln!("init: {err}"); ++ std::process::exit(1); ++ }) + } + } + +@@ -180,7 +215,7 @@ impl Unit { + ) -> io::Result { + let config = fs::read_to_string(config_path)?; + +- let Some(ext) = config_path.extension().map(|ext| ext.to_str().unwrap()) else { ++ let Some(ext) = config_path.extension().and_then(|ext| ext.to_str()) else { + let script = Script::from_str(&config, errors)?; + return Ok(Unit { + id, diff --git a/local/patches/kernel/P1-boot-path-diagnostics.patch b/local/patches/kernel/P1-boot-path-diagnostics.patch new file mode 100644 index 00000000..d2750dd7 --- /dev/null +++ b/local/patches/kernel/P1-boot-path-diagnostics.patch @@ -0,0 +1,219 @@ +diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs +index 4dc2388..f472c08 100644 +--- a/src/acpi/madt/arch/x86.rs ++++ b/src/acpi/madt/arch/x86.rs +@@ -20,0 +21 @@ use super::{Madt, MadtEntry}; ++const AP_SPIN_LIMIT: u32 = 1_000_000; +@@ -45,7 +46,11 @@ pub(super) fn init(madt: Madt) { +- let result = mapper +- .map_phys( +- trampoline_page.start_address(), +- trampoline_frame.base(), +- PageFlags::new().execute(true).write(true), +- ) +- .expect("failed to map trampoline"); ++ let result = match mapper.map_phys( ++ trampoline_page.start_address(), ++ trampoline_frame.base(), ++ PageFlags::new().execute(true).write(true), ++ ) { ++ Some(result) => result, ++ None => { ++ println!("KERNEL AP: failed to map trampoline page, AP bring-up 
disabled"); ++ return; ++ } ++ }; +@@ -75,2 +79,0 @@ pub(super) fn init(madt: Madt) { +- let cpu_id = LogicalCpuId::next(); +- +@@ -78,6 +81,8 @@ pub(super) fn init(madt: Madt) { +- let stack_start = RmmA::phys_to_virt( +- allocate_p2frame(4) +- .expect("no more frames in acpi stack_start") +- .base(), +- ) +- .data(); ++ let alloc = match allocate_p2frame(4) { ++ Some(frame) => frame, ++ None => { ++ println!("KERNEL AP: CPU {} no memory for stack, skipping", ap_local_apic.id); ++ continue; ++ } ++ }; ++ let stack_start = RmmA::phys_to_virt(alloc.base()).data(); +@@ -85,0 +91,10 @@ pub(super) fn init(madt: Madt) { ++ let next_cpu = crate::CPU_COUNT.load(Ordering::Relaxed); ++ if next_cpu >= crate::cpu_set::MAX_CPU_COUNT { ++ println!( ++ "KERNEL AP: CPU {} exceeds logical CPU limit, skipping", ++ ap_local_apic.id ++ ); ++ continue; ++ } ++ let cpu_id = LogicalCpuId::new(next_cpu); ++ +@@ -140,2 +155,7 @@ pub(super) fn init(madt: Madt) { +- // Wait for trampoline ready +- while unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } == 0 { ++ // Wait for trampoline ready with timeout ++ let mut trampoline_ready = false; ++ for _ in 0..AP_SPIN_LIMIT { ++ if unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } != 0 { ++ trampoline_ready = true; ++ break; ++ } +@@ -144 +164,11 @@ pub(super) fn init(madt: Madt) { +- while !AP_READY.load(Ordering::SeqCst) { ++ if !trampoline_ready { ++ println!("KERNEL AP: CPU {} trampoline timeout, skipping", ap_local_apic.id); ++ continue; ++ } ++ ++ let mut kernel_ready = false; ++ for _ in 0..AP_SPIN_LIMIT { ++ if AP_READY.load(Ordering::SeqCst) { ++ kernel_ready = true; ++ break; ++ } +@@ -146,0 +177,6 @@ pub(super) fn init(madt: Madt) { ++ if !kernel_ready { ++ println!("KERNEL AP: CPU {} AP_READY timeout, skipping", ap_local_apic.id); ++ continue; ++ } ++ ++ crate::CPU_COUNT.fetch_add(1, Ordering::Relaxed); +@@ -154 +190 @@ pub(super) fn init(madt: Madt) { +- let (_frame, _, flush) = unsafe { ++ if let Some((_frame, _, flush)) = 
unsafe { +@@ -157,3 +193,5 @@ pub(super) fn init(madt: Madt) { +- .expect("failed to unmap trampoline page") +- }; +- flush.flush(); ++ } { ++ flush.flush(); ++ } else { ++ println!("KERNEL AP: failed to unmap trampoline page (non-fatal)"); ++ } +diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs +index 4fdb0ba..aaa7196 100644 +--- a/src/allocator/mod.rs ++++ b/src/allocator/mod.rs +@@ -9,0 +10,9 @@ const KERNEL_HEAP_SIZE: usize = ::rmm::MEGABYTE; ++#[cold] ++fn halt_kernel_heap_init(message: &str) -> ! { ++ print!("{message}"); ++ println!("Kernel heap initialization cannot continue. Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++} ++ +@@ -16,4 +25,6 @@ unsafe fn map_heap(mapper: &mut KernelMapper, offset: usize, size: usize) +- let phys = mapper +- .allocator_mut() +- .allocate_one() +- .expect("failed to allocate kernel heap"); ++ let phys = match mapper.allocator_mut().allocate_one() { ++ Some(phys) => phys, ++ None => halt_kernel_heap_init( ++ "FATAL: failed to allocate physical frame for kernel heap\n", ++ ), ++ }; +@@ -21,9 +32,12 @@ unsafe fn map_heap(mapper: &mut KernelMapper, offset: usize, size: usize) +- mapper +- .map_phys( +- page.start_address(), +- phys, +- PageFlags::new() +- .write(true) +- .global(cfg!(not(feature = "pti"))), +- ) +- .expect("failed to map kernel heap") ++ match mapper.map_phys( ++ page.start_address(), ++ phys, ++ PageFlags::new() ++ .write(true) ++ .global(cfg!(not(feature = "pti"))), ++ ) { ++ Some(flush) => flush, ++ None => halt_kernel_heap_init( ++ "FATAL: failed to map kernel heap virtual page\n", ++ ), ++ } +diff --git a/src/arch/x86_shared/gdt.rs b/src/arch/x86_shared/gdt.rs +index cad344f..f7acae3 100644 +--- a/src/arch/x86_shared/gdt.rs ++++ b/src/arch/x86_shared/gdt.rs +@@ -194,0 +195,9 @@ impl ProcessorControlRegion { ++#[cold] ++fn halt_pcr_init() -> ! { ++ println!("FATAL: failed to allocate physical memory for Processor Control Region"); ++ println!("Processor startup cannot continue. 
Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++} ++ +@@ -378 +387,4 @@ pub fn allocate_and_init_pcr( +- let pcr_frame = crate::memory::allocate_p2frame(alloc_order).expect("failed to allocate PCR"); ++ let pcr_frame = match crate::memory::allocate_p2frame(alloc_order) { ++ Some(frame) => frame, ++ None => halt_pcr_init(), ++ }; +diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs +index 5006458..47f692f 100644 +--- a/src/arch/x86_shared/idt.rs ++++ b/src/arch/x86_shared/idt.rs +@@ -80,0 +81,9 @@ pub(crate) static IDTS: RwLock> = ++#[cold] ++fn halt_idt_init() -> ! { ++ println!("FATAL: failed to allocate physical pages for backup interrupt stack"); ++ println!("Interrupt setup cannot continue. Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++} ++ +@@ -164,2 +173,4 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt { +- let frames = crate::memory::allocate_p2frame(4) +- .expect("failed to allocate pages for backup interrupt stack"); ++ let frames = match crate::memory::allocate_p2frame(4) { ++ Some(frames) => frames, ++ None => halt_idt_init(), ++ }; +diff --git a/src/startup/memory.rs b/src/startup/memory.rs +index 26922dd..f271200 100644 +--- a/src/startup/memory.rs ++++ b/src/startup/memory.rs +@@ -326 +326,10 @@ unsafe fn map_memory(areas: &[MemoryArea], mut bump_allocator: &mut Bum +- let kernel_area = (*MEMORY_MAP.get()).kernel().unwrap(); ++ let kernel_area = match (*MEMORY_MAP.get()).kernel() { ++ Some(area) => area, ++ None => { ++ println!("FATAL: kernel memory area not found in boot memory map"); ++ println!("Cannot determine kernel base address. Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++ } ++ }; +diff --git a/src/startup/mod.rs b/src/startup/mod.rs +index 8ad3cdf..86aabc2 100644 +--- a/src/startup/mod.rs ++++ b/src/startup/mod.rs +@@ -151,0 +152,9 @@ static BSP_READY: AtomicBool = AtomicBool::new(false); ++#[cold] ++fn halt_boot(message: &str) -> ! 
{ ++ print!("{message}"); ++ println!("Kernel boot cannot continue. Halting."); ++ loop { ++ hint::spin_loop(); ++ } ++} ++ +@@ -183,3 +192 @@ pub(crate) fn kmain(bootstrap: Bootstrap) -> ! { +- Err(err) => { +- panic!("failed to spawn userspace_init: {:?}", err); +- } ++ Err(_err) => halt_boot("FATAL: failed to spawn first userspace process userspace_init\n"), diff --git a/local/patches/kernel/P4-scheme-failure-modes.patch b/local/patches/kernel/P4-scheme-failure-modes.patch new file mode 100644 index 00000000..d97519f6 --- /dev/null +++ b/local/patches/kernel/P4-scheme-failure-modes.patch @@ -0,0 +1,913 @@ +diff --git a/src/context/file.rs b/src/context/file.rs +index 2d3790f..150f483 100644 +--- a/src/context/file.rs ++++ b/src/context/file.rs +@@ -4,7 +4,7 @@ use crate::{ + event, + scheme::{self, SchemeId}, + sync::{CleanLockToken, RwLock, L6}, +- syscall::error::Result, ++ syscall::error::{Error, Result, ESTALE}, + }; + use alloc::sync::Arc; + use syscall::{schemev2::NewFdFlags, RwFlags, O_APPEND, O_NONBLOCK}; +@@ -18,6 +18,7 @@ pub struct FileDescription { + pub offset: u64, + /// The scheme that this file refers to + pub scheme: SchemeId, ++ pub scheme_generation: Option, + /// The number the scheme uses to refer to this file + pub number: usize, + /// The flags passed to open or fcntl(SETFL) +@@ -32,6 +33,52 @@ bitflags! 
{ + } + } + impl FileDescription { ++ pub fn with_generation( ++ scheme: SchemeId, ++ scheme_generation: Option, ++ number: usize, ++ offset: u64, ++ flags: u32, ++ internal_flags: InternalFlags, ++ ) -> Self { ++ Self { ++ offset, ++ scheme, ++ scheme_generation, ++ number, ++ flags, ++ internal_flags, ++ } ++ } ++ ++ pub fn new( ++ scheme: SchemeId, ++ number: usize, ++ offset: u64, ++ flags: u32, ++ internal_flags: InternalFlags, ++ token: &mut CleanLockToken, ++ ) -> Self { ++ Self::with_generation( ++ scheme, ++ Some(scheme::current_scheme_generation(token.token(), scheme)), ++ number, ++ offset, ++ flags, ++ internal_flags, ++ ) ++ } ++ ++ pub fn get_scheme(&self, token: &mut CleanLockToken) -> Result { ++ if let Some(expected_generation) = self.scheme_generation ++ && expected_generation != scheme::current_scheme_generation(token.token(), self.scheme) ++ { ++ return Err(Error::new(ESTALE)); ++ } ++ ++ scheme::get_scheme(token.token(), self.scheme) ++ } ++ + pub fn rw_flags(&self, rw: RwFlags) -> u32 { + let mut ret = self.flags & !(O_NONBLOCK | O_APPEND) as u32; + if rw.contains(RwFlags::APPEND) { +@@ -76,7 +123,7 @@ impl FileDescription { + pub fn try_close(self, token: &mut CleanLockToken) -> Result<()> { + event::unregister_file(self.scheme, self.number, token); + +- let scheme = scheme::get_scheme(token.token(), self.scheme)?; ++ let scheme = self.get_scheme(token)?; + + scheme.close(self.number, token) + } +@@ -85,12 +132,12 @@ impl FileDescription { + impl FileDescriptor { + pub fn close(self, token: &mut CleanLockToken) -> Result<()> { + { +- let (scheme_id, number, internal_flags) = { ++ let (desc, number, internal_flags) = { + let desc = self.description.read(token.token()); +- (desc.scheme, desc.number, desc.internal_flags) ++ (*desc, desc.number, desc.internal_flags) + }; + if internal_flags.contains(InternalFlags::NOTIFY_ON_NEXT_DETACH) { +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; ++ let scheme = desc.get_scheme(token)?; + 
scheme.detach(number, token)?; + } + } +diff --git a/src/context/memory.rs b/src/context/memory.rs +index 93446ba..a862b35 100644 +--- a/src/context/memory.rs ++++ b/src/context/memory.rs +@@ -64,14 +64,13 @@ impl UnmapResult { + return Ok(()); + }; + +- let (scheme_id, number) = { +- let desc = description.write(token.token()); +- (desc.scheme, desc.number) ++ let (scheme, number) = { ++ let desc = *description.read(token.token()); ++ (desc.get_scheme(token)?, desc.number) + }; + +- let scheme_opt = scheme::get_scheme(token.token(), scheme_id); +- let funmap_result = scheme_opt +- .and_then(|scheme| scheme.kfunmap(number, base_offset, self.size, self.flags, token)); ++ let funmap_result = scheme ++ .kfunmap(number, base_offset, self.size, self.flags, token); + + if let Ok(fd) = Arc::try_unwrap(description) { + fd.into_inner().try_close(token)?; +@@ -2687,20 +2686,13 @@ fn correct_inner<'l>( + // XXX: This is cheating, but guaranteed we won't deadlock because we've dropped addr_space_guard + let mut token = unsafe { CleanLockToken::new() }; + +- let (scheme_id, scheme_number) = { +- let desc = &file_ref.description.read(token.token()); +- (desc.scheme, desc.number) ++ let desc = *file_ref.description.read(token.token()); ++ let scheme = desc.get_scheme(&mut token).map_err(|_| PfError::Segv)?; ++ let scheme_number = desc.number; ++ let user_inner = match scheme { ++ KernelSchemes::User(user) => user.inner, ++ _ => return Err(PfError::Segv), + }; +- let user_inner = scheme::get_scheme(token.token(), scheme_id) +- .ok() +- .and_then(|s| { +- if let KernelSchemes::User(user) = s { +- Some(user.inner) +- } else { +- None +- } +- }) +- .ok_or(PfError::Segv)?; + + let offset = file_ref.base_offset as u64 + (pages_from_grant_start * PAGE_SIZE) as u64; + user_inner +diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs +index d30272c..765e547 100644 +--- a/src/scheme/mod.rs ++++ b/src/scheme/mod.rs +@@ -14,7 +14,7 @@ use alloc::{ + }; + use core::{ + str, +- 
sync::atomic::{AtomicUsize, Ordering}, ++ sync::atomic::{AtomicU64, AtomicUsize, Ordering}, + }; + use hashbrown::hash_map::{self, DefaultHashBuilder, HashMap}; + use spin::Once; +@@ -169,6 +169,7 @@ enum Handle { + + /// Schemes list + static HANDLES: Once>> = Once::new(); ++static SCHEME_GENERATIONS: Once>> = Once::new(); + static SCHEME_LIST_NEXT_ID: AtomicUsize = AtomicUsize::new(MAX_GLOBAL_SCHEMES); + static SCHEME_LIST_ID: AtomicUsize = AtomicUsize::new(0); + +@@ -204,6 +205,10 @@ fn init_schemes() -> RwLock> { + RwLock::new(handles) + } + ++fn init_scheme_generations() -> RwLock> { ++ RwLock::new(HashMap::new()) ++} ++ + /// Get a handle to a scheme. + pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result { + match handles().read(token).get(&scheme_id) { +@@ -212,10 +217,33 @@ pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result, scheme_id: SchemeId) -> u64 { ++ scheme_generations() ++ .read(token) ++ .get(&scheme_id) ++ .map(|generation| generation.load(Ordering::Acquire)) ++ .unwrap_or(0) ++} ++ + fn handles<'a>() -> &'a RwLock> { + HANDLES.call_once(init_schemes) + } + ++fn scheme_generations<'a>() -> &'a RwLock> { ++ SCHEME_GENERATIONS.call_once(init_scheme_generations) ++} ++ ++fn increment_scheme_generation(scheme_id: SchemeId, token: &mut CleanLockToken) { ++ match scheme_generations().write(token.token()).entry(scheme_id) { ++ hash_map::Entry::Occupied(entry) => { ++ entry.get().fetch_add(1, Ordering::AcqRel); ++ } ++ hash_map::Entry::Vacant(entry) => { ++ entry.insert(AtomicU64::new(1)); ++ } ++ } ++} ++ + /// Scheme list type + pub struct SchemeList; + +@@ -260,9 +288,14 @@ impl SchemeList { + + /// Remove a scheme + fn remove(&self, id: usize, token: &mut CleanLockToken) { +- let scheme = handles().write(token.token()).remove(&SchemeId(id)); ++ let scheme_id = SchemeId(id); ++ let scheme = handles().write(token.token()).remove(&scheme_id); + + assert!(scheme.is_some()); ++ if let 
Some(Handle::Scheme(KernelSchemes::User(user))) = scheme.as_ref() { ++ user.inner.fail_pending_calls(token); ++ } ++ increment_scheme_generation(scheme_id, token); + if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme + && let Some(user) = Arc::into_inner(user.inner) + { +@@ -287,32 +320,32 @@ impl KernelScheme for SchemeList { + token: &mut CleanLockToken, + ) -> Result { + let scheme_id = SchemeId(scheme_id); +- match handles() +- .read(token.token()) +- .get(&scheme_id) +- .ok_or(Error::new(EBADF))? +- { +- Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => { +- let inner = inner.clone(); +- assert!(scheme_id == inner.scheme_id); +- let scheme = scheme_id; +- let params = unsafe { user_buf.read_exact::()? }; +- +- return Ok(OpenResult::External(Arc::new(RwLock::new( +- FileDescription { +- scheme, +- number: params.number, +- offset: params.offset, +- flags: params.flags as u32, +- internal_flags: InternalFlags::from_extra0(params.internal_flags) +- .ok_or(Error::new(EINVAL))?, +- }, +- )))); ++ let maybe_inner = { ++ let handles = handles().read(token.token()); ++ match handles.get(&scheme_id).ok_or(Error::new(EBADF))? { ++ Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => Some(inner.clone()), ++ Handle::SchemeCreationCapability => None, ++ _ => return Err(Error::new(EBADF)), + } +- Handle::SchemeCreationCapability => (), +- _ => return Err(Error::new(EBADF)), + }; + ++ if let Some(inner) = maybe_inner { ++ assert!(scheme_id == inner.scheme_id); ++ let params = unsafe { user_buf.read_exact::()? 
}; ++ ++ return Ok(OpenResult::External(Arc::new(RwLock::new( ++ FileDescription::new( ++ scheme_id, ++ params.number, ++ params.offset, ++ params.flags as u32, ++ InternalFlags::from_extra0(params.internal_flags) ++ .ok_or(Error::new(EINVAL))?, ++ token, ++ ), ++ )))); ++ } ++ + const EXPECTED: &[u8] = b"create-scheme"; + let mut buf = [0u8; EXPECTED.len()]; + +diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs +index 47588e1..1bdd6cc 100644 +--- a/src/scheme/proc.rs ++++ b/src/scheme/proc.rs +@@ -849,17 +873,17 @@ impl KernelScheme for ProcScheme { + } + } + fn extract_scheme_number(fd: usize, token: &mut CleanLockToken) -> Result<(KernelSchemes, usize)> { +- let (scheme_id, number) = { ++ let desc = { + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); ++ let (context, mut context_token) = current.token_split(); + let file_descriptor = context +- .get_file(FileHandle::from(fd), &mut token) ++ .get_file(FileHandle::from(fd), &mut context_token) + .ok_or(Error::new(EBADF))?; +- let desc = file_descriptor.description.read(token.token()); +- (desc.scheme, desc.number) ++ *file_descriptor.description.read(context_token.token()) + }; +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; ++ let scheme = desc.get_scheme(token)?; ++ let number = desc.number; + + Ok((scheme, number)) + } +diff --git a/src/scheme/user.rs b/src/scheme/user.rs +index b901302..dfbf66b 100644 +--- a/src/scheme/user.rs ++++ b/src/scheme/user.rs +@@ -80,6 +80,7 @@ const ONE: NonZeroUsize = match NonZeroUsize::new(1) { + Some(one) => one, + None => unreachable!(), + }; ++const MAX_SPURIOUS_WAKEUPS: usize = 100; + + enum ParsedCqe { + TriggerFevent { +@@ -209,6 +210,8 @@ impl UserInner { + caller_responsible: &mut PageSpan, + token: &mut CleanLockToken, + ) -> Result { ++ let mut remaining_spurious_wakeups = MAX_SPURIOUS_WAKEUPS; ++ + { + // Disable preemption to avoid context switches 
between setting the + // process state and sending the scheme request. The process is made +@@ -261,7 +264,10 @@ impl UserInner { + }; + + let states = self.states.lock(token.token()); +- let (mut states, mut token) = states.into_split(); ++ let (mut states, mut state_token) = states.into_split(); ++ let mut timed_out_descriptions = None; ++ let mut remove_state = false; ++ let mut timed_out = false; + match states.get_mut(sqe.tag as usize) { + // invalid state + None => return Err(Error::new(EBADFD)), +@@ -274,24 +280,35 @@ impl UserInner { + fds, + } => { + let maybe_eintr = +- eintr_if_sigkill(&mut callee_responsible, &mut token.token()); +- *o = State::Waiting { +- canceling: true, +- callee_responsible, +- context, +- fds, +- }; ++ eintr_if_sigkill(&mut callee_responsible, &mut state_token.token()); + +- maybe_eintr?; ++ if maybe_eintr.is_ok() { ++ remaining_spurious_wakeups = ++ remaining_spurious_wakeups.saturating_sub(1); ++ } ++ ++ if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 { ++ timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds)); ++ remove_state = true; ++ } else { ++ *o = State::Waiting { ++ canceling: true, ++ callee_responsible, ++ context, ++ fds, ++ }; ++ } + +- context::current() +- .write(token.token()) +- .block("UserInner::call (woken up after cancelation request)"); ++ maybe_eintr?; + +- // We do not want to drop the lock before blocking +- // as if we get preempted in between we might miss a +- // wakeup. 
+- drop(states); ++ if remove_state { ++ states.remove(sqe.tag as usize); ++ timed_out = true; ++ } else { ++ context::current() ++ .write(state_token.token()) ++ .block("UserInner::call (woken up after cancelation request)"); ++ } + } + // spurious wakeup + State::Waiting { +@@ -300,60 +317,76 @@ impl UserInner { + context, + mut callee_responsible, + } => { +- let maybe_eintr = eintr_if_sigkill(&mut callee_responsible, &mut token); + let current_context = context::current(); ++ let maybe_eintr = ++ eintr_if_sigkill(&mut callee_responsible, &mut state_token); ++ ++ if maybe_eintr.is_ok() { ++ remaining_spurious_wakeups = ++ remaining_spurious_wakeups.saturating_sub(1); ++ } + +- *o = State::Waiting { +- // Currently we treat all spurious wakeups to have the same behavior +- // as signals (i.e., we send a cancellation request). It is not something +- // that should happen, but it certainly can happen, for example if a context +- // is awoken through its thread handle without setting any sig bits, or if the +- // caller clears its own sig bits. If it actually is a signal, then it is the +- // intended behavior. +- canceling: true, +- fds, +- context, +- callee_responsible, +- }; ++ if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 { ++ timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds)); ++ remove_state = true; ++ } else { ++ *o = State::Waiting { ++ // Currently we treat all spurious wakeups to have the same behavior ++ // as signals (i.e., we send a cancellation request). It is not something ++ // that should happen, but it certainly can happen, for example if a context ++ // is awoken through its thread handle without setting any sig bits, or if the ++ // caller clears its own sig bits. If it actually is a signal, then it is the ++ // intended behavior. 
++ canceling: true, ++ fds, ++ context, ++ callee_responsible, ++ }; ++ } + + maybe_eintr?; + +- // We do not want to preempt between sending the +- // cancellation and blocking again where we might +- // miss a wakeup. +- let mut preempt = PreemptGuardL1::new(¤t_context, &mut token); +- let token = preempt.token(); +- +- self.todo.send_locked( +- Sqe { +- opcode: Opcode::Cancel as u8, +- sqe_flags: SqeFlags::ONEWAY, +- tag: sqe.tag, +- ..Default::default() +- }, +- token.token(), +- ); +- event::trigger_locked( +- self.root_id, +- self.scheme_id.get(), +- EVENT_READ, +- token.token(), +- ); +- +- // 1. If cancellation was requested and arrived +- // before the scheme processed the request, an +- // acknowledgement will be sent back after the +- // cancellation is processed and we will be woken up +- // again. State will be State::Responded then. +- // +- // 2. If cancellation was requested but the scheme +- // already processed the request, we will receive +- // the actual response next and woken up again. +- // State will be State::Responded then. +- context::current() +- .write(token.token()) +- .block("UserInner::call (spurious wakeup)"); +- drop(states); ++ if remove_state { ++ states.remove(sqe.tag as usize); ++ timed_out = true; ++ } else { ++ // We do not want to preempt between sending the ++ // cancellation and blocking again where we might ++ // miss a wakeup. ++ let mut preempt = ++ PreemptGuardL1::new(¤t_context, &mut state_token); ++ let token = preempt.token(); ++ ++ self.todo.send_locked( ++ Sqe { ++ opcode: Opcode::Cancel as u8, ++ sqe_flags: SqeFlags::ONEWAY, ++ tag: sqe.tag, ++ ..Default::default() ++ }, ++ token.token(), ++ ); ++ event::trigger_locked( ++ self.root_id, ++ self.scheme_id.get(), ++ EVENT_READ, ++ token.token(), ++ ); ++ ++ // 1. 
If cancellation was requested and arrived ++ // before the scheme processed the request, an ++ // acknowledgement will be sent back after the ++ // cancellation is processed and we will be woken up ++ // again. State will be State::Responded then. ++ // ++ // 2. If cancellation was requested but the scheme ++ // already processed the request, we will receive ++ // the actual response next and woken up again. ++ // State will be State::Responded then. ++ context::current() ++ .write(token.token()) ++ .block("UserInner::call (spurious wakeup)"); ++ } + } + + // invalid state +@@ -368,7 +401,67 @@ impl UserInner { + } + }, + } ++ ++ if let Some(descriptions) = timed_out_descriptions { ++ drop(states); ++ for desc in descriptions { ++ let _ = desc.try_close(token); ++ } ++ } ++ ++ if timed_out { ++ return Err(Error::new(ETIMEDOUT)); ++ } ++ } ++ } ++ } ++ ++ fn collect_descriptions_to_close( ++ fds: Vec>, ++ ) -> Vec { ++ fds.into_iter() ++ .filter_map(|fd| Arc::try_unwrap(fd).ok()) ++ .map(RwLock::into_inner) ++ .collect() ++ } ++ ++ pub fn fail_pending_calls(&self, token: &mut CleanLockToken) { ++ let descriptions_to_close = { ++ let mut states_lock = self.states.lock(token.token()); ++ let (states, mut lock_token) = states_lock.token_split(); ++ let mut descriptions_to_close = Vec::new(); ++ let mut states_to_remove = Vec::new(); ++ ++ for (id, state) in states.iter_mut() { ++ match mem::replace(state, State::Placeholder) { ++ State::Waiting { context, fds, .. 
} => { ++ descriptions_to_close.extend(Self::collect_descriptions_to_close(fds)); ++ ++ match context.upgrade() { ++ Some(context) => { ++ *state = State::Responded(Response::Regular( ++ Err(Error::new(ENODEV)), ++ 0, ++ false, ++ )); ++ context.write(lock_token.token()).unblock(); ++ } ++ None => states_to_remove.push(id), ++ } ++ } ++ old_state => *state = old_state, ++ } ++ } ++ ++ for id in states_to_remove { ++ states.remove(id); ++ } ++ ++ descriptions_to_close ++ }; ++ ++ for desc in descriptions_to_close { ++ let _ = desc.try_close(token); ++ } + } + } + } +@@ -1283,6 +1376,7 @@ impl UserInner { + } + + pub fn into_drop(self, token: &mut CleanLockToken) { ++ self.fail_pending_calls(token); + self.todo.condition.into_drop(token); + } + } +diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs +index bf98464..10c6a92 100644 +--- a/src/syscall/fs.rs ++++ b/src/syscall/fs.rs +@@ -12,7 +12,7 @@ use crate::{ + memory::{AddrSpace, GenericFlusher, Grant, PageSpan, TlbShootdownActions}, + }, + memory::{Page, VirtualAddress, PAGE_SIZE}, +- scheme::{self, FileHandle, KernelScheme, OpenResult, StrOrBytes}, ++ scheme::{FileHandle, KernelScheme, OpenResult, StrOrBytes}, + sync::{CleanLockToken, RwLock}, + syscall::{data::Stat, error::*, flag::*}, + }; +@@ -45,7 +45,7 @@ pub fn file_op_generic_ext( + (file, desc) + }; + +- let scheme = scheme::get_scheme(token.token(), desc.scheme)?; ++ let scheme = desc.get_scheme(token)?; + + op(&*scheme, file.description, desc, token) + } +@@ -73,14 +73,18 @@ pub fn openat( + ) -> Result { + let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; + +- let (scheme_id, number) = { ++ let desc = { + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); +- let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?; +- let desc = pipe.description.read(token.token()); +- (desc.scheme, desc.number) ++ let (context, mut context_token) = 
current.token_split(); ++ let pipe = context ++ .get_file(fh, &mut context_token) ++ .ok_or(Error::new(EBADF))?; ++ *pipe.description.read(context_token.token()) + }; ++ let scheme = desc.get_scheme(token)?; ++ let number = desc.number; ++ let scheme_id = desc.scheme; + + let caller_ctx = context::current() + .read(token.token()) +@@ -88,8 +92,6 @@ pub fn openat( + .filter_uid_gid(euid, egid); + + let new_description = { +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; +- + let res = scheme.kopenat( + number, + StrOrBytes::from_str(&path_buf), +@@ -101,13 +103,14 @@ pub fn openat( + + match res? { + OpenResult::SchemeLocal(number, internal_flags) => { +- Arc::new(RwLock::new(FileDescription { +- offset: 0, +- internal_flags, +- scheme: scheme_id, ++ Arc::new(RwLock::new(FileDescription::new( ++ scheme_id, + number, +- flags: (flags & !O_CLOEXEC) as u32, +- })) ++ 0, ++ (flags & !O_CLOEXEC) as u32, ++ internal_flags, ++ token, ++ ))) + } + OpenResult::External(desc) => desc, + } +@@ -137,16 +140,17 @@ pub fn unlinkat( + ) -> Result<()> { + let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; + +- let (number, scheme_id) = { ++ let desc = { + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); +- let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?; +- let desc = pipe.description.read(token.token()); +- (desc.number, desc.scheme) ++ let (context, mut context_token) = current.token_split(); ++ let pipe = context ++ .get_file(fh, &mut context_token) ++ .ok_or(Error::new(EBADF))?; ++ *pipe.description.read(context_token.token()) + }; +- +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; ++ let number = desc.number; ++ let scheme = desc.get_scheme(token)?; + + let caller_ctx = context::current() + .read(token.token()) +@@ -199,17 +203,18 @@ fn duplicate_file( + let description = { *file.description.read(token.token()) }; + + let 
new_description = { +- let scheme = scheme::get_scheme(token.token(), description.scheme)?; ++ let scheme = description.get_scheme(token)?; + + match scheme.kdup(description.number, user_buf, caller_ctx, token)? { + OpenResult::SchemeLocal(number, internal_flags) => { +- Arc::new(RwLock::new(FileDescription { +- offset: 0, +- internal_flags, +- scheme: description.scheme, ++ Arc::new(RwLock::new(FileDescription::new( ++ description.scheme, + number, +- flags: description.flags, +- })) ++ 0, ++ description.flags, ++ internal_flags, ++ token, ++ ))) + } + OpenResult::External(desc) => desc, + } +@@ -296,11 +301,10 @@ fn call_normal( + } + .ok_or(Error::new(EBADF))?; + +- let (scheme_id, number) = { +- let desc = file.description.read(token.token()); +- (desc.scheme, desc.number) ++ let (scheme, number) = { ++ let desc = *file.description.read(token.token()); ++ (desc.get_scheme(token)?, desc.number) + }; +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; + + if flags.contains(CallFlags::STD_FS) { + scheme.translate_std_fs_call(number, file.description, payload, flags, metadata, token) +@@ -341,28 +345,28 @@ fn fdwrite_inner( + ) -> Result { + // TODO: Ensure deadlocks can't happen + let (scheme, number, descs_to_send) = { +- let (scheme, number) = { ++ let desc = { + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); ++ let (context, mut context_token) = current.token_split(); + let file_descriptor = context +- .get_file(socket, &mut token) ++ .get_file(socket, &mut context_token) + .ok_or(Error::new(EBADF))?; +- let desc = &file_descriptor.description.read(token.token()); +- (desc.scheme, desc.number) ++ *file_descriptor.description.read(context_token.token()) + }; +- let scheme = scheme::get_scheme(token.token(), scheme)?; ++ let scheme = desc.get_scheme(token)?; ++ let number = desc.number; + + let current_lock = context::current(); + let mut current = 
current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); ++ let (context, mut context_token) = current.token_split(); + ( + scheme, + number, + if flags.contains(CallFlags::FD_CLONE) { +- context.bulk_get_files(&target_fds, &mut token) ++ context.bulk_get_files(&target_fds, &mut context_token) + } else { +- context.bulk_remove_files(&target_fds, &mut token) ++ context.bulk_remove_files(&target_fds, &mut context_token) + }? + .into_iter() + .map(|f| f.description) +@@ -395,18 +399,22 @@ fn call_fdread( + metadata: &[u64], + token: &mut CleanLockToken, + ) -> Result { ++ let desc = { ++ let current_lock = context::current(); ++ let mut current = current_lock.read(token.token()); ++ let (context, mut context_token) = current.token_split(); ++ let file_descriptor = context ++ .get_file(fd, &mut context_token) ++ .ok_or(Error::new(EBADF))?; ++ *file_descriptor.description.read(context_token.token()) ++ }; + let (scheme, number) = { +- let (scheme, number) = { +- let current_lock = context::current(); +- let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); +- let file_descriptor = context.get_file(fd, &mut token).ok_or(Error::new(EBADF))?; +- let desc = file_descriptor.description.read(token.token()); +- (desc.scheme, desc.number) +- }; +- let scheme = scheme::get_scheme(token.token(), scheme)?; +- +- (scheme, number) ++ let scheme = desc.get_scheme(token)?; ++ let number = desc.number; ++ ( ++ scheme, ++ number, ++ ) + }; + + scheme.kfdread(number, payload, flags, metadata, token) +@@ -440,9 +448,9 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken) + } + .ok_or(Error::new(EBADF))?; + +- let (scheme_id, number, flags) = { +- let desc = file.description.write(token.token()); +- (desc.scheme, desc.number, desc.flags) ++ let (number, flags, desc) = { ++ let desc = *file.description.read(token.token()); ++ (desc.number, desc.flags, desc) + }; + + if cmd == 
F_DUPFD || cmd == F_DUPFD_CLOEXEC { +@@ -460,7 +468,7 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken) + + // Communicate fcntl with scheme + if cmd != F_GETFD && cmd != F_SETFD { +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; ++ let scheme = desc.get_scheme(token)?; + + scheme.fcntl(number, cmd, arg, token)?; + }; +@@ -518,13 +526,11 @@ pub fn flink(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken) + let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; + let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; + +- let (number, scheme_id) = { +- let desc = file.description.read(token.token()); +- (desc.number, desc.scheme) ++ let (number, scheme) = { ++ let desc = *file.description.read(token.token()); ++ (desc.number, desc.get_scheme(token)?) + }; + +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; +- + // TODO: Check EXDEV. + /* + if scheme_id != description.scheme { +@@ -554,13 +560,11 @@ pub fn frename(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken + let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; + let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; + +- let (number, scheme_id) = { +- let desc = file.description.read(token.token()); +- (desc.number, desc.scheme) ++ let (number, scheme) = { ++ let desc = *file.description.read(token.token()); ++ (desc.number, desc.get_scheme(token)?) + }; + +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; +- + // TODO: Check EXDEV. 
+ /* + if scheme_id != description.scheme { +diff --git a/src/syscall/process.rs b/src/syscall/process.rs +index e83da42..78eed9d 100644 +--- a/src/syscall/process.rs ++++ b/src/syscall/process.rs +@@ -271,23 +274,26 @@ unsafe fn bootstrap_mem(bootstrap: &crate::startup::Bootstrap) -> &'static [u8] + } + + fn insert_fd(scheme: SchemeId, number: usize, cloexec: bool, token: &mut CleanLockToken) -> usize { ++ let description = Arc::new(RwLock::new(FileDescription::new( ++ scheme, ++ number, ++ 0, ++ (O_CREAT | O_RDWR) as u32, ++ InternalFlags::empty(), ++ token, ++ ))); ++ + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); ++ let (context, mut context_token) = current.token_split(); + context + .add_file_min( + FileDescriptor { +- description: Arc::new(RwLock::new(FileDescription { +- scheme, +- number, +- offset: 0, +- flags: (O_CREAT | O_RDWR) as u32, +- internal_flags: InternalFlags::empty(), +- })), ++ description, + cloexec, + }, + syscall::flag::UPPER_FDTBL_TAG + scheme.get(), +- &mut token, ++ &mut context_token, + ) + .expect("failed to insert fd to current context") + .get() diff --git a/local/patches/kernel/P5-boot-path-hardening.patch b/local/patches/kernel/P5-boot-path-hardening.patch new file mode 100644 index 00000000..c676d3d8 --- /dev/null +++ b/local/patches/kernel/P5-boot-path-hardening.patch @@ -0,0 +1,422 @@ +diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs +index 4dc2388..f472c08 100644 +--- a/src/acpi/madt/arch/x86.rs ++++ b/src/acpi/madt/arch/x86.rs +@@ -18,6 +18,7 @@ use crate::{ + + use super::{Madt, MadtEntry}; + ++const AP_SPIN_LIMIT: u32 = 1_000_000; + const TRAMPOLINE: usize = 0x8000; + static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline")); + +@@ -42,13 +43,17 @@ pub(super) fn init(madt: Madt) { + //TODO: do not have writable and executable! 
+ let mut mapper = KernelMapper::lock_rw(); + +- let result = mapper +- .map_phys( +- trampoline_page.start_address(), +- trampoline_frame.base(), +- PageFlags::new().execute(true).write(true), +- ) +- .expect("failed to map trampoline"); ++ let result = match mapper.map_phys( ++ trampoline_page.start_address(), ++ trampoline_frame.base(), ++ PageFlags::new().execute(true).write(true), ++ ) { ++ Some(result) => result, ++ None => { ++ println!("KERNEL AP: failed to map trampoline page, AP bring-up disabled"); ++ return; ++ } ++ }; + + (result, mapper.table().phys().data()) + }; +@@ -72,17 +77,27 @@ pub(super) fn init(madt: Madt) { + if u32::from(ap_local_apic.id) == me.get() { + debug!(" This is my local APIC"); + } else if ap_local_apic.flags & 1 == 1 { +- let cpu_id = LogicalCpuId::next(); +- + // Allocate a stack +- let stack_start = RmmA::phys_to_virt( +- allocate_p2frame(4) +- .expect("no more frames in acpi stack_start") +- .base(), +- ) +- .data(); ++ let alloc = match allocate_p2frame(4) { ++ Some(frame) => frame, ++ None => { ++ println!("KERNEL AP: CPU {} no memory for stack, skipping", ap_local_apic.id); ++ continue; ++ } ++ }; ++ let stack_start = RmmA::phys_to_virt(alloc.base()).data(); + let stack_end = stack_start + (PAGE_SIZE << 4); + ++ let next_cpu = crate::CPU_COUNT.load(Ordering::Relaxed); ++ if next_cpu >= crate::cpu_set::MAX_CPU_COUNT { ++ println!( ++ "KERNEL AP: CPU {} exceeds logical CPU limit, skipping", ++ ap_local_apic.id ++ ); ++ continue; ++ } ++ let cpu_id = LogicalCpuId::new(next_cpu); ++ + let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end); + + let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id); +@@ -137,13 +152,34 @@ pub(super) fn init(madt: Madt) { + local_apic.set_icr(icr); + } + +- // Wait for trampoline ready +- while unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } == 0 { ++ // Wait for trampoline ready with timeout ++ let mut trampoline_ready = false; ++ for _ in 0..AP_SPIN_LIMIT { ++ if 
unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } != 0 { ++ trampoline_ready = true; ++ break; ++ } + hint::spin_loop(); + } +- while !AP_READY.load(Ordering::SeqCst) { ++ if !trampoline_ready { ++ println!("KERNEL AP: CPU {} trampoline timeout, skipping", ap_local_apic.id); ++ continue; ++ } ++ ++ let mut kernel_ready = false; ++ for _ in 0..AP_SPIN_LIMIT { ++ if AP_READY.load(Ordering::SeqCst) { ++ kernel_ready = true; ++ break; ++ } + hint::spin_loop(); + } ++ if !kernel_ready { ++ println!("KERNEL AP: CPU {} AP_READY timeout, skipping", ap_local_apic.id); ++ continue; ++ } ++ ++ crate::CPU_COUNT.fetch_add(1, Ordering::Relaxed); + + RmmA::invalidate_all(); + } +@@ -151,10 +187,12 @@ pub(super) fn init(madt: Madt) { + } + + // Unmap trampoline +- let (_frame, _, flush) = unsafe { ++ if let Some((_frame, _, flush)) = unsafe { + KernelMapper::lock_rw() + .unmap_phys(trampoline_page.start_address()) +- .expect("failed to unmap trampoline page") +- }; +- flush.flush(); ++ } { ++ flush.flush(); ++ } else { ++ println!("KERNEL AP: failed to unmap trampoline page (non-fatal)"); ++ } + } +diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs +index 4fdb0ba..aaa7196 100644 +--- a/src/allocator/mod.rs ++++ b/src/allocator/mod.rs +@@ -7,26 +7,40 @@ mod linked_list; + /// Size of kernel heap + const KERNEL_HEAP_SIZE: usize = ::rmm::MEGABYTE; + ++#[cold] ++fn halt_kernel_heap_init(message: &str) -> ! { ++ print!("{message}"); ++ println!("Kernel heap initialization cannot continue. 
Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++} ++ + unsafe fn map_heap(mapper: &mut KernelMapper, offset: usize, size: usize) { + let mut flush_all = PageFlushAll::new(); + + let heap_start_page = Page::containing_address(VirtualAddress::new(offset)); + let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size - 1)); + for page in Page::range_inclusive(heap_start_page, heap_end_page) { +- let phys = mapper +- .allocator_mut() +- .allocate_one() +- .expect("failed to allocate kernel heap"); ++ let phys = match mapper.allocator_mut().allocate_one() { ++ Some(phys) => phys, ++ None => halt_kernel_heap_init( ++ "FATAL: failed to allocate physical frame for kernel heap\n", ++ ), ++ }; + let flush = unsafe { +- mapper +- .map_phys( +- page.start_address(), +- phys, +- PageFlags::new() +- .write(true) +- .global(cfg!(not(feature = "pti"))), +- ) +- .expect("failed to map kernel heap") ++ match mapper.map_phys( ++ page.start_address(), ++ phys, ++ PageFlags::new() ++ .write(true) ++ .global(cfg!(not(feature = "pti"))), ++ ) { ++ Some(flush) => flush, ++ None => halt_kernel_heap_init( ++ "FATAL: failed to map kernel heap virtual page\n", ++ ), ++ } + }; + flush_all.consume(flush); + } +diff --git a/src/arch/x86_shared/gdt.rs b/src/arch/x86_shared/gdt.rs +index cad344f..f7acae3 100644 +--- a/src/arch/x86_shared/gdt.rs ++++ b/src/arch/x86_shared/gdt.rs +@@ -192,6 +192,15 @@ impl ProcessorControlRegion { + } + } + ++#[cold] ++fn halt_pcr_init() -> ! { ++ println!("FATAL: failed to allocate physical memory for Processor Control Region"); ++ println!("Processor startup cannot continue. 
Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++} ++ + pub unsafe fn pcr() -> *mut ProcessorControlRegion { + unsafe { + // Primitive benchmarking of RDFSBASE and RDGSBASE in userspace, appears to indicate that +@@ -375,7 +384,10 @@ pub fn allocate_and_init_pcr( + .next_power_of_two() + .trailing_zeros(); + +- let pcr_frame = crate::memory::allocate_p2frame(alloc_order).expect("failed to allocate PCR"); ++ let pcr_frame = match crate::memory::allocate_p2frame(alloc_order) { ++ Some(frame) => frame, ++ None => halt_pcr_init(), ++ }; + let pcr_ptr = RmmA::phys_to_virt(pcr_frame.base()).data() as *mut ProcessorControlRegion; + unsafe { core::ptr::write(pcr_ptr, ProcessorControlRegion::new_partial_init(cpu_id)) }; + +diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs +index 5006458..47f692f 100644 +--- a/src/arch/x86_shared/idt.rs ++++ b/src/arch/x86_shared/idt.rs +@@ -78,6 +78,15 @@ static INIT_BSP_IDT: SyncUnsafeCell = SyncUnsafeCell::new(Idt::new()); + pub(crate) static IDTS: RwLock> = + RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new())); + ++#[cold] ++fn halt_idt_init() -> ! { ++ println!("FATAL: failed to allocate physical pages for backup interrupt stack"); ++ println!("Interrupt setup cannot continue. Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++} ++ + #[inline] + pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool { + if cpu_id == LogicalCpuId::BSP { +@@ -161,8 +170,10 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt { + .or_insert_with(|| Box::leak(Box::new(Idt::new()))); + + use crate::memory::{RmmA, RmmArch}; +- let frames = crate::memory::allocate_p2frame(4) +- .expect("failed to allocate pages for backup interrupt stack"); ++ let frames = match crate::memory::allocate_p2frame(4) { ++ Some(frames) => frames, ++ None => halt_idt_init(), ++ }; + + // Physical pages are mapped linearly. So is the linearly mapped virtual memory. 
+ let base_address = RmmA::phys_to_virt(frames.base()); +diff --git a/src/memory/mod.rs b/src/memory/mod.rs +index 393ae7e..b4a1aa3 100644 +--- a/src/memory/mod.rs ++++ b/src/memory/mod.rs +@@ -754,7 +754,8 @@ pub fn init_mm(allocator: BumpAllocator) { + init_sections(allocator); + + unsafe { +- let the_frame = allocate_frame().expect("failed to allocate static zeroed frame"); ++ let the_frame = allocate_frame() ++ .expect("KERNEL MEM: failed to allocate static zeroed frame during init_mm - physical memory exhausted"); + let the_info = get_page_info(the_frame).expect("static zeroed frame had no PageInfo"); + the_info + .refcount +@@ -1027,9 +1028,13 @@ pub fn page_fault_handler( + let mut token = unsafe { CleanLockToken::new() }; + match context::memory::try_correcting_page_tables(faulting_page, mode, &mut token) { + Ok(()) => return Ok(()), +- Err(PfError::Oom) => todo!("oom"), ++ Err(PfError::Oom) => { ++ debug!("KERNEL PF: OOM during page table correction for {:#x}", faulting_address.data()); ++ } + Err(PfError::Segv | PfError::RecursionLimitExceeded) => (), +- Err(PfError::NonfatalInternalError) => todo!(), ++ Err(PfError::NonfatalInternalError) => { ++ debug!("KERNEL PF: internal error during page table correction for {:#x}", faulting_address.data()); ++ } + } + } + +@@ -1038,6 +1043,17 @@ pub fn page_fault_handler( + return Ok(()); + } + ++ debug!( ++ "KERNEL PF: addr={:#x} ip={:#x} mode={:?} kernel={} user={} write={} instr={}", ++ faulting_address.data(), ++ stack.ip(), ++ mode, ++ caused_by_kernel, ++ caused_by_user, ++ caused_by_write, ++ caused_by_instr_fetch, ++ ); ++ + Err(Segv) + } + static THE_ZEROED_FRAME: SyncUnsafeCell> = +diff --git a/src/startup/memory.rs b/src/startup/memory.rs +index 26922dd..f271200 100644 +--- a/src/startup/memory.rs ++++ b/src/startup/memory.rs +@@ -323,7 +323,16 @@ unsafe fn map_memory(areas: &[MemoryArea], mut bump_allocator: &mut Bum + } + } + +- let kernel_area = (*MEMORY_MAP.get()).kernel().unwrap(); ++ let kernel_area 
= match (*MEMORY_MAP.get()).kernel() { ++ Some(area) => area, ++ None => { ++ println!("FATAL: kernel memory area not found in boot memory map"); ++ println!("Cannot determine kernel base address. Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++ } ++ }; + let kernel_base = kernel_area.start; + let kernel_size = kernel_area.end.saturating_sub(kernel_area.start); + // Map kernel at KERNEL_OFFSET +diff --git a/src/startup/mod.rs b/src/startup/mod.rs +index 8ad3cdf..86aabc2 100644 +--- a/src/startup/mod.rs ++++ b/src/startup/mod.rs +@@ -149,6 +149,15 @@ static BOOTSTRAP: spin::Once = spin::Once::new(); + pub(crate) static AP_READY: AtomicBool = AtomicBool::new(false); + static BSP_READY: AtomicBool = AtomicBool::new(false); + ++#[cold] ++fn halt_boot(message: &str) -> ! { ++ print!("{message}"); ++ println!("Kernel boot cannot continue. Halting."); ++ loop { ++ hint::spin_loop(); ++ } ++} ++ + /// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this + pub(crate) fn kmain(bootstrap: Bootstrap) -> ! { + let mut token = unsafe { CleanLockToken::new() }; +@@ -180,9 +189,7 @@ pub(crate) fn kmain(bootstrap: Bootstrap) -> ! 
{ + context.euid = 0; + context.egid = 0; + } +- Err(err) => { +- panic!("failed to spawn userspace_init: {:?}", err); +- } ++ Err(_err) => halt_boot("FATAL: failed to spawn first userspace process userspace_init\n"), + } + + run_userspace(&mut token) +diff --git a/src/syscall/process.rs b/src/syscall/process.rs +index e83da42..084b64e 100644 +--- a/src/syscall/process.rs ++++ b/src/syscall/process.rs +@@ -33,6 +33,8 @@ pub fn exit_this_context(excp: Option, token: &mut CleanLock + let mut close_files; + let addrspace_opt; + ++ super::futex::cleanup_current_robust_futexes(token); ++ + let context_lock = context::current(); + { + let mut context = context_lock.write(token.token()); +@@ -44,6 +46,7 @@ pub fn exit_this_context(excp: Option, token: &mut CleanLock + addrspace_opt = context + .set_addr_space(None, token.downgrade()) + .and_then(|a| Arc::try_unwrap(a).ok()); ++ context.robust_list_head = None; + drop(mem::replace(&mut context.syscall_head, SyscallFrame::Dummy)); + drop(mem::replace(&mut context.syscall_tail, SyscallFrame::Dummy)); + } +@@ -244,7 +247,11 @@ pub unsafe fn usermode_bootstrap(bootstrap: &Bootstrap, token: &mut CleanLockTok + .copy_from_slice(bootstrap_slice) + .expect("failed to copy memory to bootstrap"); + +- let bootstrap_entry = u64::from_le_bytes(bootstrap_slice[0x1a..0x22].try_into().unwrap()); ++ let bootstrap_entry = if bootstrap_slice.len() >= 0x22 { ++ u64::from_le_bytes(bootstrap_slice[0x1a..0x22].try_into().unwrap()) ++ } else { ++ panic!("KERNEL BOOT: bootstrap initfs too small ({} bytes, need at least 34) - cannot determine entry point", bootstrap_slice.len()); ++ }; + debug!("Bootstrap entry point: {:X}", bootstrap_entry); + assert_ne!(bootstrap_entry, 0); + +@@ -271,23 +278,26 @@ unsafe fn bootstrap_mem(bootstrap: &crate::startup::Bootstrap) -> &'static [u8] + } + + fn insert_fd(scheme: SchemeId, number: usize, cloexec: bool, token: &mut CleanLockToken) -> usize { ++ let description = Arc::new(RwLock::new(FileDescription::new( 
++ scheme, ++ number, ++ 0, ++ (O_CREAT | O_RDWR) as u32, ++ InternalFlags::empty(), ++ token, ++ ))); ++ + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); ++ let (context, mut context_token) = current.token_split(); + context + .add_file_min( + FileDescriptor { +- description: Arc::new(RwLock::new(FileDescription { +- scheme, +- number, +- offset: 0, +- flags: (O_CREAT | O_RDWR) as u32, +- internal_flags: InternalFlags::empty(), +- })), ++ description, + cloexec, + }, + syscall::flag::UPPER_FDTBL_TAG + scheme.get(), +- &mut token, ++ &mut context_token, + ) + .expect("failed to insert fd to current context") + .get() diff --git a/local/patches/libwayland/redox.patch b/local/patches/libwayland/redox.patch index 8401c16e..5aa9ffdd 100644 --- a/local/patches/libwayland/redox.patch +++ b/local/patches/libwayland/redox.patch @@ -1,71 +1,36 @@ -diff -uNr wayland-1.24.0/meson.build source/meson.build ---- clean-wayland/meson.build 2025-07-06 13:11:26.000000000 +0100 -+++ patched-wayland/meson.build 2026-04-29 18:06:17.250269176 +0100 -@@ -80,8 +80,8 @@ - ffi_dep = dependency('libffi') - - decls = [ -- { 'header': 'sys/signalfd.h', 'symbol': 'SFD_CLOEXEC' }, -- { 'header': 'sys/timerfd.h', 'symbol': 'TFD_CLOEXEC' }, -+ { 'header': 'signal.h', 'symbol': 'SIG_BLOCK' }, -+ { 'header': 'time.h', 'symbol': 'CLOCK_MONOTONIC' }, - { 'header': 'time.h', 'symbol': 'CLOCK_MONOTONIC' }, - ] - -diff -uNr wayland-1.24.0/src/connection.c source/src/connection.c ---- clean-wayland/src/connection.c 2025-07-06 13:11:26.000000000 +0100 -+++ patched-wayland/src/connection.c 2026-04-29 18:06:00.540467363 +0100 -@@ -40,6 +40,17 @@ +--- a/b/src/connection.c 2025-07-06 13:11:26.000000000 +0100 ++++ b/src/connection.c 2026-05-01 00:15:42.778777823 +0100 +@@ -40,6 +40,12 @@ #include #include +#ifndef MSG_NOSIGNAL +#define MSG_NOSIGNAL 0 +#endif -+#include -+#include -+#ifndef _RELIBC_STDIO_H -+static 
FILE *open_memstream(char **bufp, size_t *sizep) { -+ *bufp = NULL; *sizep = 0; return NULL; -+} -+#endif ++ ++extern FILE *open_memstream(char **bufp, size_t *sizep); + #include "wayland-util.h" #include "wayland-private.h" #include "wayland-os.h" -diff -uNr wayland-1.24.0/src/event-loop.c source/src/event-loop.c ---- clean-wayland/src/event-loop.c 2025-07-06 13:11:26.000000000 +0100 -+++ patched-wayland/src/event-loop.c 2026-04-29 18:05:40.684702870 +0100 -@@ -35,9 +35,58 @@ +--- a/b/src/event-loop.c 2025-07-06 13:11:26.000000000 +0100 ++++ b/src/event-loop.c 2026-05-01 00:15:42.778845239 +0100 +@@ -35,9 +35,43 @@ #include #include #include -#include -#include -+/* #include */ -+/* #include */ #include -+ -+#ifndef SFD_CLOEXEC ++/* Redox: relibc declares signalfd/timerfd in headers but has no implementation. ++ Provide inline implementations via Redox schemes. */ +#define SFD_CLOEXEC O_CLOEXEC -+#endif -+#ifndef SFD_NONBLOCK +#define SFD_NONBLOCK O_NONBLOCK -+#endif -+#ifndef TFD_CLOEXEC +#define TFD_CLOEXEC O_CLOEXEC -+#endif -+#ifndef TFD_NONBLOCK +#define TFD_NONBLOCK O_NONBLOCK -+#endif -+#ifndef TFD_TIMER_ABSTIME +#define TFD_TIMER_ABSTIME 0x1 -+#endif -+ -+#ifndef _RELIBC_SIGNAL_H +struct signalfd_siginfo { uint8_t pad[128]; }; -+#endif -+static int signalfd_impl(int fd, const sigset_t *mask, uintptr_t masksize, int flags) { ++static int signalfd(int fd, const sigset_t *mask, int flags) { + int oflag = O_RDWR; + if (flags & SFD_CLOEXEC) oflag |= O_CLOEXEC; + if (flags & SFD_NONBLOCK) oflag |= O_NONBLOCK; @@ -74,7 +39,6 @@ diff -uNr wayland-1.24.0/src/event-loop.c source/src/event-loop.c + sigprocmask(SIG_BLOCK, mask, NULL); + return fd; +} -+int signalfd(int fd, const sigset_t *mask, uintptr_t masksize) { return signalfd_impl(fd, mask, masksize, 0); } +static int timerfd_create(int clockid, int flags) { + int oflag = O_RDWR; + if (flags & TFD_CLOEXEC) oflag |= O_CLOEXEC; @@ -83,12 +47,12 @@ diff -uNr wayland-1.24.0/src/event-loop.c source/src/event-loop.c + 
snprintf(path, sizeof(path), "/scheme/time/%d", clockid); + return open(path, oflag); +} -+int timerfd_settime(int fd, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) { ++static int timerfd_settime(int fd, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) { + if (new_value == NULL) { errno = EFAULT; return -1; } + ssize_t r = write(fd, &new_value->it_value, sizeof(struct timespec)); + return (r == sizeof(struct timespec)) ? 0 : -1; +} -+int timerfd_gettime(int fd, struct itimerspec *curr) { ++static int timerfd_gettime(int fd, struct itimerspec *curr) { + if (curr == NULL) { errno = EFAULT; return -1; } + curr->it_interval = (struct timespec){0}; + ssize_t r = read(fd, &curr->it_value, sizeof(struct timespec)); @@ -97,28 +61,25 @@ diff -uNr wayland-1.24.0/src/event-loop.c source/src/event-loop.c #include "timespec-util.h" #include "wayland-util.h" #include "wayland-private.h" -diff -uNr wayland-1.24.0/src/meson.build source/src/meson.build ---- clean-wayland/src/meson.build 2025-07-06 13:11:26.000000000 +0100 -+++ patched-wayland/src/meson.build 2026-04-29 18:06:30.380113462 +0100 +--- a/b/src/meson.build 2025-07-06 13:11:26.000000000 +0100 ++++ b/src/meson.build 2026-05-01 00:15:42.778925799 +0100 @@ -81,8 +81,7 @@ endif if meson.is_cross_build() or not get_option('scanner') - scanner_dep = dependency('wayland-scanner', native: true, version: meson.project_version()) - wayland_scanner_for_build = find_program(scanner_dep.get_variable(pkgconfig: 'wayland_scanner')) -+wayland_scanner_for_build = find_program('wayland-scanner', native: true) ++ wayland_scanner_for_build = find_program('wayland-scanner', native: true) else wayland_scanner_for_build = wayland_scanner endif -diff -uNr wayland-1.24.0/src/wayland-server.c source/src/wayland-server.c ---- clean-wayland/src/wayland-server.c 2025-07-06 13:11:26.000000000 +0100 -+++ patched-wayland/src/wayland-server.c 2026-04-29 18:06:46.390923573 +0100 -@@ -39,7 +39,24 
@@ +--- a/b/src/wayland-server.c 2025-07-06 13:11:26.000000000 +0100 ++++ b/src/wayland-server.c 2026-05-01 00:15:42.779083803 +0100 +@@ -39,7 +39,23 @@ #include #include #include -#include -+/* #include */ +#ifndef EFD_CLOEXEC +#define EFD_CLOEXEC O_CLOEXEC +#endif diff --git a/local/patches/relibc/P3-pthread-nulstr-sched-stdint.patch b/local/patches/relibc/P3-pthread-nulstr-sched-stdint.patch new file mode 100644 index 00000000..459cda89 --- /dev/null +++ b/local/patches/relibc/P3-pthread-nulstr-sched-stdint.patch @@ -0,0 +1,359 @@ +diff --git a/src/header/pthread/mod.rs b/src/header/pthread/mod.rs +index c742a42..03c4043 100644 +--- a/src/header/pthread/mod.rs ++++ b/src/header/pthread/mod.rs +@@ -3,15 +3,26 @@ + //! See . + + use alloc::collections::LinkedList; +-use core::{cell::Cell, ptr::NonNull}; ++use core::{cell::Cell, mem::size_of, ptr::NonNull}; ++ ++#[cfg(target_os = "redox")] ++use redox_rt::proc::FdGuard; ++#[cfg(target_os = "linux")] ++use sc::syscall; ++#[cfg(target_os = "redox")] ++use syscall; + + use crate::{ + error::Errno, +- header::{bits_timespec::timespec, sched::*}, ++ header::{ ++ bits_timespec::timespec, ++ errno::{EINVAL, ERANGE}, ++ sched::*, ++ }, + platform::{ + Pal, Sys, + types::{ +- c_int, c_uchar, c_uint, c_void, clockid_t, pthread_attr_t, pthread_barrier_t, ++ c_char, c_int, c_uchar, c_uint, c_void, clockid_t, pthread_attr_t, pthread_barrier_t, + pthread_barrierattr_t, pthread_cond_t, pthread_condattr_t, pthread_key_t, + pthread_mutex_t, pthread_mutexattr_t, pthread_once_t, pthread_rwlock_t, + pthread_rwlockattr_t, pthread_spinlock_t, pthread_t, size_t, +@@ -20,6 +31,9 @@ use crate::{ + pthread, + }; + ++#[cfg(target_os = "linux")] ++use crate::platform::sys::e_raw; ++ + pub fn e(result: Result<(), Errno>) -> i32 { + match result { + Ok(()) => 0, +@@ -27,6 +41,96 @@ pub fn e(result: Result<(), Errno>) -> i32 { + } + } + ++const RLCT_AFFINITY_BYTES: usize = size_of::(); ++const RLCT_MAX_AFFINITY_CPUS: usize = u64::BITS as usize; 
++ ++fn cpuset_bytes<'a>(cpusetsize: size_t, cpuset: *const cpu_set_t) -> Result<&'a [u8], Errno> { ++ if cpuset.is_null() || !(RLCT_AFFINITY_BYTES..=size_of::()).contains(&cpusetsize) { ++ return Err(Errno(EINVAL)); ++ } ++ ++ Ok(unsafe { core::slice::from_raw_parts(cpuset.cast::(), cpusetsize) }) ++} ++ ++fn cpuset_bytes_mut<'a>(cpusetsize: size_t, cpuset: *mut cpu_set_t) -> Result<&'a mut [u8], Errno> { ++ if cpuset.is_null() || !(RLCT_AFFINITY_BYTES..=size_of::()).contains(&cpusetsize) { ++ return Err(Errno(EINVAL)); ++ } ++ ++ Ok(unsafe { core::slice::from_raw_parts_mut(cpuset.cast::(), cpusetsize) }) ++} ++ ++fn cpuset_to_u64(cpusetsize: size_t, cpuset: *const cpu_set_t) -> Result { ++ let bytes = cpuset_bytes(cpusetsize, cpuset)?; ++ let mut mask = 0_u64; ++ ++ for (byte_index, byte) in bytes.iter().copied().enumerate() { ++ for bit in 0..u8::BITS as usize { ++ if byte & (1 << bit) == 0 { ++ continue; ++ } ++ ++ let cpu = byte_index * u8::BITS as usize + bit; ++ if cpu >= RLCT_MAX_AFFINITY_CPUS { ++ return Err(Errno(EINVAL)); ++ } ++ ++ mask |= 1_u64 << cpu; ++ } ++ } ++ ++ Ok(mask) ++} ++ ++fn copy_u64_to_cpuset(mask: u64, cpusetsize: size_t, cpuset: *mut cpu_set_t) -> Result<(), Errno> { ++ let bytes = cpuset_bytes_mut(cpusetsize, cpuset)?; ++ bytes.fill(0); ++ ++ for (byte_index, dst) in bytes.iter_mut().take(RLCT_AFFINITY_BYTES).enumerate() { ++ *dst = (mask >> (byte_index * u8::BITS as usize)) as u8; ++ } ++ ++ Ok(()) ++} ++ ++#[cfg(target_os = "redox")] ++fn redox_set_thread_affinity(thread: &pthread::Pthread, mask: u64) -> Result<(), Errno> { ++ let mut kernel_cpuset = cpu_set_t::default(); ++ kernel_cpuset.__bits[0] = mask; ++ ++ let handle = FdGuard::new(unsafe { ++ syscall::dup(thread.os_tid.get().read().thread_fd, b"sched-affinity")? 
++ }); ++ let _ = handle.write(unsafe { ++ core::slice::from_raw_parts( ++ core::ptr::from_ref(&kernel_cpuset).cast::(), ++ size_of::(), ++ ) ++ })?; ++ ++ Ok(()) ++} ++ ++#[cfg(target_os = "redox")] ++fn redox_get_thread_affinity(thread: &pthread::Pthread) -> Result { ++ let handle = FdGuard::new(unsafe { ++ syscall::dup(thread.os_tid.get().read().thread_fd, b"sched-affinity")? ++ }); ++ let mut kernel_cpuset = cpu_set_t::default(); ++ let _ = handle.read(unsafe { ++ core::slice::from_raw_parts_mut( ++ core::ptr::from_mut(&mut kernel_cpuset).cast::(), ++ size_of::(), ++ ) ++ })?; ++ ++ if kernel_cpuset.__bits[1..].iter().any(|bits| *bits != 0) { ++ return Err(Errno(EINVAL)); ++ } ++ ++ Ok(kernel_cpuset.__bits[0]) ++} ++ + #[derive(Clone)] + pub(crate) struct RlctAttr { + pub detachstate: c_uchar, +@@ -186,6 +290,43 @@ pub unsafe extern "C" fn pthread_getcpuclockid( + } + } + ++/// GNU extension. See . ++#[unsafe(no_mangle)] ++pub unsafe extern "C" fn pthread_getaffinity_np( ++ thread: pthread_t, ++ cpusetsize: size_t, ++ cpuset: *mut cpu_set_t, ++) -> c_int { ++ let thread: &pthread::Pthread = unsafe { &*thread.cast() }; ++ ++ let result = { ++ #[cfg(target_os = "redox")] ++ { ++ redox_get_thread_affinity(thread) ++ .and_then(|mask| copy_u64_to_cpuset(mask, cpusetsize, cpuset)) ++ } ++ ++ #[cfg(target_os = "linux")] ++ { ++ if cpuset.is_null() { ++ Err(Errno(EINVAL)) ++ } else { ++ e_raw(unsafe { ++ syscall!( ++ SCHED_GETAFFINITY, ++ thread.os_tid.get().read().thread_id, ++ cpusetsize, ++ cpuset.cast::() ++ ) ++ }) ++ .map(|_| ()) ++ } ++ } ++ }; ++ ++ e(result) ++} ++ + /// See . + #[unsafe(no_mangle)] + pub unsafe extern "C" fn pthread_getschedparam( +@@ -235,6 +376,43 @@ pub unsafe extern "C" fn pthread_self() -> pthread_t { + core::ptr::from_ref(unsafe { pthread::current_thread().unwrap_unchecked() }) as *mut _ + } + ++/// GNU extension. See . 
++#[unsafe(no_mangle)] ++pub unsafe extern "C" fn pthread_setaffinity_np( ++ thread: pthread_t, ++ cpusetsize: size_t, ++ cpuset: *const cpu_set_t, ++) -> c_int { ++ let thread: &pthread::Pthread = unsafe { &*thread.cast() }; ++ ++ let result = { ++ #[cfg(target_os = "redox")] ++ { ++ cpuset_to_u64(cpusetsize, cpuset) ++ .and_then(|mask| redox_set_thread_affinity(thread, mask)) ++ } ++ ++ #[cfg(target_os = "linux")] ++ { ++ if cpuset.is_null() { ++ Err(Errno(EINVAL)) ++ } else { ++ e_raw(unsafe { ++ syscall!( ++ SCHED_SETAFFINITY, ++ thread.os_tid.get().read().thread_id, ++ cpusetsize, ++ cpuset.cast::() ++ ) ++ }) ++ .map(|_| ()) ++ } ++ } ++ }; ++ ++ e(result) ++} ++ + /// See . + #[unsafe(no_mangle)] + pub unsafe extern "C" fn pthread_setcancelstate(state: c_int, oldstate: *mut c_int) -> c_int { +@@ -307,6 +485,13 @@ pub unsafe extern "C" fn pthread_testcancel() { + unsafe { pthread::testcancel() }; + } + ++/// ++/// ++/// Non-standard GNU extension. Prefer `sched_yield()` instead. ++pub extern "C" fn pthread_yield() { ++ let _ = Sys::sched_yield(); ++} ++ + // Must be the same struct as defined in the pthread_cleanup_push macro. 
+ #[repr(C)] + pub(crate) struct CleanupLinkedListEntry { +@@ -350,3 +535,84 @@ pub(crate) unsafe fn run_destructor_stack() { + (entry.routine)(entry.arg); + } + } ++ ++#[unsafe(no_mangle)] ++pub unsafe extern "C" fn pthread_setname_np(thread: pthread_t, name: *const c_char) -> c_int { ++ if name.is_null() { ++ return EINVAL; ++ } ++ ++ let cstr = unsafe { core::ffi::CStr::from_ptr(name) }; ++ let name_bytes = cstr.to_bytes(); ++ let len = name_bytes.len().min(31); ++ ++ #[cfg(target_os = "redox")] ++ { ++ let thread = unsafe { &*thread.cast::() }; ++ let os_tid = unsafe { thread.os_tid.get().read() }; ++ let path = alloc::format!("proc:{}/name\0", os_tid.thread_fd); ++ let path_cstr = core::ffi::CStr::from_bytes_with_nul(path.as_bytes()).unwrap(); ++ let fd = match Sys::open(path_cstr.into(), crate::header::fcntl::O_WRONLY, 0) { ++ Ok(fd) => fd, ++ Err(Errno(code)) => return code, ++ }; ++ ++ let result = match Sys::write(fd, &name_bytes[..len]) { ++ Ok(written) if written == len => 0, ++ Ok(_) => crate::header::errno::EIO, ++ Err(Errno(code)) => code, ++ }; ++ let _ = Sys::close(fd); ++ result ++ } ++ #[cfg(not(target_os = "redox"))] ++ { ++ let _ = thread; ++ 0 ++ } ++} ++ ++#[unsafe(no_mangle)] ++pub unsafe extern "C" fn pthread_getname_np( ++ thread: pthread_t, ++ name: *mut c_char, ++ len: size_t, ++) -> c_int { ++ if name.is_null() { ++ return EINVAL; ++ } ++ if len == 0 { ++ return ERANGE; ++ } ++ ++ #[cfg(target_os = "redox")] ++ { ++ let thread = unsafe { &*thread.cast::() }; ++ let os_tid = unsafe { thread.os_tid.get().read() }; ++ let path = alloc::format!("proc:{}/name\0", os_tid.thread_fd); ++ let path_cstr = core::ffi::CStr::from_bytes_with_nul(path.as_bytes()).unwrap(); ++ let fd = match Sys::open(path_cstr.into(), crate::header::fcntl::O_RDONLY, 0) { ++ Ok(fd) => fd, ++ Err(Errno(code)) => return code, ++ }; ++ ++ let mut buf = [0u8; 31]; ++ let result = match Sys::read(fd, &mut buf) { ++ Ok(read) if read < len => { ++ unsafe { 
core::ptr::copy_nonoverlapping(buf.as_ptr(), name.cast(), read) }; ++ unsafe { *name.add(read) = 0 }; ++ 0 ++ } ++ Ok(_) => ERANGE, ++ Err(Errno(code)) => code, ++ }; ++ let _ = Sys::close(fd); ++ result ++ } ++ #[cfg(not(target_os = "redox"))] ++ { ++ let _ = thread; ++ unsafe { *name = 0 }; ++ 0 ++ } ++} +diff --git a/src/header/sched/cbindgen.toml b/src/header/sched/cbindgen.toml +index b361fa4..d6d959d 100644 +--- a/src/header/sched/cbindgen.toml ++++ b/src/header/sched/cbindgen.toml +@@ -5,7 +5,7 @@ + # - "[SS|TSP] The header shall define the time_t type as described in ." + # - "The header shall define the timespec structure as described in ." + # - "Inclusion of the header may make visible all symbols from the header." +-sys_includes = ["sys/types.h"] ++sys_includes = ["sys/types.h", "stdint.h"] + include_guard = "_RELIBC_SCHED_H" + after_includes = """ + #include // for timespec +@@ -20,3 +20,17 @@ prefix_with_name = true + + [export.rename] + "timespec" = "struct timespec" ++ ++[export] ++include = [ ++ "sched_param", ++ "cpu_set_t", ++ "sched_get_priority_max", ++ "sched_get_priority_min", ++ "sched_getparam", ++ "sched_getscheduler", ++ "sched_rr_get_interval", ++ "sched_setparam", ++ "sched_setscheduler", ++ "sched_yield", ++] diff --git a/local/patches/relibc/P3-spawn-cbindgen-fix.patch b/local/patches/relibc/P3-spawn-cbindgen-fix.patch new file mode 100644 index 00000000..de021258 --- /dev/null +++ b/local/patches/relibc/P3-spawn-cbindgen-fix.patch @@ -0,0 +1,69 @@ +diff --git a/src/header/spawn/cbindgen.toml b/src/header/spawn/cbindgen.toml +new file mode 100644 +index 0000000..a9f188f +--- /dev/null ++++ b/src/header/spawn/cbindgen.toml +@@ -0,0 +1,63 @@ ++sys_includes = ["sys/types.h", "signal.h", "sched.h"] ++include_guard = "_SPAWN_H" ++after_includes = """ ++typedef struct { ++ short __flags; ++ pid_t __pgrp; ++ sigset_t __sd; ++ sigset_t __ss; ++ struct sched_param __sp; ++ int __policy; ++ int __pad[16]; ++} posix_spawnattr_t; ++ ++typedef 
struct { ++ int __allocated; ++ int __used; ++ void *__actions; ++ int __pad[16]; ++} posix_spawn_file_actions_t; ++""" ++trailer = """ ++#define POSIX_SPAWN_RESETIDS 0x01 ++#define POSIX_SPAWN_SETPGROUP 0x02 ++#define POSIX_SPAWN_SETSIGDEF 0x04 ++#define POSIX_SPAWN_SETSIGMASK 0x08 ++#define POSIX_SPAWN_SETSCHEDPARAM 0x10 ++#define POSIX_SPAWN_SETSCHEDULER 0x20 ++ ++int posix_spawn(pid_t *__restrict, const char *__restrict, ++ const posix_spawn_file_actions_t *, ++ const posix_spawnattr_t *__restrict, ++ char *const __restrict[], char *const __restrict[]); ++int posix_spawnp(pid_t *__restrict, const char *__restrict, ++ const posix_spawn_file_actions_t *, ++ const posix_spawnattr_t *__restrict, ++ char *const __restrict[], char *const __restrict[]); ++int posix_spawnattr_init(posix_spawnattr_t *); ++int posix_spawnattr_destroy(posix_spawnattr_t *); ++int posix_spawnattr_setflags(posix_spawnattr_t *, short); ++int posix_spawnattr_getflags(const posix_spawnattr_t *__restrict, short *__restrict); ++int posix_spawnattr_setpgroup(posix_spawnattr_t *, pid_t); ++int posix_spawnattr_getpgroup(const posix_spawnattr_t *__restrict, pid_t *__restrict); ++int posix_spawnattr_setsigdefault(posix_spawnattr_t *__restrict, const sigset_t *__restrict); ++int posix_spawnattr_getsigdefault(const posix_spawnattr_t *__restrict, sigset_t *__restrict); ++int posix_spawnattr_setsigmask(posix_spawnattr_t *__restrict, const sigset_t *__restrict); ++int posix_spawnattr_getsigmask(const posix_spawnattr_t *__restrict, sigset_t *__restrict); ++int posix_spawn_file_actions_init(posix_spawn_file_actions_t *); ++int posix_spawn_file_actions_destroy(posix_spawn_file_actions_t *); ++int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *, int, int); ++int posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *, int); ++int posix_spawn_file_actions_addopen(posix_spawn_file_actions_t *__restrict, ++ int, const char *__restrict, int, mode_t); ++""" ++language = "C" ++style = "Type" ++no_includes =
true ++cpp_compat = true ++ ++[enum] ++prefix_with_name = true ++ ++[export] ++include = [] diff --git a/local/patches/relibc/P3-timerfd-cbindgen-fix.patch b/local/patches/relibc/P3-timerfd-cbindgen-fix.patch new file mode 100644 index 00000000..db0422d4 --- /dev/null +++ b/local/patches/relibc/P3-timerfd-cbindgen-fix.patch @@ -0,0 +1,35 @@ +Fix sys/timerfd.h cbindgen.toml to generate proper C headers instead of C++. + +The empty cbindgen.toml from P3-timerfd-relative.patch caused cbindgen to +generate C++ output (cstdarg, constexpr, etc.) in the installed +sys/timerfd.h. C compilers including this header would fail with +"cstdarg: No such file or directory". Add language="C", after_includes +for bits/timespec.h, and explicit export list for the timerfd constants. + +diff --git a/src/header/sys_timerfd/cbindgen.toml b/src/header/sys_timerfd/cbindgen.toml +--- a/src/header/sys_timerfd/cbindgen.toml ++++ b/src/header/sys_timerfd/cbindgen.toml +@@ -1,12 +1,23 @@ + sys_includes = ["time.h"] ++after_includes = """ ++#include <bits/timespec.h> // for itimerspec ++""" + include_guard = "_SYS_TIMERFD_H" + language = "C" + style = "Tag" + no_includes = true + cpp_compat = true + + [enum] + prefix_with_name = true + ++[export] ++include = [ ++ "TFD_CLOEXEC", ++ "TFD_NONBLOCK", ++ "TFD_TIMER_ABSTIME", ++ "TFD_TIMER_CANCEL_ON_SET", ++] ++ + [export.rename] + "itimerspec" = "struct itimerspec" diff --git a/local/patches/relibc/P5-fatal-handler-diagnostics.patch b/local/patches/relibc/P5-fatal-handler-diagnostics.patch new file mode 100644 index 00000000..b4a88edf --- /dev/null +++ b/local/patches/relibc/P5-fatal-handler-diagnostics.patch @@ -0,0 +1,188 @@ +diff --git a/src/lib.rs b/src/lib.rs +--- a/src/lib.rs ++++ b/src/lib.rs +@@ -57,61 +57,201 @@ pub mod start; + pub mod sync; + +-use crate::platform::{Allocator, NEWALLOCATOR}; ++use crate::platform::{Allocator, NEWALLOCATOR, Pal, Sys}; + + #[global_allocator] + static ALLOCATOR: Allocator = NEWALLOCATOR; ++ ++const
MAX_FATAL_BACKTRACE_FRAMES: usize = 16; ++const MAX_FATAL_FRAME_STRIDE: usize = 1024 * 1024; ++ ++#[inline(never)] ++fn write_process_thread_identity(w: &mut platform::FileWriter) { ++ use core::fmt::Write; ++ ++ let pid = Sys::getpid(); ++ let tid = Sys::gettid(); ++ ++ match crate::pthread::current_thread() { ++ Some(thread) => { ++ let _ = w.write_fmt(format_args!( ++ "RELIBC CONTEXT: pid={} tid={} pthread={:#x}\n", ++ pid, ++ tid, ++ thread as *const _ as usize, ++ )); ++ } ++ None => { ++ let _ = w.write_fmt(format_args!( ++ "RELIBC CONTEXT: pid={} tid={} pthread=\n", ++ pid, tid, ++ )); ++ } ++ } ++} ++ ++#[cfg(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64"))] ++#[inline(never)] ++fn current_frame_pointer() -> *const usize { ++ let frame: *const usize; ++ ++ #[cfg(target_arch = "x86_64")] ++ unsafe { ++ core::arch::asm!("mov {}, rbp", out(reg) frame, options(nomem, nostack, preserves_flags)); ++ } ++ ++ #[cfg(target_arch = "x86")] ++ unsafe { ++ core::arch::asm!("mov {}, ebp", out(reg) frame, options(nomem, nostack, preserves_flags)); ++ } ++ ++ #[cfg(target_arch = "aarch64")] ++ unsafe { ++ core::arch::asm!("mov {}, x29", out(reg) frame, options(nomem, nostack, preserves_flags)); ++ } ++ ++ frame ++} ++ ++#[cfg(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64"))] ++fn read_backtrace_frame(frame: *const usize) -> Option<(*const usize, usize)> { ++ let align = core::mem::align_of::(); ++ let frame_addr = frame as usize; ++ ++ if frame.is_null() || frame_addr % align != 0 { ++ return None; ++ } ++ ++ let next_frame = unsafe { frame.read() } as *const usize; ++ let return_address = unsafe { frame.add(1).read() }; ++ ++ if return_address == 0 { ++ return None; ++ } ++ ++ Some((next_frame, return_address)) ++} ++ ++#[cfg(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64"))] ++fn is_sane_next_backtrace_frame(current: *const usize, next: *const usize) -> bool { ++ let align = 
core::mem::align_of::(); ++ let current_addr = current as usize; ++ let next_addr = next as usize; ++ ++ !next.is_null() ++ && next_addr % align == 0 ++ && next_addr > current_addr ++ && next_addr - current_addr <= MAX_FATAL_FRAME_STRIDE ++} ++ ++#[inline(never)] ++fn write_best_effort_backtrace(w: &mut platform::FileWriter) { ++ use core::fmt::Write; ++ ++ let _ = w.write_str("RELIBC: attempting best-effort backtrace\n"); ++ ++ #[cfg(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64"))] ++ { ++ let mut frame = current_frame_pointer(); ++ let mut wrote_frame = false; ++ ++ for frame_index in 0..MAX_FATAL_BACKTRACE_FRAMES { ++ let Some((next_frame, return_address)) = read_backtrace_frame(frame) else { ++ break; ++ }; ++ ++ wrote_frame = true; ++ let _ = w.write_fmt(format_args!( ++ "RELIBC BACKTRACE[{frame_index:02}]: {:#x}\n", ++ return_address, ++ )); ++ ++ if !is_sane_next_backtrace_frame(frame, next_frame) { ++ break; ++ } ++ ++ frame = next_frame; ++ } ++ ++ if !wrote_frame { ++ let _ = w.write_str("RELIBC: backtrace attempt produced no frames\n"); ++ } ++ } ++ ++ #[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64")))] ++ { ++ let _ = w.write_str("RELIBC: backtrace unavailable on this architecture\n"); ++ } ++} + + #[unsafe(no_mangle)] + pub extern "C" fn relibc_panic(pi: &::core::panic::PanicInfo) -> ! 
{ + use core::fmt::Write; + + let mut w = platform::FileWriter::new(2); +- let _ = w.write_fmt(format_args!("RELIBC PANIC: {}\n", pi)); ++ ++ if let Some(location) = pi.location() { ++ let _ = w.write_fmt(format_args!( ++ "RELIBC PANIC LOCATION: {}:{}:{}\n", ++ location.file(), ++ location.line(), ++ location.column(), ++ )); ++ } else { ++ let _ = w.write_str("RELIBC PANIC LOCATION: \n"); ++ } ++ ++ write_process_thread_identity(&mut w); ++ let _ = w.write_fmt(format_args!("RELIBC PANIC: {}\n", pi)); + + core::intrinsics::abort(); + } +@@ -95,23 +235,27 @@ pub extern "C" fn rust_oom(layout: ::core::alloc::Layout) -> ! { + + let mut w = platform::FileWriter::new(2); + let _ = w.write_fmt(format_args!( +- "RELIBC OOM: {} bytes aligned to {} bytes\n", ++ "RELIBC OOM: {} bytes aligned to {} bytes - process will abort\n", + layout.size(), + layout.align() + )); ++ write_process_thread_identity(&mut w); ++ write_best_effort_backtrace(&mut w); + + core::intrinsics::abort(); + } + + #[cfg(not(test))] + #[allow(non_snake_case)] + #[linkage = "weak"] + #[unsafe(no_mangle)] + pub extern "C" fn _Unwind_Resume() -> ! { + use core::fmt::Write; + + let mut w = platform::FileWriter::new(2); +- let _ = w.write_str("_Unwind_Resume\n"); ++ let _ = w.write_str( ++ "RELIBC: _Unwind_Resume called - exception propagation failed, aborting\n", ++ ); ++ write_process_thread_identity(&mut w); + + core::intrinsics::abort(); + } diff --git a/local/patches/relibc/P5-robust-mutex-enotrec-fix.patch b/local/patches/relibc/P5-robust-mutex-enotrec-fix.patch new file mode 100644 index 00000000..b6cbc202 --- /dev/null +++ b/local/patches/relibc/P5-robust-mutex-enotrec-fix.patch @@ -0,0 +1,87 @@ +Fix ENOTRECOVERABLE returned for non-robust mutexes and register main +thread in OS_TID_TO_PTHREAD. + +The robust mutex liveness check (mutex_owner_id_is_live) was returning +ENOTRECOVERABLE for non-robust mutexes when the owner appeared dead. 
+Per POSIX, the behaviour of a non-robust mutex whose owner has died is +undefined; returning an error crashes every Rust std::sync::Mutex user. +For lock_inner, fall through to spin/futex-wait instead. For try_lock, +return EBUSY instead. + +Additionally, pthread::init() never registered the main thread in +OS_TID_TO_PTHREAD, so any mutex owned by the main thread would always +appear to have a dead owner, making the liveness check unreliable. + +diff --git a/src/pthread/mod.rs b/src/pthread/mod.rs +index 8243a48..c455a67 100644 +--- a/src/pthread/mod.rs ++++ b/src/pthread/mod.rs +@@ -43,9 +43,13 @@ pub unsafe fn init() { + thread.stack_size = STACK_SIZE; + } + +- unsafe { Tcb::current() } +- .expect_notls("no TCB present for main thread") +- .pthread = thread; ++ let tcb = unsafe { Tcb::current() } ++ .expect_notls("no TCB present for main thread"); ++ tcb.pthread = thread; ++ ++ OS_TID_TO_PTHREAD ++ .lock() ++ .insert(Sys::current_os_tid(), ForceSendSync(tcb as *const Tcb as *mut Tcb)); + } + + //static NEXT_INDEX: AtomicU32 = AtomicU32::new(FIRST_THREAD_IDX + 1); +diff --git a/src/sync/pthread_mutex.rs b/src/sync/pthread_mutex.rs +index af0c429..1b2b3ca 100644 +--- a/src/sync/pthread_mutex.rs ++++ b/src/sync/pthread_mutex.rs +@@ -136,14 +136,17 @@ impl RlctMutex { + Err(thread) => { + let owner = thread & INDEX_MASK; + +- if !crate::pthread::mutex_owner_id_is_live(owner) { +- if !self.robust { +- return Err(Errno(ENOTRECOVERABLE)); +- } +- ++ if !crate::pthread::mutex_owner_id_is_live(owner) && self.robust { + let new_value = (thread & WAITING_BIT) | FUTEX_OWNER_DIED | this_thread; + match self.inner.compare_exchange( + thread, +@@ -152,6 +155,12 @@ impl RlctMutex { + Ok(_) => return self.finish_lock_acquire(true), + Err(_) => continue, + } ++ } else if !crate::pthread::mutex_owner_id_is_live(owner) { ++ // Non-robust mutex with apparently-dead owner: per POSIX the ++ // behaviour is undefined. 
We conservatively keep spinning / ++ // futex-waiting rather than returning ENOTRECOVERABLE, which ++ // would crash any Rust std::sync::Mutex user. + } + + if spins_left > 0 { +@@ -241,14 +250,17 @@ impl RlctMutex { + + if current & FUTEX_OWNER_DIED != 0 || (owner != 0 && !crate::pthread::mutex_owner_id_is_live(owner)) { +- if !self.robust { +- return Err(Errno(ENOTRECOVERABLE)); +- } +- ++ if self.robust { + let new_value = (current & WAITING_BIT) | FUTEX_OWNER_DIED | this_thread; + match self.inner.compare_exchange( + current, +@@ -257,6 +269,11 @@ impl RlctMutex { + Ok(_) => return self.finish_lock_acquire(true), + Err(_) => continue, + } ++ } else { ++ // Non-robust mutex: owner appears dead but POSIX behaviour is ++ // undefined; report busy rather than ENOTRECOVERABLE. ++ return Err(Errno(EBUSY)); ++ } + } + + return Err(Errno(EBUSY)); diff --git a/local/patches/relibc/P5-signal-handler-panic-hardening.patch b/local/patches/relibc/P5-signal-handler-panic-hardening.patch new file mode 100644 index 00000000..944a0e11 --- /dev/null +++ b/local/patches/relibc/P5-signal-handler-panic-hardening.patch @@ -0,0 +1,112 @@ +diff --git a/redox-rt/src/signal.rs b/redox-rt/src/signal.rs +index 022f873..ab96dea 100644 +--- a/redox-rt/src/signal.rs ++++ b/redox-rt/src/signal.rs +@@ -1,4 +1,10 @@ +-use core::{ffi::c_int, ptr::NonNull, sync::atomic::Ordering}; ++use core::{ ++ ffi::c_int, ++ hint::unreachable_unchecked, ++ panic::AssertUnwindSafe, ++ ptr::NonNull, ++ sync::atomic::Ordering, ++}; + + use syscall::{ + CallFlags, EAGAIN, EINTR, EINVAL, ENOMEM, EPERM, Error, RawAction, Result, SenderInfo, +@@ -103,6 +109,47 @@ pub struct SiginfoAbi { + pub si_value: usize, // sigval + } + ++fn invoke_signal_handler(f: AssertUnwindSafe) -> bool { ++ fn do_call(data: *mut u8) { ++ let callback = unsafe { &mut *data.cast::>>() }; ++ if let Some(callback) = callback.take() { ++ callback.0(); ++ } ++ } ++ ++ fn do_catch(_data: *mut u8, _payload: *mut u8) {} ++ ++ let mut callback = 
Some(f); ++ unsafe { ++ core::intrinsics::catch_unwind( ++ do_call::, ++ (&mut callback as *mut Option>).cast(), ++ do_catch::, ++ ) != 0 ++ } ++} ++ ++#[inline(always)] ++unsafe fn return_ignored_signal( ++ os: &RtTcb, ++ stack: &SigStack, ++ signals_were_disabled: bool, ++) { ++ unsafe { ++ (*os.arch.get()).last_sig_was_restart = true; ++ (*os.arch.get()).last_sigstack = NonNull::new(stack.link); ++ } ++ ++ if !signals_were_disabled { ++ core::sync::atomic::compiler_fence(Ordering::Release); ++ let control_flags = &os.control.control_flags; ++ control_flags.store( ++ control_flags.load(Ordering::Relaxed) & !SigcontrolFlags::INHIBIT_DELIVERY.bits(), ++ Ordering::Relaxed, ++ ); ++ } ++} ++ + #[inline(always)] + unsafe fn inner(stack: &mut SigStack) { + let os = unsafe { &Tcb::current().unwrap().os_specific }; +@@ -168,7 +215,10 @@ unsafe fn inner(stack: &mut SigStack) { + // and reaching this code. If so, we do already know whether the signal is IGNORED *now*, + // and so we should return early ideally without even temporarily touching the signal mask. + SigactionKind::Ignore => { +- panic!("ctl {:#x?} signal {}", os.control, stack.sig_num) ++ unsafe { ++ return_ignored_signal(os, stack, signals_were_disabled); ++ } ++ return; + } + // this case should be treated equally as the one above + // +@@ -183,7 +233,9 @@ unsafe fn inner(stack: &mut SigStack) { + CallFlags::empty(), + &[ProcCall::Exit as u64, u64::from(sig) << 8], + ); +- panic!() ++ // SAFETY: ProcCall::Exit terminates the current process when it succeeds, so reaching ++ // this point would violate the proc manager exit contract. 
++ unsafe { unreachable_unchecked() } + } + SigactionKind::Handled { handler } => handler, + }; +@@ -224,15 +276,21 @@ unsafe fn inner(stack: &mut SigStack) { + si_uid: sender_uid as i32, + si_value: stack.sival, + }; +- unsafe { ++ if invoke_signal_handler(AssertUnwindSafe(|| unsafe { + sigaction( + stack.sig_num as c_int, + core::ptr::addr_of!(info).cast(), + stack as *mut SigStack as *mut (), + ) +- }; ++ })) { ++ let _ = syscall::write(2, b"redox-rt: sa_siginfo handler panicked; continuing\n"); ++ } + } else if let Some(handler) = unsafe { handler.handler } { +- handler(stack.sig_num as c_int); ++ if invoke_signal_handler(AssertUnwindSafe(|| { ++ handler(stack.sig_num as c_int); ++ })) { ++ let _ = syscall::write(2, b"redox-rt: sa_handler panicked; continuing\n"); ++ } + } + + // Disable signals while we modify the sigmask again diff --git a/local/patches/relibc/P5-startup-init-panic-hardening.patch b/local/patches/relibc/P5-startup-init-panic-hardening.patch new file mode 100644 index 00000000..f9df2d50 --- /dev/null +++ b/local/patches/relibc/P5-startup-init-panic-hardening.patch @@ -0,0 +1,101 @@ +diff --git a/src/start.rs b/src/start.rs +--- a/src/start.rs ++++ b/src/start.rs +@@ -1,8 +1,6 @@ + //! Startup code. 
+ + use alloc::{boxed::Box, vec::Vec}; +-use core::{intrinsics, ptr}; +- +-#[cfg(target_os = "redox")] +-use generic_rt::ExpectTlsFree; ++use core::{fmt::Write, intrinsics, panic::AssertUnwindSafe, ptr}; + + use crate::{ + ALLOCATOR, +@@ -143,6 +141,28 @@ fn io_init() { + stdio::stderr = stdio::default_stderr().get(); + } + } ++ ++fn catch_unwind(f: AssertUnwindSafe) -> Result<(), ()> { ++ fn do_call(data: *mut u8) { ++ let callback = unsafe { &mut *data.cast::>>() }; ++ if let Some(callback) = callback.take() { ++ callback.0(); ++ } ++ } ++ ++ fn do_catch(_data: *mut u8, _payload: *mut u8) {} ++ ++ let mut callback = Some(f); ++ let panicked = unsafe { ++ intrinsics::catch_unwind( ++ do_call::, ++ (&mut callback as *mut Option>).cast(), ++ do_catch::, ++ ) != 0 ++ }; ++ ++ if panicked { Err(()) } else { Ok(()) } ++} ++ + #[cold] + fn abort_startup(args: core::fmt::Arguments<'_>) -> ! { + let mut w = platform::FileWriter::new(2); +@@ -164,15 +184,24 @@ pub unsafe extern "C" fn relibc_start_v1( + unsafe { relibc_verify_host() }; + + #[cfg(target_os = "redox")] +- let thr_fd = redox_rt::proc::FdGuard::new( +- unsafe { +- crate::platform::get_auxv_raw(sp.auxv().cast(), redox_rt::auxv_defs::AT_REDOX_THR_FD) +- } +- .expect_notls("no thread fd present"), +- ) +- .to_upper() +- .expect_notls("failed to move thread fd to upper table"); ++ let thr_fd = { ++ let thr_fd = match unsafe { ++ crate::platform::get_auxv_raw(sp.auxv().cast(), redox_rt::auxv_defs::AT_REDOX_THR_FD) ++ } { ++ Some(thr_fd) => thr_fd, ++ None => abort_startup(format_args!( ++ "relibc_start_v1: missing AT_REDOX_THR_FD auxv entry; no thread fd present\n" ++ )), ++ }; ++ ++ match redox_rt::proc::FdGuard::new(thr_fd).to_upper() { ++ Ok(thr_fd) => thr_fd, ++ Err(err) => abort_startup(format_args!( ++ "relibc_start_v1: failed to move thread fd to upper table: {err:?}\n" ++ )), ++ } ++ }; + + // Initialize TLS, if necessary + unsafe { +@@ -237,7 +266,10 @@ pub unsafe extern "C" fn relibc_start_v1( + let mut f 
= unsafe { &__preinit_array_start } as *const _; + #[allow(clippy::op_ref)] + while f < &raw const __preinit_array_end { +- (unsafe { *f })(); ++ let func = unsafe { *f }; ++ if catch_unwind(AssertUnwindSafe(|| unsafe { (*f)() })).is_err() { ++ log_initializer_panic(".preinit_array", func); ++ } + f = unsafe { f.offset(1) }; + } + } +@@ -247,7 +279,10 @@ pub unsafe extern "C" fn relibc_start_v1( + let mut f = unsafe { &__init_array_start } as *const _; + #[allow(clippy::op_ref)] + while f < &raw const __init_array_end { +- (unsafe { *f })(); ++ let func = unsafe { *f }; ++ if catch_unwind(AssertUnwindSafe(|| unsafe { (*f)() })).is_err() { ++ log_initializer_panic(".init_array", func); ++ } + f = unsafe { f.offset(1) }; + } + } diff --git a/local/patches/relibc/redox.patch b/local/patches/relibc/redox.patch index 1a72d29f..14eac84e 100644 --- a/local/patches/relibc/redox.patch +++ b/local/patches/relibc/redox.patch @@ -21,3 +21,107 @@ diff --git a/src/header/fcntl/mod.rs b/src/header/fcntl/mod.rs + } + return new_fd; + } + +diff --git a/src/pthread/mod.rs b/src/pthread/mod.rs +--- a/src/pthread/mod.rs ++++ b/src/pthread/mod.rs +@@ -2,6 +2,7 @@ + + use core::{ + cell::UnsafeCell, ++ panic::AssertUnwindSafe, + ptr, + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, + }; +@@ -208,13 +209,41 @@ pub(crate) unsafe fn create( + } + + /// A shim to wrap thread entry points in logic to set up TLS, for example ++fn catch_unwind(f: AssertUnwindSafe) -> Result<(), ()> { ++ fn do_call(data: *mut u8) { ++ let callback = unsafe { &mut *data.cast::>>() }; ++ if let Some(callback) = callback.take() { ++ callback.0(); ++ } ++ } ++ ++ fn do_catch(_data: *mut u8, _payload: *mut u8) {} ++ ++ let mut callback = Some(f); ++ let panicked = unsafe { ++ core::intrinsics::catch_unwind( ++ do_call::, ++ (&mut callback as *mut Option>).cast(), ++ do_catch::, ++ ) != 0 ++ }; ++ ++ if panicked { Err(()) } else { Ok(()) } ++} ++ + unsafe extern "C" fn new_thread_shim( + tcb: *mut Tcb, + 
synchronization_mutex: *const Mutex, + ) -> ! { +- let tcb = unsafe { tcb.as_mut() }.expect_notls("non-null TLS is required"); ++ let tcb = match unsafe { tcb.as_mut() } { ++ Some(tcb) => tcb, ++ None => { ++ log::error!("pthread: child thread started without a TCB"); ++ unsafe { exit_current_thread(Retval(ptr::null_mut())) } ++ } ++ }; + + #[cfg(not(target_os = "redox"))] + { +@@ -227,12 +256,23 @@ unsafe extern "C" fn new_thread_shim( + unsafe { + tcb.activate(None); + } +- redox_rt::signal::setup_sighandler(&tcb.os_specific, false); ++ match catch_unwind(AssertUnwindSafe(|| { ++ redox_rt::signal::setup_sighandler(&tcb.os_specific, false) ++ })) { ++ Ok(()) => {} ++ Err(()) => { ++ log::error!("pthread: failed to set up child thread signal handler"); ++ unsafe { exit_current_thread(Retval(ptr::null_mut())) } ++ } ++ } + } + + let procmask = unsafe { (&*synchronization_mutex).as_ptr().read() }; + +- unsafe { tcb.copy_masters() }.unwrap(); ++ if let Err(err) = unsafe { tcb.copy_masters() } { ++ log::error!("pthread: failed to copy TLS masters for child thread: {err:?}"); ++ unsafe { exit_current_thread(Retval(ptr::null_mut())) } ++ } + + unsafe { (*tcb).pthread.os_tid.get().write(Sys::current_os_tid()) }; + +@@ -240,11 +280,21 @@ unsafe extern "C" fn new_thread_shim( + + #[cfg(target_os = "redox")] + { +- redox_rt::signal::set_sigmask(Some(procmask), None) +- .expect("failed to set procmask in child thread"); ++ if let Err(err) = redox_rt::signal::set_sigmask(Some(procmask), None) { ++ log::error!("pthread: failed to set child thread signal mask: {err:?}"); ++ } + } + +- let retval = unsafe { entry_point(arg) }; ++ let mut retval = ptr::null_mut(); ++ match catch_unwind(AssertUnwindSafe(|| { ++ retval = unsafe { entry_point(arg) }; ++ })) { ++ Ok(()) => {} ++ Err(()) => { ++ log::error!("pthread: child thread entry point panicked"); ++ unsafe { exit_current_thread(Retval(ptr::null_mut())) } ++ } ++ } + + unsafe { exit_current_thread(Retval(retval)) } + } diff --git 
a/recipes/system/driver-params b/recipes/system/driver-params new file mode 120000 index 00000000..62934808 --- /dev/null +++ b/recipes/system/driver-params @@ -0,0 +1 @@ +../../local/recipes/system/driver-params \ No newline at end of file diff --git a/recipes/system/redbear-acmd b/recipes/system/redbear-acmd new file mode 120000 index 00000000..d5c62fb4 --- /dev/null +++ b/recipes/system/redbear-acmd @@ -0,0 +1 @@ +../../local/recipes/system/redbear-acmd \ No newline at end of file diff --git a/recipes/system/redbear-ecmd b/recipes/system/redbear-ecmd new file mode 120000 index 00000000..4fbe62a3 --- /dev/null +++ b/recipes/system/redbear-ecmd @@ -0,0 +1 @@ +../../local/recipes/system/redbear-ecmd \ No newline at end of file diff --git a/recipes/system/redbear-usbaudiod b/recipes/system/redbear-usbaudiod new file mode 120000 index 00000000..f99fdc8a --- /dev/null +++ b/recipes/system/redbear-usbaudiod @@ -0,0 +1 @@ +../../local/recipes/system/redbear-usbaudiod \ No newline at end of file