From 11993af01f8540196c06f0d5f0e97d76ae7c47af Mon Sep 17 00:00:00 2001 From: Vasilito Date: Mon, 4 May 2026 11:49:15 +0100 Subject: [PATCH] fix: rebase base patches, commit recipe drift, add relibc rlimit/sysconf Base: fix P6-driver-new-modules.patch (ed format -> unified diff) for new driver modules (ncq, itr, phy). P6-driver-main-fixes.patch now applies with offset on current upstream source. Relibc: remove stale P5-named-semaphores (upstream has stubs), add P10-stack-size-8mb and P11-getrlimit-getrusage (per-process rlimit table, sysconf integration, getdtablesize fix, null-pointer safety). Kernel: consolidate 29 individual patches into single redbear-consolidated.patch. Userutils: P5-redbear-branding replaces P4-login-rate-limit. Recipe.toml changes now committed so they survive source resets. --- .../patches/base/P2-ihdad-graceful-init.patch | 21 + local/patches/base/P6-driver-main-fixes.patch | 178 ++ .../patches/base/P6-driver-new-modules.patch | 193 ++ .../kernel/P10-debug-scheme-serial-fix.patch | 34 + .../patches/kernel/redbear-consolidated.patch | 2592 +++++++++++++++++ local/patches/relibc/P10-stack-size-8mb.patch | 11 + .../relibc/P11-getrlimit-getrusage.patch | 349 +++ .../userutils/P5-redbear-branding.patch | 67 + .../core/base/P2-ihdad-graceful-init.patch | 1 + recipes/core/base/P6-driver-main-fixes.patch | 1 + recipes/core/base/P6-driver-new-modules.patch | 1 + recipes/core/base/recipe.toml | 4 + recipes/core/kernel/recipe.toml | 17 +- .../core/kernel/redbear-consolidated.patch | 1 + recipes/core/relibc/P10-stack-size-8mb.patch | 1 + .../core/relibc/P11-getrlimit-getrusage.patch | 1 + recipes/core/relibc/recipe.toml | 2 +- recipes/core/userutils/recipe.toml | 2 +- 18 files changed, 3473 insertions(+), 3 deletions(-) create mode 100644 local/patches/base/P2-ihdad-graceful-init.patch create mode 100644 local/patches/base/P6-driver-main-fixes.patch create mode 100644 local/patches/base/P6-driver-new-modules.patch create mode 100644 
local/patches/kernel/P10-debug-scheme-serial-fix.patch create mode 100644 local/patches/kernel/redbear-consolidated.patch create mode 100644 local/patches/relibc/P10-stack-size-8mb.patch create mode 100644 local/patches/relibc/P11-getrlimit-getrusage.patch create mode 100644 local/patches/userutils/P5-redbear-branding.patch create mode 120000 recipes/core/base/P2-ihdad-graceful-init.patch create mode 120000 recipes/core/base/P6-driver-main-fixes.patch create mode 120000 recipes/core/base/P6-driver-new-modules.patch create mode 120000 recipes/core/kernel/redbear-consolidated.patch create mode 120000 recipes/core/relibc/P10-stack-size-8mb.patch create mode 120000 recipes/core/relibc/P11-getrlimit-getrusage.patch diff --git a/local/patches/base/P2-ihdad-graceful-init.patch b/local/patches/base/P2-ihdad-graceful-init.patch new file mode 100644 index 00000000..e9c3197e --- /dev/null +++ b/local/patches/base/P2-ihdad-graceful-init.patch @@ -0,0 +1,21 @@ +diff --git a/drivers/audio/ihdad/src/main.rs b/drivers/audio/ihdad/src/main.rs +index 31a2add7..a75a0a35 100755 +--- a/drivers/audio/ihdad/src/main.rs ++++ b/drivers/audio/ihdad/src/main.rs +@@ -57,7 +57,15 @@ fn daemon(daemon: daemon::Daemon, mut pcid_handle: PciFunctionHandle) -> ! 
{ + EventQueue::::new().expect("ihdad: Could not create event queue."); + let socket = Socket::nonblock().expect("ihdad: failed to create socket"); + let mut device = unsafe { +- hda::IntelHDA::new(address, vend_prod).expect("ihdad: failed to allocate device") ++ match hda::IntelHDA::new(address, vend_prod) { ++ Ok(dev) => dev, ++ Err(e) => { ++ log::error!("ihdad: failed to initialize HDA device (err {}), exiting gracefully", e); ++ log::info!("ihdad: this is expected in virtual environments without functional HDA hardware"); ++ daemon.ready(); ++ return loop {}; ++ } ++ } + }; + let mut readiness_based = ReadinessBased::new(&socket, 16); + diff --git a/local/patches/base/P6-driver-main-fixes.patch b/local/patches/base/P6-driver-main-fixes.patch new file mode 100644 index 00000000..68f83e1c --- /dev/null +++ b/local/patches/base/P6-driver-main-fixes.patch @@ -0,0 +1,178 @@ +diff --git a/drivers/audio/ac97d/src/main.rs b/drivers/audio/ac97d/src/main.rs +index ffa8a94b..29e189be 100644 +--- a/drivers/audio/ac97d/src/main.rs ++++ b/drivers/audio/ac97d/src/main.rs +@@ -63,14 +63,14 @@ fn daemon(daemon: daemon::Daemon, pcid_handle: PciFunctionHandle) -> ! { + Source::Irq, + event::EventFlags::READ, + ) +- .unwrap(); ++ .expect("ac97d: subscribe IRQ failed"); + event_queue + .subscribe( + socket.inner().raw(), + Source::Scheme, + event::EventFlags::READ, + ) +- .unwrap(); ++ .expect("ac97d: subscribe scheme failed"); + + register_sync_scheme(&socket, "audiohw", &mut device) + .expect("ac97d: failed to register audiohw scheme to namespace"); +@@ -86,12 +86,12 @@ fn daemon(daemon: daemon::Daemon, pcid_handle: PciFunctionHandle) -> ! 
{ + match event { + Source::Irq => { + let mut irq = [0; 8]; +- irq_file.read(&mut irq).unwrap(); ++ irq_file.read(&mut irq).expect("ac97d: IRQ read failed"); + + if !device.irq() { + continue; + } +- irq_file.write(&mut irq).unwrap(); ++ irq_file.write(&mut irq).expect("ac97d: IRQ ack failed"); + + readiness_based + .poll_all_requests(&mut device) +diff --git a/drivers/audio/ihdad/src/main.rs b/drivers/audio/ihdad/src/main.rs +index 31a2add7..8291a550 100755 +--- a/drivers/audio/ihdad/src/main.rs ++++ b/drivers/audio/ihdad/src/main.rs +@@ -71,14 +71,14 @@ fn daemon(daemon: daemon::Daemon, mut pcid_handle: PciFunctionHandle) -> ! { + Source::Scheme, + event::EventFlags::READ, + ) +- .unwrap(); ++ .expect("ihdad: subscribe scheme failed"); + event_queue + .subscribe( + irq_file.irq_handle().as_raw_fd() as usize, + Source::Irq, + event::EventFlags::READ, + ) +- .unwrap(); ++ .expect("ihdad: subscribe IRQ failed"); + + libredox::call::setrens(0, 0).expect("ihdad: failed to enter null namespace"); + +@@ -91,12 +91,12 @@ fn daemon(daemon: daemon::Daemon, mut pcid_handle: PciFunctionHandle) -> ! { + match event { + Source::Irq => { + let mut irq = [0; 8]; +- irq_file.irq_handle().read(&mut irq).unwrap(); ++ irq_file.irq_handle().read(&mut irq).expect("ihdad: IRQ read failed"); + + if !device.irq() { + continue; + } +- irq_file.irq_handle().write(&mut irq).unwrap(); ++ irq_file.irq_handle().write(&mut irq).expect("ihdad: IRQ ack failed"); + + readiness_based + .poll_all_requests(&mut device) +diff --git a/drivers/net/e1000d/src/main.rs b/drivers/net/e1000d/src/main.rs +index 373ea9b3..c66cccd1 100644 +--- a/drivers/net/e1000d/src/main.rs ++++ b/drivers/net/e1000d/src/main.rs +@@ -70,15 +70,15 @@ fn daemon(daemon: daemon::Daemon, mut pcid_handle: PciFunctionHandle) -> ! 
{ + + libredox::call::setrens(0, 0).expect("e1000d: failed to enter null namespace"); + +- scheme.tick().unwrap(); ++ scheme.tick().expect("e1000d: tick failed"); + + for event in event_queue.map(|e| e.expect("e1000d: failed to get event")) { + match event.user_data { + Source::Irq => { + let mut irq = [0; 8]; +- irq_file.read(&mut irq).unwrap(); ++ irq_file.read(&mut irq).expect("e1000d: IRQ read failed"); + if unsafe { scheme.adapter().irq() } { +- irq_file.write(&mut irq).unwrap(); ++ irq_file.write(&mut irq).expect("e1000d: IRQ ack failed"); + + scheme.tick().expect("e1000d: failed to handle IRQ") + } +diff --git a/drivers/net/rtl8168d/src/main.rs b/drivers/net/rtl8168d/src/main.rs +index 1d9963a3..5dc244af 100644 +--- a/drivers/net/rtl8168d/src/main.rs ++++ b/drivers/net/rtl8168d/src/main.rs +@@ -81,33 +81,33 @@ fn daemon(daemon: daemon::Daemon, mut pcid_handle: PciFunctionHandle) -> ! { + Source::Irq, + event::EventFlags::READ, + ) +- .unwrap(); ++ .expect("rtl8168d: subscribe IRQ failed"); + event_queue + .subscribe( + scheme.event_handle().raw(), + Source::Scheme, + event::EventFlags::READ, + ) +- .unwrap(); ++ .expect("rtl8168d: subscribe scheme failed"); + + libredox::call::setrens(0, 0).expect("rtl8168d: failed to enter null namespace"); + +- scheme.tick().unwrap(); ++ scheme.tick().expect("rtl8168d: tick failed"); + + for event in event_queue.map(|e| e.expect("rtl8168d: failed to get next event")) { + match event.user_data { + Source::Irq => { + let mut irq = [0; 8]; +- irq_file.irq_handle().read(&mut irq).unwrap(); ++ irq_file.irq_handle().read(&mut irq).expect("rtl8168d: IRQ read failed"); + //TODO: This may be causing spurious interrupts + if unsafe { scheme.adapter_mut().irq() } { +- irq_file.irq_handle().write(&mut irq).unwrap(); ++ irq_file.irq_handle().write(&mut irq).expect("rtl8168d: IRQ ack failed"); + +- scheme.tick().unwrap(); ++ scheme.tick().expect("rtl8168d: tick failed"); + } + } + Source::Scheme => { +- scheme.tick().unwrap(); ++ 
scheme.tick().expect("rtl8168d: tick failed"); + } + } + } +diff --git a/drivers/storage/ahcid/src/main.rs b/drivers/storage/ahcid/src/main.rs +index 1f130a29..9a0e3e0d 100644 +--- a/drivers/storage/ahcid/src/main.rs ++++ b/drivers/storage/ahcid/src/main.rs +@@ -66,17 +66,17 @@ fn daemon(daemon: daemon::Daemon, mut pcid_handle: PciFunctionHandle) -> ! { + .expect("ahcid: failed to event scheme socket"); + event_queue + .subscribe(irq_fd, 1, EventFlags::READ) +- .expect("ahcid: failed to event irq scheme"); ++ .expect("ahcid: IRQ failed"); + + for event in event_queue { +- let event = event.unwrap(); ++ let event = event.expect("ahcid: event failed"); + if event.fd == scheme.event_handle().raw() { +- FuturesExecutor.block_on(scheme.tick()).unwrap(); ++ FuturesExecutor.block_on(scheme.tick()).expect("ahcid: tick failed"); + } else if event.fd == irq_fd { + let mut irq = [0; 8]; + if irq_file + .read(&mut irq) +- .expect("ahcid: failed to read irq file") ++ .expect("ahcid: IRQ failed") + >= irq.len() + { + let is = hba_mem.is.read(); +@@ -94,9 +94,9 @@ fn daemon(daemon: daemon::Daemon, mut pcid_handle: PciFunctionHandle) -> ! 
{ + + irq_file + .write(&irq) +- .expect("ahcid: failed to write irq file"); ++ .expect("ahcid: IRQ failed"); + +- FuturesExecutor.block_on(scheme.tick()).unwrap(); ++ FuturesExecutor.block_on(scheme.tick()).expect("ahcid: tick failed"); + } + } + } else { diff --git a/local/patches/base/P6-driver-new-modules.patch b/local/patches/base/P6-driver-new-modules.patch new file mode 100644 index 00000000..ab823dfa --- /dev/null +++ b/local/patches/base/P6-driver-new-modules.patch @@ -0,0 +1,193 @@ +diff --git a/drivers/net/e1000d/src/itr.rs b/drivers/net/e1000d/src/itr.rs +new file mode 100644 +index 00000000..a0d79a5f +--- /dev/null ++++ b/drivers/net/e1000d/src/itr.rs +@@ -0,0 +1,61 @@ ++use crate::device::Intel8254x; ++ ++pub const ITR_IMMEDIATE: u32 = 0; ++pub const ITR_LOW_LATENCY: u32 = 64; ++pub const ITR_BULK: u32 = 256; ++pub const ITR_DEFAULT: u32 = 800; ++ ++#[derive(Clone, Copy, PartialEq)] ++pub enum ItrState { LowLatency, Moderate, Bulk } ++ ++pub struct ItrTracker { ++ state: ItrState, ++ current_itr: u32, ++ packets_since_update: u32, ++} ++ ++impl ItrTracker { ++ pub const fn new() -> Self { ++ Self { state: ItrState::LowLatency, current_itr: ITR_LOW_LATENCY, packets_since_update: 0 } ++ } ++ pub fn record_packet(&mut self, bytes: usize) { ++ self.packets_since_update += 1; ++ let _ = bytes; ++ } ++ pub fn update(&mut self) -> u32 { ++ let new_state = if self.packets_since_update < 8 { ItrState::LowLatency } ++ else if self.packets_since_update < 64 { ItrState::Moderate } ++ else { ItrState::Bulk }; ++ if new_state != self.state { ++ self.state = new_state; ++ self.current_itr = match self.state { ++ ItrState::LowLatency => ITR_LOW_LATENCY, ++ ItrState::Moderate => ITR_DEFAULT, ++ ItrState::Bulk => ITR_BULK, ++ }; ++ } ++ self.packets_since_update = 0; ++ self.current_itr ++ } ++ pub fn current_itr(&self) -> u32 { self.current_itr } ++} ++ ++const E1000_ITR: u32 = 0x00C4; ++ ++pub fn set_itr(device: &Intel8254x, itr_value: u32) { ++ unsafe { 
device.write_reg(E1000_ITR, itr_value); } ++} ++ ++pub fn configure_default_itr(device: &Intel8254x) { ++ set_itr(device, ITR_DEFAULT); ++} ++ ++pub fn configure_checksum_offload(device: &Intel8254x) { ++ let rctl = unsafe { device.read_reg(0x0100) }; ++ unsafe { device.write_reg(0x0100, rctl | (1 << 4)) }; ++} ++ ++pub fn enable_tso(device: &Intel8254x) { ++ let tctl = unsafe { device.read_reg(0x0400) }; ++ unsafe { device.write_reg(0x0400, tctl | (1 << 11)) }; ++} +diff --git a/drivers/net/rtl8168d/src/phy.rs b/drivers/net/rtl8168d/src/phy.rs +new file mode 100644 +index 00000000..4f9def80 +--- /dev/null ++++ b/drivers/net/rtl8168d/src/phy.rs +@@ -0,0 +1,42 @@ ++#[derive(Clone, Copy, PartialEq, Debug)] ++pub enum ChipVersion { Rtl8168b, Rtl8168c, Rtl8168cp, Rtl8168d, Rtl8168dp, Rtl8168e, Rtl8168evl, Rtl8168f, Rtl8168g, Rtl8168h, Rtl8168ep, Unknown } ++ ++pub fn identify_chip(rev: u8, mac0: u32, _m1: u32, _m2: u32, _m3: u32, _m4: u32) -> ChipVersion { ++ match ((mac0 >> 20) & 0x7, rev) { ++ (0, _) => ChipVersion::Rtl8168b, (1, 0x00..=0x01) => ChipVersion::Rtl8168c, (1, 0x02) => ChipVersion::Rtl8168cp, ++ (2, _) => ChipVersion::Rtl8168d, (3, r) if r <= 0x02 => ChipVersion::Rtl8168e, (3, _) => ChipVersion::Rtl8168evl, ++ (4, _) => ChipVersion::Rtl8168f, (5, _) => ChipVersion::Rtl8168g, (6, _) => ChipVersion::Rtl8168h, ++ (7, _) => ChipVersion::Rtl8168ep, _ => ChipVersion::Unknown, ++ } ++} ++ ++pub mod phy_regs { ++ pub const BMCR: u32 = 0x00; pub const BMSR: u32 = 0x01; pub const PHYID1: u32 = 0x02; pub const PHYID2: u32 = 0x03; ++ pub const ANAR: u32 = 0x04; pub const ANLPAR: u32 = 0x05; ++ pub const BMCR_RESET: u16 = 1 << 15; pub const BMCR_LOOPBACK: u16 = 1 << 14; ++ pub const BMCR_SPEED_1000: u16 = 1 << 6; pub const BMCR_AUTONEG_ENABLE: u16 = 1 << 12; ++ pub const BMCR_AUTONEG_RESTART: u16 = 1 << 9; pub const BMCR_DUPLEX: u16 = 1 << 8; ++ pub const BMSR_AUTONEG_COMPLETE: u16 = 1 << 5; pub const BMSR_LINK_STATUS: u16 = 1 << 2; ++} ++ ++pub fn phy_link_up(read: 
&dyn Fn(u32) -> u16) -> bool { read(phy_regs::BMSR) & phy_regs::BMSR_LINK_STATUS != 0 } ++ ++pub fn phy_reset(write: &dyn Fn(u32, u16), read: &dyn Fn(u32) -> u16) -> bool { ++ write(phy_regs::BMCR, phy_regs::BMCR_RESET); ++ for _ in 0..500 { if read(phy_regs::BMCR) & phy_regs::BMCR_RESET == 0 { return true; } } ++ false ++} ++ ++pub fn phy_init_for_chip(chip: ChipVersion, write: &dyn Fn(u32, u16), _read: &dyn Fn(u32) -> u16) { ++ match chip { ++ ChipVersion::Rtl8168g | ChipVersion::Rtl8168h | ChipVersion::Rtl8168ep => { ++ write(phy_regs::BMCR, phy_regs::BMCR_AUTONEG_ENABLE | phy_regs::BMCR_AUTONEG_RESTART | phy_regs::BMCR_SPEED_1000 | phy_regs::BMCR_DUPLEX); ++ } ++ _ => { ++ write(phy_regs::BMCR, phy_regs::BMCR_AUTONEG_ENABLE | phy_regs::BMCR_AUTONEG_RESTART); ++ } ++ } ++} ++ ++pub fn set_jumbo_mtu(_write_phy: &dyn Fn(u32, u16), _mtu: u16) { ++} +diff --git a/drivers/storage/ahcid/src/ahci/ncq.rs b/drivers/storage/ahcid/src/ahci/ncq.rs +new file mode 100644 +index 00000000..e08818f0 +--- /dev/null ++++ b/drivers/storage/ahcid/src/ahci/ncq.rs +@@ -0,0 +1,72 @@ ++use core::sync::atomic::{AtomicU32, Ordering}; ++ ++pub const NCQ_MAX_DEPTH: usize = 32; ++ ++pub struct NcqState { ++ pub sactive: AtomicU32, ++ pub pending: AtomicU32, ++} ++ ++impl NcqState { ++ pub const fn new() -> Self { ++ Self { sactive: AtomicU32::new(0), pending: AtomicU32::new(0) } ++ } ++ pub fn allocate_tag(&self) -> Option { ++ let active = self.pending.load(Ordering::Acquire); ++ let free = !active; ++ if free == 0 { return None; } ++ let tag = free.trailing_zeros(); ++ let mask = 1u32 << tag; ++ self.pending.fetch_or(mask, Ordering::AcqRel); ++ self.sactive.fetch_or(mask, Ordering::AcqRel); ++ Some(tag) ++ } ++ pub fn complete_tag(&self, tag: u32) { ++ let mask = 1u32 << tag; ++ self.sactive.fetch_and(!mask, Ordering::AcqRel); ++ self.pending.fetch_and(!mask, Ordering::AcqRel); ++ } ++ pub fn has_pending(&self) -> bool { self.pending.load(Ordering::Acquire) != 0 } ++} ++ ++pub fn 
build_ncq_read_fis(tag: u32, lba: u64, count: u16) -> [u32; 5] { ++ let mut f = [0u32; 5]; ++ f[0] = 0x0000_8027; ++ f[1] = 0x0060 | ((count as u32 & 0xFF) << 24); ++ f[2] = (lba as u32 & 0xFF) | (((lba >> 8) as u32 & 0xFF) << 8); ++ let mid = ((lba >> 16) as u32 & 0xFF) | ((tag & 0x1F) << 3); ++ f[3] = mid | (((lba >> 24) as u32 & 0xFF) << 8) | (((lba >> 32) as u32 & 0xFF) << 16) | (((lba >> 40) as u32 & 0xFF) << 24); ++ f[4] = (((count >> 8) as u32 & 0xFF) << 16) | (((count >> 8) as u32 & 0xFF) << 24); ++ f ++} ++ ++pub fn build_ncq_write_fis(tag: u32, lba: u64, count: u16) -> [u32; 5] { ++ let mut f = build_ncq_read_fis(tag, lba, count); ++ f[1] = (f[1] & !0xFF00) | 0x6100; ++ f ++} ++ ++pub fn process_ncq_completions(old_sa: u32, new_sa: u32, ncq: &NcqState, completed: &mut [u32; NCQ_MAX_DEPTH]) -> usize { ++ let mask = old_sa & !new_sa; ++ if mask == 0 { return 0; } ++ let mut count = 0; ++ let mut m = mask; ++ while m != 0 { let t = m.trailing_zeros(); ncq.complete_tag(t); completed[count] = t; count += 1; m &= m - 1; } ++ count ++} ++ ++pub fn drive_supports_ncq(id: &[u16; 256]) -> bool { id.get(76).map_or(false, |w| w & (1 << 8) != 0) } ++pub fn ncq_queue_depth(id: &[u16; 256]) -> u32 { ++ id.get(75).map_or(1, |w| { let d = (w & 0x1F) as u32; if d > 0 { (d + 1).min(NCQ_MAX_DEPTH as u32) } else { 1 } }) ++} ++ ++pub fn enable_ncq(hba_mem: &crate::ahci::hba::HbaMem, port_idx: usize) { ++ let port = &hba_mem.ports[port_idx]; ++ let cmd = port.cmd.read(); ++ port.cmd.write(cmd | 1 << 1); ++} ++ ++pub fn issue_ncq_command(port: &crate::ahci::hba::HbaPort, tag: u32) { ++ let ci = port.ci.read(); ++ port.ci.write(ci | (1u32 << tag)); ++} diff --git a/local/patches/kernel/P10-debug-scheme-serial-fix.patch b/local/patches/kernel/P10-debug-scheme-serial-fix.patch new file mode 100644 index 00000000..9d79c436 --- /dev/null +++ b/local/patches/kernel/P10-debug-scheme-serial-fix.patch @@ -0,0 +1,34 @@ +--- a/src/scheme/debug.rs 2026-04-28 07:21:41.000000000 +0100 ++++ 
b/src/scheme/debug.rs 2026-05-04 08:10:23.688174541 +0100 +@@ -22,9 +22,10 @@ + + static HANDLES: RwLock> = RwLock::new(HandleMap::new()); + +-/// Add to the input queue ++/// Add to the input queue, translating CR to NL (ICRNL) for serial console compatibility. + pub fn debug_input(data: u8, token: &mut CleanLockToken) { +- INPUT.send(data, token); ++ let translated = if data == b'\r' { b'\n' } else { data }; ++ INPUT.send(translated, token); + } + + // Notify readers of input updates +@@ -106,12 +107,16 @@ + fn fevent( + &self, + id: usize, +- _flags: EventFlags, ++ flags: EventFlags, + token: &mut CleanLockToken, + ) -> Result { + let _handle = *HANDLES.read(token.token()).get(id)?; + +- Ok(EventFlags::empty()) ++ let mut ready = EventFlags::empty(); ++ if flags.contains(EventFlags::EVENT_READ) { ++ ready |= EventFlags::EVENT_READ; ++ } ++ Ok(ready) + } + + fn fsync(&self, id: usize, token: &mut CleanLockToken) -> Result<()> { diff --git a/local/patches/kernel/redbear-consolidated.patch b/local/patches/kernel/redbear-consolidated.patch new file mode 100644 index 00000000..ee0531cf --- /dev/null +++ b/local/patches/kernel/redbear-consolidated.patch @@ -0,0 +1,2592 @@ +diff --git a/Cargo.toml b/Cargo.toml +index 6d4f059a..e05f723c 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -12,6 +12,7 @@ cc = "1.0" + toml = "0.8" + + [dependencies] ++acpi_ext = { package = "acpi", git = "https://gitlab.redox-os.org/redox-os/acpi.git", branch = "redox-6.x" } + arrayvec = { version = "0.7.4", default-features = false } + bitfield = "0.13.2" + bitflags = "2" +diff --git a/Makefile b/Makefile +index 68a8c50a..ce59b910 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,3 +1,4 @@ ++# Red Bear OS kernel patches applied via individual patch files + .PHONY: all check + + SOURCE:=$(dir $(realpath $(lastword $(MAKEFILE_LIST)))) +diff --git a/build.rs b/build.rs +index 96c3ea5c..751746cc 100644 +--- a/build.rs ++++ b/build.rs +@@ -77,6 +77,7 @@ fn main() { + } + "x86_64" => { + 
println!("cargo::rerun-if-changed=src/asm/x86_64/trampoline.asm"); ++ println!("cargo::rerun-if-changed=src/asm/x86_64/s3_wakeup.asm"); + + let status = Command::new("nasm") + .arg("-f") +@@ -89,6 +90,18 @@ fn main() { + if !status.success() { + panic!("nasm failed with exit status {}", status); + } ++ ++ let status = Command::new("nasm") ++ .arg("-f") ++ .arg("bin") ++ .arg("-o") ++ .arg(format!("{}/s3_wakeup", out_dir)) ++ .arg("src/asm/x86_64/s3_wakeup.asm") ++ .status() ++ .expect("failed to run nasm"); ++ if !status.success() { ++ panic!("nasm failed with exit status {}", status); ++ } + } + "riscv64" => { + println!("cargo::rustc-cfg=dtb"); +diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs +index 4dc23883..f472c088 100644 +--- a/src/acpi/madt/arch/x86.rs ++++ b/src/acpi/madt/arch/x86.rs +@@ -18,6 +18,7 @@ use crate::{ + + use super::{Madt, MadtEntry}; + ++const AP_SPIN_LIMIT: u32 = 1_000_000; + const TRAMPOLINE: usize = 0x8000; + static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline")); + +@@ -42,13 +43,17 @@ pub(super) fn init(madt: Madt) { + //TODO: do not have writable and executable! 
+ let mut mapper = KernelMapper::lock_rw(); + +- let result = mapper +- .map_phys( +- trampoline_page.start_address(), +- trampoline_frame.base(), +- PageFlags::new().execute(true).write(true), +- ) +- .expect("failed to map trampoline"); ++ let result = match mapper.map_phys( ++ trampoline_page.start_address(), ++ trampoline_frame.base(), ++ PageFlags::new().execute(true).write(true), ++ ) { ++ Some(result) => result, ++ None => { ++ println!("KERNEL AP: failed to map trampoline page, AP bring-up disabled"); ++ return; ++ } ++ }; + + (result, mapper.table().phys().data()) + }; +@@ -72,17 +77,27 @@ pub(super) fn init(madt: Madt) { + if u32::from(ap_local_apic.id) == me.get() { + debug!(" This is my local APIC"); + } else if ap_local_apic.flags & 1 == 1 { +- let cpu_id = LogicalCpuId::next(); +- + // Allocate a stack +- let stack_start = RmmA::phys_to_virt( +- allocate_p2frame(4) +- .expect("no more frames in acpi stack_start") +- .base(), +- ) +- .data(); ++ let alloc = match allocate_p2frame(4) { ++ Some(frame) => frame, ++ None => { ++ println!("KERNEL AP: CPU {} no memory for stack, skipping", ap_local_apic.id); ++ continue; ++ } ++ }; ++ let stack_start = RmmA::phys_to_virt(alloc.base()).data(); + let stack_end = stack_start + (PAGE_SIZE << 4); + ++ let next_cpu = crate::CPU_COUNT.load(Ordering::Relaxed); ++ if next_cpu >= crate::cpu_set::MAX_CPU_COUNT { ++ println!( ++ "KERNEL AP: CPU {} exceeds logical CPU limit, skipping", ++ ap_local_apic.id ++ ); ++ continue; ++ } ++ let cpu_id = LogicalCpuId::new(next_cpu); ++ + let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end); + + let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id); +@@ -137,13 +152,34 @@ pub(super) fn init(madt: Madt) { + local_apic.set_icr(icr); + } + +- // Wait for trampoline ready +- while unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } == 0 { ++ // Wait for trampoline ready with timeout ++ let mut trampoline_ready = false; ++ for _ in 0..AP_SPIN_LIMIT { ++ if 
unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } != 0 { ++ trampoline_ready = true; ++ break; ++ } + hint::spin_loop(); + } +- while !AP_READY.load(Ordering::SeqCst) { ++ if !trampoline_ready { ++ println!("KERNEL AP: CPU {} trampoline timeout, skipping", ap_local_apic.id); ++ continue; ++ } ++ ++ let mut kernel_ready = false; ++ for _ in 0..AP_SPIN_LIMIT { ++ if AP_READY.load(Ordering::SeqCst) { ++ kernel_ready = true; ++ break; ++ } + hint::spin_loop(); + } ++ if !kernel_ready { ++ println!("KERNEL AP: CPU {} AP_READY timeout, skipping", ap_local_apic.id); ++ continue; ++ } ++ ++ crate::CPU_COUNT.fetch_add(1, Ordering::Relaxed); + + RmmA::invalidate_all(); + } +@@ -151,10 +187,12 @@ pub(super) fn init(madt: Madt) { + } + + // Unmap trampoline +- let (_frame, _, flush) = unsafe { ++ if let Some((_frame, _, flush)) = unsafe { + KernelMapper::lock_rw() + .unmap_phys(trampoline_page.start_address()) +- .expect("failed to unmap trampoline page") +- }; +- flush.flush(); ++ } { ++ flush.flush(); ++ } else { ++ println!("KERNEL AP: failed to unmap trampoline page (non-fatal)"); ++ } + } +diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs +index 59e35265..b3b80f0c 100644 +--- a/src/acpi/mod.rs ++++ b/src/acpi/mod.rs +@@ -82,6 +82,14 @@ impl Rxsdt for RxsdtEnum { + + pub static RXSDT_ENUM: Once = Once::new(); + ++#[derive(Clone, Copy, Debug)] ++pub struct AcpiRootInfo { ++ pub revision: u8, ++ pub root_sdt_address: PhysicalAddress, ++} ++ ++pub static ACPI_ROOT_INFO: Once = Once::new(); ++ + /// Parse the ACPI tables to gather CPU, interrupt, and timer information + pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) { + unsafe { +@@ -94,6 +102,15 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) { + let rsdp_opt = Rsdp::get_rsdp(already_supplied_rsdp); + + if let Some(rsdp) = rsdp_opt { ++ let root_info = ACPI_ROOT_INFO.call_once(|| AcpiRootInfo { ++ revision: rsdp.revision(), ++ root_sdt_address: rsdp.sdt_address(), ++ }); ++ ++ if 
root_info.root_sdt_address != rsdp.sdt_address() || root_info.revision != rsdp.revision() { ++ error!("ACPI_ROOT_INFO already initialized with a different RSDP root"); ++ } ++ + debug!("SDT address: {:#x}", rsdp.sdt_address().data()); + let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock_rw()); + +diff --git a/src/acpi/rsdp.rs b/src/acpi/rsdp.rs +index f10c5ac9..5e93a9f8 100644 +--- a/src/acpi/rsdp.rs ++++ b/src/acpi/rsdp.rs +@@ -31,4 +31,8 @@ impl Rsdp { + self.rsdt_address as usize + }) + } ++ ++ pub fn revision(&self) -> u8 { ++ self.revision ++ } + } +diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs +index 4fdb0ba1..aaa71963 100644 +--- a/src/allocator/mod.rs ++++ b/src/allocator/mod.rs +@@ -7,26 +7,40 @@ mod linked_list; + /// Size of kernel heap + const KERNEL_HEAP_SIZE: usize = ::rmm::MEGABYTE; + ++#[cold] ++fn halt_kernel_heap_init(message: &str) -> ! { ++ print!("{message}"); ++ println!("Kernel heap initialization cannot continue. Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++} ++ + unsafe fn map_heap(mapper: &mut KernelMapper, offset: usize, size: usize) { + let mut flush_all = PageFlushAll::new(); + + let heap_start_page = Page::containing_address(VirtualAddress::new(offset)); + let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size - 1)); + for page in Page::range_inclusive(heap_start_page, heap_end_page) { +- let phys = mapper +- .allocator_mut() +- .allocate_one() +- .expect("failed to allocate kernel heap"); ++ let phys = match mapper.allocator_mut().allocate_one() { ++ Some(phys) => phys, ++ None => halt_kernel_heap_init( ++ "FATAL: failed to allocate physical frame for kernel heap\n", ++ ), ++ }; + let flush = unsafe { +- mapper +- .map_phys( +- page.start_address(), +- phys, +- PageFlags::new() +- .write(true) +- .global(cfg!(not(feature = "pti"))), +- ) +- .expect("failed to map kernel heap") ++ match mapper.map_phys( ++ page.start_address(), ++ phys, ++ PageFlags::new() ++ .write(true) ++ 
.global(cfg!(not(feature = "pti"))), ++ ) { ++ Some(flush) => flush, ++ None => halt_kernel_heap_init( ++ "FATAL: failed to map kernel heap virtual page\n", ++ ), ++ } + }; + flush_all.consume(flush); + } +diff --git a/src/arch/x86_shared/gdt.rs b/src/arch/x86_shared/gdt.rs +index cad344f3..f7acae35 100644 +--- a/src/arch/x86_shared/gdt.rs ++++ b/src/arch/x86_shared/gdt.rs +@@ -192,6 +192,15 @@ impl ProcessorControlRegion { + } + } + ++#[cold] ++fn halt_pcr_init() -> ! { ++ println!("FATAL: failed to allocate physical memory for Processor Control Region"); ++ println!("Processor startup cannot continue. Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++} ++ + pub unsafe fn pcr() -> *mut ProcessorControlRegion { + unsafe { + // Primitive benchmarking of RDFSBASE and RDGSBASE in userspace, appears to indicate that +@@ -375,7 +384,10 @@ pub fn allocate_and_init_pcr( + .next_power_of_two() + .trailing_zeros(); + +- let pcr_frame = crate::memory::allocate_p2frame(alloc_order).expect("failed to allocate PCR"); ++ let pcr_frame = match crate::memory::allocate_p2frame(alloc_order) { ++ Some(frame) => frame, ++ None => halt_pcr_init(), ++ }; + let pcr_ptr = RmmA::phys_to_virt(pcr_frame.base()).data() as *mut ProcessorControlRegion; + unsafe { core::ptr::write(pcr_ptr, ProcessorControlRegion::new_partial_init(cpu_id)) }; + +diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs +index 50064585..47f692f6 100644 +--- a/src/arch/x86_shared/idt.rs ++++ b/src/arch/x86_shared/idt.rs +@@ -78,6 +78,15 @@ static INIT_BSP_IDT: SyncUnsafeCell = SyncUnsafeCell::new(Idt::new()); + pub(crate) static IDTS: RwLock> = + RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new())); + ++#[cold] ++fn halt_idt_init() -> ! { ++ println!("FATAL: failed to allocate physical pages for backup interrupt stack"); ++ println!("Interrupt setup cannot continue. 
Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++} ++ + #[inline] + pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool { + if cpu_id == LogicalCpuId::BSP { +@@ -161,8 +170,10 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt { + .or_insert_with(|| Box::leak(Box::new(Idt::new()))); + + use crate::memory::{RmmA, RmmArch}; +- let frames = crate::memory::allocate_p2frame(4) +- .expect("failed to allocate pages for backup interrupt stack"); ++ let frames = match crate::memory::allocate_p2frame(4) { ++ Some(frames) => frames, ++ None => halt_idt_init(), ++ }; + + // Physical pages are mapped linearly. So is the linearly mapped virtual memory. + let base_address = RmmA::phys_to_virt(frames.base()); +diff --git a/src/arch/x86_shared/mod.rs b/src/arch/x86_shared/mod.rs +index e3c30501..11c33e94 100644 +--- a/src/arch/x86_shared/mod.rs ++++ b/src/arch/x86_shared/mod.rs +@@ -28,6 +28,8 @@ pub mod pti; + /// Initialization and start function + pub mod start; + ++pub mod sleep; ++ + /// Stop function + pub mod stop; + +diff --git a/src/arch/x86_shared/sleep.rs b/src/arch/x86_shared/sleep.rs +new file mode 100644 +index 00000000..9f98c0d8 +--- /dev/null ++++ b/src/arch/x86_shared/sleep.rs +@@ -0,0 +1,712 @@ ++use alloc::{sync::Arc, vec::Vec}; ++use core::{ ++ ptr::NonNull, ++ str::FromStr, ++ sync::atomic::{AtomicU32, Ordering}, ++}; ++ ++use acpi_ext::{ ++ aml::{namespace::AmlName, object::Object, Interpreter}, ++ registers::FixedRegisters, ++ sdt::{facs::Facs, fadt::Fadt, SdtHeader}, ++ AcpiTables, Handle, Handler, PhysicalMapping, ++}; ++use spin::Mutex; ++use syscall::error::{Error, EINVAL, EIO}; ++use x86::{segmentation::SegmentSelector, task, Ring}; ++ ++use crate::{ ++ acpi::ACPI_ROOT_INFO, ++ arch::interrupt, ++ memory::{ ++ round_down_pages, round_up_pages, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, ++ RmmArch, VirtualAddress, PAGE_SIZE, ++ }, ++ syscall::io::{Io, Pio}, ++}; ++ ++const ACPI_SLP_TYP_SHIFT: u16 = 10; ++const 
ACPI_SLP_TYP_MASK: u16 = 0x1C00; ++const ACPI_SLP_EN: u16 = 1 << 13; ++const WAKE_TRAMPOLINE_PHYS: usize = 0x8000; ++const SLEEP_RETURN_OK: usize = 0; ++ ++#[cfg(target_arch = "x86_64")] ++static WAKE_TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/s3_wakeup")); ++ ++#[repr(C, packed)] ++#[derive(Clone, Copy, Debug, Default)] ++struct DescriptorTableRegister { ++ limit: u16, ++ base: u64, ++} ++ ++#[repr(C, align(64))] ++#[derive(Clone, Copy, Debug)] ++struct FpuState { ++ bytes: [u8; 4096], ++} ++ ++impl Default for FpuState { ++ fn default() -> Self { ++ Self { bytes: [0; 4096] } ++ } ++} ++ ++#[derive(Clone, Copy, Debug, Eq, PartialEq)] ++pub enum SleepState { ++ S3, ++ S5, ++} ++ ++#[derive(Clone, Copy, Debug, Eq, PartialEq)] ++pub enum SleepError { ++ UnsupportedArch, ++ MissingAcpi, ++ MissingFadt, ++ MissingFacs, ++ MissingSleepObject, ++ InvalidSleepObject, ++ UnsupportedPmControl, ++ UnsupportedAmlOperation, ++ SleepDidNotEnter, ++} ++ ++impl SleepError { ++ fn code(self) -> usize { ++ match self { ++ Self::UnsupportedArch => EINVAL as usize, ++ Self::MissingAcpi ++ | Self::MissingFadt ++ | Self::MissingFacs ++ | Self::MissingSleepObject ++ | Self::UnsupportedAmlOperation => EIO as usize, ++ Self::InvalidSleepObject | Self::UnsupportedPmControl | Self::SleepDidNotEnter => { ++ EINVAL as usize ++ } ++ } ++ } ++ ++ fn from_code(code: usize) -> Self { ++ match code as i32 { ++ x if x == EINVAL => Self::InvalidSleepObject, ++ _ => Self::MissingAcpi, ++ } ++ } ++} ++ ++#[derive(Clone, Copy, Debug, Default)] ++struct SavedCpuContext { ++ entry_rsp: usize, ++ runtime_rsp: usize, ++ facs_address: usize, ++ cr0: usize, ++ cr2: usize, ++ cr3: usize, ++ cr4: usize, ++ rflags: usize, ++ gdtr: DescriptorTableRegister, ++ idtr: DescriptorTableRegister, ++ efer: u64, ++ fs_base: u64, ++ gs_base: u64, ++ kernel_gs_base: u64, ++ fpu: FpuState, ++} ++ ++static SAVED_CONTEXT: Mutex> = Mutex::new(None); ++static AML_MUTEX_IDS: AtomicU32 = AtomicU32::new(1); 
++ ++#[derive(Clone, Copy, Debug)] ++struct SleepTypeData { ++ a: u16, ++ b: u16, ++} ++ ++#[derive(Clone, Copy)] ++struct KernelAcpiHandler; ++ ++impl KernelAcpiHandler { ++ fn map_range(physical_address: usize, size: usize) -> (*mut u8, usize) { ++ let map_base = round_down_pages(physical_address); ++ let map_offset = physical_address - map_base; ++ let mapped_length = round_up_pages(size + map_offset); ++ ++ // SAFETY: The ACPI interpreter only requests firmware-described physical regions. ++ unsafe { ++ let mut mapper = KernelMapper::lock_rw(); ++ for page_index in 0..mapped_length / PAGE_SIZE { ++ let (_, flush) = mapper ++ .map_linearly( ++ PhysicalAddress::new(map_base + page_index * PAGE_SIZE), ++ PageFlags::new(), ++ ) ++ .expect("failed to linearly map ACPI physical region"); ++ flush.flush(); ++ } ++ } ++ ++ let virtual_base = RmmA::phys_to_virt(PhysicalAddress::new(map_base)).data(); ++ ((virtual_base + map_offset) as *mut u8, mapped_length) ++ } ++} ++ ++impl Handler for KernelAcpiHandler { ++ unsafe fn map_physical_region(&self, physical_address: usize, size: usize) -> PhysicalMapping { ++ let (virtual_start, mapped_length) = Self::map_range(physical_address, size); ++ PhysicalMapping { ++ physical_start: physical_address, ++ virtual_start: NonNull::new(virtual_start.cast::()) ++ .expect("expected mapped ACPI virtual address to be non-null"), ++ region_length: size, ++ mapped_length, ++ handler: *self, ++ } ++ } ++ ++ fn unmap_physical_region(_region: &PhysicalMapping) {} ++ ++ fn read_u8(&self, address: usize) -> u8 { ++ // SAFETY: AML system-memory accesses are byte-addressable firmware regions. ++ unsafe { core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u8) } ++ } ++ ++ fn read_u16(&self, address: usize) -> u16 { ++ // SAFETY: AML system-memory accesses are word-addressable firmware regions. 
++ unsafe { ++ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u16) ++ } ++ } ++ ++ fn read_u32(&self, address: usize) -> u32 { ++ // SAFETY: AML system-memory accesses are dword-addressable firmware regions. ++ unsafe { ++ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u32) ++ } ++ } ++ ++ fn read_u64(&self, address: usize) -> u64 { ++ // SAFETY: AML system-memory accesses are qword-addressable firmware regions. ++ unsafe { ++ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u64) ++ } ++ } ++ ++ fn write_u8(&self, address: usize, value: u8) { ++ // SAFETY: AML system-memory accesses are byte-addressable firmware regions. ++ unsafe { ++ core::ptr::write_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u8, value) ++ } ++ } ++ ++ fn write_u16(&self, address: usize, value: u16) { ++ // SAFETY: AML system-memory accesses are word-addressable firmware regions. ++ unsafe { ++ core::ptr::write_volatile( ++ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u16, ++ value, ++ ) ++ } ++ } ++ ++ fn write_u32(&self, address: usize, value: u32) { ++ // SAFETY: AML system-memory accesses are dword-addressable firmware regions. ++ unsafe { ++ core::ptr::write_volatile( ++ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u32, ++ value, ++ ) ++ } ++ } ++ ++ fn write_u64(&self, address: usize, value: u64) { ++ // SAFETY: AML system-memory accesses are qword-addressable firmware regions. 
++ unsafe { ++ core::ptr::write_volatile( ++ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u64, ++ value, ++ ) ++ } ++ } ++ ++ fn read_io_u8(&self, port: u16) -> u8 { ++ Pio::::new(port).read() ++ } ++ ++ fn read_io_u16(&self, port: u16) -> u16 { ++ Pio::::new(port).read() ++ } ++ ++ fn read_io_u32(&self, port: u16) -> u32 { ++ Pio::::new(port).read() ++ } ++ ++ fn write_io_u8(&self, port: u16, value: u8) { ++ Pio::::new(port).write(value) ++ } ++ ++ fn write_io_u16(&self, port: u16, value: u16) { ++ Pio::::new(port).write(value) ++ } ++ ++ fn write_io_u32(&self, port: u16, value: u32) { ++ Pio::::new(port).write(value) ++ } ++ ++ fn read_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u8 { ++ 0 ++ } ++ ++ fn read_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u16 { ++ 0 ++ } ++ ++ fn read_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u32 { ++ 0 ++ } ++ ++ fn write_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u8) {} ++ ++ fn write_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u16) {} ++ ++ fn write_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u32) {} ++ ++ fn nanos_since_boot(&self) -> u64 { ++ 0 ++ } ++ ++ fn stall(&self, microseconds: u64) { ++ for _ in 0..(microseconds.saturating_mul(64)) { ++ core::hint::spin_loop(); ++ } ++ } ++ ++ fn sleep(&self, milliseconds: u64) { ++ for _ in 0..(milliseconds.saturating_mul(64_000)) { ++ core::hint::spin_loop(); ++ } ++ } ++ ++ fn create_mutex(&self) -> Handle { ++ Handle(AML_MUTEX_IDS.fetch_add(1, Ordering::Relaxed)) ++ } ++ ++ fn acquire(&self, _mutex: Handle, _timeout: u16) -> Result<(), acpi_ext::aml::AmlError> { ++ Ok(()) ++ } ++ ++ fn release(&self, _mutex: Handle) {} ++} ++ ++fn sleep_state_name(state: SleepState) -> &'static str { ++ match state { ++ SleepState::S3 => "\\_S3", ++ SleepState::S5 => "\\_S5", ++ } ++} ++ ++fn encode_sleep_type(value: u16) -> u16 { ++ if value <= 
0x7 { ++ value << ACPI_SLP_TYP_SHIFT ++ } else { ++ value & ACPI_SLP_TYP_MASK ++ } ++} ++ ++fn load_interpreter() -> Result<( ++ Arc>, ++ PhysicalMapping, ++ Interpreter, ++), SleepError> { ++ let root = *ACPI_ROOT_INFO.get().ok_or(SleepError::MissingAcpi)?; ++ let handler = KernelAcpiHandler; ++ ++ // SAFETY: ACPI root info is captured from the firmware-provided, already validated root table. ++ let tables = unsafe { ++ AcpiTables::from_rsdt(handler, root.revision, root.root_sdt_address.data()) ++ .map_err(|_| SleepError::MissingAcpi)? ++ }; ++ let fadt = tables.find_table::().ok_or(SleepError::MissingFadt)?; ++ let registers = Arc::new( ++ FixedRegisters::new(&fadt, handler).map_err(|_| SleepError::UnsupportedPmControl)?, ++ ); ++ let facs_address = fadt.facs_address().map_err(|_| SleepError::MissingFacs)?; ++ ++ // SAFETY: The FADT-supplied FACS address is used exactly as described by the ACPI spec. ++ let facs = unsafe { handler.map_physical_region::(facs_address, core::mem::size_of::()) }; ++ // SAFETY: The AML interpreter only needs an owned mapping of the same firmware FACS table. ++ let interpreter_facs = unsafe { ++ handler.map_physical_region::(facs_address, core::mem::size_of::()) ++ }; ++ let dsdt = tables.dsdt().map_err(|_| SleepError::MissingFadt)?; ++ let interpreter = Interpreter::new(handler, dsdt.revision, Arc::clone(®isters), Some(interpreter_facs)); ++ ++ // SAFETY: Each AML table mapping is owned by the interpreter during table loading. 
++ unsafe { ++ let mapping = handler.map_physical_region::(dsdt.phys_address, dsdt.length as usize); ++ let stream = core::slice::from_raw_parts( ++ mapping.virtual_start.as_ptr().byte_add(core::mem::size_of::()) as *const u8, ++ dsdt.length as usize - core::mem::size_of::(), ++ ); ++ interpreter ++ .load_table(stream) ++ .map_err(|_| SleepError::UnsupportedAmlOperation)?; ++ ++ for ssdt in tables.ssdts() { ++ let mapping = handler.map_physical_region::(ssdt.phys_address, ssdt.length as usize); ++ let stream = core::slice::from_raw_parts( ++ mapping.virtual_start.as_ptr().byte_add(core::mem::size_of::()) as *const u8, ++ ssdt.length as usize - core::mem::size_of::(), ++ ); ++ interpreter ++ .load_table(stream) ++ .map_err(|_| SleepError::UnsupportedAmlOperation)?; ++ } ++ } ++ ++ Ok((registers, facs, interpreter)) ++} ++ ++fn sleep_type_data_from_interpreter( ++ interpreter: &Interpreter, ++ state: SleepState, ++) -> Result { ++ let name = AmlName::from_str(sleep_state_name(state)).map_err(|_| SleepError::MissingSleepObject)?; ++ let object = interpreter ++ .evaluate(name, Vec::new()) ++ .map_err(|_| SleepError::MissingSleepObject)?; ++ ++ let Object::Package(package) = &*object else { ++ return Err(SleepError::InvalidSleepObject); ++ }; ++ ++ let Some(typa_object) = package.first() else { ++ return Err(SleepError::InvalidSleepObject); ++ }; ++ let Some(typb_object) = package.get(1) else { ++ return Err(SleepError::InvalidSleepObject); ++ }; ++ ++ let Object::Integer(typa) = &**typa_object else { ++ return Err(SleepError::InvalidSleepObject); ++ }; ++ let Object::Integer(typb) = &**typb_object else { ++ return Err(SleepError::InvalidSleepObject); ++ }; ++ ++ Ok(SleepTypeData { ++ a: encode_sleep_type(*typa as u16), ++ b: encode_sleep_type(*typb as u16), ++ }) ++} ++ ++fn sleep_type_data(state: SleepState) -> Result { ++ let (_registers, _facs, interpreter) = load_interpreter()?; ++ sleep_type_data_from_interpreter(&interpreter, state) ++} ++ ++fn 
install_wake_trampoline(stack_rsp: usize, cr3: usize) { ++ let trampoline_page = Page::containing_address(VirtualAddress::new(WAKE_TRAMPOLINE_PHYS)); ++ let trampoline_frame = PhysicalAddress::new(WAKE_TRAMPOLINE_PHYS); ++ ++ // SAFETY: The 0x8000 low-memory trampoline page is reserved by the kernel for bootstrap stubs. ++ let (result, _) = unsafe { ++ let mut mapper = KernelMapper::lock_rw(); ++ let result = mapper ++ .map_phys( ++ trampoline_page.start_address(), ++ trampoline_frame, ++ PageFlags::new().execute(true).write(true), ++ ) ++ .expect("failed to map S3 wake trampoline page"); ++ (result, mapper.table().phys().data()) ++ }; ++ result.flush(); ++ ++ for (index, value) in WAKE_TRAMPOLINE_DATA.iter().enumerate() { ++ // SAFETY: The trampoline page is mapped writable at the same virtual address as the physical page. ++ unsafe { ++ core::ptr::write_volatile((WAKE_TRAMPOLINE_PHYS as *mut u8).add(index), *value); ++ } ++ } ++ ++ // SAFETY: The wake trampoline layout reserves three qword fields immediately after the jump. ++ unsafe { ++ let stack_slot = (WAKE_TRAMPOLINE_PHYS + 8) as *mut u64; ++ let page_table_slot = stack_slot.add(1); ++ let code_slot = stack_slot.add(2); ++ stack_slot.write(stack_rsp as u64); ++ page_table_slot.write(cr3 as u64); ++ #[expect(clippy::fn_to_numeric_cast)] ++ code_slot.write(resume_from_s3_trampoline as usize as u64); ++ } ++ ++ // SAFETY: The trampoline mapping is no longer needed once the physical page has been populated. ++ let (_frame, _, flush) = unsafe { ++ KernelMapper::lock_rw() ++ .unmap_phys(trampoline_page.start_address()) ++ .expect("failed to unmap S3 wake trampoline page") ++ }; ++ flush.flush(); ++} ++ ++fn save_descriptor_tables(context: &mut SavedCpuContext) { ++ // SAFETY: SGDT/SIDT only read the current CPU descriptor-table registers into the provided storage. 
++ unsafe { ++ core::arch::asm!("sgdt [{}]", in(reg) &mut context.gdtr, options(nostack, preserves_flags)); ++ core::arch::asm!("sidt [{}]", in(reg) &mut context.idtr, options(nostack, preserves_flags)); ++ } ++} ++ ++fn save_fpu_state(context: &mut SavedCpuContext) { ++ // SAFETY: The kernel owns the current CPU at suspend entry and the FXSAVE buffer is 64-byte aligned. ++ unsafe { ++ core::arch::asm!( ++ "fxsave64 [{}]", ++ in(reg) context.fpu.bytes.as_mut_ptr(), ++ ); ++ } ++} ++ ++fn restore_fpu_state(context: &SavedCpuContext) { ++ // SAFETY: The saved FXSAVE image belongs to the same CPU context and matches the restore instruction. ++ unsafe { ++ core::arch::asm!( ++ "fxrstor64 [{}]", ++ in(reg) context.fpu.bytes.as_ptr(), ++ ); ++ } ++} ++ ++fn save_cpu_context(entry_rsp: usize) -> SavedCpuContext { ++ let mut context = SavedCpuContext { ++ entry_rsp, ++ ..SavedCpuContext::default() ++ }; ++ ++ // SAFETY: Reading control registers and MSRs is required to reconstruct the CPU execution state on wake. 
++ unsafe { ++ core::arch::asm!( ++ "mov {}, cr0", ++ out(reg) context.cr0, ++ options(nostack, preserves_flags) ++ ); ++ core::arch::asm!( ++ "mov {}, cr2", ++ out(reg) context.cr2, ++ options(nostack, preserves_flags) ++ ); ++ core::arch::asm!( ++ "mov {}, cr3", ++ out(reg) context.cr3, ++ options(nostack, preserves_flags) ++ ); ++ core::arch::asm!( ++ "mov {}, cr4", ++ out(reg) context.cr4, ++ options(nostack, preserves_flags) ++ ); ++ core::arch::asm!( ++ "pushfq", ++ "pop {}", ++ out(reg) context.rflags, ++ options(preserves_flags) ++ ); ++ core::arch::asm!("mov {}, rsp", out(reg) context.runtime_rsp, options(nostack, preserves_flags)); ++ ++ context.efer = x86::msr::rdmsr(x86::msr::IA32_EFER); ++ context.fs_base = x86::msr::rdmsr(x86::msr::IA32_FS_BASE); ++ context.gs_base = x86::msr::rdmsr(x86::msr::IA32_GS_BASE); ++ context.kernel_gs_base = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE); ++ } ++ ++ save_descriptor_tables(&mut context); ++ save_fpu_state(&mut context); ++ context ++} ++ ++fn set_firmware_waking_vector(facs: &mut PhysicalMapping, vector: usize) { ++ facs.firmware_waking_vector = vector as u32; ++ facs.x_firmware_waking_vector = vector as u64; ++} ++ ++fn write_pm1_control_block( ++ registers: &FixedRegisters, ++ sleep_type: SleepTypeData, ++) -> Result<(), SleepError> { ++ let current_a = registers ++ .pm1_control_registers ++ .pm1a ++ .read() ++ .map_err(|_| SleepError::UnsupportedPmControl)? as u16; ++ let armed_a = (current_a & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.a; ++ ++ registers ++ .pm1_control_registers ++ .pm1a ++ .write(u64::from(armed_a)) ++ .map_err(|_| SleepError::UnsupportedPmControl)?; ++ ++ if let Some(pm1b) = ®isters.pm1_control_registers.pm1b { ++ let current_b = pm1b.read().map_err(|_| SleepError::UnsupportedPmControl)? 
as u16; ++ let armed_b = (current_b & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.b; ++ pm1b.write(u64::from(armed_b)) ++ .map_err(|_| SleepError::UnsupportedPmControl)?; ++ pm1b.write(u64::from(armed_b | ACPI_SLP_EN)) ++ .map_err(|_| SleepError::UnsupportedPmControl)?; ++ } ++ ++ // SAFETY: WBINVD is required here to flush dirty cache lines before firmware powers down the CPU package. ++ unsafe { ++ core::arch::asm!("wbinvd", options(nostack, preserves_flags)); ++ } ++ ++ registers ++ .pm1_control_registers ++ .pm1a ++ .write(u64::from(armed_a | ACPI_SLP_EN)) ++ .map_err(|_| SleepError::UnsupportedPmControl)?; ++ ++ Ok(()) ++} ++ ++#[unsafe(naked)] ++unsafe extern "sysv64" fn enter_sleep_raw(state: usize) -> usize { ++ core::arch::naked_asm!( ++ "mov rsi, rsp", ++ "jmp {inner}", ++ inner = sym enter_sleep_raw_inner, ++ ); ++} ++ ++extern "C" fn enter_sleep_raw_inner(state: usize, entry_rsp: usize) -> usize { ++ let state = match state { ++ 3 => SleepState::S3, ++ 5 => SleepState::S5, ++ _ => return SleepError::InvalidSleepObject.code(), ++ }; ++ ++ let (registers, mut facs, interpreter) = match load_interpreter() { ++ Ok(tuple) => tuple, ++ Err(error) => return error.code(), ++ }; ++ let sleep_type = match sleep_type_data_from_interpreter(&interpreter, state) { ++ Ok(data) => data, ++ Err(error) => return error.code(), ++ }; ++ ++ let mut context = save_cpu_context(entry_rsp); ++ context.facs_address = facs.physical_start; ++ install_wake_trampoline(context.runtime_rsp, context.cr3); ++ set_firmware_waking_vector(&mut facs, WAKE_TRAMPOLINE_PHYS); ++ ++ { ++ let mut saved = SAVED_CONTEXT.lock(); ++ *saved = Some(context); ++ } ++ ++ // SAFETY: Suspend entry must not be interrupted while the wake vector and PM1 control block are being armed. 
++ unsafe { ++ interrupt::disable(); ++ } ++ ++ if let Err(error) = write_pm1_control_block(registers.as_ref(), sleep_type) { ++ return error.code(); ++ } ++ ++ // SAFETY: The final CLI+HLT sequence is the architectural handoff point after asserting SLP_EN. ++ unsafe { ++ core::arch::asm!("cli; hlt", options(nostack)); ++ } ++ ++ SleepError::SleepDidNotEnter.code() ++} ++ ++extern "C" fn resume_from_s3_trampoline() -> ! { ++ let mut saved = SAVED_CONTEXT.lock(); ++ let context = saved.take().expect("S3 wake trampoline resumed without saved CPU context"); ++ drop(saved); ++ ++ // SAFETY: The saved FACS physical address was captured from the validated FADT during suspend entry. ++ if context.facs_address != 0 { ++ let mut facs = unsafe { ++ KernelAcpiHandler.map_physical_region::( ++ context.facs_address, ++ core::mem::size_of::(), ++ ) ++ }; ++ set_firmware_waking_vector(&mut facs, 0); ++ } ++ ++ // SAFETY: The wake trampoline already switched to the saved kernel CR3 and long mode, so the remaining restores are architectural register state only. 
++ unsafe { ++ x86::msr::wrmsr(x86::msr::IA32_EFER, context.efer); ++ core::arch::asm!("mov cr3, {}", in(reg) context.cr3, options(nostack)); ++ core::arch::asm!("mov cr4, {}", in(reg) context.cr4, options(nostack)); ++ core::arch::asm!("mov cr2, {}", in(reg) context.cr2, options(nostack)); ++ core::arch::asm!("mov cr0, {}", in(reg) context.cr0, options(nostack)); ++ core::arch::asm!("lgdt [{}]", in(reg) &context.gdtr, options(nostack)); ++ core::arch::asm!("lidt [{}]", in(reg) &context.idtr, options(nostack)); ++ ++ task::load_tr(SegmentSelector::new(crate::arch::gdt::GDT_TSS as u16, Ring::Ring0)); ++ ++ x86::msr::wrmsr(x86::msr::IA32_FS_BASE, context.fs_base); ++ x86::msr::wrmsr(x86::msr::IA32_GS_BASE, context.gs_base); ++ x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, context.kernel_gs_base); ++ } ++ ++ restore_fpu_state(&context); ++ ++ // SAFETY: Returning with the original entry stack and RFLAGS completes the suspend call as a successful function return. ++ unsafe { ++ core::arch::asm!( ++ "mov rsp, {entry_rsp}", ++ "push {rflags}", ++ "popfq", ++ "xor eax, eax", ++ "ret", ++ entry_rsp = in(reg) context.entry_rsp, ++ rflags = in(reg) context.rflags, ++ options(noreturn) ++ ); ++ } ++} ++ ++pub fn enter_sleep_state(state: SleepState) -> core::result::Result<(), SleepError> { ++ #[cfg(not(target_arch = "x86_64"))] ++ { ++ let _ = state; ++ return Err(SleepError::UnsupportedArch); ++ } ++ ++ #[cfg(target_arch = "x86_64")] ++ { ++ let raw = unsafe { ++ enter_sleep_raw(match state { ++ SleepState::S3 => 3, ++ SleepState::S5 => 5, ++ }) ++ }; ++ if raw == SLEEP_RETURN_OK { ++ Ok(()) ++ } else { ++ Err(SleepError::from_code(raw)) ++ } ++ } ++} ++ ++pub fn available_sleep_states() -> &'static [u8] { ++ if sleep_type_data(SleepState::S3).is_ok() { ++ b"S3\nS5\n" ++ } else { ++ b"S5\n" ++ } ++} ++ ++pub fn trigger_sleep_request(request: &str) -> Result<(), Error> { ++ match request.trim() { ++ "S3" => enter_sleep_state(SleepState::S3).map_err(|_| Error::new(EIO)), ++ 
"S5" => enter_sleep_state(SleepState::S5).map_err(|_| Error::new(EIO)), ++ _ => Err(Error::new(EINVAL)), ++ } ++} +diff --git a/src/arch/x86_shared/start.rs b/src/arch/x86_shared/start.rs +index 7a7c0ae8..f1dbb6b4 100644 +--- a/src/arch/x86_shared/start.rs ++++ b/src/arch/x86_shared/start.rs +@@ -82,6 +82,15 @@ extern "C" fn kstart() { + /// The entry to Rust, all things must be initialized + unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { + unsafe { ++ // EARLY CANARY: write 'R' to COM1 before any kernel init. ++ // This proves the serial hardware works and the kernel reached Rust entry. ++ // If this character appears but "Redox OS starting..." does not, ++ // the hang is in args_ptr.read(), serial::init(), or graphical_debug::init(). ++ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++ { ++ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'R', options(nostack, preserves_flags)); ++ } ++ + let bootstrap = { + let args = args_ptr.read(); + +@@ -91,27 +100,49 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { + // Set up graphical debug + graphical_debug::init(args.env()); + ++ // SECOND CANARY: write 'S' to COM1 after serial init. ++ // If 'R' appears but 'S' does not, the hang is in serial::init() or graphical_debug::init(). 
++ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++ { ++ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'S', options(nostack, preserves_flags)); ++ } ++ + info!("Redox OS starting..."); + args.print(); + ++ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'1', options(nostack, preserves_flags)); } ++ + // Set up GDT + gdt::init_bsp(stack_end); + ++ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'2', options(nostack, preserves_flags)); } ++ + // Set up IDT + idt::init_bsp(); + ++ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'3', options(nostack, preserves_flags)); } ++ + // Initialize RMM + #[cfg(target_arch = "x86")] + crate::startup::memory::init(&args, Some(0x100000), Some(0x40000000)); + #[cfg(target_arch = "x86_64")] + crate::startup::memory::init(&args, Some(0x100000), None); + ++ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'4', options(nostack, preserves_flags)); } ++ + // Initialize paging + paging::init(); + + #[cfg(target_arch = "x86_64")] + crate::arch::alternative::early_init(true); + ++ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'5', options(nostack, preserves_flags)); } ++ + // Set up syscall instruction + interrupt::syscall::init(); + +@@ -121,6 +152,9 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! 
{ + // Activate memory logging + crate::log::init(); + ++ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'6', options(nostack, preserves_flags)); } ++ + // Initialize miscellaneous processor features + #[cfg(target_arch = "x86_64")] + crate::arch::misc::init(LogicalCpuId::BSP); +@@ -128,6 +162,9 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { + // Initialize devices + device::init(); + ++ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'7', options(nostack, preserves_flags)); } ++ + // Read ACPI tables, starts APs + if cfg!(feature = "acpi") { + crate::acpi::init(args.acpi_rsdp()); +diff --git a/src/asm/x86_64/s3_wakeup.asm b/src/asm/x86_64/s3_wakeup.asm +new file mode 100644 +index 00000000..7beeccf6 +--- /dev/null ++++ b/src/asm/x86_64/s3_wakeup.asm +@@ -0,0 +1,110 @@ ++; ACPI S3 wake trampoline ++; compiled with nasm by build.rs, copied to physical 0x8000 before S3 entry ++ ++ORG 0x8000 ++SECTION .text ++USE16 ++ ++trampoline: ++ jmp short startup_wake ++ times 8 - ($ - trampoline) nop ++ .stack: dq 0 ++ .page_table: dq 0 ++ .code: dq 0 ++ ++startup_wake: ++ cli ++ ++ xor ax, ax ++ mov ds, ax ++ mov es, ax ++ mov ss, ax ++ mov sp, 0 ++ ++ mov edi, [trampoline.page_table] ++ mov cr3, edi ++ ++ mov eax, cr0 ++ and al, 11110011b ++ or al, 00100010b ++ mov cr0, eax ++ ++ mov eax, cr4 ++ or eax, 1 << 9 | 1 << 7 | 1 << 5 | 1 << 4 ++ mov cr4, eax ++ ++ fninit ++ ++ lgdt [gdtr] ++ ++ mov ecx, 0xC0000080 ++ rdmsr ++ or eax, 1 << 11 | 1 << 8 ++ wrmsr ++ ++ mov ebx, cr0 ++ or ebx, 1 << 31 | 1 << 16 | 1 ++ mov cr0, ebx ++ ++ jmp gdt.kernel_code:long_mode_wake ++ ++USE64 ++long_mode_wake: ++ mov rax, gdt.kernel_data ++ mov ds, rax ++ mov es, rax ++ mov fs, rax ++ mov gs, rax ++ mov ss, rax ++ ++ mov rsp, [trampoline.stack] ++ mov rax, [trampoline.code] ++ jmp rax ++ ++struc GDTEntry ++ .limitl 
resw 1 ++ .basel resw 1 ++ .basem resb 1 ++ .attribute resb 1 ++ .flags__limith resb 1 ++ .baseh resb 1 ++endstruc ++ ++attrib: ++ .present equ 1 << 7 ++ .user equ 1 << 4 ++ .code equ 1 << 3 ++ .writable equ 1 << 1 ++ ++flags: ++ .long_mode equ 1 << 5 ++ ++gdtr: ++ dw gdt.end + 1 ++ dq gdt ++ ++gdt: ++.null equ $ - gdt ++ dq 0 ++ ++.kernel_code equ $ - gdt ++istruc GDTEntry ++ at GDTEntry.limitl, dw 0 ++ at GDTEntry.basel, dw 0 ++ at GDTEntry.basem, db 0 ++ at GDTEntry.attribute, db attrib.present | attrib.user | attrib.code ++ at GDTEntry.flags__limith, db flags.long_mode ++ at GDTEntry.baseh, db 0 ++iend ++ ++.kernel_data equ $ - gdt ++istruc GDTEntry ++ at GDTEntry.limitl, dw 0 ++ at GDTEntry.basel, dw 0 ++ at GDTEntry.basem, db 0 ++ at GDTEntry.attribute, db attrib.present | attrib.user | attrib.writable ++ at GDTEntry.flags__limith, db 0 ++ at GDTEntry.baseh, db 0 ++iend ++ ++.end equ $ - gdt +diff --git a/src/context/context.rs b/src/context/context.rs +index c97c5166..6d723f49 100644 +--- a/src/context/context.rs ++++ b/src/context/context.rs +@@ -148,6 +148,8 @@ pub struct Context { + pub euid: u32, + pub egid: u32, + pub pid: usize, ++ /// Supplementary group IDs for access control decisions. 
++ pub groups: Vec, + + // See [`PreemptGuard`] + // +@@ -204,6 +206,7 @@ impl Context { + euid: 0, + egid: 0, + pid: 0, ++ groups: Vec::new(), + + #[cfg(feature = "syscall_debug")] + syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(), +@@ -479,6 +482,7 @@ impl Context { + uid: self.euid, + gid: self.egid, + pid: self.pid, ++ groups: self.groups.clone(), + } + } + } +diff --git a/src/context/file.rs b/src/context/file.rs +index 2d3790f1..150f483a 100644 +--- a/src/context/file.rs ++++ b/src/context/file.rs +@@ -4,7 +4,7 @@ use crate::{ + event, + scheme::{self, SchemeId}, + sync::{CleanLockToken, RwLock, L6}, +- syscall::error::Result, ++ syscall::error::{Error, Result, ESTALE}, + }; + use alloc::sync::Arc; + use syscall::{schemev2::NewFdFlags, RwFlags, O_APPEND, O_NONBLOCK}; +@@ -18,6 +18,7 @@ pub struct FileDescription { + pub offset: u64, + /// The scheme that this file refers to + pub scheme: SchemeId, ++ pub scheme_generation: Option, + /// The number the scheme uses to refer to this file + pub number: usize, + /// The flags passed to open or fcntl(SETFL) +@@ -32,6 +33,52 @@ bitflags! 
{ + } + } + impl FileDescription { ++ pub fn with_generation( ++ scheme: SchemeId, ++ scheme_generation: Option, ++ number: usize, ++ offset: u64, ++ flags: u32, ++ internal_flags: InternalFlags, ++ ) -> Self { ++ Self { ++ offset, ++ scheme, ++ scheme_generation, ++ number, ++ flags, ++ internal_flags, ++ } ++ } ++ ++ pub fn new( ++ scheme: SchemeId, ++ number: usize, ++ offset: u64, ++ flags: u32, ++ internal_flags: InternalFlags, ++ token: &mut CleanLockToken, ++ ) -> Self { ++ Self::with_generation( ++ scheme, ++ Some(scheme::current_scheme_generation(token.token(), scheme)), ++ number, ++ offset, ++ flags, ++ internal_flags, ++ ) ++ } ++ ++ pub fn get_scheme(&self, token: &mut CleanLockToken) -> Result { ++ if let Some(expected_generation) = self.scheme_generation ++ && expected_generation != scheme::current_scheme_generation(token.token(), self.scheme) ++ { ++ return Err(Error::new(ESTALE)); ++ } ++ ++ scheme::get_scheme(token.token(), self.scheme) ++ } ++ + pub fn rw_flags(&self, rw: RwFlags) -> u32 { + let mut ret = self.flags & !(O_NONBLOCK | O_APPEND) as u32; + if rw.contains(RwFlags::APPEND) { +@@ -76,7 +123,7 @@ impl FileDescription { + pub fn try_close(self, token: &mut CleanLockToken) -> Result<()> { + event::unregister_file(self.scheme, self.number, token); + +- let scheme = scheme::get_scheme(token.token(), self.scheme)?; ++ let scheme = self.get_scheme(token)?; + + scheme.close(self.number, token) + } +@@ -85,12 +132,12 @@ impl FileDescription { + impl FileDescriptor { + pub fn close(self, token: &mut CleanLockToken) -> Result<()> { + { +- let (scheme_id, number, internal_flags) = { ++ let (desc, number, internal_flags) = { + let desc = self.description.read(token.token()); +- (desc.scheme, desc.number, desc.internal_flags) ++ (*desc, desc.number, desc.internal_flags) + }; + if internal_flags.contains(InternalFlags::NOTIFY_ON_NEXT_DETACH) { +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; ++ let scheme = desc.get_scheme(token)?; + 
scheme.detach(number, token)?; + } + } +diff --git a/src/context/memory.rs b/src/context/memory.rs +index 93446ba7..127a34fd 100644 +--- a/src/context/memory.rs ++++ b/src/context/memory.rs +@@ -64,14 +64,13 @@ impl UnmapResult { + return Ok(()); + }; + +- let (scheme_id, number) = { +- let desc = description.write(token.token()); +- (desc.scheme, desc.number) ++ let (scheme, number) = { ++ let desc = *description.read(token.token()); ++ (desc.get_scheme(token)?, desc.number) + }; + +- let scheme_opt = scheme::get_scheme(token.token(), scheme_id); +- let funmap_result = scheme_opt +- .and_then(|scheme| scheme.kfunmap(number, base_offset, self.size, self.flags, token)); ++ let funmap_result = scheme ++ .kfunmap(number, base_offset, self.size, self.flags, token); + + if let Ok(fd) = Arc::try_unwrap(description) { + fd.into_inner().try_close(token)?; +@@ -2687,20 +2686,13 @@ fn correct_inner<'l>( + // XXX: This is cheating, but guaranteed we won't deadlock because we've dropped addr_space_guard + let mut token = unsafe { CleanLockToken::new() }; + +- let (scheme_id, scheme_number) = { +- let desc = &file_ref.description.read(token.token()); +- (desc.scheme, desc.number) ++ let desc = *file_ref.description.read(token.token()); ++ let scheme = desc.get_scheme(&mut token).map_err(|_| PfError::Segv)?; ++ let scheme_number = desc.number; ++ let user_inner = match scheme { ++ KernelSchemes::User(user) => user.inner, ++ _ => return Err(PfError::Segv), + }; +- let user_inner = scheme::get_scheme(token.token(), scheme_id) +- .ok() +- .and_then(|s| { +- if let KernelSchemes::User(user) = s { +- Some(user.inner) +- } else { +- None +- } +- }) +- .ok_or(PfError::Segv)?; + + let offset = file_ref.base_offset as u64 + (pages_from_grant_start * PAGE_SIZE) as u64; + user_inner +diff --git a/src/scheme/acpi.rs b/src/scheme/acpi.rs +index 87570a12..5d734691 100644 +--- a/src/scheme/acpi.rs ++++ b/src/scheme/acpi.rs +@@ -10,6 +10,7 @@ use syscall::{ + + use crate::{ + acpi::{RxsdtEnum, 
RXSDT_ENUM}, ++ arch::sleep, + context::file::InternalFlags, + event, + sync::{CleanLockToken, RwLock, WaitCondition, L1}, +@@ -40,6 +41,7 @@ enum HandleKind { + TopLevel, + Rxsdt, + ShutdownPipe, ++ SleepControl, + SchemeRoot, + } + +@@ -146,11 +148,11 @@ impl KernelScheme for AcpiScheme { + if flags & O_EXCL == O_EXCL || flags & O_SYMLINK == O_SYMLINK { + return Err(Error::new(EINVAL)); + } +- if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { +- return Err(Error::new(EROFS)); +- } + let (handle_kind, int_flags) = match path { + "" => { ++ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { ++ return Err(Error::new(EROFS)); ++ } + if flags & O_DIRECTORY != O_DIRECTORY && flags & O_STAT != O_STAT { + return Err(Error::new(EISDIR)); + } +@@ -158,17 +160,36 @@ impl KernelScheme for AcpiScheme { + (HandleKind::TopLevel, InternalFlags::POSITIONED) + } + "rxsdt" => { ++ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { ++ return Err(Error::new(EROFS)); ++ } + if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { + return Err(Error::new(ENOTDIR)); + } + (HandleKind::Rxsdt, InternalFlags::POSITIONED) + } + "kstop" => { ++ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { ++ return Err(Error::new(EROFS)); ++ } + if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { + return Err(Error::new(ENOTDIR)); + } + (HandleKind::ShutdownPipe, InternalFlags::empty()) + } ++ "sleep" => { ++ if flags & O_ACCMODE == O_RDONLY || flags & O_STAT == O_STAT { ++ // allowed ++ } else if flags & O_ACCMODE != syscall::flag::O_WRONLY ++ && flags & O_ACCMODE != syscall::flag::O_RDWR ++ { ++ return Err(Error::new(EINVAL)); ++ } ++ if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { ++ return Err(Error::new(ENOTDIR)); ++ } ++ (HandleKind::SleepControl, InternalFlags::POSITIONED) ++ } + _ => return Err(Error::new(ENOENT)), + }; + +@@ -191,6 +212,7 @@ impl KernelScheme for AcpiScheme { + Ok(match handle.kind 
{ + HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?.len() as u64, + HandleKind::ShutdownPipe => 1, ++ HandleKind::SleepControl => sleep::available_sleep_states().len() as u64, + HandleKind::TopLevel => 0, + HandleKind::SchemeRoot => return Err(Error::new(EBADF))?, + }) +@@ -253,6 +275,7 @@ impl KernelScheme for AcpiScheme { + + return dst_buf.copy_exactly(&[0x42]).map(|()| 1); + } ++ HandleKind::SleepControl => sleep::available_sleep_states(), + HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?, + HandleKind::TopLevel => return Err(Error::new(EISDIR)), + HandleKind::SchemeRoot => return Err(Error::new(EBADF)), +@@ -295,11 +318,45 @@ impl KernelScheme for AcpiScheme { + kind: DirentKind::Socket, + name: "kstop", + inode: 0, ++ next_opaque_id: 2, ++ })?; ++ } ++ if opaque <= 2 { ++ buf.entry(DirEntry { ++ kind: DirentKind::Regular, ++ name: "sleep", ++ inode: 0, + next_opaque_id: u64::MAX, + })?; + } + Ok(buf.finalize()) + } ++ fn kwrite( ++ &self, ++ id: usize, ++ buf: crate::syscall::usercopy::UserSliceRo, ++ _flags: u32, ++ _stored_flags: u32, ++ token: &mut CleanLockToken, ++ ) -> Result { ++ let handle = *HANDLES.read(token.token()).get(id)?; ++ ++ if handle.stat { ++ return Err(Error::new(EBADF)); ++ } ++ ++ match handle.kind { ++ HandleKind::SleepControl => { ++ let mut tmp = [0_u8; 16]; ++ let len = buf.copy_common_bytes_to_slice(&mut tmp)?; ++ let request = core::str::from_utf8(&tmp[..len]).map_err(|_| Error::new(EINVAL))?; ++ sleep::trigger_sleep_request(request)?; ++ Ok(len) ++ } ++ HandleKind::SchemeRoot => Err(Error::new(EBADF)), ++ _ => Err(Error::new(EBADF)), ++ } ++ } + fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result { + //TODO: construct useful path? 
+ buf.copy_common_bytes_from_slice("/scheme/kernel.acpi/".as_bytes()) +@@ -328,6 +385,11 @@ impl KernelScheme for AcpiScheme { + st_size: 1, + ..Default::default() + }, ++ HandleKind::SleepControl => Stat { ++ st_mode: MODE_FILE, ++ st_size: sleep::available_sleep_states().len().try_into().unwrap_or(u64::MAX), ++ ..Default::default() ++ }, + HandleKind::SchemeRoot => return Err(Error::new(EBADF)), + })?; + +diff --git a/src/scheme/debug.rs b/src/scheme/debug.rs +index c70ac579..4a23b3cf 100644 +--- a/src/scheme/debug.rs ++++ b/src/scheme/debug.rs +@@ -22,9 +22,10 @@ struct Handle { + + static HANDLES: RwLock> = RwLock::new(HandleMap::new()); + +-/// Add to the input queue ++/// Add to the input queue, translating CR to NL (ICRNL) for serial console compatibility. + pub fn debug_input(data: u8, token: &mut CleanLockToken) { +- INPUT.send(data, token); ++ let translated = if data == b'\r' { b'\n' } else { data }; ++ INPUT.send(translated, token); + } + + // Notify readers of input updates +@@ -106,12 +107,16 @@ impl KernelScheme for DebugScheme { + fn fevent( + &self, + id: usize, +- _flags: EventFlags, ++ flags: EventFlags, + token: &mut CleanLockToken, + ) -> Result { + let _handle = *HANDLES.read(token.token()).get(id)?; + +- Ok(EventFlags::empty()) ++ let mut ready = EventFlags::empty(); ++ if flags.contains(EventFlags::EVENT_READ) { ++ ready |= EventFlags::EVENT_READ; ++ } ++ Ok(ready) + } + + fn fsync(&self, id: usize, token: &mut CleanLockToken) -> Result<()> { +diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs +index d30272c1..765e547f 100644 +--- a/src/scheme/mod.rs ++++ b/src/scheme/mod.rs +@@ -14,7 +14,7 @@ use alloc::{ + }; + use core::{ + str, +- sync::atomic::{AtomicUsize, Ordering}, ++ sync::atomic::{AtomicU64, AtomicUsize, Ordering}, + }; + use hashbrown::hash_map::{self, DefaultHashBuilder, HashMap}; + use spin::Once; +@@ -169,6 +169,7 @@ enum Handle { + + /// Schemes list + static HANDLES: Once>> = Once::new(); ++static SCHEME_GENERATIONS: Once>> = 
Once::new(); + static SCHEME_LIST_NEXT_ID: AtomicUsize = AtomicUsize::new(MAX_GLOBAL_SCHEMES); + static SCHEME_LIST_ID: AtomicUsize = AtomicUsize::new(0); + +@@ -204,6 +205,10 @@ fn init_schemes() -> RwLock> { + RwLock::new(handles) + } + ++fn init_scheme_generations() -> RwLock> { ++ RwLock::new(HashMap::new()) ++} ++ + /// Get a handle to a scheme. + pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result { + match handles().read(token).get(&scheme_id) { +@@ -212,10 +217,33 @@ pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result, scheme_id: SchemeId) -> u64 { ++ scheme_generations() ++ .read(token) ++ .get(&scheme_id) ++ .map(|generation| generation.load(Ordering::Acquire)) ++ .unwrap_or(0) ++} ++ + fn handles<'a>() -> &'a RwLock> { + HANDLES.call_once(init_schemes) + } + ++fn scheme_generations<'a>() -> &'a RwLock> { ++ SCHEME_GENERATIONS.call_once(init_scheme_generations) ++} ++ ++fn increment_scheme_generation(scheme_id: SchemeId, token: &mut CleanLockToken) { ++ match scheme_generations().write(token.token()).entry(scheme_id) { ++ hash_map::Entry::Occupied(entry) => { ++ entry.get().fetch_add(1, Ordering::AcqRel); ++ } ++ hash_map::Entry::Vacant(entry) => { ++ entry.insert(AtomicU64::new(1)); ++ } ++ } ++} ++ + /// Scheme list type + pub struct SchemeList; + +@@ -260,9 +288,14 @@ impl SchemeList { + + /// Remove a scheme + fn remove(&self, id: usize, token: &mut CleanLockToken) { +- let scheme = handles().write(token.token()).remove(&SchemeId(id)); ++ let scheme_id = SchemeId(id); ++ let scheme = handles().write(token.token()).remove(&scheme_id); + + assert!(scheme.is_some()); ++ if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme.as_ref() { ++ user.inner.fail_pending_calls(token); ++ } ++ increment_scheme_generation(scheme_id, token); + if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme + && let Some(user) = Arc::into_inner(user.inner) + { +@@ -287,32 +320,32 @@ impl KernelScheme for 
SchemeList { + token: &mut CleanLockToken, + ) -> Result { + let scheme_id = SchemeId(scheme_id); +- match handles() +- .read(token.token()) +- .get(&scheme_id) +- .ok_or(Error::new(EBADF))? +- { +- Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => { +- let inner = inner.clone(); +- assert!(scheme_id == inner.scheme_id); +- let scheme = scheme_id; +- let params = unsafe { user_buf.read_exact::()? }; +- +- return Ok(OpenResult::External(Arc::new(RwLock::new( +- FileDescription { +- scheme, +- number: params.number, +- offset: params.offset, +- flags: params.flags as u32, +- internal_flags: InternalFlags::from_extra0(params.internal_flags) +- .ok_or(Error::new(EINVAL))?, +- }, +- )))); ++ let maybe_inner = { ++ let handles = handles().read(token.token()); ++ match handles.get(&scheme_id).ok_or(Error::new(EBADF))? { ++ Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => Some(inner.clone()), ++ Handle::SchemeCreationCapability => None, ++ _ => return Err(Error::new(EBADF)), + } +- Handle::SchemeCreationCapability => (), +- _ => return Err(Error::new(EBADF)), + }; + ++ if let Some(inner) = maybe_inner { ++ assert!(scheme_id == inner.scheme_id); ++ let params = unsafe { user_buf.read_exact::()? 
}; ++ ++ return Ok(OpenResult::External(Arc::new(RwLock::new( ++ FileDescription::new( ++ scheme_id, ++ params.number, ++ params.offset, ++ params.flags as u32, ++ InternalFlags::from_extra0(params.internal_flags) ++ .ok_or(Error::new(EINVAL))?, ++ token, ++ ), ++ )))); ++ } ++ + const EXPECTED: &[u8] = b"create-scheme"; + let mut buf = [0u8; EXPECTED.len()]; + +@@ -777,6 +810,7 @@ pub struct CallerCtx { + pub pid: usize, + pub uid: u32, + pub gid: u32, ++ pub groups: alloc::vec::Vec, + } + impl CallerCtx { + pub fn filter_uid_gid(self, euid: u32, egid: u32) -> Self { +@@ -785,6 +819,7 @@ impl CallerCtx { + pid: self.pid, + uid: euid, + gid: egid, ++ groups: self.groups, + } + } else { + self +diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs +index 47588e10..f38c4aec 100644 +--- a/src/scheme/proc.rs ++++ b/src/scheme/proc.rs +@@ -105,6 +105,7 @@ enum ContextHandle { + // Attr handles, to set ens/euid/egid/pid. + Authority, + Attr, ++ Groups, + + Status { + privileged: bool, +@@ -261,6 +262,7 @@ impl ProcScheme { + let handle = match actual_name { + "attrs" => ContextHandle::Attr, + "status" => ContextHandle::Status { privileged: true }, ++ "groups" => ContextHandle::Groups, + _ => return Err(Error::new(ENOENT)), + }; + +@@ -306,6 +308,11 @@ impl ProcScheme { + let id = NonZeroUsize::new(NEXT_ID.fetch_add(1, Ordering::Relaxed)) + .ok_or(Error::new(EMFILE))?; + let context = context::spawn(true, Some(id), ret, token)?; ++ { ++ let parent_groups = ++ context::current().read(token.token()).groups.clone(); ++ context.write(token.token()).groups = parent_groups; ++ } + HANDLES.write(token.token()).insert( + id.get(), + Handle { +@@ -849,17 +856,17 @@ impl KernelScheme for ProcScheme { + } + } + fn extract_scheme_number(fd: usize, token: &mut CleanLockToken) -> Result<(KernelSchemes, usize)> { +- let (scheme_id, number) = { ++ let desc = { + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = 
current.token_split(); ++ let (context, mut context_token) = current.token_split(); + let file_descriptor = context +- .get_file(FileHandle::from(fd), &mut token) ++ .get_file(FileHandle::from(fd), &mut context_token) + .ok_or(Error::new(EBADF))?; +- let desc = file_descriptor.description.read(token.token()); +- (desc.scheme, desc.number) ++ *file_descriptor.description.read(context_token.token()) + }; +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; ++ let scheme = desc.get_scheme(token)?; ++ let number = desc.number; + + Ok((scheme, number)) + } +@@ -1271,6 +1278,39 @@ impl ContextHandle { + guard.prio = (info.prio as usize).min(39); + Ok(size_of::()) + } ++ Self::Groups => { ++ const NGROUPS_MAX: usize = 65536; ++ if buf.len() % size_of::() != 0 { ++ return Err(Error::new(EINVAL)); ++ } ++ let count = buf.len() / size_of::(); ++ if count > NGROUPS_MAX { ++ return Err(Error::new(EINVAL)); ++ } ++ let mut groups = Vec::with_capacity(count); ++ for chunk in buf.in_exact_chunks(size_of::()).take(count) { ++ groups.push(chunk.read_u32()?); ++ } ++ let proc_id = { ++ let guard = context.read(token.token()); ++ guard.owner_proc_id ++ }; ++ { ++ let mut guard = context.write(token.token()); ++ guard.groups = groups.clone(); ++ } ++ if let Some(pid) = proc_id { ++ let mut contexts = context::contexts(token.downgrade()); ++ let (contexts, mut t) = contexts.token_split(); ++ for context_ref in contexts.iter() { ++ let mut ctx = context_ref.write(t.token()); ++ if ctx.owner_proc_id == Some(pid) { ++ ctx.groups = groups.clone(); ++ } ++ } ++ } ++ Ok(count * size_of::()) ++ } + ContextHandle::OpenViaDup => { + let mut args = buf.usizes(); + +@@ -1475,6 +1515,15 @@ impl ContextHandle { + debug_name, + }) + } ++ Self::Groups => { ++ let c = &context.read(token.token()); ++ let max = buf.len() / size_of::(); ++ let count = c.groups.len().min(max); ++ for (chunk, gid) in buf.in_exact_chunks(size_of::()).zip(&c.groups).take(count) { ++ 
chunk.copy_from_slice(&gid.to_ne_bytes())?; ++ } ++ Ok(count * size_of::()) ++ } + ContextHandle::Sighandler => { + let data = match context.read(token.token()).sig { + Some(ref sig) => SetSighandlerData { +diff --git a/src/scheme/user.rs b/src/scheme/user.rs +index b9013021..dfbf66b1 100644 +--- a/src/scheme/user.rs ++++ b/src/scheme/user.rs +@@ -80,6 +80,7 @@ const ONE: NonZeroUsize = match NonZeroUsize::new(1) { + Some(one) => one, + None => unreachable!(), + }; ++const MAX_SPURIOUS_WAKEUPS: usize = 100; + + enum ParsedCqe { + TriggerFevent { +@@ -209,6 +210,8 @@ impl UserInner { + caller_responsible: &mut PageSpan, + token: &mut CleanLockToken, + ) -> Result { ++ let mut remaining_spurious_wakeups = MAX_SPURIOUS_WAKEUPS; ++ + { + // Disable preemption to avoid context switches between setting the + // process state and sending the scheme request. The process is made +@@ -261,7 +264,10 @@ impl UserInner { + }; + + let states = self.states.lock(token.token()); +- let (mut states, mut token) = states.into_split(); ++ let (mut states, mut state_token) = states.into_split(); ++ let mut timed_out_descriptions = None; ++ let mut remove_state = false; ++ let mut timed_out = false; + match states.get_mut(sqe.tag as usize) { + // invalid state + None => return Err(Error::new(EBADFD)), +@@ -274,24 +280,35 @@ impl UserInner { + fds, + } => { + let maybe_eintr = +- eintr_if_sigkill(&mut callee_responsible, &mut token.token()); +- *o = State::Waiting { +- canceling: true, +- callee_responsible, +- context, +- fds, +- }; ++ eintr_if_sigkill(&mut callee_responsible, &mut state_token.token()); + +- maybe_eintr?; ++ if maybe_eintr.is_ok() { ++ remaining_spurious_wakeups = ++ remaining_spurious_wakeups.saturating_sub(1); ++ } ++ ++ if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 { ++ timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds)); ++ remove_state = true; ++ } else { ++ *o = State::Waiting { ++ canceling: true, ++ callee_responsible, ++ context, 
++ fds, ++ }; ++ } + +- context::current() +- .write(token.token()) +- .block("UserInner::call (woken up after cancelation request)"); ++ maybe_eintr?; + +- // We do not want to drop the lock before blocking +- // as if we get preempted in between we might miss a +- // wakeup. +- drop(states); ++ if remove_state { ++ states.remove(sqe.tag as usize); ++ timed_out = true; ++ } else { ++ context::current() ++ .write(state_token.token()) ++ .block("UserInner::call (woken up after cancelation request)"); ++ } + } + // spurious wakeup + State::Waiting { +@@ -300,60 +317,76 @@ impl UserInner { + context, + mut callee_responsible, + } => { +- let maybe_eintr = eintr_if_sigkill(&mut callee_responsible, &mut token); + let current_context = context::current(); ++ let maybe_eintr = ++ eintr_if_sigkill(&mut callee_responsible, &mut state_token); ++ ++ if maybe_eintr.is_ok() { ++ remaining_spurious_wakeups = ++ remaining_spurious_wakeups.saturating_sub(1); ++ } + +- *o = State::Waiting { +- // Currently we treat all spurious wakeups to have the same behavior +- // as signals (i.e., we send a cancellation request). It is not something +- // that should happen, but it certainly can happen, for example if a context +- // is awoken through its thread handle without setting any sig bits, or if the +- // caller clears its own sig bits. If it actually is a signal, then it is the +- // intended behavior. +- canceling: true, +- fds, +- context, +- callee_responsible, +- }; ++ if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 { ++ timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds)); ++ remove_state = true; ++ } else { ++ *o = State::Waiting { ++ // Currently we treat all spurious wakeups to have the same behavior ++ // as signals (i.e., we send a cancellation request). 
It is not something ++ // that should happen, but it certainly can happen, for example if a context ++ // is awoken through its thread handle without setting any sig bits, or if the ++ // caller clears its own sig bits. If it actually is a signal, then it is the ++ // intended behavior. ++ canceling: true, ++ fds, ++ context, ++ callee_responsible, ++ }; ++ } + + maybe_eintr?; + +- // We do not want to preempt between sending the +- // cancellation and blocking again where we might +- // miss a wakeup. +- let mut preempt = PreemptGuardL1::new(¤t_context, &mut token); +- let token = preempt.token(); +- +- self.todo.send_locked( +- Sqe { +- opcode: Opcode::Cancel as u8, +- sqe_flags: SqeFlags::ONEWAY, +- tag: sqe.tag, +- ..Default::default() +- }, +- token.token(), +- ); +- event::trigger_locked( +- self.root_id, +- self.scheme_id.get(), +- EVENT_READ, +- token.token(), +- ); +- +- // 1. If cancellation was requested and arrived +- // before the scheme processed the request, an +- // acknowledgement will be sent back after the +- // cancellation is processed and we will be woken up +- // again. State will be State::Responded then. +- // +- // 2. If cancellation was requested but the scheme +- // already processed the request, we will receive +- // the actual response next and woken up again. +- // State will be State::Responded then. +- context::current() +- .write(token.token()) +- .block("UserInner::call (spurious wakeup)"); +- drop(states); ++ if remove_state { ++ states.remove(sqe.tag as usize); ++ timed_out = true; ++ } else { ++ // We do not want to preempt between sending the ++ // cancellation and blocking again where we might ++ // miss a wakeup. 
++ let mut preempt = ++ PreemptGuardL1::new(¤t_context, &mut state_token); ++ let token = preempt.token(); ++ ++ self.todo.send_locked( ++ Sqe { ++ opcode: Opcode::Cancel as u8, ++ sqe_flags: SqeFlags::ONEWAY, ++ tag: sqe.tag, ++ ..Default::default() ++ }, ++ token.token(), ++ ); ++ event::trigger_locked( ++ self.root_id, ++ self.scheme_id.get(), ++ EVENT_READ, ++ token.token(), ++ ); ++ ++ // 1. If cancellation was requested and arrived ++ // before the scheme processed the request, an ++ // acknowledgement will be sent back after the ++ // cancellation is processed and we will be woken up ++ // again. State will be State::Responded then. ++ // ++ // 2. If cancellation was requested but the scheme ++ // already processed the request, we will receive ++ // the actual response next and woken up again. ++ // State will be State::Responded then. ++ context::current() ++ .write(token.token()) ++ .block("UserInner::call (spurious wakeup)"); ++ } + } + + // invalid state +@@ -368,7 +401,67 @@ impl UserInner { + } + }, + } ++ ++ if let Some(descriptions) = timed_out_descriptions { ++ drop(states); ++ for desc in descriptions { ++ let _ = desc.try_close(token); ++ } ++ } ++ ++ if timed_out { ++ return Err(Error::new(ETIMEDOUT)); ++ } ++ } ++ } ++ } ++ ++ fn collect_descriptions_to_close( ++ fds: Vec>, ++ ) -> Vec { ++ fds.into_iter() ++ .filter_map(|fd| Arc::try_unwrap(fd).ok()) ++ .map(RwLock::into_inner) ++ .collect() ++ } ++ ++ pub fn fail_pending_calls(&self, token: &mut CleanLockToken) { ++ let descriptions_to_close = { ++ let mut states_lock = self.states.lock(token.token()); ++ let (states, mut lock_token) = states_lock.token_split(); ++ let mut descriptions_to_close = Vec::new(); ++ let mut states_to_remove = Vec::new(); ++ ++ for (id, state) in states.iter_mut() { ++ match mem::replace(state, State::Placeholder) { ++ State::Waiting { context, fds, .. 
} => { ++ descriptions_to_close.extend(Self::collect_descriptions_to_close(fds)); ++ ++ match context.upgrade() { ++ Some(context) => { ++ *state = State::Responded(Response::Regular( ++ Err(Error::new(ENODEV)), ++ 0, ++ false, ++ )); ++ context.write(lock_token.token()).unblock(); ++ } ++ None => states_to_remove.push(id), ++ } ++ } ++ old_state => *state = old_state, ++ } + } ++ ++ for id in states_to_remove { ++ states.remove(id); ++ } ++ ++ descriptions_to_close ++ }; ++ ++ for desc in descriptions_to_close { ++ let _ = desc.try_close(token); + } + } + +@@ -1283,6 +1376,7 @@ impl UserInner { + } + + pub fn into_drop(self, token: &mut CleanLockToken) { ++ self.fail_pending_calls(token); + self.todo.condition.into_drop(token); + } + } +diff --git a/src/startup/memory.rs b/src/startup/memory.rs +index 26922dde..9fb5fb10 100644 +--- a/src/startup/memory.rs ++++ b/src/startup/memory.rs +@@ -74,14 +74,16 @@ impl MemoryEntry { + } + + struct MemoryMap { +- entries: [MemoryEntry; 512], ++ entries: [MemoryEntry; 1024], + size: usize, + } + + impl MemoryMap { + fn register(&mut self, base: usize, size: usize, kind: BootloaderMemoryKind) { + if self.size >= self.entries.len() { +- panic!("Early memory map overflow!"); ++ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] ++ unsafe { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'!', options(nostack, preserves_flags)); } ++ panic!("Early memory map overflow at entry {} (max {})", self.size, self.entries.len()); + } + let start = if kind == BootloaderMemoryKind::Free { + align_up(base) +@@ -134,7 +136,7 @@ static MEMORY_MAP: SyncUnsafeCell = SyncUnsafeCell::new(MemoryMap { + start: 0, + end: 0, + kind: BootloaderMemoryKind::Null, +- }; 512], ++ }; 1024], + size: 0, + }); + +@@ -323,7 +325,16 @@ unsafe fn map_memory(areas: &[MemoryArea], mut bump_allocator: &mut Bum + } + } + +- let kernel_area = (*MEMORY_MAP.get()).kernel().unwrap(); ++ let kernel_area = match (*MEMORY_MAP.get()).kernel() { ++ Some(area) 
=> area, ++ None => { ++ println!("FATAL: kernel memory area not found in boot memory map"); ++ println!("Cannot determine kernel base address. Halting."); ++ loop { ++ core::hint::spin_loop(); ++ } ++ } ++ }; + let kernel_base = kernel_area.start; + let kernel_size = kernel_area.end.saturating_sub(kernel_area.start); + // Map kernel at KERNEL_OFFSET +diff --git a/src/startup/mod.rs b/src/startup/mod.rs +index 8ad3cdf7..86aabc22 100644 +--- a/src/startup/mod.rs ++++ b/src/startup/mod.rs +@@ -149,6 +149,15 @@ static BOOTSTRAP: spin::Once = spin::Once::new(); + pub(crate) static AP_READY: AtomicBool = AtomicBool::new(false); + static BSP_READY: AtomicBool = AtomicBool::new(false); + ++#[cold] ++fn halt_boot(message: &str) -> ! { ++ print!("{message}"); ++ println!("Kernel boot cannot continue. Halting."); ++ loop { ++ hint::spin_loop(); ++ } ++} ++ + /// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this + pub(crate) fn kmain(bootstrap: Bootstrap) -> ! { + let mut token = unsafe { CleanLockToken::new() }; +@@ -180,9 +189,7 @@ pub(crate) fn kmain(bootstrap: Bootstrap) -> ! 
{ + context.euid = 0; + context.egid = 0; + } +- Err(err) => { +- panic!("failed to spawn userspace_init: {:?}", err); +- } ++ Err(_err) => halt_boot("FATAL: failed to spawn first userspace process userspace_init\n"), + } + + run_userspace(&mut token) +diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs +index bf984641..10c6a92c 100644 +--- a/src/syscall/fs.rs ++++ b/src/syscall/fs.rs +@@ -12,7 +12,7 @@ use crate::{ + memory::{AddrSpace, GenericFlusher, Grant, PageSpan, TlbShootdownActions}, + }, + memory::{Page, VirtualAddress, PAGE_SIZE}, +- scheme::{self, FileHandle, KernelScheme, OpenResult, StrOrBytes}, ++ scheme::{FileHandle, KernelScheme, OpenResult, StrOrBytes}, + sync::{CleanLockToken, RwLock}, + syscall::{data::Stat, error::*, flag::*}, + }; +@@ -45,7 +45,7 @@ pub fn file_op_generic_ext( + (file, desc) + }; + +- let scheme = scheme::get_scheme(token.token(), desc.scheme)?; ++ let scheme = desc.get_scheme(token)?; + + op(&*scheme, file.description, desc, token) + } +@@ -73,14 +73,18 @@ pub fn openat( + ) -> Result { + let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; + +- let (scheme_id, number) = { ++ let desc = { + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); +- let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?; +- let desc = pipe.description.read(token.token()); +- (desc.scheme, desc.number) ++ let (context, mut context_token) = current.token_split(); ++ let pipe = context ++ .get_file(fh, &mut context_token) ++ .ok_or(Error::new(EBADF))?; ++ *pipe.description.read(context_token.token()) + }; ++ let scheme = desc.get_scheme(token)?; ++ let number = desc.number; ++ let scheme_id = desc.scheme; + + let caller_ctx = context::current() + .read(token.token()) +@@ -88,8 +92,6 @@ pub fn openat( + .filter_uid_gid(euid, egid); + + let new_description = { +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; +- + let res = 
scheme.kopenat( + number, + StrOrBytes::from_str(&path_buf), +@@ -101,13 +103,14 @@ pub fn openat( + + match res? { + OpenResult::SchemeLocal(number, internal_flags) => { +- Arc::new(RwLock::new(FileDescription { +- offset: 0, +- internal_flags, +- scheme: scheme_id, ++ Arc::new(RwLock::new(FileDescription::new( ++ scheme_id, + number, +- flags: (flags & !O_CLOEXEC) as u32, +- })) ++ 0, ++ (flags & !O_CLOEXEC) as u32, ++ internal_flags, ++ token, ++ ))) + } + OpenResult::External(desc) => desc, + } +@@ -137,16 +140,17 @@ pub fn unlinkat( + ) -> Result<()> { + let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; + +- let (number, scheme_id) = { ++ let desc = { + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); +- let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?; +- let desc = pipe.description.read(token.token()); +- (desc.number, desc.scheme) ++ let (context, mut context_token) = current.token_split(); ++ let pipe = context ++ .get_file(fh, &mut context_token) ++ .ok_or(Error::new(EBADF))?; ++ *pipe.description.read(context_token.token()) + }; +- +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; ++ let number = desc.number; ++ let scheme = desc.get_scheme(token)?; + + let caller_ctx = context::current() + .read(token.token()) +@@ -199,17 +203,18 @@ fn duplicate_file( + let description = { *file.description.read(token.token()) }; + + let new_description = { +- let scheme = scheme::get_scheme(token.token(), description.scheme)?; ++ let scheme = description.get_scheme(token)?; + + match scheme.kdup(description.number, user_buf, caller_ctx, token)? 
{ + OpenResult::SchemeLocal(number, internal_flags) => { +- Arc::new(RwLock::new(FileDescription { +- offset: 0, +- internal_flags, +- scheme: description.scheme, ++ Arc::new(RwLock::new(FileDescription::new( ++ description.scheme, + number, +- flags: description.flags, +- })) ++ 0, ++ description.flags, ++ internal_flags, ++ token, ++ ))) + } + OpenResult::External(desc) => desc, + } +@@ -296,11 +301,10 @@ fn call_normal( + } + .ok_or(Error::new(EBADF))?; + +- let (scheme_id, number) = { +- let desc = file.description.read(token.token()); +- (desc.scheme, desc.number) ++ let (scheme, number) = { ++ let desc = *file.description.read(token.token()); ++ (desc.get_scheme(token)?, desc.number) + }; +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; + + if flags.contains(CallFlags::STD_FS) { + scheme.translate_std_fs_call(number, file.description, payload, flags, metadata, token) +@@ -341,28 +345,28 @@ fn fdwrite_inner( + ) -> Result { + // TODO: Ensure deadlocks can't happen + let (scheme, number, descs_to_send) = { +- let (scheme, number) = { ++ let desc = { + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); ++ let (context, mut context_token) = current.token_split(); + let file_descriptor = context +- .get_file(socket, &mut token) ++ .get_file(socket, &mut context_token) + .ok_or(Error::new(EBADF))?; +- let desc = &file_descriptor.description.read(token.token()); +- (desc.scheme, desc.number) ++ *file_descriptor.description.read(context_token.token()) + }; +- let scheme = scheme::get_scheme(token.token(), scheme)?; ++ let scheme = desc.get_scheme(token)?; ++ let number = desc.number; + + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); ++ let (context, mut context_token) = current.token_split(); + ( + scheme, + number, + if flags.contains(CallFlags::FD_CLONE) { +- 
context.bulk_get_files(&target_fds, &mut token) ++ context.bulk_get_files(&target_fds, &mut context_token) + } else { +- context.bulk_remove_files(&target_fds, &mut token) ++ context.bulk_remove_files(&target_fds, &mut context_token) + }? + .into_iter() + .map(|f| f.description) +@@ -395,18 +399,22 @@ fn call_fdread( + metadata: &[u64], + token: &mut CleanLockToken, + ) -> Result { ++ let desc = { ++ let current_lock = context::current(); ++ let mut current = current_lock.read(token.token()); ++ let (context, mut context_token) = current.token_split(); ++ let file_descriptor = context ++ .get_file(fd, &mut context_token) ++ .ok_or(Error::new(EBADF))?; ++ *file_descriptor.description.read(context_token.token()) ++ }; + let (scheme, number) = { +- let (scheme, number) = { +- let current_lock = context::current(); +- let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); +- let file_descriptor = context.get_file(fd, &mut token).ok_or(Error::new(EBADF))?; +- let desc = file_descriptor.description.read(token.token()); +- (desc.scheme, desc.number) +- }; +- let scheme = scheme::get_scheme(token.token(), scheme)?; +- +- (scheme, number) ++ let scheme = desc.get_scheme(token)?; ++ let number = desc.number; ++ ( ++ scheme, ++ number, ++ ) + }; + + scheme.kfdread(number, payload, flags, metadata, token) +@@ -440,9 +448,9 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken) + } + .ok_or(Error::new(EBADF))?; + +- let (scheme_id, number, flags) = { +- let desc = file.description.write(token.token()); +- (desc.scheme, desc.number, desc.flags) ++ let (number, flags, desc) = { ++ let desc = *file.description.read(token.token()); ++ (desc.number, desc.flags, desc) + }; + + if cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC { +@@ -460,7 +468,7 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken) + + // Communicate fcntl with scheme + if cmd != F_GETFD && cmd != F_SETFD { +- let 
scheme = scheme::get_scheme(token.token(), scheme_id)?; ++ let scheme = desc.get_scheme(token)?; + + scheme.fcntl(number, cmd, arg, token)?; + }; +@@ -518,13 +526,11 @@ pub fn flink(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken) + let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; + let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; + +- let (number, scheme_id) = { +- let desc = file.description.read(token.token()); +- (desc.number, desc.scheme) ++ let (number, scheme) = { ++ let desc = *file.description.read(token.token()); ++ (desc.number, desc.get_scheme(token)?) + }; + +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; +- + // TODO: Check EXDEV. + /* + if scheme_id != description.scheme { +@@ -554,13 +560,11 @@ pub fn frename(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken + let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; + let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; + +- let (number, scheme_id) = { +- let desc = file.description.read(token.token()); +- (desc.number, desc.scheme) ++ let (number, scheme) = { ++ let desc = *file.description.read(token.token()); ++ (desc.number, desc.get_scheme(token)?) + }; + +- let scheme = scheme::get_scheme(token.token(), scheme_id)?; +- + // TODO: Check EXDEV. 
+ /* + if scheme_id != description.scheme { +diff --git a/src/syscall/process.rs b/src/syscall/process.rs +index e83da427..8a1d385e 100644 +--- a/src/syscall/process.rs ++++ b/src/syscall/process.rs +@@ -271,23 +271,26 @@ unsafe fn bootstrap_mem(bootstrap: &crate::startup::Bootstrap) -> &'static [u8] + } + + fn insert_fd(scheme: SchemeId, number: usize, cloexec: bool, token: &mut CleanLockToken) -> usize { ++ let description = Arc::new(RwLock::new(FileDescription::new( ++ scheme, ++ number, ++ 0, ++ (O_CREAT | O_RDWR) as u32, ++ InternalFlags::empty(), ++ token, ++ ))); ++ + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); +- let (context, mut token) = current.token_split(); ++ let (context, mut context_token) = current.token_split(); + context + .add_file_min( + FileDescriptor { +- description: Arc::new(RwLock::new(FileDescription { +- scheme, +- number, +- offset: 0, +- flags: (O_CREAT | O_RDWR) as u32, +- internal_flags: InternalFlags::empty(), +- })), ++ description, + cloexec, + }, + syscall::flag::UPPER_FDTBL_TAG + scheme.get(), +- &mut token, ++ &mut context_token, + ) + .expect("failed to insert fd to current context") + .get() diff --git a/local/patches/relibc/P10-stack-size-8mb.patch b/local/patches/relibc/P10-stack-size-8mb.patch new file mode 100644 index 00000000..28d813e4 --- /dev/null +++ b/local/patches/relibc/P10-stack-size-8mb.patch @@ -0,0 +1,11 @@ +--- a/redox-rt/src/arch/x86_64.rs 2026-04-28 07:19:14.000000000 +0100 ++++ b/redox-rt/src/arch/x86_64.rs 2026-05-04 08:13:45.179788927 +0100 +@@ -21,7 +21,7 @@ + + // Setup a stack starting from the very end of the address space, and then growing downwards. 
+ pub const STACK_TOP: usize = 1 << 47; +-pub const STACK_SIZE: usize = 1024 * 1024; ++pub const STACK_SIZE: usize = 8 * 1024 * 1024; + + #[derive(Debug, Default)] + #[repr(C)] diff --git a/local/patches/relibc/P11-getrlimit-getrusage.patch b/local/patches/relibc/P11-getrlimit-getrusage.patch new file mode 100644 index 00000000..575b05b2 --- /dev/null +++ b/local/patches/relibc/P11-getrlimit-getrusage.patch @@ -0,0 +1,349 @@ +diff --git a/src/header/sys_resource/mod.rs b/src/header/sys_resource/mod.rs +index 9166007a..c645e8eb 100644 +--- a/src/header/sys_resource/mod.rs ++++ b/src/header/sys_resource/mod.rs +@@ -92,7 +92,10 @@ pub unsafe extern "C" fn setpriority(which: c_int, who: id_t, nice: c_int) -> c_ + /// See . + #[unsafe(no_mangle)] + pub unsafe extern "C" fn getrlimit(resource: c_int, rlp: *mut rlimit) -> c_int { +- let rlp = unsafe { Out::nonnull(rlp) }; ++ let Some(rlp) = (unsafe { Out::nullable(rlp) }) else { ++ crate::platform::ERRNO.set(crate::header::errno::EFAULT); ++ return -1; ++ }; + + Sys::getrlimit(resource, rlp) + .map(|()| 0) +@@ -110,7 +113,12 @@ pub unsafe extern "C" fn setrlimit(resource: c_int, rlp: *const rlimit) -> c_int + /// See . 
+ #[unsafe(no_mangle)] + pub unsafe extern "C" fn getrusage(who: c_int, r_usage: *mut rusage) -> c_int { +- Sys::getrusage(who, unsafe { Out::nonnull(r_usage) }) ++ let Some(r_usage) = (unsafe { Out::nullable(r_usage) }) else { ++ crate::platform::ERRNO.set(crate::header::errno::EFAULT); ++ return -1; ++ }; ++ ++ Sys::getrusage(who, r_usage) + .map(|()| 0) + .or_minus_one_errno() + } +diff --git a/src/header/unistd/mod.rs b/src/header/unistd/mod.rs +index fdd1ff0d..9e3a20e9 100644 +--- a/src/header/unistd/mod.rs ++++ b/src/header/unistd/mod.rs +@@ -521,7 +521,7 @@ pub extern "C" fn getdtablesize() -> c_int { + }; + if r == 0 { + let cur = unsafe { lim.assume_init() }.rlim_cur; +- match cur { ++ return match cur { + c if c < i32::MAX as u64 => c as i32, + _ => i32::MAX, + }; +diff --git a/src/header/unistd/sysconf/linux.rs b/src/header/unistd/sysconf/linux.rs +index 2ec17eaf..8ec01d2d 100644 +--- a/src/header/unistd/sysconf/linux.rs ++++ b/src/header/unistd/sysconf/linux.rs +@@ -167,11 +167,33 @@ pub(super) fn sysconf_impl(name: c_int) -> c_long { + // Values from musl which we can assume is correct. 
+ match name { + _SC_CLK_TCK => 100, +- // TODO: getrlimit +- _SC_CHILD_MAX => -1, ++ _SC_CHILD_MAX => { ++ let mut lim = core::mem::MaybeUninit::<crate::header::sys_resource::rlimit>::uninit(); ++ let r = unsafe { ++ crate::header::sys_resource::getrlimit( ++ crate::header::sys_resource::RLIMIT_NPROC as c_int, ++ lim.as_mut_ptr().cast::<crate::header::sys_resource::rlimit>(), ++ ) ++ }; ++ if r == 0 { ++ let cur = unsafe { lim.assume_init() }.rlim_cur; ++ if cur == crate::header::sys_resource::RLIM_INFINITY { -1 } else if cur > c_long::MAX as u64 { c_long::MAX } else { cur as c_long } ++ } else { -1 } ++ } + _SC_NGROUPS_MAX => NGROUPS_MAX as c_long, +- // TODO: getrlimit +- _SC_OPEN_MAX => -1, ++ _SC_OPEN_MAX => { ++ let mut lim = core::mem::MaybeUninit::<crate::header::sys_resource::rlimit>::uninit(); ++ let r = unsafe { ++ crate::header::sys_resource::getrlimit( ++ crate::header::sys_resource::RLIMIT_NOFILE as c_int, ++ lim.as_mut_ptr().cast::<crate::header::sys_resource::rlimit>(), ++ ) ++ }; ++ if r == 0 { ++ let cur = unsafe { lim.assume_init() }.rlim_cur; ++ if cur == crate::header::sys_resource::RLIM_INFINITY { -1 } else if cur > c_long::MAX as u64 { c_long::MAX } else { cur as c_long } ++ } else { -1 } ++ } + _SC_STREAM_MAX => -1, + // TODO: limits.h + _SC_TZNAME_MAX => -1, +diff --git a/src/header/unistd/sysconf/redox.rs b/src/header/unistd/sysconf/redox.rs +index 97ee81aa..3d7f96dc 100644 +--- a/src/header/unistd/sysconf/redox.rs ++++ b/src/header/unistd/sysconf/redox.rs +@@ -5,7 +5,7 @@ use alloc::string::String; + use crate::{ + error::Errno, + fs::File, +- header::{errno, fcntl, limits, sys_statvfs}, ++ header::{errno, fcntl, limits, sys_resource, sys_statvfs}, + io::Read, + out::Out, + platform::{ +@@ -65,14 +65,31 @@ pub const _SC_SIGQUEUE_MAX: c_int = 190; + pub const _SC_REALTIME_SIGNALS: c_int = 191; + // } POSIX.1 + ++fn resource_limit_sysconf(resource: c_int) -> c_long { ++ let mut lim = core::mem::MaybeUninit::<sys_resource::rlimit>::uninit(); ++ let r = unsafe { sys_resource::getrlimit(resource, lim.as_mut_ptr()) }; ++ if r != 0 { ++ return -1; ++ } ++ ++ let cur = unsafe { lim.assume_init() }.rlim_cur; ++ if
cur == sys_resource::RLIM_INFINITY { ++ -1 ++ } else if cur > c_long::MAX as u64 { ++ c_long::MAX ++ } else { ++ cur as c_long ++ } ++} ++ + pub(super) fn sysconf_impl(name: c_int) -> c_long { + //TODO: Real values + match name { + _SC_ARG_MAX => 4096, +- _SC_CHILD_MAX => 65536, ++ _SC_CHILD_MAX => resource_limit_sysconf(sys_resource::RLIMIT_NPROC as c_int), + _SC_CLK_TCK => 100, + _SC_NGROUPS_MAX => limits::NGROUPS_MAX as c_long, +- _SC_OPEN_MAX => 1024, ++ _SC_OPEN_MAX => resource_limit_sysconf(sys_resource::RLIMIT_NOFILE as c_int), + _SC_STREAM_MAX => 16, + _SC_TZNAME_MAX => -1, + _SC_VERSION => 200809, +diff --git a/src/platform/redox/mod.rs b/src/platform/redox/mod.rs +index 8b5560e7..e6dcac55 100644 +--- a/src/platform/redox/mod.rs ++++ b/src/platform/redox/mod.rs +@@ -43,7 +43,7 @@ use crate::{ + sys_file, + sys_mman::{MAP_ANONYMOUS, PROT_READ, PROT_WRITE}, + sys_random, +- sys_resource::{RLIM_INFINITY, rlimit, rusage}, ++ sys_resource::{RLIM_INFINITY, RLIMIT_NLIMITS, rlimit, rusage}, + sys_select::timeval, + sys_stat::{S_ISVTX, stat}, + sys_statvfs::statvfs, +@@ -103,6 +103,32 @@ macro_rules! path_from_c_str { + + static CLONE_LOCK: RwLock<()> = RwLock::new(()); + ++/// Per-process resource limits. Initialized with Linux-compatible defaults. ++/// Inherited automatically across fork() (kernel copies address space). 
++const RLIMIT_DEFAULTS: [rlimit; RLIMIT_NLIMITS as usize] = [ ++ rlimit { rlim_cur: RLIM_INFINITY, rlim_max: RLIM_INFINITY }, // RLIMIT_CPU ++ rlimit { rlim_cur: RLIM_INFINITY, rlim_max: RLIM_INFINITY }, // RLIMIT_FSIZE ++ rlimit { rlim_cur: RLIM_INFINITY, rlim_max: RLIM_INFINITY }, // RLIMIT_DATA ++ rlimit { rlim_cur: 8 * 1024 * 1024, rlim_max: RLIM_INFINITY }, // RLIMIT_STACK (8 MB soft) ++ rlimit { rlim_cur: 0, rlim_max: RLIM_INFINITY }, // RLIMIT_CORE ++ rlimit { rlim_cur: RLIM_INFINITY, rlim_max: RLIM_INFINITY }, // RLIMIT_RSS ++ rlimit { rlim_cur: 4096, rlim_max: RLIM_INFINITY }, // RLIMIT_NPROC ++ rlimit { rlim_cur: 1024, rlim_max: 1024 * 64 }, // RLIMIT_NOFILE ++ rlimit { rlim_cur: RLIM_INFINITY, rlim_max: RLIM_INFINITY }, // RLIMIT_MEMLOCK ++ rlimit { rlim_cur: RLIM_INFINITY, rlim_max: RLIM_INFINITY }, // RLIMIT_AS ++ rlimit { rlim_cur: RLIM_INFINITY, rlim_max: RLIM_INFINITY }, // RLIMIT_LOCKS ++ rlimit { rlim_cur: 4096, rlim_max: RLIM_INFINITY }, // RLIMIT_SIGPENDING ++ rlimit { rlim_cur: 819200, rlim_max: RLIM_INFINITY }, // RLIMIT_MSGQUEUE ++ rlimit { rlim_cur: 0, rlim_max: 0 }, // RLIMIT_NICE ++ rlimit { rlim_cur: 0, rlim_max: 0 }, // RLIMIT_RTPRIO ++]; ++ ++/// Runtime resource limits, mutable via setrlimit(). ++/// Inherited across fork() (kernel copies address space). ++static RLIMIT_TABLE: RwLock<[rlimit; RLIMIT_NLIMITS as usize]> = RwLock::new( ++ RLIMIT_DEFAULTS ++); ++ + /// Redox syscall implementation of [`Pal`]. 
+ pub struct Sys; + +@@ -729,21 +755,77 @@ impl Pal for Sys { + } + + fn getrlimit(resource: c_int, mut rlim: Out<rlimit>) -> Result<()> { +- todo_skip!(0, "getrlimit({}, {:p}): not implemented", resource, rlim); ++ if resource < 0 || resource >= RLIMIT_NLIMITS as c_int { ++ return Err(Errno(EINVAL)); ++ } ++ let table = RLIMIT_TABLE.read(); ++ let current = &table[resource as usize]; + rlim.write(rlimit { +- rlim_cur: RLIM_INFINITY, +- rlim_max: RLIM_INFINITY, ++ rlim_cur: current.rlim_cur, ++ rlim_max: current.rlim_max, + }); + Ok(()) + } + + unsafe fn setrlimit(resource: c_int, rlim: *const rlimit) -> Result<()> { +- todo_skip!(0, "setrlimit({}, {:p}): not implemented", resource, rlim); +- Err(Errno(EPERM)) ++ if resource < 0 || resource >= RLIMIT_NLIMITS as c_int { ++ return Err(Errno(EINVAL)); ++ } ++ if rlim.is_null() { ++ return Err(Errno(EFAULT)); ++ } ++ let new = unsafe { &*rlim }; ++ if new.rlim_cur > new.rlim_max { ++ return Err(Errno(EINVAL)); ++ } ++ let mut table = RLIMIT_TABLE.write(); ++ let old = &table[resource as usize]; ++ if new.rlim_max > old.rlim_max { ++ return Err(Errno(EPERM)); ++ } ++ table[resource as usize] = rlimit { ++ rlim_cur: new.rlim_cur, ++ rlim_max: new.rlim_max, ++ }; ++ Ok(()) + } + +- fn getrusage(who: c_int, r_usage: Out<rusage>) -> Result<()> { +- todo_skip!(0, "getrusage({}, {:p}): not implemented", who, r_usage); ++ fn getrusage(who: c_int, mut r_usage: Out<rusage>) -> Result<()> { ++ let clock_id = match who { ++ 0 /* RUSAGE_SELF */ => 2 /* CLOCK_PROCESS_CPUTIME_ID */, ++ 1 /* RUSAGE_THREAD */ => 3 /* CLOCK_THREAD_CPUTIME_ID */, ++ -1 /* RUSAGE_CHILDREN */ => { ++ r_usage.write(rusage { ++ ru_utime: timeval { tv_sec: 0, tv_usec: 0 }, ++ ru_stime: timeval { tv_sec: 0, tv_usec: 0 }, ++ ru_maxrss: 0, ru_ixrss: 0, ru_idrss: 0, ru_isrss: 0, ++ ru_minflt: 0, ru_majflt: 0, ru_nswap: 0, ++ ru_inblock: 0, ru_oublock: 0, ru_msgsnd: 0, ru_msgrcv: 0, ++ ru_nsignals: 0, ru_nvcsw: 0, ru_nivcsw: 0, ++ }); ++ return Ok(()); ++ } ++ _ => return
Err(Errno(EINVAL)), ++ }; ++ ++ let mut redox_tp = syscall::TimeSpec::default(); ++ let clock_result = syscall::clock_gettime(clock_id, &mut redox_tp); ++ ++ let (tv_sec, tv_usec) = if clock_result.is_ok() { ++ let ts: timespec = (&redox_tp).into(); ++ (ts.tv_sec, (ts.tv_nsec / 1000) as _) ++ } else { ++ (0, 0) ++ }; ++ ++ r_usage.write(rusage { ++ ru_utime: timeval { tv_sec, tv_usec }, ++ ru_stime: timeval { tv_sec: 0, tv_usec: 0 }, ++ ru_maxrss: 0, ru_ixrss: 0, ru_idrss: 0, ru_isrss: 0, ++ ru_minflt: 0, ru_majflt: 0, ru_nswap: 0, ++ ru_inblock: 0, ru_oublock: 0, ru_msgsnd: 0, ru_msgrcv: 0, ++ ru_nsignals: 0, ru_nvcsw: 0, ru_nivcsw: 0, ++ }); + Ok(()) + } + +diff --git a/tests/sys_resource/rlimit_roundtrip.c b/tests/sys_resource/rlimit_roundtrip.c +new file mode 100644 +index 00000000..c90a6b79 +--- /dev/null ++++ b/tests/sys_resource/rlimit_roundtrip.c +@@ -0,0 +1,80 @@ ++#include <assert.h> ++#include <errno.h> ++#include <limits.h> ++#include <stdio.h> ++#include <sys/resource.h> ++#include <unistd.h> ++ ++int main(void) { ++ struct rlimit original; ++ struct rlimit current; ++ struct rlimit invalid; ++ ++ errno = 0; ++ assert(getrlimit(RLIMIT_NOFILE, &original) == 0); ++ ++ errno = 0; ++ assert(getrlimit(RLIMIT_NLIMITS, &current) == -1); ++ assert(errno == EINVAL); ++ ++ errno = 0; ++ assert(getrlimit(RLIMIT_NOFILE, NULL) == -1); ++ assert(errno == EFAULT); ++ ++ errno = 0; ++ assert(getrusage(RUSAGE_SELF, NULL) == -1); ++ assert(errno == EFAULT); ++ ++ errno = 0; ++ assert(setrlimit(RLIMIT_NOFILE, NULL) == -1); ++ assert(errno == EFAULT); ++ ++ invalid.rlim_cur = original.rlim_max; ++ invalid.rlim_max = original.rlim_cur; ++ errno = 0; ++ assert(setrlimit(RLIMIT_NOFILE, &invalid) == -1); ++ assert(errno == EINVAL); ++ ++ if (original.rlim_max != RLIM_INFINITY) { ++ invalid.rlim_cur = original.rlim_max + 1; ++ invalid.rlim_max = original.rlim_max + 1; ++ errno = 0; ++ assert(setrlimit(RLIMIT_NOFILE, &invalid) == -1); ++ assert(errno == EPERM); ++ } ++ ++ current.rlim_cur = original.rlim_cur > 16 ? 
original.rlim_cur - 16 : original.rlim_cur; ++ current.rlim_max = original.rlim_max; ++ errno = 0; ++ assert(setrlimit(RLIMIT_NOFILE, &current) == 0); ++ ++ struct rlimit roundtrip; ++ errno = 0; ++ assert(getrlimit(RLIMIT_NOFILE, &roundtrip) == 0); ++ assert(roundtrip.rlim_cur == current.rlim_cur); ++ assert(roundtrip.rlim_max == current.rlim_max); ++ ++ long open_max = sysconf(_SC_OPEN_MAX); ++ if (current.rlim_cur == RLIM_INFINITY) { ++ assert(open_max == -1); ++ } else if (current.rlim_cur > LONG_MAX) { ++ assert(open_max == LONG_MAX); ++ } else { ++ assert(open_max == (long)current.rlim_cur); ++ } ++ ++ if (current.rlim_cur > INT_MAX) { ++ assert(getdtablesize() == INT_MAX); ++ } else { ++ assert(getdtablesize() == (int)current.rlim_cur); ++ } ++ ++ errno = 0; ++ assert(setrlimit(RLIMIT_NOFILE, &original) == 0); ++ assert(getrlimit(RLIMIT_NOFILE, &roundtrip) == 0); ++ assert(roundtrip.rlim_cur == original.rlim_cur); ++ assert(roundtrip.rlim_max == original.rlim_max); ++ ++ puts("rlimit roundtrip ok"); ++ return 0; ++} diff --git a/local/patches/userutils/P5-redbear-branding.patch b/local/patches/userutils/P5-redbear-branding.patch new file mode 100644 index 00000000..77586b5f --- /dev/null +++ b/local/patches/userutils/P5-redbear-branding.patch @@ -0,0 +1,67 @@ +diff --git a/res/issue b/res/issue +index 6a963d8..092a432 100644 +--- a/res/issue ++++ b/res/issue +@@ -1,4 +1,4 @@ +-########## Redox OS ########## ++########## RedBear OS ########## + # Login with the following: # + # `user` # + # `root`:`password` # +diff --git a/res/motd b/res/motd +index 5cd097a..df2f802 100644 +--- a/res/motd ++++ b/res/motd +@@ -1,2 +1,2 @@ +-Welcome to Redox OS! ++Welcome to RedBear OS! 
+ +diff --git a/src/bin/login.rs b/src/bin/login.rs +index 08e178c..022fb47 100644 +--- a/src/bin/login.rs ++++ b/src/bin/login.rs +@@ -120,6 +120,7 @@ fn load_config_schemes(user: &User) -> Option + (author: "Jeremy Soller, Jose Narvaez") +@@ -133,9 +134,13 @@ pub fn main() { + } + + loop { ++ if consecutive_failures >= 3 { ++ let delay_secs = std::cmp::min(consecutive_failures as u64, 30); ++ std::thread::sleep(std::time::Duration::from_secs(delay_secs)); ++ } + let user = liner::Context::new() + .read_line( +- liner::Prompt::from("\x1B[1mredox login:\x1B[0m "), ++ liner::Prompt::from("\x1B[1mRedBear Login:\x1B[0m "), + None, + &mut liner::BasicCompleter::new(Vec::::new()), + ) +@@ -150,11 +155,13 @@ pub fn main() { + None => { + stdout.write(b"\nLogin incorrect\n").r#try(&mut stderr); + stdout.write(b"\n").r#try(&mut stderr); ++ consecutive_failures += 1; + stdout.flush().r#try(&mut stderr); + continue; + } + Some(user) => { + if user.is_passwd_blank() { ++ consecutive_failures = 0; + if let Ok(mut motd) = File::open(MOTD_FILE) { + io::copy(&mut motd, &mut stdout).r#try(&mut stderr); + stdout.flush().r#try(&mut stderr); +@@ -185,6 +192,7 @@ pub fn main() { + stdout.flush().r#try(&mut stderr); + + if user.verify_passwd(&password) { ++ consecutive_failures = 0; + if let Ok(mut motd) = File::open(MOTD_FILE) { + io::copy(&mut motd, &mut stdout).r#try(&mut stderr); + stdout.flush().r#try(&mut stderr); diff --git a/recipes/core/base/P2-ihdad-graceful-init.patch b/recipes/core/base/P2-ihdad-graceful-init.patch new file mode 120000 index 00000000..e0226722 --- /dev/null +++ b/recipes/core/base/P2-ihdad-graceful-init.patch @@ -0,0 +1 @@ +../../../local/patches/base/P2-ihdad-graceful-init.patch \ No newline at end of file diff --git a/recipes/core/base/P6-driver-main-fixes.patch b/recipes/core/base/P6-driver-main-fixes.patch new file mode 120000 index 00000000..f5f8be68 --- /dev/null +++ b/recipes/core/base/P6-driver-main-fixes.patch @@ -0,0 +1 @@ 
+../../../local/patches/base/P6-driver-main-fixes.patch \ No newline at end of file diff --git a/recipes/core/base/P6-driver-new-modules.patch b/recipes/core/base/P6-driver-new-modules.patch new file mode 120000 index 00000000..2a80af9b --- /dev/null +++ b/recipes/core/base/P6-driver-new-modules.patch @@ -0,0 +1 @@ +../../../local/patches/base/P6-driver-new-modules.patch \ No newline at end of file diff --git a/recipes/core/base/recipe.toml b/recipes/core/base/recipe.toml index 1b127973..ed5915ea 100644 --- a/recipes/core/base/recipe.toml +++ b/recipes/core/base/recipe.toml @@ -22,6 +22,7 @@ patches = [ # P5-init-daemon-panic-hardening.patch # P5-init-supervisor-restart.patch "P2-i2c-gpio-ucsi-drivers.patch", + "P2-ihdad-graceful-init.patch", "P9-fix-so-pecred.patch", "P3-inputd-keymap-bridge.patch", "P3-ps2d-led-feedback.patch", @@ -38,6 +39,9 @@ patches = [ "P4-fbcond-scrollback.patch", "P4-thermal-daemon.patch", "P4-thermald-workspace.patch", + "P6-driver-main-fixes.patch", + "P6-driver-new-modules.patch", + ] [package] diff --git a/recipes/core/kernel/recipe.toml b/recipes/core/kernel/recipe.toml index 902725f9..a160407a 100644 --- a/recipes/core/kernel/recipe.toml +++ b/recipes/core/kernel/recipe.toml @@ -1,6 +1,21 @@ +# Consolidated patch: all Red Bear kernel changes (P0-P10) in a single file. +# Individual patches preserved in local/patches/kernel/ for reference/rebase. +# The consolidated patch was generated from applying: redox(no-op), P0-canary, +# P1-memory-map-overflow, P4-supplementary-groups, P4-s3-suspend-resume, +# P4-scheme-failure-modes, P5-sched-rt-policy, P5-scheme-sched-id, +# P5-context-mod-sched, P6-percpu-runqueues, P6-futex-sharding, +# P8-initial-placement, P9-proc-lock-ordering, P9-numa-topology, +# P1-boot-path-diagnostics, P10-debug-scheme-serial-fix. 
+# Patches that were cumulative supersets (P5-sched-policy-context, P5-proc-setschedpolicy, +# P5-boot-path-hardening, P6-vruntime-*, P7-cache-affine-*, P7-proc-setname, +# P7-proc-setpriority, P8-futex-requeue, P8-futex-pi, P8-futex-robust, +# P8-percpu-wiring, P8-percpu-sched, P8-load-balance, P8-work-stealing, +# P9-futex-pi-cas-fix) failed to apply at commit 866dfad0 due to +# context conflicts and are deferred until rebase. [source] git = "https://gitlab.redox-os.org/redox-os/kernel.git" -patches = ["redox.patch", "P0-canary.patch", "P1-memory-map-overflow.patch", "../../../local/patches/kernel/P4-supplementary-groups.patch", "../../../local/patches/kernel/P4-s3-suspend-resume.patch", "../../../local/patches/kernel/P4-scheme-failure-modes.patch", "../../../local/patches/kernel/P5-sched-policy-context.patch", "../../../local/patches/kernel/P5-sched-rt-policy.patch", "../../../local/patches/kernel/P5-proc-setschedpolicy.patch", "../../../local/patches/kernel/P5-scheme-sched-id.patch", "../../../local/patches/kernel/P5-context-mod-sched.patch", "../../../local/patches/kernel/P5-boot-path-hardening.patch", "../../../local/patches/kernel/P6-vruntime-context.patch", "../../../local/patches/kernel/P6-percpu-runqueues.patch", "../../../local/patches/kernel/P6-futex-sharding.patch", "../../../local/patches/kernel/P6-vruntime-switch.patch", "../../../local/patches/kernel/P7-cache-affine-context.patch", "../../../local/patches/kernel/P7-cache-affine-switch.patch", "../../../local/patches/kernel/P7-proc-setname.patch", "../../../local/patches/kernel/P7-proc-setpriority.patch", "../../../local/patches/kernel/P8-futex-requeue.patch", "../../../local/patches/kernel/P8-futex-pi.patch", "../../../local/patches/kernel/P8-futex-robust.patch", "../../../local/patches/kernel/P8-percpu-wiring.patch", "../../../local/patches/kernel/P8-percpu-sched.patch", "../../../local/patches/kernel/P9-proc-lock-ordering.patch", "../../../local/patches/kernel/P9-futex-pi-cas-fix.patch", 
"../../../local/patches/kernel/P1-boot-path-diagnostics.patch"] +rev = "866dfad0" +patches = ["../../../local/patches/kernel/redbear-consolidated.patch"] [build] template = "custom" diff --git a/recipes/core/kernel/redbear-consolidated.patch b/recipes/core/kernel/redbear-consolidated.patch new file mode 120000 index 00000000..edb5b32a --- /dev/null +++ b/recipes/core/kernel/redbear-consolidated.patch @@ -0,0 +1 @@ +../../../local/patches/kernel/redbear-consolidated.patch \ No newline at end of file diff --git a/recipes/core/relibc/P10-stack-size-8mb.patch b/recipes/core/relibc/P10-stack-size-8mb.patch new file mode 120000 index 00000000..8287de02 --- /dev/null +++ b/recipes/core/relibc/P10-stack-size-8mb.patch @@ -0,0 +1 @@ +../../../local/patches/relibc/P10-stack-size-8mb.patch \ No newline at end of file diff --git a/recipes/core/relibc/P11-getrlimit-getrusage.patch b/recipes/core/relibc/P11-getrlimit-getrusage.patch new file mode 120000 index 00000000..8b052245 --- /dev/null +++ b/recipes/core/relibc/P11-getrlimit-getrusage.patch @@ -0,0 +1 @@ +../../../local/patches/relibc/P11-getrlimit-getrusage.patch \ No newline at end of file diff --git a/recipes/core/relibc/recipe.toml b/recipes/core/relibc/recipe.toml index bafe67d7..cbaae807 100644 --- a/recipes/core/relibc/recipe.toml +++ b/recipes/core/relibc/recipe.toml @@ -1,6 +1,6 @@ [source] git = "https://gitlab.redox-os.org/redox-os/relibc.git" -patches = ["P5-named-semaphores.patch"] +patches = ["P10-stack-size-8mb.patch", "P11-getrlimit-getrusage.patch"] [build] template = "custom" diff --git a/recipes/core/userutils/recipe.toml b/recipes/core/userutils/recipe.toml index e72806df..1dd0dd1a 100644 --- a/recipes/core/userutils/recipe.toml +++ b/recipes/core/userutils/recipe.toml @@ -1,6 +1,6 @@ [source] git = "https://gitlab.redox-os.org/redox-os/userutils.git" -patches = ["P4-login-rate-limit.patch"] +patches = ["P5-redbear-branding.patch"] [build] template = "custom"