Files
RedBear-OS/local/patches/kernel/redbear-consolidated.patch
vasilito 11993af01f fix: rebase base patches, commit recipe drift, add relibc rlimit/sysconf
Base: fix P6-driver-new-modules.patch (converted from ed format to unified diff) for
the new driver modules (ncq, itr, phy). P6-driver-main-fixes.patch now applies (with
offset) against the current upstream source.

Relibc: remove stale P5-named-semaphores (upstream has stubs), add
P10-stack-size-8mb and P11-getrlimit-getrusage (per-process rlimit table,
sysconf integration, getdtablesize fix, null-pointer safety).
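
As a rough illustration of what P11 wires up (a sketch using the libc crate and generic
POSIX names, not RedBear-specific code): getrlimit should now read the per-process
rlimit table, and sysconf(_SC_OPEN_MAX) / getdtablesize() should report the same soft limit.

    use libc::{getrlimit, rlimit, sysconf, RLIMIT_NOFILE, _SC_OPEN_MAX};

    fn main() {
        // Read the soft/hard open-file limits from the per-process rlimit table.
        let mut rl: rlimit = unsafe { std::mem::zeroed() };
        let rc = unsafe { getrlimit(RLIMIT_NOFILE, &mut rl) };
        assert_eq!(rc, 0, "getrlimit(RLIMIT_NOFILE) failed");

        // With the sysconf integration, _SC_OPEN_MAX mirrors the current soft limit;
        // getdtablesize() (fixed in the same patch) is expected to agree as well.
        let open_max = unsafe { sysconf(_SC_OPEN_MAX) };
        assert_eq!(open_max as u64, rl.rlim_cur as u64);
    }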

Kernel: consolidate 29 individual patches into a single redbear-consolidated.patch.
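
Among the changes folded into the consolidated patch is a new "sleep" node on the
kernel.acpi scheme (see the diff below): reading it lists the advertised states,
writing "S3" or "S5" requests the transition. A minimal userspace sketch, assuming
the node is reachable at /scheme/kernel.acpi/sleep:

    use std::fs;

    fn main() -> std::io::Result<()> {
        // The kernel returns one state per line, e.g. "S3\nS5\n" or just "S5\n".
        let states = fs::read_to_string("/scheme/kernel.acpi/sleep")?;
        println!("available sleep states: {}", states.trim());

        // Request suspend-to-RAM; on resume the write call returns normally.
        if states.lines().any(|s| s == "S3") {
            fs::write("/scheme/kernel.acpi/sleep", "S3")?;
        }
        Ok(())
    }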

Userutils: P5-redbear-branding replaces P4-login-rate-limit.

Recipe.toml changes now committed so they survive source resets.
2026-05-04 11:49:15 +01:00

2593 lines
94 KiB
Diff

diff --git a/Cargo.toml b/Cargo.toml
index 6d4f059a..e05f723c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,7 @@ cc = "1.0"
toml = "0.8"
[dependencies]
+acpi_ext = { package = "acpi", git = "https://gitlab.redox-os.org/redox-os/acpi.git", branch = "redox-6.x" }
arrayvec = { version = "0.7.4", default-features = false }
bitfield = "0.13.2"
bitflags = "2"
diff --git a/Makefile b/Makefile
index 68a8c50a..ce59b910 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,4 @@
+# Red Bear OS kernel patches applied via individual patch files
.PHONY: all check
SOURCE:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
diff --git a/build.rs b/build.rs
index 96c3ea5c..751746cc 100644
--- a/build.rs
+++ b/build.rs
@@ -77,6 +77,7 @@ fn main() {
}
"x86_64" => {
println!("cargo::rerun-if-changed=src/asm/x86_64/trampoline.asm");
+ println!("cargo::rerun-if-changed=src/asm/x86_64/s3_wakeup.asm");
let status = Command::new("nasm")
.arg("-f")
@@ -89,6 +90,18 @@ fn main() {
if !status.success() {
panic!("nasm failed with exit status {}", status);
}
+
+ let status = Command::new("nasm")
+ .arg("-f")
+ .arg("bin")
+ .arg("-o")
+ .arg(format!("{}/s3_wakeup", out_dir))
+ .arg("src/asm/x86_64/s3_wakeup.asm")
+ .status()
+ .expect("failed to run nasm");
+ if !status.success() {
+ panic!("nasm failed with exit status {}", status);
+ }
}
"riscv64" => {
println!("cargo::rustc-cfg=dtb");
diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs
index 4dc23883..f472c088 100644
--- a/src/acpi/madt/arch/x86.rs
+++ b/src/acpi/madt/arch/x86.rs
@@ -18,6 +18,7 @@ use crate::{
use super::{Madt, MadtEntry};
+const AP_SPIN_LIMIT: u32 = 1_000_000;
const TRAMPOLINE: usize = 0x8000;
static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline"));
@@ -42,13 +43,17 @@ pub(super) fn init(madt: Madt) {
//TODO: do not have writable and executable!
let mut mapper = KernelMapper::lock_rw();
- let result = mapper
- .map_phys(
- trampoline_page.start_address(),
- trampoline_frame.base(),
- PageFlags::new().execute(true).write(true),
- )
- .expect("failed to map trampoline");
+ let result = match mapper.map_phys(
+ trampoline_page.start_address(),
+ trampoline_frame.base(),
+ PageFlags::new().execute(true).write(true),
+ ) {
+ Some(result) => result,
+ None => {
+ println!("KERNEL AP: failed to map trampoline page, AP bring-up disabled");
+ return;
+ }
+ };
(result, mapper.table().phys().data())
};
@@ -72,17 +77,27 @@ pub(super) fn init(madt: Madt) {
if u32::from(ap_local_apic.id) == me.get() {
debug!(" This is my local APIC");
} else if ap_local_apic.flags & 1 == 1 {
- let cpu_id = LogicalCpuId::next();
-
// Allocate a stack
- let stack_start = RmmA::phys_to_virt(
- allocate_p2frame(4)
- .expect("no more frames in acpi stack_start")
- .base(),
- )
- .data();
+ let alloc = match allocate_p2frame(4) {
+ Some(frame) => frame,
+ None => {
+ println!("KERNEL AP: CPU {} no memory for stack, skipping", ap_local_apic.id);
+ continue;
+ }
+ };
+ let stack_start = RmmA::phys_to_virt(alloc.base()).data();
let stack_end = stack_start + (PAGE_SIZE << 4);
+ let next_cpu = crate::CPU_COUNT.load(Ordering::Relaxed);
+ if next_cpu >= crate::cpu_set::MAX_CPU_COUNT {
+ println!(
+ "KERNEL AP: CPU {} exceeds logical CPU limit, skipping",
+ ap_local_apic.id
+ );
+ continue;
+ }
+ let cpu_id = LogicalCpuId::new(next_cpu);
+
let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end);
let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id);
@@ -137,13 +152,34 @@ pub(super) fn init(madt: Madt) {
local_apic.set_icr(icr);
}
- // Wait for trampoline ready
- while unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } == 0 {
+ // Wait for trampoline ready with timeout
+ let mut trampoline_ready = false;
+ for _ in 0..AP_SPIN_LIMIT {
+ if unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } != 0 {
+ trampoline_ready = true;
+ break;
+ }
hint::spin_loop();
}
- while !AP_READY.load(Ordering::SeqCst) {
+ if !trampoline_ready {
+ println!("KERNEL AP: CPU {} trampoline timeout, skipping", ap_local_apic.id);
+ continue;
+ }
+
+ let mut kernel_ready = false;
+ for _ in 0..AP_SPIN_LIMIT {
+ if AP_READY.load(Ordering::SeqCst) {
+ kernel_ready = true;
+ break;
+ }
hint::spin_loop();
}
+ if !kernel_ready {
+ println!("KERNEL AP: CPU {} AP_READY timeout, skipping", ap_local_apic.id);
+ continue;
+ }
+
+ crate::CPU_COUNT.fetch_add(1, Ordering::Relaxed);
RmmA::invalidate_all();
}
@@ -151,10 +187,12 @@ pub(super) fn init(madt: Madt) {
}
// Unmap trampoline
- let (_frame, _, flush) = unsafe {
+ if let Some((_frame, _, flush)) = unsafe {
KernelMapper::lock_rw()
.unmap_phys(trampoline_page.start_address())
- .expect("failed to unmap trampoline page")
- };
- flush.flush();
+ } {
+ flush.flush();
+ } else {
+ println!("KERNEL AP: failed to unmap trampoline page (non-fatal)");
+ }
}
diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs
index 59e35265..b3b80f0c 100644
--- a/src/acpi/mod.rs
+++ b/src/acpi/mod.rs
@@ -82,6 +82,14 @@ impl Rxsdt for RxsdtEnum {
pub static RXSDT_ENUM: Once<RxsdtEnum> = Once::new();
+#[derive(Clone, Copy, Debug)]
+pub struct AcpiRootInfo {
+ pub revision: u8,
+ pub root_sdt_address: PhysicalAddress,
+}
+
+pub static ACPI_ROOT_INFO: Once<AcpiRootInfo> = Once::new();
+
/// Parse the ACPI tables to gather CPU, interrupt, and timer information
pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
unsafe {
@@ -94,6 +102,15 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
let rsdp_opt = Rsdp::get_rsdp(already_supplied_rsdp);
if let Some(rsdp) = rsdp_opt {
+ let root_info = ACPI_ROOT_INFO.call_once(|| AcpiRootInfo {
+ revision: rsdp.revision(),
+ root_sdt_address: rsdp.sdt_address(),
+ });
+
+ if root_info.root_sdt_address != rsdp.sdt_address() || root_info.revision != rsdp.revision() {
+ error!("ACPI_ROOT_INFO already initialized with a different RSDP root");
+ }
+
debug!("SDT address: {:#x}", rsdp.sdt_address().data());
let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock_rw());
diff --git a/src/acpi/rsdp.rs b/src/acpi/rsdp.rs
index f10c5ac9..5e93a9f8 100644
--- a/src/acpi/rsdp.rs
+++ b/src/acpi/rsdp.rs
@@ -31,4 +31,8 @@ impl Rsdp {
self.rsdt_address as usize
})
}
+
+ pub fn revision(&self) -> u8 {
+ self.revision
+ }
}
diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs
index 4fdb0ba1..aaa71963 100644
--- a/src/allocator/mod.rs
+++ b/src/allocator/mod.rs
@@ -7,26 +7,40 @@ mod linked_list;
/// Size of kernel heap
const KERNEL_HEAP_SIZE: usize = ::rmm::MEGABYTE;
+#[cold]
+fn halt_kernel_heap_init(message: &str) -> ! {
+ print!("{message}");
+ println!("Kernel heap initialization cannot continue. Halting.");
+ loop {
+ core::hint::spin_loop();
+ }
+}
+
unsafe fn map_heap(mapper: &mut KernelMapper<true>, offset: usize, size: usize) {
let mut flush_all = PageFlushAll::new();
let heap_start_page = Page::containing_address(VirtualAddress::new(offset));
let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size - 1));
for page in Page::range_inclusive(heap_start_page, heap_end_page) {
- let phys = mapper
- .allocator_mut()
- .allocate_one()
- .expect("failed to allocate kernel heap");
+ let phys = match mapper.allocator_mut().allocate_one() {
+ Some(phys) => phys,
+ None => halt_kernel_heap_init(
+ "FATAL: failed to allocate physical frame for kernel heap\n",
+ ),
+ };
let flush = unsafe {
- mapper
- .map_phys(
- page.start_address(),
- phys,
- PageFlags::new()
- .write(true)
- .global(cfg!(not(feature = "pti"))),
- )
- .expect("failed to map kernel heap")
+ match mapper.map_phys(
+ page.start_address(),
+ phys,
+ PageFlags::new()
+ .write(true)
+ .global(cfg!(not(feature = "pti"))),
+ ) {
+ Some(flush) => flush,
+ None => halt_kernel_heap_init(
+ "FATAL: failed to map kernel heap virtual page\n",
+ ),
+ }
};
flush_all.consume(flush);
}
diff --git a/src/arch/x86_shared/gdt.rs b/src/arch/x86_shared/gdt.rs
index cad344f3..f7acae35 100644
--- a/src/arch/x86_shared/gdt.rs
+++ b/src/arch/x86_shared/gdt.rs
@@ -192,6 +192,15 @@ impl ProcessorControlRegion {
}
}
+#[cold]
+fn halt_pcr_init() -> ! {
+ println!("FATAL: failed to allocate physical memory for Processor Control Region");
+ println!("Processor startup cannot continue. Halting.");
+ loop {
+ core::hint::spin_loop();
+ }
+}
+
pub unsafe fn pcr() -> *mut ProcessorControlRegion {
unsafe {
// Primitive benchmarking of RDFSBASE and RDGSBASE in userspace, appears to indicate that
@@ -375,7 +384,10 @@ pub fn allocate_and_init_pcr(
.next_power_of_two()
.trailing_zeros();
- let pcr_frame = crate::memory::allocate_p2frame(alloc_order).expect("failed to allocate PCR");
+ let pcr_frame = match crate::memory::allocate_p2frame(alloc_order) {
+ Some(frame) => frame,
+ None => halt_pcr_init(),
+ };
let pcr_ptr = RmmA::phys_to_virt(pcr_frame.base()).data() as *mut ProcessorControlRegion;
unsafe { core::ptr::write(pcr_ptr, ProcessorControlRegion::new_partial_init(cpu_id)) };
diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs
index 50064585..47f692f6 100644
--- a/src/arch/x86_shared/idt.rs
+++ b/src/arch/x86_shared/idt.rs
@@ -78,6 +78,15 @@ static INIT_BSP_IDT: SyncUnsafeCell<Idt> = SyncUnsafeCell::new(Idt::new());
pub(crate) static IDTS: RwLock<HashMap<LogicalCpuId, &'static mut Idt>> =
RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new()));
+#[cold]
+fn halt_idt_init() -> ! {
+ println!("FATAL: failed to allocate physical pages for backup interrupt stack");
+ println!("Interrupt setup cannot continue. Halting.");
+ loop {
+ core::hint::spin_loop();
+ }
+}
+
#[inline]
pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool {
if cpu_id == LogicalCpuId::BSP {
@@ -161,8 +170,10 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt {
.or_insert_with(|| Box::leak(Box::new(Idt::new())));
use crate::memory::{RmmA, RmmArch};
- let frames = crate::memory::allocate_p2frame(4)
- .expect("failed to allocate pages for backup interrupt stack");
+ let frames = match crate::memory::allocate_p2frame(4) {
+ Some(frames) => frames,
+ None => halt_idt_init(),
+ };
// Physical pages are mapped linearly. So is the linearly mapped virtual memory.
let base_address = RmmA::phys_to_virt(frames.base());
diff --git a/src/arch/x86_shared/mod.rs b/src/arch/x86_shared/mod.rs
index e3c30501..11c33e94 100644
--- a/src/arch/x86_shared/mod.rs
+++ b/src/arch/x86_shared/mod.rs
@@ -28,6 +28,8 @@ pub mod pti;
/// Initialization and start function
pub mod start;
+pub mod sleep;
+
/// Stop function
pub mod stop;
diff --git a/src/arch/x86_shared/sleep.rs b/src/arch/x86_shared/sleep.rs
new file mode 100644
index 00000000..9f98c0d8
--- /dev/null
+++ b/src/arch/x86_shared/sleep.rs
@@ -0,0 +1,712 @@
+use alloc::{sync::Arc, vec::Vec};
+use core::{
+ ptr::NonNull,
+ str::FromStr,
+ sync::atomic::{AtomicU32, Ordering},
+};
+
+use acpi_ext::{
+ aml::{namespace::AmlName, object::Object, Interpreter},
+ registers::FixedRegisters,
+ sdt::{facs::Facs, fadt::Fadt, SdtHeader},
+ AcpiTables, Handle, Handler, PhysicalMapping,
+};
+use spin::Mutex;
+use syscall::error::{Error, EINVAL, EIO};
+use x86::{segmentation::SegmentSelector, task, Ring};
+
+use crate::{
+ acpi::ACPI_ROOT_INFO,
+ arch::interrupt,
+ memory::{
+ round_down_pages, round_up_pages, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA,
+ RmmArch, VirtualAddress, PAGE_SIZE,
+ },
+ syscall::io::{Io, Pio},
+};
+
+const ACPI_SLP_TYP_SHIFT: u16 = 10;
+const ACPI_SLP_TYP_MASK: u16 = 0x1C00;
+const ACPI_SLP_EN: u16 = 1 << 13;
+const WAKE_TRAMPOLINE_PHYS: usize = 0x8000;
+const SLEEP_RETURN_OK: usize = 0;
+
+#[cfg(target_arch = "x86_64")]
+static WAKE_TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/s3_wakeup"));
+
+#[repr(C, packed)]
+#[derive(Clone, Copy, Debug, Default)]
+struct DescriptorTableRegister {
+ limit: u16,
+ base: u64,
+}
+
+#[repr(C, align(64))]
+#[derive(Clone, Copy, Debug)]
+struct FpuState {
+ bytes: [u8; 4096],
+}
+
+impl Default for FpuState {
+ fn default() -> Self {
+ Self { bytes: [0; 4096] }
+ }
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum SleepState {
+ S3,
+ S5,
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum SleepError {
+ UnsupportedArch,
+ MissingAcpi,
+ MissingFadt,
+ MissingFacs,
+ MissingSleepObject,
+ InvalidSleepObject,
+ UnsupportedPmControl,
+ UnsupportedAmlOperation,
+ SleepDidNotEnter,
+}
+
+impl SleepError {
+ fn code(self) -> usize {
+ match self {
+ Self::UnsupportedArch => EINVAL as usize,
+ Self::MissingAcpi
+ | Self::MissingFadt
+ | Self::MissingFacs
+ | Self::MissingSleepObject
+ | Self::UnsupportedAmlOperation => EIO as usize,
+ Self::InvalidSleepObject | Self::UnsupportedPmControl | Self::SleepDidNotEnter => {
+ EINVAL as usize
+ }
+ }
+ }
+
+ fn from_code(code: usize) -> Self {
+ match code as i32 {
+ x if x == EINVAL => Self::InvalidSleepObject,
+ _ => Self::MissingAcpi,
+ }
+ }
+}
+
+#[derive(Clone, Copy, Debug, Default)]
+struct SavedCpuContext {
+ entry_rsp: usize,
+ runtime_rsp: usize,
+ facs_address: usize,
+ cr0: usize,
+ cr2: usize,
+ cr3: usize,
+ cr4: usize,
+ rflags: usize,
+ gdtr: DescriptorTableRegister,
+ idtr: DescriptorTableRegister,
+ efer: u64,
+ fs_base: u64,
+ gs_base: u64,
+ kernel_gs_base: u64,
+ fpu: FpuState,
+}
+
+static SAVED_CONTEXT: Mutex<Option<SavedCpuContext>> = Mutex::new(None);
+static AML_MUTEX_IDS: AtomicU32 = AtomicU32::new(1);
+
+#[derive(Clone, Copy, Debug)]
+struct SleepTypeData {
+ a: u16,
+ b: u16,
+}
+
+#[derive(Clone, Copy)]
+struct KernelAcpiHandler;
+
+impl KernelAcpiHandler {
+ fn map_range(physical_address: usize, size: usize) -> (*mut u8, usize) {
+ let map_base = round_down_pages(physical_address);
+ let map_offset = physical_address - map_base;
+ let mapped_length = round_up_pages(size + map_offset);
+
+ // SAFETY: The ACPI interpreter only requests firmware-described physical regions.
+ unsafe {
+ let mut mapper = KernelMapper::lock_rw();
+ for page_index in 0..mapped_length / PAGE_SIZE {
+ let (_, flush) = mapper
+ .map_linearly(
+ PhysicalAddress::new(map_base + page_index * PAGE_SIZE),
+ PageFlags::new(),
+ )
+ .expect("failed to linearly map ACPI physical region");
+ flush.flush();
+ }
+ }
+
+ let virtual_base = RmmA::phys_to_virt(PhysicalAddress::new(map_base)).data();
+ ((virtual_base + map_offset) as *mut u8, mapped_length)
+ }
+}
+
+impl Handler for KernelAcpiHandler {
+ unsafe fn map_physical_region<T>(&self, physical_address: usize, size: usize) -> PhysicalMapping<Self, T> {
+ let (virtual_start, mapped_length) = Self::map_range(physical_address, size);
+ PhysicalMapping {
+ physical_start: physical_address,
+ virtual_start: NonNull::new(virtual_start.cast::<T>())
+ .expect("expected mapped ACPI virtual address to be non-null"),
+ region_length: size,
+ mapped_length,
+ handler: *self,
+ }
+ }
+
+ fn unmap_physical_region<T>(_region: &PhysicalMapping<Self, T>) {}
+
+ fn read_u8(&self, address: usize) -> u8 {
+ // SAFETY: AML system-memory accesses are byte-addressable firmware regions.
+ unsafe { core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u8) }
+ }
+
+ fn read_u16(&self, address: usize) -> u16 {
+ // SAFETY: AML system-memory accesses are word-addressable firmware regions.
+ unsafe {
+ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u16)
+ }
+ }
+
+ fn read_u32(&self, address: usize) -> u32 {
+ // SAFETY: AML system-memory accesses are dword-addressable firmware regions.
+ unsafe {
+ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u32)
+ }
+ }
+
+ fn read_u64(&self, address: usize) -> u64 {
+ // SAFETY: AML system-memory accesses are qword-addressable firmware regions.
+ unsafe {
+ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u64)
+ }
+ }
+
+ fn write_u8(&self, address: usize, value: u8) {
+ // SAFETY: AML system-memory accesses are byte-addressable firmware regions.
+ unsafe {
+ core::ptr::write_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u8, value)
+ }
+ }
+
+ fn write_u16(&self, address: usize, value: u16) {
+ // SAFETY: AML system-memory accesses are word-addressable firmware regions.
+ unsafe {
+ core::ptr::write_volatile(
+ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u16,
+ value,
+ )
+ }
+ }
+
+ fn write_u32(&self, address: usize, value: u32) {
+ // SAFETY: AML system-memory accesses are dword-addressable firmware regions.
+ unsafe {
+ core::ptr::write_volatile(
+ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u32,
+ value,
+ )
+ }
+ }
+
+ fn write_u64(&self, address: usize, value: u64) {
+ // SAFETY: AML system-memory accesses are qword-addressable firmware regions.
+ unsafe {
+ core::ptr::write_volatile(
+ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u64,
+ value,
+ )
+ }
+ }
+
+ fn read_io_u8(&self, port: u16) -> u8 {
+ Pio::<u8>::new(port).read()
+ }
+
+ fn read_io_u16(&self, port: u16) -> u16 {
+ Pio::<u16>::new(port).read()
+ }
+
+ fn read_io_u32(&self, port: u16) -> u32 {
+ Pio::<u32>::new(port).read()
+ }
+
+ fn write_io_u8(&self, port: u16, value: u8) {
+ Pio::<u8>::new(port).write(value)
+ }
+
+ fn write_io_u16(&self, port: u16, value: u16) {
+ Pio::<u16>::new(port).write(value)
+ }
+
+ fn write_io_u32(&self, port: u16, value: u32) {
+ Pio::<u32>::new(port).write(value)
+ }
+
+ fn read_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u8 {
+ 0
+ }
+
+ fn read_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u16 {
+ 0
+ }
+
+ fn read_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u32 {
+ 0
+ }
+
+ fn write_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u8) {}
+
+ fn write_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u16) {}
+
+ fn write_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u32) {}
+
+ fn nanos_since_boot(&self) -> u64 {
+ 0
+ }
+
+ fn stall(&self, microseconds: u64) {
+ for _ in 0..(microseconds.saturating_mul(64)) {
+ core::hint::spin_loop();
+ }
+ }
+
+ fn sleep(&self, milliseconds: u64) {
+ for _ in 0..(milliseconds.saturating_mul(64_000)) {
+ core::hint::spin_loop();
+ }
+ }
+
+ fn create_mutex(&self) -> Handle {
+ Handle(AML_MUTEX_IDS.fetch_add(1, Ordering::Relaxed))
+ }
+
+ fn acquire(&self, _mutex: Handle, _timeout: u16) -> Result<(), acpi_ext::aml::AmlError> {
+ Ok(())
+ }
+
+ fn release(&self, _mutex: Handle) {}
+}
+
+fn sleep_state_name(state: SleepState) -> &'static str {
+ match state {
+ SleepState::S3 => "\\_S3",
+ SleepState::S5 => "\\_S5",
+ }
+}
+
+fn encode_sleep_type(value: u16) -> u16 {
+ if value <= 0x7 {
+ value << ACPI_SLP_TYP_SHIFT
+ } else {
+ value & ACPI_SLP_TYP_MASK
+ }
+}
+
+fn load_interpreter() -> Result<(
+ Arc<FixedRegisters<KernelAcpiHandler>>,
+ PhysicalMapping<KernelAcpiHandler, Facs>,
+ Interpreter<KernelAcpiHandler>,
+), SleepError> {
+ let root = *ACPI_ROOT_INFO.get().ok_or(SleepError::MissingAcpi)?;
+ let handler = KernelAcpiHandler;
+
+ // SAFETY: ACPI root info is captured from the firmware-provided, already validated root table.
+ let tables = unsafe {
+ AcpiTables::from_rsdt(handler, root.revision, root.root_sdt_address.data())
+ .map_err(|_| SleepError::MissingAcpi)?
+ };
+ let fadt = tables.find_table::<Fadt>().ok_or(SleepError::MissingFadt)?;
+ let registers = Arc::new(
+ FixedRegisters::new(&fadt, handler).map_err(|_| SleepError::UnsupportedPmControl)?,
+ );
+ let facs_address = fadt.facs_address().map_err(|_| SleepError::MissingFacs)?;
+
+ // SAFETY: The FADT-supplied FACS address is used exactly as described by the ACPI spec.
+ let facs = unsafe { handler.map_physical_region::<Facs>(facs_address, core::mem::size_of::<Facs>()) };
+ // SAFETY: The AML interpreter only needs an owned mapping of the same firmware FACS table.
+ let interpreter_facs = unsafe {
+ handler.map_physical_region::<Facs>(facs_address, core::mem::size_of::<Facs>())
+ };
+ let dsdt = tables.dsdt().map_err(|_| SleepError::MissingFadt)?;
+ let interpreter = Interpreter::new(handler, dsdt.revision, Arc::clone(&registers), Some(interpreter_facs));
+
+ // SAFETY: Each AML table mapping is owned by the interpreter during table loading.
+ unsafe {
+ let mapping = handler.map_physical_region::<SdtHeader>(dsdt.phys_address, dsdt.length as usize);
+ let stream = core::slice::from_raw_parts(
+ mapping.virtual_start.as_ptr().byte_add(core::mem::size_of::<SdtHeader>()) as *const u8,
+ dsdt.length as usize - core::mem::size_of::<SdtHeader>(),
+ );
+ interpreter
+ .load_table(stream)
+ .map_err(|_| SleepError::UnsupportedAmlOperation)?;
+
+ for ssdt in tables.ssdts() {
+ let mapping = handler.map_physical_region::<SdtHeader>(ssdt.phys_address, ssdt.length as usize);
+ let stream = core::slice::from_raw_parts(
+ mapping.virtual_start.as_ptr().byte_add(core::mem::size_of::<SdtHeader>()) as *const u8,
+ ssdt.length as usize - core::mem::size_of::<SdtHeader>(),
+ );
+ interpreter
+ .load_table(stream)
+ .map_err(|_| SleepError::UnsupportedAmlOperation)?;
+ }
+ }
+
+ Ok((registers, facs, interpreter))
+}
+
+fn sleep_type_data_from_interpreter(
+ interpreter: &Interpreter<KernelAcpiHandler>,
+ state: SleepState,
+) -> Result<SleepTypeData, SleepError> {
+ let name = AmlName::from_str(sleep_state_name(state)).map_err(|_| SleepError::MissingSleepObject)?;
+ let object = interpreter
+ .evaluate(name, Vec::new())
+ .map_err(|_| SleepError::MissingSleepObject)?;
+
+ let Object::Package(package) = &*object else {
+ return Err(SleepError::InvalidSleepObject);
+ };
+
+ let Some(typa_object) = package.first() else {
+ return Err(SleepError::InvalidSleepObject);
+ };
+ let Some(typb_object) = package.get(1) else {
+ return Err(SleepError::InvalidSleepObject);
+ };
+
+ let Object::Integer(typa) = &**typa_object else {
+ return Err(SleepError::InvalidSleepObject);
+ };
+ let Object::Integer(typb) = &**typb_object else {
+ return Err(SleepError::InvalidSleepObject);
+ };
+
+ Ok(SleepTypeData {
+ a: encode_sleep_type(*typa as u16),
+ b: encode_sleep_type(*typb as u16),
+ })
+}
+
+fn sleep_type_data(state: SleepState) -> Result<SleepTypeData, SleepError> {
+ let (_registers, _facs, interpreter) = load_interpreter()?;
+ sleep_type_data_from_interpreter(&interpreter, state)
+}
+
+fn install_wake_trampoline(stack_rsp: usize, cr3: usize) {
+ let trampoline_page = Page::containing_address(VirtualAddress::new(WAKE_TRAMPOLINE_PHYS));
+ let trampoline_frame = PhysicalAddress::new(WAKE_TRAMPOLINE_PHYS);
+
+ // SAFETY: The 0x8000 low-memory trampoline page is reserved by the kernel for bootstrap stubs.
+ let (result, _) = unsafe {
+ let mut mapper = KernelMapper::lock_rw();
+ let result = mapper
+ .map_phys(
+ trampoline_page.start_address(),
+ trampoline_frame,
+ PageFlags::new().execute(true).write(true),
+ )
+ .expect("failed to map S3 wake trampoline page");
+ (result, mapper.table().phys().data())
+ };
+ result.flush();
+
+ for (index, value) in WAKE_TRAMPOLINE_DATA.iter().enumerate() {
+ // SAFETY: The trampoline page is mapped writable at the same virtual address as the physical page.
+ unsafe {
+ core::ptr::write_volatile((WAKE_TRAMPOLINE_PHYS as *mut u8).add(index), *value);
+ }
+ }
+
+ // SAFETY: The wake trampoline layout reserves three qword fields immediately after the jump.
+ unsafe {
+ let stack_slot = (WAKE_TRAMPOLINE_PHYS + 8) as *mut u64;
+ let page_table_slot = stack_slot.add(1);
+ let code_slot = stack_slot.add(2);
+ stack_slot.write(stack_rsp as u64);
+ page_table_slot.write(cr3 as u64);
+ #[expect(clippy::fn_to_numeric_cast)]
+ code_slot.write(resume_from_s3_trampoline as usize as u64);
+ }
+
+ // SAFETY: The trampoline mapping is no longer needed once the physical page has been populated.
+ let (_frame, _, flush) = unsafe {
+ KernelMapper::lock_rw()
+ .unmap_phys(trampoline_page.start_address())
+ .expect("failed to unmap S3 wake trampoline page")
+ };
+ flush.flush();
+}
+
+fn save_descriptor_tables(context: &mut SavedCpuContext) {
+ // SAFETY: SGDT/SIDT only read the current CPU descriptor-table registers into the provided storage.
+ unsafe {
+ core::arch::asm!("sgdt [{}]", in(reg) &mut context.gdtr, options(nostack, preserves_flags));
+ core::arch::asm!("sidt [{}]", in(reg) &mut context.idtr, options(nostack, preserves_flags));
+ }
+}
+
+fn save_fpu_state(context: &mut SavedCpuContext) {
+ // SAFETY: The kernel owns the current CPU at suspend entry and the FXSAVE buffer is 64-byte aligned.
+ unsafe {
+ core::arch::asm!(
+ "fxsave64 [{}]",
+ in(reg) context.fpu.bytes.as_mut_ptr(),
+ );
+ }
+}
+
+fn restore_fpu_state(context: &SavedCpuContext) {
+ // SAFETY: The saved FXSAVE image belongs to the same CPU context and matches the restore instruction.
+ unsafe {
+ core::arch::asm!(
+ "fxrstor64 [{}]",
+ in(reg) context.fpu.bytes.as_ptr(),
+ );
+ }
+}
+
+fn save_cpu_context(entry_rsp: usize) -> SavedCpuContext {
+ let mut context = SavedCpuContext {
+ entry_rsp,
+ ..SavedCpuContext::default()
+ };
+
+ // SAFETY: Reading control registers and MSRs is required to reconstruct the CPU execution state on wake.
+ unsafe {
+ core::arch::asm!(
+ "mov {}, cr0",
+ out(reg) context.cr0,
+ options(nostack, preserves_flags)
+ );
+ core::arch::asm!(
+ "mov {}, cr2",
+ out(reg) context.cr2,
+ options(nostack, preserves_flags)
+ );
+ core::arch::asm!(
+ "mov {}, cr3",
+ out(reg) context.cr3,
+ options(nostack, preserves_flags)
+ );
+ core::arch::asm!(
+ "mov {}, cr4",
+ out(reg) context.cr4,
+ options(nostack, preserves_flags)
+ );
+ core::arch::asm!(
+ "pushfq",
+ "pop {}",
+ out(reg) context.rflags,
+ options(preserves_flags)
+ );
+ core::arch::asm!("mov {}, rsp", out(reg) context.runtime_rsp, options(nostack, preserves_flags));
+
+ context.efer = x86::msr::rdmsr(x86::msr::IA32_EFER);
+ context.fs_base = x86::msr::rdmsr(x86::msr::IA32_FS_BASE);
+ context.gs_base = x86::msr::rdmsr(x86::msr::IA32_GS_BASE);
+ context.kernel_gs_base = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE);
+ }
+
+ save_descriptor_tables(&mut context);
+ save_fpu_state(&mut context);
+ context
+}
+
+fn set_firmware_waking_vector(facs: &mut PhysicalMapping<KernelAcpiHandler, Facs>, vector: usize) {
+ facs.firmware_waking_vector = vector as u32;
+ facs.x_firmware_waking_vector = vector as u64;
+}
+
+fn write_pm1_control_block(
+ registers: &FixedRegisters<KernelAcpiHandler>,
+ sleep_type: SleepTypeData,
+) -> Result<(), SleepError> {
+ let current_a = registers
+ .pm1_control_registers
+ .pm1a
+ .read()
+ .map_err(|_| SleepError::UnsupportedPmControl)? as u16;
+ let armed_a = (current_a & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.a;
+
+ registers
+ .pm1_control_registers
+ .pm1a
+ .write(u64::from(armed_a))
+ .map_err(|_| SleepError::UnsupportedPmControl)?;
+
+ if let Some(pm1b) = &registers.pm1_control_registers.pm1b {
+ let current_b = pm1b.read().map_err(|_| SleepError::UnsupportedPmControl)? as u16;
+ let armed_b = (current_b & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.b;
+ pm1b.write(u64::from(armed_b))
+ .map_err(|_| SleepError::UnsupportedPmControl)?;
+ pm1b.write(u64::from(armed_b | ACPI_SLP_EN))
+ .map_err(|_| SleepError::UnsupportedPmControl)?;
+ }
+
+ // SAFETY: WBINVD is required here to flush dirty cache lines before firmware powers down the CPU package.
+ unsafe {
+ core::arch::asm!("wbinvd", options(nostack, preserves_flags));
+ }
+
+ registers
+ .pm1_control_registers
+ .pm1a
+ .write(u64::from(armed_a | ACPI_SLP_EN))
+ .map_err(|_| SleepError::UnsupportedPmControl)?;
+
+ Ok(())
+}
+
+#[unsafe(naked)]
+unsafe extern "sysv64" fn enter_sleep_raw(state: usize) -> usize {
+ core::arch::naked_asm!(
+ "mov rsi, rsp",
+ "jmp {inner}",
+ inner = sym enter_sleep_raw_inner,
+ );
+}
+
+extern "C" fn enter_sleep_raw_inner(state: usize, entry_rsp: usize) -> usize {
+ let state = match state {
+ 3 => SleepState::S3,
+ 5 => SleepState::S5,
+ _ => return SleepError::InvalidSleepObject.code(),
+ };
+
+ let (registers, mut facs, interpreter) = match load_interpreter() {
+ Ok(tuple) => tuple,
+ Err(error) => return error.code(),
+ };
+ let sleep_type = match sleep_type_data_from_interpreter(&interpreter, state) {
+ Ok(data) => data,
+ Err(error) => return error.code(),
+ };
+
+ let mut context = save_cpu_context(entry_rsp);
+ context.facs_address = facs.physical_start;
+ install_wake_trampoline(context.runtime_rsp, context.cr3);
+ set_firmware_waking_vector(&mut facs, WAKE_TRAMPOLINE_PHYS);
+
+ {
+ let mut saved = SAVED_CONTEXT.lock();
+ *saved = Some(context);
+ }
+
+ // SAFETY: Suspend entry must not be interrupted while the wake vector and PM1 control block are being armed.
+ unsafe {
+ interrupt::disable();
+ }
+
+ if let Err(error) = write_pm1_control_block(registers.as_ref(), sleep_type) {
+ return error.code();
+ }
+
+ // SAFETY: The final CLI+HLT sequence is the architectural handoff point after asserting SLP_EN.
+ unsafe {
+ core::arch::asm!("cli; hlt", options(nostack));
+ }
+
+ SleepError::SleepDidNotEnter.code()
+}
+
+extern "C" fn resume_from_s3_trampoline() -> ! {
+ let mut saved = SAVED_CONTEXT.lock();
+ let context = saved.take().expect("S3 wake trampoline resumed without saved CPU context");
+ drop(saved);
+
+ // SAFETY: The saved FACS physical address was captured from the validated FADT during suspend entry.
+ if context.facs_address != 0 {
+ let mut facs = unsafe {
+ KernelAcpiHandler.map_physical_region::<Facs>(
+ context.facs_address,
+ core::mem::size_of::<Facs>(),
+ )
+ };
+ set_firmware_waking_vector(&mut facs, 0);
+ }
+
+ // SAFETY: The wake trampoline already switched to the saved kernel CR3 and long mode, so the remaining restores are architectural register state only.
+ unsafe {
+ x86::msr::wrmsr(x86::msr::IA32_EFER, context.efer);
+ core::arch::asm!("mov cr3, {}", in(reg) context.cr3, options(nostack));
+ core::arch::asm!("mov cr4, {}", in(reg) context.cr4, options(nostack));
+ core::arch::asm!("mov cr2, {}", in(reg) context.cr2, options(nostack));
+ core::arch::asm!("mov cr0, {}", in(reg) context.cr0, options(nostack));
+ core::arch::asm!("lgdt [{}]", in(reg) &context.gdtr, options(nostack));
+ core::arch::asm!("lidt [{}]", in(reg) &context.idtr, options(nostack));
+
+ task::load_tr(SegmentSelector::new(crate::arch::gdt::GDT_TSS as u16, Ring::Ring0));
+
+ x86::msr::wrmsr(x86::msr::IA32_FS_BASE, context.fs_base);
+ x86::msr::wrmsr(x86::msr::IA32_GS_BASE, context.gs_base);
+ x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, context.kernel_gs_base);
+ }
+
+ restore_fpu_state(&context);
+
+ // SAFETY: Returning with the original entry stack and RFLAGS completes the suspend call as a successful function return.
+ unsafe {
+ core::arch::asm!(
+ "mov rsp, {entry_rsp}",
+ "push {rflags}",
+ "popfq",
+ "xor eax, eax",
+ "ret",
+ entry_rsp = in(reg) context.entry_rsp,
+ rflags = in(reg) context.rflags,
+ options(noreturn)
+ );
+ }
+}
+
+pub fn enter_sleep_state(state: SleepState) -> core::result::Result<(), SleepError> {
+ #[cfg(not(target_arch = "x86_64"))]
+ {
+ let _ = state;
+ return Err(SleepError::UnsupportedArch);
+ }
+
+ #[cfg(target_arch = "x86_64")]
+ {
+ let raw = unsafe {
+ enter_sleep_raw(match state {
+ SleepState::S3 => 3,
+ SleepState::S5 => 5,
+ })
+ };
+ if raw == SLEEP_RETURN_OK {
+ Ok(())
+ } else {
+ Err(SleepError::from_code(raw))
+ }
+ }
+}
+
+pub fn available_sleep_states() -> &'static [u8] {
+ if sleep_type_data(SleepState::S3).is_ok() {
+ b"S3\nS5\n"
+ } else {
+ b"S5\n"
+ }
+}
+
+pub fn trigger_sleep_request(request: &str) -> Result<(), Error> {
+ match request.trim() {
+ "S3" => enter_sleep_state(SleepState::S3).map_err(|_| Error::new(EIO)),
+ "S5" => enter_sleep_state(SleepState::S5).map_err(|_| Error::new(EIO)),
+ _ => Err(Error::new(EINVAL)),
+ }
+}
diff --git a/src/arch/x86_shared/start.rs b/src/arch/x86_shared/start.rs
index 7a7c0ae8..f1dbb6b4 100644
--- a/src/arch/x86_shared/start.rs
+++ b/src/arch/x86_shared/start.rs
@@ -82,6 +82,15 @@ extern "C" fn kstart() {
/// The entry to Rust, all things must be initialized
unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
unsafe {
+ // EARLY CANARY: write 'R' to COM1 before any kernel init.
+ // This proves the serial hardware works and the kernel reached Rust entry.
+ // If this character appears but "Redox OS starting..." does not,
+ // the hang is in args_ptr.read(), serial::init(), or graphical_debug::init().
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ {
+ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'R', options(nostack, preserves_flags));
+ }
+
let bootstrap = {
let args = args_ptr.read();
@@ -91,27 +100,49 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
// Set up graphical debug
graphical_debug::init(args.env());
+ // SECOND CANARY: write 'S' to COM1 after serial init.
+ // If 'R' appears but 'S' does not, the hang is in serial::init() or graphical_debug::init().
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ {
+ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'S', options(nostack, preserves_flags));
+ }
+
info!("Redox OS starting...");
args.print();
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'1', options(nostack, preserves_flags)); }
+
// Set up GDT
gdt::init_bsp(stack_end);
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'2', options(nostack, preserves_flags)); }
+
// Set up IDT
idt::init_bsp();
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'3', options(nostack, preserves_flags)); }
+
// Initialize RMM
#[cfg(target_arch = "x86")]
crate::startup::memory::init(&args, Some(0x100000), Some(0x40000000));
#[cfg(target_arch = "x86_64")]
crate::startup::memory::init(&args, Some(0x100000), None);
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'4', options(nostack, preserves_flags)); }
+
// Initialize paging
paging::init();
#[cfg(target_arch = "x86_64")]
crate::arch::alternative::early_init(true);
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'5', options(nostack, preserves_flags)); }
+
// Set up syscall instruction
interrupt::syscall::init();
@@ -121,6 +152,9 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
// Activate memory logging
crate::log::init();
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'6', options(nostack, preserves_flags)); }
+
// Initialize miscellaneous processor features
#[cfg(target_arch = "x86_64")]
crate::arch::misc::init(LogicalCpuId::BSP);
@@ -128,6 +162,9 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
// Initialize devices
device::init();
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'7', options(nostack, preserves_flags)); }
+
// Read ACPI tables, starts APs
if cfg!(feature = "acpi") {
crate::acpi::init(args.acpi_rsdp());
diff --git a/src/asm/x86_64/s3_wakeup.asm b/src/asm/x86_64/s3_wakeup.asm
new file mode 100644
index 00000000..7beeccf6
--- /dev/null
+++ b/src/asm/x86_64/s3_wakeup.asm
@@ -0,0 +1,110 @@
+; ACPI S3 wake trampoline
+; compiled with nasm by build.rs, copied to physical 0x8000 before S3 entry
+
+ORG 0x8000
+SECTION .text
+USE16
+
+trampoline:
+ jmp short startup_wake
+ times 8 - ($ - trampoline) nop
+ .stack: dq 0
+ .page_table: dq 0
+ .code: dq 0
+
+startup_wake:
+ cli
+
+ xor ax, ax
+ mov ds, ax
+ mov es, ax
+ mov ss, ax
+ mov sp, 0
+
+ mov edi, [trampoline.page_table]
+ mov cr3, edi
+
+ mov eax, cr0
+ and al, 11110011b
+ or al, 00100010b
+ mov cr0, eax
+
+ mov eax, cr4
+ or eax, 1 << 9 | 1 << 7 | 1 << 5 | 1 << 4
+ mov cr4, eax
+
+ fninit
+
+ lgdt [gdtr]
+
+ mov ecx, 0xC0000080
+ rdmsr
+ or eax, 1 << 11 | 1 << 8
+ wrmsr
+
+ mov ebx, cr0
+ or ebx, 1 << 31 | 1 << 16 | 1
+ mov cr0, ebx
+
+ jmp gdt.kernel_code:long_mode_wake
+
+USE64
+long_mode_wake:
+ mov rax, gdt.kernel_data
+ mov ds, rax
+ mov es, rax
+ mov fs, rax
+ mov gs, rax
+ mov ss, rax
+
+ mov rsp, [trampoline.stack]
+ mov rax, [trampoline.code]
+ jmp rax
+
+struc GDTEntry
+ .limitl resw 1
+ .basel resw 1
+ .basem resb 1
+ .attribute resb 1
+ .flags__limith resb 1
+ .baseh resb 1
+endstruc
+
+attrib:
+ .present equ 1 << 7
+ .user equ 1 << 4
+ .code equ 1 << 3
+ .writable equ 1 << 1
+
+flags:
+ .long_mode equ 1 << 5
+
+gdtr:
+ dw gdt.end + 1
+ dq gdt
+
+gdt:
+.null equ $ - gdt
+ dq 0
+
+.kernel_code equ $ - gdt
+istruc GDTEntry
+ at GDTEntry.limitl, dw 0
+ at GDTEntry.basel, dw 0
+ at GDTEntry.basem, db 0
+ at GDTEntry.attribute, db attrib.present | attrib.user | attrib.code
+ at GDTEntry.flags__limith, db flags.long_mode
+ at GDTEntry.baseh, db 0
+iend
+
+.kernel_data equ $ - gdt
+istruc GDTEntry
+ at GDTEntry.limitl, dw 0
+ at GDTEntry.basel, dw 0
+ at GDTEntry.basem, db 0
+ at GDTEntry.attribute, db attrib.present | attrib.user | attrib.writable
+ at GDTEntry.flags__limith, db 0
+ at GDTEntry.baseh, db 0
+iend
+
+.end equ $ - gdt
diff --git a/src/context/context.rs b/src/context/context.rs
index c97c5166..6d723f49 100644
--- a/src/context/context.rs
+++ b/src/context/context.rs
@@ -148,6 +148,8 @@ pub struct Context {
pub euid: u32,
pub egid: u32,
pub pid: usize,
+ /// Supplementary group IDs for access control decisions.
+ pub groups: Vec<u32>,
// See [`PreemptGuard`]
//
@@ -204,6 +206,7 @@ impl Context {
euid: 0,
egid: 0,
pid: 0,
+ groups: Vec::new(),
#[cfg(feature = "syscall_debug")]
syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(),
@@ -479,6 +482,7 @@ impl Context {
uid: self.euid,
gid: self.egid,
pid: self.pid,
+ groups: self.groups.clone(),
}
}
}
diff --git a/src/context/file.rs b/src/context/file.rs
index 2d3790f1..150f483a 100644
--- a/src/context/file.rs
+++ b/src/context/file.rs
@@ -4,7 +4,7 @@ use crate::{
event,
scheme::{self, SchemeId},
sync::{CleanLockToken, RwLock, L6},
- syscall::error::Result,
+ syscall::error::{Error, Result, ESTALE},
};
use alloc::sync::Arc;
use syscall::{schemev2::NewFdFlags, RwFlags, O_APPEND, O_NONBLOCK};
@@ -18,6 +18,7 @@ pub struct FileDescription {
pub offset: u64,
/// The scheme that this file refers to
pub scheme: SchemeId,
+ pub scheme_generation: Option<u64>,
/// The number the scheme uses to refer to this file
pub number: usize,
/// The flags passed to open or fcntl(SETFL)
@@ -32,6 +33,52 @@ bitflags! {
}
}
impl FileDescription {
+ pub fn with_generation(
+ scheme: SchemeId,
+ scheme_generation: Option<u64>,
+ number: usize,
+ offset: u64,
+ flags: u32,
+ internal_flags: InternalFlags,
+ ) -> Self {
+ Self {
+ offset,
+ scheme,
+ scheme_generation,
+ number,
+ flags,
+ internal_flags,
+ }
+ }
+
+ pub fn new(
+ scheme: SchemeId,
+ number: usize,
+ offset: u64,
+ flags: u32,
+ internal_flags: InternalFlags,
+ token: &mut CleanLockToken,
+ ) -> Self {
+ Self::with_generation(
+ scheme,
+ Some(scheme::current_scheme_generation(token.token(), scheme)),
+ number,
+ offset,
+ flags,
+ internal_flags,
+ )
+ }
+
+ pub fn get_scheme(&self, token: &mut CleanLockToken) -> Result<scheme::KernelSchemes> {
+ if let Some(expected_generation) = self.scheme_generation
+ && expected_generation != scheme::current_scheme_generation(token.token(), self.scheme)
+ {
+ return Err(Error::new(ESTALE));
+ }
+
+ scheme::get_scheme(token.token(), self.scheme)
+ }
+
pub fn rw_flags(&self, rw: RwFlags) -> u32 {
let mut ret = self.flags & !(O_NONBLOCK | O_APPEND) as u32;
if rw.contains(RwFlags::APPEND) {
@@ -76,7 +123,7 @@ impl FileDescription {
pub fn try_close(self, token: &mut CleanLockToken) -> Result<()> {
event::unregister_file(self.scheme, self.number, token);
- let scheme = scheme::get_scheme(token.token(), self.scheme)?;
+ let scheme = self.get_scheme(token)?;
scheme.close(self.number, token)
}
@@ -85,12 +132,12 @@ impl FileDescription {
impl FileDescriptor {
pub fn close(self, token: &mut CleanLockToken) -> Result<()> {
{
- let (scheme_id, number, internal_flags) = {
+ let (desc, number, internal_flags) = {
let desc = self.description.read(token.token());
- (desc.scheme, desc.number, desc.internal_flags)
+ (*desc, desc.number, desc.internal_flags)
};
if internal_flags.contains(InternalFlags::NOTIFY_ON_NEXT_DETACH) {
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+ let scheme = desc.get_scheme(token)?;
scheme.detach(number, token)?;
}
}
diff --git a/src/context/memory.rs b/src/context/memory.rs
index 93446ba7..127a34fd 100644
--- a/src/context/memory.rs
+++ b/src/context/memory.rs
@@ -64,14 +64,13 @@ impl UnmapResult {
return Ok(());
};
- let (scheme_id, number) = {
- let desc = description.write(token.token());
- (desc.scheme, desc.number)
+ let (scheme, number) = {
+ let desc = *description.read(token.token());
+ (desc.get_scheme(token)?, desc.number)
};
- let scheme_opt = scheme::get_scheme(token.token(), scheme_id);
- let funmap_result = scheme_opt
- .and_then(|scheme| scheme.kfunmap(number, base_offset, self.size, self.flags, token));
+ let funmap_result = scheme
+ .kfunmap(number, base_offset, self.size, self.flags, token);
if let Ok(fd) = Arc::try_unwrap(description) {
fd.into_inner().try_close(token)?;
@@ -2687,20 +2686,13 @@ fn correct_inner<'l>(
// XXX: This is cheating, but guaranteed we won't deadlock because we've dropped addr_space_guard
let mut token = unsafe { CleanLockToken::new() };
- let (scheme_id, scheme_number) = {
- let desc = &file_ref.description.read(token.token());
- (desc.scheme, desc.number)
+ let desc = *file_ref.description.read(token.token());
+ let scheme = desc.get_scheme(&mut token).map_err(|_| PfError::Segv)?;
+ let scheme_number = desc.number;
+ let user_inner = match scheme {
+ KernelSchemes::User(user) => user.inner,
+ _ => return Err(PfError::Segv),
};
- let user_inner = scheme::get_scheme(token.token(), scheme_id)
- .ok()
- .and_then(|s| {
- if let KernelSchemes::User(user) = s {
- Some(user.inner)
- } else {
- None
- }
- })
- .ok_or(PfError::Segv)?;
let offset = file_ref.base_offset as u64 + (pages_from_grant_start * PAGE_SIZE) as u64;
user_inner
diff --git a/src/scheme/acpi.rs b/src/scheme/acpi.rs
index 87570a12..5d734691 100644
--- a/src/scheme/acpi.rs
+++ b/src/scheme/acpi.rs
@@ -10,6 +10,7 @@ use syscall::{
use crate::{
acpi::{RxsdtEnum, RXSDT_ENUM},
+ arch::sleep,
context::file::InternalFlags,
event,
sync::{CleanLockToken, RwLock, WaitCondition, L1},
@@ -40,6 +41,7 @@ enum HandleKind {
TopLevel,
Rxsdt,
ShutdownPipe,
+ SleepControl,
SchemeRoot,
}
@@ -146,11 +148,11 @@ impl KernelScheme for AcpiScheme {
if flags & O_EXCL == O_EXCL || flags & O_SYMLINK == O_SYMLINK {
return Err(Error::new(EINVAL));
}
- if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
- return Err(Error::new(EROFS));
- }
let (handle_kind, int_flags) = match path {
"" => {
+ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
+ return Err(Error::new(EROFS));
+ }
if flags & O_DIRECTORY != O_DIRECTORY && flags & O_STAT != O_STAT {
return Err(Error::new(EISDIR));
}
@@ -158,17 +160,36 @@ impl KernelScheme for AcpiScheme {
(HandleKind::TopLevel, InternalFlags::POSITIONED)
}
"rxsdt" => {
+ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
+ return Err(Error::new(EROFS));
+ }
if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT {
return Err(Error::new(ENOTDIR));
}
(HandleKind::Rxsdt, InternalFlags::POSITIONED)
}
"kstop" => {
+ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
+ return Err(Error::new(EROFS));
+ }
if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT {
return Err(Error::new(ENOTDIR));
}
(HandleKind::ShutdownPipe, InternalFlags::empty())
}
+ "sleep" => {
+ if flags & O_ACCMODE == O_RDONLY || flags & O_STAT == O_STAT {
+ // allowed
+ } else if flags & O_ACCMODE != syscall::flag::O_WRONLY
+ && flags & O_ACCMODE != syscall::flag::O_RDWR
+ {
+ return Err(Error::new(EINVAL));
+ }
+ if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT {
+ return Err(Error::new(ENOTDIR));
+ }
+ (HandleKind::SleepControl, InternalFlags::POSITIONED)
+ }
_ => return Err(Error::new(ENOENT)),
};
@@ -191,6 +212,7 @@ impl KernelScheme for AcpiScheme {
Ok(match handle.kind {
HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?.len() as u64,
HandleKind::ShutdownPipe => 1,
+ HandleKind::SleepControl => sleep::available_sleep_states().len() as u64,
HandleKind::TopLevel => 0,
HandleKind::SchemeRoot => return Err(Error::new(EBADF))?,
})
@@ -253,6 +275,7 @@ impl KernelScheme for AcpiScheme {
return dst_buf.copy_exactly(&[0x42]).map(|()| 1);
}
+ HandleKind::SleepControl => sleep::available_sleep_states(),
HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?,
HandleKind::TopLevel => return Err(Error::new(EISDIR)),
HandleKind::SchemeRoot => return Err(Error::new(EBADF)),
@@ -295,11 +318,45 @@ impl KernelScheme for AcpiScheme {
kind: DirentKind::Socket,
name: "kstop",
inode: 0,
+ next_opaque_id: 2,
+ })?;
+ }
+ if opaque <= 2 {
+ buf.entry(DirEntry {
+ kind: DirentKind::Regular,
+ name: "sleep",
+ inode: 0,
next_opaque_id: u64::MAX,
})?;
}
Ok(buf.finalize())
}
+ fn kwrite(
+ &self,
+ id: usize,
+ buf: crate::syscall::usercopy::UserSliceRo,
+ _flags: u32,
+ _stored_flags: u32,
+ token: &mut CleanLockToken,
+ ) -> Result<usize> {
+ let handle = *HANDLES.read(token.token()).get(id)?;
+
+ if handle.stat {
+ return Err(Error::new(EBADF));
+ }
+
+ match handle.kind {
+ HandleKind::SleepControl => {
+ let mut tmp = [0_u8; 16];
+ let len = buf.copy_common_bytes_to_slice(&mut tmp)?;
+ let request = core::str::from_utf8(&tmp[..len]).map_err(|_| Error::new(EINVAL))?;
+ sleep::trigger_sleep_request(request)?;
+ Ok(len)
+ }
+ HandleKind::SchemeRoot => Err(Error::new(EBADF)),
+ _ => Err(Error::new(EBADF)),
+ }
+ }
fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result<usize> {
//TODO: construct useful path?
buf.copy_common_bytes_from_slice("/scheme/kernel.acpi/".as_bytes())
@@ -328,6 +385,11 @@ impl KernelScheme for AcpiScheme {
st_size: 1,
..Default::default()
},
+ HandleKind::SleepControl => Stat {
+ st_mode: MODE_FILE,
+ st_size: sleep::available_sleep_states().len().try_into().unwrap_or(u64::MAX),
+ ..Default::default()
+ },
HandleKind::SchemeRoot => return Err(Error::new(EBADF)),
})?;
diff --git a/src/scheme/debug.rs b/src/scheme/debug.rs
index c70ac579..4a23b3cf 100644
--- a/src/scheme/debug.rs
+++ b/src/scheme/debug.rs
@@ -22,9 +22,10 @@ struct Handle {
static HANDLES: RwLock<L1, HandleMap<Handle>> = RwLock::new(HandleMap::new());
-/// Add to the input queue
+/// Add to the input queue, translating CR to NL (ICRNL) for serial console compatibility.
pub fn debug_input(data: u8, token: &mut CleanLockToken) {
- INPUT.send(data, token);
+ let translated = if data == b'\r' { b'\n' } else { data };
+ INPUT.send(translated, token);
}
// Notify readers of input updates
@@ -106,12 +107,16 @@ impl KernelScheme for DebugScheme {
fn fevent(
&self,
id: usize,
- _flags: EventFlags,
+ flags: EventFlags,
token: &mut CleanLockToken,
) -> Result<EventFlags> {
let _handle = *HANDLES.read(token.token()).get(id)?;
- Ok(EventFlags::empty())
+ let mut ready = EventFlags::empty();
+ if flags.contains(EventFlags::EVENT_READ) {
+ ready |= EventFlags::EVENT_READ;
+ }
+ Ok(ready)
}
fn fsync(&self, id: usize, token: &mut CleanLockToken) -> Result<()> {
diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs
index d30272c1..765e547f 100644
--- a/src/scheme/mod.rs
+++ b/src/scheme/mod.rs
@@ -14,7 +14,7 @@ use alloc::{
};
use core::{
str,
- sync::atomic::{AtomicUsize, Ordering},
+ sync::atomic::{AtomicU64, AtomicUsize, Ordering},
};
use hashbrown::hash_map::{self, DefaultHashBuilder, HashMap};
use spin::Once;
@@ -169,6 +169,7 @@ enum Handle {
/// Schemes list
static HANDLES: Once<RwLock<L1, HashMap<SchemeId, Handle>>> = Once::new();
+static SCHEME_GENERATIONS: Once<RwLock<L1, HashMap<SchemeId, AtomicU64>>> = Once::new();
static SCHEME_LIST_NEXT_ID: AtomicUsize = AtomicUsize::new(MAX_GLOBAL_SCHEMES);
static SCHEME_LIST_ID: AtomicUsize = AtomicUsize::new(0);
@@ -204,6 +205,10 @@ fn init_schemes() -> RwLock<L1, HashMap<SchemeId, Handle>> {
RwLock::new(handles)
}
+fn init_scheme_generations() -> RwLock<L1, HashMap<SchemeId, AtomicU64>> {
+ RwLock::new(HashMap::new())
+}
+
/// Get a handle to a scheme.
pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result<KernelSchemes> {
match handles().read(token).get(&scheme_id) {
@@ -212,10 +217,33 @@ pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result<Kerne
}
}
+pub fn current_scheme_generation(token: LockToken<'_, L0>, scheme_id: SchemeId) -> u64 {
+ scheme_generations()
+ .read(token)
+ .get(&scheme_id)
+ .map(|generation| generation.load(Ordering::Acquire))
+ .unwrap_or(0)
+}
+
fn handles<'a>() -> &'a RwLock<L1, HashMap<SchemeId, Handle>> {
HANDLES.call_once(init_schemes)
}
+fn scheme_generations<'a>() -> &'a RwLock<L1, HashMap<SchemeId, AtomicU64>> {
+ SCHEME_GENERATIONS.call_once(init_scheme_generations)
+}
+
+fn increment_scheme_generation(scheme_id: SchemeId, token: &mut CleanLockToken) {
+ match scheme_generations().write(token.token()).entry(scheme_id) {
+ hash_map::Entry::Occupied(entry) => {
+ entry.get().fetch_add(1, Ordering::AcqRel);
+ }
+ hash_map::Entry::Vacant(entry) => {
+ entry.insert(AtomicU64::new(1));
+ }
+ }
+}
+
/// Scheme list type
pub struct SchemeList;
@@ -260,9 +288,14 @@ impl SchemeList {
/// Remove a scheme
fn remove(&self, id: usize, token: &mut CleanLockToken) {
- let scheme = handles().write(token.token()).remove(&SchemeId(id));
+ let scheme_id = SchemeId(id);
+ let scheme = handles().write(token.token()).remove(&scheme_id);
assert!(scheme.is_some());
+ if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme.as_ref() {
+ user.inner.fail_pending_calls(token);
+ }
+ increment_scheme_generation(scheme_id, token);
if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme
&& let Some(user) = Arc::into_inner(user.inner)
{
@@ -287,32 +320,32 @@ impl KernelScheme for SchemeList {
token: &mut CleanLockToken,
) -> Result<OpenResult> {
let scheme_id = SchemeId(scheme_id);
- match handles()
- .read(token.token())
- .get(&scheme_id)
- .ok_or(Error::new(EBADF))?
- {
- Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => {
- let inner = inner.clone();
- assert!(scheme_id == inner.scheme_id);
- let scheme = scheme_id;
- let params = unsafe { user_buf.read_exact::<NewFdParams>()? };
-
- return Ok(OpenResult::External(Arc::new(RwLock::new(
- FileDescription {
- scheme,
- number: params.number,
- offset: params.offset,
- flags: params.flags as u32,
- internal_flags: InternalFlags::from_extra0(params.internal_flags)
- .ok_or(Error::new(EINVAL))?,
- },
- ))));
+ let maybe_inner = {
+ let handles = handles().read(token.token());
+ match handles.get(&scheme_id).ok_or(Error::new(EBADF))? {
+ Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => Some(inner.clone()),
+ Handle::SchemeCreationCapability => None,
+ _ => return Err(Error::new(EBADF)),
}
- Handle::SchemeCreationCapability => (),
- _ => return Err(Error::new(EBADF)),
};
+ if let Some(inner) = maybe_inner {
+ assert!(scheme_id == inner.scheme_id);
+ let params = unsafe { user_buf.read_exact::<NewFdParams>()? };
+
+ return Ok(OpenResult::External(Arc::new(RwLock::new(
+ FileDescription::new(
+ scheme_id,
+ params.number,
+ params.offset,
+ params.flags as u32,
+ InternalFlags::from_extra0(params.internal_flags)
+ .ok_or(Error::new(EINVAL))?,
+ token,
+ ),
+ ))));
+ }
+
const EXPECTED: &[u8] = b"create-scheme";
let mut buf = [0u8; EXPECTED.len()];
@@ -777,6 +810,7 @@ pub struct CallerCtx {
pub pid: usize,
pub uid: u32,
pub gid: u32,
+ pub groups: alloc::vec::Vec<u32>,
}
impl CallerCtx {
pub fn filter_uid_gid(self, euid: u32, egid: u32) -> Self {
@@ -785,6 +819,7 @@ impl CallerCtx {
pid: self.pid,
uid: euid,
gid: egid,
+ groups: self.groups,
}
} else {
self
diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs
index 47588e10..f38c4aec 100644
--- a/src/scheme/proc.rs
+++ b/src/scheme/proc.rs
@@ -105,6 +105,7 @@ enum ContextHandle {
// Attr handles, to set ens/euid/egid/pid.
Authority,
Attr,
+ Groups,
Status {
privileged: bool,
@@ -261,6 +262,7 @@ impl ProcScheme {
let handle = match actual_name {
"attrs" => ContextHandle::Attr,
"status" => ContextHandle::Status { privileged: true },
+ "groups" => ContextHandle::Groups,
_ => return Err(Error::new(ENOENT)),
};
@@ -306,6 +308,11 @@ impl ProcScheme {
let id = NonZeroUsize::new(NEXT_ID.fetch_add(1, Ordering::Relaxed))
.ok_or(Error::new(EMFILE))?;
let context = context::spawn(true, Some(id), ret, token)?;
+ {
+ let parent_groups =
+ context::current().read(token.token()).groups.clone();
+ context.write(token.token()).groups = parent_groups;
+ }
HANDLES.write(token.token()).insert(
id.get(),
Handle {
@@ -849,17 +856,17 @@ impl KernelScheme for ProcScheme {
}
}
fn extract_scheme_number(fd: usize, token: &mut CleanLockToken) -> Result<(KernelSchemes, usize)> {
- let (scheme_id, number) = {
+ let desc = {
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
+ let (context, mut context_token) = current.token_split();
let file_descriptor = context
- .get_file(FileHandle::from(fd), &mut token)
+ .get_file(FileHandle::from(fd), &mut context_token)
.ok_or(Error::new(EBADF))?;
- let desc = file_descriptor.description.read(token.token());
- (desc.scheme, desc.number)
+ *file_descriptor.description.read(context_token.token())
};
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+ let scheme = desc.get_scheme(token)?;
+ let number = desc.number;
Ok((scheme, number))
}
@@ -1271,6 +1278,39 @@ impl ContextHandle {
guard.prio = (info.prio as usize).min(39);
Ok(size_of::<ProcSchemeAttrs>())
}
+ Self::Groups => {
+ const NGROUPS_MAX: usize = 65536;
+ if buf.len() % size_of::<u32>() != 0 {
+ return Err(Error::new(EINVAL));
+ }
+ let count = buf.len() / size_of::<u32>();
+ if count > NGROUPS_MAX {
+ return Err(Error::new(EINVAL));
+ }
+ let mut groups = Vec::with_capacity(count);
+ for chunk in buf.in_exact_chunks(size_of::<u32>()).take(count) {
+ groups.push(chunk.read_u32()?);
+ }
+ let proc_id = {
+ let guard = context.read(token.token());
+ guard.owner_proc_id
+ };
+ {
+ let mut guard = context.write(token.token());
+ guard.groups = groups.clone();
+ }
+ if let Some(pid) = proc_id {
+ let mut contexts = context::contexts(token.downgrade());
+ let (contexts, mut t) = contexts.token_split();
+ for context_ref in contexts.iter() {
+ let mut ctx = context_ref.write(t.token());
+ if ctx.owner_proc_id == Some(pid) {
+ ctx.groups = groups.clone();
+ }
+ }
+ }
+ Ok(count * size_of::<u32>())
+ }
ContextHandle::OpenViaDup => {
let mut args = buf.usizes();
@@ -1475,6 +1515,15 @@ impl ContextHandle {
debug_name,
})
}
+ Self::Groups => {
+ let c = &context.read(token.token());
+ let max = buf.len() / size_of::<u32>();
+ let count = c.groups.len().min(max);
+ for (chunk, gid) in buf.in_exact_chunks(size_of::<u32>()).zip(&c.groups).take(count) {
+ chunk.copy_from_slice(&gid.to_ne_bytes())?;
+ }
+ Ok(count * size_of::<u32>())
+ }
ContextHandle::Sighandler => {
let data = match context.read(token.token()).sig {
Some(ref sig) => SetSighandlerData {
diff --git a/src/scheme/user.rs b/src/scheme/user.rs
index b9013021..dfbf66b1 100644
--- a/src/scheme/user.rs
+++ b/src/scheme/user.rs
@@ -80,6 +80,7 @@ const ONE: NonZeroUsize = match NonZeroUsize::new(1) {
Some(one) => one,
None => unreachable!(),
};
+const MAX_SPURIOUS_WAKEUPS: usize = 100;
enum ParsedCqe {
TriggerFevent {
@@ -209,6 +210,8 @@ impl UserInner {
caller_responsible: &mut PageSpan,
token: &mut CleanLockToken,
) -> Result<Response> {
+ let mut remaining_spurious_wakeups = MAX_SPURIOUS_WAKEUPS;
+
{
// Disable preemption to avoid context switches between setting the
// process state and sending the scheme request. The process is made
@@ -261,7 +264,10 @@ impl UserInner {
};
let states = self.states.lock(token.token());
- let (mut states, mut token) = states.into_split();
+ let (mut states, mut state_token) = states.into_split();
+ let mut timed_out_descriptions = None;
+ let mut remove_state = false;
+ let mut timed_out = false;
match states.get_mut(sqe.tag as usize) {
// invalid state
None => return Err(Error::new(EBADFD)),
@@ -274,24 +280,35 @@ impl UserInner {
fds,
} => {
let maybe_eintr =
- eintr_if_sigkill(&mut callee_responsible, &mut token.token());
- *o = State::Waiting {
- canceling: true,
- callee_responsible,
- context,
- fds,
- };
+ eintr_if_sigkill(&mut callee_responsible, &mut state_token.token());
- maybe_eintr?;
+ if maybe_eintr.is_ok() {
+ remaining_spurious_wakeups =
+ remaining_spurious_wakeups.saturating_sub(1);
+ }
+
+ if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 {
+ timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds));
+ remove_state = true;
+ } else {
+ *o = State::Waiting {
+ canceling: true,
+ callee_responsible,
+ context,
+ fds,
+ };
+ }
- context::current()
- .write(token.token())
- .block("UserInner::call (woken up after cancelation request)");
+ maybe_eintr?;
- // We do not want to drop the lock before blocking
- // as if we get preempted in between we might miss a
- // wakeup.
- drop(states);
+ if remove_state {
+ states.remove(sqe.tag as usize);
+ timed_out = true;
+ } else {
+ context::current()
+ .write(state_token.token())
+ .block("UserInner::call (woken up after cancelation request)");
+ }
}
// spurious wakeup
State::Waiting {
@@ -300,60 +317,76 @@ impl UserInner {
context,
mut callee_responsible,
} => {
- let maybe_eintr = eintr_if_sigkill(&mut callee_responsible, &mut token);
let current_context = context::current();
+ let maybe_eintr =
+ eintr_if_sigkill(&mut callee_responsible, &mut state_token);
+
+ if maybe_eintr.is_ok() {
+ remaining_spurious_wakeups =
+ remaining_spurious_wakeups.saturating_sub(1);
+ }
- *o = State::Waiting {
- // Currently we treat all spurious wakeups to have the same behavior
- // as signals (i.e., we send a cancellation request). It is not something
- // that should happen, but it certainly can happen, for example if a context
- // is awoken through its thread handle without setting any sig bits, or if the
- // caller clears its own sig bits. If it actually is a signal, then it is the
- // intended behavior.
- canceling: true,
- fds,
- context,
- callee_responsible,
- };
+ if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 {
+ timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds));
+ remove_state = true;
+ } else {
+ *o = State::Waiting {
+ // Currently we treat all spurious wakeups to have the same behavior
+ // as signals (i.e., we send a cancellation request). It is not something
+ // that should happen, but it certainly can happen, for example if a context
+ // is awoken through its thread handle without setting any sig bits, or if the
+ // caller clears its own sig bits. If it actually is a signal, then it is the
+ // intended behavior.
+ canceling: true,
+ fds,
+ context,
+ callee_responsible,
+ };
+ }
maybe_eintr?;
- // We do not want to preempt between sending the
- // cancellation and blocking again where we might
- // miss a wakeup.
- let mut preempt = PreemptGuardL1::new(&current_context, &mut token);
- let token = preempt.token();
-
- self.todo.send_locked(
- Sqe {
- opcode: Opcode::Cancel as u8,
- sqe_flags: SqeFlags::ONEWAY,
- tag: sqe.tag,
- ..Default::default()
- },
- token.token(),
- );
- event::trigger_locked(
- self.root_id,
- self.scheme_id.get(),
- EVENT_READ,
- token.token(),
- );
-
- // 1. If cancellation was requested and arrived
- // before the scheme processed the request, an
- // acknowledgement will be sent back after the
- // cancellation is processed and we will be woken up
- // again. State will be State::Responded then.
- //
- // 2. If cancellation was requested but the scheme
- // already processed the request, we will receive
- // the actual response next and woken up again.
- // State will be State::Responded then.
- context::current()
- .write(token.token())
- .block("UserInner::call (spurious wakeup)");
- drop(states);
+ if remove_state {
+ states.remove(sqe.tag as usize);
+ timed_out = true;
+ } else {
+ // We do not want to preempt between sending the
+ // cancellation and blocking again where we might
+ // miss a wakeup.
+ let mut preempt =
+ PreemptGuardL1::new(&current_context, &mut state_token);
+ let token = preempt.token();
+
+ self.todo.send_locked(
+ Sqe {
+ opcode: Opcode::Cancel as u8,
+ sqe_flags: SqeFlags::ONEWAY,
+ tag: sqe.tag,
+ ..Default::default()
+ },
+ token.token(),
+ );
+ event::trigger_locked(
+ self.root_id,
+ self.scheme_id.get(),
+ EVENT_READ,
+ token.token(),
+ );
+
+ // 1. If cancellation was requested and arrived
+ // before the scheme processed the request, an
+ // acknowledgement will be sent back after the
+ // cancellation is processed and we will be woken up
+ // again. State will be State::Responded then.
+ //
+ // 2. If cancellation was requested but the scheme
+ // already processed the request, we will receive
+ // the actual response next and woken up again.
+ // State will be State::Responded then.
+ context::current()
+ .write(token.token())
+ .block("UserInner::call (spurious wakeup)");
+ }
}
// invalid state
@@ -368,7 +401,67 @@ impl UserInner {
}
},
}
+
+ if let Some(descriptions) = timed_out_descriptions {
+ drop(states);
+ for desc in descriptions {
+ let _ = desc.try_close(token);
+ }
+ }
+
+ if timed_out {
+ return Err(Error::new(ETIMEDOUT));
+ }
+ }
+ }
+ }
+
+ fn collect_descriptions_to_close(
+ fds: Vec<Arc<LockedFileDescription>>,
+ ) -> Vec<FileDescription> {
+ fds.into_iter()
+ .filter_map(|fd| Arc::try_unwrap(fd).ok())
+ .map(RwLock::into_inner)
+ .collect()
+ }
+
+ pub fn fail_pending_calls(&self, token: &mut CleanLockToken) {
+ let descriptions_to_close = {
+ let mut states_lock = self.states.lock(token.token());
+ let (states, mut lock_token) = states_lock.token_split();
+ let mut descriptions_to_close = Vec::new();
+ let mut states_to_remove = Vec::new();
+
+ for (id, state) in states.iter_mut() {
+ match mem::replace(state, State::Placeholder) {
+ State::Waiting { context, fds, .. } => {
+ descriptions_to_close.extend(Self::collect_descriptions_to_close(fds));
+
+ match context.upgrade() {
+ Some(context) => {
+ *state = State::Responded(Response::Regular(
+ Err(Error::new(ENODEV)),
+ 0,
+ false,
+ ));
+ context.write(lock_token.token()).unblock();
+ }
+ None => states_to_remove.push(id),
+ }
+ }
+ old_state => *state = old_state,
+ }
}
+
+ for id in states_to_remove {
+ states.remove(id);
+ }
+
+ descriptions_to_close
+ };
+
+ for desc in descriptions_to_close {
+ let _ = desc.try_close(token);
}
}
@@ -1283,6 +1376,7 @@ impl UserInner {
}
pub fn into_drop(self, token: &mut CleanLockToken) {
+ self.fail_pending_calls(token);
self.todo.condition.into_drop(token);
}
}
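In the user.rs hunks above, repeated spurious wakeups are now bounded by MAX_SPURIOUS_WAKEUPS; exhausting the budget removes the pending state, closes the file descriptions it held, and reports ETIMEDOUT instead of blocking again. The standalone sketch below only illustrates that bounded-retry shape with a dummy poll closure; none of its names are kernel API.

// Illustrative bounded-retry loop: give up with a timeout error once the
// budget of tolerated spurious wakeups is exhausted, in the spirit of the
// patched UserInner::call.
const MAX_SPURIOUS_WAKEUPS: usize = 100;

#[derive(Debug, PartialEq)]
enum WaitError {
    TimedOut,
}

fn wait_with_budget(mut poll: impl FnMut() -> Option<u32>) -> Result<u32, WaitError> {
    let mut remaining = MAX_SPURIOUS_WAKEUPS;
    loop {
        if let Some(answer) = poll() {
            return Ok(answer);
        }
        // A wakeup without a response is "spurious"; spend one unit of budget.
        remaining = remaining.saturating_sub(1);
        if remaining == 0 {
            // In the kernel this is also where the stale request's file
            // descriptions are closed before ETIMEDOUT is reported.
            return Err(WaitError::TimedOut);
        }
    }
}

fn main() {
    let mut polls = 0;
    let result = wait_with_budget(|| {
        polls += 1;
        (polls == 3).then_some(42)
    });
    assert_eq!(result, Ok(42));

    assert_eq!(wait_with_budget(|| None), Err(WaitError::TimedOut));
}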
diff --git a/src/startup/memory.rs b/src/startup/memory.rs
index 26922dde..9fb5fb10 100644
--- a/src/startup/memory.rs
+++ b/src/startup/memory.rs
@@ -74,14 +74,16 @@ impl MemoryEntry {
}
struct MemoryMap {
- entries: [MemoryEntry; 512],
+ entries: [MemoryEntry; 1024],
size: usize,
}
impl MemoryMap {
fn register(&mut self, base: usize, size: usize, kind: BootloaderMemoryKind) {
if self.size >= self.entries.len() {
- panic!("Early memory map overflow!");
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ unsafe { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'!', options(nostack, preserves_flags)); }
+ panic!("Early memory map overflow at entry {} (max {})", self.size, self.entries.len());
}
let start = if kind == BootloaderMemoryKind::Free {
align_up(base)
@@ -134,7 +136,7 @@ static MEMORY_MAP: SyncUnsafeCell<MemoryMap> = SyncUnsafeCell::new(MemoryMap {
start: 0,
end: 0,
kind: BootloaderMemoryKind::Null,
- }; 512],
+ }; 1024],
size: 0,
});
@@ -323,7 +325,16 @@ unsafe fn map_memory<A: Arch>(areas: &[MemoryArea], mut bump_allocator: &mut Bum
}
}
- let kernel_area = (*MEMORY_MAP.get()).kernel().unwrap();
+ let kernel_area = match (*MEMORY_MAP.get()).kernel() {
+ Some(area) => area,
+ None => {
+ println!("FATAL: kernel memory area not found in boot memory map");
+ println!("Cannot determine kernel base address. Halting.");
+ loop {
+ core::hint::spin_loop();
+ }
+ }
+ };
let kernel_base = kernel_area.start;
let kernel_size = kernel_area.end.saturating_sub(kernel_area.start);
// Map kernel at KERNEL_OFFSET
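The memory.rs hunks above grow the early memory map from 512 to 1024 entries and, on overflow, emit b'!' on COM1 (port 0x3F8) before panicking with the offending entry count. The sketch below mirrors only the bounded-insert guard in ordinary userspace Rust; the types are stand-ins and the serial poke is replaced by an error string.

// Illustrative fixed-capacity table insert with an explicit overflow report,
// in the shape of MemoryMap::register (Entry/Table are stand-in types).
#[derive(Clone, Copy, Debug, Default)]
struct Entry {
    start: usize,
    end: usize,
}

struct Table {
    entries: [Entry; 1024],
    size: usize,
}

impl Table {
    fn register(&mut self, start: usize, end: usize) -> Result<(), String> {
        if self.size >= self.entries.len() {
            // The kernel patch writes b'!' to COM1 here before panicking;
            // this userspace sketch just reports the limit instead.
            return Err(format!(
                "early memory map overflow at entry {} (max {})",
                self.size,
                self.entries.len()
            ));
        }
        self.entries[self.size] = Entry { start, end };
        self.size += 1;
        Ok(())
    }
}

fn main() {
    let mut table = Table {
        entries: [Entry::default(); 1024],
        size: 0,
    };
    assert!(table.register(0x1000, 0x2000).is_ok());
    assert_eq!(table.size, 1);
}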
diff --git a/src/startup/mod.rs b/src/startup/mod.rs
index 8ad3cdf7..86aabc22 100644
--- a/src/startup/mod.rs
+++ b/src/startup/mod.rs
@@ -149,6 +149,15 @@ static BOOTSTRAP: spin::Once<Bootstrap> = spin::Once::new();
pub(crate) static AP_READY: AtomicBool = AtomicBool::new(false);
static BSP_READY: AtomicBool = AtomicBool::new(false);
+#[cold]
+fn halt_boot(message: &str) -> ! {
+ print!("{message}");
+ println!("Kernel boot cannot continue. Halting.");
+ loop {
+ hint::spin_loop();
+ }
+}
+
/// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this
pub(crate) fn kmain(bootstrap: Bootstrap) -> ! {
let mut token = unsafe { CleanLockToken::new() };
@@ -180,9 +189,7 @@ pub(crate) fn kmain(bootstrap: Bootstrap) -> ! {
context.euid = 0;
context.egid = 0;
}
- Err(err) => {
- panic!("failed to spawn userspace_init: {:?}", err);
- }
+ Err(_err) => halt_boot("FATAL: failed to spawn first userspace process userspace_init\n"),
}
run_userspace(&mut token)
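The startup/mod.rs hunks above route the userspace_init failure through a #[cold], diverging halt_boot helper instead of panicking. A minimal userspace-flavoured sketch of the same shape follows, using std printing and std::hint::spin_loop in place of the kernel's facilities; it is only an illustration of the pattern.

// Illustrative: a cold, diverging boot-halt helper in the shape of halt_boot.
#[cold]
fn halt_boot(message: &str) -> ! {
    // The kernel version uses its own print!/println! macros and
    // core::hint::spin_loop(); the structure is the same here.
    eprint!("{message}");
    eprintln!("Kernel boot cannot continue. Halting.");
    loop {
        std::hint::spin_loop();
    }
}

fn main() {
    // Never invoked here (it diverges); the coercion just shows the signature.
    let _: fn(&str) -> ! = halt_boot;
}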
diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs
index bf984641..10c6a92c 100644
--- a/src/syscall/fs.rs
+++ b/src/syscall/fs.rs
@@ -12,7 +12,7 @@ use crate::{
memory::{AddrSpace, GenericFlusher, Grant, PageSpan, TlbShootdownActions},
},
memory::{Page, VirtualAddress, PAGE_SIZE},
- scheme::{self, FileHandle, KernelScheme, OpenResult, StrOrBytes},
+ scheme::{FileHandle, KernelScheme, OpenResult, StrOrBytes},
sync::{CleanLockToken, RwLock},
syscall::{data::Stat, error::*, flag::*},
};
@@ -45,7 +45,7 @@ pub fn file_op_generic_ext<T>(
(file, desc)
};
- let scheme = scheme::get_scheme(token.token(), desc.scheme)?;
+ let scheme = desc.get_scheme(token)?;
op(&*scheme, file.description, desc, token)
}
@@ -73,14 +73,18 @@ pub fn openat(
) -> Result<FileHandle> {
let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?;
- let (scheme_id, number) = {
+ let desc = {
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
- let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?;
- let desc = pipe.description.read(token.token());
- (desc.scheme, desc.number)
+ let (context, mut context_token) = current.token_split();
+ let pipe = context
+ .get_file(fh, &mut context_token)
+ .ok_or(Error::new(EBADF))?;
+ *pipe.description.read(context_token.token())
};
+ let scheme = desc.get_scheme(token)?;
+ let number = desc.number;
+ let scheme_id = desc.scheme;
let caller_ctx = context::current()
.read(token.token())
@@ -88,8 +92,6 @@ pub fn openat(
.filter_uid_gid(euid, egid);
let new_description = {
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
-
let res = scheme.kopenat(
number,
StrOrBytes::from_str(&path_buf),
@@ -101,13 +103,14 @@ pub fn openat(
match res? {
OpenResult::SchemeLocal(number, internal_flags) => {
- Arc::new(RwLock::new(FileDescription {
- offset: 0,
- internal_flags,
- scheme: scheme_id,
+ Arc::new(RwLock::new(FileDescription::new(
+ scheme_id,
number,
- flags: (flags & !O_CLOEXEC) as u32,
- }))
+ 0,
+ (flags & !O_CLOEXEC) as u32,
+ internal_flags,
+ token,
+ )))
}
OpenResult::External(desc) => desc,
}
@@ -137,16 +140,17 @@ pub fn unlinkat(
) -> Result<()> {
let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?;
- let (number, scheme_id) = {
+ let desc = {
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
- let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?;
- let desc = pipe.description.read(token.token());
- (desc.number, desc.scheme)
+ let (context, mut context_token) = current.token_split();
+ let pipe = context
+ .get_file(fh, &mut context_token)
+ .ok_or(Error::new(EBADF))?;
+ *pipe.description.read(context_token.token())
};
-
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+ let number = desc.number;
+ let scheme = desc.get_scheme(token)?;
let caller_ctx = context::current()
.read(token.token())
@@ -199,17 +203,18 @@ fn duplicate_file(
let description = { *file.description.read(token.token()) };
let new_description = {
- let scheme = scheme::get_scheme(token.token(), description.scheme)?;
+ let scheme = description.get_scheme(token)?;
match scheme.kdup(description.number, user_buf, caller_ctx, token)? {
OpenResult::SchemeLocal(number, internal_flags) => {
- Arc::new(RwLock::new(FileDescription {
- offset: 0,
- internal_flags,
- scheme: description.scheme,
+ Arc::new(RwLock::new(FileDescription::new(
+ description.scheme,
number,
- flags: description.flags,
- }))
+ 0,
+ description.flags,
+ internal_flags,
+ token,
+ )))
}
OpenResult::External(desc) => desc,
}
@@ -296,11 +301,10 @@ fn call_normal(
}
.ok_or(Error::new(EBADF))?;
- let (scheme_id, number) = {
- let desc = file.description.read(token.token());
- (desc.scheme, desc.number)
+ let (scheme, number) = {
+ let desc = *file.description.read(token.token());
+ (desc.get_scheme(token)?, desc.number)
};
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
if flags.contains(CallFlags::STD_FS) {
scheme.translate_std_fs_call(number, file.description, payload, flags, metadata, token)
@@ -341,28 +345,28 @@ fn fdwrite_inner(
) -> Result<usize> {
// TODO: Ensure deadlocks can't happen
let (scheme, number, descs_to_send) = {
- let (scheme, number) = {
+ let desc = {
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
+ let (context, mut context_token) = current.token_split();
let file_descriptor = context
- .get_file(socket, &mut token)
+ .get_file(socket, &mut context_token)
.ok_or(Error::new(EBADF))?;
- let desc = &file_descriptor.description.read(token.token());
- (desc.scheme, desc.number)
+ *file_descriptor.description.read(context_token.token())
};
- let scheme = scheme::get_scheme(token.token(), scheme)?;
+ let scheme = desc.get_scheme(token)?;
+ let number = desc.number;
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
+ let (context, mut context_token) = current.token_split();
(
scheme,
number,
if flags.contains(CallFlags::FD_CLONE) {
- context.bulk_get_files(&target_fds, &mut token)
+ context.bulk_get_files(&target_fds, &mut context_token)
} else {
- context.bulk_remove_files(&target_fds, &mut token)
+ context.bulk_remove_files(&target_fds, &mut context_token)
}?
.into_iter()
.map(|f| f.description)
@@ -395,18 +399,22 @@ fn call_fdread(
metadata: &[u64],
token: &mut CleanLockToken,
) -> Result<usize> {
+ let desc = {
+ let current_lock = context::current();
+ let mut current = current_lock.read(token.token());
+ let (context, mut context_token) = current.token_split();
+ let file_descriptor = context
+ .get_file(fd, &mut context_token)
+ .ok_or(Error::new(EBADF))?;
+ *file_descriptor.description.read(context_token.token())
+ };
let (scheme, number) = {
- let (scheme, number) = {
- let current_lock = context::current();
- let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
- let file_descriptor = context.get_file(fd, &mut token).ok_or(Error::new(EBADF))?;
- let desc = file_descriptor.description.read(token.token());
- (desc.scheme, desc.number)
- };
- let scheme = scheme::get_scheme(token.token(), scheme)?;
-
- (scheme, number)
+ let scheme = desc.get_scheme(token)?;
+ let number = desc.number;
+ (
+ scheme,
+ number,
+ )
};
scheme.kfdread(number, payload, flags, metadata, token)
@@ -440,9 +448,9 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken)
}
.ok_or(Error::new(EBADF))?;
- let (scheme_id, number, flags) = {
- let desc = file.description.write(token.token());
- (desc.scheme, desc.number, desc.flags)
+ let (number, flags, desc) = {
+ let desc = *file.description.read(token.token());
+ (desc.number, desc.flags, desc)
};
if cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC {
@@ -460,7 +468,7 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken)
// Communicate fcntl with scheme
if cmd != F_GETFD && cmd != F_SETFD {
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+ let scheme = desc.get_scheme(token)?;
scheme.fcntl(number, cmd, arg, token)?;
};
@@ -518,13 +526,11 @@ pub fn flink(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken)
let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?;
let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?;
- let (number, scheme_id) = {
- let desc = file.description.read(token.token());
- (desc.number, desc.scheme)
+ let (number, scheme) = {
+ let desc = *file.description.read(token.token());
+ (desc.number, desc.get_scheme(token)?)
};
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
-
// TODO: Check EXDEV.
/*
if scheme_id != description.scheme {
@@ -554,13 +560,11 @@ pub fn frename(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken
let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?;
let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?;
- let (number, scheme_id) = {
- let desc = file.description.read(token.token());
- (desc.number, desc.scheme)
+ let (number, scheme) = {
+ let desc = *file.description.read(token.token());
+ (desc.number, desc.get_scheme(token)?)
};
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
-
// TODO: Check EXDEV.
/*
if scheme_id != description.scheme {
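Across the fs.rs hunks above, each call site now copies the (Copy) FileDescription out of its lock and resolves the scheme through desc.get_scheme(token) rather than a separate scheme::get_scheme lookup keyed by a captured scheme id. The sketch below illustrates only that copy-out-then-lookup pattern with stand-in types; it is not the kernel's FileDescription or scheme registry.

// Illustrative: copy the small, Copy descriptor out of its lock first, then
// perform the fallible lookup without holding that lock (stand-in types).
use std::collections::HashMap;
use std::sync::RwLock;

#[derive(Clone, Copy)]
struct Desc {
    scheme: usize,
    number: usize,
}

fn resolve(
    locked: &RwLock<Desc>,
    registry: &HashMap<usize, &'static str>,
) -> Result<(&'static str, usize), &'static str> {
    // Copy out while holding the read lock as briefly as possible.
    let desc = *locked.read().unwrap();
    // The lookup happens after the guard has been dropped.
    let scheme = registry.get(&desc.scheme).copied().ok_or("EBADF")?;
    Ok((scheme, desc.number))
}

fn main() {
    let registry = HashMap::from([(1, "file"), (2, "pipe")]);
    let locked = RwLock::new(Desc { scheme: 2, number: 7 });
    assert_eq!(resolve(&locked, &registry), Ok(("pipe", 7)));
}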
diff --git a/src/syscall/process.rs b/src/syscall/process.rs
index e83da427..8a1d385e 100644
--- a/src/syscall/process.rs
+++ b/src/syscall/process.rs
@@ -271,23 +271,26 @@ unsafe fn bootstrap_mem(bootstrap: &crate::startup::Bootstrap) -> &'static [u8]
}
fn insert_fd(scheme: SchemeId, number: usize, cloexec: bool, token: &mut CleanLockToken) -> usize {
+ let description = Arc::new(RwLock::new(FileDescription::new(
+ scheme,
+ number,
+ 0,
+ (O_CREAT | O_RDWR) as u32,
+ InternalFlags::empty(),
+ token,
+ )));
+
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
+ let (context, mut context_token) = current.token_split();
context
.add_file_min(
FileDescriptor {
- description: Arc::new(RwLock::new(FileDescription {
- scheme,
- number,
- offset: 0,
- flags: (O_CREAT | O_RDWR) as u32,
- internal_flags: InternalFlags::empty(),
- })),
+ description,
cloexec,
},
syscall::flag::UPPER_FDTBL_TAG + scheme.get(),
- &mut token,
+ &mut context_token,
)
.expect("failed to insert fd to current context")
.get()