diff --git a/local/patches/kernel/P0-canary.patch b/local/patches/kernel/P0-canary.patch deleted file mode 100644 index 9a915092e4..0000000000 --- a/local/patches/kernel/P0-canary.patch +++ /dev/null @@ -1,90 +0,0 @@ -diff --git a/src/arch/x86_shared/start.rs b/src/arch/x86_shared/start.rs -index 7a7c0ae8..f1dbb6b4 100644 ---- a/src/arch/x86_shared/start.rs -+++ b/src/arch/x86_shared/start.rs -@@ -82,6 +82,15 @@ extern "C" fn kstart() { - /// The entry to Rust, all things must be initialized - unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { - unsafe { -+ // EARLY CANARY: write 'R' to COM1 before any kernel init. -+ // This proves the serial hardware works and the kernel reached Rust entry. -+ // If this character appears but "Redox OS starting..." does not, -+ // the hang is in args_ptr.read(), serial::init(), or graphical_debug::init(). -+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -+ { -+ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'R', options(nostack, preserves_flags)); -+ } -+ - let bootstrap = { - let args = args_ptr.read(); - -@@ -91,27 +100,49 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { - // Set up graphical debug - graphical_debug::init(args.env()); - -+ // SECOND CANARY: write 'S' to COM1 after serial init. -+ // If 'R' appears but 'S' does not, the hang is in serial::init() or graphical_debug::init(). 
-+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -+ { -+ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'S', options(nostack, preserves_flags)); -+ } -+ - info!("Redox OS starting..."); - args.print(); - -+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'1', options(nostack, preserves_flags)); } -+ - // Set up GDT - gdt::init_bsp(stack_end); - -+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'2', options(nostack, preserves_flags)); } -+ - // Set up IDT - idt::init_bsp(); - -+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'3', options(nostack, preserves_flags)); } -+ - // Initialize RMM - #[cfg(target_arch = "x86")] - crate::startup::memory::init(&args, Some(0x100000), Some(0x40000000)); - #[cfg(target_arch = "x86_64")] - crate::startup::memory::init(&args, Some(0x100000), None); - -+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'4', options(nostack, preserves_flags)); } -+ - // Initialize paging - paging::init(); - - #[cfg(target_arch = "x86_64")] - crate::arch::alternative::early_init(true); - -+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'5', options(nostack, preserves_flags)); } -+ - // Set up syscall instruction - interrupt::syscall::init(); - -@@ -121,6 +152,9 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! 
{ - // Activate memory logging - crate::log::init(); - -+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'6', options(nostack, preserves_flags)); } -+ - // Initialize miscellaneous processor features - #[cfg(target_arch = "x86_64")] - crate::arch::misc::init(LogicalCpuId::BSP); -@@ -128,6 +162,9 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { - // Initialize devices - device::init(); - -+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'7', options(nostack, preserves_flags)); } -+ - // Read ACPI tables, starts APs - if cfg!(feature = "acpi") { - crate::acpi::init(args.acpi_rsdp()); diff --git a/local/patches/kernel/P0-eventfd-kernel.patch b/local/patches/kernel/P0-eventfd-kernel.patch index 0a16dab33d..f62b185404 100644 --- a/local/patches/kernel/P0-eventfd-kernel.patch +++ b/local/patches/kernel/P0-eventfd-kernel.patch @@ -34,10 +34,16 @@ index 7398145a..f4f57c23 100644 } diff --git a/src/scheme/event.rs b/src/scheme/event.rs -index 36efe5b2..62e46c99 100644 +index 36efe5b2..f2dc6276 100644 --- a/src/scheme/event.rs +++ b/src/scheme/event.rs -@@ -25,12 +25,26 @@ impl KernelScheme for EventScheme { +@@ -1,4 +1,5 @@ + use alloc::sync::Arc; ++use core::sync::atomic::Ordering; + use syscall::{EventFlags, O_NONBLOCK}; + + use crate::{ +@@ -25,12 +26,25 @@ impl KernelScheme for EventScheme { fn kopenat( &self, id: usize, @@ -49,9 +55,8 @@ index 36efe5b2..62e46c99 100644 token: &mut CleanLockToken, ) -> Result { + let path = match &user_buf { -+ StrOrBytes::Str(s) | StrOrBytes::Bytes(s) => { -+ core::str::from_utf8(s).unwrap_or("") -+ } ++ StrOrBytes::Str(s) => s, ++ StrOrBytes::Bytes(b) => core::str::from_utf8(b).unwrap_or(""), + }; + if path.starts_with("eventfd/") { + let rest = &path[8..]; // after "eventfd/" diff --git a/local/patches/kernel/P1-boot-path-diagnostics.patch 
b/local/patches/kernel/P1-boot-path-diagnostics.patch deleted file mode 100644 index d2750dd71c..0000000000 --- a/local/patches/kernel/P1-boot-path-diagnostics.patch +++ /dev/null @@ -1,219 +0,0 @@ -diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs -index 4dc2388..f472c08 100644 ---- a/src/acpi/madt/arch/x86.rs -+++ b/src/acpi/madt/arch/x86.rs -@@ -20,0 +21 @@ use super::{Madt, MadtEntry}; -+const AP_SPIN_LIMIT: u32 = 1_000_000; -@@ -45,7 +46,11 @@ pub(super) fn init(madt: Madt) { -- let result = mapper -- .map_phys( -- trampoline_page.start_address(), -- trampoline_frame.base(), -- PageFlags::new().execute(true).write(true), -- ) -- .expect("failed to map trampoline"); -+ let result = match mapper.map_phys( -+ trampoline_page.start_address(), -+ trampoline_frame.base(), -+ PageFlags::new().execute(true).write(true), -+ ) { -+ Some(result) => result, -+ None => { -+ println!("KERNEL AP: failed to map trampoline page, AP bring-up disabled"); -+ return; -+ } -+ }; -@@ -75,2 +79,0 @@ pub(super) fn init(madt: Madt) { -- let cpu_id = LogicalCpuId::next(); -- -@@ -78,6 +81,8 @@ pub(super) fn init(madt: Madt) { -- let stack_start = RmmA::phys_to_virt( -- allocate_p2frame(4) -- .expect("no more frames in acpi stack_start") -- .base(), -- ) -- .data(); -+ let alloc = match allocate_p2frame(4) { -+ Some(frame) => frame, -+ None => { -+ println!("KERNEL AP: CPU {} no memory for stack, skipping", ap_local_apic.id); -+ continue; -+ } -+ }; -+ let stack_start = RmmA::phys_to_virt(alloc.base()).data(); -@@ -85,0 +91,10 @@ pub(super) fn init(madt: Madt) { -+ let next_cpu = crate::CPU_COUNT.load(Ordering::Relaxed); -+ if next_cpu >= crate::cpu_set::MAX_CPU_COUNT { -+ println!( -+ "KERNEL AP: CPU {} exceeds logical CPU limit, skipping", -+ ap_local_apic.id -+ ); -+ continue; -+ } -+ let cpu_id = LogicalCpuId::new(next_cpu); -+ -@@ -140,2 +155,7 @@ pub(super) fn init(madt: Madt) { -- // Wait for trampoline ready -- while unsafe { 
(*ap_ready.cast::()).load(Ordering::SeqCst) } == 0 { -+ // Wait for trampoline ready with timeout -+ let mut trampoline_ready = false; -+ for _ in 0..AP_SPIN_LIMIT { -+ if unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } != 0 { -+ trampoline_ready = true; -+ break; -+ } -@@ -144 +164,11 @@ pub(super) fn init(madt: Madt) { -- while !AP_READY.load(Ordering::SeqCst) { -+ if !trampoline_ready { -+ println!("KERNEL AP: CPU {} trampoline timeout, skipping", ap_local_apic.id); -+ continue; -+ } -+ -+ let mut kernel_ready = false; -+ for _ in 0..AP_SPIN_LIMIT { -+ if AP_READY.load(Ordering::SeqCst) { -+ kernel_ready = true; -+ break; -+ } -@@ -146,0 +177,6 @@ pub(super) fn init(madt: Madt) { -+ if !kernel_ready { -+ println!("KERNEL AP: CPU {} AP_READY timeout, skipping", ap_local_apic.id); -+ continue; -+ } -+ -+ crate::CPU_COUNT.fetch_add(1, Ordering::Relaxed); -@@ -154 +190 @@ pub(super) fn init(madt: Madt) { -- let (_frame, _, flush) = unsafe { -+ if let Some((_frame, _, flush)) = unsafe { -@@ -157,3 +193,5 @@ pub(super) fn init(madt: Madt) { -- .expect("failed to unmap trampoline page") -- }; -- flush.flush(); -+ } { -+ flush.flush(); -+ } else { -+ println!("KERNEL AP: failed to unmap trampoline page (non-fatal)"); -+ } -diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs -index 4fdb0ba..aaa7196 100644 ---- a/src/allocator/mod.rs -+++ b/src/allocator/mod.rs -@@ -9,0 +10,9 @@ const KERNEL_HEAP_SIZE: usize = ::rmm::MEGABYTE; -+#[cold] -+fn halt_kernel_heap_init(message: &str) -> ! { -+ print!("{message}"); -+ println!("Kernel heap initialization cannot continue. 
Halting."); -+ loop { -+ core::hint::spin_loop(); -+ } -+} -+ -@@ -16,4 +25,6 @@ unsafe fn map_heap(mapper: &mut KernelMapper, offset: usize, size: usize) -- let phys = mapper -- .allocator_mut() -- .allocate_one() -- .expect("failed to allocate kernel heap"); -+ let phys = match mapper.allocator_mut().allocate_one() { -+ Some(phys) => phys, -+ None => halt_kernel_heap_init( -+ "FATAL: failed to allocate physical frame for kernel heap\n", -+ ), -+ }; -@@ -21,9 +32,12 @@ unsafe fn map_heap(mapper: &mut KernelMapper, offset: usize, size: usize) -- mapper -- .map_phys( -- page.start_address(), -- phys, -- PageFlags::new() -- .write(true) -- .global(cfg!(not(feature = "pti"))), -- ) -- .expect("failed to map kernel heap") -+ match mapper.map_phys( -+ page.start_address(), -+ phys, -+ PageFlags::new() -+ .write(true) -+ .global(cfg!(not(feature = "pti"))), -+ ) { -+ Some(flush) => flush, -+ None => halt_kernel_heap_init( -+ "FATAL: failed to map kernel heap virtual page\n", -+ ), -+ } -diff --git a/src/arch/x86_shared/gdt.rs b/src/arch/x86_shared/gdt.rs -index cad344f..f7acae3 100644 ---- a/src/arch/x86_shared/gdt.rs -+++ b/src/arch/x86_shared/gdt.rs -@@ -194,0 +195,9 @@ impl ProcessorControlRegion { -+#[cold] -+fn halt_pcr_init() -> ! { -+ println!("FATAL: failed to allocate physical memory for Processor Control Region"); -+ println!("Processor startup cannot continue. Halting."); -+ loop { -+ core::hint::spin_loop(); -+ } -+} -+ -@@ -378 +387,4 @@ pub fn allocate_and_init_pcr( -- let pcr_frame = crate::memory::allocate_p2frame(alloc_order).expect("failed to allocate PCR"); -+ let pcr_frame = match crate::memory::allocate_p2frame(alloc_order) { -+ Some(frame) => frame, -+ None => halt_pcr_init(), -+ }; -diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs -index 5006458..47f692f 100644 ---- a/src/arch/x86_shared/idt.rs -+++ b/src/arch/x86_shared/idt.rs -@@ -80,0 +81,9 @@ pub(crate) static IDTS: RwLock> = -+#[cold] -+fn halt_idt_init() -> ! 
{ -+ println!("FATAL: failed to allocate physical pages for backup interrupt stack"); -+ println!("Interrupt setup cannot continue. Halting."); -+ loop { -+ core::hint::spin_loop(); -+ } -+} -+ -@@ -164,2 +173,4 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt { -- let frames = crate::memory::allocate_p2frame(4) -- .expect("failed to allocate pages for backup interrupt stack"); -+ let frames = match crate::memory::allocate_p2frame(4) { -+ Some(frames) => frames, -+ None => halt_idt_init(), -+ }; -diff --git a/src/startup/memory.rs b/src/startup/memory.rs -index 26922dd..f271200 100644 ---- a/src/startup/memory.rs -+++ b/src/startup/memory.rs -@@ -326 +326,10 @@ unsafe fn map_memory(areas: &[MemoryArea], mut bump_allocator: &mut Bum -- let kernel_area = (*MEMORY_MAP.get()).kernel().unwrap(); -+ let kernel_area = match (*MEMORY_MAP.get()).kernel() { -+ Some(area) => area, -+ None => { -+ println!("FATAL: kernel memory area not found in boot memory map"); -+ println!("Cannot determine kernel base address. Halting."); -+ loop { -+ core::hint::spin_loop(); -+ } -+ } -+ }; -diff --git a/src/startup/mod.rs b/src/startup/mod.rs -index 8ad3cdf..86aabc2 100644 ---- a/src/startup/mod.rs -+++ b/src/startup/mod.rs -@@ -151,0 +152,9 @@ static BSP_READY: AtomicBool = AtomicBool::new(false); -+#[cold] -+fn halt_boot(message: &str) -> ! { -+ print!("{message}"); -+ println!("Kernel boot cannot continue. Halting."); -+ loop { -+ hint::spin_loop(); -+ } -+} -+ -@@ -183,3 +192 @@ pub(crate) fn kmain(bootstrap: Bootstrap) -> ! 
{ -- Err(err) => { -- panic!("failed to spawn userspace_init: {:?}", err); -- } -+ Err(_err) => halt_boot("FATAL: failed to spawn first userspace process userspace_init\n"), diff --git a/local/patches/kernel/P1-ioapic-hpet-nmi-v2.patch b/local/patches/kernel/P1-ioapic-hpet-nmi-v2.patch new file mode 100644 index 0000000000..d18783593c --- /dev/null +++ b/local/patches/kernel/P1-ioapic-hpet-nmi-v2.patch @@ -0,0 +1,691 @@ +diff --git a/src/acpi/madt/mod.rs b/src/acpi/madt/mod.rs +index 3159b9c49..e792b2e6c 100644 +--- a/src/acpi/madt/mod.rs ++++ b/src/acpi/madt/mod.rs +@@ -146,6 +146,48 @@ pub struct MadtGicd { + _reserved2: [u8; 3], + } + ++/// MADT Local x2APIC (entry type 0x9) ++#[derive(Clone, Copy, Debug)] ++#[repr(C, packed)] ++pub struct MadtLocalX2Apic { ++ _reserved: u16, ++ pub x2apic_id: u32, ++ pub flags: u32, ++ pub processor_uid: u32, ++} ++ ++/// MADT Local APIC NMI (entry type 0x4) ++#[derive(Clone, Copy, Debug)] ++#[repr(C, packed)] ++pub struct MadtLocalApicNmi { ++ pub processor: u8, ++ pub flags: u16, ++ pub nmi_pin: u8, ++} ++ ++/// MADT Local APIC address override (entry type 0x5) ++#[derive(Clone, Copy, Debug)] ++#[repr(C, packed)] ++pub struct MadtLapicAddressOverride { ++ _reserved: u16, ++ pub local_apic_address: u64, ++} ++ ++/// MADT Local x2APIC NMI (entry type 0xA): flags, ACPI processor UID, LINT#, 3 reserved bytes ++#[derive(Clone, Copy, Debug)] ++#[repr(C, packed)] ++pub struct MadtLocalX2ApicNmi { ++ pub flags: u16, ++ pub processor_uid: u32, ++ pub nmi_pin: u8, ++ _reserved: u8, ++ _reserved2: u16, ++} ++ ++const _: () = assert!(size_of::() == 4); ++const _: () = assert!(size_of::() == 10); ++const _: () = assert!(size_of::() == 10); ++ + /// MADT Entries + #[derive(Debug)] + #[allow(dead_code)] +@@ -156,10 +198,18 @@ pub enum MadtEntry { + InvalidIoApic(usize), + IntSrcOverride(&'static MadtIntSrcOverride), + InvalidIntSrcOverride(usize), ++ LocalApicNmi(&'static MadtLocalApicNmi), ++ InvalidLocalApicNmi(usize), ++ LapicAddressOverride(&'static MadtLapicAddressOverride), ++ 
InvalidLapicAddressOverride(usize), + Gicc(&'static MadtGicc), + InvalidGicc(usize), + Gicd(&'static MadtGicd), + InvalidGicd(usize), ++ LocalX2Apic(&'static MadtLocalX2Apic), ++ InvalidLocalX2Apic(usize), ++ LocalX2ApicNmi(&'static MadtLocalX2ApicNmi), ++ InvalidLocalX2ApicNmi(usize), + Unknown(u8), + } + +@@ -176,6 +226,10 @@ impl Iterator for MadtIter { + let entry_len = + unsafe { *(self.sdt.data_address() as *const u8).add(self.i + 1) } as usize; + ++ if entry_len < 2 { ++ return None; ++ } ++ + if self.i + entry_len <= self.sdt.data_len() { + let item = match entry_type { + 0x0 => { +@@ -206,6 +260,46 @@ impl Iterator for MadtIter { + MadtEntry::InvalidIntSrcOverride(entry_len) + } + } ++ 0x4 => { ++ if entry_len == size_of::() + 2 { ++ MadtEntry::LocalApicNmi(unsafe { ++ &*((self.sdt.data_address() + self.i + 2) ++ as *const MadtLocalApicNmi) ++ }) ++ } else { ++ MadtEntry::InvalidLocalApicNmi(entry_len) ++ } ++ } ++ 0x5 => { ++ if entry_len == size_of::() + 2 { ++ MadtEntry::LapicAddressOverride(unsafe { ++ &*((self.sdt.data_address() + self.i + 2) ++ as *const MadtLapicAddressOverride) ++ }) ++ } else { ++ MadtEntry::InvalidLapicAddressOverride(entry_len) ++ } ++ } ++ 0x9 => { ++ if entry_len == size_of::() + 2 { ++ MadtEntry::LocalX2Apic(unsafe { ++ &*((self.sdt.data_address() + self.i + 2) ++ as *const MadtLocalX2Apic) ++ }) ++ } else { ++ MadtEntry::InvalidLocalX2Apic(entry_len) ++ } ++ } ++ 0xA => { ++ if entry_len == size_of::() + 2 { ++ MadtEntry::LocalX2ApicNmi(unsafe { ++ &*((self.sdt.data_address() + self.i + 2) ++ as *const MadtLocalX2ApicNmi) ++ }) ++ } else { ++ MadtEntry::InvalidLocalX2ApicNmi(entry_len) ++ } ++ } + 0xB => { + if entry_len >= size_of::() + 2 { + MadtEntry::Gicc(unsafe { +diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs +index f472c0886..e8625a205 100644 +--- a/src/acpi/madt/arch/x86.rs ++++ b/src/acpi/madt/arch/x86.rs +@@ -10,8 +10,8 @@ use crate::{ + }, + cpu_set::LogicalCpuId, + memory::{ +- 
allocate_p2frame, Frame, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch, +- VirtualAddress, PAGE_SIZE, ++ allocate_p2frame, map_device_memory, Frame, KernelMapper, Page, PageFlags, ++ PhysicalAddress, RmmA, RmmArch, VirtualAddress, PAGE_SIZE, + }, + startup::AP_READY, + }; +@@ -22,6 +22,34 @@ const AP_SPIN_LIMIT: u32 = 1_000_000; + const TRAMPOLINE: usize = 0x8000; + static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline")); + ++fn current_x2apic_processor_uid(madt: &Madt, apic_id: u32) -> Option { ++ madt.iter().find_map(|entry| match entry { ++ MadtEntry::LocalX2Apic(x2apic) if x2apic.x2apic_id == apic_id => Some(x2apic.processor_uid), ++ _ => None, ++ }) ++} ++ ++fn apply_lapic_address_override( ++ local_apic: &mut crate::arch::device::local_apic::LocalApic, ++ address: u64, ++) { ++ if local_apic.x2 || address == 0 { ++ return; ++ } ++ ++ let Ok(physaddr) = usize::try_from(address) else { ++ warn!( ++ "Ignoring LAPIC address override {:#x}: does not fit host usize", ++ address ++ ); ++ return; ++ }; ++ ++ let mapped = unsafe { map_device_memory(PhysicalAddress::new(physaddr), 4096) }.data(); ++ local_apic.address = mapped; ++ debug!("Applied LAPIC address override: {:#x}", address); ++} ++ + pub(super) fn init(madt: Madt) { + let local_apic = unsafe { the_local_apic() }; + let me = local_apic.id(); +@@ -67,7 +95,14 @@ pub(super) fn init(madt: Madt) { + } + + unsafe { +- let preliminary_cpu_count = madt.iter().filter(|e| matches!(e, MadtEntry::LocalApic(entry) if u32::from(entry.id) == me.get() || entry.flags & 1 == 1)).count(); ++ let preliminary_cpu_count = madt ++ .iter() ++ .filter(|entry| match entry { ++ MadtEntry::LocalApic(local) => u32::from(local.id) == me.get() || local.flags & 1 == 1, ++ MadtEntry::LocalX2Apic(local) => local.x2apic_id == me.get() || local.flags & 1 == 1, ++ _ => false, ++ }) ++ .count(); + crate::profiling::allocate(preliminary_cpu_count as u32); + } + +@@ -183,6 +218,127 @@ pub(super) fn 
init(madt: Madt) { + + RmmA::invalidate_all(); + } ++ } else if let MadtEntry::LocalX2Apic(ap_x2apic) = madt_entry { ++ let apic_id = ap_x2apic.x2apic_id; ++ let flags = ap_x2apic.flags; ++ ++ if apic_id == me.get() { ++ debug!(" This is my local x2APIC"); ++ } else if flags & 1 == 1 { ++ let alloc = match allocate_p2frame(4) { ++ Some(frame) => frame, ++ None => { ++ println!("KERNEL AP: CPU {} no memory for stack, skipping", apic_id); ++ continue; ++ } ++ }; ++ let stack_start = RmmA::phys_to_virt(alloc.base()).data(); ++ let stack_end = stack_start + (PAGE_SIZE << 4); ++ ++ let next_cpu = crate::CPU_COUNT.load(Ordering::Relaxed); ++ if next_cpu >= crate::cpu_set::MAX_CPU_COUNT { ++ println!( ++ "KERNEL AP: CPU {} exceeds logical CPU limit, skipping", ++ apic_id ++ ); ++ continue; ++ } ++ let cpu_id = LogicalCpuId::new(next_cpu); ++ ++ let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end); ++ let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id); ++ ++ let args = KernelArgsAp { ++ stack_end: stack_end as *mut u8, ++ cpu_id, ++ pcr_ptr, ++ idt_ptr, ++ }; ++ ++ let ap_ready = (TRAMPOLINE + 8) as *mut u64; ++ let ap_args_ptr = unsafe { ap_ready.add(1) }; ++ let ap_page_table = unsafe { ap_ready.add(2) }; ++ let ap_code = unsafe { ap_ready.add(3) }; ++ ++ unsafe { ++ ap_ready.write(0); ++ ap_args_ptr.write(&args as *const _ as u64); ++ ap_page_table.write(page_table_physaddr as u64); ++ #[expect(clippy::fn_to_numeric_cast)] ++ ap_code.write(kstart_ap as u64); ++ core::arch::asm!(""); ++ } ++ AP_READY.store(false, Ordering::SeqCst); ++ ++ { ++ let mut icr = 0x4500u64; ++ icr |= u64::from(apic_id) << 32; ++ local_apic.set_icr(icr); ++ } ++ ++ for _ in 0..100_000 { ++ hint::spin_loop(); ++ } ++ ++ { ++ let ap_segment = (TRAMPOLINE >> 12) & 0xFF; ++ let mut icr = 0x4600u64 | ap_segment as u64; ++ icr |= u64::from(apic_id) << 32; ++ local_apic.set_icr(icr); ++ } ++ ++ for _ in 0..2_000_000 { ++ hint::spin_loop(); ++ } ++ ++ { ++ let ap_segment = 
(TRAMPOLINE >> 12) & 0xFF; ++ let mut icr = 0x4600u64 | ap_segment as u64; ++ icr |= u64::from(apic_id) << 32; ++ local_apic.set_icr(icr); ++ } ++ ++ let mut trampoline_ready = false; ++ for _ in 0..AP_SPIN_LIMIT { ++ if unsafe { (*ap_ready.cast::()).load(Ordering::SeqCst) } != 0 { ++ trampoline_ready = true; ++ break; ++ } ++ hint::spin_loop(); ++ } ++ if !trampoline_ready { ++ println!("KERNEL AP: CPU {} trampoline timeout, skipping", apic_id); ++ continue; ++ } ++ ++ let mut kernel_ready = false; ++ for _ in 0..AP_SPIN_LIMIT { ++ if AP_READY.load(Ordering::SeqCst) { ++ kernel_ready = true; ++ break; ++ } ++ hint::spin_loop(); ++ } ++ if !kernel_ready { ++ println!("KERNEL AP: CPU {} AP_READY timeout, skipping", apic_id); ++ continue; ++ } ++ ++ crate::CPU_COUNT.fetch_add(1, Ordering::Relaxed); ++ RmmA::invalidate_all(); ++ } ++ } else if let MadtEntry::LocalApicNmi(nmi) = madt_entry { ++ let target_apic = nmi.processor; ++ if target_apic == 0xFF || target_apic == local_apic.id().get() as u8 { ++ unsafe { local_apic.set_lvt_nmi(nmi.nmi_pin, nmi.flags) }; ++ } ++ } else if let MadtEntry::LocalX2ApicNmi(nmi) = madt_entry { ++ let current_uid = current_x2apic_processor_uid(&madt, me.get()); ++ if nmi.processor_uid == u32::MAX || current_uid == Some(nmi.processor_uid) { ++ unsafe { local_apic.set_lvt_nmi(nmi.nmi_pin, nmi.flags) }; ++ } ++ } else if let MadtEntry::LapicAddressOverride(override_entry) = madt_entry { ++ apply_lapic_address_override(local_apic, override_entry.local_apic_address); + } + } + +diff --git a/src/arch/x86_shared/device/ioapic.rs b/src/arch/x86_shared/device/ioapic.rs +index fb66d3bf2..cd34c03b9 100644 +--- a/src/arch/x86_shared/device/ioapic.rs ++++ b/src/arch/x86_shared/device/ioapic.rs +@@ -14,6 +14,10 @@ pub struct IoApicRegs { + pointer: *const u32, + } + impl IoApicRegs { ++ fn redirection_index_valid(&mut self, idx: u8) -> bool { ++ idx <= self.max_redirection_table_entries() ++ } ++ + fn ioregsel(&self) -> *const u32 { + self.pointer + 
} +@@ -44,21 +48,28 @@ impl IoApicRegs { + pub fn read_ioapicver(&mut self) -> u32 { + self.read_reg(0x01) + } +- pub fn read_ioredtbl(&mut self, idx: u8) -> u64 { +- assert!(idx < 24); ++ pub fn read_ioredtbl(&mut self, idx: u8) -> Option { ++ if !self.redirection_index_valid(idx) { ++ warn!("IOAPIC read_ioredtbl index {} out of range", idx); ++ return None; ++ } + let lo = self.read_reg(0x10 + idx * 2); + let hi = self.read_reg(0x10 + idx * 2 + 1); + +- u64::from(lo) | (u64::from(hi) << 32) ++ Some(u64::from(lo) | (u64::from(hi) << 32)) + } +- pub fn write_ioredtbl(&mut self, idx: u8, value: u64) { +- assert!(idx < 24); ++ pub fn write_ioredtbl(&mut self, idx: u8, value: u64) -> bool { ++ if !self.redirection_index_valid(idx) { ++ warn!("IOAPIC write_ioredtbl index {} out of range", idx); ++ return false; ++ } + + let lo = value as u32; + let hi = (value >> 32) as u32; + + self.write_reg(0x10 + idx * 2, lo); + self.write_reg(0x10 + idx * 2 + 1, hi); ++ true + } + + pub fn max_redirection_table_entries(&mut self) -> u8 { +@@ -92,17 +103,22 @@ impl IoApic { + } + /// Map an interrupt vector to a physical local APIC ID of a processor (thus physical mode). 
+ #[allow(dead_code)] +- pub fn map(&self, idx: u8, info: MapInfo) { +- self.regs.lock().write_ioredtbl(idx, info.as_raw()) ++ pub fn map(&self, idx: u8, info: MapInfo) -> bool { ++ let Some(raw) = info.as_raw() else { ++ return false; ++ }; ++ self.regs.lock().write_ioredtbl(idx, raw) + } + pub fn set_mask(&self, gsi: u32, mask: bool) { + let idx = (gsi - self.gsi_start) as u8; + let mut guard = self.regs.lock(); + +- let mut reg = guard.read_ioredtbl(idx); ++ let Some(mut reg) = guard.read_ioredtbl(idx) else { ++ return; ++ }; + reg &= !(1 << 16); + reg |= u64::from(mask) << 16; +- guard.write_ioredtbl(idx, reg); ++ let _ = guard.write_ioredtbl(idx, reg); + } + } + +@@ -149,19 +165,26 @@ pub struct MapInfo { + } + + impl MapInfo { +- pub fn as_raw(&self) -> u64 { +- assert!(self.vector >= 0x20); +- assert!(self.vector <= 0xFE); ++ pub fn as_raw(&self) -> Option { ++ if !(0x20..=0xFE).contains(&self.vector) { ++ warn!( ++ "Refusing to map IOAPIC vector outside valid range: {:#x}", ++ self.vector ++ ); ++ return None; ++ } + + // TODO: Check for reserved fields. 
+ +- (u64::from(self.dest.get()) << 56) ++ Some( ++ (u64::from(self.dest.get()) << 56) + | (u64::from(self.mask) << 16) + | ((self.trigger_mode as u64) << 15) + | ((self.polarity as u64) << 13) + | ((self.dest_mode as u64) << 11) + | ((self.delivery_mode as u64) << 8) +- | u64::from(self.vector) ++ | u64::from(self.vector), ++ ) + } + } + +@@ -175,7 +198,7 @@ impl fmt::Debug for IoApic { + + let count = guard.max_redirection_table_entries(); + f.debug_list() +- .entries((0..count).map(|i| guard.read_ioredtbl(i))) ++ .entries((0..=count).filter_map(|i| guard.read_ioredtbl(i))) + .finish() + } + } +@@ -237,11 +260,14 @@ pub unsafe fn handle_ioapic(madt_ioapic: &'static MadtIoApic) { + let ioapic_registers = virt.data() as *const u32; + let ioapic = IoApic::new(ioapic_registers, madt_ioapic.gsi_base); + +- assert_eq!( +- ioapic.regs.lock().id(), +- madt_ioapic.id, +- "mismatched ACPI MADT I/O APIC ID, and the ID reported by the I/O APIC" +- ); ++ let detected_id = ioapic.regs.lock().id(); ++ if detected_id != madt_ioapic.id { ++ warn!( ++ "mismatched ACPI MADT I/O APIC ID: MADT={}, IOAPIC={}; continuing with detected hardware", ++ madt_ioapic.id, ++ detected_id ++ ); ++ } + + (*IOAPICS.get()).get_or_insert_with(Vec::new).push(ioapic); + } +@@ -310,11 +336,11 @@ pub unsafe fn init() { + } + } + } +- println!( +- "I/O APICs: {:?}, overrides: {:?}", +- ioapics(), +- src_overrides() +- ); ++ for ioapic in ioapics() { ++ for idx in 0..=ioapic.count { ++ ioapic.set_mask(ioapic.gsi_start + u32::from(idx), true); ++ } ++ } + + // map the legacy PC-compatible IRQs (0-15) to 32-47, just like we did with 8259 PIC (if it + // wouldn't have been disabled due to this I/O APIC) +@@ -329,7 +355,6 @@ pub unsafe fn init() { + .iter() + .any(|over| over.bus_irq == legacy_irq) + { +- // there's an IRQ conflict, making this legacy IRQ inaccessible. 
+ continue; + } + ( +@@ -349,7 +374,6 @@ pub unsafe fn init() { + let redir_tbl_index = (gsi - apic.gsi_start) as u8; + + let map_info = MapInfo { +- // only send to the BSP + dest: bsp_apic_id, + dest_mode: DestinationMode::Physical, + delivery_mode: DeliveryMode::Fixed, +@@ -366,7 +390,32 @@ pub unsafe fn init() { + }, + vector: 32 + legacy_irq, + }; +- apic.map(redir_tbl_index, map_info); ++ if !apic.map(redir_tbl_index, map_info) { ++ warn!( ++ "Unable to map legacy IRQ {} (GSI {}) through IOAPIC index {}", ++ legacy_irq, ++ gsi, ++ redir_tbl_index ++ ); ++ } ++ ++ if legacy_irq == 0 && gsi != u32::from(legacy_irq) { ++ if let Some(apic0) = find_ioapic(u32::from(legacy_irq)) { ++ let idx0 = (u32::from(legacy_irq) - apic0.gsi_start) as u8; ++ let _ = apic0.map( ++ idx0, ++ MapInfo { ++ dest: bsp_apic_id, ++ dest_mode: DestinationMode::Physical, ++ delivery_mode: DeliveryMode::Fixed, ++ mask: false, ++ polarity: ApicPolarity::ActiveHigh, ++ trigger_mode: ApicTriggerMode::Edge, ++ vector: 32, ++ }, ++ ); ++ } ++ } + } + println!( + "I/O APICs: {:?}, overrides: {:?}", +@@ -406,7 +455,7 @@ fn resolve(irq: u8) -> u32 { + fn find_ioapic(gsi: u32) -> Option<&'static IoApic> { + ioapics() + .iter() +- .find(|apic| gsi >= apic.gsi_start && gsi < apic.gsi_start + u32::from(apic.count)) ++ .find(|apic| gsi >= apic.gsi_start && gsi <= apic.gsi_start + u32::from(apic.count)) + } + + pub unsafe fn mask(irq: u8) { +diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs +index b6afe02af..b300e6fea 100644 +--- a/src/arch/x86_shared/device/local_apic.rs ++++ b/src/arch/x86_shared/device/local_apic.rs +@@ -103,7 +103,7 @@ impl LocalApic { + ApicId::new(if self.x2 { + unsafe { rdmsr(IA32_X2APIC_APICID) as u32 } + } else { +- unsafe { self.read(0x20) } ++ unsafe { self.read(0x20) >> 24 } + }) + } + +@@ -126,7 +126,14 @@ impl LocalApic { + pub fn set_icr(&mut self, value: u64) { + if self.x2 { + unsafe { ++ const PENDING: u32 = 1 << 12; ++ 
while (rdmsr(IA32_X2APIC_ICR) as u32) & PENDING == PENDING { ++ core::hint::spin_loop(); ++ } + wrmsr(IA32_X2APIC_ICR, value); ++ while (rdmsr(IA32_X2APIC_ICR) as u32) & PENDING == PENDING { ++ core::hint::spin_loop(); ++ } + } + } else { + unsafe { +@@ -256,6 +263,39 @@ impl LocalApic { + } + } + } ++ ++ pub unsafe fn set_lvt_nmi(&mut self, pin: u8, flags: u16) { ++ let polarity = match flags & 0b11 { ++ 0b11 => 1 << 13, ++ _ => 0, ++ }; ++ let trigger_mode = match (flags >> 2) & 0b11 { ++ 0b11 => 1 << 15, ++ _ => 0, ++ }; ++ let lvt_value = (0b100 << 8) | polarity | trigger_mode; ++ ++ unsafe { ++ match pin { ++ 0 => { ++ if self.x2 { ++ wrmsr(IA32_X2APIC_LVT_LINT0, u64::from(lvt_value)); ++ } else { ++ self.write(0x350, lvt_value); ++ } ++ } ++ 1 => { ++ if self.x2 { ++ wrmsr(IA32_X2APIC_LVT_LINT1, u64::from(lvt_value)); ++ } else { ++ self.write(0x360, lvt_value); ++ } ++ } ++ _ => {} ++ } ++ } ++ } ++ + unsafe fn setup_error_int(&mut self) { + unsafe { + let vector = 49u32; +diff --git a/src/arch/x86_shared/device/mod.rs b/src/arch/x86_shared/device/mod.rs +index 7a2e25df3..a1e0b78ad 100644 +--- a/src/arch/x86_shared/device/mod.rs ++++ b/src/arch/x86_shared/device/mod.rs +@@ -25,8 +25,7 @@ pub unsafe fn init() { + } + } + pub unsafe fn init_after_acpi() { +- // this will disable the IOAPIC if needed. 
+- //ioapic::init(mapper); ++ unsafe { ioapic::init() }; + } + + unsafe fn init_hpet() -> bool { +diff --git a/src/arch/x86_shared/interrupt/exception.rs b/src/arch/x86_shared/interrupt/exception.rs +index 7725a45d0..bfe9f096a 100644 +--- a/src/arch/x86_shared/interrupt/exception.rs ++++ b/src/arch/x86_shared/interrupt/exception.rs +@@ -1,3 +1,5 @@ ++use core::sync::atomic::{AtomicBool, Ordering}; ++ + use syscall::Exception; + use x86::irq::PageFaultError; + +@@ -10,6 +12,22 @@ use crate::{ + syscall::flag::*, + }; + ++static NMI_IN_PROGRESS: AtomicBool = AtomicBool::new(false); ++ ++unsafe fn nmi_raw_serial_write(bytes: &[u8]) { ++ use crate::syscall::io::{Io, Pio}; ++ ++ let mut com1 = Pio::::new(0x3F8); ++ let lsr = Pio::::new(0x3F8 + 5); ++ ++ for &byte in bytes { ++ while lsr.read() & (1 << 5) == 0 { ++ core::hint::spin_loop(); ++ } ++ com1.write(byte); ++ } ++} ++ + interrupt_stack!(divide_by_zero, |stack| { + println!("Divide by zero"); + stack.trace(); +@@ -55,9 +73,35 @@ interrupt_stack!(non_maskable, @paranoid, |stack| { + + #[cfg(not(all(target_arch = "x86_64", feature = "profiling")))] + { +- // TODO: This will likely deadlock +- println!("Non-maskable interrupt"); +- stack.dump(); ++ if NMI_IN_PROGRESS.swap(true, Ordering::SeqCst) { ++ return; ++ } ++ ++ unsafe { ++ nmi_raw_serial_write(b"Non-maskable interrupt\n"); ++ nmi_raw_serial_write(b" RIP: "); ++ ++ #[cfg(target_arch = "x86")] ++ let instruction_pointer = u64::from(stack.iret.eip); ++ #[cfg(target_arch = "x86_64")] ++ let instruction_pointer = stack.iret.rip; ++ ++ let mut buf = [0u8; 19]; ++ buf[0] = b'0'; ++ buf[1] = b'x'; ++ for i in 0..16 { ++ let nibble = ((instruction_pointer >> (60 - i * 4)) & 0xF) as u8; ++ buf[2 + i] = if nibble < 10 { ++ b'0' + nibble ++ } else { ++ b'a' + nibble - 10 ++ }; ++ } ++ buf[18] = b'\n'; ++ nmi_raw_serial_write(&buf); ++ } ++ ++ NMI_IN_PROGRESS.store(false, Ordering::SeqCst); + } + }); + diff --git a/local/patches/kernel/P1-memory-map-overflow.patch 
b/local/patches/kernel/P1-memory-map-overflow.patch deleted file mode 100644 index c0d79c7ba8..0000000000 --- a/local/patches/kernel/P1-memory-map-overflow.patch +++ /dev/null @@ -1,32 +0,0 @@ -diff --git a/src/startup/memory.rs b/src/startup/memory.rs -index 26922dde..60c7f061 100644 ---- a/src/startup/memory.rs -+++ b/src/startup/memory.rs -@@ -74,14 +74,16 @@ impl MemoryEntry { - } - - struct MemoryMap { -- entries: [MemoryEntry; 512], -+ entries: [MemoryEntry; 1024], - size: usize, - } - - impl MemoryMap { - fn register(&mut self, base: usize, size: usize, kind: BootloaderMemoryKind) { - if self.size >= self.entries.len() { -- panic!("Early memory map overflow!"); -+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -+ unsafe { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'!', options(nostack, preserves_flags)); } -+ panic!("Early memory map overflow at entry {} (max {})", self.size, self.entries.len()); - } - let start = if kind == BootloaderMemoryKind::Free { - align_up(base) -@@ -134,7 +136,7 @@ static MEMORY_MAP: SyncUnsafeCell = SyncUnsafeCell::new(MemoryMap { - start: 0, - end: 0, - kind: BootloaderMemoryKind::Null, -- }; 512], -+ }; 1024], - size: 0, - }); - diff --git a/local/patches/kernel/P1-mkfifo-fifo-support-v2.patch b/local/patches/kernel/P1-mkfifo-fifo-support-v2.patch new file mode 100644 index 0000000000..a5c48d1c64 --- /dev/null +++ b/local/patches/kernel/P1-mkfifo-fifo-support-v2.patch @@ -0,0 +1,1150 @@ +--- a/src/scheme/pipe.rs ++++ b/src/scheme/pipe.rs +@@ -1,5 +1,10 @@ +-use alloc::{collections::VecDeque, sync::Arc, vec::Vec}; +-use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; ++use alloc::{ ++ collections::VecDeque, ++ string::{String, ToString}, ++ sync::Arc, ++ vec::Vec, ++}; ++use core::sync::atomic::{AtomicUsize, Ordering}; + + use syscall::{data::GlobalSchemes, CallFlags}; + +@@ -14,100 +19,262 @@ + sync::{CleanLockToken, Mutex, RwLock, WaitCondition, L1}, + syscall::{ + data::Stat, +- 
error::{Error, Result, EAGAIN, EBADF, EINTR, EINVAL, ENOENT, EPIPE}, +- flag::{EventFlags, EVENT_READ, EVENT_WRITE, MODE_FIFO, O_NONBLOCK}, ++ error::{ ++ Error, Result, EAGAIN, EBADF, EEXIST, EINVAL, EINTR, ENOENT, ENOTDIR, EPIPE, ++ }, ++ flag::{ ++ EventFlags, EVENT_READ, EVENT_WRITE, MODE_FIFO, O_ACCMODE, O_DIRECTORY, ++ O_NONBLOCK, O_RDONLY, O_RDWR, O_STAT, O_WRONLY, ++ }, + usercopy::{UserSliceRo, UserSliceRw, UserSliceWo}, + }, + }; + + use super::{CallerCtx, KernelScheme, OpenResult, SchemeExt, StrOrBytes}; + +-// TODO: Preallocate a number of scheme IDs, since there can only be *one* root namespace, and +-// therefore only *one* pipe scheme. +-static PIPE_NEXT_ID: AtomicUsize = AtomicUsize::new(0); +- ++static PIPE_NEXT_ID: AtomicUsize = AtomicUsize::new(1); ++ ++#[derive(Clone)] + enum Handle { +- Pipe(Arc), ++ Endpoint(EndpointHandle), + SchemeRoot, + } + +-// TODO: SLOB? +-static PIPES: RwLock> = ++#[derive(Clone, Copy, Eq, PartialEq)] ++enum EndpointKind { ++ Read, ++ Write, ++ ReadWrite, ++} ++ ++impl EndpointKind { ++ fn can_read(self) -> bool { ++ matches!(self, Self::Read | Self::ReadWrite) ++ } ++ ++ fn can_write(self) -> bool { ++ matches!(self, Self::Write | Self::ReadWrite) ++ } ++} ++ ++#[derive(Clone)] ++struct EndpointHandle { ++ pipe: Arc, ++ kind: EndpointKind, ++ named: Option>, ++} ++ ++struct NamedPipe { ++ path: String, ++ mode: u16, ++ active: Mutex>>, ++} ++ ++static HANDLES: RwLock> = + RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new())); ++static NAMED_PIPES: RwLock>> = ++ RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new())); + + const MAX_QUEUE_SIZE: usize = 65536; + +-// In almost all places where Rust (and LLVM) uses pointers, they are limited to nonnegative isize, +-// so this is fine. 
+-const WRITE_NOT_READ_BIT: usize = 1; +- +-fn from_raw_id(id: usize) -> (bool, usize) { +- (id & WRITE_NOT_READ_BIT != 0, id & !WRITE_NOT_READ_BIT) +-} +- +-pub fn pipe(token: &mut CleanLockToken) -> Result<(usize, usize)> { +- // Bit 0 is used for WRITE_NOT_READ_BIT +- let id = PIPE_NEXT_ID.fetch_add(2, Ordering::Relaxed); +- +- PIPES.write(token.token()).insert( +- id, +- Handle::Pipe(Arc::new(Pipe { +- queue: Mutex::new(VecDeque::new()), +- read_condition: WaitCondition::new(), +- write_condition: WaitCondition::new(), +- writer_is_alive: AtomicBool::new(true), +- reader_is_alive: AtomicBool::new(true), +- has_run_dup: AtomicBool::new(false), +- fd_queue: Mutex::new(VecDeque::new()), +- })), +- ); +- +- Ok((id, id | WRITE_NOT_READ_BIT)) +-} +- +-pub struct PipeScheme; +- +-impl PipeScheme { +- fn get_pipe(key: usize, token: &mut CleanLockToken) -> Result> { +- PIPES +- .read(token.token()) +- .get(&key) +- .and_then(|handle| match handle { +- Handle::Pipe(pipe) => Some(Arc::clone(pipe)), ++fn next_id() -> usize { ++ PIPE_NEXT_ID.fetch_add(1, Ordering::Relaxed) ++} ++ ++fn endpoint_kind_from_flags(flags: usize) -> Result { ++ match flags & O_ACCMODE { ++ O_RDONLY => Ok(EndpointKind::Read), ++ O_WRONLY => Ok(EndpointKind::Write), ++ O_RDWR => Ok(EndpointKind::ReadWrite), ++ _ => Err(Error::new(EINVAL)), ++ } ++} ++ ++fn validate_named_fifo_open(flags: usize) -> Result<()> { ++ if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { ++ return Err(Error::new(ENOTDIR)); ++ } ++ ++ let _ = endpoint_kind_from_flags(flags)?; ++ Ok(()) ++} ++ ++fn trigger_matching( ++ pipe: &Arc, ++ require_read: bool, ++ require_write: bool, ++ flags: EventFlags, ++ token: &mut CleanLockToken, ++) { ++ let ids = { ++ let handles = HANDLES.read(token.token()); ++ handles ++ .iter() ++ .filter_map(|(id, handle)| match handle { ++ Handle::Endpoint(endpoint) ++ if Arc::ptr_eq(&endpoint.pipe, pipe) ++ && (!require_read || endpoint.kind.can_read()) ++ && (!require_write || 
endpoint.kind.can_write()) => ++ { ++ Some(*id) ++ } + _ => None, + }) ++ .collect::>() ++ }; ++ ++ for id in ids { ++ event::trigger(GlobalSchemes::Pipe.scheme_id(), id, flags, token); ++ } ++} ++ ++fn open_endpoint( ++ pipe: Arc, ++ kind: EndpointKind, ++ named: Option>, ++ token: &mut CleanLockToken, ++) -> usize { ++ if kind.can_read() { ++ pipe.reader_count.fetch_add(1, Ordering::SeqCst); ++ } ++ if kind.can_write() { ++ pipe.writer_count.fetch_add(1, Ordering::SeqCst); ++ } ++ ++ let id = next_id(); ++ HANDLES.write(token.token()).insert( ++ id, ++ Handle::Endpoint(EndpointHandle { pipe, kind, named }), ++ ); ++ id ++} ++ ++fn drop_wait_conditions_if_possible(pipe: Arc, token: &mut CleanLockToken) { ++ if let Some(pipe) = Arc::into_inner(pipe) { ++ { ++ pipe.read_condition.into_drop(token); ++ } ++ { ++ pipe.write_condition.into_drop(token); ++ } ++ } ++} ++ ++pub fn pipe(token: &mut CleanLockToken) -> Result<(usize, usize)> { ++ let pipe = Arc::new(Pipe::new()); ++ let read_id = open_endpoint(Arc::clone(&pipe), EndpointKind::Read, None, token); ++ let write_id = open_endpoint(pipe, EndpointKind::Write, None, token); ++ ++ Ok((read_id, write_id)) ++} ++ ++pub fn named_pipe_exists(path: &str, token: &mut CleanLockToken) -> bool { ++ NAMED_PIPES.read(token.token()).contains_key(path) ++} ++ ++pub fn create_named_pipe( ++ path: &str, ++ display_path: &str, ++ mode: u16, ++ flags: usize, ++ token: &mut CleanLockToken, ++) -> Result { ++ validate_named_fifo_open(flags)?; ++ ++ let named = { ++ let mut named_pipes = NAMED_PIPES.write(token.token()); ++ if named_pipes.contains_key(path) { ++ return Err(Error::new(EEXIST)); ++ } ++ ++ let named = Arc::new(NamedPipe { ++ path: display_path.to_string(), ++ mode, ++ active: Mutex::new(None), ++ }); ++ named_pipes.insert(path.to_string(), Arc::clone(&named)); ++ named ++ }; ++ ++ let kind = endpoint_kind_from_flags(flags)?; ++ let pipe = Arc::new(Pipe::new()); ++ *named.active.lock(token.token()) = 
Some(Arc::clone(&pipe)); ++ ++ Ok(open_endpoint(pipe, kind, Some(named), token)) ++} ++ ++pub fn open_named_pipe(path: &str, flags: usize, token: &mut CleanLockToken) -> Result> { ++ validate_named_fifo_open(flags)?; ++ ++ let named = match NAMED_PIPES.read(token.token()).get(path) { ++ Some(named) => Arc::clone(named), ++ None => return Ok(None), ++ }; ++ ++ let kind = endpoint_kind_from_flags(flags)?; ++ let pipe = { ++ let mut active = named.active.lock(token.token()); ++ match active.as_ref() { ++ Some(pipe) => Arc::clone(pipe), ++ None => { ++ let pipe = Arc::new(Pipe::new()); ++ *active = Some(Arc::clone(&pipe)); ++ pipe ++ } ++ } ++ }; ++ ++ Ok(Some(open_endpoint(pipe, kind, Some(named), token))) ++} ++ ++pub fn unlink_named_pipe(path: &str, token: &mut CleanLockToken) -> bool { ++ NAMED_PIPES.write(token.token()).remove(path).is_some() ++} ++ ++pub struct PipeScheme; ++ ++impl PipeScheme { ++ fn get_endpoint(id: usize, token: &mut CleanLockToken) -> Result { ++ HANDLES ++ .read(token.token()) ++ .get(&id) ++ .and_then(|handle| match handle { ++ Handle::Endpoint(endpoint) => Some(endpoint.clone()), ++ Handle::SchemeRoot => None, ++ }) + .ok_or(Error::new(EBADF)) + } + } + + impl KernelScheme for PipeScheme { + fn scheme_root(&self, token: &mut CleanLockToken) -> Result { +- let id = PIPE_NEXT_ID.fetch_add(2, Ordering::Relaxed); +- PIPES.write(token.token()).insert(id, Handle::SchemeRoot); ++ let id = next_id(); ++ HANDLES.write(token.token()).insert(id, Handle::SchemeRoot); + Ok(id) + } ++ + fn fevent( + &self, + id: usize, + flags: EventFlags, + token: &mut CleanLockToken, + ) -> Result { +- let (is_writer_not_reader, key) = from_raw_id(id); +- let pipe = Self::get_pipe(key, token)?; ++ let endpoint = Self::get_endpoint(id, token)?; + + let mut ready = EventFlags::empty(); + +- if is_writer_not_reader ++ if endpoint.kind.can_write() + && flags.contains(EVENT_WRITE) +- && (pipe.queue.lock(token.token()).len() <= MAX_QUEUE_SIZE +- || 
!pipe.reader_is_alive.load(Ordering::Acquire)) ++ && (endpoint.pipe.queue.lock(token.token()).len() <= MAX_QUEUE_SIZE ++ || endpoint.pipe.reader_count.load(Ordering::Acquire) == 0) + { + ready |= EventFlags::EVENT_WRITE; + } +- if !is_writer_not_reader ++ ++ if endpoint.kind.can_read() + && flags.contains(EVENT_READ) +- && (!pipe.queue.lock(token.token()).is_empty() +- || !pipe.writer_is_alive.load(Ordering::Acquire)) ++ && (!endpoint.pipe.queue.lock(token.token()).is_empty() ++ || endpoint.pipe.writer_count.load(Ordering::Acquire) == 0) + { + ready |= EventFlags::EVENT_READ; + } +@@ -116,46 +283,48 @@ + } + + fn close(&self, id: usize, token: &mut CleanLockToken) -> Result<()> { +- let (is_write_not_read, key) = from_raw_id(id); +- +- let pipe = Self::get_pipe(key, token)?; +- let scheme_id = GlobalSchemes::Pipe.scheme_id(); +- +- let can_remove = if is_write_not_read { +- pipe.writer_is_alive.store(false, Ordering::SeqCst); +- event::trigger(scheme_id, key, EVENT_READ, token); +- pipe.read_condition.notify(token); +- +- !pipe.reader_is_alive.load(Ordering::SeqCst) +- } else { +- pipe.reader_is_alive.store(false, Ordering::SeqCst); +- event::trigger(scheme_id, key | WRITE_NOT_READ_BIT, EVENT_WRITE, token); +- pipe.write_condition.notify(token); +- +- !pipe.writer_is_alive.load(Ordering::SeqCst) ++ let handle = HANDLES ++ .write(token.token()) ++ .remove(&id) ++ .ok_or(Error::new(EBADF))?; ++ ++ let Handle::Endpoint(endpoint) = handle else { ++ return Ok(()); + }; + +- if can_remove { +- let handle = PIPES.write(token.token()).remove(&key); +- if let Some(Handle::Pipe(pipe)) = handle +- && let Some(pipe) = Arc::into_inner(pipe) +- { ++ let mut last_reader = false; ++ let mut last_writer = false; ++ ++ if endpoint.kind.can_read() { ++ last_reader = endpoint.pipe.reader_count.fetch_sub(1, Ordering::SeqCst) == 1; ++ } ++ if endpoint.kind.can_write() { ++ last_writer = endpoint.pipe.writer_count.fetch_sub(1, Ordering::SeqCst) == 1; ++ } ++ ++ if last_writer { ++ 
trigger_matching(&endpoint.pipe, true, false, EVENT_READ, token); ++ endpoint.pipe.read_condition.notify(token); ++ } ++ if last_reader { ++ trigger_matching(&endpoint.pipe, false, true, EVENT_WRITE, token); ++ endpoint.pipe.write_condition.notify(token); ++ } ++ ++ let no_readers = endpoint.pipe.reader_count.load(Ordering::SeqCst) == 0; ++ let no_writers = endpoint.pipe.writer_count.load(Ordering::SeqCst) == 0; ++ if no_readers && no_writers { ++ if let Some(named) = endpoint.named { ++ let mut active = named.active.lock(token.token()); ++ if active ++ .as_ref() ++ .is_some_and(|active_pipe| Arc::ptr_eq(active_pipe, &endpoint.pipe)) + { +- pipe.read_condition.into_drop(token); ++ *active = None; + } +- { +- pipe.write_condition.into_drop(token); +- } +- } +- } +- +- if let Some(pipe) = Arc::into_inner(pipe) { +- { +- pipe.read_condition.into_drop(token); +- } +- { +- pipe.write_condition.into_drop(token); +- } ++ } ++ ++ drop_wait_conditions_if_possible(endpoint.pipe, token); + } + + Ok(()) +@@ -168,9 +337,9 @@ + _ctx: CallerCtx, + token: &mut CleanLockToken, + ) -> Result { +- let (is_writer_not_reader, key) = from_raw_id(old_id); +- +- if is_writer_not_reader { ++ let endpoint = Self::get_endpoint(old_id, token)?; ++ ++ if !endpoint.kind.can_read() { + return Err(Error::new(EBADF)); + } + +@@ -180,17 +349,17 @@ + return Err(Error::new(EINVAL)); + } + +- let pipe = Self::get_pipe(key, token)?; +- +- if pipe.has_run_dup.swap(true, Ordering::SeqCst) { +- return Err(Error::new(EBADF)); +- } +- + Ok(OpenResult::SchemeLocal( +- key | WRITE_NOT_READ_BIT, ++ open_endpoint( ++ Arc::clone(&endpoint.pipe), ++ EndpointKind::Write, ++ endpoint.named, ++ token, ++ ), + InternalFlags::empty(), + )) + } ++ + fn kopenat( + &self, + id: usize, +@@ -200,40 +369,47 @@ + _ctx: CallerCtx, + token: &mut CleanLockToken, + ) -> Result { +- let (_, key) = from_raw_id(id); +- +- { +- let guard = PIPES.read(token.token()); +- if let Some(Handle::SchemeRoot) = guard.get(&key) { +- } else if 
let Some(Handle::Pipe(pipe_arc)) = guard.get(&key) { +- let pipe = Arc::clone(pipe_arc); +- drop(guard); +- +- if user_buf.as_bytes() == b"write" { +- return Err(Error::new(EINVAL)); ++ let is_scheme_root = { ++ let handles = HANDLES.read(token.token()); ++ match handles.get(&id) { ++ Some(Handle::SchemeRoot) => true, ++ Some(Handle::Endpoint(_)) => false, ++ None => return Err(Error::new(EBADF)), ++ } ++ }; ++ ++ if is_scheme_root { ++ let path = user_buf.as_str().or(Err(Error::new(EINVAL)))?; ++ if !path.trim_start_matches('/').is_empty() { ++ return Err(Error::new(ENOENT)); + } + +- if pipe.has_run_dup.swap(true, Ordering::SeqCst) { +- return Err(Error::new(EBADF)); +- } +- ++ let pipe = Arc::new(Pipe::new()); + return Ok(OpenResult::SchemeLocal( +- key | WRITE_NOT_READ_BIT, ++ open_endpoint(pipe, EndpointKind::Read, None, token), + InternalFlags::empty(), + )); +- } else { +- return Err(Error::new(EBADF)); +- } +- } +- +- let path = user_buf.as_str().or(Err(Error::new(EINVAL)))?; +- if !path.trim_start_matches('/').is_empty() { +- return Err(Error::new(ENOENT)); +- } +- +- let (read_id, _) = pipe(token)?; +- +- Ok(OpenResult::SchemeLocal(read_id, InternalFlags::empty())) ++ } ++ ++ let endpoint = Self::get_endpoint(id, token)?; ++ if !endpoint.kind.can_read() { ++ return Err(Error::new(EBADF)); ++ } ++ ++ let path = user_buf.as_bytes(); ++ if !path.is_empty() && path != b"write" { ++ return Err(Error::new(EINVAL)); ++ } ++ ++ Ok(OpenResult::SchemeLocal( ++ open_endpoint( ++ Arc::clone(&endpoint.pipe), ++ EndpointKind::Write, ++ endpoint.named, ++ token, ++ ), ++ InternalFlags::empty(), ++ )) + } + + fn kread( +@@ -244,16 +420,15 @@ + _stored_flags: u32, + token: &mut CleanLockToken, + ) -> Result { +- let (is_write_not_read, key) = from_raw_id(id); +- +- if is_write_not_read { ++ let endpoint = Self::get_endpoint(id, token)?; ++ ++ if !endpoint.kind.can_read() { + return Err(Error::new(EBADF)); + } +- let pipe = Self::get_pipe(key, token)?; + + loop { +- let 
vec = pipe.queue.lock(token.token()); +- let (mut vec, mut token) = vec.into_split(); ++ let vec = endpoint.pipe.queue.lock(token.token()); ++ let (mut vec, mut lock_token) = vec.into_split(); + + let (s1, s2) = vec.as_slices(); + let s1_count = core::cmp::min(user_buf.len(), s1.len()); +@@ -273,28 +448,34 @@ + let _ = vec.drain(..bytes_read); + + if bytes_read > 0 { +- event::trigger_locked( +- GlobalSchemes::Pipe.scheme_id(), +- key | WRITE_NOT_READ_BIT, +- EVENT_WRITE, +- token.token(), +- ); +- pipe.write_condition.notify_locked(token.token()); ++ drop(vec); ++ drop(lock_token); ++ trigger_matching(&endpoint.pipe, false, true, EVENT_WRITE, token); ++ endpoint.pipe.write_condition.notify(token); + + return Ok(bytes_read); +- } else if user_buf.is_empty() { ++ } ++ ++ if user_buf.is_empty() { + return Ok(0); + } + +- if !pipe.writer_is_alive.load(Ordering::SeqCst) { ++ if endpoint.pipe.writer_count.load(Ordering::SeqCst) == 0 { + return Ok(0); +- } else if fcntl_flags & O_NONBLOCK as u32 != 0 { ++ } ++ if fcntl_flags & O_NONBLOCK as u32 != 0 { + return Err(Error::new(EAGAIN)); +- } else if !pipe.read_condition.wait(vec, "PipeRead::read", &mut token) { ++ } ++ if !endpoint ++ .pipe ++ .read_condition ++ .wait(vec, "PipeRead::read", &mut lock_token) ++ { + return Err(Error::new(EINTR)); + } + } + } ++ + fn kwrite( + &self, + id: usize, +@@ -303,18 +484,17 @@ + _stored_flags: u32, + token: &mut CleanLockToken, + ) -> Result { +- let (is_write_not_read, key) = from_raw_id(id); +- +- if !is_write_not_read { ++ let endpoint = Self::get_endpoint(id, token)?; ++ ++ if !endpoint.kind.can_write() { + return Err(Error::new(EBADF)); + } +- let pipe = Self::get_pipe(key, token)?; + + loop { +- let vec = pipe.queue.lock(token.token()); +- let (mut vec, mut token) = vec.into_split(); +- +- if !pipe.reader_is_alive.load(Ordering::Relaxed) { ++ let vec = endpoint.pipe.queue.lock(token.token()); ++ let (mut vec, mut lock_token) = vec.into_split(); ++ ++ if 
endpoint.pipe.reader_count.load(Ordering::Relaxed) == 0 { + return Err(Error::new(EPIPE)); + } + +@@ -329,7 +509,6 @@ + + let mut bytes_written = 0; + +- // TODO: Modify VecDeque so that the unwritten portions can be accessed directly? + for (idx, chunk) in src_buf.in_variable_chunks(TMPBUF_SIZE).enumerate() { + let chunk_byte_count = match chunk.copy_common_bytes_to_slice(&mut tmp_buf) { + Ok(c) => c, +@@ -341,41 +520,52 @@ + } + + if bytes_written > 0 { +- event::trigger_locked( +- GlobalSchemes::Pipe.scheme_id(), +- key, +- EVENT_READ, +- token.token(), +- ); +- pipe.read_condition.notify_locked(token.token()); ++ drop(vec); ++ drop(lock_token); ++ trigger_matching(&endpoint.pipe, true, false, EVENT_READ, token); ++ endpoint.pipe.read_condition.notify(token); + + return Ok(bytes_written); +- } else if user_buf.is_empty() { ++ } ++ ++ if user_buf.is_empty() { + return Ok(0); + } + + if fcntl_flags & O_NONBLOCK as u32 != 0 { + return Err(Error::new(EAGAIN)); +- } else if !pipe ++ } ++ if !endpoint ++ .pipe + .write_condition +- .wait(vec, "PipeWrite::write", &mut token) ++ .wait(vec, "PipeWrite::write", &mut lock_token) + { + return Err(Error::new(EINTR)); + } + } + } +- fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result { +- //TODO: construct useful path? 
+- buf.copy_common_bytes_from_slice("/scheme/pipe/".as_bytes()) +- } +- fn kfstat(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result<()> { ++ ++ fn kfpath(&self, id: usize, buf: UserSliceWo, token: &mut CleanLockToken) -> Result { ++ let endpoint = Self::get_endpoint(id, token)?; ++ if let Some(named) = endpoint.named { ++ buf.copy_common_bytes_from_slice(named.path.as_bytes()) ++ } else { ++ buf.copy_common_bytes_from_slice("/scheme/pipe/".as_bytes()) ++ } ++ } ++ ++ fn kfstat(&self, id: usize, buf: UserSliceWo, token: &mut CleanLockToken) -> Result<()> { ++ let endpoint = Self::get_endpoint(id, token)?; ++ let mode = endpoint.named.map_or(0o666, |named| named.mode); ++ + buf.copy_exactly(&Stat { +- st_mode: MODE_FIFO | 0o666, ++ st_mode: MODE_FIFO | mode, + ..Default::default() + })?; + + Ok(()) + } ++ + fn kfdwrite( + &self, + id: usize, +@@ -385,23 +575,17 @@ + _metadata: &[u64], + token: &mut CleanLockToken, + ) -> Result { +- let (is_write_not_read, key) = from_raw_id(id); +- +- if !is_write_not_read { ++ let endpoint = Self::get_endpoint(id, token)?; ++ ++ if !endpoint.kind.can_write() { + return Err(Error::new(EBADF)); + } +- let pipe = match Self::get_pipe(key, token) { +- Ok(p) => p, +- Err(e) => { +- return Err(e); +- } +- }; + + loop { +- let vec = pipe.fd_queue.lock(token.token()); +- let (mut vec, mut token) = vec.into_split(); +- +- if !pipe.reader_is_alive.load(Ordering::Relaxed) { ++ let vec = endpoint.pipe.fd_queue.lock(token.token()); ++ let (mut vec, mut lock_token) = vec.into_split(); ++ ++ if endpoint.pipe.reader_count.load(Ordering::Relaxed) == 0 { + return Err(Error::new(EPIPE)); + } + if descs.is_empty() { +@@ -421,25 +605,24 @@ + let fds_written = vec.len() - before_len; + + if fds_written > 0 { +- event::trigger_locked( +- GlobalSchemes::Pipe.scheme_id(), +- key, +- EVENT_READ, +- token.token(), +- ); +- pipe.read_condition.notify_locked(token.token()); ++ drop(vec); ++ drop(lock_token); ++ 
trigger_matching(&endpoint.pipe, true, false, EVENT_READ, token); ++ endpoint.pipe.read_condition.notify(token); + + return Ok(fds_written); + } + +- if !pipe ++ if !endpoint ++ .pipe + .write_condition +- .wait(vec, "PipeWrite::write", &mut token) ++ .wait(vec, "PipeWrite::write", &mut lock_token) + { + return Err(Error::new(EINTR)); + } + } + } ++ + fn kfdread( + &self, + id: usize, +@@ -448,25 +631,19 @@ + _metadata: &[u64], + token: &mut CleanLockToken, + ) -> Result { +- let (is_write_not_read, key) = from_raw_id(id); +- +- if is_write_not_read { ++ let endpoint = Self::get_endpoint(id, token)?; ++ ++ if !endpoint.kind.can_read() { + return Err(Error::new(EBADF)); + } +- let pipe = match Self::get_pipe(key, token) { +- Ok(p) => p, +- Err(e) => { +- return Err(e); +- } +- }; + + if payload.is_empty() { + return Ok(0); + } + + loop { +- let vec = pipe.fd_queue.lock(token.token()); +- let (mut vec, mut token) = vec.into_split(); ++ let vec = endpoint.pipe.fd_queue.lock(token.token()); ++ let (mut vec, mut lock_token) = vec.into_split(); + + let fds_available = vec.len(); + let max_fds_read = payload.len() / size_of::(); +@@ -479,31 +656,33 @@ + fds_to_transfer, + payload, + flags.contains(CallFlags::FD_CLOEXEC), +- &mut token, ++ &mut lock_token, + )?; + } else { + bulk_add_fds( + fds_to_transfer, + payload, + flags.contains(CallFlags::FD_CLOEXEC), +- &mut token, ++ &mut lock_token, + )?; + } + +- event::trigger_locked( +- GlobalSchemes::Pipe.scheme_id(), +- key | WRITE_NOT_READ_BIT, +- EVENT_WRITE, +- token.token(), +- ); +- pipe.write_condition.notify_locked(token.token()); ++ drop(vec); ++ drop(lock_token); ++ trigger_matching(&endpoint.pipe, false, true, EVENT_WRITE, token); ++ endpoint.pipe.write_condition.notify(token); + + return Ok(fds_to_read); + } + +- if !pipe.writer_is_alive.load(Ordering::SeqCst) { ++ if endpoint.pipe.writer_count.load(Ordering::SeqCst) == 0 { + return Ok(0); +- } else if !pipe.read_condition.wait(vec, "PipeRead::read", &mut token) { 
++ } ++ if !endpoint ++ .pipe ++ .read_condition ++ .wait(vec, "PipeRead::read", &mut lock_token) ++ { + return Err(Error::new(EINTR)); + } + } +@@ -511,11 +690,23 @@ + } + + pub struct Pipe { +- read_condition: WaitCondition, // signals whether there are available bytes to read +- write_condition: WaitCondition, // signals whether there is room for additional bytes ++ read_condition: WaitCondition, ++ write_condition: WaitCondition, + queue: Mutex>, +- reader_is_alive: AtomicBool, // starts set, unset when reader closes +- writer_is_alive: AtomicBool, // starts set, unset when writer closes +- has_run_dup: AtomicBool, ++ reader_count: AtomicUsize, ++ writer_count: AtomicUsize, + fd_queue: Mutex>>, + } ++ ++impl Pipe { ++ fn new() -> Self { ++ Self { ++ read_condition: WaitCondition::new(), ++ write_condition: WaitCondition::new(), ++ queue: Mutex::new(VecDeque::new()), ++ reader_count: AtomicUsize::new(0), ++ writer_count: AtomicUsize::new(0), ++ fd_queue: Mutex::new(VecDeque::new()), ++ } ++ } ++} +--- a/src/syscall/fs.rs ++++ b/src/syscall/fs.rs +@@ -1,29 +1,29 @@ + //! 
Filesystem syscalls + + use core::num::NonZeroUsize; + +-use alloc::{string::String, sync::Arc, vec::Vec}; ++use alloc::{format, string::{String, ToString}, sync::Arc, vec::Vec}; + use redox_path::RedoxPath; + + use crate::{ + context::{ + self, + file::{FileDescription, FileDescriptor, InternalFlags, LockedFileDescription}, + memory::{AddrSpace, GenericFlusher, Grant, PageSpan, TlbShootdownActions}, + }, + memory::{Page, VirtualAddress, PAGE_SIZE}, +- scheme::{FileHandle, KernelScheme, OpenResult, StrOrBytes}, ++ scheme::{self, pipe, FileHandle, KernelScheme, OpenResult, SchemeExt, StrOrBytes}, + sync::{CleanLockToken, RwLock}, +- syscall::{data::Stat, error::*, flag::*}, ++ syscall::{data::{GlobalSchemes, Stat}, error::*, flag::*}, + }; + + use super::usercopy::{UserSlice, UserSliceRo, UserSliceRw, UserSliceWo}; + + pub fn file_op_generic( + fd: FileHandle, + token: &mut CleanLockToken, + op: impl FnOnce(&dyn KernelScheme, usize, &mut CleanLockToken) -> Result, + ) -> Result { + file_op_generic_ext(fd, token, |s, _, desc, token| op(s, desc.number, token)) + } + pub fn file_op_generic_ext( +@@ -53,91 +53,161 @@ + let mut path_buf = vec![0_u8; max_len]; + if raw_path.len() > path_buf.len() { + return Err(Error::new(ENAMETOOLONG)); + } + let path_len = raw_path.copy_common_bytes_to_slice(&mut path_buf)?; + path_buf.truncate(path_len); + String::from_utf8(path_buf).map_err(|_| Error::new(EINVAL)) + //core::str::from_utf8(&path_buf[..path_len]).map_err(|_| Error::new(EINVAL)) + } + // TODO: Define elsewhere + const PATH_MAX: usize = PAGE_SIZE; + +-pub fn openat( +- fh: FileHandle, +- raw_path: UserSliceRo, ++fn fifo_path_key(scheme_id: scheme::SchemeId, number: usize, path: &str) -> String { ++ if path.starts_with('/') { ++ path.to_string() ++ } else { ++ format!("@fifo:{}:{}:{}", scheme_id.get(), number, path) ++ } ++} ++ ++fn install_open_result( ++ scheme_id: scheme::SchemeId, + flags: usize, +- fcntl_flags: u32, +- euid: u32, +- egid: u32, ++ open_result: 
OpenResult, + token: &mut CleanLockToken, + ) -> Result { +- let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; +- +- let desc = { +- let current_lock = context::current(); +- let mut current = current_lock.read(token.token()); +- let (context, mut context_token) = current.token_split(); +- let pipe = context +- .get_file(fh, &mut context_token) +- .ok_or(Error::new(EBADF))?; +- *pipe.description.read(context_token.token()) +- }; +- let scheme = desc.get_scheme(token)?; +- let number = desc.number; +- let scheme_id = desc.scheme; +- +- let caller_ctx = context::current() +- .read(token.token()) +- .caller_ctx() +- .filter_uid_gid(euid, egid); +- +- let new_description = { +- let res = scheme.kopenat( +- number, +- StrOrBytes::from_str(&path_buf), +- flags, +- fcntl_flags, +- caller_ctx, +- token, +- ); +- +- match res? { +- OpenResult::SchemeLocal(number, internal_flags) => { +- Arc::new(RwLock::new(FileDescription::new( +- scheme_id, +- number, +- 0, +- (flags & !O_CLOEXEC) as u32, +- internal_flags, +- token, +- ))) +- } +- OpenResult::External(desc) => desc, +- } ++ let new_description = match open_result { ++ OpenResult::SchemeLocal(number, internal_flags) => Arc::new(RwLock::new( ++ FileDescription::new( ++ scheme_id, ++ number, ++ 0, ++ (flags & !O_CLOEXEC) as u32, ++ internal_flags, ++ token, ++ ), ++ )), ++ OpenResult::External(desc) => desc, + }; + + let current_lock = context::current(); + let mut current = current_lock.read(token.token()); + let (context, mut token) = current.token_split(); + context + .add_file( + FileDescriptor { + description: new_description, + cloexec: flags & O_CLOEXEC == O_CLOEXEC, + }, + &mut token, + ) + .ok_or(Error::new(EMFILE)) + } ++ ++fn path_exists_in_scheme( ++ scheme: &dyn KernelScheme, ++ number: usize, ++ path: &str, ++ caller_ctx: scheme::CallerCtx, ++ token: &mut CleanLockToken, ++) -> Result { ++ match scheme.kopenat(number, StrOrBytes::from_str(path), O_STAT, 0, caller_ctx, token) { ++ 
Ok(OpenResult::SchemeLocal(number, _)) => { ++ let _ = scheme.close(number, token); ++ Ok(true) ++ } ++ Ok(OpenResult::External(_)) => Ok(true), ++ Err(err) if err.errno == ENOENT => Ok(false), ++ Err(err) => Err(err), ++ } ++} ++ ++pub fn openat( ++ fh: FileHandle, ++ raw_path: UserSliceRo, ++ flags: usize, ++ fcntl_flags: u32, ++ euid: u32, ++ egid: u32, ++ token: &mut CleanLockToken, ++) -> Result { ++ let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; ++ ++ let desc = { ++ let current_lock = context::current(); ++ let mut current = current_lock.read(token.token()); ++ let (context, mut context_token) = current.token_split(); ++ let pipe = context ++ .get_file(fh, &mut context_token) ++ .ok_or(Error::new(EBADF))?; ++ *pipe.description.read(context_token.token()) ++ }; ++ let scheme = desc.get_scheme(token)?; ++ let number = desc.number; ++ let scheme_id = desc.scheme; ++ ++ let caller_ctx = context::current() ++ .read(token.token()) ++ .caller_ctx() ++ .filter_uid_gid(euid, egid); ++ ++ let fifo_mode_requested = flags & MODE_FIFO as usize == MODE_FIFO as usize; ++ let fifo_key = fifo_path_key(scheme_id, number, &path_buf); ++ ++ if pipe::named_pipe_exists(&fifo_key, token) { ++ if flags & O_EXCL == O_EXCL && flags & O_CREAT == O_CREAT { ++ return Err(Error::new(EEXIST)); ++ } ++ if fifo_mode_requested && flags & O_CREAT == O_CREAT { ++ return Err(Error::new(EEXIST)); ++ } ++ ++ let pipe_number = pipe::open_named_pipe(&fifo_key, flags, token)? ++ .ok_or(Error::new(ENOENT))?; ++ return install_open_result( ++ GlobalSchemes::Pipe.scheme_id(), ++ flags, ++ OpenResult::SchemeLocal(pipe_number, InternalFlags::empty()), ++ token, ++ ); ++ } ++ ++ if fifo_mode_requested && flags & O_CREAT == O_CREAT { ++ if path_exists_in_scheme(&*scheme, number, &path_buf, caller_ctx, token)? 
{ ++ return Err(Error::new(EEXIST)); ++ } ++ ++ let mode = u16::try_from(flags & 0o7777).map_err(|_| Error::new(EINVAL))?; ++ let pipe_number = pipe::create_named_pipe(&fifo_key, &path_buf, mode, flags, token)?; ++ ++ return install_open_result( ++ GlobalSchemes::Pipe.scheme_id(), ++ flags, ++ OpenResult::SchemeLocal(pipe_number, InternalFlags::empty()), ++ token, ++ ); ++ } ++ ++ let open_result = scheme.kopenat( ++ number, ++ StrOrBytes::from_str(&path_buf), ++ flags, ++ fcntl_flags, ++ caller_ctx, ++ token, ++ )?; ++ ++ install_open_result(scheme_id, flags, open_result, token) ++} + /// Unlinkat syscall + pub fn unlinkat( + fh: FileHandle, + raw_path: UserSliceRo, + flags: usize, + euid: u32, + egid: u32, + token: &mut CleanLockToken, + ) -> Result<()> { + let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; + + let desc = { +@@ -147,24 +217,28 @@ + let pipe = context + .get_file(fh, &mut context_token) + .ok_or(Error::new(EBADF))?; + *pipe.description.read(context_token.token()) + }; + let number = desc.number; + let scheme = desc.get_scheme(token)?; + + let caller_ctx = context::current() + .read(token.token()) + .caller_ctx() + .filter_uid_gid(euid, egid); ++ ++ if pipe::unlink_named_pipe(&fifo_path_key(desc.scheme, number, &path_buf), token) { ++ return Ok(()); ++ } + + /* + let mut path_buf = BorrowedHtBuf::head()?; + let path = path_buf.use_for_string(raw_path)?; + */ + scheme.unlinkat(number, &path_buf, flags, caller_ctx, token) + } + + /// Close syscall + pub fn close(fd: FileHandle, token: &mut CleanLockToken) -> Result<()> { + let file = { + let current_lock = context::current(); diff --git a/local/patches/kernel/P10-debug-scheme-serial-fix.patch b/local/patches/kernel/P10-debug-scheme-serial-fix.patch deleted file mode 100644 index 9d79c4369f..0000000000 --- a/local/patches/kernel/P10-debug-scheme-serial-fix.patch +++ /dev/null @@ -1,34 +0,0 @@ ---- a/src/scheme/debug.rs 2026-04-28 07:21:41.000000000 +0100 -+++ b/src/scheme/debug.rs 2026-05-04 
08:10:23.688174541 +0100 -@@ -22,9 +22,10 @@ - - static HANDLES: RwLock> = RwLock::new(HandleMap::new()); - --/// Add to the input queue -+/// Add to the input queue, translating CR to NL (ICRNL) for serial console compatibility. - pub fn debug_input(data: u8, token: &mut CleanLockToken) { -- INPUT.send(data, token); -+ let translated = if data == b'\r' { b'\n' } else { data }; -+ INPUT.send(translated, token); - } - - // Notify readers of input updates -@@ -106,12 +107,16 @@ - fn fevent( - &self, - id: usize, -- _flags: EventFlags, -+ flags: EventFlags, - token: &mut CleanLockToken, - ) -> Result { - let _handle = *HANDLES.read(token.token()).get(id)?; - -- Ok(EventFlags::empty()) -+ let mut ready = EventFlags::empty(); -+ if flags.contains(EventFlags::EVENT_READ) { -+ ready |= EventFlags::EVENT_READ; -+ } -+ Ok(ready) - } - - fn fsync(&self, id: usize, token: &mut CleanLockToken) -> Result<()> { diff --git a/local/patches/kernel/P3-eventfd-kernel.patch b/local/patches/kernel/P3-eventfd-kernel.patch deleted file mode 100644 index 784d663dd1..0000000000 --- a/local/patches/kernel/P3-eventfd-kernel.patch +++ /dev/null @@ -1,368 +0,0 @@ -# eventfd kernel support — EventCounter implementation and scheme dispatch -# Adds EventCounter struct with blocking read/write, semaphore mode, and wait conditions -# Extends EventScheme to handle eventfd path-based open, read, write, close, fevent, kfpath - -diff --git a/src/event.rs b/src/event.rs -index 7398145a..92e5793c 100644 ---- a/src/event.rs -+++ b/src/event.rs -@@ -8,13 +8,14 @@ use crate::{ - context, - scheme::{self, SchemeExt, SchemeId}, - sync::{ -- CleanLockToken, LockToken, RwLock, RwLockReadGuard, RwLockWriteGuard, WaitQueue, L0, L1, L2, -+ CleanLockToken, LockToken, Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard, -+ WaitCondition, WaitQueue, L0, L1, L2, - }, - syscall::{ - data::Event, -- error::{Error, Result, EBADF}, -- flag::EventFlags, -- usercopy::UserSliceWo, -+ error::{Error, Result, EAGAIN, EBADF, 
EINVAL, EINTR}, -+ flag::{EVENT_READ, EVENT_WRITE, EventFlags}, -+ usercopy::{UserSliceRo, UserSliceWo}, - }, - }; - -@@ -25,6 +26,17 @@ pub struct EventQueue { - queue: WaitQueue, - } - -+const EVENTFD_COUNTER_MAX: u64 = u64::MAX - 1; -+const EVENTFD_TAG_BIT: usize = 1usize << (usize::BITS - 1); -+ -+pub struct EventCounter { -+ id: usize, -+ counter: Mutex, -+ read_condition: WaitCondition, -+ write_condition: WaitCondition, -+ semaphore: bool, -+} -+ - impl EventQueue { - pub fn new(id: EventQueueId) -> EventQueue { - EventQueue { -@@ -91,19 +103,146 @@ impl EventQueue { - } - } - -+impl EventCounter { -+ pub fn new(id: usize, init: u64, semaphore: bool) -> EventCounter { -+ EventCounter { -+ id, -+ counter: Mutex::new(init), -+ read_condition: WaitCondition::new(), -+ write_condition: WaitCondition::new(), -+ semaphore, -+ } -+ } -+ -+ pub fn is_readable(&self, token: &mut CleanLockToken) -> bool { -+ *self.counter.lock(token.token()) > 0 -+ } -+ -+ pub fn is_writable(&self, token: &mut CleanLockToken) -> bool { -+ *self.counter.lock(token.token()) < EVENTFD_COUNTER_MAX -+ } -+ -+ pub fn read(&self, buf: UserSliceWo, block: bool, token: &mut CleanLockToken) -> Result { -+ if buf.len() < core::mem::size_of::() { -+ return Err(Error::new(EINVAL)); -+ } -+ -+ loop { -+ let counter = self.counter.lock(token.token()); -+ let (mut counter, mut token) = counter.into_split(); -+ -+ if *counter > 0 { -+ let value = if self.semaphore { -+ *counter -= 1; -+ 1 -+ } else { -+ let value = *counter; -+ *counter = 0; -+ value -+ }; -+ -+ buf.limit(core::mem::size_of::()) -+ .ok_or(Error::new(EINVAL))? 
-+ .copy_from_slice(&value.to_ne_bytes())?; -+ -+ trigger_locked( -+ GlobalSchemes::Event.scheme_id(), -+ self.id, -+ EVENT_WRITE, -+ token.token(), -+ ); -+ self.write_condition.notify_locked(token.token()); -+ -+ return Ok(core::mem::size_of::()); -+ } -+ -+ if !block { -+ return Err(Error::new(EAGAIN)); -+ } -+ -+ if !self -+ .read_condition -+ .wait(counter, "EventCounter::read", &mut token) -+ { -+ return Err(Error::new(EINTR)); -+ } -+ } -+ } -+ -+ pub fn write(&self, buf: UserSliceRo, block: bool, token: &mut CleanLockToken) -> Result { -+ if buf.len() != core::mem::size_of::() { -+ return Err(Error::new(EINVAL)); -+ } -+ -+ let value = unsafe { buf.read_exact::()? }; -+ if value == u64::MAX { -+ return Err(Error::new(EINVAL)); -+ } -+ -+ loop { -+ let counter = self.counter.lock(token.token()); -+ let (mut counter, mut token) = counter.into_split(); -+ -+ if EVENTFD_COUNTER_MAX - *counter >= value { -+ let was_zero = *counter == 0; -+ *counter += value; -+ -+ if was_zero && value != 0 { -+ trigger_locked( -+ GlobalSchemes::Event.scheme_id(), -+ self.id, -+ EVENT_READ, -+ token.token(), -+ ); -+ self.read_condition.notify_locked(token.token()); -+ } -+ -+ return Ok(core::mem::size_of::()); -+ } -+ -+ if !block { -+ return Err(Error::new(EAGAIN)); -+ } -+ -+ if !self -+ .write_condition -+ .wait(counter, "EventCounter::write", &mut token) -+ { -+ return Err(Error::new(EINTR)); -+ } -+ } -+ } -+ -+ pub fn into_drop(self, _token: LockToken<'_, L1>) { -+ drop(self); -+ } -+} -+ - pub type EventQueueList = HashMap>; -+pub type EventCounterList = HashMap>; - - // Next queue id - static NEXT_QUEUE_ID: AtomicUsize = AtomicUsize::new(0); -+static NEXT_COUNTER_ID: AtomicUsize = AtomicUsize::new(0); - - /// Get next queue id - pub fn next_queue_id() -> EventQueueId { - EventQueueId::from(NEXT_QUEUE_ID.fetch_add(1, Ordering::SeqCst)) - } - -+pub fn next_counter_id() -> usize { -+ EVENTFD_TAG_BIT | NEXT_COUNTER_ID.fetch_add(1, Ordering::SeqCst) -+} -+ -+pub fn 
is_counter_id(id: usize) -> bool { -+ id & EVENTFD_TAG_BIT != 0 -+} -+ - // Current event queues - static QUEUES: RwLock = - RwLock::new(EventQueueList::with_hasher(DefaultHashBuilder::new())); -+static COUNTERS: RwLock = -+ RwLock::new(EventCounterList::with_hasher(DefaultHashBuilder::new())); - - /// Get the event queues list, const - pub fn queues(token: LockToken<'_, L0>) -> RwLockReadGuard<'_, L2, EventQueueList> { -@@ -115,6 +254,14 @@ pub fn queues_mut(token: LockToken<'_, L0>) -> RwLockWriteGuard<'_, L2, EventQue - QUEUES.write(token) - } - -+pub fn counters(token: LockToken<'_, L0>) -> RwLockReadGuard<'_, L2, EventCounterList> { -+ COUNTERS.read(token) -+} -+ -+pub fn counters_mut(token: LockToken<'_, L0>) -> RwLockWriteGuard<'_, L2, EventCounterList> { -+ COUNTERS.write(token) -+} -+ - #[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] - pub struct RegKey { - pub scheme: SchemeId, -diff --git a/src/scheme/event.rs b/src/scheme/event.rs -index 36efe5b2..c64b6bd0 100644 ---- a/src/scheme/event.rs -+++ b/src/scheme/event.rs -@@ -1,9 +1,12 @@ --use alloc::sync::Arc; -+use alloc::{sync::Arc, vec::Vec}; - use syscall::{EventFlags, O_NONBLOCK}; - - use crate::{ - context::file::InternalFlags, -- event::{next_queue_id, queues, queues_mut, EventQueue, EventQueueId}, -+ event::{ -+ EventCounter, EventQueue, EventQueueId, counters, counters_mut, is_counter_id, -+ next_counter_id, next_queue_id, queues, queues_mut, -+ }, - sync::CleanLockToken, - syscall::{ - data::Event, -@@ -25,7 +28,7 @@ impl KernelScheme for EventScheme { - fn kopenat( - &self, - id: usize, -- _user_buf: StrOrBytes, -+ user_buf: StrOrBytes, - _flags: usize, - _fcntl_flags: u32, - _ctx: CallerCtx, -@@ -34,13 +37,53 @@ impl KernelScheme for EventScheme { - if id != SCHEME_ROOT_ID { - return Err(Error::new(EACCES)); - } -- let id = next_queue_id(); -- queues_mut(token.token()).insert(id, Arc::new(EventQueue::new(id))); - -- Ok(OpenResult::SchemeLocal(id.get(), InternalFlags::empty())) -+ let 
path = user_buf.as_str().or(Err(Error::new(EINVAL)))?; -+ let path = path.trim_matches('/'); -+ -+ if path.is_empty() { -+ let id = next_queue_id(); -+ queues_mut(token.token()).insert(id, Arc::new(EventQueue::new(id))); -+ return Ok(OpenResult::SchemeLocal(id.get(), InternalFlags::empty())); -+ } -+ -+ let parts: Vec<&str> = path.split('/').collect(); -+ if matches!(parts.first(), Some(&"eventfd")) { -+ let init = match parts.get(1) { -+ Some(value) => value.parse::().map_err(|_| Error::new(EINVAL))?, -+ None => 0_u64, -+ }; -+ if init > u32::MAX as u64 { -+ return Err(Error::new(EINVAL)); -+ } -+ let semaphore = match parts.get(2) { -+ Some(value) => match *value { -+ "0" => Ok(false), -+ "1" => Ok(true), -+ _ => Err(Error::new(EINVAL)), -+ }?, -+ None => false, -+ }; -+ -+ let id = next_counter_id(); -+ counters_mut(token.token()).insert(id, Arc::new(EventCounter::new(id, init, semaphore))); -+ return Ok(OpenResult::SchemeLocal(id, InternalFlags::empty())); -+ } -+ -+ Err(Error::new(ENOENT)) - } - - fn close(&self, id: usize, token: &mut CleanLockToken) -> Result<()> { -+ if is_counter_id(id) { -+ let counter = counters_mut(token.token()) -+ .remove(&id) -+ .ok_or(Error::new(EBADF))?; -+ if let Some(counter) = Arc::into_inner(counter) { -+ counter.into_drop(token.downgrade()); -+ } -+ return Ok(()); -+ } -+ - let id = EventQueueId::from(id); - let queue = queues_mut(token.token()) - .remove(&id) -@@ -59,6 +102,15 @@ impl KernelScheme for EventScheme { - _stored_flags: u32, - token: &mut CleanLockToken, - ) -> Result { -+ if is_counter_id(id) { -+ let counter = { -+ let handles = counters(token.token()); -+ let handle = handles.get(&id).ok_or(Error::new(EBADF))?; -+ handle.clone() -+ }; -+ return counter.read(buf, flags & O_NONBLOCK as u32 == 0, token); -+ } -+ - let id = EventQueueId::from(id); - - let queue = { -@@ -74,10 +126,19 @@ impl KernelScheme for EventScheme { - &self, - id: usize, - buf: UserSliceRo, -- _flags: u32, -+ flags: u32, - _stored_flags: u32, 
- token: &mut CleanLockToken, - ) -> Result { -+ if is_counter_id(id) { -+ let counter = { -+ let handles = counters(token.token()); -+ let handle = handles.get(&id).ok_or(Error::new(EBADF))?; -+ handle.clone() -+ }; -+ return counter.write(buf, flags & O_NONBLOCK as u32 == 0, token); -+ } -+ - let id = EventQueueId::from(id); - - let queue = { -@@ -98,8 +159,12 @@ impl KernelScheme for EventScheme { - Ok(events_written * size_of::()) - } - -- fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result { -- buf.copy_common_bytes_from_slice(b"/scheme/event/") -+ fn kfpath(&self, id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result { -+ if is_counter_id(id) { -+ buf.copy_common_bytes_from_slice(b"/scheme/event/eventfd") -+ } else { -+ buf.copy_common_bytes_from_slice(b"/scheme/event/") -+ } - } - - fn fevent( -@@ -108,6 +173,23 @@ impl KernelScheme for EventScheme { - flags: EventFlags, - token: &mut CleanLockToken, - ) -> Result { -+ if is_counter_id(id) { -+ let counter = { -+ let handles = counters(token.token()); -+ let handle = handles.get(&id).ok_or(Error::new(EBADF))?; -+ handle.clone() -+ }; -+ -+ let mut ready = EventFlags::empty(); -+ if flags.contains(EventFlags::EVENT_READ) && counter.is_readable(token) { -+ ready |= EventFlags::EVENT_READ; -+ } -+ if flags.contains(EventFlags::EVENT_WRITE) && counter.is_writable(token) { -+ ready |= EventFlags::EVENT_WRITE; -+ } -+ return Ok(ready); -+ } -+ - let id = EventQueueId::from(id); - - let queue = { diff --git a/local/patches/kernel/P4-s3-suspend-resume.patch b/local/patches/kernel/P4-s3-suspend-resume.patch deleted file mode 100644 index 9b223b3137..0000000000 --- a/local/patches/kernel/P4-s3-suspend-resume.patch +++ /dev/null @@ -1,1084 +0,0 @@ -diff --git a/Cargo.toml b/Cargo.toml -index 6d4f059..e05f723 100644 ---- a/Cargo.toml -+++ b/Cargo.toml -@@ -12,6 +12,7 @@ cc = "1.0" - toml = "0.8" - - [dependencies] -+acpi_ext = { package = "acpi", git = 
"https://gitlab.redox-os.org/redox-os/acpi.git", branch = "redox-6.x" } - arrayvec = { version = "0.7.4", default-features = false } - bitfield = "0.13.2" - bitflags = "2" -diff --git a/build.rs b/build.rs -index 96c3ea5..751746c 100644 ---- a/build.rs -+++ b/build.rs -@@ -77,6 +77,7 @@ fn main() { - } - "x86_64" => { - println!("cargo::rerun-if-changed=src/asm/x86_64/trampoline.asm"); -+ println!("cargo::rerun-if-changed=src/asm/x86_64/s3_wakeup.asm"); - - let status = Command::new("nasm") - .arg("-f") -@@ -89,6 +90,18 @@ fn main() { - if !status.success() { - panic!("nasm failed with exit status {}", status); - } -+ -+ let status = Command::new("nasm") -+ .arg("-f") -+ .arg("bin") -+ .arg("-o") -+ .arg(format!("{}/s3_wakeup", out_dir)) -+ .arg("src/asm/x86_64/s3_wakeup.asm") -+ .status() -+ .expect("failed to run nasm"); -+ if !status.success() { -+ panic!("nasm failed with exit status {}", status); -+ } - } - "riscv64" => { - println!("cargo::rustc-cfg=dtb"); -diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs -index 59e3526..b3b80f0 100644 ---- a/src/acpi/mod.rs -+++ b/src/acpi/mod.rs -@@ -82,6 +82,14 @@ impl Rxsdt for RxsdtEnum { - - pub static RXSDT_ENUM: Once = Once::new(); - -+#[derive(Clone, Copy, Debug)] -+pub struct AcpiRootInfo { -+ pub revision: u8, -+ pub root_sdt_address: PhysicalAddress, -+} -+ -+pub static ACPI_ROOT_INFO: Once = Once::new(); -+ - /// Parse the ACPI tables to gather CPU, interrupt, and timer information - pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) { - unsafe { -@@ -94,6 +102,15 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) { - let rsdp_opt = Rsdp::get_rsdp(already_supplied_rsdp); - - if let Some(rsdp) = rsdp_opt { -+ let root_info = ACPI_ROOT_INFO.call_once(|| AcpiRootInfo { -+ revision: rsdp.revision(), -+ root_sdt_address: rsdp.sdt_address(), -+ }); -+ -+ if root_info.root_sdt_address != rsdp.sdt_address() || root_info.revision != rsdp.revision() { -+ error!("ACPI_ROOT_INFO already initialized with 
a different RSDP root"); -+ } -+ - debug!("SDT address: {:#x}", rsdp.sdt_address().data()); - let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock_rw()); - -diff --git a/src/acpi/rsdp.rs b/src/acpi/rsdp.rs -index f10c5ac..5e93a9f 100644 ---- a/src/acpi/rsdp.rs -+++ b/src/acpi/rsdp.rs -@@ -31,4 +31,8 @@ impl Rsdp { - self.rsdt_address as usize - }) - } -+ -+ pub fn revision(&self) -> u8 { -+ self.revision -+ } - } -diff --git a/src/arch/x86_shared/mod.rs b/src/arch/x86_shared/mod.rs -index e3c3050..11c33e9 100644 ---- a/src/arch/x86_shared/mod.rs -+++ b/src/arch/x86_shared/mod.rs -@@ -28,6 +28,8 @@ pub mod pti; - /// Initialization and start function - pub mod start; - -+pub mod sleep; -+ - /// Stop function - pub mod stop; - -diff --git a/src/scheme/acpi.rs b/src/scheme/acpi.rs -index 87570a1..5d73469 100644 ---- a/src/scheme/acpi.rs -+++ b/src/scheme/acpi.rs -@@ -10,6 +10,7 @@ use syscall::{ - - use crate::{ - acpi::{RxsdtEnum, RXSDT_ENUM}, -+ arch::sleep, - context::file::InternalFlags, - event, - sync::{CleanLockToken, RwLock, WaitCondition, L1}, -@@ -40,6 +41,7 @@ enum HandleKind { - TopLevel, - Rxsdt, - ShutdownPipe, -+ SleepControl, - SchemeRoot, - } - -@@ -146,11 +148,11 @@ impl KernelScheme for AcpiScheme { - if flags & O_EXCL == O_EXCL || flags & O_SYMLINK == O_SYMLINK { - return Err(Error::new(EINVAL)); - } -- if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { -- return Err(Error::new(EROFS)); -- } - let (handle_kind, int_flags) = match path { - "" => { -+ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { -+ return Err(Error::new(EROFS)); -+ } - if flags & O_DIRECTORY != O_DIRECTORY && flags & O_STAT != O_STAT { - return Err(Error::new(EISDIR)); - } -@@ -158,17 +160,36 @@ impl KernelScheme for AcpiScheme { - (HandleKind::TopLevel, InternalFlags::POSITIONED) - } - "rxsdt" => { -+ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { -+ return Err(Error::new(EROFS)); -+ } - if flags & O_DIRECTORY == O_DIRECTORY 
&& flags & O_STAT != O_STAT { - return Err(Error::new(ENOTDIR)); - } - (HandleKind::Rxsdt, InternalFlags::POSITIONED) - } - "kstop" => { -+ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { -+ return Err(Error::new(EROFS)); -+ } - if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { - return Err(Error::new(ENOTDIR)); - } - (HandleKind::ShutdownPipe, InternalFlags::empty()) - } -+ "sleep" => { -+ if flags & O_ACCMODE == O_RDONLY || flags & O_STAT == O_STAT { -+ // allowed -+ } else if flags & O_ACCMODE != syscall::flag::O_WRONLY -+ && flags & O_ACCMODE != syscall::flag::O_RDWR -+ { -+ return Err(Error::new(EINVAL)); -+ } -+ if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { -+ return Err(Error::new(ENOTDIR)); -+ } -+ (HandleKind::SleepControl, InternalFlags::POSITIONED) -+ } - _ => return Err(Error::new(ENOENT)), - }; - -@@ -191,6 +212,7 @@ impl KernelScheme for AcpiScheme { - Ok(match handle.kind { - HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?.len() as u64, - HandleKind::ShutdownPipe => 1, -+ HandleKind::SleepControl => sleep::available_sleep_states().len() as u64, - HandleKind::TopLevel => 0, - HandleKind::SchemeRoot => return Err(Error::new(EBADF))?, - }) -@@ -253,6 +275,7 @@ impl KernelScheme for AcpiScheme { - - return dst_buf.copy_exactly(&[0x42]).map(|()| 1); - } -+ HandleKind::SleepControl => sleep::available_sleep_states(), - HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?, - HandleKind::TopLevel => return Err(Error::new(EISDIR)), - HandleKind::SchemeRoot => return Err(Error::new(EBADF)), -@@ -295,11 +318,45 @@ impl KernelScheme for AcpiScheme { - kind: DirentKind::Socket, - name: "kstop", - inode: 0, -+ next_opaque_id: 2, -+ })?; -+ } -+ if opaque <= 2 { -+ buf.entry(DirEntry { -+ kind: DirentKind::Regular, -+ name: "sleep", -+ inode: 0, - next_opaque_id: u64::MAX, - })?; - } - Ok(buf.finalize()) - } -+ fn kwrite( -+ &self, -+ id: usize, -+ buf: crate::syscall::usercopy::UserSliceRo, -+ 
_flags: u32, -+ _stored_flags: u32, -+ token: &mut CleanLockToken, -+ ) -> Result { -+ let handle = *HANDLES.read(token.token()).get(id)?; -+ -+ if handle.stat { -+ return Err(Error::new(EBADF)); -+ } -+ -+ match handle.kind { -+ HandleKind::SleepControl => { -+ let mut tmp = [0_u8; 16]; -+ let len = buf.copy_common_bytes_to_slice(&mut tmp)?; -+ let request = core::str::from_utf8(&tmp[..len]).map_err(|_| Error::new(EINVAL))?; -+ sleep::trigger_sleep_request(request)?; -+ Ok(len) -+ } -+ HandleKind::SchemeRoot => Err(Error::new(EBADF)), -+ _ => Err(Error::new(EBADF)), -+ } -+ } - fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result { - //TODO: construct useful path? - buf.copy_common_bytes_from_slice("/scheme/kernel.acpi/".as_bytes()) -@@ -328,6 +385,11 @@ impl KernelScheme for AcpiScheme { - st_size: 1, - ..Default::default() - }, -+ HandleKind::SleepControl => Stat { -+ st_mode: MODE_FILE, -+ st_size: sleep::available_sleep_states().len().try_into().unwrap_or(u64::MAX), -+ ..Default::default() -+ }, - HandleKind::SchemeRoot => return Err(Error::new(EBADF)), - })?; - -diff --git a/src/arch/x86_shared/sleep.rs b/src/arch/x86_shared/sleep.rs -new file mode 100644 -index 0000000..9f98c0d ---- /dev/null -+++ b/src/arch/x86_shared/sleep.rs -@@ -0,0 +1,712 @@ -+use alloc::{sync::Arc, vec::Vec}; -+use core::{ -+ ptr::NonNull, -+ str::FromStr, -+ sync::atomic::{AtomicU32, Ordering}, -+}; -+ -+use acpi_ext::{ -+ aml::{namespace::AmlName, object::Object, Interpreter}, -+ registers::FixedRegisters, -+ sdt::{facs::Facs, fadt::Fadt, SdtHeader}, -+ AcpiTables, Handle, Handler, PhysicalMapping, -+}; -+use spin::Mutex; -+use syscall::error::{Error, EINVAL, EIO}; -+use x86::{segmentation::SegmentSelector, task, Ring}; -+ -+use crate::{ -+ acpi::ACPI_ROOT_INFO, -+ arch::interrupt, -+ memory::{ -+ round_down_pages, round_up_pages, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, -+ RmmArch, VirtualAddress, PAGE_SIZE, -+ }, -+ syscall::io::{Io, 
Pio}, -+}; -+ -+const ACPI_SLP_TYP_SHIFT: u16 = 10; -+const ACPI_SLP_TYP_MASK: u16 = 0x1C00; -+const ACPI_SLP_EN: u16 = 1 << 13; -+const WAKE_TRAMPOLINE_PHYS: usize = 0x8000; -+const SLEEP_RETURN_OK: usize = 0; -+ -+#[cfg(target_arch = "x86_64")] -+static WAKE_TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/s3_wakeup")); -+ -+#[repr(C, packed)] -+#[derive(Clone, Copy, Debug, Default)] -+struct DescriptorTableRegister { -+ limit: u16, -+ base: u64, -+} -+ -+#[repr(C, align(64))] -+#[derive(Clone, Copy, Debug)] -+struct FpuState { -+ bytes: [u8; 4096], -+} -+ -+impl Default for FpuState { -+ fn default() -> Self { -+ Self { bytes: [0; 4096] } -+ } -+} -+ -+#[derive(Clone, Copy, Debug, Eq, PartialEq)] -+pub enum SleepState { -+ S3, -+ S5, -+} -+ -+#[derive(Clone, Copy, Debug, Eq, PartialEq)] -+pub enum SleepError { -+ UnsupportedArch, -+ MissingAcpi, -+ MissingFadt, -+ MissingFacs, -+ MissingSleepObject, -+ InvalidSleepObject, -+ UnsupportedPmControl, -+ UnsupportedAmlOperation, -+ SleepDidNotEnter, -+} -+ -+impl SleepError { -+ fn code(self) -> usize { -+ match self { -+ Self::UnsupportedArch => EINVAL as usize, -+ Self::MissingAcpi -+ | Self::MissingFadt -+ | Self::MissingFacs -+ | Self::MissingSleepObject -+ | Self::UnsupportedAmlOperation => EIO as usize, -+ Self::InvalidSleepObject | Self::UnsupportedPmControl | Self::SleepDidNotEnter => { -+ EINVAL as usize -+ } -+ } -+ } -+ -+ fn from_code(code: usize) -> Self { -+ match code as i32 { -+ x if x == EINVAL => Self::InvalidSleepObject, -+ _ => Self::MissingAcpi, -+ } -+ } -+} -+ -+#[derive(Clone, Copy, Debug, Default)] -+struct SavedCpuContext { -+ entry_rsp: usize, -+ runtime_rsp: usize, -+ facs_address: usize, -+ cr0: usize, -+ cr2: usize, -+ cr3: usize, -+ cr4: usize, -+ rflags: usize, -+ gdtr: DescriptorTableRegister, -+ idtr: DescriptorTableRegister, -+ efer: u64, -+ fs_base: u64, -+ gs_base: u64, -+ kernel_gs_base: u64, -+ fpu: FpuState, -+} -+ -+static SAVED_CONTEXT: Mutex> = 
Mutex::new(None); -+static AML_MUTEX_IDS: AtomicU32 = AtomicU32::new(1); -+ -+#[derive(Clone, Copy, Debug)] -+struct SleepTypeData { -+ a: u16, -+ b: u16, -+} -+ -+#[derive(Clone, Copy)] -+struct KernelAcpiHandler; -+ -+impl KernelAcpiHandler { -+ fn map_range(physical_address: usize, size: usize) -> (*mut u8, usize) { -+ let map_base = round_down_pages(physical_address); -+ let map_offset = physical_address - map_base; -+ let mapped_length = round_up_pages(size + map_offset); -+ -+ // SAFETY: The ACPI interpreter only requests firmware-described physical regions. -+ unsafe { -+ let mut mapper = KernelMapper::lock_rw(); -+ for page_index in 0..mapped_length / PAGE_SIZE { -+ let (_, flush) = mapper -+ .map_linearly( -+ PhysicalAddress::new(map_base + page_index * PAGE_SIZE), -+ PageFlags::new(), -+ ) -+ .expect("failed to linearly map ACPI physical region"); -+ flush.flush(); -+ } -+ } -+ -+ let virtual_base = RmmA::phys_to_virt(PhysicalAddress::new(map_base)).data(); -+ ((virtual_base + map_offset) as *mut u8, mapped_length) -+ } -+} -+ -+impl Handler for KernelAcpiHandler { -+ unsafe fn map_physical_region(&self, physical_address: usize, size: usize) -> PhysicalMapping { -+ let (virtual_start, mapped_length) = Self::map_range(physical_address, size); -+ PhysicalMapping { -+ physical_start: physical_address, -+ virtual_start: NonNull::new(virtual_start.cast::()) -+ .expect("expected mapped ACPI virtual address to be non-null"), -+ region_length: size, -+ mapped_length, -+ handler: *self, -+ } -+ } -+ -+ fn unmap_physical_region(_region: &PhysicalMapping) {} -+ -+ fn read_u8(&self, address: usize) -> u8 { -+ // SAFETY: AML system-memory accesses are byte-addressable firmware regions. -+ unsafe { core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u8) } -+ } -+ -+ fn read_u16(&self, address: usize) -> u16 { -+ // SAFETY: AML system-memory accesses are word-addressable firmware regions. 
-+ unsafe { -+ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u16) -+ } -+ } -+ -+ fn read_u32(&self, address: usize) -> u32 { -+ // SAFETY: AML system-memory accesses are dword-addressable firmware regions. -+ unsafe { -+ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u32) -+ } -+ } -+ -+ fn read_u64(&self, address: usize) -> u64 { -+ // SAFETY: AML system-memory accesses are qword-addressable firmware regions. -+ unsafe { -+ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u64) -+ } -+ } -+ -+ fn write_u8(&self, address: usize, value: u8) { -+ // SAFETY: AML system-memory accesses are byte-addressable firmware regions. -+ unsafe { -+ core::ptr::write_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u8, value) -+ } -+ } -+ -+ fn write_u16(&self, address: usize, value: u16) { -+ // SAFETY: AML system-memory accesses are word-addressable firmware regions. -+ unsafe { -+ core::ptr::write_volatile( -+ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u16, -+ value, -+ ) -+ } -+ } -+ -+ fn write_u32(&self, address: usize, value: u32) { -+ // SAFETY: AML system-memory accesses are dword-addressable firmware regions. -+ unsafe { -+ core::ptr::write_volatile( -+ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u32, -+ value, -+ ) -+ } -+ } -+ -+ fn write_u64(&self, address: usize, value: u64) { -+ // SAFETY: AML system-memory accesses are qword-addressable firmware regions. 
-+ unsafe { -+ core::ptr::write_volatile( -+ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u64, -+ value, -+ ) -+ } -+ } -+ -+ fn read_io_u8(&self, port: u16) -> u8 { -+ Pio::::new(port).read() -+ } -+ -+ fn read_io_u16(&self, port: u16) -> u16 { -+ Pio::::new(port).read() -+ } -+ -+ fn read_io_u32(&self, port: u16) -> u32 { -+ Pio::::new(port).read() -+ } -+ -+ fn write_io_u8(&self, port: u16, value: u8) { -+ Pio::::new(port).write(value) -+ } -+ -+ fn write_io_u16(&self, port: u16, value: u16) { -+ Pio::::new(port).write(value) -+ } -+ -+ fn write_io_u32(&self, port: u16, value: u32) { -+ Pio::::new(port).write(value) -+ } -+ -+ fn read_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u8 { -+ 0 -+ } -+ -+ fn read_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u16 { -+ 0 -+ } -+ -+ fn read_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u32 { -+ 0 -+ } -+ -+ fn write_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u8) {} -+ -+ fn write_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u16) {} -+ -+ fn write_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u32) {} -+ -+ fn nanos_since_boot(&self) -> u64 { -+ 0 -+ } -+ -+ fn stall(&self, microseconds: u64) { -+ for _ in 0..(microseconds.saturating_mul(64)) { -+ core::hint::spin_loop(); -+ } -+ } -+ -+ fn sleep(&self, milliseconds: u64) { -+ for _ in 0..(milliseconds.saturating_mul(64_000)) { -+ core::hint::spin_loop(); -+ } -+ } -+ -+ fn create_mutex(&self) -> Handle { -+ Handle(AML_MUTEX_IDS.fetch_add(1, Ordering::Relaxed)) -+ } -+ -+ fn acquire(&self, _mutex: Handle, _timeout: u16) -> Result<(), acpi_ext::aml::AmlError> { -+ Ok(()) -+ } -+ -+ fn release(&self, _mutex: Handle) {} -+} -+ -+fn sleep_state_name(state: SleepState) -> &'static str { -+ match state { -+ SleepState::S3 => "\\_S3", -+ SleepState::S5 => "\\_S5", -+ } -+} -+ -+fn encode_sleep_type(value: u16) -> u16 { -+ if value <= 
0x7 { -+ value << ACPI_SLP_TYP_SHIFT -+ } else { -+ value & ACPI_SLP_TYP_MASK -+ } -+} -+ -+fn load_interpreter() -> Result<( -+ Arc>, -+ PhysicalMapping, -+ Interpreter, -+), SleepError> { -+ let root = *ACPI_ROOT_INFO.get().ok_or(SleepError::MissingAcpi)?; -+ let handler = KernelAcpiHandler; -+ -+ // SAFETY: ACPI root info is captured from the firmware-provided, already validated root table. -+ let tables = unsafe { -+ AcpiTables::from_rsdt(handler, root.revision, root.root_sdt_address.data()) -+ .map_err(|_| SleepError::MissingAcpi)? -+ }; -+ let fadt = tables.find_table::().ok_or(SleepError::MissingFadt)?; -+ let registers = Arc::new( -+ FixedRegisters::new(&fadt, handler).map_err(|_| SleepError::UnsupportedPmControl)?, -+ ); -+ let facs_address = fadt.facs_address().map_err(|_| SleepError::MissingFacs)?; -+ -+ // SAFETY: The FADT-supplied FACS address is used exactly as described by the ACPI spec. -+ let facs = unsafe { handler.map_physical_region::(facs_address, core::mem::size_of::()) }; -+ // SAFETY: The AML interpreter only needs an owned mapping of the same firmware FACS table. -+ let interpreter_facs = unsafe { -+ handler.map_physical_region::(facs_address, core::mem::size_of::()) -+ }; -+ let dsdt = tables.dsdt().map_err(|_| SleepError::MissingFadt)?; -+ let interpreter = Interpreter::new(handler, dsdt.revision, Arc::clone(®isters), Some(interpreter_facs)); -+ -+ // SAFETY: Each AML table mapping is owned by the interpreter during table loading. 
-+ unsafe { -+ let mapping = handler.map_physical_region::(dsdt.phys_address, dsdt.length as usize); -+ let stream = core::slice::from_raw_parts( -+ mapping.virtual_start.as_ptr().byte_add(core::mem::size_of::()) as *const u8, -+ dsdt.length as usize - core::mem::size_of::(), -+ ); -+ interpreter -+ .load_table(stream) -+ .map_err(|_| SleepError::UnsupportedAmlOperation)?; -+ -+ for ssdt in tables.ssdts() { -+ let mapping = handler.map_physical_region::(ssdt.phys_address, ssdt.length as usize); -+ let stream = core::slice::from_raw_parts( -+ mapping.virtual_start.as_ptr().byte_add(core::mem::size_of::()) as *const u8, -+ ssdt.length as usize - core::mem::size_of::(), -+ ); -+ interpreter -+ .load_table(stream) -+ .map_err(|_| SleepError::UnsupportedAmlOperation)?; -+ } -+ } -+ -+ Ok((registers, facs, interpreter)) -+} -+ -+fn sleep_type_data_from_interpreter( -+ interpreter: &Interpreter, -+ state: SleepState, -+) -> Result { -+ let name = AmlName::from_str(sleep_state_name(state)).map_err(|_| SleepError::MissingSleepObject)?; -+ let object = interpreter -+ .evaluate(name, Vec::new()) -+ .map_err(|_| SleepError::MissingSleepObject)?; -+ -+ let Object::Package(package) = &*object else { -+ return Err(SleepError::InvalidSleepObject); -+ }; -+ -+ let Some(typa_object) = package.first() else { -+ return Err(SleepError::InvalidSleepObject); -+ }; -+ let Some(typb_object) = package.get(1) else { -+ return Err(SleepError::InvalidSleepObject); -+ }; -+ -+ let Object::Integer(typa) = &**typa_object else { -+ return Err(SleepError::InvalidSleepObject); -+ }; -+ let Object::Integer(typb) = &**typb_object else { -+ return Err(SleepError::InvalidSleepObject); -+ }; -+ -+ Ok(SleepTypeData { -+ a: encode_sleep_type(*typa as u16), -+ b: encode_sleep_type(*typb as u16), -+ }) -+} -+ -+fn sleep_type_data(state: SleepState) -> Result { -+ let (_registers, _facs, interpreter) = load_interpreter()?; -+ sleep_type_data_from_interpreter(&interpreter, state) -+} -+ -+fn 
install_wake_trampoline(stack_rsp: usize, cr3: usize) { -+ let trampoline_page = Page::containing_address(VirtualAddress::new(WAKE_TRAMPOLINE_PHYS)); -+ let trampoline_frame = PhysicalAddress::new(WAKE_TRAMPOLINE_PHYS); -+ -+ // SAFETY: The 0x8000 low-memory trampoline page is reserved by the kernel for bootstrap stubs. -+ let (result, _) = unsafe { -+ let mut mapper = KernelMapper::lock_rw(); -+ let result = mapper -+ .map_phys( -+ trampoline_page.start_address(), -+ trampoline_frame, -+ PageFlags::new().execute(true).write(true), -+ ) -+ .expect("failed to map S3 wake trampoline page"); -+ (result, mapper.table().phys().data()) -+ }; -+ result.flush(); -+ -+ for (index, value) in WAKE_TRAMPOLINE_DATA.iter().enumerate() { -+ // SAFETY: The trampoline page is mapped writable at the same virtual address as the physical page. -+ unsafe { -+ core::ptr::write_volatile((WAKE_TRAMPOLINE_PHYS as *mut u8).add(index), *value); -+ } -+ } -+ -+ // SAFETY: The wake trampoline layout reserves three qword fields immediately after the jump. -+ unsafe { -+ let stack_slot = (WAKE_TRAMPOLINE_PHYS + 8) as *mut u64; -+ let page_table_slot = stack_slot.add(1); -+ let code_slot = stack_slot.add(2); -+ stack_slot.write(stack_rsp as u64); -+ page_table_slot.write(cr3 as u64); -+ #[expect(clippy::fn_to_numeric_cast)] -+ code_slot.write(resume_from_s3_trampoline as usize as u64); -+ } -+ -+ // SAFETY: The trampoline mapping is no longer needed once the physical page has been populated. -+ let (_frame, _, flush) = unsafe { -+ KernelMapper::lock_rw() -+ .unmap_phys(trampoline_page.start_address()) -+ .expect("failed to unmap S3 wake trampoline page") -+ }; -+ flush.flush(); -+} -+ -+fn save_descriptor_tables(context: &mut SavedCpuContext) { -+ // SAFETY: SGDT/SIDT only read the current CPU descriptor-table registers into the provided storage. 
-+ unsafe { -+ core::arch::asm!("sgdt [{}]", in(reg) &mut context.gdtr, options(nostack, preserves_flags)); -+ core::arch::asm!("sidt [{}]", in(reg) &mut context.idtr, options(nostack, preserves_flags)); -+ } -+} -+ -+fn save_fpu_state(context: &mut SavedCpuContext) { -+ // SAFETY: The kernel owns the current CPU at suspend entry and the FXSAVE buffer is 64-byte aligned. -+ unsafe { -+ core::arch::asm!( -+ "fxsave64 [{}]", -+ in(reg) context.fpu.bytes.as_mut_ptr(), -+ ); -+ } -+} -+ -+fn restore_fpu_state(context: &SavedCpuContext) { -+ // SAFETY: The saved FXSAVE image belongs to the same CPU context and matches the restore instruction. -+ unsafe { -+ core::arch::asm!( -+ "fxrstor64 [{}]", -+ in(reg) context.fpu.bytes.as_ptr(), -+ ); -+ } -+} -+ -+fn save_cpu_context(entry_rsp: usize) -> SavedCpuContext { -+ let mut context = SavedCpuContext { -+ entry_rsp, -+ ..SavedCpuContext::default() -+ }; -+ -+ // SAFETY: Reading control registers and MSRs is required to reconstruct the CPU execution state on wake. 
-+ unsafe { -+ core::arch::asm!( -+ "mov {}, cr0", -+ out(reg) context.cr0, -+ options(nostack, preserves_flags) -+ ); -+ core::arch::asm!( -+ "mov {}, cr2", -+ out(reg) context.cr2, -+ options(nostack, preserves_flags) -+ ); -+ core::arch::asm!( -+ "mov {}, cr3", -+ out(reg) context.cr3, -+ options(nostack, preserves_flags) -+ ); -+ core::arch::asm!( -+ "mov {}, cr4", -+ out(reg) context.cr4, -+ options(nostack, preserves_flags) -+ ); -+ core::arch::asm!( -+ "pushfq", -+ "pop {}", -+ out(reg) context.rflags, -+ options(preserves_flags) -+ ); -+ core::arch::asm!("mov {}, rsp", out(reg) context.runtime_rsp, options(nostack, preserves_flags)); -+ -+ context.efer = x86::msr::rdmsr(x86::msr::IA32_EFER); -+ context.fs_base = x86::msr::rdmsr(x86::msr::IA32_FS_BASE); -+ context.gs_base = x86::msr::rdmsr(x86::msr::IA32_GS_BASE); -+ context.kernel_gs_base = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE); -+ } -+ -+ save_descriptor_tables(&mut context); -+ save_fpu_state(&mut context); -+ context -+} -+ -+fn set_firmware_waking_vector(facs: &mut PhysicalMapping, vector: usize) { -+ facs.firmware_waking_vector = vector as u32; -+ facs.x_firmware_waking_vector = vector as u64; -+} -+ -+fn write_pm1_control_block( -+ registers: &FixedRegisters, -+ sleep_type: SleepTypeData, -+) -> Result<(), SleepError> { -+ let current_a = registers -+ .pm1_control_registers -+ .pm1a -+ .read() -+ .map_err(|_| SleepError::UnsupportedPmControl)? as u16; -+ let armed_a = (current_a & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.a; -+ -+ registers -+ .pm1_control_registers -+ .pm1a -+ .write(u64::from(armed_a)) -+ .map_err(|_| SleepError::UnsupportedPmControl)?; -+ -+ if let Some(pm1b) = ®isters.pm1_control_registers.pm1b { -+ let current_b = pm1b.read().map_err(|_| SleepError::UnsupportedPmControl)? 
as u16; -+ let armed_b = (current_b & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.b; -+ pm1b.write(u64::from(armed_b)) -+ .map_err(|_| SleepError::UnsupportedPmControl)?; -+ pm1b.write(u64::from(armed_b | ACPI_SLP_EN)) -+ .map_err(|_| SleepError::UnsupportedPmControl)?; -+ } -+ -+ // SAFETY: WBINVD is required here to flush dirty cache lines before firmware powers down the CPU package. -+ unsafe { -+ core::arch::asm!("wbinvd", options(nostack, preserves_flags)); -+ } -+ -+ registers -+ .pm1_control_registers -+ .pm1a -+ .write(u64::from(armed_a | ACPI_SLP_EN)) -+ .map_err(|_| SleepError::UnsupportedPmControl)?; -+ -+ Ok(()) -+} -+ -+#[unsafe(naked)] -+unsafe extern "sysv64" fn enter_sleep_raw(state: usize) -> usize { -+ core::arch::naked_asm!( -+ "mov rsi, rsp", -+ "jmp {inner}", -+ inner = sym enter_sleep_raw_inner, -+ ); -+} -+ -+extern "C" fn enter_sleep_raw_inner(state: usize, entry_rsp: usize) -> usize { -+ let state = match state { -+ 3 => SleepState::S3, -+ 5 => SleepState::S5, -+ _ => return SleepError::InvalidSleepObject.code(), -+ }; -+ -+ let (registers, mut facs, interpreter) = match load_interpreter() { -+ Ok(tuple) => tuple, -+ Err(error) => return error.code(), -+ }; -+ let sleep_type = match sleep_type_data_from_interpreter(&interpreter, state) { -+ Ok(data) => data, -+ Err(error) => return error.code(), -+ }; -+ -+ let mut context = save_cpu_context(entry_rsp); -+ context.facs_address = facs.physical_start; -+ install_wake_trampoline(context.runtime_rsp, context.cr3); -+ set_firmware_waking_vector(&mut facs, WAKE_TRAMPOLINE_PHYS); -+ -+ { -+ let mut saved = SAVED_CONTEXT.lock(); -+ *saved = Some(context); -+ } -+ -+ // SAFETY: Suspend entry must not be interrupted while the wake vector and PM1 control block are being armed. 
-+ unsafe { -+ interrupt::disable(); -+ } -+ -+ if let Err(error) = write_pm1_control_block(registers.as_ref(), sleep_type) { -+ return error.code(); -+ } -+ -+ // SAFETY: The final CLI+HLT sequence is the architectural handoff point after asserting SLP_EN. -+ unsafe { -+ core::arch::asm!("cli; hlt", options(nostack)); -+ } -+ -+ SleepError::SleepDidNotEnter.code() -+} -+ -+extern "C" fn resume_from_s3_trampoline() -> ! { -+ let mut saved = SAVED_CONTEXT.lock(); -+ let context = saved.take().expect("S3 wake trampoline resumed without saved CPU context"); -+ drop(saved); -+ -+ // SAFETY: The saved FACS physical address was captured from the validated FADT during suspend entry. -+ if context.facs_address != 0 { -+ let mut facs = unsafe { -+ KernelAcpiHandler.map_physical_region::( -+ context.facs_address, -+ core::mem::size_of::(), -+ ) -+ }; -+ set_firmware_waking_vector(&mut facs, 0); -+ } -+ -+ // SAFETY: The wake trampoline already switched to the saved kernel CR3 and long mode, so the remaining restores are architectural register state only. 
-+ unsafe { -+ x86::msr::wrmsr(x86::msr::IA32_EFER, context.efer); -+ core::arch::asm!("mov cr3, {}", in(reg) context.cr3, options(nostack)); -+ core::arch::asm!("mov cr4, {}", in(reg) context.cr4, options(nostack)); -+ core::arch::asm!("mov cr2, {}", in(reg) context.cr2, options(nostack)); -+ core::arch::asm!("mov cr0, {}", in(reg) context.cr0, options(nostack)); -+ core::arch::asm!("lgdt [{}]", in(reg) &context.gdtr, options(nostack)); -+ core::arch::asm!("lidt [{}]", in(reg) &context.idtr, options(nostack)); -+ -+ task::load_tr(SegmentSelector::new(crate::arch::gdt::GDT_TSS as u16, Ring::Ring0)); -+ -+ x86::msr::wrmsr(x86::msr::IA32_FS_BASE, context.fs_base); -+ x86::msr::wrmsr(x86::msr::IA32_GS_BASE, context.gs_base); -+ x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, context.kernel_gs_base); -+ } -+ -+ restore_fpu_state(&context); -+ -+ // SAFETY: Returning with the original entry stack and RFLAGS completes the suspend call as a successful function return. -+ unsafe { -+ core::arch::asm!( -+ "mov rsp, {entry_rsp}", -+ "push {rflags}", -+ "popfq", -+ "xor eax, eax", -+ "ret", -+ entry_rsp = in(reg) context.entry_rsp, -+ rflags = in(reg) context.rflags, -+ options(noreturn) -+ ); -+ } -+} -+ -+pub fn enter_sleep_state(state: SleepState) -> core::result::Result<(), SleepError> { -+ #[cfg(not(target_arch = "x86_64"))] -+ { -+ let _ = state; -+ return Err(SleepError::UnsupportedArch); -+ } -+ -+ #[cfg(target_arch = "x86_64")] -+ { -+ let raw = unsafe { -+ enter_sleep_raw(match state { -+ SleepState::S3 => 3, -+ SleepState::S5 => 5, -+ }) -+ }; -+ if raw == SLEEP_RETURN_OK { -+ Ok(()) -+ } else { -+ Err(SleepError::from_code(raw)) -+ } -+ } -+} -+ -+pub fn available_sleep_states() -> &'static [u8] { -+ if sleep_type_data(SleepState::S3).is_ok() { -+ b"S3\nS5\n" -+ } else { -+ b"S5\n" -+ } -+} -+ -+pub fn trigger_sleep_request(request: &str) -> Result<(), Error> { -+ match request.trim() { -+ "S3" => enter_sleep_state(SleepState::S3).map_err(|_| Error::new(EIO)), -+ 
"S5" => enter_sleep_state(SleepState::S5).map_err(|_| Error::new(EIO)), -+ _ => Err(Error::new(EINVAL)), -+ } -+} -diff --git a/src/asm/x86_64/s3_wakeup.asm b/src/asm/x86_64/s3_wakeup.asm -new file mode 100644 -index 0000000..7beeccf ---- /dev/null -+++ b/src/asm/x86_64/s3_wakeup.asm -@@ -0,0 +1,110 @@ -+; ACPI S3 wake trampoline -+; compiled with nasm by build.rs, copied to physical 0x8000 before S3 entry -+ -+ORG 0x8000 -+SECTION .text -+USE16 -+ -+trampoline: -+ jmp short startup_wake -+ times 8 - ($ - trampoline) nop -+ .stack: dq 0 -+ .page_table: dq 0 -+ .code: dq 0 -+ -+startup_wake: -+ cli -+ -+ xor ax, ax -+ mov ds, ax -+ mov es, ax -+ mov ss, ax -+ mov sp, 0 -+ -+ mov edi, [trampoline.page_table] -+ mov cr3, edi -+ -+ mov eax, cr0 -+ and al, 11110011b -+ or al, 00100010b -+ mov cr0, eax -+ -+ mov eax, cr4 -+ or eax, 1 << 9 | 1 << 7 | 1 << 5 | 1 << 4 -+ mov cr4, eax -+ -+ fninit -+ -+ lgdt [gdtr] -+ -+ mov ecx, 0xC0000080 -+ rdmsr -+ or eax, 1 << 11 | 1 << 8 -+ wrmsr -+ -+ mov ebx, cr0 -+ or ebx, 1 << 31 | 1 << 16 | 1 -+ mov cr0, ebx -+ -+ jmp gdt.kernel_code:long_mode_wake -+ -+USE64 -+long_mode_wake: -+ mov rax, gdt.kernel_data -+ mov ds, rax -+ mov es, rax -+ mov fs, rax -+ mov gs, rax -+ mov ss, rax -+ -+ mov rsp, [trampoline.stack] -+ mov rax, [trampoline.code] -+ jmp rax -+ -+struc GDTEntry -+ .limitl resw 1 -+ .basel resw 1 -+ .basem resb 1 -+ .attribute resb 1 -+ .flags__limith resb 1 -+ .baseh resb 1 -+endstruc -+ -+attrib: -+ .present equ 1 << 7 -+ .user equ 1 << 4 -+ .code equ 1 << 3 -+ .writable equ 1 << 1 -+ -+flags: -+ .long_mode equ 1 << 5 -+ -+gdtr: -+ dw gdt.end + 1 -+ dq gdt -+ -+gdt: -+.null equ $ - gdt -+ dq 0 -+ -+.kernel_code equ $ - gdt -+istruc GDTEntry -+ at GDTEntry.limitl, dw 0 -+ at GDTEntry.basel, dw 0 -+ at GDTEntry.basem, db 0 -+ at GDTEntry.attribute, db attrib.present | attrib.user | attrib.code -+ at GDTEntry.flags__limith, db flags.long_mode -+ at GDTEntry.baseh, db 0 -+iend -+ -+.kernel_data equ $ - gdt -+istruc GDTEntry 
-+ at GDTEntry.limitl, dw 0 -+ at GDTEntry.basel, dw 0 -+ at GDTEntry.basem, db 0 -+ at GDTEntry.attribute, db attrib.present | attrib.user | attrib.writable -+ at GDTEntry.flags__limith, db 0 -+ at GDTEntry.baseh, db 0 -+iend -+ -+.end equ $ - gdt diff --git a/local/patches/kernel/P4-scheme-failure-modes.patch b/local/patches/kernel/P4-scheme-failure-modes.patch deleted file mode 100644 index c3453b8696..0000000000 --- a/local/patches/kernel/P4-scheme-failure-modes.patch +++ /dev/null @@ -1,913 +0,0 @@ -diff --git a/src/context/file.rs b/src/context/file.rs -index 2d3790f..150f483 100644 ---- a/src/context/file.rs -+++ b/src/context/file.rs -@@ -4,7 +4,7 @@ use crate::{ - event, - scheme::{self, SchemeId}, - sync::{CleanLockToken, RwLock, L6}, -- syscall::error::Result, -+ syscall::error::{Error, Result, ESTALE}, - }; - use alloc::sync::Arc; - use syscall::{schemev2::NewFdFlags, RwFlags, O_APPEND, O_NONBLOCK}; -@@ -18,6 +18,7 @@ pub struct FileDescription { - pub offset: u64, - /// The scheme that this file refers to - pub scheme: SchemeId, -+ pub scheme_generation: Option, - /// The number the scheme uses to refer to this file - pub number: usize, - /// The flags passed to open or fcntl(SETFL) -@@ -32,6 +33,52 @@ bitflags! 
{ - } - } - impl FileDescription { -+ pub fn with_generation( -+ scheme: SchemeId, -+ scheme_generation: Option, -+ number: usize, -+ offset: u64, -+ flags: u32, -+ internal_flags: InternalFlags, -+ ) -> Self { -+ Self { -+ offset, -+ scheme, -+ scheme_generation, -+ number, -+ flags, -+ internal_flags, -+ } -+ } -+ -+ pub fn new( -+ scheme: SchemeId, -+ number: usize, -+ offset: u64, -+ flags: u32, -+ internal_flags: InternalFlags, -+ token: &mut CleanLockToken, -+ ) -> Self { -+ Self::with_generation( -+ scheme, -+ Some(scheme::current_scheme_generation(token.token(), scheme)), -+ number, -+ offset, -+ flags, -+ internal_flags, -+ ) -+ } -+ -+ pub fn get_scheme(&self, token: &mut CleanLockToken) -> Result { -+ if let Some(expected_generation) = self.scheme_generation -+ && expected_generation != scheme::current_scheme_generation(token.token(), self.scheme) -+ { -+ return Err(Error::new(ESTALE)); -+ } -+ -+ scheme::get_scheme(token.token(), self.scheme) -+ } -+ - pub fn rw_flags(&self, rw: RwFlags) -> u32 { - let mut ret = self.flags & !(O_NONBLOCK | O_APPEND) as u32; - if rw.contains(RwFlags::APPEND) { -@@ -76,7 +123,7 @@ impl FileDescription { - pub fn try_close(self, token: &mut CleanLockToken) -> Result<()> { - event::unregister_file(self.scheme, self.number, token); - -- let scheme = scheme::get_scheme(token.token(), self.scheme)?; -+ let scheme = self.get_scheme(token)?; - - scheme.close(self.number, token) - } -@@ -85,12 +132,12 @@ impl FileDescription { - impl FileDescriptor { - pub fn close(self, token: &mut CleanLockToken) -> Result<()> { - { -- let (scheme_id, number, internal_flags) = { -+ let (desc, number, internal_flags) = { - let desc = self.description.read(token.token()); -- (desc.scheme, desc.number, desc.internal_flags) -+ (*desc, desc.number, desc.internal_flags) - }; - if internal_flags.contains(InternalFlags::NOTIFY_ON_NEXT_DETACH) { -- let scheme = scheme::get_scheme(token.token(), scheme_id)?; -+ let scheme = desc.get_scheme(token)?; - 
scheme.detach(number, token)?; - } - } -diff --git a/src/context/memory.rs b/src/context/memory.rs -index 93446ba..a862b35 100644 ---- a/src/context/memory.rs -+++ b/src/context/memory.rs -@@ -64,14 +64,13 @@ impl UnmapResult { - return Ok(()); - }; - -- let (scheme_id, number) = { -- let desc = description.write(token.token()); -- (desc.scheme, desc.number) -+ let (scheme, number) = { -+ let desc = *description.read(token.token()); -+ (desc.get_scheme(token)?, desc.number) - }; - -- let scheme_opt = scheme::get_scheme(token.token(), scheme_id); -- let funmap_result = scheme_opt -- .and_then(|scheme| scheme.kfunmap(number, base_offset, self.size, self.flags, token)); -+ let funmap_result = scheme -+ .kfunmap(number, base_offset, self.size, self.flags, token); - - if let Ok(fd) = Arc::try_unwrap(description) { - fd.into_inner().try_close(token)?; -@@ -2687,20 +2686,13 @@ fn correct_inner<'l>( - // XXX: This is cheating, but guaranteed we won't deadlock because we've dropped addr_space_guard - let mut token = unsafe { CleanLockToken::new() }; - -- let (scheme_id, scheme_number) = { -- let desc = &file_ref.description.read(token.token()); -- (desc.scheme, desc.number) -+ let desc = *file_ref.description.read(token.token()); -+ let scheme = desc.get_scheme(&mut token).map_err(|_| PfError::Segv)?; -+ let scheme_number = desc.number; -+ let user_inner = match scheme { -+ KernelSchemes::User(user) => user.inner, -+ _ => return Err(PfError::Segv), - }; -- let user_inner = scheme::get_scheme(token.token(), scheme_id) -- .ok() -- .and_then(|s| { -- if let KernelSchemes::User(user) = s { -- Some(user.inner) -- } else { -- None -- } -- }) -- .ok_or(PfError::Segv)?; - - let offset = file_ref.base_offset as u64 + (pages_from_grant_start * PAGE_SIZE) as u64; - user_inner -diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs -index d30272c..765e547 100644 ---- a/src/scheme/mod.rs -+++ b/src/scheme/mod.rs -@@ -14,7 +14,7 @@ use alloc::{ - }; - use core::{ - str, -- 
sync::atomic::{AtomicUsize, Ordering}, -+ sync::atomic::{AtomicU64, AtomicUsize, Ordering}, - }; - use hashbrown::hash_map::{self, DefaultHashBuilder, HashMap}; - use spin::Once; -@@ -169,6 +169,7 @@ enum Handle { - - /// Schemes list - static HANDLES: Once>> = Once::new(); -+static SCHEME_GENERATIONS: Once>> = Once::new(); - static SCHEME_LIST_NEXT_ID: AtomicUsize = AtomicUsize::new(MAX_GLOBAL_SCHEMES); - static SCHEME_LIST_ID: AtomicUsize = AtomicUsize::new(0); - -@@ -204,6 +205,10 @@ fn init_schemes() -> RwLock> { - RwLock::new(handles) - } - -+fn init_scheme_generations() -> RwLock> { -+ RwLock::new(HashMap::new()) -+} -+ - /// Get a handle to a scheme. - pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result { - match handles().read(token).get(&scheme_id) { -@@ -212,10 +217,33 @@ pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result, scheme_id: SchemeId) -> u64 { -+ scheme_generations() -+ .read(token) -+ .get(&scheme_id) -+ .map(|generation| generation.load(Ordering::Acquire)) -+ .unwrap_or(0) -+} -+ - fn handles<'a>() -> &'a RwLock> { - HANDLES.call_once(init_schemes) - } - -+fn scheme_generations<'a>() -> &'a RwLock> { -+ SCHEME_GENERATIONS.call_once(init_scheme_generations) -+} -+ -+fn increment_scheme_generation(scheme_id: SchemeId, token: &mut CleanLockToken) { -+ match scheme_generations().write(token.token()).entry(scheme_id) { -+ hash_map::Entry::Occupied(entry) => { -+ entry.get().fetch_add(1, Ordering::AcqRel); -+ } -+ hash_map::Entry::Vacant(entry) => { -+ entry.insert(AtomicU64::new(1)); -+ } -+ } -+} -+ - /// Scheme list type - pub struct SchemeList; - -@@ -260,9 +288,14 @@ impl SchemeList { - - /// Remove a scheme - fn remove(&self, id: usize, token: &mut CleanLockToken) { -- let scheme = handles().write(token.token()).remove(&SchemeId(id)); -+ let scheme_id = SchemeId(id); -+ let scheme = handles().write(token.token()).remove(&scheme_id); - - assert!(scheme.is_some()); -+ if let 
Some(Handle::Scheme(KernelSchemes::User(user))) = scheme.as_ref() { -+ user.inner.fail_pending_calls(token); -+ } -+ increment_scheme_generation(scheme_id, token); - if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme - && let Some(user) = Arc::into_inner(user.inner) - { -@@ -287,32 +320,32 @@ impl KernelScheme for SchemeList { - token: &mut CleanLockToken, - ) -> Result { - let scheme_id = SchemeId(scheme_id); -- match handles() -- .read(token.token()) -- .get(&scheme_id) -- .ok_or(Error::new(EBADF))? -- { -- Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => { -- let inner = inner.clone(); -- assert!(scheme_id == inner.scheme_id); -- let scheme = scheme_id; -- let params = unsafe { user_buf.read_exact::()? }; -- -- return Ok(OpenResult::External(Arc::new(RwLock::new( -- FileDescription { -- scheme, -- number: params.number, -- offset: params.offset, -- flags: params.flags as u32, -- internal_flags: InternalFlags::from_extra0(params.internal_flags) -- .ok_or(Error::new(EINVAL))?, -- }, -- )))); -+ let maybe_inner = { -+ let handles = handles().read(token.token()); -+ match handles.get(&scheme_id).ok_or(Error::new(EBADF))? { -+ Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => Some(inner.clone()), -+ Handle::SchemeCreationCapability => None, -+ _ => return Err(Error::new(EBADF)), - } -- Handle::SchemeCreationCapability => (), -- _ => return Err(Error::new(EBADF)), - }; - -+ if let Some(inner) = maybe_inner { -+ assert!(scheme_id == inner.scheme_id); -+ let params = unsafe { user_buf.read_exact::()? 
}; -+ -+ return Ok(OpenResult::External(Arc::new(RwLock::new( -+ FileDescription::new( -+ scheme_id, -+ params.number, -+ params.offset, -+ params.flags as u32, -+ InternalFlags::from_extra0(params.internal_flags) -+ .ok_or(Error::new(EINVAL))?, -+ token, -+ ), -+ )))); -+ } -+ - const EXPECTED: &[u8] = b"create-scheme"; - let mut buf = [0u8; EXPECTED.len()]; - -diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs -index 47588e1..1bdd6cc 100644 ---- a/src/scheme/proc.rs -+++ b/src/scheme/proc.rs -@@ -849,17 +873,17 @@ impl KernelScheme for ProcScheme { - } - } - fn extract_scheme_number(fd: usize, token: &mut CleanLockToken) -> Result<(KernelSchemes, usize)> { -- let (scheme_id, number) = { -+ let desc = { - let current_lock = context::current(); - let mut current = current_lock.read(token.token()); -- let (context, mut token) = current.token_split(); -+ let (context, mut context_token) = current.token_split(); - let file_descriptor = context -- .get_file(FileHandle::from(fd), &mut token) -+ .get_file(FileHandle::from(fd), &mut context_token) - .ok_or(Error::new(EBADF))?; -- let desc = file_descriptor.description.read(token.token()); -- (desc.scheme, desc.number) -+ *file_descriptor.description.read(context_token.token()) - }; -- let scheme = scheme::get_scheme(token.token(), scheme_id)?; -+ let scheme = desc.get_scheme(token)?; -+ let number = desc.number; - - Ok((scheme, number)) - } -diff --git a/src/scheme/user.rs b/src/scheme/user.rs -index b901302..dfbf66b 100644 ---- a/src/scheme/user.rs -+++ b/src/scheme/user.rs -@@ -80,6 +80,7 @@ const ONE: NonZeroUsize = match NonZeroUsize::new(1) { - Some(one) => one, - None => unreachable!(), - }; -+const MAX_SPURIOUS_WAKEUPS: usize = 100; - - enum ParsedCqe { - TriggerFevent { -@@ -209,6 +210,8 @@ impl UserInner { - caller_responsible: &mut PageSpan, - token: &mut CleanLockToken, - ) -> Result { -+ let mut remaining_spurious_wakeups = MAX_SPURIOUS_WAKEUPS; -+ - { - // Disable preemption to avoid context switches 
between setting the - // process state and sending the scheme request. The process is made -@@ -261,7 +264,10 @@ impl UserInner { - }; - - let states = self.states.lock(token.token()); -- let (mut states, mut token) = states.into_split(); -+ let (mut states, mut state_token) = states.into_split(); -+ let mut timed_out_descriptions = None; -+ let mut remove_state = false; -+ let mut timed_out = false; - match states.get_mut(sqe.tag as usize) { - // invalid state - None => return Err(Error::new(EBADFD)), -@@ -274,24 +280,35 @@ impl UserInner { - fds, - } => { - let maybe_eintr = -- eintr_if_sigkill(&mut callee_responsible, &mut token.token()); -- *o = State::Waiting { -- canceling: true, -- callee_responsible, -- context, -- fds, -- }; -+ eintr_if_sigkill(&mut callee_responsible, &mut state_token.token()); - -- maybe_eintr?; -+ if maybe_eintr.is_ok() { -+ remaining_spurious_wakeups = -+ remaining_spurious_wakeups.saturating_sub(1); -+ } -+ -+ if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 { -+ timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds)); -+ remove_state = true; -+ } else { -+ *o = State::Waiting { -+ canceling: true, -+ callee_responsible, -+ context, -+ fds, -+ }; -+ } - -- context::current() -- .write(token.token()) -- .block("UserInner::call (woken up after cancelation request)"); -+ maybe_eintr?; - -- // We do not want to drop the lock before blocking -- // as if we get preempted in between we might miss a -- // wakeup. 
-- drop(states); -+ if remove_state { -+ states.remove(sqe.tag as usize); -+ timed_out = true; -+ } else { -+ context::current() -+ .write(state_token.token()) -+ .block("UserInner::call (woken up after cancelation request)"); -+ } - } - // spurious wakeup - State::Waiting { -@@ -300,60 +317,76 @@ impl UserInner { - context, - mut callee_responsible, - } => { -- let maybe_eintr = eintr_if_sigkill(&mut callee_responsible, &mut token); - let current_context = context::current(); -+ let maybe_eintr = -+ eintr_if_sigkill(&mut callee_responsible, &mut state_token); -+ -+ if maybe_eintr.is_ok() { -+ remaining_spurious_wakeups = -+ remaining_spurious_wakeups.saturating_sub(1); -+ } - -- *o = State::Waiting { -- // Currently we treat all spurious wakeups to have the same behavior -- // as signals (i.e., we send a cancellation request). It is not something -- // that should happen, but it certainly can happen, for example if a context -- // is awoken through its thread handle without setting any sig bits, or if the -- // caller clears its own sig bits. If it actually is a signal, then it is the -- // intended behavior. -- canceling: true, -- fds, -- context, -- callee_responsible, -- }; -+ if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 { -+ timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds)); -+ remove_state = true; -+ } else { -+ *o = State::Waiting { -+ // Currently we treat all spurious wakeups to have the same behavior -+ // as signals (i.e., we send a cancellation request). It is not something -+ // that should happen, but it certainly can happen, for example if a context -+ // is awoken through its thread handle without setting any sig bits, or if the -+ // caller clears its own sig bits. If it actually is a signal, then it is the -+ // intended behavior. 
-+ canceling: true, -+ fds, -+ context, -+ callee_responsible, -+ }; -+ } - - maybe_eintr?; - -- // We do not want to preempt between sending the -- // cancellation and blocking again where we might -- // miss a wakeup. -- let mut preempt = PreemptGuardL1::new(¤t_context, &mut token); -- let token = preempt.token(); -- -- self.todo.send_locked( -- Sqe { -- opcode: Opcode::Cancel as u8, -- sqe_flags: SqeFlags::ONEWAY, -- tag: sqe.tag, -- ..Default::default() -- }, -- token.token(), -- ); -- event::trigger_locked( -- self.root_id, -- self.scheme_id.get(), -- EVENT_READ, -- token.token(), -- ); -- -- // 1. If cancellation was requested and arrived -- // before the scheme processed the request, an -- // acknowledgement will be sent back after the -- // cancellation is processed and we will be woken up -- // again. State will be State::Responded then. -- // -- // 2. If cancellation was requested but the scheme -- // already processed the request, we will receive -- // the actual response next and woken up again. -- // State will be State::Responded then. -- context::current() -- .write(token.token()) -- .block("UserInner::call (spurious wakeup)"); -- drop(states); -+ if remove_state { -+ states.remove(sqe.tag as usize); -+ timed_out = true; -+ } else { -+ // We do not want to preempt between sending the -+ // cancellation and blocking again where we might -+ // miss a wakeup. -+ let mut preempt = -+ PreemptGuardL1::new(¤t_context, &mut state_token); -+ let token = preempt.token(); -+ -+ self.todo.send_locked( -+ Sqe { -+ opcode: Opcode::Cancel as u8, -+ sqe_flags: SqeFlags::ONEWAY, -+ tag: sqe.tag, -+ ..Default::default() -+ }, -+ token.token(), -+ ); -+ event::trigger_locked( -+ self.root_id, -+ self.scheme_id.get(), -+ EVENT_READ, -+ token.token(), -+ ); -+ -+ // 1. 
If cancellation was requested and arrived -+ // before the scheme processed the request, an -+ // acknowledgement will be sent back after the -+ // cancellation is processed and we will be woken up -+ // again. State will be State::Responded then. -+ // -+ // 2. If cancellation was requested but the scheme -+ // already processed the request, we will receive -+ // the actual response next and woken up again. -+ // State will be State::Responded then. -+ context::current() -+ .write(token.token()) -+ .block("UserInner::call (spurious wakeup)"); -+ } - } - - // invalid state -@@ -368,6 +401,68 @@ impl UserInner { - } - }, - } -+ -+ if let Some(descriptions) = timed_out_descriptions { -+ drop(states); -+ for desc in descriptions { -+ let _ = desc.try_close(token); -+ } -+ } -+ -+ if timed_out { -+ return Err(Error::new(ETIMEDOUT)); -+ } -+ } -+ } -+ } -+ -+ fn collect_descriptions_to_close( -+ fds: Vec>, -+ ) -> Vec { -+ fds.into_iter() -+ .filter_map(|fd| Arc::try_unwrap(fd).ok()) -+ .map(RwLock::into_inner) -+ .collect() -+ } -+ -+ pub fn fail_pending_calls(&self, token: &mut CleanLockToken) { -+ let descriptions_to_close = { -+ let mut states_lock = self.states.lock(token.token()); -+ let (states, mut lock_token) = states_lock.token_split(); -+ let mut descriptions_to_close = Vec::new(); -+ let mut states_to_remove = Vec::new(); -+ -+ for (id, state) in states.iter_mut() { -+ match mem::replace(state, State::Placeholder) { -+ State::Waiting { context, fds, .. 
} => { -+ descriptions_to_close.extend(Self::collect_descriptions_to_close(fds)); -+ -+ match context.upgrade() { -+ Some(context) => { -+ *state = State::Responded(Response::Regular( -+ Err(Error::new(ENODEV)), -+ 0, -+ false, -+ )); -+ context.write(lock_token.token()).unblock(); -+ } -+ None => states_to_remove.push(id), -+ } -+ } -+ old_state => *state = old_state, -+ } -+ } -+ -+ for id in states_to_remove { -+ states.remove(id); -+ } -+ -+ descriptions_to_close -+ }; -+ -+ for desc in descriptions_to_close { -+ let _ = desc.try_close(token); -+ } - } - } - } -@@ -1283,6 +1376,7 @@ impl UserInner { - } - - pub fn into_drop(self, token: &mut CleanLockToken) { -+ self.fail_pending_calls(token); - self.todo.condition.into_drop(token); - } - } -diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs -index bf98464..10c6a92 100644 ---- a/src/syscall/fs.rs -+++ b/src/syscall/fs.rs -@@ -12,7 +12,7 @@ use crate::{ - memory::{AddrSpace, GenericFlusher, Grant, PageSpan, TlbShootdownActions}, - }, - memory::{Page, VirtualAddress, PAGE_SIZE}, -- scheme::{self, FileHandle, KernelScheme, OpenResult, StrOrBytes}, -+ scheme::{FileHandle, KernelScheme, OpenResult, StrOrBytes}, - sync::{CleanLockToken, RwLock}, - syscall::{data::Stat, error::*, flag::*}, - }; -@@ -45,7 +45,7 @@ pub fn file_op_generic_ext( - (file, desc) - }; - -- let scheme = scheme::get_scheme(token.token(), desc.scheme)?; -+ let scheme = desc.get_scheme(token)?; - - op(&*scheme, file.description, desc, token) - } -@@ -73,14 +73,18 @@ pub fn openat( - ) -> Result { - let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; - -- let (scheme_id, number) = { -+ let desc = { - let current_lock = context::current(); - let mut current = current_lock.read(token.token()); -- let (context, mut token) = current.token_split(); -- let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?; -- let desc = pipe.description.read(token.token()); -- (desc.scheme, desc.number) -+ let (context, mut context_token) = 
current.token_split(); -+ let pipe = context -+ .get_file(fh, &mut context_token) -+ .ok_or(Error::new(EBADF))?; -+ *pipe.description.read(context_token.token()) - }; -+ let scheme = desc.get_scheme(token)?; -+ let number = desc.number; -+ let scheme_id = desc.scheme; - - let caller_ctx = context::current() - .read(token.token()) -@@ -88,8 +92,6 @@ pub fn openat( - .filter_uid_gid(euid, egid); - - let new_description = { -- let scheme = scheme::get_scheme(token.token(), scheme_id)?; -- - let res = scheme.kopenat( - number, - StrOrBytes::from_str(&path_buf), -@@ -101,13 +103,14 @@ pub fn openat( - - match res? { - OpenResult::SchemeLocal(number, internal_flags) => { -- Arc::new(RwLock::new(FileDescription { -- offset: 0, -- internal_flags, -- scheme: scheme_id, -+ Arc::new(RwLock::new(FileDescription::new( -+ scheme_id, - number, -- flags: (flags & !O_CLOEXEC) as u32, -- })) -+ 0, -+ (flags & !O_CLOEXEC) as u32, -+ internal_flags, -+ token, -+ ))) - } - OpenResult::External(desc) => desc, - } -@@ -137,16 +140,17 @@ pub fn unlinkat( - ) -> Result<()> { - let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; - -- let (number, scheme_id) = { -+ let desc = { - let current_lock = context::current(); - let mut current = current_lock.read(token.token()); -- let (context, mut token) = current.token_split(); -- let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?; -- let desc = pipe.description.read(token.token()); -- (desc.number, desc.scheme) -+ let (context, mut context_token) = current.token_split(); -+ let pipe = context -+ .get_file(fh, &mut context_token) -+ .ok_or(Error::new(EBADF))?; -+ *pipe.description.read(context_token.token()) - }; -- -- let scheme = scheme::get_scheme(token.token(), scheme_id)?; -+ let number = desc.number; -+ let scheme = desc.get_scheme(token)?; - - let caller_ctx = context::current() - .read(token.token()) -@@ -199,17 +203,18 @@ fn duplicate_file( - let description = { *file.description.read(token.token()) }; - - let 
new_description = { -- let scheme = scheme::get_scheme(token.token(), description.scheme)?; -+ let scheme = description.get_scheme(token)?; - - match scheme.kdup(description.number, user_buf, caller_ctx, token)? { - OpenResult::SchemeLocal(number, internal_flags) => { -- Arc::new(RwLock::new(FileDescription { -- offset: 0, -- internal_flags, -- scheme: description.scheme, -+ Arc::new(RwLock::new(FileDescription::new( -+ description.scheme, - number, -- flags: description.flags, -- })) -+ 0, -+ description.flags, -+ internal_flags, -+ token, -+ ))) - } - OpenResult::External(desc) => desc, - } -@@ -296,11 +301,10 @@ fn call_normal( - } - .ok_or(Error::new(EBADF))?; - -- let (scheme_id, number) = { -- let desc = file.description.read(token.token()); -- (desc.scheme, desc.number) -+ let (scheme, number) = { -+ let desc = *file.description.read(token.token()); -+ (desc.get_scheme(token)?, desc.number) - }; -- let scheme = scheme::get_scheme(token.token(), scheme_id)?; - - if flags.contains(CallFlags::STD_FS) { - scheme.translate_std_fs_call(number, file.description, payload, flags, metadata, token) -@@ -341,28 +345,28 @@ fn fdwrite_inner( - ) -> Result { - // TODO: Ensure deadlocks can't happen - let (scheme, number, descs_to_send) = { -- let (scheme, number) = { -+ let desc = { - let current_lock = context::current(); - let mut current = current_lock.read(token.token()); -- let (context, mut token) = current.token_split(); -+ let (context, mut context_token) = current.token_split(); - let file_descriptor = context -- .get_file(socket, &mut token) -+ .get_file(socket, &mut context_token) - .ok_or(Error::new(EBADF))?; -- let desc = &file_descriptor.description.read(token.token()); -- (desc.scheme, desc.number) -+ *file_descriptor.description.read(context_token.token()) - }; -- let scheme = scheme::get_scheme(token.token(), scheme)?; -+ let scheme = desc.get_scheme(token)?; -+ let number = desc.number; - - let current_lock = context::current(); - let mut current = 
current_lock.read(token.token()); -- let (context, mut token) = current.token_split(); -+ let (context, mut context_token) = current.token_split(); - ( - scheme, - number, - if flags.contains(CallFlags::FD_CLONE) { -- context.bulk_get_files(&target_fds, &mut token) -+ context.bulk_get_files(&target_fds, &mut context_token) - } else { -- context.bulk_remove_files(&target_fds, &mut token) -+ context.bulk_remove_files(&target_fds, &mut context_token) - }? - .into_iter() - .map(|f| f.description) -@@ -395,18 +399,22 @@ fn call_fdread( - metadata: &[u64], - token: &mut CleanLockToken, - ) -> Result { -+ let desc = { -+ let current_lock = context::current(); -+ let mut current = current_lock.read(token.token()); -+ let (context, mut context_token) = current.token_split(); -+ let file_descriptor = context -+ .get_file(fd, &mut context_token) -+ .ok_or(Error::new(EBADF))?; -+ *file_descriptor.description.read(context_token.token()) -+ }; - let (scheme, number) = { -- let (scheme, number) = { -- let current_lock = context::current(); -- let mut current = current_lock.read(token.token()); -- let (context, mut token) = current.token_split(); -- let file_descriptor = context.get_file(fd, &mut token).ok_or(Error::new(EBADF))?; -- let desc = file_descriptor.description.read(token.token()); -- (desc.scheme, desc.number) -- }; -- let scheme = scheme::get_scheme(token.token(), scheme)?; -- -- (scheme, number) -+ let scheme = desc.get_scheme(token)?; -+ let number = desc.number; -+ ( -+ scheme, -+ number, -+ ) - }; - - scheme.kfdread(number, payload, flags, metadata, token) -@@ -440,9 +448,9 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken) - } - .ok_or(Error::new(EBADF))?; - -- let (scheme_id, number, flags) = { -- let desc = file.description.write(token.token()); -- (desc.scheme, desc.number, desc.flags) -+ let (number, flags, desc) = { -+ let desc = *file.description.read(token.token()); -+ (desc.number, desc.flags, desc) - }; - - if cmd == 
F_DUPFD || cmd == F_DUPFD_CLOEXEC { -@@ -460,7 +468,7 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken) - - // Communicate fcntl with scheme - if cmd != F_GETFD && cmd != F_SETFD { -- let scheme = scheme::get_scheme(token.token(), scheme_id)?; -+ let scheme = desc.get_scheme(token)?; - - scheme.fcntl(number, cmd, arg, token)?; - }; -@@ -518,13 +526,11 @@ pub fn flink(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken) - let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; - let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; - -- let (number, scheme_id) = { -- let desc = file.description.read(token.token()); -- (desc.number, desc.scheme) -+ let (number, scheme) = { -+ let desc = *file.description.read(token.token()); -+ (desc.number, desc.get_scheme(token)?) - }; - -- let scheme = scheme::get_scheme(token.token(), scheme_id)?; -- - // TODO: Check EXDEV. - /* - if scheme_id != description.scheme { -@@ -554,13 +560,11 @@ pub fn frename(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken - let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?; - let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?; - -- let (number, scheme_id) = { -- let desc = file.description.read(token.token()); -- (desc.number, desc.scheme) -+ let (number, scheme) = { -+ let desc = *file.description.read(token.token()); -+ (desc.number, desc.get_scheme(token)?) - }; - -- let scheme = scheme::get_scheme(token.token(), scheme_id)?; -- - // TODO: Check EXDEV. 
- /* - if scheme_id != description.scheme { -diff --git a/src/syscall/process.rs b/src/syscall/process.rs -index e83da42..78eed9d 100644 ---- a/src/syscall/process.rs -+++ b/src/syscall/process.rs -@@ -271,23 +274,26 @@ unsafe fn bootstrap_mem(bootstrap: &crate::startup::Bootstrap) -> &'static [u8] - } - - fn insert_fd(scheme: SchemeId, number: usize, cloexec: bool, token: &mut CleanLockToken) -> usize { -+ let description = Arc::new(RwLock::new(FileDescription::new( -+ scheme, -+ number, -+ 0, -+ (O_CREAT | O_RDWR) as u32, -+ InternalFlags::empty(), -+ token, -+ ))); -+ - let current_lock = context::current(); - let mut current = current_lock.read(token.token()); -- let (context, mut token) = current.token_split(); -+ let (context, mut context_token) = current.token_split(); - context - .add_file_min( - FileDescriptor { -- description: Arc::new(RwLock::new(FileDescription { -- scheme, -- number, -- offset: 0, -- flags: (O_CREAT | O_RDWR) as u32, -- internal_flags: InternalFlags::empty(), -- })), -+ description, - cloexec, - }, - syscall::flag::UPPER_FDTBL_TAG + scheme.get(), -- &mut token, -+ &mut context_token, - ) - .expect("failed to insert fd to current context") - .get() diff --git a/local/patches/kernel/P8-msi-foundation-v2.patch b/local/patches/kernel/P8-msi-foundation-v2.patch new file mode 100644 index 0000000000..1f4054e022 --- /dev/null +++ b/local/patches/kernel/P8-msi-foundation-v2.patch @@ -0,0 +1,222 @@ +diff --git a/src/arch/x86_shared/device/msi.rs b/src/arch/x86_shared/device/msi.rs +--- a/src/arch/x86_shared/device/msi.rs ++++ b/src/arch/x86_shared/device/msi.rs +@@ -1,66 +1,183 @@ ++// MSI/MSI-X support for x86 — kernel-level message composition and validation ++// Cross-referenced from Linux 7.0: arch/x86/kernel/apic/msi.c (391 lines) ++ + use crate::arch::device::local_apic::ApicId; + + pub const MSI_ADDRESS_BASE: u64 = 0xFEE0_0000; ++pub const MSI_ADDRESS_MASK: u64 = 0xFEEF_F000; ++const MSI_DEST_MODE_LOGICAL: u64 = 1 << 2; ++const 
MSI_REDIRECTION_HINT: u64 = 1 << 3; ++ ++#[derive(Debug, Clone, Copy)] ++pub struct MsiAddress { ++ pub raw: u64, ++} ++ ++#[derive(Debug, Clone, Copy)] ++pub struct MsiData { ++ pub raw: u32, ++} + + #[derive(Debug, Clone)] + pub struct MsiMessage { +- pub address: u64, +- pub data: u32, ++ pub address: MsiAddress, ++ pub data: MsiData, ++} ++ ++impl MsiAddress { ++ pub fn new(dest_apic_id: u8, redirection_hint: bool, dest_mode_logical: bool) -> Self { ++ let mut addr = MSI_ADDRESS_BASE; ++ addr |= u64::from(dest_apic_id) << 12; ++ if redirection_hint { ++ addr |= MSI_REDIRECTION_HINT; ++ } ++ if dest_mode_logical { ++ addr |= MSI_DEST_MODE_LOGICAL; ++ } ++ Self { raw: addr } ++ } ++ ++ pub fn validate(addr: u64) -> bool { ++ (addr & MSI_ADDRESS_MASK) == MSI_ADDRESS_BASE ++ } ++ ++ pub fn dest_apic_id(&self) -> u8 { ++ ((self.raw >> 12) & 0xFF) as u8 ++ } ++} ++ ++impl MsiData { ++ pub fn new(vector: u8, delivery_mode: u8, trigger_mode: u8) -> Self { ++ let mut data = u32::from(vector); ++ data |= u32::from(delivery_mode & 0x7) << 8; // 3-bit field (bits 10:8); keep stray bits out of reserved/trigger bits ++ data |= u32::from(trigger_mode & 0x1) << 15; // 1-bit field (bit 15) ++ Self { raw: data } ++ } ++ ++ pub fn vector(&self) -> u8 { ++ (self.raw & 0xFF) as u8 ++ } ++ ++ pub fn delivery_mode(&self) -> u8 { ++ ((self.raw >> 8) & 0x7) as u8 ++ } ++ ++ pub fn trigger_mode(&self) -> u8 { ++ ((self.raw >> 15) & 0x1) as u8 ++ } + } + + impl MsiMessage { +- pub fn compose(dest: ApicId, vector: u8, delivery_mode: u8) -> Self { +- let address = MSI_ADDRESS_BASE | (u64::from(dest.get()) << 12); +- let data = u32::from(vector) | (u32::from(delivery_mode) << 8); ++ pub fn compose(dest: ApicId, vector: u8, delivery_mode: u8, trigger_mode: u8) -> Self { ++ let address = MsiAddress::new(dest.get() as u8, false, false); ++ let data = MsiData::new(vector, delivery_mode, trigger_mode); + Self { address, data } + } + + pub fn validate(&self) -> bool { +- (self.address & 0xFFF0_0000) == MSI_ADDRESS_BASE +- && self.data & 0xFF >= 32 +- && self.data & 0xFF < 255 ++
MsiAddress::validate(self.address.raw) ++ && self.data.vector() >= 32 ++ && self.data.vector() < 255 + } + } + +-pub fn is_valid_msi_address(addr: u64) -> bool { (addr & 0xFFF0_0000) == MSI_ADDRESS_BASE } +-pub fn is_valid_msi_vector(vector: u8) -> bool { vector >= 32 && vector < 255 } ++pub fn is_valid_msi_address(addr: u64) -> bool { ++ MsiAddress::validate(addr) ++} ++ ++pub fn is_valid_msi_vector(vector: u8) -> bool { ++ vector >= 32 && vector < 255 ++} + + #[derive(Debug)] + pub struct MsiCapability { +- pub msg_ctl: u16, pub msg_addr_lo: u32, pub msg_data: u16, +- pub is_64bit: bool, pub is_maskable: bool, pub multiple_message_capable: u8, ++ pub msg_ctl: u16, ++ pub msg_addr_lo: u32, ++ pub msg_addr_hi: u32, ++ pub msg_data: u16, ++ pub mask_bits: u32, ++ pub pending_bits: u32, ++ pub is_64bit: bool, ++ pub is_maskable: bool, ++ pub multiple_message_capable: u8, + } + + impl MsiCapability { +- pub fn parse(raw: &[u32], msg_ctl: u16) -> Option { +- let msg_addr_lo = *raw.get(1)?; +- let msg_data = if msg_ctl & (1<<7) != 0 { +- (*raw.get(3)? & 0xFFFF) as u16 +- } else { +- (*raw.get(2)? 
& 0xFFFF) as u16 +- }; +- Some(Self { +- msg_ctl, msg_addr_lo, msg_data, +- is_64bit: msg_ctl & (1<<7) != 0, +- is_maskable: msg_ctl & (1<<8) != 0, +- multiple_message_capable: ((msg_ctl>>1)&0x7) as u8, +- }) ++ pub fn parse(raw: &[u32; 6], msg_ctl: u16) -> Self { ++ Self { ++ msg_ctl, ++ msg_addr_lo: raw[1], ++ msg_addr_hi: if msg_ctl & (1 << 7) != 0 { raw[2] } else { 0 }, ++ msg_data: if msg_ctl & (1 << 7) != 0 { ++ (raw[3] & 0xFFFF) as u16 ++ } else { ++ (raw[2] & 0xFFFF) as u16 ++ }, ++ mask_bits: if msg_ctl & (1 << 8) != 0 { ++ if msg_ctl & (1 << 7) != 0 { ++ raw[4] // 64-bit layout: mask register is the dword after message data ++ } else { ++ raw[3] ++ } ++ } else { ++ 0 ++ }, ++ pending_bits: if msg_ctl & (1 << 8) != 0 { if msg_ctl & (1 << 7) != 0 { raw[5] } else { raw[4] } } else { 0 }, // pending register follows the mask register ++ is_64bit: msg_ctl & (1 << 7) != 0, ++ is_maskable: msg_ctl & (1 << 8) != 0, ++ multiple_message_capable: ((msg_ctl >> 1) & 0x7) as u8, ++ } + } + } + ++#[derive(Debug)] + pub struct MsixCapability { +- pub table_offset: u32, pub table_bar: u8, +- pub pba_offset: u32, pub pba_bar: u8, pub table_size: u16, ++ pub msg_ctl: u16, ++ pub table_offset: u32, ++ pub table_bar: u8, ++ pub pba_offset: u32, ++ pub pba_bar: u8, ++ pub table_size: u16, + } + + impl MsixCapability { +- pub fn parse(raw: &[u32], msg_ctl: u16) -> Option { +- let r1 = *raw.get(1)?; +- let r2 = *raw.get(2)?; +- Some(Self { +- table_offset: r1 & !0x7, table_bar: (r1&0x7) as u8, +- pba_offset: r2 & !0x7, pba_bar: (r2&0x7) as u8, +- table_size: ((msg_ctl>>1)&0x7FF) as u16 + 1, +- }) ++ pub fn parse(raw: &[u32; 3], msg_ctl: u16) -> Self { ++ Self { ++ msg_ctl, ++ table_offset: raw[1] & !0x7, ++ table_bar: (raw[1] & 0x7) as u8, ++ pba_offset: raw[2] & !0x7, ++ pba_bar: (raw[2] & 0x7) as u8, ++ table_size: (msg_ctl & 0x7FF) + 1, // MSI-X Table Size is bits 10:0 of Message Control, encoded as N-1 ++ } ++ } ++} ++ ++#[cfg(test)] ++mod tests { ++ use super::*; ++ ++ #[test] ++ fn test_compose_message() { ++ let msg = MsiMessage::compose(ApicId::new(3), 48, 0b101, 1); ++ assert!(msg.validate()); ++ assert_eq!(msg.address.dest_apic_id(), 3); ++ assert_eq!(msg.data.vector(),
48); ++ assert_eq!(msg.data.delivery_mode(), 0b101); ++ assert_eq!(msg.data.trigger_mode(), 1); ++ } ++ ++ #[test] ++ fn test_invalid_address() { ++ assert!(!is_valid_msi_address(0xDEAD_BEEF)); ++ assert!(is_valid_msi_address(0xFEE0_0000)); ++ } ++ ++ #[test] ++ fn test_msi_parse() { ++ let raw = [0u32; 6]; ++ let cap = MsiCapability::parse(&raw, 0); ++ assert!(!cap.is_64bit); ++ assert!(!cap.is_maskable); + } + } diff --git a/local/patches/kernel/redox.patch b/local/patches/kernel/redox.patch deleted file mode 100644 index 581440fdcb..0000000000 --- a/local/patches/kernel/redox.patch +++ /dev/null @@ -1,4 +0,0 @@ ---- a/Makefile -+++ b/Makefile -@@ -0,0 +1,1 @@ -+# Red Bear OS kernel patches applied via individual patch files diff --git a/recipes/core/kernel/recipe.toml b/recipes/core/kernel/recipe.toml index 455d7ed61b..f9d7f16067 100644 --- a/recipes/core/kernel/recipe.toml +++ b/recipes/core/kernel/recipe.toml @@ -1,26 +1,18 @@ -# Consolidated patch: all Red Bear kernel changes (P0-P10) in a single file. -# Individual patches preserved in local/patches/kernel/ for reference/rebase. -# The consolidated patch was generated from applying: redox(no-op), P0-canary, -# P1-memory-map-overflow, P4-supplementary-groups, P4-s3-suspend-resume, -# P4-scheme-failure-modes, P5-sched-rt-policy, P5-scheme-sched-id, -# P5-context-mod-sched, P6-percpu-runqueues, P6-futex-sharding, -# P8-initial-placement, P9-proc-lock-ordering, P9-numa-topology, -# P1-boot-path-diagnostics, P10-debug-scheme-serial-fix. -# Patches that were cumulative supersets (P5-sched-policy-context, P5-proc-setschedpolicy, -# P5-boot-path-hardening, P6-vruntime-*, P7-cache-affine-*, P7-proc-setname, -# P7-proc-setpriority, P8-futex-requeue, P8-futex-pi, P8-futex-robust, -# P8-percpu-wiring, P8-percpu-sched, P8-load-balance, P8-work-stealing, -# P9-futex-pi-cas-fix) failed to apply at commit 866dfad0 due to -# context conflicts and are deferred until rebase. 
-# P7-scheduler-improvements.patch: removed from recipe patches — 3/4 hunks -# fail on context.rs at 866dfad0. Rebase needed. -# P8-msi (applies separately): T1.1 msi.rs (message composition/validation/capability -# parsing), T1.2 vector.rs (per-CPU bitmatrix allocation), T1.3 IRQ scheme MSI -# validation gate + iommu hook, T2.2 kernel-side IRQ affinity handler. [source] git = "https://gitlab.redox-os.org/redox-os/kernel.git" rev = "866dfad0" -patches = ["../../../local/patches/kernel/redbear-consolidated.patch", "../../../local/patches/kernel/P8-msi.patch", "../../../local/patches/kernel/P2-rebrand-start-message.patch", "P0-eventfd-kernel.patch", "../../../local/patches/kernel/P1-mkfifo-fifo-support.patch"] +patches = [ + "../../../local/patches/kernel/redbear-consolidated.patch", + "../../../local/patches/kernel/P8-msi.patch", + "../../../local/patches/kernel/P8-msi-foundation-v2.patch", + "../../../local/patches/kernel/P2-rebrand-start-message.patch", + "../../../local/patches/kernel/P0-eventfd-kernel.patch", + "../../../local/patches/kernel/P0-rsdp-checksum.patch", + "../../../local/patches/kernel/P1-mkfifo-fifo-support-v2.patch", + "../../../local/patches/kernel/P1-ioapic-hpet-nmi-v2.patch", + "../../../local/patches/kernel/P9-numa-topology.patch", + "../../../local/patches/kernel/P9-proc-lock-ordering.patch", +] [build] template = "custom"