diff --git a/Cargo.toml b/Cargo.toml index 6d4f059a..e05f723c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ cc = "1.0" toml = "0.8" [dependencies] +acpi_ext = { package = "acpi", git = "https://gitlab.redox-os.org/redox-os/acpi.git", branch = "redox-6.x" } arrayvec = { version = "0.7.4", default-features = false } bitfield = "0.13.2" bitflags = "2" diff --git a/Makefile b/Makefile index 68a8c50a..ce59b910 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,4 @@ +# Red Bear OS kernel patches applied via individual patch files .PHONY: all check SOURCE:=$(dir $(realpath $(lastword $(MAKEFILE_LIST)))) diff --git a/build.rs b/build.rs index 96c3ea5c..751746cc 100644 --- a/build.rs +++ b/build.rs @@ -77,6 +77,7 @@ fn main() { } "x86_64" => { println!("cargo::rerun-if-changed=src/asm/x86_64/trampoline.asm"); + println!("cargo::rerun-if-changed=src/asm/x86_64/s3_wakeup.asm"); let status = Command::new("nasm") .arg("-f") @@ -89,6 +90,18 @@ fn main() { if !status.success() { panic!("nasm failed with exit status {}", status); } + + let status = Command::new("nasm") + .arg("-f") + .arg("bin") + .arg("-o") + .arg(format!("{}/s3_wakeup", out_dir)) + .arg("src/asm/x86_64/s3_wakeup.asm") + .status() + .expect("failed to run nasm"); + if !status.success() { + panic!("nasm failed with exit status {}", status); + } } "riscv64" => { println!("cargo::rustc-cfg=dtb"); diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs index 4dc23883..f472c088 100644 --- a/src/acpi/madt/arch/x86.rs +++ b/src/acpi/madt/arch/x86.rs @@ -18,6 +18,7 @@ use crate::{ use super::{Madt, MadtEntry}; +const AP_SPIN_LIMIT: u32 = 1_000_000; const TRAMPOLINE: usize = 0x8000; static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline")); @@ -42,13 +43,17 @@ pub(super) fn init(madt: Madt) { //TODO: do not have writable and executable! 
         let mut mapper = KernelMapper::lock_rw();
-        let result = mapper
-            .map_phys(
-                trampoline_page.start_address(),
-                trampoline_frame.base(),
-                PageFlags::new().execute(true).write(true),
-            )
-            .expect("failed to map trampoline");
+        let result = match mapper.map_phys(
+            trampoline_page.start_address(),
+            trampoline_frame.base(),
+            PageFlags::new().execute(true).write(true),
+        ) {
+            Some(result) => result,
+            None => {
+                println!("KERNEL AP: failed to map trampoline page, AP bring-up disabled");
+                return;
+            }
+        };
 
         (result, mapper.table().phys().data())
     };
@@ -72,17 +77,27 @@ pub(super) fn init(madt: Madt) {
                 if u32::from(ap_local_apic.id) == me.get() {
                     debug!("    This is my local APIC");
                 } else if ap_local_apic.flags & 1 == 1 {
-                    let cpu_id = LogicalCpuId::next();
-                    // Allocate a stack
-                    let stack_start = RmmA::phys_to_virt(
-                        allocate_p2frame(4)
-                            .expect("no more frames in acpi stack_start")
-                            .base(),
-                    )
-                    .data();
+                    let alloc = match allocate_p2frame(4) {
+                        Some(frame) => frame,
+                        None => {
+                            println!("KERNEL AP: CPU {} no memory for stack, skipping", ap_local_apic.id);
+                            continue;
+                        }
+                    };
+                    let stack_start = RmmA::phys_to_virt(alloc.base()).data();
                     let stack_end = stack_start + (PAGE_SIZE << 4);
 
+                    let next_cpu = crate::CPU_COUNT.load(Ordering::Relaxed);
+                    if next_cpu >= crate::cpu_set::MAX_CPU_COUNT {
+                        println!(
+                            "KERNEL AP: CPU {} exceeds logical CPU limit, skipping",
+                            ap_local_apic.id
+                        );
+                        continue;
+                    }
+                    let cpu_id = LogicalCpuId::new(next_cpu);
+
                     let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end);
                     let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id);
@@ -137,13 +152,34 @@ pub(super) fn init(madt: Madt) {
                         local_apic.set_icr(icr);
                     }
 
-                    // Wait for trampoline ready
-                    while unsafe { (*ap_ready.cast::<AtomicU64>()).load(Ordering::SeqCst) } == 0 {
+                    // Wait for trampoline ready with timeout
+                    let mut trampoline_ready = false;
+                    for _ in 0..AP_SPIN_LIMIT {
+                        if unsafe { (*ap_ready.cast::<AtomicU64>()).load(Ordering::SeqCst) } != 0 {
+                            trampoline_ready = true;
+                            break;
+                        }
                         hint::spin_loop();
                     }
-                    while !AP_READY.load(Ordering::SeqCst) {
+                    if !trampoline_ready {
+                        println!("KERNEL AP: CPU {} trampoline timeout, skipping", ap_local_apic.id);
+                        continue;
+                    }
+
+                    let mut kernel_ready = false;
+                    for _ in 0..AP_SPIN_LIMIT {
+                        if AP_READY.load(Ordering::SeqCst) {
+                            kernel_ready = true;
+                            break;
+                        }
                         hint::spin_loop();
                     }
+                    if !kernel_ready {
+                        println!("KERNEL AP: CPU {} AP_READY timeout, skipping", ap_local_apic.id);
+                        continue;
+                    }
+
+                    crate::CPU_COUNT.fetch_add(1, Ordering::Relaxed);
 
                     RmmA::invalidate_all();
                 }
@@ -151,10 +187,12 @@ pub(super) fn init(madt: Madt) {
     }
 
     // Unmap trampoline
-    let (_frame, _, flush) = unsafe {
+    if let Some((_frame, _, flush)) = unsafe {
         KernelMapper::lock_rw()
             .unmap_phys(trampoline_page.start_address())
-            .expect("failed to unmap trampoline page")
-    };
-    flush.flush();
+    } {
+        flush.flush();
+    } else {
+        println!("KERNEL AP: failed to unmap trampoline page (non-fatal)");
+    }
 }
diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs
index 59e35265..b3b80f0c 100644
--- a/src/acpi/mod.rs
+++ b/src/acpi/mod.rs
@@ -82,6 +82,14 @@ impl Rxsdt for RxsdtEnum {
 
 pub static RXSDT_ENUM: Once<RxsdtEnum> = Once::new();
 
+#[derive(Clone, Copy, Debug)]
+pub struct AcpiRootInfo {
+    pub revision: u8,
+    pub root_sdt_address: PhysicalAddress,
+}
+
+pub static ACPI_ROOT_INFO: Once<AcpiRootInfo> = Once::new();
+
 /// Parse the ACPI tables to gather CPU, interrupt, and timer information
 pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
     unsafe {
@@ -94,6 +102,15 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
         let rsdp_opt = Rsdp::get_rsdp(already_supplied_rsdp);
 
         if let Some(rsdp) = rsdp_opt {
+            let root_info = ACPI_ROOT_INFO.call_once(|| AcpiRootInfo {
+                revision: rsdp.revision(),
+                root_sdt_address: rsdp.sdt_address(),
+            });
+
+            if root_info.root_sdt_address != rsdp.sdt_address()
+                || root_info.revision != rsdp.revision()
+            {
+                error!("ACPI_ROOT_INFO already initialized with a different RSDP root");
+            }
+
             debug!("SDT address: {:#x}", rsdp.sdt_address().data());
 
             let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock_rw());
diff --git a/src/acpi/rsdp.rs b/src/acpi/rsdp.rs
index f10c5ac9..5e93a9f8 100644
--- a/src/acpi/rsdp.rs
+++ b/src/acpi/rsdp.rs
@@ -31,4 +31,8 @@ impl Rsdp {
             self.rsdt_address as usize
         })
     }
+
+    pub fn revision(&self) -> u8 {
+        self.revision
+    }
 }
diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs
index 4fdb0ba1..aaa71963 100644
--- a/src/allocator/mod.rs
+++ b/src/allocator/mod.rs
@@ -7,26 +7,40 @@ mod linked_list;
 /// Size of kernel heap
 const KERNEL_HEAP_SIZE: usize = ::rmm::MEGABYTE;
 
+#[cold]
+fn halt_kernel_heap_init(message: &str) -> ! {
+    print!("{message}");
+    println!("Kernel heap initialization cannot continue. Halting.");
+    loop {
+        core::hint::spin_loop();
+    }
+}
+
 unsafe fn map_heap(mapper: &mut KernelMapper, offset: usize, size: usize) {
     let mut flush_all = PageFlushAll::new();
 
     let heap_start_page = Page::containing_address(VirtualAddress::new(offset));
     let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size - 1));
     for page in Page::range_inclusive(heap_start_page, heap_end_page) {
-        let phys = mapper
-            .allocator_mut()
-            .allocate_one()
-            .expect("failed to allocate kernel heap");
+        let phys = match mapper.allocator_mut().allocate_one() {
+            Some(phys) => phys,
+            None => halt_kernel_heap_init(
+                "FATAL: failed to allocate physical frame for kernel heap\n",
+            ),
+        };
         let flush = unsafe {
-            mapper
-                .map_phys(
-                    page.start_address(),
-                    phys,
-                    PageFlags::new()
-                        .write(true)
-                        .global(cfg!(not(feature = "pti"))),
-                )
-                .expect("failed to map kernel heap")
+            match mapper.map_phys(
+                page.start_address(),
+                phys,
+                PageFlags::new()
+                    .write(true)
+                    .global(cfg!(not(feature = "pti"))),
+            ) {
+                Some(flush) => flush,
+                None => halt_kernel_heap_init(
+                    "FATAL: failed to map kernel heap virtual page\n",
+                ),
+            }
         };
         flush_all.consume(flush);
     }
diff --git a/src/arch/x86_shared/gdt.rs b/src/arch/x86_shared/gdt.rs
index cad344f3..f7acae35 100644
--- a/src/arch/x86_shared/gdt.rs
+++ b/src/arch/x86_shared/gdt.rs
@@ -192,6 +192,15 @@ impl ProcessorControlRegion {
     }
 }
 
+#[cold]
+fn halt_pcr_init() -> ! {
+    println!("FATAL: failed to allocate physical memory for Processor Control Region");
+    println!("Processor startup cannot continue. Halting.");
+    loop {
+        core::hint::spin_loop();
+    }
+}
+
 pub unsafe fn pcr() -> *mut ProcessorControlRegion {
     unsafe {
         // Primitive benchmarking of RDFSBASE and RDGSBASE in userspace, appears to indicate that
@@ -375,7 +384,10 @@ pub fn allocate_and_init_pcr(
         .next_power_of_two()
         .trailing_zeros();
 
-    let pcr_frame = crate::memory::allocate_p2frame(alloc_order).expect("failed to allocate PCR");
+    let pcr_frame = match crate::memory::allocate_p2frame(alloc_order) {
+        Some(frame) => frame,
+        None => halt_pcr_init(),
+    };
     let pcr_ptr = RmmA::phys_to_virt(pcr_frame.base()).data() as *mut ProcessorControlRegion;
 
     unsafe { core::ptr::write(pcr_ptr, ProcessorControlRegion::new_partial_init(cpu_id)) };
diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs
index 50064585..47f692f6 100644
--- a/src/arch/x86_shared/idt.rs
+++ b/src/arch/x86_shared/idt.rs
@@ -78,6 +78,15 @@ static INIT_BSP_IDT: SyncUnsafeCell<Idt> = SyncUnsafeCell::new(Idt::new());
 pub(crate) static IDTS: RwLock<HashMap<LogicalCpuId, &'static mut Idt, DefaultHashBuilder>> =
     RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new()));
 
+#[cold]
+fn halt_idt_init() -> ! {
+    println!("FATAL: failed to allocate physical pages for backup interrupt stack");
+    println!("Interrupt setup cannot continue. Halting.");
+    loop {
+        core::hint::spin_loop();
+    }
+}
+
 #[inline]
 pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool {
     if cpu_id == LogicalCpuId::BSP {
@@ -161,8 +170,10 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt {
         .or_insert_with(|| Box::leak(Box::new(Idt::new())));
 
     use crate::memory::{RmmA, RmmArch};
-    let frames = crate::memory::allocate_p2frame(4)
-        .expect("failed to allocate pages for backup interrupt stack");
+    let frames = match crate::memory::allocate_p2frame(4) {
+        Some(frames) => frames,
+        None => halt_idt_init(),
+    };
 
     // Physical pages are mapped linearly. So is the linearly mapped virtual memory.
     let base_address = RmmA::phys_to_virt(frames.base());
diff --git a/src/arch/x86_shared/mod.rs b/src/arch/x86_shared/mod.rs
index e3c30501..11c33e94 100644
--- a/src/arch/x86_shared/mod.rs
+++ b/src/arch/x86_shared/mod.rs
@@ -28,6 +28,8 @@ pub mod pti;
 /// Initialization and start function
 pub mod start;
 
+pub mod sleep;
+
 /// Stop function
 pub mod stop;
diff --git a/src/arch/x86_shared/sleep.rs b/src/arch/x86_shared/sleep.rs
new file mode 100644
index 00000000..9f98c0d8
--- /dev/null
+++ b/src/arch/x86_shared/sleep.rs
@@ -0,0 +1,712 @@
+use alloc::{sync::Arc, vec::Vec};
+use core::{
+    ptr::NonNull,
+    str::FromStr,
+    sync::atomic::{AtomicU32, Ordering},
+};
+
+use acpi_ext::{
+    aml::{namespace::AmlName, object::Object, Interpreter},
+    registers::FixedRegisters,
+    sdt::{facs::Facs, fadt::Fadt, SdtHeader},
+    AcpiTables, Handle, Handler, PhysicalMapping,
+};
+use spin::Mutex;
+use syscall::error::{Error, EINVAL, EIO};
+use x86::{segmentation::SegmentSelector, task, Ring};
+
+use crate::{
+    acpi::ACPI_ROOT_INFO,
+    arch::interrupt,
+    memory::{
+        round_down_pages, round_up_pages, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA,
+        RmmArch, VirtualAddress, PAGE_SIZE,
+    },
+    syscall::io::{Io, Pio},
+};
+
+const ACPI_SLP_TYP_SHIFT: u16 = 10;
+const ACPI_SLP_TYP_MASK: u16 = 0x1C00;
+const ACPI_SLP_EN: u16 = 1 << 13;
+const WAKE_TRAMPOLINE_PHYS: usize = 0x8000;
+const SLEEP_RETURN_OK: usize = 0;
+
+#[cfg(target_arch = "x86_64")]
+static WAKE_TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/s3_wakeup"));
+
+#[repr(C, packed)]
+#[derive(Clone, Copy, Debug, Default)]
+struct DescriptorTableRegister {
+    limit: u16,
+    base: u64,
+}
+
+#[repr(C, align(64))]
+#[derive(Clone, Copy, Debug)]
+struct FpuState {
+    bytes: [u8; 4096],
+}
+
+impl Default for FpuState {
+    fn default() -> Self {
+        Self { bytes: [0; 4096] }
+    }
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum SleepState {
+    S3,
+    S5,
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum SleepError {
+    UnsupportedArch,
+    MissingAcpi,
+    MissingFadt,
+    MissingFacs,
+    MissingSleepObject,
+    InvalidSleepObject,
+    UnsupportedPmControl,
+    UnsupportedAmlOperation,
+    SleepDidNotEnter,
+}
+
+impl SleepError {
+    fn code(self) -> usize {
+        match self {
+            Self::UnsupportedArch => EINVAL as usize,
+            Self::MissingAcpi
+            | Self::MissingFadt
+            | Self::MissingFacs
+            | Self::MissingSleepObject
+            | Self::UnsupportedAmlOperation => EIO as usize,
+            Self::InvalidSleepObject | Self::UnsupportedPmControl | Self::SleepDidNotEnter => {
+                EINVAL as usize
+            }
+        }
+    }
+
+    fn from_code(code: usize) -> Self {
+        match code as i32 {
+            x if x == EINVAL => Self::InvalidSleepObject,
+            _ => Self::MissingAcpi,
+        }
+    }
+}
+
+#[derive(Clone, Copy, Debug, Default)]
+struct SavedCpuContext {
+    entry_rsp: usize,
+    runtime_rsp: usize,
+    facs_address: usize,
+    cr0: usize,
+    cr2: usize,
+    cr3: usize,
+    cr4: usize,
+    rflags: usize,
+    gdtr: DescriptorTableRegister,
+    idtr: DescriptorTableRegister,
+    efer: u64,
+    fs_base: u64,
+    gs_base: u64,
+    kernel_gs_base: u64,
+    fpu: FpuState,
+}
+
+static SAVED_CONTEXT: Mutex<Option<SavedCpuContext>> = Mutex::new(None);
+static AML_MUTEX_IDS: AtomicU32 = AtomicU32::new(1);
+
+#[derive(Clone, Copy, Debug)]
+struct SleepTypeData {
+    a: u16,
+    b: u16,
+}
+
+#[derive(Clone, Copy)]
+struct KernelAcpiHandler;
+
+impl KernelAcpiHandler {
+    fn map_range(physical_address: usize, size: usize) -> (*mut u8, usize) {
+        let map_base = round_down_pages(physical_address);
+        let map_offset = physical_address - map_base;
+        let mapped_length = round_up_pages(size + map_offset);
+
+        // SAFETY: The ACPI interpreter only requests firmware-described physical regions.
+        unsafe {
+            let mut mapper = KernelMapper::lock_rw();
+            for page_index in 0..mapped_length / PAGE_SIZE {
+                let (_, flush) = mapper
+                    .map_linearly(
+                        PhysicalAddress::new(map_base + page_index * PAGE_SIZE),
+                        PageFlags::new(),
+                    )
+                    .expect("failed to linearly map ACPI physical region");
+                flush.flush();
+            }
+        }
+
+        let virtual_base = RmmA::phys_to_virt(PhysicalAddress::new(map_base)).data();
+        ((virtual_base + map_offset) as *mut u8, mapped_length)
+    }
+}
+
+impl Handler for KernelAcpiHandler {
+    unsafe fn map_physical_region<T>(
+        &self,
+        physical_address: usize,
+        size: usize,
+    ) -> PhysicalMapping<Self, T> {
+        let (virtual_start, mapped_length) = Self::map_range(physical_address, size);
+        PhysicalMapping {
+            physical_start: physical_address,
+            virtual_start: NonNull::new(virtual_start.cast::<T>())
+                .expect("expected mapped ACPI virtual address to be non-null"),
+            region_length: size,
+            mapped_length,
+            handler: *self,
+        }
+    }
+
+    fn unmap_physical_region<T>(_region: &PhysicalMapping<Self, T>) {}
+
+    fn read_u8(&self, address: usize) -> u8 {
+        // SAFETY: AML system-memory accesses are byte-addressable firmware regions.
+        unsafe {
+            core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u8)
+        }
+    }
+
+    fn read_u16(&self, address: usize) -> u16 {
+        // SAFETY: AML system-memory accesses are word-addressable firmware regions.
+        unsafe {
+            core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u16)
+        }
+    }
+
+    fn read_u32(&self, address: usize) -> u32 {
+        // SAFETY: AML system-memory accesses are dword-addressable firmware regions.
+        unsafe {
+            core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u32)
+        }
+    }
+
+    fn read_u64(&self, address: usize) -> u64 {
+        // SAFETY: AML system-memory accesses are qword-addressable firmware regions.
+        unsafe {
+            core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u64)
+        }
+    }
+
+    fn write_u8(&self, address: usize, value: u8) {
+        // SAFETY: AML system-memory accesses are byte-addressable firmware regions.
+        unsafe {
+            core::ptr::write_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u8, value)
+        }
+    }
+
+    fn write_u16(&self, address: usize, value: u16) {
+        // SAFETY: AML system-memory accesses are word-addressable firmware regions.
+        unsafe {
+            core::ptr::write_volatile(
+                RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u16,
+                value,
+            )
+        }
+    }
+
+    fn write_u32(&self, address: usize, value: u32) {
+        // SAFETY: AML system-memory accesses are dword-addressable firmware regions.
+        unsafe {
+            core::ptr::write_volatile(
+                RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u32,
+                value,
+            )
+        }
+    }
+
+    fn write_u64(&self, address: usize, value: u64) {
+        // SAFETY: AML system-memory accesses are qword-addressable firmware regions.
+        unsafe {
+            core::ptr::write_volatile(
+                RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u64,
+                value,
+            )
+        }
+    }
+
+    fn read_io_u8(&self, port: u16) -> u8 {
+        Pio::<u8>::new(port).read()
+    }
+
+    fn read_io_u16(&self, port: u16) -> u16 {
+        Pio::<u16>::new(port).read()
+    }
+
+    fn read_io_u32(&self, port: u16) -> u32 {
+        Pio::<u32>::new(port).read()
+    }
+
+    fn write_io_u8(&self, port: u16, value: u8) {
+        Pio::<u8>::new(port).write(value)
+    }
+
+    fn write_io_u16(&self, port: u16, value: u16) {
+        Pio::<u16>::new(port).write(value)
+    }
+
+    fn write_io_u32(&self, port: u16, value: u32) {
+        Pio::<u32>::new(port).write(value)
+    }
+
+    fn read_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u8 {
+        0
+    }
+
+    fn read_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u16 {
+        0
+    }
+
+    fn read_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u32 {
+        0
+    }
+
+    fn write_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u8) {}
+
+    fn write_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u16) {}
+
+    fn write_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u32) {}
+
+    fn nanos_since_boot(&self) -> u64 {
+        0
+    }
+
+    fn stall(&self, microseconds: u64) {
+        for _ in 0..(microseconds.saturating_mul(64)) {
+            core::hint::spin_loop();
+        }
+    }
+
+    fn sleep(&self, milliseconds: u64) {
+        for _ in 0..(milliseconds.saturating_mul(64_000)) {
+            core::hint::spin_loop();
+        }
+    }
+
+    fn create_mutex(&self) -> Handle {
+        Handle(AML_MUTEX_IDS.fetch_add(1, Ordering::Relaxed))
+    }
+
+    fn acquire(&self, _mutex: Handle, _timeout: u16) -> Result<(), acpi_ext::aml::AmlError> {
+        Ok(())
+    }
+
+    fn release(&self, _mutex: Handle) {}
+}
+
+fn sleep_state_name(state: SleepState) -> &'static str {
+    match state {
+        SleepState::S3 => "\\_S3",
+        SleepState::S5 => "\\_S5",
+    }
+}
+
+fn encode_sleep_type(value: u16) -> u16 {
+    if value <= 0x7 {
+        value << ACPI_SLP_TYP_SHIFT
+    } else {
+        value & ACPI_SLP_TYP_MASK
+    }
+}
+
+fn load_interpreter() -> Result<
+    (
+        Arc<FixedRegisters<KernelAcpiHandler>>,
+        PhysicalMapping<KernelAcpiHandler, Facs>,
+        Interpreter<KernelAcpiHandler>,
+    ),
+    SleepError,
+> {
+    let root = *ACPI_ROOT_INFO.get().ok_or(SleepError::MissingAcpi)?;
+    let handler = KernelAcpiHandler;
+
+    // SAFETY: ACPI root info is captured from the firmware-provided, already validated root table.
+    let tables = unsafe {
+        AcpiTables::from_rsdt(handler, root.revision, root.root_sdt_address.data())
+            .map_err(|_| SleepError::MissingAcpi)?
+    };
+    let fadt = tables.find_table::<Fadt>().ok_or(SleepError::MissingFadt)?;
+    let registers = Arc::new(
+        FixedRegisters::new(&fadt, handler).map_err(|_| SleepError::UnsupportedPmControl)?,
+    );
+    let facs_address = fadt.facs_address().map_err(|_| SleepError::MissingFacs)?;
+
+    // SAFETY: The FADT-supplied FACS address is used exactly as described by the ACPI spec.
+    let facs = unsafe {
+        handler.map_physical_region::<Facs>(facs_address, core::mem::size_of::<Facs>())
+    };
+    // SAFETY: The AML interpreter only needs an owned mapping of the same firmware FACS table.
+    let interpreter_facs = unsafe {
+        handler.map_physical_region::<Facs>(facs_address, core::mem::size_of::<Facs>())
+    };
+    let dsdt = tables.dsdt().map_err(|_| SleepError::MissingFadt)?;
+    let interpreter = Interpreter::new(
+        handler,
+        dsdt.revision,
+        Arc::clone(&registers),
+        Some(interpreter_facs),
+    );
+
+    // SAFETY: Each AML table mapping is owned by the interpreter during table loading.
+    unsafe {
+        let mapping =
+            handler.map_physical_region::<SdtHeader>(dsdt.phys_address, dsdt.length as usize);
+        let stream = core::slice::from_raw_parts(
+            mapping
+                .virtual_start
+                .as_ptr()
+                .byte_add(core::mem::size_of::<SdtHeader>()) as *const u8,
+            dsdt.length as usize - core::mem::size_of::<SdtHeader>(),
+        );
+        interpreter
+            .load_table(stream)
+            .map_err(|_| SleepError::UnsupportedAmlOperation)?;
+
+        for ssdt in tables.ssdts() {
+            let mapping =
+                handler.map_physical_region::<SdtHeader>(ssdt.phys_address, ssdt.length as usize);
+            let stream = core::slice::from_raw_parts(
+                mapping
+                    .virtual_start
+                    .as_ptr()
+                    .byte_add(core::mem::size_of::<SdtHeader>()) as *const u8,
+                ssdt.length as usize - core::mem::size_of::<SdtHeader>(),
+            );
+            interpreter
+                .load_table(stream)
+                .map_err(|_| SleepError::UnsupportedAmlOperation)?;
+        }
+    }
+
+    Ok((registers, facs, interpreter))
+}
+
+fn sleep_type_data_from_interpreter(
+    interpreter: &Interpreter<KernelAcpiHandler>,
+    state: SleepState,
+) -> Result<SleepTypeData, SleepError> {
+    let name =
+        AmlName::from_str(sleep_state_name(state)).map_err(|_| SleepError::MissingSleepObject)?;
+    let object = interpreter
+        .evaluate(name, Vec::new())
+        .map_err(|_| SleepError::MissingSleepObject)?;
+
+    let Object::Package(package) = &*object else {
+        return Err(SleepError::InvalidSleepObject);
+    };
+
+    let Some(typa_object) = package.first() else {
+        return Err(SleepError::InvalidSleepObject);
+    };
+    let Some(typb_object) = package.get(1) else {
+        return Err(SleepError::InvalidSleepObject);
+    };
+
+    let Object::Integer(typa) = &**typa_object else {
+        return Err(SleepError::InvalidSleepObject);
+    };
+    let Object::Integer(typb) = &**typb_object else {
+        return Err(SleepError::InvalidSleepObject);
+    };
+
+    Ok(SleepTypeData {
+        a: encode_sleep_type(*typa as u16),
+        b: encode_sleep_type(*typb as u16),
+    })
+}
+
+fn sleep_type_data(state: SleepState) -> Result<SleepTypeData, SleepError> {
+    let (_registers, _facs, interpreter) = load_interpreter()?;
+    sleep_type_data_from_interpreter(&interpreter, state)
+}
+
+fn install_wake_trampoline(stack_rsp: usize, cr3: usize) {
+    let trampoline_page = Page::containing_address(VirtualAddress::new(WAKE_TRAMPOLINE_PHYS));
+    let trampoline_frame = PhysicalAddress::new(WAKE_TRAMPOLINE_PHYS);
+
+    // SAFETY: The 0x8000 low-memory trampoline page is reserved by the kernel for bootstrap stubs.
+    let (result, _) = unsafe {
+        let mut mapper = KernelMapper::lock_rw();
+        let result = mapper
+            .map_phys(
+                trampoline_page.start_address(),
+                trampoline_frame,
+                PageFlags::new().execute(true).write(true),
+            )
+            .expect("failed to map S3 wake trampoline page");
+        (result, mapper.table().phys().data())
+    };
+    result.flush();
+
+    for (index, value) in WAKE_TRAMPOLINE_DATA.iter().enumerate() {
+        // SAFETY: The trampoline page is mapped writable at the same virtual address as the physical page.
+        unsafe {
+            core::ptr::write_volatile((WAKE_TRAMPOLINE_PHYS as *mut u8).add(index), *value);
+        }
+    }
+
+    // SAFETY: The wake trampoline layout reserves three qword fields immediately after the jump.
+    unsafe {
+        let stack_slot = (WAKE_TRAMPOLINE_PHYS + 8) as *mut u64;
+        let page_table_slot = stack_slot.add(1);
+        let code_slot = stack_slot.add(2);
+        stack_slot.write(stack_rsp as u64);
+        page_table_slot.write(cr3 as u64);
+        #[expect(clippy::fn_to_numeric_cast)]
+        code_slot.write(resume_from_s3_trampoline as usize as u64);
+    }
+
+    // SAFETY: The trampoline mapping is no longer needed once the physical page has been populated.
+    let (_frame, _, flush) = unsafe {
+        KernelMapper::lock_rw()
+            .unmap_phys(trampoline_page.start_address())
+            .expect("failed to unmap S3 wake trampoline page")
+    };
+    flush.flush();
+}
+
+fn save_descriptor_tables(context: &mut SavedCpuContext) {
+    // SAFETY: SGDT/SIDT only read the current CPU descriptor-table registers into the provided storage.
+    unsafe {
+        core::arch::asm!("sgdt [{}]", in(reg) &mut context.gdtr, options(nostack, preserves_flags));
+        core::arch::asm!("sidt [{}]", in(reg) &mut context.idtr, options(nostack, preserves_flags));
+    }
+}
+
+fn save_fpu_state(context: &mut SavedCpuContext) {
+    // SAFETY: The kernel owns the current CPU at suspend entry and the FXSAVE buffer is 64-byte aligned.
+    unsafe {
+        core::arch::asm!(
+            "fxsave64 [{}]",
+            in(reg) context.fpu.bytes.as_mut_ptr(),
+        );
+    }
+}
+
+fn restore_fpu_state(context: &SavedCpuContext) {
+    // SAFETY: The saved FXSAVE image belongs to the same CPU context and matches the restore instruction.
+    unsafe {
+        core::arch::asm!(
+            "fxrstor64 [{}]",
+            in(reg) context.fpu.bytes.as_ptr(),
+        );
+    }
+}
+
+fn save_cpu_context(entry_rsp: usize) -> SavedCpuContext {
+    let mut context = SavedCpuContext {
+        entry_rsp,
+        ..SavedCpuContext::default()
+    };
+
+    // SAFETY: Reading control registers and MSRs is required to reconstruct the CPU execution state on wake.
+    unsafe {
+        core::arch::asm!(
+            "mov {}, cr0",
+            out(reg) context.cr0,
+            options(nostack, preserves_flags)
+        );
+        core::arch::asm!(
+            "mov {}, cr2",
+            out(reg) context.cr2,
+            options(nostack, preserves_flags)
+        );
+        core::arch::asm!(
+            "mov {}, cr3",
+            out(reg) context.cr3,
+            options(nostack, preserves_flags)
+        );
+        core::arch::asm!(
+            "mov {}, cr4",
+            out(reg) context.cr4,
+            options(nostack, preserves_flags)
+        );
+        core::arch::asm!(
+            "pushfq",
+            "pop {}",
+            out(reg) context.rflags,
+            options(preserves_flags)
+        );
+        core::arch::asm!("mov {}, rsp", out(reg) context.runtime_rsp, options(nostack, preserves_flags));
+
+        context.efer = x86::msr::rdmsr(x86::msr::IA32_EFER);
+        context.fs_base = x86::msr::rdmsr(x86::msr::IA32_FS_BASE);
+        context.gs_base = x86::msr::rdmsr(x86::msr::IA32_GS_BASE);
+        context.kernel_gs_base = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE);
+    }
+
+    save_descriptor_tables(&mut context);
+    save_fpu_state(&mut context);
+    context
+}
+
+fn set_firmware_waking_vector(
+    facs: &mut PhysicalMapping<KernelAcpiHandler, Facs>,
+    vector: usize,
+) {
+    facs.firmware_waking_vector = vector as u32;
+    facs.x_firmware_waking_vector = vector as u64;
+}
+
+fn write_pm1_control_block(
+    registers: &FixedRegisters<KernelAcpiHandler>,
+    sleep_type: SleepTypeData,
+) -> Result<(), SleepError> {
+    let current_a = registers
+        .pm1_control_registers
+        .pm1a
+        .read()
+        .map_err(|_| SleepError::UnsupportedPmControl)? as u16;
+    let armed_a = (current_a & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.a;
+
+    registers
+        .pm1_control_registers
+        .pm1a
+        .write(u64::from(armed_a))
+        .map_err(|_| SleepError::UnsupportedPmControl)?;
+
+    if let Some(pm1b) = &registers.pm1_control_registers.pm1b {
+        let current_b = pm1b.read().map_err(|_| SleepError::UnsupportedPmControl)? as u16;
+        let armed_b = (current_b & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.b;
+        pm1b.write(u64::from(armed_b))
+            .map_err(|_| SleepError::UnsupportedPmControl)?;
+        pm1b.write(u64::from(armed_b | ACPI_SLP_EN))
+            .map_err(|_| SleepError::UnsupportedPmControl)?;
+    }
+
+    // SAFETY: WBINVD is required here to flush dirty cache lines before firmware powers down the CPU package.
+    unsafe {
+        core::arch::asm!("wbinvd", options(nostack, preserves_flags));
+    }
+
+    registers
+        .pm1_control_registers
+        .pm1a
+        .write(u64::from(armed_a | ACPI_SLP_EN))
+        .map_err(|_| SleepError::UnsupportedPmControl)?;
+
+    Ok(())
+}
+
+#[unsafe(naked)]
+unsafe extern "sysv64" fn enter_sleep_raw(state: usize) -> usize {
+    core::arch::naked_asm!(
+        "mov rsi, rsp",
+        "jmp {inner}",
+        inner = sym enter_sleep_raw_inner,
+    );
+}
+
+extern "C" fn enter_sleep_raw_inner(state: usize, entry_rsp: usize) -> usize {
+    let state = match state {
+        3 => SleepState::S3,
+        5 => SleepState::S5,
+        _ => return SleepError::InvalidSleepObject.code(),
+    };
+
+    let (registers, mut facs, interpreter) = match load_interpreter() {
+        Ok(tuple) => tuple,
+        Err(error) => return error.code(),
+    };
+    let sleep_type = match sleep_type_data_from_interpreter(&interpreter, state) {
+        Ok(data) => data,
+        Err(error) => return error.code(),
+    };
+
+    let mut context = save_cpu_context(entry_rsp);
+    context.facs_address = facs.physical_start;
+    install_wake_trampoline(context.runtime_rsp, context.cr3);
+    set_firmware_waking_vector(&mut facs, WAKE_TRAMPOLINE_PHYS);
+
+    {
+        let mut saved = SAVED_CONTEXT.lock();
+        *saved = Some(context);
+    }
+
+    // SAFETY: Suspend entry must not be interrupted while the wake vector and PM1 control block are being armed.
+    unsafe {
+        interrupt::disable();
+    }
+
+    if let Err(error) = write_pm1_control_block(registers.as_ref(), sleep_type) {
+        return error.code();
+    }
+
+    // SAFETY: The final CLI+HLT sequence is the architectural handoff point after asserting SLP_EN.
+    unsafe {
+        core::arch::asm!("cli; hlt", options(nostack));
+    }
+
+    SleepError::SleepDidNotEnter.code()
+}
+
+extern "C" fn resume_from_s3_trampoline() -> ! {
+    let mut saved = SAVED_CONTEXT.lock();
+    let context = saved
+        .take()
+        .expect("S3 wake trampoline resumed without saved CPU context");
+    drop(saved);
+
+    // SAFETY: The saved FACS physical address was captured from the validated FADT during suspend entry.
+    if context.facs_address != 0 {
+        let mut facs = unsafe {
+            KernelAcpiHandler.map_physical_region::<Facs>(
+                context.facs_address,
+                core::mem::size_of::<Facs>(),
+            )
+        };
+        set_firmware_waking_vector(&mut facs, 0);
+    }
+
+    // SAFETY: The wake trampoline already switched to the saved kernel CR3 and long mode, so the remaining restores are architectural register state only.
+    unsafe {
+        x86::msr::wrmsr(x86::msr::IA32_EFER, context.efer);
+        core::arch::asm!("mov cr3, {}", in(reg) context.cr3, options(nostack));
+        core::arch::asm!("mov cr4, {}", in(reg) context.cr4, options(nostack));
+        core::arch::asm!("mov cr2, {}", in(reg) context.cr2, options(nostack));
+        core::arch::asm!("mov cr0, {}", in(reg) context.cr0, options(nostack));
+        core::arch::asm!("lgdt [{}]", in(reg) &context.gdtr, options(nostack));
+        core::arch::asm!("lidt [{}]", in(reg) &context.idtr, options(nostack));
+
+        task::load_tr(SegmentSelector::new(crate::arch::gdt::GDT_TSS as u16, Ring::Ring0));
+
+        x86::msr::wrmsr(x86::msr::IA32_FS_BASE, context.fs_base);
+        x86::msr::wrmsr(x86::msr::IA32_GS_BASE, context.gs_base);
+        x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, context.kernel_gs_base);
+    }
+
+    restore_fpu_state(&context);
+
+    // SAFETY: Returning with the original entry stack and RFLAGS completes the suspend call as a successful function return.
+ unsafe { + core::arch::asm!( + "mov rsp, {entry_rsp}", + "push {rflags}", + "popfq", + "xor eax, eax", + "ret", + entry_rsp = in(reg) context.entry_rsp, + rflags = in(reg) context.rflags, + options(noreturn) + ); + } +} + +pub fn enter_sleep_state(state: SleepState) -> core::result::Result<(), SleepError> { + #[cfg(not(target_arch = "x86_64"))] + { + let _ = state; + return Err(SleepError::UnsupportedArch); + } + + #[cfg(target_arch = "x86_64")] + { + let raw = unsafe { + enter_sleep_raw(match state { + SleepState::S3 => 3, + SleepState::S5 => 5, + }) + }; + if raw == SLEEP_RETURN_OK { + Ok(()) + } else { + Err(SleepError::from_code(raw)) + } + } +} + +pub fn available_sleep_states() -> &'static [u8] { + if sleep_type_data(SleepState::S3).is_ok() { + b"S3\nS5\n" + } else { + b"S5\n" + } +} + +pub fn trigger_sleep_request(request: &str) -> Result<(), Error> { + match request.trim() { + "S3" => enter_sleep_state(SleepState::S3).map_err(|_| Error::new(EIO)), + "S5" => enter_sleep_state(SleepState::S5).map_err(|_| Error::new(EIO)), + _ => Err(Error::new(EINVAL)), + } +} diff --git a/src/arch/x86_shared/start.rs b/src/arch/x86_shared/start.rs index 7a7c0ae8..f1dbb6b4 100644 --- a/src/arch/x86_shared/start.rs +++ b/src/arch/x86_shared/start.rs @@ -82,6 +82,15 @@ extern "C" fn kstart() { /// The entry to Rust, all things must be initialized unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { unsafe { + // EARLY CANARY: write 'R' to COM1 before any kernel init. + // This proves the serial hardware works and the kernel reached Rust entry. + // If this character appears but "Redox OS starting..." does not, + // the hang is in args_ptr.read(), serial::init(), or graphical_debug::init(). + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'R', options(nostack, preserves_flags)); + } + let bootstrap = { let args = args_ptr.read(); @@ -91,27 +100,49 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { // Set up graphical debug graphical_debug::init(args.env()); + // SECOND CANARY: write 'S' to COM1 after serial init. + // If 'R' appears but 'S' does not, the hang is in serial::init() or graphical_debug::init(). 
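+            // (Reviewer sketch, not part of this patch: these repeated COM1 canary
+            // writes could be folded into one hypothetical helper, assuming the
+            // UART at the conventional 0x3F8 port:
+            //     unsafe fn com1_canary(byte: u8) {
+            //         core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") byte,
+            //             options(nostack, preserves_flags));
+            //     }
+            // so each checkpoint below becomes com1_canary(b'S'), com1_canary(b'1'), ...)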
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'S', options(nostack, preserves_flags)); + } + info!("Redox OS starting..."); args.print(); + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'1', options(nostack, preserves_flags)); } + // Set up GDT gdt::init_bsp(stack_end); + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'2', options(nostack, preserves_flags)); } + // Set up IDT idt::init_bsp(); + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'3', options(nostack, preserves_flags)); } + // Initialize RMM #[cfg(target_arch = "x86")] crate::startup::memory::init(&args, Some(0x100000), Some(0x40000000)); #[cfg(target_arch = "x86_64")] crate::startup::memory::init(&args, Some(0x100000), None); + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'4', options(nostack, preserves_flags)); } + // Initialize paging paging::init(); #[cfg(target_arch = "x86_64")] crate::arch::alternative::early_init(true); + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'5', options(nostack, preserves_flags)); } + // Set up syscall instruction interrupt::syscall::init(); @@ -121,6 +152,9 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { // Activate memory logging crate::log::init(); + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'6', options(nostack, preserves_flags)); } + // Initialize miscellaneous processor features #[cfg(target_arch = "x86_64")] crate::arch::misc::init(LogicalCpuId::BSP); @@ -128,6 +162,9 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! 
{ // Initialize devices device::init(); + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'7', options(nostack, preserves_flags)); } + // Read ACPI tables, starts APs if cfg!(feature = "acpi") { crate::acpi::init(args.acpi_rsdp()); diff --git a/src/asm/x86_64/s3_wakeup.asm b/src/asm/x86_64/s3_wakeup.asm new file mode 100644 index 00000000..7beeccf6 --- /dev/null +++ b/src/asm/x86_64/s3_wakeup.asm @@ -0,0 +1,110 @@ +; ACPI S3 wake trampoline +; compiled with nasm by build.rs, copied to physical 0x8000 before S3 entry + +ORG 0x8000 +SECTION .text +USE16 + +trampoline: + jmp short startup_wake + times 8 - ($ - trampoline) nop + .stack: dq 0 + .page_table: dq 0 + .code: dq 0 + +startup_wake: + cli + + xor ax, ax + mov ds, ax + mov es, ax + mov ss, ax + mov sp, 0 + + mov edi, [trampoline.page_table] + mov cr3, edi + + mov eax, cr0 + and al, 11110011b + or al, 00100010b + mov cr0, eax + + mov eax, cr4 + or eax, 1 << 9 | 1 << 7 | 1 << 5 | 1 << 4 + mov cr4, eax + + fninit + + lgdt [gdtr] + + mov ecx, 0xC0000080 + rdmsr + or eax, 1 << 11 | 1 << 8 + wrmsr + + mov ebx, cr0 + or ebx, 1 << 31 | 1 << 16 | 1 + mov cr0, ebx + + jmp gdt.kernel_code:long_mode_wake + +USE64 +long_mode_wake: + mov rax, gdt.kernel_data + mov ds, rax + mov es, rax + mov fs, rax + mov gs, rax + mov ss, rax + + mov rsp, [trampoline.stack] + mov rax, [trampoline.code] + jmp rax + +struc GDTEntry + .limitl resw 1 + .basel resw 1 + .basem resb 1 + .attribute resb 1 + .flags__limith resb 1 + .baseh resb 1 +endstruc + +attrib: + .present equ 1 << 7 + .user equ 1 << 4 + .code equ 1 << 3 + .writable equ 1 << 1 + +flags: + .long_mode equ 1 << 5 + +gdtr: + dw gdt.end + 1 + dq gdt + +gdt: +.null equ $ - gdt + dq 0 + +.kernel_code equ $ - gdt +istruc GDTEntry + at GDTEntry.limitl, dw 0 + at GDTEntry.basel, dw 0 + at GDTEntry.basem, db 0 + at GDTEntry.attribute, db attrib.present | attrib.user | attrib.code + at GDTEntry.flags__limith, db flags.long_mode + at GDTEntry.baseh, db 0 +iend + +.kernel_data equ $ - gdt +istruc GDTEntry + at GDTEntry.limitl, dw 0 + at GDTEntry.basel, dw 0 + at GDTEntry.basem, db 0 + at GDTEntry.attribute, db attrib.present | attrib.user | attrib.writable + at GDTEntry.flags__limith, db 0 + at GDTEntry.baseh, db 0 +iend + +.end equ $ - gdt diff --git a/src/context/context.rs b/src/context/context.rs index c97c5166..6d723f49 100644 --- a/src/context/context.rs +++ b/src/context/context.rs @@ -148,6 +148,8 @@ pub struct Context { pub euid: u32, pub egid: u32, pub pid: usize, + /// Supplementary group IDs for access control decisions. 
+    pub groups: Vec<u32>,
 
     // See [`PreemptGuard`]
     //
@@ -204,6 +206,7 @@ impl Context {
             euid: 0,
             egid: 0,
             pid: 0,
+            groups: Vec::new(),
 
             #[cfg(feature = "syscall_debug")]
             syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(),
@@ -479,6 +482,7 @@ impl Context {
             uid: self.euid,
             gid: self.egid,
             pid: self.pid,
+            groups: self.groups.clone(),
         }
     }
 }
diff --git a/src/context/file.rs b/src/context/file.rs
index 2d3790f1..150f483a 100644
--- a/src/context/file.rs
+++ b/src/context/file.rs
@@ -4,7 +4,7 @@ use crate::{
     event,
     scheme::{self, SchemeId},
     sync::{CleanLockToken, RwLock, L6},
-    syscall::error::Result,
+    syscall::error::{Error, Result, ESTALE},
 };
 use alloc::sync::Arc;
 use syscall::{schemev2::NewFdFlags, RwFlags, O_APPEND, O_NONBLOCK};
@@ -18,6 +18,7 @@ pub struct FileDescription {
     pub offset: u64,
     /// The scheme that this file refers to
     pub scheme: SchemeId,
+    pub scheme_generation: Option<u64>,
     /// The number the scheme uses to refer to this file
     pub number: usize,
     /// The flags passed to open or fcntl(SETFL)
@@ -32,6 +33,52 @@ bitflags! {
     }
 }
 
 impl FileDescription {
+    pub fn with_generation(
+        scheme: SchemeId,
+        scheme_generation: Option<u64>,
+        number: usize,
+        offset: u64,
+        flags: u32,
+        internal_flags: InternalFlags,
+    ) -> Self {
+        Self {
+            offset,
+            scheme,
+            scheme_generation,
+            number,
+            flags,
+            internal_flags,
+        }
+    }
+
+    pub fn new(
+        scheme: SchemeId,
+        number: usize,
+        offset: u64,
+        flags: u32,
+        internal_flags: InternalFlags,
+        token: &mut CleanLockToken,
+    ) -> Self {
+        Self::with_generation(
+            scheme,
+            Some(scheme::current_scheme_generation(token.token(), scheme)),
+            number,
+            offset,
+            flags,
+            internal_flags,
+        )
+    }
+
+    pub fn get_scheme(&self, token: &mut CleanLockToken) -> Result<scheme::KernelSchemes> {
+        if let Some(expected_generation) = self.scheme_generation
+            && expected_generation != scheme::current_scheme_generation(token.token(), self.scheme)
+        {
+            return Err(Error::new(ESTALE));
+        }
+
+        scheme::get_scheme(token.token(), self.scheme)
+    }
+
     pub fn rw_flags(&self, rw: RwFlags) -> u32 {
         let mut ret = self.flags & !(O_NONBLOCK | O_APPEND) as u32;
         if rw.contains(RwFlags::APPEND) {
@@ -76,7 +123,7 @@ impl FileDescription {
     pub fn try_close(self, token: &mut CleanLockToken) -> Result<()> {
         event::unregister_file(self.scheme, self.number, token);
 
-        let scheme = scheme::get_scheme(token.token(), self.scheme)?;
+        let scheme = self.get_scheme(token)?;
 
         scheme.close(self.number, token)
     }
@@ -85,12 +132,12 @@ impl FileDescriptor {
     pub fn close(self, token: &mut CleanLockToken) -> Result<()> {
         {
-            let (scheme_id, number, internal_flags) = {
+            let (desc, number, internal_flags) = {
                 let desc = self.description.read(token.token());
-                (desc.scheme, desc.number, desc.internal_flags)
+                (*desc, desc.number, desc.internal_flags)
             };
             if internal_flags.contains(InternalFlags::NOTIFY_ON_NEXT_DETACH) {
-                let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+                let scheme = desc.get_scheme(token)?;
                 scheme.detach(number, token)?;
             }
         }
diff --git a/src/context/memory.rs b/src/context/memory.rs
index 93446ba7..127a34fd 100644
--- a/src/context/memory.rs
+++ b/src/context/memory.rs
@@ -64,14 +64,13 @@ impl UnmapResult {
             return Ok(());
         };
 
-        let (scheme_id, number) = {
-            let desc = description.write(token.token());
-            (desc.scheme, desc.number)
+        let (scheme, number) = {
+            let desc = *description.read(token.token());
+            (desc.get_scheme(token)?, desc.number)
         };
 
-        let scheme_opt = scheme::get_scheme(token.token(), scheme_id);
-        let funmap_result = scheme_opt
-            .and_then(|scheme|
scheme.kfunmap(number, base_offset, self.size, self.flags, token)); + let funmap_result = scheme + .kfunmap(number, base_offset, self.size, self.flags, token); if let Ok(fd) = Arc::try_unwrap(description) { fd.into_inner().try_close(token)?; @@ -2687,20 +2686,13 @@ fn correct_inner<'l>( // XXX: This is cheating, but guaranteed we won't deadlock because we've dropped addr_space_guard let mut token = unsafe { CleanLockToken::new() }; - let (scheme_id, scheme_number) = { - let desc = &file_ref.description.read(token.token()); - (desc.scheme, desc.number) + let desc = *file_ref.description.read(token.token()); + let scheme = desc.get_scheme(&mut token).map_err(|_| PfError::Segv)?; + let scheme_number = desc.number; + let user_inner = match scheme { + KernelSchemes::User(user) => user.inner, + _ => return Err(PfError::Segv), }; - let user_inner = scheme::get_scheme(token.token(), scheme_id) - .ok() - .and_then(|s| { - if let KernelSchemes::User(user) = s { - Some(user.inner) - } else { - None - } - }) - .ok_or(PfError::Segv)?; let offset = file_ref.base_offset as u64 + (pages_from_grant_start * PAGE_SIZE) as u64; user_inner diff --git a/src/scheme/acpi.rs b/src/scheme/acpi.rs index 87570a12..5d734691 100644 --- a/src/scheme/acpi.rs +++ b/src/scheme/acpi.rs @@ -10,6 +10,7 @@ use syscall::{ use crate::{ acpi::{RxsdtEnum, RXSDT_ENUM}, + arch::sleep, context::file::InternalFlags, event, sync::{CleanLockToken, RwLock, WaitCondition, L1}, @@ -40,6 +41,7 @@ enum HandleKind { TopLevel, Rxsdt, ShutdownPipe, + SleepControl, SchemeRoot, } @@ -146,11 +148,11 @@ impl KernelScheme for AcpiScheme { if flags & O_EXCL == O_EXCL || flags & O_SYMLINK == O_SYMLINK { return Err(Error::new(EINVAL)); } - if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { - return Err(Error::new(EROFS)); - } let (handle_kind, int_flags) = match path { "" => { + if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { + return Err(Error::new(EROFS)); + } if flags & O_DIRECTORY != O_DIRECTORY && flags & O_STAT != O_STAT { return Err(Error::new(EISDIR)); } @@ -158,17 +160,36 @@ impl KernelScheme for AcpiScheme { (HandleKind::TopLevel, InternalFlags::POSITIONED) } "rxsdt" => { + if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { + return Err(Error::new(EROFS)); + } if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { return Err(Error::new(ENOTDIR)); } (HandleKind::Rxsdt, InternalFlags::POSITIONED) } "kstop" => { + if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT { + return Err(Error::new(EROFS)); + } if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { return Err(Error::new(ENOTDIR)); } (HandleKind::ShutdownPipe, InternalFlags::empty()) } + "sleep" => { + if flags & O_ACCMODE == O_RDONLY || flags & O_STAT == O_STAT { + // allowed + } else if flags & O_ACCMODE != syscall::flag::O_WRONLY + && flags & O_ACCMODE != syscall::flag::O_RDWR + { + return Err(Error::new(EINVAL)); + } + if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT { + return Err(Error::new(ENOTDIR)); + } + (HandleKind::SleepControl, InternalFlags::POSITIONED) + } _ => return Err(Error::new(ENOENT)), }; @@ -191,6 +212,7 @@ impl KernelScheme for AcpiScheme { Ok(match handle.kind { HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?.len() as u64, HandleKind::ShutdownPipe => 1, + HandleKind::SleepControl => sleep::available_sleep_states().len() as u64, HandleKind::TopLevel => 0, HandleKind::SchemeRoot => return Err(Error::new(EBADF))?, }) @@ -253,6 +275,7 @@ impl KernelScheme for 
AcpiScheme { return dst_buf.copy_exactly(&[0x42]).map(|()| 1); } + HandleKind::SleepControl => sleep::available_sleep_states(), HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?, HandleKind::TopLevel => return Err(Error::new(EISDIR)), HandleKind::SchemeRoot => return Err(Error::new(EBADF)), @@ -295,11 +318,45 @@ impl KernelScheme for AcpiScheme { kind: DirentKind::Socket, name: "kstop", inode: 0, + next_opaque_id: 2, + })?; + } + if opaque <= 2 { + buf.entry(DirEntry { + kind: DirentKind::Regular, + name: "sleep", + inode: 0, next_opaque_id: u64::MAX, })?; } Ok(buf.finalize()) } + fn kwrite( + &self, + id: usize, + buf: crate::syscall::usercopy::UserSliceRo, + _flags: u32, + _stored_flags: u32, + token: &mut CleanLockToken, + ) -> Result { + let handle = *HANDLES.read(token.token()).get(id)?; + + if handle.stat { + return Err(Error::new(EBADF)); + } + + match handle.kind { + HandleKind::SleepControl => { + let mut tmp = [0_u8; 16]; + let len = buf.copy_common_bytes_to_slice(&mut tmp)?; + let request = core::str::from_utf8(&tmp[..len]).map_err(|_| Error::new(EINVAL))?; + sleep::trigger_sleep_request(request)?; + Ok(len) + } + HandleKind::SchemeRoot => Err(Error::new(EBADF)), + _ => Err(Error::new(EBADF)), + } + } fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result { //TODO: construct useful path? buf.copy_common_bytes_from_slice("/scheme/kernel.acpi/".as_bytes()) @@ -328,6 +385,11 @@ impl KernelScheme for AcpiScheme { st_size: 1, ..Default::default() }, + HandleKind::SleepControl => Stat { + st_mode: MODE_FILE, + st_size: sleep::available_sleep_states().len().try_into().unwrap_or(u64::MAX), + ..Default::default() + }, HandleKind::SchemeRoot => return Err(Error::new(EBADF)), })?; diff --git a/src/scheme/debug.rs b/src/scheme/debug.rs index c70ac579..4a23b3cf 100644 --- a/src/scheme/debug.rs +++ b/src/scheme/debug.rs @@ -22,9 +22,10 @@ struct Handle { static HANDLES: RwLock> = RwLock::new(HandleMap::new()); -/// Add to the input queue +/// Add to the input queue, translating CR to NL (ICRNL) for serial console compatibility. 
 pub fn debug_input(data: u8, token: &mut CleanLockToken) {
-    INPUT.send(data, token);
+    let translated = if data == b'\r' { b'\n' } else { data };
+    INPUT.send(translated, token);
 }
 
 // Notify readers of input updates
@@ -106,12 +107,16 @@ impl KernelScheme for DebugScheme {
     fn fevent(
         &self,
         id: usize,
-        _flags: EventFlags,
+        flags: EventFlags,
         token: &mut CleanLockToken,
     ) -> Result<EventFlags> {
         let _handle = *HANDLES.read(token.token()).get(id)?;
 
-        Ok(EventFlags::empty())
+        let mut ready = EventFlags::empty();
+        if flags.contains(EventFlags::EVENT_READ) {
+            ready |= EventFlags::EVENT_READ;
+        }
+        Ok(ready)
     }
 
     fn fsync(&self, id: usize, token: &mut CleanLockToken) -> Result<()> {
diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs
index d30272c1..765e547f 100644
--- a/src/scheme/mod.rs
+++ b/src/scheme/mod.rs
@@ -14,7 +14,7 @@ use alloc::{
 };
 use core::{
     str,
-    sync::atomic::{AtomicUsize, Ordering},
+    sync::atomic::{AtomicU64, AtomicUsize, Ordering},
 };
 use hashbrown::hash_map::{self, DefaultHashBuilder, HashMap};
 use spin::Once;
@@ -169,6 +169,7 @@ enum Handle {
 
 /// Schemes list
 static HANDLES: Once<RwLock<HashMap<SchemeId, Handle>>> = Once::new();
+static SCHEME_GENERATIONS: Once<RwLock<HashMap<SchemeId, AtomicU64>>> = Once::new();
 static SCHEME_LIST_NEXT_ID: AtomicUsize = AtomicUsize::new(MAX_GLOBAL_SCHEMES);
 static SCHEME_LIST_ID: AtomicUsize = AtomicUsize::new(0);
@@ -204,6 +205,10 @@ fn init_schemes() -> RwLock<HashMap<SchemeId, Handle>> {
     RwLock::new(handles)
 }
 
+fn init_scheme_generations() -> RwLock<HashMap<SchemeId, AtomicU64>> {
+    RwLock::new(HashMap::new())
+}
+
 /// Get a handle to a scheme.
 pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result<KernelSchemes> {
     match handles().read(token).get(&scheme_id) {
@@ -212,10 +217,33 @@ pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result
     }
 }
 
+pub fn current_scheme_generation(token: LockToken<'_, L0>, scheme_id: SchemeId) -> u64 {
+    scheme_generations()
+        .read(token)
+        .get(&scheme_id)
+        .map(|generation| generation.load(Ordering::Acquire))
+        .unwrap_or(0)
+}
+
 fn handles<'a>() -> &'a RwLock<HashMap<SchemeId, Handle>> {
     HANDLES.call_once(init_schemes)
 }
 
+fn scheme_generations<'a>() -> &'a RwLock<HashMap<SchemeId, AtomicU64>> {
+    SCHEME_GENERATIONS.call_once(init_scheme_generations)
+}
+
+fn increment_scheme_generation(scheme_id: SchemeId, token: &mut CleanLockToken) {
+    match scheme_generations().write(token.token()).entry(scheme_id) {
+        hash_map::Entry::Occupied(entry) => {
+            entry.get().fetch_add(1, Ordering::AcqRel);
+        }
+        hash_map::Entry::Vacant(entry) => {
+            entry.insert(AtomicU64::new(1));
+        }
+    }
+}
+
 /// Scheme list type
 pub struct SchemeList;
@@ -260,9 +288,14 @@ impl SchemeList {
     /// Remove a scheme
     fn remove(&self, id: usize, token: &mut CleanLockToken) {
-        let scheme = handles().write(token.token()).remove(&SchemeId(id));
+        let scheme_id = SchemeId(id);
+        let scheme = handles().write(token.token()).remove(&scheme_id);
         assert!(scheme.is_some());
 
+        if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme.as_ref() {
+            user.inner.fail_pending_calls(token);
+        }
+        increment_scheme_generation(scheme_id, token);
         if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme
             && let Some(user) = Arc::into_inner(user.inner)
         {
@@ -287,32 +320,32 @@ impl KernelScheme for SchemeList {
         token: &mut CleanLockToken,
     ) -> Result<OpenResult> {
         let scheme_id = SchemeId(scheme_id);
-        match handles()
-            .read(token.token())
-            .get(&scheme_id)
-            .ok_or(Error::new(EBADF))?
-        {
-            Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => {
-                let inner = inner.clone();
-                assert!(scheme_id == inner.scheme_id);
-                let scheme = scheme_id;
-                let params = unsafe { user_buf.read_exact::()? };
-
-                return Ok(OpenResult::External(Arc::new(RwLock::new(
-                    FileDescription {
-                        scheme,
-                        number: params.number,
-                        offset: params.offset,
-                        flags: params.flags as u32,
-                        internal_flags: InternalFlags::from_extra0(params.internal_flags)
-                            .ok_or(Error::new(EINVAL))?,
-                    },
-                ))));
+        let maybe_inner = {
+            let handles = handles().read(token.token());
+            match handles.get(&scheme_id).ok_or(Error::new(EBADF))? {
+                Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => Some(inner.clone()),
+                Handle::SchemeCreationCapability => None,
+                _ => return Err(Error::new(EBADF)),
             }
-            Handle::SchemeCreationCapability => (),
-            _ => return Err(Error::new(EBADF)),
         };
 
+        if let Some(inner) = maybe_inner {
+            assert!(scheme_id == inner.scheme_id);
+            let params = unsafe { user_buf.read_exact::()? };
+
+            return Ok(OpenResult::External(Arc::new(RwLock::new(
+                FileDescription::new(
+                    scheme_id,
+                    params.number,
+                    params.offset,
+                    params.flags as u32,
+                    InternalFlags::from_extra0(params.internal_flags)
+                        .ok_or(Error::new(EINVAL))?,
+                    token,
+                ),
+            ))));
+        }
+
         const EXPECTED: &[u8] = b"create-scheme";
         let mut buf = [0u8; EXPECTED.len()];
@@ -777,6 +810,7 @@ pub struct CallerCtx {
     pub pid: usize,
     pub uid: u32,
     pub gid: u32,
+    pub groups: alloc::vec::Vec<u32>,
 }
 
 impl CallerCtx {
     pub fn filter_uid_gid(self, euid: u32, egid: u32) -> Self {
@@ -785,6 +819,7 @@ impl CallerCtx {
                 pid: self.pid,
                 uid: euid,
                 gid: egid,
+                groups: self.groups,
             }
         } else {
             self
diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs
index 47588e10..f38c4aec 100644
--- a/src/scheme/proc.rs
+++ b/src/scheme/proc.rs
@@ -105,6 +105,7 @@ enum ContextHandle {
     // Attr handles, to set ens/euid/egid/pid.
     Authority,
     Attr,
+    Groups,
 
     Status {
         privileged: bool,
@@ -261,6 +262,7 @@ impl ProcScheme {
         let handle = match actual_name {
             "attrs" => ContextHandle::Attr,
             "status" => ContextHandle::Status { privileged: true },
+            "groups" => ContextHandle::Groups,
             _ => return Err(Error::new(ENOENT)),
         };
@@ -306,6 +308,11 @@ impl ProcScheme {
                 let id = NonZeroUsize::new(NEXT_ID.fetch_add(1, Ordering::Relaxed))
                     .ok_or(Error::new(EMFILE))?;
                 let context = context::spawn(true, Some(id), ret, token)?;
+                {
+                    let parent_groups =
+                        context::current().read(token.token()).groups.clone();
+                    context.write(token.token()).groups = parent_groups;
+                }
                 HANDLES.write(token.token()).insert(
                     id.get(),
                     Handle {
@@ -849,17 +856,17 @@ impl KernelScheme for ProcScheme {
     }
 }
 
 fn extract_scheme_number(fd: usize, token: &mut CleanLockToken) -> Result<(KernelSchemes, usize)> {
-    let (scheme_id, number) = {
+    let desc = {
         let current_lock = context::current();
         let mut current = current_lock.read(token.token());
-        let (context, mut token) = current.token_split();
+        let (context, mut context_token) = current.token_split();
         let file_descriptor = context
-            .get_file(FileHandle::from(fd), &mut token)
+            .get_file(FileHandle::from(fd), &mut context_token)
             .ok_or(Error::new(EBADF))?;
-        let desc = file_descriptor.description.read(token.token());
-        (desc.scheme, desc.number)
+        *file_descriptor.description.read(context_token.token())
     };
-    let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+    let scheme = desc.get_scheme(token)?;
+    let number = desc.number;
 
     Ok((scheme, number))
 }
@@ -1271,6 +1278,39 @@ impl ContextHandle {
                 guard.prio = (info.prio as usize).min(39);
                 Ok(size_of::())
             }
+            Self::Groups => {
+                const NGROUPS_MAX: usize = 65536;
+                if buf.len() % size_of::<u32>() != 0 {
+                    return Err(Error::new(EINVAL));
+                }
+                let count = buf.len() / size_of::<u32>();
+                if count > NGROUPS_MAX {
+                    return Err(Error::new(EINVAL));
+                }
+                let mut groups = Vec::with_capacity(count);
+                for chunk in buf.in_exact_chunks(size_of::<u32>()).take(count) {
+                    groups.push(chunk.read_u32()?);
+                }
+                let proc_id = {
+                    let guard = context.read(token.token());
+                    guard.owner_proc_id
+                };
+                {
+                    let mut guard = context.write(token.token());
+                    guard.groups = groups.clone();
+                }
+                if let Some(pid) = proc_id {
+                    let mut contexts = context::contexts(token.downgrade());
+                    let (contexts, mut t) = contexts.token_split();
+                    for context_ref in contexts.iter() {
+                        let mut ctx = context_ref.write(t.token());
+                        if ctx.owner_proc_id == Some(pid) {
+                            ctx.groups = groups.clone();
+                        }
+                    }
+                }
+                Ok(count * size_of::<u32>())
+            }
             ContextHandle::OpenViaDup => {
                 let mut args = buf.usizes();
@@ -1475,6 +1515,15 @@ impl ContextHandle {
                     debug_name,
                 })
             }
+            Self::Groups => {
+                let c = &context.read(token.token());
+                let max = buf.len() / size_of::<u32>();
+                let count = c.groups.len().min(max);
+                for (chunk, gid) in buf.in_exact_chunks(size_of::<u32>()).zip(&c.groups).take(count) {
+                    chunk.copy_from_slice(&gid.to_ne_bytes())?;
+                }
+                Ok(count * size_of::<u32>())
+            }
             ContextHandle::Sighandler => {
                 let data = match context.read(token.token()).sig {
                     Some(ref sig) => SetSighandlerData {
diff --git a/src/scheme/user.rs b/src/scheme/user.rs
index b9013021..dfbf66b1 100644
--- a/src/scheme/user.rs
+++ b/src/scheme/user.rs
@@ -80,6 +80,7 @@ const ONE: NonZeroUsize = match NonZeroUsize::new(1) {
     Some(one) => one,
     None => unreachable!(),
 };
+const MAX_SPURIOUS_WAKEUPS: usize = 100;
 
 enum ParsedCqe {
     TriggerFevent {
@@ -209,6 +210,8 @@ impl UserInner {
         caller_responsible: &mut PageSpan,
         token: &mut CleanLockToken,
     ) -> Result<Response> {
+        let mut remaining_spurious_wakeups = MAX_SPURIOUS_WAKEUPS;
+
         {
             // Disable preemption to avoid context switches between setting the
             // process state and sending the scheme request. The process is made
@@ -261,7 +264,10 @@ impl UserInner {
             };
 
             let states = self.states.lock(token.token());
-            let (mut states, mut token) = states.into_split();
+            let (mut states, mut state_token) = states.into_split();
+            let mut timed_out_descriptions = None;
+            let mut remove_state = false;
+            let mut timed_out = false;
 
             match states.get_mut(sqe.tag as usize) {
                 // invalid state
                 None => return Err(Error::new(EBADFD)),
@@ -274,24 +280,35 @@ impl UserInner {
                     fds,
                 } => {
                     let maybe_eintr =
-                        eintr_if_sigkill(&mut callee_responsible, &mut token.token());
-                    *o = State::Waiting {
-                        canceling: true,
-                        callee_responsible,
-                        context,
-                        fds,
-                    };
+                        eintr_if_sigkill(&mut callee_responsible, &mut state_token.token());
 
-                    maybe_eintr?;
+                    if maybe_eintr.is_ok() {
+                        remaining_spurious_wakeups =
+                            remaining_spurious_wakeups.saturating_sub(1);
+                    }
+
+                    if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 {
+                        timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds));
+                        remove_state = true;
+                    } else {
+                        *o = State::Waiting {
+                            canceling: true,
+                            callee_responsible,
+                            context,
+                            fds,
+                        };
+                    }
 
-                    context::current()
-                        .write(token.token())
-                        .block("UserInner::call (woken up after cancelation request)");
+                    maybe_eintr?;
 
-                    // We do not want to drop the lock before blocking
-                    // as if we get preempted in between we might miss a
-                    // wakeup.
- drop(states); + if remove_state { + states.remove(sqe.tag as usize); + timed_out = true; + } else { + context::current() + .write(state_token.token()) + .block("UserInner::call (woken up after cancelation request)"); + } } // spurious wakeup State::Waiting { @@ -300,60 +317,76 @@ impl UserInner { context, mut callee_responsible, } => { - let maybe_eintr = eintr_if_sigkill(&mut callee_responsible, &mut token); let current_context = context::current(); + let maybe_eintr = + eintr_if_sigkill(&mut callee_responsible, &mut state_token); + + if maybe_eintr.is_ok() { + remaining_spurious_wakeups = + remaining_spurious_wakeups.saturating_sub(1); + } - *o = State::Waiting { - // Currently we treat all spurious wakeups to have the same behavior - // as signals (i.e., we send a cancellation request). It is not something - // that should happen, but it certainly can happen, for example if a context - // is awoken through its thread handle without setting any sig bits, or if the - // caller clears its own sig bits. If it actually is a signal, then it is the - // intended behavior. - canceling: true, - fds, - context, - callee_responsible, - }; + if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 { + timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds)); + remove_state = true; + } else { + *o = State::Waiting { + // Currently we treat all spurious wakeups to have the same behavior + // as signals (i.e., we send a cancellation request). It is not something + // that should happen, but it certainly can happen, for example if a context + // is awoken through its thread handle without setting any sig bits, or if the + // caller clears its own sig bits. If it actually is a signal, then it is the + // intended behavior. + canceling: true, + fds, + context, + callee_responsible, + }; + } maybe_eintr?; - // We do not want to preempt between sending the - // cancellation and blocking again where we might - // miss a wakeup. - let mut preempt = PreemptGuardL1::new(¤t_context, &mut token); - let token = preempt.token(); - - self.todo.send_locked( - Sqe { - opcode: Opcode::Cancel as u8, - sqe_flags: SqeFlags::ONEWAY, - tag: sqe.tag, - ..Default::default() - }, - token.token(), - ); - event::trigger_locked( - self.root_id, - self.scheme_id.get(), - EVENT_READ, - token.token(), - ); - - // 1. If cancellation was requested and arrived - // before the scheme processed the request, an - // acknowledgement will be sent back after the - // cancellation is processed and we will be woken up - // again. State will be State::Responded then. - // - // 2. If cancellation was requested but the scheme - // already processed the request, we will receive - // the actual response next and woken up again. - // State will be State::Responded then. - context::current() - .write(token.token()) - .block("UserInner::call (spurious wakeup)"); - drop(states); + if remove_state { + states.remove(sqe.tag as usize); + timed_out = true; + } else { + // We do not want to preempt between sending the + // cancellation and blocking again where we might + // miss a wakeup. + let mut preempt = + PreemptGuardL1::new(¤t_context, &mut state_token); + let token = preempt.token(); + + self.todo.send_locked( + Sqe { + opcode: Opcode::Cancel as u8, + sqe_flags: SqeFlags::ONEWAY, + tag: sqe.tag, + ..Default::default() + }, + token.token(), + ); + event::trigger_locked( + self.root_id, + self.scheme_id.get(), + EVENT_READ, + token.token(), + ); + + // 1. 
+                        //    before the scheme processed the request, an
+                        //    acknowledgement will be sent back after the
+                        //    cancellation is processed and we will be woken up
+                        //    again. State will be State::Responded then.
+                        //
+                        // 2. If cancellation was requested but the scheme
+                        //    already processed the request, we will receive
+                        //    the actual response next and be woken up again.
+                        //    State will be State::Responded then.
+                        context::current()
+                            .write(token.token())
+                            .block("UserInner::call (spurious wakeup)");
+                    }
                 }
 
                 // invalid state
@@ -368,7 +401,67 @@ impl UserInner {
                 }
             },
         }
+
+            if let Some(descriptions) = timed_out_descriptions {
+                drop(states);
+                for desc in descriptions {
+                    let _ = desc.try_close(token);
+                }
+            }
+
+            if timed_out {
+                return Err(Error::new(ETIMEDOUT));
+            }
+        }
+    }
+
+    fn collect_descriptions_to_close(
+        fds: Vec<Arc<RwLock<FileDescription>>>,
+    ) -> Vec<FileDescription> {
+        fds.into_iter()
+            .filter_map(|fd| Arc::try_unwrap(fd).ok())
+            .map(RwLock::into_inner)
+            .collect()
+    }
+
+    pub fn fail_pending_calls(&self, token: &mut CleanLockToken) {
+        let descriptions_to_close = {
+            let mut states_lock = self.states.lock(token.token());
+            let (states, mut lock_token) = states_lock.token_split();
+            let mut descriptions_to_close = Vec::new();
+            let mut states_to_remove = Vec::new();
+
+            for (id, state) in states.iter_mut() {
+                match mem::replace(state, State::Placeholder) {
+                    State::Waiting { context, fds, .. } => {
+                        descriptions_to_close.extend(Self::collect_descriptions_to_close(fds));
+
+                        match context.upgrade() {
+                            Some(context) => {
+                                *state = State::Responded(Response::Regular(
+                                    Err(Error::new(ENODEV)),
+                                    0,
+                                    false,
+                                ));
+                                context.write(lock_token.token()).unblock();
+                            }
+                            None => states_to_remove.push(id),
+                        }
+                    }
+                    old_state => *state = old_state,
+                }
+            }
+
+            for id in states_to_remove {
+                states.remove(id);
+            }
+
+            descriptions_to_close
+        };
+
+        for desc in descriptions_to_close {
+            let _ = desc.try_close(token);
+        }
+    }
@@ -1283,6 +1376,7 @@ impl UserInner {
     }
 
     pub fn into_drop(self, token: &mut CleanLockToken) {
+        self.fail_pending_calls(token);
         self.todo.condition.into_drop(token);
     }
 }
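
Note on the MAX_SPURIOUS_WAKEUPS machinery above: each wakeup that carries no response decrements a per-call budget, and when the budget is exhausted the call removes its state entry, closes the borrowed descriptions, and fails with ETIMEDOUT instead of re-blocking forever. A self-contained sketch of that control flow under simplified assumptions (no locks, no scheduler; CallOutcome and wait_with_budget are stand-ins, not kernel types):

// Poll once per wakeup; a None result models a spurious wakeup with
// no response ready. The budget bounds how many times we re-block.
#[derive(Debug, PartialEq)]
enum CallOutcome {
    Responded(u64),
    TimedOut, // the kernel maps this to Err(ETIMEDOUT)
}

fn wait_with_budget(mut poll: impl FnMut() -> Option<u64>, mut budget: usize) -> CallOutcome {
    loop {
        if let Some(response) = poll() {
            return CallOutcome::Responded(response);
        }
        // Woken up with nothing to deliver: spend one unit of budget.
        budget = budget.saturating_sub(1);
        if budget == 0 {
            return CallOutcome::TimedOut;
        }
        // The real code re-blocks the context here, after sending a
        // cancellation request to the scheme.
    }
}

fn main() {
    let mut wakeups = 0;
    let outcome = wait_with_budget(
        || {
            wakeups += 1;
            None // a scheme that never answers
        },
        100,
    );
    assert_eq!(outcome, CallOutcome::TimedOut);
    assert_eq!(wakeups, 100);
}

The property the patch is careful about is that the state entry is removed under the same states lock that observed the timeout, so a late response from the scheme finds no Waiting entry rather than a dangling one; the descriptions are only closed after that lock is dropped.
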
Halting."); + loop { + core::hint::spin_loop(); + } + } + }; let kernel_base = kernel_area.start; let kernel_size = kernel_area.end.saturating_sub(kernel_area.start); // Map kernel at KERNEL_OFFSET diff --git a/src/startup/mod.rs b/src/startup/mod.rs index 8ad3cdf7..86aabc22 100644 --- a/src/startup/mod.rs +++ b/src/startup/mod.rs @@ -149,6 +149,15 @@ static BOOTSTRAP: spin::Once = spin::Once::new(); pub(crate) static AP_READY: AtomicBool = AtomicBool::new(false); static BSP_READY: AtomicBool = AtomicBool::new(false); +#[cold] +fn halt_boot(message: &str) -> ! { + print!("{message}"); + println!("Kernel boot cannot continue. Halting."); + loop { + hint::spin_loop(); + } +} + /// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this pub(crate) fn kmain(bootstrap: Bootstrap) -> ! { let mut token = unsafe { CleanLockToken::new() }; @@ -180,9 +189,7 @@ pub(crate) fn kmain(bootstrap: Bootstrap) -> ! { context.euid = 0; context.egid = 0; } - Err(err) => { - panic!("failed to spawn userspace_init: {:?}", err); - } + Err(_err) => halt_boot("FATAL: failed to spawn first userspace process userspace_init\n"), } run_userspace(&mut token) diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs index bf984641..10c6a92c 100644 --- a/src/syscall/fs.rs +++ b/src/syscall/fs.rs @@ -12,7 +12,7 @@ use crate::{ memory::{AddrSpace, GenericFlusher, Grant, PageSpan, TlbShootdownActions}, }, memory::{Page, VirtualAddress, PAGE_SIZE}, - scheme::{self, FileHandle, KernelScheme, OpenResult, StrOrBytes}, + scheme::{FileHandle, KernelScheme, OpenResult, StrOrBytes}, sync::{CleanLockToken, RwLock}, syscall::{data::Stat, error::*, flag::*}, }; @@ -45,7 +45,7 @@ pub fn file_op_generic_ext( (file, desc) }; - let scheme = scheme::get_scheme(token.token(), desc.scheme)?; + let scheme = desc.get_scheme(token)?; op(&*scheme, file.description, desc, token) } @@ -73,14 +73,18 @@ pub fn openat( ) -> Result { let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?; - let (scheme_id, number) = { + let desc = { let current_lock = context::current(); let mut current = current_lock.read(token.token()); - let (context, mut token) = current.token_split(); - let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?; - let desc = pipe.description.read(token.token()); - (desc.scheme, desc.number) + let (context, mut context_token) = current.token_split(); + let pipe = context + .get_file(fh, &mut context_token) + .ok_or(Error::new(EBADF))?; + *pipe.description.read(context_token.token()) }; + let scheme = desc.get_scheme(token)?; + let number = desc.number; + let scheme_id = desc.scheme; let caller_ctx = context::current() .read(token.token()) @@ -88,8 +92,6 @@ pub fn openat( .filter_uid_gid(euid, egid); let new_description = { - let scheme = scheme::get_scheme(token.token(), scheme_id)?; - let res = scheme.kopenat( number, StrOrBytes::from_str(&path_buf), @@ -101,13 +103,14 @@ pub fn openat( match res? 
diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs
index bf984641..10c6a92c 100644
--- a/src/syscall/fs.rs
+++ b/src/syscall/fs.rs
@@ -12,7 +12,7 @@ use crate::{
         memory::{AddrSpace, GenericFlusher, Grant, PageSpan, TlbShootdownActions},
     },
     memory::{Page, VirtualAddress, PAGE_SIZE},
-    scheme::{self, FileHandle, KernelScheme, OpenResult, StrOrBytes},
+    scheme::{FileHandle, KernelScheme, OpenResult, StrOrBytes},
     sync::{CleanLockToken, RwLock},
     syscall::{data::Stat, error::*, flag::*},
 };
@@ -45,7 +45,7 @@ pub fn file_op_generic_ext(
         (file, desc)
     };
 
-    let scheme = scheme::get_scheme(token.token(), desc.scheme)?;
+    let scheme = desc.get_scheme(token)?;
 
     op(&*scheme, file.description, desc, token)
 }
@@ -73,14 +73,18 @@ pub fn openat(
 ) -> Result<FileHandle> {
     let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?;
 
-    let (scheme_id, number) = {
+    let desc = {
         let current_lock = context::current();
         let mut current = current_lock.read(token.token());
-        let (context, mut token) = current.token_split();
-        let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?;
-        let desc = pipe.description.read(token.token());
-        (desc.scheme, desc.number)
+        let (context, mut context_token) = current.token_split();
+        let pipe = context
+            .get_file(fh, &mut context_token)
+            .ok_or(Error::new(EBADF))?;
+        *pipe.description.read(context_token.token())
     };
+    let scheme = desc.get_scheme(token)?;
+    let number = desc.number;
+    let scheme_id = desc.scheme;
 
     let caller_ctx = context::current()
         .read(token.token())
@@ -88,8 +92,6 @@ pub fn openat(
         .filter_uid_gid(euid, egid);
 
     let new_description = {
-        let scheme = scheme::get_scheme(token.token(), scheme_id)?;
-
         let res = scheme.kopenat(
             number,
             StrOrBytes::from_str(&path_buf),
@@ -101,13 +103,14 @@ pub fn openat(
         match res? {
             OpenResult::SchemeLocal(number, internal_flags) => {
-                Arc::new(RwLock::new(FileDescription {
-                    offset: 0,
-                    internal_flags,
-                    scheme: scheme_id,
+                Arc::new(RwLock::new(FileDescription::new(
+                    scheme_id,
                     number,
-                    flags: (flags & !O_CLOEXEC) as u32,
-                }))
+                    0,
+                    (flags & !O_CLOEXEC) as u32,
+                    internal_flags,
+                    token,
+                )))
             }
             OpenResult::External(desc) => desc,
         }
@@ -137,16 +140,17 @@ pub fn unlinkat(
 ) -> Result<()> {
     let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?;
 
-    let (number, scheme_id) = {
+    let desc = {
         let current_lock = context::current();
         let mut current = current_lock.read(token.token());
-        let (context, mut token) = current.token_split();
-        let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?;
-        let desc = pipe.description.read(token.token());
-        (desc.number, desc.scheme)
+        let (context, mut context_token) = current.token_split();
+        let pipe = context
+            .get_file(fh, &mut context_token)
+            .ok_or(Error::new(EBADF))?;
+        *pipe.description.read(context_token.token())
     };
-
-    let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+    let number = desc.number;
+    let scheme = desc.get_scheme(token)?;
 
     let caller_ctx = context::current()
         .read(token.token())
@@ -199,17 +203,18 @@ fn duplicate_file(
     let description = { *file.description.read(token.token()) };
 
     let new_description = {
-        let scheme = scheme::get_scheme(token.token(), description.scheme)?;
+        let scheme = description.get_scheme(token)?;
 
         match scheme.kdup(description.number, user_buf, caller_ctx, token)? {
             OpenResult::SchemeLocal(number, internal_flags) => {
-                Arc::new(RwLock::new(FileDescription {
-                    offset: 0,
-                    internal_flags,
-                    scheme: description.scheme,
+                Arc::new(RwLock::new(FileDescription::new(
+                    description.scheme,
                     number,
-                    flags: description.flags,
-                }))
+                    0,
+                    description.flags,
+                    internal_flags,
+                    token,
+                )))
             }
             OpenResult::External(desc) => desc,
         }
@@ -296,11 +301,10 @@ fn call_normal(
     }
     .ok_or(Error::new(EBADF))?;
 
-    let (scheme_id, number) = {
-        let desc = file.description.read(token.token());
-        (desc.scheme, desc.number)
+    let (scheme, number) = {
+        let desc = *file.description.read(token.token());
+        (desc.get_scheme(token)?, desc.number)
     };
-    let scheme = scheme::get_scheme(token.token(), scheme_id)?;
 
     if flags.contains(CallFlags::STD_FS) {
         scheme.translate_std_fs_call(number, file.description, payload, flags, metadata, token)
@@ -341,28 +345,28 @@ fn fdwrite_inner(
 ) -> Result<usize> {
     // TODO: Ensure deadlocks can't happen
     let (scheme, number, descs_to_send) = {
-        let (scheme, number) = {
+        let desc = {
             let current_lock = context::current();
             let mut current = current_lock.read(token.token());
-            let (context, mut token) = current.token_split();
+            let (context, mut context_token) = current.token_split();
             let file_descriptor = context
-                .get_file(socket, &mut token)
+                .get_file(socket, &mut context_token)
                 .ok_or(Error::new(EBADF))?;
-            let desc = &file_descriptor.description.read(token.token());
-            (desc.scheme, desc.number)
+            *file_descriptor.description.read(context_token.token())
         };
-        let scheme = scheme::get_scheme(token.token(), scheme)?;
+        let scheme = desc.get_scheme(token)?;
+        let number = desc.number;
 
         let current_lock = context::current();
         let mut current = current_lock.read(token.token());
-        let (context, mut token) = current.token_split();
+        let (context, mut context_token) = current.token_split();
         (
             scheme,
             number,
             if flags.contains(CallFlags::FD_CLONE) {
-                context.bulk_get_files(&target_fds, &mut token)
+                context.bulk_get_files(&target_fds, &mut context_token)
             } else {
-                context.bulk_remove_files(&target_fds, &mut token)
+                context.bulk_remove_files(&target_fds, &mut context_token)
             }?
             .into_iter()
             .map(|f| f.description)
@@ -395,18 +399,22 @@ fn call_fdread(
     metadata: &[u64],
     token: &mut CleanLockToken,
 ) -> Result<usize> {
+    let desc = {
+        let current_lock = context::current();
+        let mut current = current_lock.read(token.token());
+        let (context, mut context_token) = current.token_split();
+        let file_descriptor = context
+            .get_file(fd, &mut context_token)
+            .ok_or(Error::new(EBADF))?;
+        *file_descriptor.description.read(context_token.token())
+    };
     let (scheme, number) = {
-        let (scheme, number) = {
-            let current_lock = context::current();
-            let mut current = current_lock.read(token.token());
-            let (context, mut token) = current.token_split();
-            let file_descriptor = context.get_file(fd, &mut token).ok_or(Error::new(EBADF))?;
-            let desc = file_descriptor.description.read(token.token());
-            (desc.scheme, desc.number)
-        };
-        let scheme = scheme::get_scheme(token.token(), scheme)?;
-
-        (scheme, number)
+        let scheme = desc.get_scheme(token)?;
+        let number = desc.number;
+        (scheme, number)
     };
 
     scheme.kfdread(number, payload, flags, metadata, token)
@@ -440,9 +448,9 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken)
     }
     .ok_or(Error::new(EBADF))?;
 
-    let (scheme_id, number, flags) = {
-        let desc = file.description.write(token.token());
-        (desc.scheme, desc.number, desc.flags)
+    let (number, flags, desc) = {
+        let desc = *file.description.read(token.token());
+        (desc.number, desc.flags, desc)
    };
 
     if cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC {
@@ -460,7 +468,7 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken)
 
     // Communicate fcntl with scheme
     if cmd != F_GETFD && cmd != F_SETFD {
-        let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+        let scheme = desc.get_scheme(token)?;
         scheme.fcntl(number, cmd, arg, token)?;
     };
 
@@ -518,13 +526,11 @@ pub fn flink(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken)
     let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?;
     let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?;
 
-    let (number, scheme_id) = {
-        let desc = file.description.read(token.token());
-        (desc.number, desc.scheme)
+    let (number, scheme) = {
+        let desc = *file.description.read(token.token());
+        (desc.number, desc.get_scheme(token)?)
     };
 
-    let scheme = scheme::get_scheme(token.token(), scheme_id)?;
-
     // TODO: Check EXDEV.
     /*
     if scheme_id != description.scheme {
@@ -554,13 +560,11 @@ pub fn frename(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken)
     let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?;
     let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?;
 
-    let (number, scheme_id) = {
-        let desc = file.description.read(token.token());
-        (desc.number, desc.scheme)
+    let (number, scheme) = {
+        let desc = *file.description.read(token.token());
+        (desc.number, desc.get_scheme(token)?)
     };
 
-    let scheme = scheme::get_scheme(token.token(), scheme_id)?;
-
     // TODO: Check EXDEV.
     /*
     if scheme_id != description.scheme {
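
Note on the recurring fs.rs refactor above: every call site now copies the FileDescription out of its lock first and then resolves the scheme through the new desc.get_scheme(token) helper, instead of holding the read guard across a scheme::get_scheme lookup. A distilled sketch of the after shape, with std locks and stand-in types (the kernel's RwLock, tokens, and scheme registry are elided):

use std::sync::{Arc, RwLock};

// Stand-in: the kernel's FileDescription is similarly small and Copy.
#[derive(Clone, Copy)]
struct FileDescription {
    scheme: usize,
    number: usize,
}

struct Scheme; // placeholder for the kernel's scheme handle

impl FileDescription {
    // Analogue of the patch's desc.get_scheme(token): resolution is a
    // method on the copied-out value, never on a live lock guard.
    fn get_scheme(&self) -> Result<Arc<Scheme>, ()> {
        // The real helper looks up self.scheme in the scheme registry.
        Ok(Arc::new(Scheme))
    }
}

fn main() -> Result<(), ()> {
    let description = Arc::new(RwLock::new(FileDescription { scheme: 3, number: 42 }));

    // The guard lives only for this statement; the copy is then used
    // freely, so no lock is held across the registry lookup.
    let desc = *description.read().unwrap();

    let _scheme = desc.get_scheme()?;
    println!("resolved scheme {} for file number {}", desc.scheme, desc.number);
    Ok(())
}

Besides shortening the critical section, this avoids re-entering the lock-token hierarchy while a description guard is alive, which is presumably also why fcntl drops its needless write lock in favor of a read.
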
diff --git a/src/syscall/process.rs b/src/syscall/process.rs
index e83da427..8a1d385e 100644
--- a/src/syscall/process.rs
+++ b/src/syscall/process.rs
@@ -271,23 +271,26 @@ unsafe fn bootstrap_mem(bootstrap: &crate::startup::Bootstrap) -> &'static [u8]
 }
 
 fn insert_fd(scheme: SchemeId, number: usize, cloexec: bool, token: &mut CleanLockToken) -> usize {
+    let description = Arc::new(RwLock::new(FileDescription::new(
+        scheme,
+        number,
+        0,
+        (O_CREAT | O_RDWR) as u32,
+        InternalFlags::empty(),
+        token,
+    )));
+
     let current_lock = context::current();
     let mut current = current_lock.read(token.token());
-    let (context, mut token) = current.token_split();
+    let (context, mut context_token) = current.token_split();
     context
         .add_file_min(
             FileDescriptor {
-                description: Arc::new(RwLock::new(FileDescription {
-                    scheme,
-                    number,
-                    offset: 0,
-                    flags: (O_CREAT | O_RDWR) as u32,
-                    internal_flags: InternalFlags::empty(),
-                })),
+                description,
                 cloexec,
             },
             syscall::flag::UPPER_FDTBL_TAG + scheme.get(),
-            &mut token,
+            &mut context_token,
         )
         .expect("failed to insert fd to current context")
        .get()
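
Note on the insert_fd change that closes this series: the new FileDescription is fully constructed, including its token-taking constructor, before the current context is locked, so the critical section shrinks to the table insertion itself. A sketch of that ordering with std primitives (the kernel's context lock, tokens, and add_file_min are simplified away; all names here are stand-ins):

use std::sync::{Arc, Mutex, RwLock};

struct FileDescription {
    scheme: usize,
    number: usize,
}

// Build first, lock second: only the push happens under the table lock.
fn insert_fd(
    table: &Mutex<Vec<Arc<RwLock<FileDescription>>>>,
    scheme: usize,
    number: usize,
) -> usize {
    // Allocation and initialization happen with no lock held.
    let description = Arc::new(RwLock::new(FileDescription { scheme, number }));

    // The lock guard's scope covers just the insertion.
    let mut files = table.lock().unwrap();
    files.push(description);
    files.len() - 1
}

fn main() {
    let table = Mutex::new(Vec::new());
    let fd = insert_fd(&table, 1, 99);
    println!("allocated fd {fd}");
}

In the patch, the same reordering is what lets FileDescription::new borrow the outer CleanLockToken at all, since that token is no longer available once current.token_split() has claimed it.
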