Files
RedBear-OS/local/patches/kernel/redbear-consolidated.patch
vasilito 11993af01f fix: rebase base patches, commit recipe drift, add relibc rlimit/sysconf
Base: fix P6-driver-new-modules.patch (converted from ed format to unified diff) for
the new driver modules (ncq, itr, phy). P6-driver-main-fixes.patch now applies (with
offset) against the current upstream source.

Relibc: remove stale P5-named-semaphores (upstream has stubs), add
P10-stack-size-8mb and P11-getrlimit-getrusage (per-process rlimit table,
sysconf integration, getdtablesize fix, null-pointer safety).
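
As a rough illustration of what P11 wires up (a sketch using the libc crate and generic
POSIX names, not RedBear-specific code): getrlimit should now read the per-process
rlimit table, and sysconf(_SC_OPEN_MAX) / getdtablesize() should report the same soft limit.

    use libc::{getrlimit, rlimit, sysconf, RLIMIT_NOFILE, _SC_OPEN_MAX};

    fn main() {
        // Read the soft/hard open-file limits from the per-process rlimit table.
        let mut rl: rlimit = unsafe { std::mem::zeroed() };
        let rc = unsafe { getrlimit(RLIMIT_NOFILE, &mut rl) };
        assert_eq!(rc, 0, "getrlimit(RLIMIT_NOFILE) failed");

        // With the sysconf integration, _SC_OPEN_MAX mirrors the current soft limit;
        // getdtablesize() (fixed in the same patch) is expected to agree as well.
        let open_max = unsafe { sysconf(_SC_OPEN_MAX) };
        assert_eq!(open_max as u64, rl.rlim_cur as u64);
    }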

Kernel: consolidate 29 individual patches into a single redbear-consolidated.patch.
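
Among the changes folded into the consolidated patch is a new "sleep" node on the
kernel.acpi scheme (see the diff below): reading it lists the advertised states,
writing "S3" or "S5" requests the transition. A minimal userspace sketch, assuming
the node is reachable at /scheme/kernel.acpi/sleep:

    use std::fs;

    fn main() -> std::io::Result<()> {
        // The kernel returns one state per line, e.g. "S3\nS5\n" or just "S5\n".
        let states = fs::read_to_string("/scheme/kernel.acpi/sleep")?;
        println!("available sleep states: {}", states.trim());

        // Request suspend-to-RAM; on resume the write call returns normally.
        if states.lines().any(|s| s == "S3") {
            fs::write("/scheme/kernel.acpi/sleep", "S3")?;
        }
        Ok(())
    }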

Userutils: P5-redbear-branding replaces P4-login-rate-limit.

Recipe.toml changes now committed so they survive source resets.
2026-05-04 11:49:15 +01:00

2593 lines
94 KiB
Diff

diff --git a/Cargo.toml b/Cargo.toml
index 6d4f059a..e05f723c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,7 @@ cc = "1.0"
toml = "0.8"
[dependencies]
+acpi_ext = { package = "acpi", git = "https://gitlab.redox-os.org/redox-os/acpi.git", branch = "redox-6.x" }
arrayvec = { version = "0.7.4", default-features = false }
bitfield = "0.13.2"
bitflags = "2"
diff --git a/Makefile b/Makefile
index 68a8c50a..ce59b910 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,4 @@
+# Red Bear OS kernel patches applied via individual patch files
.PHONY: all check
SOURCE:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
diff --git a/build.rs b/build.rs
index 96c3ea5c..751746cc 100644
--- a/build.rs
+++ b/build.rs
@@ -77,6 +77,7 @@ fn main() {
}
"x86_64" => {
println!("cargo::rerun-if-changed=src/asm/x86_64/trampoline.asm");
+ println!("cargo::rerun-if-changed=src/asm/x86_64/s3_wakeup.asm");
let status = Command::new("nasm")
.arg("-f")
@@ -89,6 +90,18 @@ fn main() {
if !status.success() {
panic!("nasm failed with exit status {}", status);
}
+
+ let status = Command::new("nasm")
+ .arg("-f")
+ .arg("bin")
+ .arg("-o")
+ .arg(format!("{}/s3_wakeup", out_dir))
+ .arg("src/asm/x86_64/s3_wakeup.asm")
+ .status()
+ .expect("failed to run nasm");
+ if !status.success() {
+ panic!("nasm failed with exit status {}", status);
+ }
}
"riscv64" => {
println!("cargo::rustc-cfg=dtb");
diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs
index 4dc23883..f472c088 100644
--- a/src/acpi/madt/arch/x86.rs
+++ b/src/acpi/madt/arch/x86.rs
@@ -18,6 +18,7 @@ use crate::{
use super::{Madt, MadtEntry};
+const AP_SPIN_LIMIT: u32 = 1_000_000;
const TRAMPOLINE: usize = 0x8000;
static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline"));
@@ -42,13 +43,17 @@ pub(super) fn init(madt: Madt) {
//TODO: do not have writable and executable!
let mut mapper = KernelMapper::lock_rw();
- let result = mapper
- .map_phys(
- trampoline_page.start_address(),
- trampoline_frame.base(),
- PageFlags::new().execute(true).write(true),
- )
- .expect("failed to map trampoline");
+ let result = match mapper.map_phys(
+ trampoline_page.start_address(),
+ trampoline_frame.base(),
+ PageFlags::new().execute(true).write(true),
+ ) {
+ Some(result) => result,
+ None => {
+ println!("KERNEL AP: failed to map trampoline page, AP bring-up disabled");
+ return;
+ }
+ };
(result, mapper.table().phys().data())
};
@@ -72,17 +77,27 @@ pub(super) fn init(madt: Madt) {
if u32::from(ap_local_apic.id) == me.get() {
debug!(" This is my local APIC");
} else if ap_local_apic.flags & 1 == 1 {
- let cpu_id = LogicalCpuId::next();
-
// Allocate a stack
- let stack_start = RmmA::phys_to_virt(
- allocate_p2frame(4)
- .expect("no more frames in acpi stack_start")
- .base(),
- )
- .data();
+ let alloc = match allocate_p2frame(4) {
+ Some(frame) => frame,
+ None => {
+ println!("KERNEL AP: CPU {} no memory for stack, skipping", ap_local_apic.id);
+ continue;
+ }
+ };
+ let stack_start = RmmA::phys_to_virt(alloc.base()).data();
let stack_end = stack_start + (PAGE_SIZE << 4);
+ let next_cpu = crate::CPU_COUNT.load(Ordering::Relaxed);
+ if next_cpu >= crate::cpu_set::MAX_CPU_COUNT {
+ println!(
+ "KERNEL AP: CPU {} exceeds logical CPU limit, skipping",
+ ap_local_apic.id
+ );
+ continue;
+ }
+ let cpu_id = LogicalCpuId::new(next_cpu);
+
let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end);
let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id);
@@ -137,13 +152,34 @@ pub(super) fn init(madt: Madt) {
local_apic.set_icr(icr);
}
- // Wait for trampoline ready
- while unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } == 0 {
+ // Wait for trampoline ready with timeout
+ let mut trampoline_ready = false;
+ for _ in 0..AP_SPIN_LIMIT {
+ if unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } != 0 {
+ trampoline_ready = true;
+ break;
+ }
hint::spin_loop();
}
- while !AP_READY.load(Ordering::SeqCst) {
+ if !trampoline_ready {
+ println!("KERNEL AP: CPU {} trampoline timeout, skipping", ap_local_apic.id);
+ continue;
+ }
+
+ let mut kernel_ready = false;
+ for _ in 0..AP_SPIN_LIMIT {
+ if AP_READY.load(Ordering::SeqCst) {
+ kernel_ready = true;
+ break;
+ }
hint::spin_loop();
}
+ if !kernel_ready {
+ println!("KERNEL AP: CPU {} AP_READY timeout, skipping", ap_local_apic.id);
+ continue;
+ }
+
+ crate::CPU_COUNT.fetch_add(1, Ordering::Relaxed);
RmmA::invalidate_all();
}
@@ -151,10 +187,12 @@ pub(super) fn init(madt: Madt) {
}
// Unmap trampoline
- let (_frame, _, flush) = unsafe {
+ if let Some((_frame, _, flush)) = unsafe {
KernelMapper::lock_rw()
.unmap_phys(trampoline_page.start_address())
- .expect("failed to unmap trampoline page")
- };
- flush.flush();
+ } {
+ flush.flush();
+ } else {
+ println!("KERNEL AP: failed to unmap trampoline page (non-fatal)");
+ }
}
diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs
index 59e35265..b3b80f0c 100644
--- a/src/acpi/mod.rs
+++ b/src/acpi/mod.rs
@@ -82,6 +82,14 @@ impl Rxsdt for RxsdtEnum {
pub static RXSDT_ENUM: Once<RxsdtEnum> = Once::new();
+#[derive(Clone, Copy, Debug)]
+pub struct AcpiRootInfo {
+ pub revision: u8,
+ pub root_sdt_address: PhysicalAddress,
+}
+
+pub static ACPI_ROOT_INFO: Once<AcpiRootInfo> = Once::new();
+
/// Parse the ACPI tables to gather CPU, interrupt, and timer information
pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
unsafe {
@@ -94,6 +102,15 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
let rsdp_opt = Rsdp::get_rsdp(already_supplied_rsdp);
if let Some(rsdp) = rsdp_opt {
+ let root_info = ACPI_ROOT_INFO.call_once(|| AcpiRootInfo {
+ revision: rsdp.revision(),
+ root_sdt_address: rsdp.sdt_address(),
+ });
+
+ if root_info.root_sdt_address != rsdp.sdt_address() || root_info.revision != rsdp.revision() {
+ error!("ACPI_ROOT_INFO already initialized with a different RSDP root");
+ }
+
debug!("SDT address: {:#x}", rsdp.sdt_address().data());
let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock_rw());
diff --git a/src/acpi/rsdp.rs b/src/acpi/rsdp.rs
index f10c5ac9..5e93a9f8 100644
--- a/src/acpi/rsdp.rs
+++ b/src/acpi/rsdp.rs
@@ -31,4 +31,8 @@ impl Rsdp {
self.rsdt_address as usize
})
}
+
+ pub fn revision(&self) -> u8 {
+ self.revision
+ }
}
diff --git a/src/allocator/mod.rs b/src/allocator/mod.rs
index 4fdb0ba1..aaa71963 100644
--- a/src/allocator/mod.rs
+++ b/src/allocator/mod.rs
@@ -7,26 +7,40 @@ mod linked_list;
/// Size of kernel heap
const KERNEL_HEAP_SIZE: usize = ::rmm::MEGABYTE;
+#[cold]
+fn halt_kernel_heap_init(message: &str) -> ! {
+ print!("{message}");
+ println!("Kernel heap initialization cannot continue. Halting.");
+ loop {
+ core::hint::spin_loop();
+ }
+}
+
unsafe fn map_heap(mapper: &mut KernelMapper<true>, offset: usize, size: usize) {
let mut flush_all = PageFlushAll::new();
let heap_start_page = Page::containing_address(VirtualAddress::new(offset));
let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size - 1));
for page in Page::range_inclusive(heap_start_page, heap_end_page) {
- let phys = mapper
- .allocator_mut()
- .allocate_one()
- .expect("failed to allocate kernel heap");
+ let phys = match mapper.allocator_mut().allocate_one() {
+ Some(phys) => phys,
+ None => halt_kernel_heap_init(
+ "FATAL: failed to allocate physical frame for kernel heap\n",
+ ),
+ };
let flush = unsafe {
- mapper
- .map_phys(
- page.start_address(),
- phys,
- PageFlags::new()
- .write(true)
- .global(cfg!(not(feature = "pti"))),
- )
- .expect("failed to map kernel heap")
+ match mapper.map_phys(
+ page.start_address(),
+ phys,
+ PageFlags::new()
+ .write(true)
+ .global(cfg!(not(feature = "pti"))),
+ ) {
+ Some(flush) => flush,
+ None => halt_kernel_heap_init(
+ "FATAL: failed to map kernel heap virtual page\n",
+ ),
+ }
};
flush_all.consume(flush);
}
diff --git a/src/arch/x86_shared/gdt.rs b/src/arch/x86_shared/gdt.rs
index cad344f3..f7acae35 100644
--- a/src/arch/x86_shared/gdt.rs
+++ b/src/arch/x86_shared/gdt.rs
@@ -192,6 +192,15 @@ impl ProcessorControlRegion {
}
}
+#[cold]
+fn halt_pcr_init() -> ! {
+ println!("FATAL: failed to allocate physical memory for Processor Control Region");
+ println!("Processor startup cannot continue. Halting.");
+ loop {
+ core::hint::spin_loop();
+ }
+}
+
pub unsafe fn pcr() -> *mut ProcessorControlRegion {
unsafe {
// Primitive benchmarking of RDFSBASE and RDGSBASE in userspace, appears to indicate that
@@ -375,7 +384,10 @@ pub fn allocate_and_init_pcr(
.next_power_of_two()
.trailing_zeros();
- let pcr_frame = crate::memory::allocate_p2frame(alloc_order).expect("failed to allocate PCR");
+ let pcr_frame = match crate::memory::allocate_p2frame(alloc_order) {
+ Some(frame) => frame,
+ None => halt_pcr_init(),
+ };
let pcr_ptr = RmmA::phys_to_virt(pcr_frame.base()).data() as *mut ProcessorControlRegion;
unsafe { core::ptr::write(pcr_ptr, ProcessorControlRegion::new_partial_init(cpu_id)) };
diff --git a/src/arch/x86_shared/idt.rs b/src/arch/x86_shared/idt.rs
index 50064585..47f692f6 100644
--- a/src/arch/x86_shared/idt.rs
+++ b/src/arch/x86_shared/idt.rs
@@ -78,6 +78,15 @@ static INIT_BSP_IDT: SyncUnsafeCell<Idt> = SyncUnsafeCell::new(Idt::new());
pub(crate) static IDTS: RwLock<HashMap<LogicalCpuId, &'static mut Idt>> =
RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new()));
+#[cold]
+fn halt_idt_init() -> ! {
+ println!("FATAL: failed to allocate physical pages for backup interrupt stack");
+ println!("Interrupt setup cannot continue. Halting.");
+ loop {
+ core::hint::spin_loop();
+ }
+}
+
#[inline]
pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool {
if cpu_id == LogicalCpuId::BSP {
@@ -161,8 +170,10 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt {
.or_insert_with(|| Box::leak(Box::new(Idt::new())));
use crate::memory::{RmmA, RmmArch};
- let frames = crate::memory::allocate_p2frame(4)
- .expect("failed to allocate pages for backup interrupt stack");
+ let frames = match crate::memory::allocate_p2frame(4) {
+ Some(frames) => frames,
+ None => halt_idt_init(),
+ };
// Physical pages are mapped linearly. So is the linearly mapped virtual memory.
let base_address = RmmA::phys_to_virt(frames.base());
diff --git a/src/arch/x86_shared/mod.rs b/src/arch/x86_shared/mod.rs
index e3c30501..11c33e94 100644
--- a/src/arch/x86_shared/mod.rs
+++ b/src/arch/x86_shared/mod.rs
@@ -28,6 +28,8 @@ pub mod pti;
/// Initialization and start function
pub mod start;
+pub mod sleep;
+
/// Stop function
pub mod stop;
diff --git a/src/arch/x86_shared/sleep.rs b/src/arch/x86_shared/sleep.rs
new file mode 100644
index 00000000..9f98c0d8
--- /dev/null
+++ b/src/arch/x86_shared/sleep.rs
@@ -0,0 +1,712 @@
+use alloc::{sync::Arc, vec::Vec};
+use core::{
+ ptr::NonNull,
+ str::FromStr,
+ sync::atomic::{AtomicU32, Ordering},
+};
+
+use acpi_ext::{
+ aml::{namespace::AmlName, object::Object, Interpreter},
+ registers::FixedRegisters,
+ sdt::{facs::Facs, fadt::Fadt, SdtHeader},
+ AcpiTables, Handle, Handler, PhysicalMapping,
+};
+use spin::Mutex;
+use syscall::error::{Error, EINVAL, EIO};
+use x86::{segmentation::SegmentSelector, task, Ring};
+
+use crate::{
+ acpi::ACPI_ROOT_INFO,
+ arch::interrupt,
+ memory::{
+ round_down_pages, round_up_pages, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA,
+ RmmArch, VirtualAddress, PAGE_SIZE,
+ },
+ syscall::io::{Io, Pio},
+};
+
+const ACPI_SLP_TYP_SHIFT: u16 = 10;
+const ACPI_SLP_TYP_MASK: u16 = 0x1C00;
+const ACPI_SLP_EN: u16 = 1 << 13;
+const WAKE_TRAMPOLINE_PHYS: usize = 0x8000;
+const SLEEP_RETURN_OK: usize = 0;
+
+#[cfg(target_arch = "x86_64")]
+static WAKE_TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/s3_wakeup"));
+
+#[repr(C, packed)]
+#[derive(Clone, Copy, Debug, Default)]
+struct DescriptorTableRegister {
+ limit: u16,
+ base: u64,
+}
+
+#[repr(C, align(64))]
+#[derive(Clone, Copy, Debug)]
+struct FpuState {
+ bytes: [u8; 4096],
+}
+
+impl Default for FpuState {
+ fn default() -> Self {
+ Self { bytes: [0; 4096] }
+ }
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum SleepState {
+ S3,
+ S5,
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum SleepError {
+ UnsupportedArch,
+ MissingAcpi,
+ MissingFadt,
+ MissingFacs,
+ MissingSleepObject,
+ InvalidSleepObject,
+ UnsupportedPmControl,
+ UnsupportedAmlOperation,
+ SleepDidNotEnter,
+}
+
+impl SleepError {
+ fn code(self) -> usize {
+ match self {
+ Self::UnsupportedArch => EINVAL as usize,
+ Self::MissingAcpi
+ | Self::MissingFadt
+ | Self::MissingFacs
+ | Self::MissingSleepObject
+ | Self::UnsupportedAmlOperation => EIO as usize,
+ Self::InvalidSleepObject | Self::UnsupportedPmControl | Self::SleepDidNotEnter => {
+ EINVAL as usize
+ }
+ }
+ }
+
+ fn from_code(code: usize) -> Self {
+ match code as i32 {
+ x if x == EINVAL => Self::InvalidSleepObject,
+ _ => Self::MissingAcpi,
+ }
+ }
+}
+
+#[derive(Clone, Copy, Debug, Default)]
+struct SavedCpuContext {
+ entry_rsp: usize,
+ runtime_rsp: usize,
+ facs_address: usize,
+ cr0: usize,
+ cr2: usize,
+ cr3: usize,
+ cr4: usize,
+ rflags: usize,
+ gdtr: DescriptorTableRegister,
+ idtr: DescriptorTableRegister,
+ efer: u64,
+ fs_base: u64,
+ gs_base: u64,
+ kernel_gs_base: u64,
+ fpu: FpuState,
+}
+
+static SAVED_CONTEXT: Mutex<Option<SavedCpuContext>> = Mutex::new(None);
+static AML_MUTEX_IDS: AtomicU32 = AtomicU32::new(1);
+
+#[derive(Clone, Copy, Debug)]
+struct SleepTypeData {
+ a: u16,
+ b: u16,
+}
+
+#[derive(Clone, Copy)]
+struct KernelAcpiHandler;
+
+impl KernelAcpiHandler {
+ fn map_range(physical_address: usize, size: usize) -> (*mut u8, usize) {
+ let map_base = round_down_pages(physical_address);
+ let map_offset = physical_address - map_base;
+ let mapped_length = round_up_pages(size + map_offset);
+
+ // SAFETY: The ACPI interpreter only requests firmware-described physical regions.
+ unsafe {
+ let mut mapper = KernelMapper::lock_rw();
+ for page_index in 0..mapped_length / PAGE_SIZE {
+ let (_, flush) = mapper
+ .map_linearly(
+ PhysicalAddress::new(map_base + page_index * PAGE_SIZE),
+ PageFlags::new(),
+ )
+ .expect("failed to linearly map ACPI physical region");
+ flush.flush();
+ }
+ }
+
+ let virtual_base = RmmA::phys_to_virt(PhysicalAddress::new(map_base)).data();
+ ((virtual_base + map_offset) as *mut u8, mapped_length)
+ }
+}
+
+impl Handler for KernelAcpiHandler {
+ unsafe fn map_physical_region<T>(&self, physical_address: usize, size: usize) -> PhysicalMapping<Self, T> {
+ let (virtual_start, mapped_length) = Self::map_range(physical_address, size);
+ PhysicalMapping {
+ physical_start: physical_address,
+ virtual_start: NonNull::new(virtual_start.cast::<T>())
+ .expect("expected mapped ACPI virtual address to be non-null"),
+ region_length: size,
+ mapped_length,
+ handler: *self,
+ }
+ }
+
+ fn unmap_physical_region<T>(_region: &PhysicalMapping<Self, T>) {}
+
+ fn read_u8(&self, address: usize) -> u8 {
+ // SAFETY: AML system-memory accesses are byte-addressable firmware regions.
+ unsafe { core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u8) }
+ }
+
+ fn read_u16(&self, address: usize) -> u16 {
+ // SAFETY: AML system-memory accesses are word-addressable firmware regions.
+ unsafe {
+ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u16)
+ }
+ }
+
+ fn read_u32(&self, address: usize) -> u32 {
+ // SAFETY: AML system-memory accesses are dword-addressable firmware regions.
+ unsafe {
+ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u32)
+ }
+ }
+
+ fn read_u64(&self, address: usize) -> u64 {
+ // SAFETY: AML system-memory accesses are qword-addressable firmware regions.
+ unsafe {
+ core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u64)
+ }
+ }
+
+ fn write_u8(&self, address: usize, value: u8) {
+ // SAFETY: AML system-memory accesses are byte-addressable firmware regions.
+ unsafe {
+ core::ptr::write_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u8, value)
+ }
+ }
+
+ fn write_u16(&self, address: usize, value: u16) {
+ // SAFETY: AML system-memory accesses are word-addressable firmware regions.
+ unsafe {
+ core::ptr::write_volatile(
+ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u16,
+ value,
+ )
+ }
+ }
+
+ fn write_u32(&self, address: usize, value: u32) {
+ // SAFETY: AML system-memory accesses are dword-addressable firmware regions.
+ unsafe {
+ core::ptr::write_volatile(
+ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u32,
+ value,
+ )
+ }
+ }
+
+ fn write_u64(&self, address: usize, value: u64) {
+ // SAFETY: AML system-memory accesses are qword-addressable firmware regions.
+ unsafe {
+ core::ptr::write_volatile(
+ RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u64,
+ value,
+ )
+ }
+ }
+
+ fn read_io_u8(&self, port: u16) -> u8 {
+ Pio::<u8>::new(port).read()
+ }
+
+ fn read_io_u16(&self, port: u16) -> u16 {
+ Pio::<u16>::new(port).read()
+ }
+
+ fn read_io_u32(&self, port: u16) -> u32 {
+ Pio::<u32>::new(port).read()
+ }
+
+ fn write_io_u8(&self, port: u16, value: u8) {
+ Pio::<u8>::new(port).write(value)
+ }
+
+ fn write_io_u16(&self, port: u16, value: u16) {
+ Pio::<u16>::new(port).write(value)
+ }
+
+ fn write_io_u32(&self, port: u16, value: u32) {
+ Pio::<u32>::new(port).write(value)
+ }
+
+ fn read_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u8 {
+ 0
+ }
+
+ fn read_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u16 {
+ 0
+ }
+
+ fn read_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u32 {
+ 0
+ }
+
+ fn write_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u8) {}
+
+ fn write_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u16) {}
+
+ fn write_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u32) {}
+
+ fn nanos_since_boot(&self) -> u64 {
+ 0
+ }
+
+ fn stall(&self, microseconds: u64) {
+ for _ in 0..(microseconds.saturating_mul(64)) {
+ core::hint::spin_loop();
+ }
+ }
+
+ fn sleep(&self, milliseconds: u64) {
+ for _ in 0..(milliseconds.saturating_mul(64_000)) {
+ core::hint::spin_loop();
+ }
+ }
+
+ fn create_mutex(&self) -> Handle {
+ Handle(AML_MUTEX_IDS.fetch_add(1, Ordering::Relaxed))
+ }
+
+ fn acquire(&self, _mutex: Handle, _timeout: u16) -> Result<(), acpi_ext::aml::AmlError> {
+ Ok(())
+ }
+
+ fn release(&self, _mutex: Handle) {}
+}
+
+fn sleep_state_name(state: SleepState) -> &'static str {
+ match state {
+ SleepState::S3 => "\\_S3",
+ SleepState::S5 => "\\_S5",
+ }
+}
+
+fn encode_sleep_type(value: u16) -> u16 {
+ if value <= 0x7 {
+ value << ACPI_SLP_TYP_SHIFT
+ } else {
+ value & ACPI_SLP_TYP_MASK
+ }
+}
+
+fn load_interpreter() -> Result<(
+ Arc<FixedRegisters<KernelAcpiHandler>>,
+ PhysicalMapping<KernelAcpiHandler, Facs>,
+ Interpreter<KernelAcpiHandler>,
+), SleepError> {
+ let root = *ACPI_ROOT_INFO.get().ok_or(SleepError::MissingAcpi)?;
+ let handler = KernelAcpiHandler;
+
+ // SAFETY: ACPI root info is captured from the firmware-provided, already validated root table.
+ let tables = unsafe {
+ AcpiTables::from_rsdt(handler, root.revision, root.root_sdt_address.data())
+ .map_err(|_| SleepError::MissingAcpi)?
+ };
+ let fadt = tables.find_table::<Fadt>().ok_or(SleepError::MissingFadt)?;
+ let registers = Arc::new(
+ FixedRegisters::new(&fadt, handler).map_err(|_| SleepError::UnsupportedPmControl)?,
+ );
+ let facs_address = fadt.facs_address().map_err(|_| SleepError::MissingFacs)?;
+
+ // SAFETY: The FADT-supplied FACS address is used exactly as described by the ACPI spec.
+ let facs = unsafe { handler.map_physical_region::<Facs>(facs_address, core::mem::size_of::<Facs>()) };
+ // SAFETY: The AML interpreter only needs an owned mapping of the same firmware FACS table.
+ let interpreter_facs = unsafe {
+ handler.map_physical_region::<Facs>(facs_address, core::mem::size_of::<Facs>())
+ };
+ let dsdt = tables.dsdt().map_err(|_| SleepError::MissingFadt)?;
+ let interpreter = Interpreter::new(handler, dsdt.revision, Arc::clone(&registers), Some(interpreter_facs));
+
+ // SAFETY: Each AML table mapping is owned by the interpreter during table loading.
+ unsafe {
+ let mapping = handler.map_physical_region::<SdtHeader>(dsdt.phys_address, dsdt.length as usize);
+ let stream = core::slice::from_raw_parts(
+ mapping.virtual_start.as_ptr().byte_add(core::mem::size_of::<SdtHeader>()) as *const u8,
+ dsdt.length as usize - core::mem::size_of::<SdtHeader>(),
+ );
+ interpreter
+ .load_table(stream)
+ .map_err(|_| SleepError::UnsupportedAmlOperation)?;
+
+ for ssdt in tables.ssdts() {
+ let mapping = handler.map_physical_region::<SdtHeader>(ssdt.phys_address, ssdt.length as usize);
+ let stream = core::slice::from_raw_parts(
+ mapping.virtual_start.as_ptr().byte_add(core::mem::size_of::<SdtHeader>()) as *const u8,
+ ssdt.length as usize - core::mem::size_of::<SdtHeader>(),
+ );
+ interpreter
+ .load_table(stream)
+ .map_err(|_| SleepError::UnsupportedAmlOperation)?;
+ }
+ }
+
+ Ok((registers, facs, interpreter))
+}
+
+fn sleep_type_data_from_interpreter(
+ interpreter: &Interpreter<KernelAcpiHandler>,
+ state: SleepState,
+) -> Result<SleepTypeData, SleepError> {
+ let name = AmlName::from_str(sleep_state_name(state)).map_err(|_| SleepError::MissingSleepObject)?;
+ let object = interpreter
+ .evaluate(name, Vec::new())
+ .map_err(|_| SleepError::MissingSleepObject)?;
+
+ let Object::Package(package) = &*object else {
+ return Err(SleepError::InvalidSleepObject);
+ };
+
+ let Some(typa_object) = package.first() else {
+ return Err(SleepError::InvalidSleepObject);
+ };
+ let Some(typb_object) = package.get(1) else {
+ return Err(SleepError::InvalidSleepObject);
+ };
+
+ let Object::Integer(typa) = &**typa_object else {
+ return Err(SleepError::InvalidSleepObject);
+ };
+ let Object::Integer(typb) = &**typb_object else {
+ return Err(SleepError::InvalidSleepObject);
+ };
+
+ Ok(SleepTypeData {
+ a: encode_sleep_type(*typa as u16),
+ b: encode_sleep_type(*typb as u16),
+ })
+}
+
+fn sleep_type_data(state: SleepState) -> Result<SleepTypeData, SleepError> {
+ let (_registers, _facs, interpreter) = load_interpreter()?;
+ sleep_type_data_from_interpreter(&interpreter, state)
+}
+
+fn install_wake_trampoline(stack_rsp: usize, cr3: usize) {
+ let trampoline_page = Page::containing_address(VirtualAddress::new(WAKE_TRAMPOLINE_PHYS));
+ let trampoline_frame = PhysicalAddress::new(WAKE_TRAMPOLINE_PHYS);
+
+ // SAFETY: The 0x8000 low-memory trampoline page is reserved by the kernel for bootstrap stubs.
+ let (result, _) = unsafe {
+ let mut mapper = KernelMapper::lock_rw();
+ let result = mapper
+ .map_phys(
+ trampoline_page.start_address(),
+ trampoline_frame,
+ PageFlags::new().execute(true).write(true),
+ )
+ .expect("failed to map S3 wake trampoline page");
+ (result, mapper.table().phys().data())
+ };
+ result.flush();
+
+ for (index, value) in WAKE_TRAMPOLINE_DATA.iter().enumerate() {
+ // SAFETY: The trampoline page is mapped writable at the same virtual address as the physical page.
+ unsafe {
+ core::ptr::write_volatile((WAKE_TRAMPOLINE_PHYS as *mut u8).add(index), *value);
+ }
+ }
+
+ // SAFETY: The wake trampoline layout reserves three qword fields immediately after the jump.
+ unsafe {
+ let stack_slot = (WAKE_TRAMPOLINE_PHYS + 8) as *mut u64;
+ let page_table_slot = stack_slot.add(1);
+ let code_slot = stack_slot.add(2);
+ stack_slot.write(stack_rsp as u64);
+ page_table_slot.write(cr3 as u64);
+ #[expect(clippy::fn_to_numeric_cast)]
+ code_slot.write(resume_from_s3_trampoline as usize as u64);
+ }
+
+ // SAFETY: The trampoline mapping is no longer needed once the physical page has been populated.
+ let (_frame, _, flush) = unsafe {
+ KernelMapper::lock_rw()
+ .unmap_phys(trampoline_page.start_address())
+ .expect("failed to unmap S3 wake trampoline page")
+ };
+ flush.flush();
+}
+
+fn save_descriptor_tables(context: &mut SavedCpuContext) {
+ // SAFETY: SGDT/SIDT only read the current CPU descriptor-table registers into the provided storage.
+ unsafe {
+ core::arch::asm!("sgdt [{}]", in(reg) &mut context.gdtr, options(nostack, preserves_flags));
+ core::arch::asm!("sidt [{}]", in(reg) &mut context.idtr, options(nostack, preserves_flags));
+ }
+}
+
+fn save_fpu_state(context: &mut SavedCpuContext) {
+ // SAFETY: The kernel owns the current CPU at suspend entry and the FXSAVE buffer is 64-byte aligned.
+ unsafe {
+ core::arch::asm!(
+ "fxsave64 [{}]",
+ in(reg) context.fpu.bytes.as_mut_ptr(),
+ );
+ }
+}
+
+fn restore_fpu_state(context: &SavedCpuContext) {
+ // SAFETY: The saved FXSAVE image belongs to the same CPU context and matches the restore instruction.
+ unsafe {
+ core::arch::asm!(
+ "fxrstor64 [{}]",
+ in(reg) context.fpu.bytes.as_ptr(),
+ );
+ }
+}
+
+fn save_cpu_context(entry_rsp: usize) -> SavedCpuContext {
+ let mut context = SavedCpuContext {
+ entry_rsp,
+ ..SavedCpuContext::default()
+ };
+
+ // SAFETY: Reading control registers and MSRs is required to reconstruct the CPU execution state on wake.
+ unsafe {
+ core::arch::asm!(
+ "mov {}, cr0",
+ out(reg) context.cr0,
+ options(nostack, preserves_flags)
+ );
+ core::arch::asm!(
+ "mov {}, cr2",
+ out(reg) context.cr2,
+ options(nostack, preserves_flags)
+ );
+ core::arch::asm!(
+ "mov {}, cr3",
+ out(reg) context.cr3,
+ options(nostack, preserves_flags)
+ );
+ core::arch::asm!(
+ "mov {}, cr4",
+ out(reg) context.cr4,
+ options(nostack, preserves_flags)
+ );
+ core::arch::asm!(
+ "pushfq",
+ "pop {}",
+ out(reg) context.rflags,
+ options(preserves_flags)
+ );
+ core::arch::asm!("mov {}, rsp", out(reg) context.runtime_rsp, options(nostack, preserves_flags));
+
+ context.efer = x86::msr::rdmsr(x86::msr::IA32_EFER);
+ context.fs_base = x86::msr::rdmsr(x86::msr::IA32_FS_BASE);
+ context.gs_base = x86::msr::rdmsr(x86::msr::IA32_GS_BASE);
+ context.kernel_gs_base = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE);
+ }
+
+ save_descriptor_tables(&mut context);
+ save_fpu_state(&mut context);
+ context
+}
+
+fn set_firmware_waking_vector(facs: &mut PhysicalMapping<KernelAcpiHandler, Facs>, vector: usize) {
+ facs.firmware_waking_vector = vector as u32;
+ facs.x_firmware_waking_vector = vector as u64;
+}
+
+fn write_pm1_control_block(
+ registers: &FixedRegisters<KernelAcpiHandler>,
+ sleep_type: SleepTypeData,
+) -> Result<(), SleepError> {
+ let current_a = registers
+ .pm1_control_registers
+ .pm1a
+ .read()
+ .map_err(|_| SleepError::UnsupportedPmControl)? as u16;
+ let armed_a = (current_a & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.a;
+
+ registers
+ .pm1_control_registers
+ .pm1a
+ .write(u64::from(armed_a))
+ .map_err(|_| SleepError::UnsupportedPmControl)?;
+
+ if let Some(pm1b) = &registers.pm1_control_registers.pm1b {
+ let current_b = pm1b.read().map_err(|_| SleepError::UnsupportedPmControl)? as u16;
+ let armed_b = (current_b & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.b;
+ pm1b.write(u64::from(armed_b))
+ .map_err(|_| SleepError::UnsupportedPmControl)?;
+ pm1b.write(u64::from(armed_b | ACPI_SLP_EN))
+ .map_err(|_| SleepError::UnsupportedPmControl)?;
+ }
+
+ // SAFETY: WBINVD is required here to flush dirty cache lines before firmware powers down the CPU package.
+ unsafe {
+ core::arch::asm!("wbinvd", options(nostack, preserves_flags));
+ }
+
+ registers
+ .pm1_control_registers
+ .pm1a
+ .write(u64::from(armed_a | ACPI_SLP_EN))
+ .map_err(|_| SleepError::UnsupportedPmControl)?;
+
+ Ok(())
+}
+
+#[unsafe(naked)]
+unsafe extern "sysv64" fn enter_sleep_raw(state: usize) -> usize {
+ core::arch::naked_asm!(
+ "mov rsi, rsp",
+ "jmp {inner}",
+ inner = sym enter_sleep_raw_inner,
+ );
+}
+
+extern "C" fn enter_sleep_raw_inner(state: usize, entry_rsp: usize) -> usize {
+ let state = match state {
+ 3 => SleepState::S3,
+ 5 => SleepState::S5,
+ _ => return SleepError::InvalidSleepObject.code(),
+ };
+
+ let (registers, mut facs, interpreter) = match load_interpreter() {
+ Ok(tuple) => tuple,
+ Err(error) => return error.code(),
+ };
+ let sleep_type = match sleep_type_data_from_interpreter(&interpreter, state) {
+ Ok(data) => data,
+ Err(error) => return error.code(),
+ };
+
+ let mut context = save_cpu_context(entry_rsp);
+ context.facs_address = facs.physical_start;
+ install_wake_trampoline(context.runtime_rsp, context.cr3);
+ set_firmware_waking_vector(&mut facs, WAKE_TRAMPOLINE_PHYS);
+
+ {
+ let mut saved = SAVED_CONTEXT.lock();
+ *saved = Some(context);
+ }
+
+ // SAFETY: Suspend entry must not be interrupted while the wake vector and PM1 control block are being armed.
+ unsafe {
+ interrupt::disable();
+ }
+
+ if let Err(error) = write_pm1_control_block(registers.as_ref(), sleep_type) {
+ return error.code();
+ }
+
+ // SAFETY: The final CLI+HLT sequence is the architectural handoff point after asserting SLP_EN.
+ unsafe {
+ core::arch::asm!("cli; hlt", options(nostack));
+ }
+
+ SleepError::SleepDidNotEnter.code()
+}
+
+extern "C" fn resume_from_s3_trampoline() -> ! {
+ let mut saved = SAVED_CONTEXT.lock();
+ let context = saved.take().expect("S3 wake trampoline resumed without saved CPU context");
+ drop(saved);
+
+ // SAFETY: The saved FACS physical address was captured from the validated FADT during suspend entry.
+ if context.facs_address != 0 {
+ let mut facs = unsafe {
+ KernelAcpiHandler.map_physical_region::<Facs>(
+ context.facs_address,
+ core::mem::size_of::<Facs>(),
+ )
+ };
+ set_firmware_waking_vector(&mut facs, 0);
+ }
+
+ // SAFETY: The wake trampoline already switched to the saved kernel CR3 and long mode, so the remaining restores are architectural register state only.
+ unsafe {
+ x86::msr::wrmsr(x86::msr::IA32_EFER, context.efer);
+ core::arch::asm!("mov cr3, {}", in(reg) context.cr3, options(nostack));
+ core::arch::asm!("mov cr4, {}", in(reg) context.cr4, options(nostack));
+ core::arch::asm!("mov cr2, {}", in(reg) context.cr2, options(nostack));
+ core::arch::asm!("mov cr0, {}", in(reg) context.cr0, options(nostack));
+ core::arch::asm!("lgdt [{}]", in(reg) &context.gdtr, options(nostack));
+ core::arch::asm!("lidt [{}]", in(reg) &context.idtr, options(nostack));
+
+ task::load_tr(SegmentSelector::new(crate::arch::gdt::GDT_TSS as u16, Ring::Ring0));
+
+ x86::msr::wrmsr(x86::msr::IA32_FS_BASE, context.fs_base);
+ x86::msr::wrmsr(x86::msr::IA32_GS_BASE, context.gs_base);
+ x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, context.kernel_gs_base);
+ }
+
+ restore_fpu_state(&context);
+
+ // SAFETY: Returning with the original entry stack and RFLAGS completes the suspend call as a successful function return.
+ unsafe {
+ core::arch::asm!(
+ "mov rsp, {entry_rsp}",
+ "push {rflags}",
+ "popfq",
+ "xor eax, eax",
+ "ret",
+ entry_rsp = in(reg) context.entry_rsp,
+ rflags = in(reg) context.rflags,
+ options(noreturn)
+ );
+ }
+}
+
+pub fn enter_sleep_state(state: SleepState) -> core::result::Result<(), SleepError> {
+ #[cfg(not(target_arch = "x86_64"))]
+ {
+ let _ = state;
+ return Err(SleepError::UnsupportedArch);
+ }
+
+ #[cfg(target_arch = "x86_64")]
+ {
+ let raw = unsafe {
+ enter_sleep_raw(match state {
+ SleepState::S3 => 3,
+ SleepState::S5 => 5,
+ })
+ };
+ if raw == SLEEP_RETURN_OK {
+ Ok(())
+ } else {
+ Err(SleepError::from_code(raw))
+ }
+ }
+}
+
+pub fn available_sleep_states() -> &'static [u8] {
+ if sleep_type_data(SleepState::S3).is_ok() {
+ b"S3\nS5\n"
+ } else {
+ b"S5\n"
+ }
+}
+
+pub fn trigger_sleep_request(request: &str) -> Result<(), Error> {
+ match request.trim() {
+ "S3" => enter_sleep_state(SleepState::S3).map_err(|_| Error::new(EIO)),
+ "S5" => enter_sleep_state(SleepState::S5).map_err(|_| Error::new(EIO)),
+ _ => Err(Error::new(EINVAL)),
+ }
+}
diff --git a/src/arch/x86_shared/start.rs b/src/arch/x86_shared/start.rs
index 7a7c0ae8..f1dbb6b4 100644
--- a/src/arch/x86_shared/start.rs
+++ b/src/arch/x86_shared/start.rs
@@ -82,6 +82,15 @@ extern "C" fn kstart() {
/// The entry to Rust, all things must be initialized
unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
unsafe {
+ // EARLY CANARY: write 'R' to COM1 before any kernel init.
+ // This proves the serial hardware works and the kernel reached Rust entry.
+ // If this character appears but "Redox OS starting..." does not,
+ // the hang is in args_ptr.read(), serial::init(), or graphical_debug::init().
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ {
+ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'R', options(nostack, preserves_flags));
+ }
+
let bootstrap = {
let args = args_ptr.read();
@@ -91,27 +100,49 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
// Set up graphical debug
graphical_debug::init(args.env());
+ // SECOND CANARY: write 'S' to COM1 after serial init.
+ // If 'R' appears but 'S' does not, the hang is in serial::init() or graphical_debug::init().
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ {
+ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'S', options(nostack, preserves_flags));
+ }
+
info!("Redox OS starting...");
args.print();
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'1', options(nostack, preserves_flags)); }
+
// Set up GDT
gdt::init_bsp(stack_end);
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'2', options(nostack, preserves_flags)); }
+
// Set up IDT
idt::init_bsp();
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'3', options(nostack, preserves_flags)); }
+
// Initialize RMM
#[cfg(target_arch = "x86")]
crate::startup::memory::init(&args, Some(0x100000), Some(0x40000000));
#[cfg(target_arch = "x86_64")]
crate::startup::memory::init(&args, Some(0x100000), None);
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'4', options(nostack, preserves_flags)); }
+
// Initialize paging
paging::init();
#[cfg(target_arch = "x86_64")]
crate::arch::alternative::early_init(true);
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'5', options(nostack, preserves_flags)); }
+
// Set up syscall instruction
interrupt::syscall::init();
@@ -121,6 +152,9 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
// Activate memory logging
crate::log::init();
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'6', options(nostack, preserves_flags)); }
+
// Initialize miscellaneous processor features
#[cfg(target_arch = "x86_64")]
crate::arch::misc::init(LogicalCpuId::BSP);
@@ -128,6 +162,9 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
// Initialize devices
device::init();
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'7', options(nostack, preserves_flags)); }
+
// Read ACPI tables, starts APs
if cfg!(feature = "acpi") {
crate::acpi::init(args.acpi_rsdp());
diff --git a/src/asm/x86_64/s3_wakeup.asm b/src/asm/x86_64/s3_wakeup.asm
new file mode 100644
index 00000000..7beeccf6
--- /dev/null
+++ b/src/asm/x86_64/s3_wakeup.asm
@@ -0,0 +1,110 @@
+; ACPI S3 wake trampoline
+; compiled with nasm by build.rs, copied to physical 0x8000 before S3 entry
+
+ORG 0x8000
+SECTION .text
+USE16
+
+trampoline:
+ jmp short startup_wake
+ times 8 - ($ - trampoline) nop
+ .stack: dq 0
+ .page_table: dq 0
+ .code: dq 0
+
+startup_wake:
+ cli
+
+ xor ax, ax
+ mov ds, ax
+ mov es, ax
+ mov ss, ax
+ mov sp, 0
+
+ mov edi, [trampoline.page_table]
+ mov cr3, edi
+
+ mov eax, cr0
+ and al, 11110011b
+ or al, 00100010b
+ mov cr0, eax
+
+ mov eax, cr4
+ or eax, 1 << 9 | 1 << 7 | 1 << 5 | 1 << 4
+ mov cr4, eax
+
+ fninit
+
+ lgdt [gdtr]
+
+ mov ecx, 0xC0000080
+ rdmsr
+ or eax, 1 << 11 | 1 << 8
+ wrmsr
+
+ mov ebx, cr0
+ or ebx, 1 << 31 | 1 << 16 | 1
+ mov cr0, ebx
+
+ jmp gdt.kernel_code:long_mode_wake
+
+USE64
+long_mode_wake:
+ mov rax, gdt.kernel_data
+ mov ds, rax
+ mov es, rax
+ mov fs, rax
+ mov gs, rax
+ mov ss, rax
+
+ mov rsp, [trampoline.stack]
+ mov rax, [trampoline.code]
+ jmp rax
+
+struc GDTEntry
+ .limitl resw 1
+ .basel resw 1
+ .basem resb 1
+ .attribute resb 1
+ .flags__limith resb 1
+ .baseh resb 1
+endstruc
+
+attrib:
+ .present equ 1 << 7
+ .user equ 1 << 4
+ .code equ 1 << 3
+ .writable equ 1 << 1
+
+flags:
+ .long_mode equ 1 << 5
+
+gdtr:
+ dw gdt.end + 1
+ dq gdt
+
+gdt:
+.null equ $ - gdt
+ dq 0
+
+.kernel_code equ $ - gdt
+istruc GDTEntry
+ at GDTEntry.limitl, dw 0
+ at GDTEntry.basel, dw 0
+ at GDTEntry.basem, db 0
+ at GDTEntry.attribute, db attrib.present | attrib.user | attrib.code
+ at GDTEntry.flags__limith, db flags.long_mode
+ at GDTEntry.baseh, db 0
+iend
+
+.kernel_data equ $ - gdt
+istruc GDTEntry
+ at GDTEntry.limitl, dw 0
+ at GDTEntry.basel, dw 0
+ at GDTEntry.basem, db 0
+ at GDTEntry.attribute, db attrib.present | attrib.user | attrib.writable
+ at GDTEntry.flags__limith, db 0
+ at GDTEntry.baseh, db 0
+iend
+
+.end equ $ - gdt
diff --git a/src/context/context.rs b/src/context/context.rs
index c97c5166..6d723f49 100644
--- a/src/context/context.rs
+++ b/src/context/context.rs
@@ -148,6 +148,8 @@ pub struct Context {
pub euid: u32,
pub egid: u32,
pub pid: usize,
+ /// Supplementary group IDs for access control decisions.
+ pub groups: Vec<u32>,
// See [`PreemptGuard`]
//
@@ -204,6 +206,7 @@ impl Context {
euid: 0,
egid: 0,
pid: 0,
+ groups: Vec::new(),
#[cfg(feature = "syscall_debug")]
syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(),
@@ -479,6 +482,7 @@ impl Context {
uid: self.euid,
gid: self.egid,
pid: self.pid,
+ groups: self.groups.clone(),
}
}
}
diff --git a/src/context/file.rs b/src/context/file.rs
index 2d3790f1..150f483a 100644
--- a/src/context/file.rs
+++ b/src/context/file.rs
@@ -4,7 +4,7 @@ use crate::{
event,
scheme::{self, SchemeId},
sync::{CleanLockToken, RwLock, L6},
- syscall::error::Result,
+ syscall::error::{Error, Result, ESTALE},
};
use alloc::sync::Arc;
use syscall::{schemev2::NewFdFlags, RwFlags, O_APPEND, O_NONBLOCK};
@@ -18,6 +18,7 @@ pub struct FileDescription {
pub offset: u64,
/// The scheme that this file refers to
pub scheme: SchemeId,
+ pub scheme_generation: Option<u64>,
/// The number the scheme uses to refer to this file
pub number: usize,
/// The flags passed to open or fcntl(SETFL)
@@ -32,6 +33,52 @@ bitflags! {
}
}
impl FileDescription {
+ pub fn with_generation(
+ scheme: SchemeId,
+ scheme_generation: Option<u64>,
+ number: usize,
+ offset: u64,
+ flags: u32,
+ internal_flags: InternalFlags,
+ ) -> Self {
+ Self {
+ offset,
+ scheme,
+ scheme_generation,
+ number,
+ flags,
+ internal_flags,
+ }
+ }
+
+ pub fn new(
+ scheme: SchemeId,
+ number: usize,
+ offset: u64,
+ flags: u32,
+ internal_flags: InternalFlags,
+ token: &mut CleanLockToken,
+ ) -> Self {
+ Self::with_generation(
+ scheme,
+ Some(scheme::current_scheme_generation(token.token(), scheme)),
+ number,
+ offset,
+ flags,
+ internal_flags,
+ )
+ }
+
+ pub fn get_scheme(&self, token: &mut CleanLockToken) -> Result<scheme::KernelSchemes> {
+ if let Some(expected_generation) = self.scheme_generation
+ && expected_generation != scheme::current_scheme_generation(token.token(), self.scheme)
+ {
+ return Err(Error::new(ESTALE));
+ }
+
+ scheme::get_scheme(token.token(), self.scheme)
+ }
+
pub fn rw_flags(&self, rw: RwFlags) -> u32 {
let mut ret = self.flags & !(O_NONBLOCK | O_APPEND) as u32;
if rw.contains(RwFlags::APPEND) {
@@ -76,7 +123,7 @@ impl FileDescription {
pub fn try_close(self, token: &mut CleanLockToken) -> Result<()> {
event::unregister_file(self.scheme, self.number, token);
- let scheme = scheme::get_scheme(token.token(), self.scheme)?;
+ let scheme = self.get_scheme(token)?;
scheme.close(self.number, token)
}
@@ -85,12 +132,12 @@ impl FileDescription {
impl FileDescriptor {
pub fn close(self, token: &mut CleanLockToken) -> Result<()> {
{
- let (scheme_id, number, internal_flags) = {
+ let (desc, number, internal_flags) = {
let desc = self.description.read(token.token());
- (desc.scheme, desc.number, desc.internal_flags)
+ (*desc, desc.number, desc.internal_flags)
};
if internal_flags.contains(InternalFlags::NOTIFY_ON_NEXT_DETACH) {
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+ let scheme = desc.get_scheme(token)?;
scheme.detach(number, token)?;
}
}
diff --git a/src/context/memory.rs b/src/context/memory.rs
index 93446ba7..127a34fd 100644
--- a/src/context/memory.rs
+++ b/src/context/memory.rs
@@ -64,14 +64,13 @@ impl UnmapResult {
return Ok(());
};
- let (scheme_id, number) = {
- let desc = description.write(token.token());
- (desc.scheme, desc.number)
+ let (scheme, number) = {
+ let desc = *description.read(token.token());
+ (desc.get_scheme(token)?, desc.number)
};
- let scheme_opt = scheme::get_scheme(token.token(), scheme_id);
- let funmap_result = scheme_opt
- .and_then(|scheme| scheme.kfunmap(number, base_offset, self.size, self.flags, token));
+ let funmap_result = scheme
+ .kfunmap(number, base_offset, self.size, self.flags, token);
if let Ok(fd) = Arc::try_unwrap(description) {
fd.into_inner().try_close(token)?;
@@ -2687,20 +2686,13 @@ fn correct_inner<'l>(
// XXX: This is cheating, but guaranteed we won't deadlock because we've dropped addr_space_guard
let mut token = unsafe { CleanLockToken::new() };
- let (scheme_id, scheme_number) = {
- let desc = &file_ref.description.read(token.token());
- (desc.scheme, desc.number)
+ let desc = *file_ref.description.read(token.token());
+ let scheme = desc.get_scheme(&mut token).map_err(|_| PfError::Segv)?;
+ let scheme_number = desc.number;
+ let user_inner = match scheme {
+ KernelSchemes::User(user) => user.inner,
+ _ => return Err(PfError::Segv),
};
- let user_inner = scheme::get_scheme(token.token(), scheme_id)
- .ok()
- .and_then(|s| {
- if let KernelSchemes::User(user) = s {
- Some(user.inner)
- } else {
- None
- }
- })
- .ok_or(PfError::Segv)?;
let offset = file_ref.base_offset as u64 + (pages_from_grant_start * PAGE_SIZE) as u64;
user_inner
diff --git a/src/scheme/acpi.rs b/src/scheme/acpi.rs
index 87570a12..5d734691 100644
--- a/src/scheme/acpi.rs
+++ b/src/scheme/acpi.rs
@@ -10,6 +10,7 @@ use syscall::{
use crate::{
acpi::{RxsdtEnum, RXSDT_ENUM},
+ arch::sleep,
context::file::InternalFlags,
event,
sync::{CleanLockToken, RwLock, WaitCondition, L1},
@@ -40,6 +41,7 @@ enum HandleKind {
TopLevel,
Rxsdt,
ShutdownPipe,
+ SleepControl,
SchemeRoot,
}
@@ -146,11 +148,11 @@ impl KernelScheme for AcpiScheme {
if flags & O_EXCL == O_EXCL || flags & O_SYMLINK == O_SYMLINK {
return Err(Error::new(EINVAL));
}
- if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
- return Err(Error::new(EROFS));
- }
let (handle_kind, int_flags) = match path {
"" => {
+ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
+ return Err(Error::new(EROFS));
+ }
if flags & O_DIRECTORY != O_DIRECTORY && flags & O_STAT != O_STAT {
return Err(Error::new(EISDIR));
}
@@ -158,17 +160,36 @@ impl KernelScheme for AcpiScheme {
(HandleKind::TopLevel, InternalFlags::POSITIONED)
}
"rxsdt" => {
+ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
+ return Err(Error::new(EROFS));
+ }
if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT {
return Err(Error::new(ENOTDIR));
}
(HandleKind::Rxsdt, InternalFlags::POSITIONED)
}
"kstop" => {
+ if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
+ return Err(Error::new(EROFS));
+ }
if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT {
return Err(Error::new(ENOTDIR));
}
(HandleKind::ShutdownPipe, InternalFlags::empty())
}
+ "sleep" => {
+ if flags & O_ACCMODE == O_RDONLY || flags & O_STAT == O_STAT {
+ // allowed
+ } else if flags & O_ACCMODE != syscall::flag::O_WRONLY
+ && flags & O_ACCMODE != syscall::flag::O_RDWR
+ {
+ return Err(Error::new(EINVAL));
+ }
+ if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT {
+ return Err(Error::new(ENOTDIR));
+ }
+ (HandleKind::SleepControl, InternalFlags::POSITIONED)
+ }
_ => return Err(Error::new(ENOENT)),
};
@@ -191,6 +212,7 @@ impl KernelScheme for AcpiScheme {
Ok(match handle.kind {
HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?.len() as u64,
HandleKind::ShutdownPipe => 1,
+ HandleKind::SleepControl => sleep::available_sleep_states().len() as u64,
HandleKind::TopLevel => 0,
HandleKind::SchemeRoot => return Err(Error::new(EBADF))?,
})
@@ -253,6 +275,7 @@ impl KernelScheme for AcpiScheme {
return dst_buf.copy_exactly(&[0x42]).map(|()| 1);
}
+ HandleKind::SleepControl => sleep::available_sleep_states(),
HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?,
HandleKind::TopLevel => return Err(Error::new(EISDIR)),
HandleKind::SchemeRoot => return Err(Error::new(EBADF)),
@@ -295,11 +318,45 @@ impl KernelScheme for AcpiScheme {
kind: DirentKind::Socket,
name: "kstop",
inode: 0,
+ next_opaque_id: 2,
+ })?;
+ }
+ if opaque <= 2 {
+ buf.entry(DirEntry {
+ kind: DirentKind::Regular,
+ name: "sleep",
+ inode: 0,
next_opaque_id: u64::MAX,
})?;
}
Ok(buf.finalize())
}
+ fn kwrite(
+ &self,
+ id: usize,
+ buf: crate::syscall::usercopy::UserSliceRo,
+ _flags: u32,
+ _stored_flags: u32,
+ token: &mut CleanLockToken,
+ ) -> Result<usize> {
+ let handle = *HANDLES.read(token.token()).get(id)?;
+
+ if handle.stat {
+ return Err(Error::new(EBADF));
+ }
+
+ match handle.kind {
+ HandleKind::SleepControl => {
+ let mut tmp = [0_u8; 16];
+ let len = buf.copy_common_bytes_to_slice(&mut tmp)?;
+ let request = core::str::from_utf8(&tmp[..len]).map_err(|_| Error::new(EINVAL))?;
+ sleep::trigger_sleep_request(request)?;
+ Ok(len)
+ }
+ HandleKind::SchemeRoot => Err(Error::new(EBADF)),
+ _ => Err(Error::new(EBADF)),
+ }
+ }
fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result<usize> {
//TODO: construct useful path?
buf.copy_common_bytes_from_slice("/scheme/kernel.acpi/".as_bytes())
@@ -328,6 +385,11 @@ impl KernelScheme for AcpiScheme {
st_size: 1,
..Default::default()
},
+ HandleKind::SleepControl => Stat {
+ st_mode: MODE_FILE,
+ st_size: sleep::available_sleep_states().len().try_into().unwrap_or(u64::MAX),
+ ..Default::default()
+ },
HandleKind::SchemeRoot => return Err(Error::new(EBADF)),
})?;
diff --git a/src/scheme/debug.rs b/src/scheme/debug.rs
index c70ac579..4a23b3cf 100644
--- a/src/scheme/debug.rs
+++ b/src/scheme/debug.rs
@@ -22,9 +22,10 @@ struct Handle {
static HANDLES: RwLock<L1, HandleMap<Handle>> = RwLock::new(HandleMap::new());
-/// Add to the input queue
+/// Add to the input queue, translating CR to NL (ICRNL) for serial console compatibility.
pub fn debug_input(data: u8, token: &mut CleanLockToken) {
- INPUT.send(data, token);
+ let translated = if data == b'\r' { b'\n' } else { data };
+ INPUT.send(translated, token);
}
// Notify readers of input updates
@@ -106,12 +107,16 @@ impl KernelScheme for DebugScheme {
fn fevent(
&self,
id: usize,
- _flags: EventFlags,
+ flags: EventFlags,
token: &mut CleanLockToken,
) -> Result<EventFlags> {
let _handle = *HANDLES.read(token.token()).get(id)?;
- Ok(EventFlags::empty())
+ let mut ready = EventFlags::empty();
+ if flags.contains(EventFlags::EVENT_READ) {
+ ready |= EventFlags::EVENT_READ;
+ }
+ Ok(ready)
}
fn fsync(&self, id: usize, token: &mut CleanLockToken) -> Result<()> {
diff --git a/src/scheme/mod.rs b/src/scheme/mod.rs
index d30272c1..765e547f 100644
--- a/src/scheme/mod.rs
+++ b/src/scheme/mod.rs
@@ -14,7 +14,7 @@ use alloc::{
};
use core::{
str,
- sync::atomic::{AtomicUsize, Ordering},
+ sync::atomic::{AtomicU64, AtomicUsize, Ordering},
};
use hashbrown::hash_map::{self, DefaultHashBuilder, HashMap};
use spin::Once;
@@ -169,6 +169,7 @@ enum Handle {
/// Schemes list
static HANDLES: Once<RwLock<L1, HashMap<SchemeId, Handle>>> = Once::new();
+static SCHEME_GENERATIONS: Once<RwLock<L1, HashMap<SchemeId, AtomicU64>>> = Once::new();
static SCHEME_LIST_NEXT_ID: AtomicUsize = AtomicUsize::new(MAX_GLOBAL_SCHEMES);
static SCHEME_LIST_ID: AtomicUsize = AtomicUsize::new(0);
@@ -204,6 +205,10 @@ fn init_schemes() -> RwLock<L1, HashMap<SchemeId, Handle>> {
RwLock::new(handles)
}
+fn init_scheme_generations() -> RwLock<L1, HashMap<SchemeId, AtomicU64>> {
+ RwLock::new(HashMap::new())
+}
+
/// Get a handle to a scheme.
pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result<KernelSchemes> {
match handles().read(token).get(&scheme_id) {
@@ -212,10 +217,33 @@ pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result<Kerne
}
}
+pub fn current_scheme_generation(token: LockToken<'_, L0>, scheme_id: SchemeId) -> u64 {
+ scheme_generations()
+ .read(token)
+ .get(&scheme_id)
+ .map(|generation| generation.load(Ordering::Acquire))
+ .unwrap_or(0)
+}
+
fn handles<'a>() -> &'a RwLock<L1, HashMap<SchemeId, Handle>> {
HANDLES.call_once(init_schemes)
}
+fn scheme_generations<'a>() -> &'a RwLock<L1, HashMap<SchemeId, AtomicU64>> {
+ SCHEME_GENERATIONS.call_once(init_scheme_generations)
+}
+
+fn increment_scheme_generation(scheme_id: SchemeId, token: &mut CleanLockToken) {
+ match scheme_generations().write(token.token()).entry(scheme_id) {
+ hash_map::Entry::Occupied(entry) => {
+ entry.get().fetch_add(1, Ordering::AcqRel);
+ }
+ hash_map::Entry::Vacant(entry) => {
+ entry.insert(AtomicU64::new(1));
+ }
+ }
+}
+
/// Scheme list type
pub struct SchemeList;
@@ -260,9 +288,14 @@ impl SchemeList {
/// Remove a scheme
fn remove(&self, id: usize, token: &mut CleanLockToken) {
- let scheme = handles().write(token.token()).remove(&SchemeId(id));
+ let scheme_id = SchemeId(id);
+ let scheme = handles().write(token.token()).remove(&scheme_id);
assert!(scheme.is_some());
+ if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme.as_ref() {
+ user.inner.fail_pending_calls(token);
+ }
+ increment_scheme_generation(scheme_id, token);
if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme
&& let Some(user) = Arc::into_inner(user.inner)
{
@@ -287,32 +320,32 @@ impl KernelScheme for SchemeList {
token: &mut CleanLockToken,
) -> Result<OpenResult> {
let scheme_id = SchemeId(scheme_id);
- match handles()
- .read(token.token())
- .get(&scheme_id)
- .ok_or(Error::new(EBADF))?
- {
- Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => {
- let inner = inner.clone();
- assert!(scheme_id == inner.scheme_id);
- let scheme = scheme_id;
- let params = unsafe { user_buf.read_exact::<NewFdParams>()? };
-
- return Ok(OpenResult::External(Arc::new(RwLock::new(
- FileDescription {
- scheme,
- number: params.number,
- offset: params.offset,
- flags: params.flags as u32,
- internal_flags: InternalFlags::from_extra0(params.internal_flags)
- .ok_or(Error::new(EINVAL))?,
- },
- ))));
+ let maybe_inner = {
+ let handles = handles().read(token.token());
+ match handles.get(&scheme_id).ok_or(Error::new(EBADF))? {
+ Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => Some(inner.clone()),
+ Handle::SchemeCreationCapability => None,
+ _ => return Err(Error::new(EBADF)),
}
- Handle::SchemeCreationCapability => (),
- _ => return Err(Error::new(EBADF)),
};
+ if let Some(inner) = maybe_inner {
+ assert!(scheme_id == inner.scheme_id);
+ let params = unsafe { user_buf.read_exact::<NewFdParams>()? };
+
+ return Ok(OpenResult::External(Arc::new(RwLock::new(
+ FileDescription::new(
+ scheme_id,
+ params.number,
+ params.offset,
+ params.flags as u32,
+ InternalFlags::from_extra0(params.internal_flags)
+ .ok_or(Error::new(EINVAL))?,
+ token,
+ ),
+ ))));
+ }
+
const EXPECTED: &[u8] = b"create-scheme";
let mut buf = [0u8; EXPECTED.len()];
@@ -777,6 +810,7 @@ pub struct CallerCtx {
pub pid: usize,
pub uid: u32,
pub gid: u32,
+ pub groups: alloc::vec::Vec<u32>,
}
impl CallerCtx {
pub fn filter_uid_gid(self, euid: u32, egid: u32) -> Self {
@@ -785,6 +819,7 @@ impl CallerCtx {
pid: self.pid,
uid: euid,
gid: egid,
+ groups: self.groups,
}
} else {
self
diff --git a/src/scheme/proc.rs b/src/scheme/proc.rs
index 47588e10..f38c4aec 100644
--- a/src/scheme/proc.rs
+++ b/src/scheme/proc.rs
@@ -105,6 +105,7 @@ enum ContextHandle {
// Attr handles, to set ens/euid/egid/pid.
Authority,
Attr,
+ Groups,
Status {
privileged: bool,
@@ -261,6 +262,7 @@ impl ProcScheme {
let handle = match actual_name {
"attrs" => ContextHandle::Attr,
"status" => ContextHandle::Status { privileged: true },
+ "groups" => ContextHandle::Groups,
_ => return Err(Error::new(ENOENT)),
};
@@ -306,6 +308,11 @@ impl ProcScheme {
let id = NonZeroUsize::new(NEXT_ID.fetch_add(1, Ordering::Relaxed))
.ok_or(Error::new(EMFILE))?;
let context = context::spawn(true, Some(id), ret, token)?;
+ {
+ let parent_groups =
+ context::current().read(token.token()).groups.clone();
+ context.write(token.token()).groups = parent_groups;
+ }
HANDLES.write(token.token()).insert(
id.get(),
Handle {
@@ -849,17 +856,17 @@ impl KernelScheme for ProcScheme {
}
}
fn extract_scheme_number(fd: usize, token: &mut CleanLockToken) -> Result<(KernelSchemes, usize)> {
- let (scheme_id, number) = {
+ let desc = {
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
+ let (context, mut context_token) = current.token_split();
let file_descriptor = context
- .get_file(FileHandle::from(fd), &mut token)
+ .get_file(FileHandle::from(fd), &mut context_token)
.ok_or(Error::new(EBADF))?;
- let desc = file_descriptor.description.read(token.token());
- (desc.scheme, desc.number)
+ *file_descriptor.description.read(context_token.token())
};
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+ let scheme = desc.get_scheme(token)?;
+ let number = desc.number;
Ok((scheme, number))
}
@@ -1271,6 +1278,39 @@ impl ContextHandle {
guard.prio = (info.prio as usize).min(39);
Ok(size_of::<ProcSchemeAttrs>())
}
+ Self::Groups => {
+ const NGROUPS_MAX: usize = 65536;
+ if buf.len() % size_of::<u32>() != 0 {
+ return Err(Error::new(EINVAL));
+ }
+ let count = buf.len() / size_of::<u32>();
+ if count > NGROUPS_MAX {
+ return Err(Error::new(EINVAL));
+ }
+ let mut groups = Vec::with_capacity(count);
+ for chunk in buf.in_exact_chunks(size_of::<u32>()).take(count) {
+ groups.push(chunk.read_u32()?);
+ }
+ let proc_id = {
+ let guard = context.read(token.token());
+ guard.owner_proc_id
+ };
+ {
+ let mut guard = context.write(token.token());
+ guard.groups = groups.clone();
+ }
+ if let Some(pid) = proc_id {
+ let mut contexts = context::contexts(token.downgrade());
+ let (contexts, mut t) = contexts.token_split();
+ for context_ref in contexts.iter() {
+ let mut ctx = context_ref.write(t.token());
+ if ctx.owner_proc_id == Some(pid) {
+ ctx.groups = groups.clone();
+ }
+ }
+ }
+ Ok(count * size_of::<u32>())
+ }
ContextHandle::OpenViaDup => {
let mut args = buf.usizes();
@@ -1475,6 +1515,15 @@ impl ContextHandle {
debug_name,
})
}
+ Self::Groups => {
+ let c = &context.read(token.token());
+ let max = buf.len() / size_of::<u32>();
+ let count = c.groups.len().min(max);
+ for (chunk, gid) in buf.in_exact_chunks(size_of::<u32>()).zip(&c.groups).take(count) {
+ chunk.copy_from_slice(&gid.to_ne_bytes())?;
+ }
+ Ok(count * size_of::<u32>())
+ }
ContextHandle::Sighandler => {
let data = match context.read(token.token()).sig {
Some(ref sig) => SetSighandlerData {
diff --git a/src/scheme/user.rs b/src/scheme/user.rs
index b9013021..dfbf66b1 100644
--- a/src/scheme/user.rs
+++ b/src/scheme/user.rs
@@ -80,6 +80,7 @@ const ONE: NonZeroUsize = match NonZeroUsize::new(1) {
Some(one) => one,
None => unreachable!(),
};
+const MAX_SPURIOUS_WAKEUPS: usize = 100;
enum ParsedCqe {
TriggerFevent {
@@ -209,6 +210,8 @@ impl UserInner {
caller_responsible: &mut PageSpan,
token: &mut CleanLockToken,
) -> Result<Response> {
+ let mut remaining_spurious_wakeups = MAX_SPURIOUS_WAKEUPS;
+
{
// Disable preemption to avoid context switches between setting the
// process state and sending the scheme request. The process is made
@@ -261,7 +264,10 @@ impl UserInner {
};
let states = self.states.lock(token.token());
- let (mut states, mut token) = states.into_split();
+ let (mut states, mut state_token) = states.into_split();
+ let mut timed_out_descriptions = None;
+ let mut remove_state = false;
+ let mut timed_out = false;
match states.get_mut(sqe.tag as usize) {
// invalid state
None => return Err(Error::new(EBADFD)),
@@ -274,24 +280,35 @@ impl UserInner {
fds,
} => {
let maybe_eintr =
- eintr_if_sigkill(&mut callee_responsible, &mut token.token());
- *o = State::Waiting {
- canceling: true,
- callee_responsible,
- context,
- fds,
- };
+ eintr_if_sigkill(&mut callee_responsible, &mut state_token.token());
- maybe_eintr?;
+ if maybe_eintr.is_ok() {
+ remaining_spurious_wakeups =
+ remaining_spurious_wakeups.saturating_sub(1);
+ }
+
+ if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 {
+ timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds));
+ remove_state = true;
+ } else {
+ *o = State::Waiting {
+ canceling: true,
+ callee_responsible,
+ context,
+ fds,
+ };
+ }
- context::current()
- .write(token.token())
- .block("UserInner::call (woken up after cancelation request)");
+ maybe_eintr?;
- // We do not want to drop the lock before blocking
- // as if we get preempted in between we might miss a
- // wakeup.
- drop(states);
+ if remove_state {
+ states.remove(sqe.tag as usize);
+ timed_out = true;
+ } else {
+ context::current()
+ .write(state_token.token())
+ .block("UserInner::call (woken up after cancelation request)");
+ }
}
// spurious wakeup
State::Waiting {
@@ -300,60 +317,76 @@ impl UserInner {
context,
mut callee_responsible,
} => {
- let maybe_eintr = eintr_if_sigkill(&mut callee_responsible, &mut token);
let current_context = context::current();
+ let maybe_eintr =
+ eintr_if_sigkill(&mut callee_responsible, &mut state_token);
+
+ if maybe_eintr.is_ok() {
+ remaining_spurious_wakeups =
+ remaining_spurious_wakeups.saturating_sub(1);
+ }
- *o = State::Waiting {
- // Currently we treat all spurious wakeups to have the same behavior
- // as signals (i.e., we send a cancellation request). It is not something
- // that should happen, but it certainly can happen, for example if a context
- // is awoken through its thread handle without setting any sig bits, or if the
- // caller clears its own sig bits. If it actually is a signal, then it is the
- // intended behavior.
- canceling: true,
- fds,
- context,
- callee_responsible,
- };
+ if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 {
+ timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds));
+ remove_state = true;
+ } else {
+ *o = State::Waiting {
+ // Currently we treat all spurious wakeups to have the same behavior
+ // as signals (i.e., we send a cancellation request). It is not something
+ // that should happen, but it certainly can happen, for example if a context
+ // is awoken through its thread handle without setting any sig bits, or if the
+ // caller clears its own sig bits. If it actually is a signal, then it is the
+ // intended behavior.
+ canceling: true,
+ fds,
+ context,
+ callee_responsible,
+ };
+ }
maybe_eintr?;
- // We do not want to preempt between sending the
- // cancellation and blocking again where we might
- // miss a wakeup.
- let mut preempt = PreemptGuardL1::new(&current_context, &mut token);
- let token = preempt.token();
-
- self.todo.send_locked(
- Sqe {
- opcode: Opcode::Cancel as u8,
- sqe_flags: SqeFlags::ONEWAY,
- tag: sqe.tag,
- ..Default::default()
- },
- token.token(),
- );
- event::trigger_locked(
- self.root_id,
- self.scheme_id.get(),
- EVENT_READ,
- token.token(),
- );
-
- // 1. If cancellation was requested and arrived
- // before the scheme processed the request, an
- // acknowledgement will be sent back after the
- // cancellation is processed and we will be woken up
- // again. State will be State::Responded then.
- //
- // 2. If cancellation was requested but the scheme
- // already processed the request, we will receive
- // the actual response next and woken up again.
- // State will be State::Responded then.
- context::current()
- .write(token.token())
- .block("UserInner::call (spurious wakeup)");
- drop(states);
+ if remove_state {
+ states.remove(sqe.tag as usize);
+ timed_out = true;
+ } else {
+ // We do not want to preempt between sending the
+ // cancellation and blocking again where we might
+ // miss a wakeup.
+ let mut preempt =
+ PreemptGuardL1::new(&current_context, &mut state_token);
+ let token = preempt.token();
+
+ self.todo.send_locked(
+ Sqe {
+ opcode: Opcode::Cancel as u8,
+ sqe_flags: SqeFlags::ONEWAY,
+ tag: sqe.tag,
+ ..Default::default()
+ },
+ token.token(),
+ );
+ event::trigger_locked(
+ self.root_id,
+ self.scheme_id.get(),
+ EVENT_READ,
+ token.token(),
+ );
+
+ // 1. If cancellation was requested and arrived
+ // before the scheme processed the request, an
+ // acknowledgement will be sent back after the
+ // cancellation is processed and we will be woken up
+ // again. State will be State::Responded then.
+ //
+ // 2. If cancellation was requested but the scheme
+ // already processed the request, we will receive
+ // the actual response next and woken up again.
+ // State will be State::Responded then.
+ context::current()
+ .write(token.token())
+ .block("UserInner::call (spurious wakeup)");
+ }
}
// invalid state
@@ -368,7 +401,67 @@ impl UserInner {
}
},
}
+
+ if let Some(descriptions) = timed_out_descriptions {
+ drop(states);
+ for desc in descriptions {
+ let _ = desc.try_close(token);
+ }
+ }
+
+ if timed_out {
+ return Err(Error::new(ETIMEDOUT));
+ }
+ }
+ }
+ }
+
+ fn collect_descriptions_to_close(
+ fds: Vec<Arc<LockedFileDescription>>,
+ ) -> Vec<FileDescription> {
+ fds.into_iter()
+ .filter_map(|fd| Arc::try_unwrap(fd).ok())
+ .map(RwLock::into_inner)
+ .collect()
+ }
+
+ pub fn fail_pending_calls(&self, token: &mut CleanLockToken) {
+ let descriptions_to_close = {
+ let mut states_lock = self.states.lock(token.token());
+ let (states, mut lock_token) = states_lock.token_split();
+ let mut descriptions_to_close = Vec::new();
+ let mut states_to_remove = Vec::new();
+
+ for (id, state) in states.iter_mut() {
+ match mem::replace(state, State::Placeholder) {
+ State::Waiting { context, fds, .. } => {
+ descriptions_to_close.extend(Self::collect_descriptions_to_close(fds));
+
+ match context.upgrade() {
+ Some(context) => {
+ *state = State::Responded(Response::Regular(
+ Err(Error::new(ENODEV)),
+ 0,
+ false,
+ ));
+ context.write(lock_token.token()).unblock();
+ }
+ None => states_to_remove.push(id),
+ }
+ }
+ old_state => *state = old_state,
+ }
}
+
+ for id in states_to_remove {
+ states.remove(id);
+ }
+
+ descriptions_to_close
+ };
+
+ for desc in descriptions_to_close {
+ let _ = desc.try_close(token);
}
}
@@ -1283,6 +1376,7 @@ impl UserInner {
}
pub fn into_drop(self, token: &mut CleanLockToken) {
+ self.fail_pending_calls(token);
self.todo.condition.into_drop(token);
}
}
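In the user.rs hunks above, repeated spurious wakeups are now bounded by MAX_SPURIOUS_WAKEUPS; exhausting the budget removes the pending state, closes the file descriptions it held, and reports ETIMEDOUT instead of blocking again. The standalone sketch below only illustrates that bounded-retry shape with a dummy poll closure; none of its names are kernel API.

// Illustrative bounded-retry loop: give up with a timeout error once the
// budget of tolerated spurious wakeups is exhausted, in the spirit of the
// patched UserInner::call.
const MAX_SPURIOUS_WAKEUPS: usize = 100;

#[derive(Debug, PartialEq)]
enum WaitError {
    TimedOut,
}

fn wait_with_budget(mut poll: impl FnMut() -> Option<u32>) -> Result<u32, WaitError> {
    let mut remaining = MAX_SPURIOUS_WAKEUPS;
    loop {
        if let Some(answer) = poll() {
            return Ok(answer);
        }
        // A wakeup without a response is "spurious"; spend one unit of budget.
        remaining = remaining.saturating_sub(1);
        if remaining == 0 {
            // In the kernel this is also where the stale request's file
            // descriptions are closed before ETIMEDOUT is reported.
            return Err(WaitError::TimedOut);
        }
    }
}

fn main() {
    let mut polls = 0;
    let result = wait_with_budget(|| {
        polls += 1;
        (polls == 3).then_some(42)
    });
    assert_eq!(result, Ok(42));

    assert_eq!(wait_with_budget(|| None), Err(WaitError::TimedOut));
}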
diff --git a/src/startup/memory.rs b/src/startup/memory.rs
index 26922dde..9fb5fb10 100644
--- a/src/startup/memory.rs
+++ b/src/startup/memory.rs
@@ -74,14 +74,16 @@ impl MemoryEntry {
}
struct MemoryMap {
- entries: [MemoryEntry; 512],
+ entries: [MemoryEntry; 1024],
size: usize,
}
impl MemoryMap {
fn register(&mut self, base: usize, size: usize, kind: BootloaderMemoryKind) {
if self.size >= self.entries.len() {
- panic!("Early memory map overflow!");
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+ unsafe { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'!', options(nostack, preserves_flags)); }
+ panic!("Early memory map overflow at entry {} (max {})", self.size, self.entries.len());
}
let start = if kind == BootloaderMemoryKind::Free {
align_up(base)
@@ -134,7 +136,7 @@ static MEMORY_MAP: SyncUnsafeCell<MemoryMap> = SyncUnsafeCell::new(MemoryMap {
start: 0,
end: 0,
kind: BootloaderMemoryKind::Null,
- }; 512],
+ }; 1024],
size: 0,
});
@@ -323,7 +325,16 @@ unsafe fn map_memory<A: Arch>(areas: &[MemoryArea], mut bump_allocator: &mut Bum
}
}
- let kernel_area = (*MEMORY_MAP.get()).kernel().unwrap();
+ let kernel_area = match (*MEMORY_MAP.get()).kernel() {
+ Some(area) => area,
+ None => {
+ println!("FATAL: kernel memory area not found in boot memory map");
+ println!("Cannot determine kernel base address. Halting.");
+ loop {
+ core::hint::spin_loop();
+ }
+ }
+ };
let kernel_base = kernel_area.start;
let kernel_size = kernel_area.end.saturating_sub(kernel_area.start);
// Map kernel at KERNEL_OFFSET
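The memory.rs hunks above grow the early memory map from 512 to 1024 entries and, on overflow, emit b'!' on COM1 (port 0x3F8) before panicking with the offending entry count. The sketch below mirrors only the bounded-insert guard in ordinary userspace Rust; the types are stand-ins and the serial poke is replaced by an error string.

// Illustrative fixed-capacity table insert with an explicit overflow report,
// in the shape of MemoryMap::register (Entry/Table are stand-in types).
#[derive(Clone, Copy, Debug, Default)]
struct Entry {
    start: usize,
    end: usize,
}

struct Table {
    entries: [Entry; 1024],
    size: usize,
}

impl Table {
    fn register(&mut self, start: usize, end: usize) -> Result<(), String> {
        if self.size >= self.entries.len() {
            // The kernel patch writes b'!' to COM1 here before panicking;
            // this userspace sketch just reports the limit instead.
            return Err(format!(
                "early memory map overflow at entry {} (max {})",
                self.size,
                self.entries.len()
            ));
        }
        self.entries[self.size] = Entry { start, end };
        self.size += 1;
        Ok(())
    }
}

fn main() {
    let mut table = Table {
        entries: [Entry::default(); 1024],
        size: 0,
    };
    assert!(table.register(0x1000, 0x2000).is_ok());
    assert_eq!(table.size, 1);
}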
diff --git a/src/startup/mod.rs b/src/startup/mod.rs
index 8ad3cdf7..86aabc22 100644
--- a/src/startup/mod.rs
+++ b/src/startup/mod.rs
@@ -149,6 +149,15 @@ static BOOTSTRAP: spin::Once<Bootstrap> = spin::Once::new();
pub(crate) static AP_READY: AtomicBool = AtomicBool::new(false);
static BSP_READY: AtomicBool = AtomicBool::new(false);
+#[cold]
+fn halt_boot(message: &str) -> ! {
+ print!("{message}");
+ println!("Kernel boot cannot continue. Halting.");
+ loop {
+ hint::spin_loop();
+ }
+}
+
/// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this
pub(crate) fn kmain(bootstrap: Bootstrap) -> ! {
let mut token = unsafe { CleanLockToken::new() };
@@ -180,9 +189,7 @@ pub(crate) fn kmain(bootstrap: Bootstrap) -> ! {
context.euid = 0;
context.egid = 0;
}
- Err(err) => {
- panic!("failed to spawn userspace_init: {:?}", err);
- }
+ Err(_err) => halt_boot("FATAL: failed to spawn first userspace process userspace_init\n"),
}
run_userspace(&mut token)
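The startup/mod.rs hunks above route the userspace_init failure through a #[cold], diverging halt_boot helper instead of panicking. A minimal userspace-flavoured sketch of the same shape follows, using std printing and std::hint::spin_loop in place of the kernel's facilities; it is only an illustration of the pattern.

// Illustrative: a cold, diverging boot-halt helper in the shape of halt_boot.
#[cold]
fn halt_boot(message: &str) -> ! {
    // The kernel version uses its own print!/println! macros and
    // core::hint::spin_loop(); the structure is the same here.
    eprint!("{message}");
    eprintln!("Kernel boot cannot continue. Halting.");
    loop {
        std::hint::spin_loop();
    }
}

fn main() {
    // Never invoked here (it diverges); the coercion just shows the signature.
    let _: fn(&str) -> ! = halt_boot;
}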
diff --git a/src/syscall/fs.rs b/src/syscall/fs.rs
index bf984641..10c6a92c 100644
--- a/src/syscall/fs.rs
+++ b/src/syscall/fs.rs
@@ -12,7 +12,7 @@ use crate::{
memory::{AddrSpace, GenericFlusher, Grant, PageSpan, TlbShootdownActions},
},
memory::{Page, VirtualAddress, PAGE_SIZE},
- scheme::{self, FileHandle, KernelScheme, OpenResult, StrOrBytes},
+ scheme::{FileHandle, KernelScheme, OpenResult, StrOrBytes},
sync::{CleanLockToken, RwLock},
syscall::{data::Stat, error::*, flag::*},
};
@@ -45,7 +45,7 @@ pub fn file_op_generic_ext<T>(
(file, desc)
};
- let scheme = scheme::get_scheme(token.token(), desc.scheme)?;
+ let scheme = desc.get_scheme(token)?;
op(&*scheme, file.description, desc, token)
}
@@ -73,14 +73,18 @@ pub fn openat(
) -> Result<FileHandle> {
let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?;
- let (scheme_id, number) = {
+ let desc = {
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
- let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?;
- let desc = pipe.description.read(token.token());
- (desc.scheme, desc.number)
+ let (context, mut context_token) = current.token_split();
+ let pipe = context
+ .get_file(fh, &mut context_token)
+ .ok_or(Error::new(EBADF))?;
+ *pipe.description.read(context_token.token())
};
+ let scheme = desc.get_scheme(token)?;
+ let number = desc.number;
+ let scheme_id = desc.scheme;
let caller_ctx = context::current()
.read(token.token())
@@ -88,8 +92,6 @@ pub fn openat(
.filter_uid_gid(euid, egid);
let new_description = {
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
-
let res = scheme.kopenat(
number,
StrOrBytes::from_str(&path_buf),
@@ -101,13 +103,14 @@ pub fn openat(
match res? {
OpenResult::SchemeLocal(number, internal_flags) => {
- Arc::new(RwLock::new(FileDescription {
- offset: 0,
- internal_flags,
- scheme: scheme_id,
+ Arc::new(RwLock::new(FileDescription::new(
+ scheme_id,
number,
- flags: (flags & !O_CLOEXEC) as u32,
- }))
+ 0,
+ (flags & !O_CLOEXEC) as u32,
+ internal_flags,
+ token,
+ )))
}
OpenResult::External(desc) => desc,
}
@@ -137,16 +140,17 @@ pub fn unlinkat(
) -> Result<()> {
let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?;
- let (number, scheme_id) = {
+ let desc = {
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
- let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?;
- let desc = pipe.description.read(token.token());
- (desc.number, desc.scheme)
+ let (context, mut context_token) = current.token_split();
+ let pipe = context
+ .get_file(fh, &mut context_token)
+ .ok_or(Error::new(EBADF))?;
+ *pipe.description.read(context_token.token())
};
-
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+ let number = desc.number;
+ let scheme = desc.get_scheme(token)?;
let caller_ctx = context::current()
.read(token.token())
@@ -199,17 +203,18 @@ fn duplicate_file(
let description = { *file.description.read(token.token()) };
let new_description = {
- let scheme = scheme::get_scheme(token.token(), description.scheme)?;
+ let scheme = description.get_scheme(token)?;
match scheme.kdup(description.number, user_buf, caller_ctx, token)? {
OpenResult::SchemeLocal(number, internal_flags) => {
- Arc::new(RwLock::new(FileDescription {
- offset: 0,
- internal_flags,
- scheme: description.scheme,
+ Arc::new(RwLock::new(FileDescription::new(
+ description.scheme,
number,
- flags: description.flags,
- }))
+ 0,
+ description.flags,
+ internal_flags,
+ token,
+ )))
}
OpenResult::External(desc) => desc,
}
@@ -296,11 +301,10 @@ fn call_normal(
}
.ok_or(Error::new(EBADF))?;
- let (scheme_id, number) = {
- let desc = file.description.read(token.token());
- (desc.scheme, desc.number)
+ let (scheme, number) = {
+ let desc = *file.description.read(token.token());
+ (desc.get_scheme(token)?, desc.number)
};
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
if flags.contains(CallFlags::STD_FS) {
scheme.translate_std_fs_call(number, file.description, payload, flags, metadata, token)
@@ -341,28 +345,28 @@ fn fdwrite_inner(
) -> Result<usize> {
// TODO: Ensure deadlocks can't happen
let (scheme, number, descs_to_send) = {
- let (scheme, number) = {
+ let desc = {
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
+ let (context, mut context_token) = current.token_split();
let file_descriptor = context
- .get_file(socket, &mut token)
+ .get_file(socket, &mut context_token)
.ok_or(Error::new(EBADF))?;
- let desc = &file_descriptor.description.read(token.token());
- (desc.scheme, desc.number)
+ *file_descriptor.description.read(context_token.token())
};
- let scheme = scheme::get_scheme(token.token(), scheme)?;
+ let scheme = desc.get_scheme(token)?;
+ let number = desc.number;
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
+ let (context, mut context_token) = current.token_split();
(
scheme,
number,
if flags.contains(CallFlags::FD_CLONE) {
- context.bulk_get_files(&target_fds, &mut token)
+ context.bulk_get_files(&target_fds, &mut context_token)
} else {
- context.bulk_remove_files(&target_fds, &mut token)
+ context.bulk_remove_files(&target_fds, &mut context_token)
}?
.into_iter()
.map(|f| f.description)
@@ -395,18 +399,22 @@ fn call_fdread(
metadata: &[u64],
token: &mut CleanLockToken,
) -> Result<usize> {
+ let desc = {
+ let current_lock = context::current();
+ let mut current = current_lock.read(token.token());
+ let (context, mut context_token) = current.token_split();
+ let file_descriptor = context
+ .get_file(fd, &mut context_token)
+ .ok_or(Error::new(EBADF))?;
+ *file_descriptor.description.read(context_token.token())
+ };
let (scheme, number) = {
- let (scheme, number) = {
- let current_lock = context::current();
- let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
- let file_descriptor = context.get_file(fd, &mut token).ok_or(Error::new(EBADF))?;
- let desc = file_descriptor.description.read(token.token());
- (desc.scheme, desc.number)
- };
- let scheme = scheme::get_scheme(token.token(), scheme)?;
-
- (scheme, number)
+ let scheme = desc.get_scheme(token)?;
+ let number = desc.number;
+ (
+ scheme,
+ number,
+ )
};
scheme.kfdread(number, payload, flags, metadata, token)
@@ -440,9 +448,9 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken)
}
.ok_or(Error::new(EBADF))?;
- let (scheme_id, number, flags) = {
- let desc = file.description.write(token.token());
- (desc.scheme, desc.number, desc.flags)
+ let (number, flags, desc) = {
+ let desc = *file.description.read(token.token());
+ (desc.number, desc.flags, desc)
};
if cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC {
@@ -460,7 +468,7 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken)
// Communicate fcntl with scheme
if cmd != F_GETFD && cmd != F_SETFD {
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
+ let scheme = desc.get_scheme(token)?;
scheme.fcntl(number, cmd, arg, token)?;
};
@@ -518,13 +526,11 @@ pub fn flink(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken)
let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?;
let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?;
- let (number, scheme_id) = {
- let desc = file.description.read(token.token());
- (desc.number, desc.scheme)
+ let (number, scheme) = {
+ let desc = *file.description.read(token.token());
+ (desc.number, desc.get_scheme(token)?)
};
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
-
// TODO: Check EXDEV.
/*
if scheme_id != description.scheme {
@@ -554,13 +560,11 @@ pub fn frename(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken
let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?;
let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?;
- let (number, scheme_id) = {
- let desc = file.description.read(token.token());
- (desc.number, desc.scheme)
+ let (number, scheme) = {
+ let desc = *file.description.read(token.token());
+ (desc.number, desc.get_scheme(token)?)
};
- let scheme = scheme::get_scheme(token.token(), scheme_id)?;
-
// TODO: Check EXDEV.
/*
if scheme_id != description.scheme {
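Across the fs.rs hunks above, each call site now copies the (Copy) FileDescription out of its lock and resolves the scheme through desc.get_scheme(token) rather than a separate scheme::get_scheme lookup keyed by a captured scheme id. The sketch below illustrates only that copy-out-then-lookup pattern with stand-in types; it is not the kernel's FileDescription or scheme registry.

// Illustrative: copy the small, Copy descriptor out of its lock first, then
// perform the fallible lookup without holding that lock (stand-in types).
use std::collections::HashMap;
use std::sync::RwLock;

#[derive(Clone, Copy)]
struct Desc {
    scheme: usize,
    number: usize,
}

fn resolve(
    locked: &RwLock<Desc>,
    registry: &HashMap<usize, &'static str>,
) -> Result<(&'static str, usize), &'static str> {
    // Copy out while holding the read lock as briefly as possible.
    let desc = *locked.read().unwrap();
    // The lookup happens after the guard has been dropped.
    let scheme = registry.get(&desc.scheme).copied().ok_or("EBADF")?;
    Ok((scheme, desc.number))
}

fn main() {
    let registry = HashMap::from([(1, "file"), (2, "pipe")]);
    let locked = RwLock::new(Desc { scheme: 2, number: 7 });
    assert_eq!(resolve(&locked, &registry), Ok(("pipe", 7)));
}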
diff --git a/src/syscall/process.rs b/src/syscall/process.rs
index e83da427..8a1d385e 100644
--- a/src/syscall/process.rs
+++ b/src/syscall/process.rs
@@ -271,23 +271,26 @@ unsafe fn bootstrap_mem(bootstrap: &crate::startup::Bootstrap) -> &'static [u8]
}
fn insert_fd(scheme: SchemeId, number: usize, cloexec: bool, token: &mut CleanLockToken) -> usize {
+ let description = Arc::new(RwLock::new(FileDescription::new(
+ scheme,
+ number,
+ 0,
+ (O_CREAT | O_RDWR) as u32,
+ InternalFlags::empty(),
+ token,
+ )));
+
let current_lock = context::current();
let mut current = current_lock.read(token.token());
- let (context, mut token) = current.token_split();
+ let (context, mut context_token) = current.token_split();
context
.add_file_min(
FileDescriptor {
- description: Arc::new(RwLock::new(FileDescription {
- scheme,
- number,
- offset: 0,
- flags: (O_CREAT | O_RDWR) as u32,
- internal_flags: InternalFlags::empty(),
- })),
+ description,
cloexec,
},
syscall::flag::UPPER_FDTBL_TAG + scheme.get(),
- &mut token,
+ &mut context_token,
)
.expect("failed to insert fd to current context")
.get()