Files
RedBear-OS/local/patches/kernel/P1-ioapic-hpet-nmi.patch
vasilito 6d48f80bea Fix IOAPIC/HPET/NMI, PS/2 driver, and remove duplicate VT service entries
- IOAPIC: enable full IOAPIC initialization on AMD/Intel bare metal,
  dual GSI 0/2 timer mapping for platform compatibility, NMI handler
  uses raw COM1 PIO writes to avoid mutex deadlock
- HPET: counter validation, graceful fallback to PIT when HPET missing
- PS/2: fix 0xFE RESEND handling in all MouseState variants; add
  controller flush, self-test retry, and AUX port test from Linux 7.0
- ACPI: defer AML evaluation to avoid blocking initfs driver spawn
- VT chain: remove duplicate rootfs service files (inputd, vesad,
  fbcond, getty) that were already handled by initfs phase 1 and the
  legacy 30_console script from minimal.toml
- QEMU verified: boots to login prompt, 20 rootfs units (was 26),
  single login prompt (was double), only 1 expected error (wifictl)
2026-04-24 00:57:19 +01:00

1530 lines
55 KiB
Diff

diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs
index 4dc23883..c52e0ab4 100644
--- a/src/acpi/madt/arch/x86.rs
+++ b/src/acpi/madt/arch/x86.rs
@@ -10,7 +10,8 @@ use crate::{
},
cpu_set::LogicalCpuId,
memory::{
- allocate_p2frame, Frame, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch,
+ allocate_p2frame, map_device_memory, Frame, KernelMapper, Page, PageFlags,
+ PhysicalAddress, RmmA, RmmArch,
VirtualAddress, PAGE_SIZE,
},
startup::AP_READY,
@@ -20,6 +21,55 @@ use super::{Madt, MadtEntry};
const TRAMPOLINE: usize = 0x8000;
static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline"));
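+// Iteration budget for AP startup spin-waits; a raw loop count, not calibrated wall-clock time.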
+const AP_STARTUP_TIMEOUT: u32 = 100_000_000;
+
+fn wait_for_ap_flag(flag: *mut u64, description: &str, apic_id: u32) -> bool {
+ let mut timeout = AP_STARTUP_TIMEOUT;
+ while unsafe { (*flag.cast::<AtomicU8>()).load(Ordering::SeqCst) } == 0 {
+ hint::spin_loop();
+ timeout -= 1;
+ if timeout == 0 {
+ debug!("AP {} {} timed out", apic_id, description);
+ return false;
+ }
+ }
+ true
+}
+
+fn wait_for_kernel_ap_ready(apic_id: u32) -> bool {
+ let mut timeout = AP_STARTUP_TIMEOUT;
+ while !AP_READY.load(Ordering::SeqCst) {
+ hint::spin_loop();
+ timeout -= 1;
+ if timeout == 0 {
+ debug!("AP {} kernel startup timed out", apic_id);
+ return false;
+ }
+ }
+ true
+}
+
+fn current_x2apic_processor_uid(madt: &Madt, apic_id: u32) -> Option<u32> {
+ madt.iter().find_map(|entry| match entry {
+ MadtEntry::LocalX2Apic(x2apic) if x2apic.x2apic_id == apic_id => Some(x2apic.processor_uid),
+ _ => None,
+ })
+}
+
+fn apply_lapic_address_override(local_apic: &mut crate::arch::device::local_apic::LocalApic, addr: u64) {
+ if local_apic.x2 || addr == 0 {
+ return;
+ }
+
+ let Ok(physaddr) = usize::try_from(addr) else {
+ warn!("Ignoring LAPIC address override {:#x}: does not fit host usize", addr);
+ return;
+ };
+
+ let mapped = unsafe { map_device_memory(PhysicalAddress::new(physaddr), 4096) }.data();
+ local_apic.address = mapped;
+ debug!("Applied LAPIC address override: {:#x}", addr);
+}
pub(super) fn init(madt: Madt) {
let local_apic = unsafe { the_local_apic() };
@@ -35,18 +85,19 @@ pub(super) fn init(madt: Madt) {
return;
}
- // Map trampoline
+ // Map trampoline writable and executable (trampoline page holds both code
+ // and AP argument data — AP writes ap_ready on the same page, so W^X is
+ // not possible without splitting code/data across pages).
let trampoline_frame = Frame::containing(PhysicalAddress::new(TRAMPOLINE));
let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE));
let (result, page_table_physaddr) = unsafe {
- //TODO: do not have writable and executable!
let mut mapper = KernelMapper::lock_rw();
let result = mapper
.map_phys(
trampoline_page.start_address(),
trampoline_frame.base(),
- PageFlags::new().execute(true).write(true),
+ PageFlags::new().write(true).execute(true),
)
.expect("failed to map trampoline");
@@ -75,12 +126,11 @@ pub(super) fn init(madt: Madt) {
let cpu_id = LogicalCpuId::next();
// Allocate a stack
- let stack_start = RmmA::phys_to_virt(
- allocate_p2frame(4)
- .expect("no more frames in acpi stack_start")
- .base(),
- )
- .data();
+ let Some(stack_frame) = allocate_p2frame(4) else {
+ warn!("Unable to allocate AP bootstrap stack for local APIC {}", ap_local_apic.id);
+ continue;
+ };
+ let stack_start = RmmA::phys_to_virt(stack_frame.base()).data();
let stack_end = stack_start + (PAGE_SIZE << 4);
let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end);
@@ -138,15 +188,168 @@ pub(super) fn init(madt: Madt) {
}
// Wait for trampoline ready
- while unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } == 0 {
+ let ready = wait_for_ap_flag(ap_ready, "trampoline startup", u32::from(ap_local_apic.id));
+ let kernel_ready = ready && wait_for_kernel_ap_ready(u32::from(ap_local_apic.id));
+
+ if !kernel_ready {
+ warn!("Skipping local APIC {} after startup timeout", ap_local_apic.id);
+ }
+
+ RmmA::invalidate_all();
+ }
+ } else if let MadtEntry::LocalX2Apic(ap_x2apic) = madt_entry {
+ let x2id = ap_x2apic.x2apic_id;
+ let x2flags = ap_x2apic.flags;
+ if x2id == me.get() {
+ debug!(" This is my local x2APIC");
+ } else if x2flags & 1 == 1 {
+ let cpu_id = LogicalCpuId::next();
+
+ let Some(stack_frame) = allocate_p2frame(4) else {
+ warn!(
+ "Unable to allocate AP bootstrap stack for x2APIC {}",
+ x2id
+ );
+ continue;
+ };
+ let stack_start = RmmA::phys_to_virt(stack_frame.base()).data();
+ let stack_end = stack_start + (PAGE_SIZE << 4);
+
+ let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end);
+ let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id);
+
+ let args = KernelArgsAp {
+ stack_end: stack_end as *mut u8,
+ cpu_id,
+ pcr_ptr,
+ idt_ptr,
+ };
+
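+ // Trampoline mailbox at TRAMPOLINE+8: ready flag, args pointer, page table,
+ // AP entry point, read by the real-mode trampoline in that order.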
+ let ap_ready = (TRAMPOLINE + 8) as *mut u64;
+ let ap_args_ptr = unsafe { ap_ready.add(1) };
+ let ap_page_table = unsafe { ap_ready.add(2) };
+ let ap_code = unsafe { ap_ready.add(3) };
+
+ unsafe {
+ ap_ready.write(0);
+ ap_args_ptr.write(&args as *const _ as u64);
+ ap_page_table.write(page_table_physaddr as u64);
+ #[expect(clippy::fn_to_numeric_cast)]
+ ap_code.write(kstart_ap as u64);
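+ // Empty asm! serves as a compiler barrier so the mailbox writes above
+ // are not reordered past the IPIs sent below.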
+ core::arch::asm!("");
+ };
+ AP_READY.store(false, Ordering::SeqCst);
+
+ // Same ICR delivery-mode bits are used by xAPIC and x2APIC; only the
+ // destination field encoding changes between the MMIO and MSR forms.
+ const ICR_INIT_ASSERT: u64 = 0x4500;
+ const ICR_STARTUP: u64 = 0x4600;
+
+ // ICR bits 10:8 = 0b101 (INIT), bit 14 = level assert.
+ // Send INIT IPI (x2APIC always uses 32-bit APIC ID in bits 32-63)
+ {
+ let mut icr = ICR_INIT_ASSERT;
+ icr |= u64::from(x2id) << 32;
+ local_apic.set_icr(icr);
+ }
+
+ // Wait after INIT before the first SIPI (the universal startup
+ // algorithm calls for a ~10 ms delay here; this spin count is uncalibrated)
+ for _ in 0..100_000 {
hint::spin_loop();
}
- while !AP_READY.load(Ordering::SeqCst) {
+
+ // ICR bits 10:8 = 0b110 (STARTUP), bit 14 = level assert.
+ // Send STARTUP IPI
+ {
+ let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
+ let mut icr = ICR_STARTUP | ap_segment as u64;
+ icr |= u64::from(x2id) << 32;
+ local_apic.set_icr(icr);
+ }
+
+ // Wait ~200 μs, then send second STARTUP IPI per the universal
+ // startup algorithm.
+ for _ in 0..2_000_000 {
hint::spin_loop();
}
+ {
+ let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
+ let mut icr = ICR_STARTUP | ap_segment as u64;
+ icr |= u64::from(x2id) << 32;
+ local_apic.set_icr(icr);
+ }
+
+ // Known limitation: cpu_id and per-CPU bootstrap state are allocated
+ // before the timeout checks, so a timed-out AP still consumes a
+ // logical CPU slot until startup rollback/teardown is implemented.
+ let ready = wait_for_ap_flag(ap_ready, "trampoline startup", x2id);
+ let kernel_ready = ready && wait_for_kernel_ap_ready(x2id);
+
+ if !kernel_ready {
+ warn!("Skipping x2APIC {} after startup timeout", x2id);
+ }
RmmA::invalidate_all();
}
+ } else if let MadtEntry::LocalApicNmi(nmi) = madt_entry {
+ let target_id = nmi.processor;
+ let nmi_pin = nmi.nmi_pin;
+ let nmi_flags = nmi.flags;
+ if target_id == 0xFF {
+ debug!(
+ " NMI: all processors, pin={}, flags={:#x}",
+ nmi_pin, nmi_flags
+ );
+ unsafe {
+ local_apic.set_lvt_nmi(nmi_pin, nmi_flags);
+ }
+ } else {
+ let my_apic_id = local_apic.id().get() as u8;
+ if target_id == my_apic_id {
+ debug!(
+ " NMI: processor {}, pin={}, flags={:#x}",
+ target_id, nmi_pin, nmi_flags
+ );
+ unsafe {
+ local_apic.set_lvt_nmi(nmi_pin, nmi_flags);
+ }
+ }
+ }
+ } else if let MadtEntry::LocalX2ApicNmi(nmi) = madt_entry {
+ let target_uid = nmi.processor_uid;
+ let nmi_pin = nmi.nmi_pin;
+ let nmi_flags = nmi.flags;
+ if target_uid == 0xFFFFFFFF {
+ debug!(
+ " x2APIC NMI: all processors, pin={}, flags={:#x}",
+ nmi_pin, nmi_flags
+ );
+ unsafe {
+ local_apic.set_lvt_nmi(nmi_pin, nmi_flags);
+ }
+ } else {
+ let current_uid = current_x2apic_processor_uid(&madt, me.get());
+ if current_uid == Some(target_uid) {
+ debug!(
+ " x2APIC NMI: uid {}, pin={}, flags={:#x}",
+ target_uid, nmi_pin, nmi_flags
+ );
+ unsafe {
+ local_apic.set_lvt_nmi(nmi_pin, nmi_flags);
+ }
+ } else {
+ debug!(
+ " x2APIC NMI: skipping uid {} on current uid {:?}",
+ target_uid, current_uid
+ );
+ }
+ }
+ } else if let MadtEntry::LapicAddressOverride(addr) = madt_entry {
+ let lapic_addr = addr.local_apic_address;
+ if lapic_addr != 0 {
+ debug!(" LAPIC address override: {:#x}", lapic_addr);
+ apply_lapic_address_override(local_apic, lapic_addr);
+ }
}
}
diff --git a/src/acpi/madt/mod.rs b/src/acpi/madt/mod.rs
index 3159b9c4..23551c64 100644
--- a/src/acpi/madt/mod.rs
+++ b/src/acpi/madt/mod.rs
@@ -146,6 +146,52 @@ pub struct MadtGicd {
_reserved2: [u8; 3],
}
+/// MADT Local x2APIC (entry type 0x9)
+/// Used by modern AMD and Intel platforms with APIC IDs >= 255.
+#[derive(Clone, Copy, Debug)]
+#[repr(C, packed)]
+pub struct MadtLocalX2Apic {
+ _reserved: u16,
+ pub x2apic_id: u32,
+ pub flags: u32,
+ pub processor_uid: u32,
+}
+
+/// MADT Local APIC NMI (entry type 0x4)
+/// Configures NMI routing to a processor's LINT0/LINT1 pin.
+#[derive(Clone, Copy, Debug)]
+#[repr(C, packed)]
+pub struct MadtLocalApicNmi {
+ pub processor: u8, // 0xFF = all processors
+ pub flags: u16, // bits 0-1: polarity, bits 2-3: trigger mode
+ pub nmi_pin: u8, // 0 = LINT0, 1 = LINT1
+}
+
+/// MADT Local APIC Address Override (entry type 0x5)
+/// Provides 64-bit override for the 32-bit local APIC address.
+#[derive(Clone, Copy, Debug)]
+#[repr(C, packed)]
+pub struct MadtLapicAddressOverride {
+ _reserved: u16,
+ pub local_apic_address: u64,
+}
+
+/// MADT Local x2APIC NMI (entry type 0xA)
+/// x2APIC equivalent of type 0x4 for APIC IDs >= 255.
+/// Field order per the ACPI spec: flags, processor UID, LINT pin, reserved.
+#[derive(Clone, Copy, Debug)]
+#[repr(C, packed)]
+pub struct MadtLocalX2ApicNmi {
+ pub flags: u16,
+ pub processor_uid: u32, // 0xFFFFFFFF = all processors
+ pub nmi_pin: u8, // 0 = LINT0, 1 = LINT1
+ _reserved: [u8; 3],
+}
+
+const _: () = assert!(size_of::<MadtLocalApicNmi>() == 4);
+const _: () = assert!(size_of::<MadtLapicAddressOverride>() == 10);
+const _: () = assert!(size_of::<MadtLocalX2ApicNmi>() == 10);
+
/// MADT Entries
#[derive(Debug)]
#[allow(dead_code)]
@@ -160,6 +206,14 @@ pub enum MadtEntry {
InvalidGicc(usize),
Gicd(&'static MadtGicd),
InvalidGicd(usize),
+ LocalX2Apic(&'static MadtLocalX2Apic),
+ InvalidLocalX2Apic(usize),
+ LocalApicNmi(&'static MadtLocalApicNmi),
+ InvalidLocalApicNmi(usize),
+ LapicAddressOverride(&'static MadtLapicAddressOverride),
+ InvalidLapicAddressOverride(usize),
+ LocalX2ApicNmi(&'static MadtLocalX2ApicNmi),
+ InvalidLocalX2ApicNmi(usize),
Unknown(u8),
}
@@ -176,6 +230,10 @@ impl Iterator for MadtIter {
let entry_len =
unsafe { *(self.sdt.data_address() as *const u8).add(self.i + 1) } as usize;
+ if entry_len < 2 {
+ return None;
+ }
+
if self.i + entry_len <= self.sdt.data_len() {
let item = match entry_type {
0x0 => {
@@ -224,6 +282,44 @@ impl Iterator for MadtIter {
MadtEntry::InvalidGicd(entry_len)
}
}
+ 0x9 => {
+ if entry_len == size_of::<MadtLocalX2Apic>() + 2 {
+ MadtEntry::LocalX2Apic(unsafe {
+ &*((self.sdt.data_address() + self.i + 2) as *const MadtLocalX2Apic)
+ })
+ } else {
+ MadtEntry::InvalidLocalX2Apic(entry_len)
+ }
+ }
+ 0x4 => {
+ if entry_len == size_of::<MadtLocalApicNmi>() + 2 {
+ MadtEntry::LocalApicNmi(unsafe {
+ &*((self.sdt.data_address() + self.i + 2) as *const MadtLocalApicNmi)
+ })
+ } else {
+ MadtEntry::InvalidLocalApicNmi(entry_len)
+ }
+ }
+ 0x5 => {
+ if entry_len == size_of::<MadtLapicAddressOverride>() + 2 {
+ MadtEntry::LapicAddressOverride(unsafe {
+ &*((self.sdt.data_address() + self.i + 2)
+ as *const MadtLapicAddressOverride)
+ })
+ } else {
+ MadtEntry::InvalidLapicAddressOverride(entry_len)
+ }
+ }
+ 0xA => {
+ if entry_len == size_of::<MadtLocalX2ApicNmi>() + 2 {
+ MadtEntry::LocalX2ApicNmi(unsafe {
+ &*((self.sdt.data_address() + self.i + 2)
+ as *const MadtLocalX2ApicNmi)
+ })
+ } else {
+ MadtEntry::InvalidLocalX2ApicNmi(entry_len)
+ }
+ }
_ => MadtEntry::Unknown(entry_type),
};
diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs
index 59e35265..d4c81f11 100644
--- a/src/acpi/mod.rs
+++ b/src/acpi/mod.rs
@@ -10,6 +10,8 @@ use crate::memory::{KernelMapper, PageFlags, PhysicalAddress, RmmA, RmmArch};
use self::{hpet::Hpet, madt::Madt, rsdp::Rsdp, rsdt::Rsdt, rxsdt::Rxsdt, sdt::Sdt, xsdt::Xsdt};
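+// Sanity cap on firmware-reported SDT lengths; rejects corrupt headers before mapping.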
+const MAX_SDT_SIZE: usize = 16 * 1024 * 1024;
+
#[cfg(target_arch = "aarch64")]
mod gtdt;
pub mod hpet;
@@ -22,39 +24,79 @@ pub mod sdt;
mod spcr;
mod xsdt;
-unsafe fn map_linearly(addr: PhysicalAddress, len: usize, mapper: &mut crate::memory::PageMapper) {
+unsafe fn map_linearly(
+ addr: PhysicalAddress,
+ len: usize,
+ mapper: &mut crate::memory::PageMapper,
+) -> bool {
unsafe {
let base = PhysicalAddress::new(crate::memory::round_down_pages(addr.data()));
- let aligned_len = crate::memory::round_up_pages(len + (addr.data() - base.data()));
+ let Some(total_len) = len.checked_add(addr.data() - base.data()) else {
+ error!("ACPI table mapping length overflow at {:#x}", addr.data());
+ return false;
+ };
+ let aligned_len = crate::memory::round_up_pages(total_len);
for page_idx in 0..aligned_len / crate::memory::PAGE_SIZE {
- let (_, flush) = mapper
+ let Some((_virt, flush)) = mapper
.map_linearly(
base.add(page_idx * crate::memory::PAGE_SIZE),
PageFlags::new(),
)
- .expect("failed to linearly map SDT");
+ else {
+ error!(
+ "failed to linearly map ACPI table page at {:#x}",
+ base.add(page_idx * crate::memory::PAGE_SIZE).data()
+ );
+ return false;
+ };
flush.flush();
}
+
+ true
}
}
-pub fn get_sdt(sdt_address: PhysicalAddress, mapper: &mut KernelMapper<true>) -> &'static Sdt {
+pub fn get_sdt(sdt_address: PhysicalAddress, mapper: &mut KernelMapper<true>) -> Option<&'static Sdt> {
let sdt;
unsafe {
const SDT_SIZE: usize = size_of::<Sdt>();
- map_linearly(sdt_address, SDT_SIZE, mapper);
+ if !map_linearly(sdt_address, SDT_SIZE, mapper) {
+ return None;
+ }
sdt = &*(RmmA::phys_to_virt(sdt_address).data() as *const Sdt);
- map_linearly(
+ let total_len = sdt.length as usize;
+ if total_len < SDT_SIZE {
+ warn!(
+ "ACPI table {:?} at {:#x} shorter than header ({})",
+ sdt.signature,
+ sdt_address.data(),
+ total_len
+ );
+ return None;
+ }
+ if total_len > MAX_SDT_SIZE {
+ warn!(
+ "ACPI table {:?} at {:#x} exceeds max supported size ({})",
+ sdt.signature,
+ sdt_address.data(),
+ total_len
+ );
+ return None;
+ }
+
+ if !map_linearly(
sdt_address.add(SDT_SIZE),
- sdt.length as usize - SDT_SIZE,
+ total_len - SDT_SIZE,
mapper,
- );
+ ) {
+ return None;
+ }
}
- sdt
+ Some(sdt)
}
#[repr(C, packed)]
@@ -95,7 +137,19 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
if let Some(rsdp) = rsdp_opt {
debug!("SDT address: {:#x}", rsdp.sdt_address().data());
- let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock_rw());
+ let Some(rxsdt) = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock_rw()) else {
+ error!("Unable to map RSDT/XSDT header");
+ return;
+ };
+
+ if !rxsdt.validate_checksum() {
+ warn!(
+ "Root ACPI table {:?} at {:#x} has invalid checksum; ignoring ACPI",
+ rxsdt.signature,
+ rsdp.sdt_address().data()
+ );
+ return;
+ }
let rxsdt = if let Some(rsdt) = Rsdt::new(rxsdt) {
let mut initialized = false;
@@ -132,12 +186,28 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
// TODO: Don't touch ACPI tables in kernel?
for sdt in rxsdt.iter() {
- get_sdt(sdt, &mut KernelMapper::lock_rw());
+ if get_sdt(sdt, &mut KernelMapper::lock_rw()).is_none() {
+ warn!("Skipping unreadable ACPI table at {:#x}", sdt.data());
+ }
}
for sdt_address in rxsdt.iter() {
+ let Some(sdt) = get_sdt(sdt_address, &mut KernelMapper::lock_rw()) else {
+ warn!("Skipping ACPI table at {:#x}: unable to map safely", sdt_address.data());
+ continue;
+ };
- let sdt = &*(RmmA::phys_to_virt(sdt_address).data() as *const Sdt);
+ if !sdt.validate_checksum() {
+ let sig = &sdt.signature;
+ warn!(
+ "ACPI table {:?} at {:#x} has invalid checksum",
+ sig,
+ sdt_address.data()
+ );
+ continue;
+ }
+
let signature = get_sdt_signature(sdt);
if let Some(ref mut ptrs) = *(SDT_POINTERS.write()) {
ptrs.insert(signature, sdt);
@@ -198,8 +268,7 @@ macro_rules! find_one_sdt {
}
pub fn get_sdt_signature(sdt: &'static Sdt) -> SdtSignature {
- let signature =
- String::from_utf8(sdt.signature.to_vec()).expect("Error converting signature to string");
+ let signature = String::from_utf8_lossy(&sdt.signature).into_owned();
(signature, sdt.oem_id, sdt.oem_table_id)
}
diff --git a/src/acpi/rsdp.rs b/src/acpi/rsdp.rs
index f10c5ac9..571aeeec 100644
--- a/src/acpi/rsdp.rs
+++ b/src/acpi/rsdp.rs
@@ -1,5 +1,8 @@
use rmm::PhysicalAddress;
+const RSDP_V1_SIZE: usize = 20;
+const RSDP_V2_MIN_SIZE: usize = size_of::<Rsdp>();
+
/// RSDP
#[derive(Copy, Clone, Debug)]
#[repr(C, packed)]
@@ -17,10 +20,33 @@ pub struct Rsdp {
impl Rsdp {
pub unsafe fn get_rsdp(already_supplied_rsdp: Option<*const u8>) -> Option<Rsdp> {
- already_supplied_rsdp.map(|rsdp_ptr| {
- // TODO: Validate
- unsafe { *(rsdp_ptr as *const Rsdp) }
- })
+ let rsdp_ptr = already_supplied_rsdp?;
+ let rsdp = unsafe { *(rsdp_ptr as *const Rsdp) };
+
+ if rsdp.signature != *b"RSD PTR " {
+ warn!("RSDP signature invalid");
+ return None;
+ }
+
+ if !checksum_ok(rsdp_ptr, RSDP_V1_SIZE) {
+ warn!("RSDP base checksum invalid");
+ return None;
+ }
+
+ if rsdp.revision >= 2 {
+ let length = rsdp._length as usize;
+ if length < RSDP_V2_MIN_SIZE {
+ warn!("RSDP revision {} length {} too small", rsdp.revision, length);
+ return None;
+ }
+
+ if !checksum_ok(rsdp_ptr, length) {
+ warn!("RSDP extended checksum invalid");
+ return None;
+ }
+ }
+
+ Some(rsdp)
}
/// Get the RSDT or XSDT address
@@ -32,3 +58,8 @@ impl Rsdp {
})
}
}
+
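+// ACPI checksum rule: all bytes of the structure summed with wrapping arithmetic must equal zero.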
+fn checksum_ok(ptr: *const u8, len: usize) -> bool {
+ let bytes = unsafe { core::slice::from_raw_parts(ptr, len) };
+ bytes.iter().fold(0u8, |sum, &byte| sum.wrapping_add(byte)) == 0
+}
diff --git a/src/acpi/sdt.rs b/src/acpi/sdt.rs
index 83ff67da..f49b6212 100644
--- a/src/acpi/sdt.rs
+++ b/src/acpi/sdt.rs
@@ -24,4 +24,15 @@ impl Sdt {
let header_size = size_of::<Sdt>();
total_size.saturating_sub(header_size)
}
+
+ /// Validate that the sum of all bytes in this table is zero (ACPI spec requirement).
+ /// Returns false if the length is too small or the checksum doesn't match.
+ pub fn validate_checksum(&self) -> bool {
+ let len = self.length as usize;
+ if len < size_of::<Sdt>() {
+ return false;
+ }
+ let bytes = unsafe { core::slice::from_raw_parts(self as *const _ as *const u8, len) };
+ bytes.iter().fold(0u8, |sum, &b| sum.wrapping_add(b)) == 0
+ }
}
diff --git a/src/arch/aarch64/start.rs b/src/arch/aarch64/start.rs
index e1c8cfb4..65e3fe33 100644
--- a/src/arch/aarch64/start.rs
+++ b/src/arch/aarch64/start.rs
@@ -91,7 +91,7 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs) -> ! {
dtb::serial::init_early(dtb);
}
- info!("Redox OS starting...");
+ info!("RedBear OS starting...");
args.print();
// Initialize RMM
diff --git a/src/arch/riscv64/start.rs b/src/arch/riscv64/start.rs
index 2551968f..a825536a 100644
--- a/src/arch/riscv64/start.rs
+++ b/src/arch/riscv64/start.rs
@@ -97,7 +97,7 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs) -> ! {
init_early(dtb);
}
- info!("Redox OS starting...");
+ info!("RedBear OS starting...");
args.print();
if let Some(dtb) = &dtb {
diff --git a/src/arch/x86_shared/cpuid.rs b/src/arch/x86_shared/cpuid.rs
index b3683125..be7db1be 100644
--- a/src/arch/x86_shared/cpuid.rs
+++ b/src/arch/x86_shared/cpuid.rs
@@ -1,11 +1,8 @@
use raw_cpuid::{CpuId, CpuIdResult, ExtendedFeatures, FeatureInfo};
+#[cfg(target_arch = "x86_64")]
pub fn cpuid() -> CpuId {
- // FIXME check for cpuid availability during early boot and error out if it doesn't exist.
CpuId::with_cpuid_fn(|a, c| {
- #[cfg(target_arch = "x86")]
- let result = unsafe { core::arch::x86::__cpuid_count(a, c) };
- #[cfg(target_arch = "x86_64")]
let result = unsafe { core::arch::x86_64::__cpuid_count(a, c) };
CpuIdResult {
eax: result.eax,
@@ -16,6 +13,19 @@ pub fn cpuid() -> CpuId {
})
}
+#[cfg(target_arch = "x86")]
+pub fn cpuid() -> CpuId {
+ CpuId::with_cpuid_fn(|a, c| {
+ let result = unsafe { core::arch::x86::__cpuid_count(a, c) };
+ CpuIdResult {
+ eax: result.eax,
+ ebx: result.ebx,
+ ecx: result.ecx,
+ edx: result.edx,
+ }
+ })
+}
+
#[cfg_attr(not(target_arch = "x86_64"), expect(dead_code))]
pub fn feature_info() -> FeatureInfo {
cpuid()
diff --git a/src/arch/x86_shared/device/ioapic.rs b/src/arch/x86_shared/device/ioapic.rs
index fb66d3bf..5938540f 100644
--- a/src/arch/x86_shared/device/ioapic.rs
+++ b/src/arch/x86_shared/device/ioapic.rs
@@ -14,6 +14,9 @@ pub struct IoApicRegs {
pointer: *const u32,
}
impl IoApicRegs {
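+ // IOAPICVER's max-redirection-entry field reports the highest valid
+ // index (entry count minus one), hence the inclusive comparison.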
+ fn redirection_index_valid(&mut self, idx: u8) -> bool {
+ idx <= self.max_redirection_table_entries()
+ }
fn ioregsel(&self) -> *const u32 {
self.pointer
}
@@ -44,21 +47,28 @@ impl IoApicRegs {
pub fn read_ioapicver(&mut self) -> u32 {
self.read_reg(0x01)
}
- pub fn read_ioredtbl(&mut self, idx: u8) -> u64 {
- assert!(idx < 24);
+ pub fn read_ioredtbl(&mut self, idx: u8) -> Option<u64> {
+ if !self.redirection_index_valid(idx) {
+ warn!("IOAPIC read_ioredtbl index {} out of range", idx);
+ return None;
+ }
let lo = self.read_reg(0x10 + idx * 2);
let hi = self.read_reg(0x10 + idx * 2 + 1);
- u64::from(lo) | (u64::from(hi) << 32)
+ Some(u64::from(lo) | (u64::from(hi) << 32))
}
- pub fn write_ioredtbl(&mut self, idx: u8, value: u64) {
- assert!(idx < 24);
+ pub fn write_ioredtbl(&mut self, idx: u8, value: u64) -> bool {
+ if !self.redirection_index_valid(idx) {
+ warn!("IOAPIC write_ioredtbl index {} out of range", idx);
+ return false;
+ }
let lo = value as u32;
let hi = (value >> 32) as u32;
self.write_reg(0x10 + idx * 2, lo);
self.write_reg(0x10 + idx * 2 + 1, hi);
+ true
}
pub fn max_redirection_table_entries(&mut self) -> u8 {
@@ -92,17 +102,22 @@ impl IoApic {
}
/// Map an interrupt vector to a physical local APIC ID of a processor (thus physical mode).
#[allow(dead_code)]
- pub fn map(&self, idx: u8, info: MapInfo) {
- self.regs.lock().write_ioredtbl(idx, info.as_raw())
+ pub fn map(&self, idx: u8, info: MapInfo) -> bool {
+ let Some(raw) = info.as_raw() else {
+ return false;
+ };
+ self.regs.lock().write_ioredtbl(idx, raw)
}
pub fn set_mask(&self, gsi: u32, mask: bool) {
let idx = (gsi - self.gsi_start) as u8;
let mut guard = self.regs.lock();
- let mut reg = guard.read_ioredtbl(idx);
+ let Some(mut reg) = guard.read_ioredtbl(idx) else {
+ return;
+ };
reg &= !(1 << 16);
reg |= u64::from(mask) << 16;
- guard.write_ioredtbl(idx, reg);
+ let _ = guard.write_ioredtbl(idx, reg);
}
}
@@ -149,19 +164,21 @@ pub struct MapInfo {
}
impl MapInfo {
- pub fn as_raw(&self) -> u64 {
- assert!(self.vector >= 0x20);
- assert!(self.vector <= 0xFE);
+ pub fn as_raw(&self) -> Option<u64> {
+ if !(0x20..=0xFE).contains(&self.vector) {
+ warn!("Refusing to map IOAPIC vector outside valid range: {:#x}", self.vector);
+ return None;
+ }
// TODO: Check for reserved fields.
- (u64::from(self.dest.get()) << 56)
+ Some((u64::from(self.dest.get()) << 56)
| (u64::from(self.mask) << 16)
| ((self.trigger_mode as u64) << 15)
| ((self.polarity as u64) << 13)
| ((self.dest_mode as u64) << 11)
| ((self.delivery_mode as u64) << 8)
- | u64::from(self.vector)
+ | u64::from(self.vector))
}
}
@@ -175,7 +192,7 @@ impl fmt::Debug for IoApic {
let count = guard.max_redirection_table_entries();
f.debug_list()
- .entries((0..count).map(|i| guard.read_ioredtbl(i)))
+ .entries((0..=count).filter_map(|i| guard.read_ioredtbl(i)))
.finish()
}
}
@@ -237,11 +254,14 @@ pub unsafe fn handle_ioapic(madt_ioapic: &'static MadtIoApic) {
let ioapic_registers = virt.data() as *const u32;
let ioapic = IoApic::new(ioapic_registers, madt_ioapic.gsi_base);
- assert_eq!(
- ioapic.regs.lock().id(),
- madt_ioapic.id,
- "mismatched ACPI MADT I/O APIC ID, and the ID reported by the I/O APIC"
- );
+ let detected_id = ioapic.regs.lock().id();
+ if detected_id != madt_ioapic.id {
+ warn!(
+ "mismatched ACPI MADT I/O APIC ID: MADT={}, IOAPIC={}; continuing with detected hardware",
+ madt_ioapic.id,
+ detected_id
+ );
+ }
(*IOAPICS.get()).get_or_insert_with(Vec::new).push(ioapic);
}
@@ -310,11 +330,14 @@ pub unsafe fn init() {
}
}
}
- println!(
- "I/O APICs: {:?}, overrides: {:?}",
- ioapics(),
- src_overrides()
- );
+ // Sanitize all IOAPIC redirection entries: mask everything first to clear
+ // stale firmware/emulator defaults. Entries are selectively unmasked below.
+ for ioapic in ioapics().iter() {
+ let max_idx = ioapic.count;
+ for idx in 0..=max_idx {
+ ioapic.set_mask(ioapic.gsi_start + u32::from(idx), true);
+ }
+ }
// map the legacy PC-compatible IRQs (0-15) to 32-47, just like we did with 8259 PIC (if it
// wouldn't have been disabled due to this I/O APIC)
@@ -329,7 +352,6 @@ pub unsafe fn init() {
.iter()
.any(|over| over.bus_irq == legacy_irq)
{
- // there's an IRQ conflict, making this legacy IRQ inaccessible.
continue;
}
(
@@ -349,7 +371,6 @@ pub unsafe fn init() {
let redir_tbl_index = (gsi - apic.gsi_start) as u8;
let map_info = MapInfo {
- // only send to the BSP
dest: bsp_apic_id,
dest_mode: DestinationMode::Physical,
delivery_mode: DeliveryMode::Fixed,
@@ -366,7 +387,31 @@ pub unsafe fn init() {
},
vector: 32 + legacy_irq,
};
- apic.map(redir_tbl_index, map_info);
+ if !apic.map(redir_tbl_index, map_info) {
+ warn!(
+ "Unable to map legacy IRQ {} (GSI {}) through IOAPIC index {}",
+ legacy_irq, gsi, redir_tbl_index
+ );
+ }
+
+ // IRQ 0 (timer) is often overridden to GSI 2, but some platforms
+ // (including QEMU) route the HPET timer directly to GSI 0 regardless
+ // of the MADT override. Map GSI 0 as well so the timer works on both
+ // virtual and physical hardware.
+ if legacy_irq == 0 && gsi != u32::from(legacy_irq) {
+ if let Some(apic0) = find_ioapic(u32::from(legacy_irq)) {
+ let idx0 = (u32::from(legacy_irq) - apic0.gsi_start) as u8;
+ let map_info0 = MapInfo {
+ dest: bsp_apic_id,
+ dest_mode: DestinationMode::Physical,
+ delivery_mode: DeliveryMode::Fixed,
+ mask: false,
+ polarity: ApicPolarity::ActiveHigh,
+ trigger_mode: ApicTriggerMode::Edge,
+ vector: 32,
+ };
+ if !apic0.map(idx0, map_info0) {
+ warn!("Unable to map timer GSI 0 through IOAPIC index {}", idx0);
+ }
+ }
+ }
}
println!(
"I/O APICs: {:?}, overrides: {:?}",
@@ -406,7 +451,7 @@ fn resolve(irq: u8) -> u32 {
fn find_ioapic(gsi: u32) -> Option<&'static IoApic> {
ioapics()
.iter()
- .find(|apic| gsi >= apic.gsi_start && gsi < apic.gsi_start + u32::from(apic.count))
+ .find(|apic| gsi >= apic.gsi_start && gsi <= apic.gsi_start + u32::from(apic.count))
}
pub unsafe fn mask(irq: u8) {
diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs
index b6afe02a..e256d160 100644
--- a/src/arch/x86_shared/device/local_apic.rs
+++ b/src/arch/x86_shared/device/local_apic.rs
@@ -103,7 +103,7 @@ impl LocalApic {
ApicId::new(if self.x2 {
unsafe { rdmsr(IA32_X2APIC_APICID) as u32 }
} else {
- unsafe { self.read(0x20) }
+ unsafe { self.read(0x20) >> 24 }
})
}
@@ -126,7 +126,14 @@ impl LocalApic {
pub fn set_icr(&mut self, value: u64) {
if self.x2 {
unsafe {
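+ // Poll the ICR delivery-status bit (bit 12) before and after the write.
+ // The SDM documents this bit as not provided in x2APIC mode (it reads 0),
+ // so the poll is defensive and normally falls through immediately.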
+ const PENDING: u32 = 1 << 12;
+ while (rdmsr(IA32_X2APIC_ICR) as u32) & PENDING == PENDING {
+ core::hint::spin_loop();
+ }
wrmsr(IA32_X2APIC_ICR, value);
+ while (rdmsr(IA32_X2APIC_ICR) as u32) & PENDING == PENDING {
+ core::hint::spin_loop();
+ }
}
} else {
unsafe {
@@ -256,6 +263,30 @@ impl LocalApic {
}
}
}
+ /// Configure LVT NMI entry. `pin` is 0 for LINT0, 1 for LINT1.
+ /// `flags` encodes polarity and trigger mode per MADT NMI spec.
+ pub unsafe fn set_lvt_nmi(&mut self, pin: u8, flags: u16) {
+ // Delivery mode NMI (bits 10:8 = 0b100). Translate MADT flags
+ // (bits 1:0 polarity, bits 3:2 trigger) into LVT bit 13 (polarity)
+ // and bit 15 (trigger); 0b00 "conforms" means edge, active high.
+ let mut lvt_value: u32 = 0x400;
+ if flags & 0b0011 == 0b0011 {
+ lvt_value |= 1 << 13; // active low
+ }
+ if flags & 0b1100 == 0b1100 {
+ lvt_value |= 1 << 15; // level triggered
+ }
+ unsafe {
+ match pin {
+ 0 => {
+ if self.x2 {
+ wrmsr(IA32_X2APIC_LVT_LINT0, u64::from(lvt_value));
+ } else {
+ self.write(0x350, lvt_value);
+ }
+ }
+ 1 => {
+ if self.x2 {
+ wrmsr(IA32_X2APIC_LVT_LINT1, u64::from(lvt_value));
+ } else {
+ self.write(0x360, lvt_value);
+ }
+ }
+ _ => {}
+ }
+ }
+ }
unsafe fn setup_error_int(&mut self) {
unsafe {
let vector = 49u32;
diff --git a/src/arch/x86_shared/device/mod.rs b/src/arch/x86_shared/device/mod.rs
index 6f417706..acb14d72 100644
--- a/src/arch/x86_shared/device/mod.rs
+++ b/src/arch/x86_shared/device/mod.rs
@@ -23,8 +23,7 @@ pub unsafe fn init() {
}
}
pub unsafe fn init_after_acpi() {
- // this will disable the IOAPIC if needed.
- //ioapic::init(mapper);
+ unsafe { ioapic::init() };
}
unsafe fn init_hpet() -> bool {
diff --git a/src/arch/x86_shared/interrupt/exception.rs b/src/arch/x86_shared/interrupt/exception.rs
index 7725a45d..fbba75c7 100644
--- a/src/arch/x86_shared/interrupt/exception.rs
+++ b/src/arch/x86_shared/interrupt/exception.rs
@@ -1,3 +1,5 @@
+use core::sync::atomic::{AtomicBool, Ordering};
+
use syscall::Exception;
use x86::irq::PageFaultError;
@@ -10,6 +12,20 @@ use crate::{
syscall::flag::*,
};
+static NMI_IN_PROGRESS: AtomicBool = AtomicBool::new(false);
+
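+// Raw COM1 output that bypasses the logging mutexes: poll LSR bit 5
+// (transmit holding register empty), then write the byte.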
+unsafe fn nmi_raw_serial_write(s: &[u8]) {
+ use crate::syscall::io::{Io, Pio};
+ let mut com1 = Pio::<u8>::new(0x3F8);
+ let lsr = Pio::<u8>::new(0x3F8 + 5);
+ for &b in s {
+ while lsr.read() & (1 << 5) == 0 {
+ core::hint::spin_loop();
+ }
+ com1.write(b);
+ }
+}
+
interrupt_stack!(divide_by_zero, |stack| {
println!("Divide by zero");
stack.trace();
@@ -55,9 +71,24 @@ interrupt_stack!(non_maskable, @paranoid, |stack| {
#[cfg(not(all(target_arch = "x86_64", feature = "profiling")))]
{
- // TODO: This will likely deadlock
- println!("Non-maskable interrupt");
- stack.dump();
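+ // Serialize NMI reporting; a second NMI arriving mid-report is dropped
+ // rather than interleaving raw serial output.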
+ if NMI_IN_PROGRESS.swap(true, Ordering::SeqCst) {
+ return;
+ }
+ unsafe {
+ nmi_raw_serial_write(b"Non-maskable interrupt\n");
+ nmi_raw_serial_write(b" RIP: ");
+ // Print RIP as hex manually to avoid formatting locks
+ let rip = stack.iret.rip;
+ let mut buf = [0u8; 19];
+ buf[0] = b'0'; buf[1] = b'x';
+ for i in 0..16 {
+ let nibble = ((rip >> (60 - i * 4)) & 0xf) as u8;
+ buf[2 + i] = if nibble < 10 { b'0' + nibble } else { b'a' + nibble - 10 };
+ }
+ buf[18] = b'\n';
+ nmi_raw_serial_write(&buf);
+ }
+ NMI_IN_PROGRESS.store(false, Ordering::SeqCst);
}
});
diff --git a/src/arch/x86_shared/start.rs b/src/arch/x86_shared/start.rs
index 7a7c0ae8..62f9523c 100644
--- a/src/arch/x86_shared/start.rs
+++ b/src/arch/x86_shared/start.rs
@@ -91,7 +91,7 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
// Set up graphical debug
graphical_debug::init(args.env());
- info!("Redox OS starting...");
+ info!("RedBear OS starting...");
args.print();
// Set up GDT
@@ -127,16 +127,21 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
// Initialize devices
device::init();
+ info!("kernel: device init complete (PIC + LAPIC)");
// Read ACPI tables, starts APs
if cfg!(feature = "acpi") {
crate::acpi::init(args.acpi_rsdp());
+ info!("kernel: ACPI tables parsed");
+
device::init_after_acpi();
+ info!("kernel: IOAPIC init complete");
}
crate::profiling::init();
// Initialize all of the non-core devices not otherwise needed to complete initialization
device::init_noncore();
+ info!("kernel: timer init complete, entering userspace");
args.bootstrap()
};
diff --git a/src/context/memory.rs b/src/context/memory.rs
index 94519448..0db1de53 100644
--- a/src/context/memory.rs
+++ b/src/context/memory.rs
@@ -927,8 +927,8 @@ impl UserGrants {
.take_while(move |(base, info)| PageSpan::new(**base, info.page_count).intersects(span))
.map(|(base, info)| (*base, info))
}
- /// Return a free region with the specified size
- // TODO: Alignment (x86_64: 4 KiB, 2 MiB, or 1 GiB).
+ /// Return a free region with the specified size, optionally aligned to a power-of-two
+ /// boundary (x86_64 supports 4 KiB, 2 MiB, or 1 GiB pages).
// TODO: Support finding grant close to a requested address?
pub fn find_free_near(
&self,
@@ -936,29 +936,42 @@ impl UserGrants {
page_count: usize,
_near: Option<Page>,
) -> Option<PageSpan> {
- // Get first available hole, but do reserve the page starting from zero as most compiled
- // languages cannot handle null pointers safely even if they point to valid memory. If an
- // application absolutely needs to map the 0th page, they will have to do so explicitly via
- // MAP_FIXED/MAP_FIXED_NOREPLACE.
- // TODO: Allow explicitly allocating guard pages? Perhaps using mprotect or mmap with
- // PROT_NONE?
+ self.find_free_near_aligned(min, page_count, _near, 0)
+ }
+ pub fn find_free_near_aligned(
+ &self,
+ min: usize,
+ page_count: usize,
+ _near: Option<Page>,
+ page_alignment: usize,
+ ) -> Option<PageSpan> {
+ let alignment = if page_alignment == 0 {
+ PAGE_SIZE
+ } else {
+ assert!(page_alignment.is_power_of_two(), "page_alignment must be a power of two");
+ page_alignment * PAGE_SIZE
+ };
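+ // Round each candidate hole base up to the requested boundary, then
+ // check that page_count pages still fit in what remains of the hole.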
let (hole_start, _hole_size) = self
.holes
.iter()
.skip_while(|(hole_offset, hole_size)| hole_offset.data() + **hole_size <= min)
.find(|(hole_offset, hole_size)| {
- let avail_size =
- if hole_offset.data() <= min && min <= hole_offset.data() + **hole_size {
- **hole_size - (min - hole_offset.data())
- } else {
- **hole_size
- };
+ let base = cmp::max(hole_offset.data(), min);
+ let aligned_base = (base + alignment - 1) & !(alignment - 1);
+ let avail_size = if aligned_base <= hole_offset.data() + **hole_size {
+ hole_offset.data() + **hole_size - aligned_base
+ } else {
+ 0
+ };
page_count * PAGE_SIZE <= avail_size
})?;
- // Create new region
+
+ let base = cmp::max(hole_start.data(), min);
+ let aligned_base = (base + alignment - 1) & !(alignment - 1);
+
Some(PageSpan::new(
- Page::containing_address(VirtualAddress::new(cmp::max(hole_start.data(), min))),
+ Page::containing_address(VirtualAddress::new(aligned_base)),
page_count,
))
}
diff --git a/src/event.rs b/src/event.rs
index 7398145a..92e5793c 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -8,13 +8,14 @@ use crate::{
context,
scheme::{self, SchemeExt, SchemeId},
sync::{
- CleanLockToken, LockToken, RwLock, RwLockReadGuard, RwLockWriteGuard, WaitQueue, L0, L1, L2,
+ CleanLockToken, LockToken, Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard,
+ WaitCondition, WaitQueue, L0, L1, L2,
},
syscall::{
data::Event,
- error::{Error, Result, EBADF},
- flag::EventFlags,
- usercopy::UserSliceWo,
+ error::{Error, Result, EAGAIN, EBADF, EINTR, EINVAL},
+ flag::{EVENT_READ, EVENT_WRITE, EventFlags},
+ usercopy::{UserSliceRo, UserSliceWo},
},
};
@@ -25,6 +26,17 @@ pub struct EventQueue {
queue: WaitQueue<Event>,
}
+const EVENTFD_COUNTER_MAX: u64 = u64::MAX - 1;
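+// Counter ids set the top bit so they can share the event scheme's id space with queue ids.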
+const EVENTFD_TAG_BIT: usize = 1usize << (usize::BITS - 1);
+
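+/// Linux-style eventfd counter: reads drain the counter (or decrement it by
+/// one in semaphore mode), writes add to it, and both block at the limits
+/// unless O_NONBLOCK is set.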
+pub struct EventCounter {
+ id: usize,
+ counter: Mutex<L1, u64>,
+ read_condition: WaitCondition,
+ write_condition: WaitCondition,
+ semaphore: bool,
+}
+
impl EventQueue {
pub fn new(id: EventQueueId) -> EventQueue {
EventQueue {
@@ -91,19 +103,146 @@ impl EventQueue {
}
}
+impl EventCounter {
+ pub fn new(id: usize, init: u64, semaphore: bool) -> EventCounter {
+ EventCounter {
+ id,
+ counter: Mutex::new(init),
+ read_condition: WaitCondition::new(),
+ write_condition: WaitCondition::new(),
+ semaphore,
+ }
+ }
+
+ pub fn is_readable(&self, token: &mut CleanLockToken) -> bool {
+ *self.counter.lock(token.token()) > 0
+ }
+
+ pub fn is_writable(&self, token: &mut CleanLockToken) -> bool {
+ *self.counter.lock(token.token()) < EVENTFD_COUNTER_MAX
+ }
+
+ pub fn read(&self, buf: UserSliceWo, block: bool, token: &mut CleanLockToken) -> Result<usize> {
+ if buf.len() < core::mem::size_of::<u64>() {
+ return Err(Error::new(EINVAL));
+ }
+
+ loop {
+ let counter = self.counter.lock(token.token());
+ let (mut counter, mut token) = counter.into_split();
+
+ if *counter > 0 {
+ let value = if self.semaphore {
+ *counter -= 1;
+ 1
+ } else {
+ let value = *counter;
+ *counter = 0;
+ value
+ };
+
+ buf.limit(core::mem::size_of::<u64>())
+ .ok_or(Error::new(EINVAL))?
+ .copy_from_slice(&value.to_ne_bytes())?;
+
+ trigger_locked(
+ GlobalSchemes::Event.scheme_id(),
+ self.id,
+ EVENT_WRITE,
+ token.token(),
+ );
+ self.write_condition.notify_locked(token.token());
+
+ return Ok(core::mem::size_of::<u64>());
+ }
+
+ if !block {
+ return Err(Error::new(EAGAIN));
+ }
+
+ if !self
+ .read_condition
+ .wait(counter, "EventCounter::read", &mut token)
+ {
+ return Err(Error::new(EINTR));
+ }
+ }
+ }
+
+ pub fn write(&self, buf: UserSliceRo, block: bool, token: &mut CleanLockToken) -> Result<usize> {
+ if buf.len() != core::mem::size_of::<u64>() {
+ return Err(Error::new(EINVAL));
+ }
+
+ let value = unsafe { buf.read_exact::<u64>()? };
+ if value == u64::MAX {
+ return Err(Error::new(EINVAL));
+ }
+
+ loop {
+ let counter = self.counter.lock(token.token());
+ let (mut counter, mut token) = counter.into_split();
+
+ if EVENTFD_COUNTER_MAX - *counter >= value {
+ let was_zero = *counter == 0;
+ *counter += value;
+
+ if was_zero && value != 0 {
+ trigger_locked(
+ GlobalSchemes::Event.scheme_id(),
+ self.id,
+ EVENT_READ,
+ token.token(),
+ );
+ self.read_condition.notify_locked(token.token());
+ }
+
+ return Ok(core::mem::size_of::<u64>());
+ }
+
+ if !block {
+ return Err(Error::new(EAGAIN));
+ }
+
+ if !self
+ .write_condition
+ .wait(counter, "EventCounter::write", &mut token)
+ {
+ return Err(Error::new(EINTR));
+ }
+ }
+ }
+
+ pub fn into_drop(self, _token: LockToken<'_, L1>) {
+ drop(self);
+ }
+}
+
pub type EventQueueList = HashMap<EventQueueId, Arc<EventQueue>>;
+pub type EventCounterList = HashMap<usize, Arc<EventCounter>>;
// Next queue id
static NEXT_QUEUE_ID: AtomicUsize = AtomicUsize::new(0);
+static NEXT_COUNTER_ID: AtomicUsize = AtomicUsize::new(0);
/// Get next queue id
pub fn next_queue_id() -> EventQueueId {
EventQueueId::from(NEXT_QUEUE_ID.fetch_add(1, Ordering::SeqCst))
}
+pub fn next_counter_id() -> usize {
+ EVENTFD_TAG_BIT | NEXT_COUNTER_ID.fetch_add(1, Ordering::SeqCst)
+}
+
+pub fn is_counter_id(id: usize) -> bool {
+ id & EVENTFD_TAG_BIT != 0
+}
+
// Current event queues
static QUEUES: RwLock<L2, EventQueueList> =
RwLock::new(EventQueueList::with_hasher(DefaultHashBuilder::new()));
+static COUNTERS: RwLock<L2, EventCounterList> =
+ RwLock::new(EventCounterList::with_hasher(DefaultHashBuilder::new()));
/// Get the event queues list, const
pub fn queues(token: LockToken<'_, L0>) -> RwLockReadGuard<'_, L2, EventQueueList> {
@@ -115,6 +254,14 @@ pub fn queues_mut(token: LockToken<'_, L0>) -> RwLockWriteGuard<'_, L2, EventQue
QUEUES.write(token)
}
+pub fn counters(token: LockToken<'_, L0>) -> RwLockReadGuard<'_, L2, EventCounterList> {
+ COUNTERS.read(token)
+}
+
+pub fn counters_mut(token: LockToken<'_, L0>) -> RwLockWriteGuard<'_, L2, EventCounterList> {
+ COUNTERS.write(token)
+}
+
#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct RegKey {
pub scheme: SchemeId,
diff --git a/src/scheme/event.rs b/src/scheme/event.rs
index 36efe5b2..c64b6bd0 100644
--- a/src/scheme/event.rs
+++ b/src/scheme/event.rs
@@ -1,9 +1,12 @@
-use alloc::sync::Arc;
+use alloc::{sync::Arc, vec::Vec};
use syscall::{EventFlags, O_NONBLOCK};
use crate::{
context::file::InternalFlags,
- event::{next_queue_id, queues, queues_mut, EventQueue, EventQueueId},
+ event::{
+ EventCounter, EventQueue, EventQueueId, counters, counters_mut, is_counter_id,
+ next_counter_id, next_queue_id, queues, queues_mut,
+ },
sync::CleanLockToken,
syscall::{
data::Event,
@@ -25,7 +28,7 @@ impl KernelScheme for EventScheme {
fn kopenat(
&self,
id: usize,
- _user_buf: StrOrBytes,
+ user_buf: StrOrBytes,
_flags: usize,
_fcntl_flags: u32,
_ctx: CallerCtx,
@@ -34,13 +37,53 @@ impl KernelScheme for EventScheme {
if id != SCHEME_ROOT_ID {
return Err(Error::new(EACCES));
}
- let id = next_queue_id();
- queues_mut(token.token()).insert(id, Arc::new(EventQueue::new(id)));
- Ok(OpenResult::SchemeLocal(id.get(), InternalFlags::empty()))
+ let path = user_buf.as_str().map_err(|_| Error::new(EINVAL))?;
+ let path = path.trim_matches('/');
+
+ if path.is_empty() {
+ let id = next_queue_id();
+ queues_mut(token.token()).insert(id, Arc::new(EventQueue::new(id)));
+ return Ok(OpenResult::SchemeLocal(id.get(), InternalFlags::empty()));
+ }
+
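+ // Path grammar: "eventfd[/<init>[/<semaphore>]]"; e.g. opening
+ // "/scheme/event/eventfd/0/1" creates a semaphore-mode counter.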
+ let parts: Vec<&str> = path.split('/').collect();
+ if matches!(parts.first(), Some(&"eventfd")) {
+ let init = match parts.get(1) {
+ Some(value) => value.parse::<u64>().map_err(|_| Error::new(EINVAL))?,
+ None => 0_u64,
+ };
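+ // Linux's eventfd(2) takes a u32 initval; mirror that bound here.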
+ if init > u32::MAX as u64 {
+ return Err(Error::new(EINVAL));
+ }
+ let semaphore = match parts.get(2) {
+ Some(value) => match *value {
+ "0" => Ok(false),
+ "1" => Ok(true),
+ _ => Err(Error::new(EINVAL)),
+ }?,
+ None => false,
+ };
+
+ let id = next_counter_id();
+ counters_mut(token.token()).insert(id, Arc::new(EventCounter::new(id, init, semaphore)));
+ return Ok(OpenResult::SchemeLocal(id, InternalFlags::empty()));
+ }
+
+ Err(Error::new(ENOENT))
}
fn close(&self, id: usize, token: &mut CleanLockToken) -> Result<()> {
+ if is_counter_id(id) {
+ let counter = counters_mut(token.token())
+ .remove(&id)
+ .ok_or(Error::new(EBADF))?;
+ if let Some(counter) = Arc::into_inner(counter) {
+ counter.into_drop(token.downgrade());
+ }
+ return Ok(());
+ }
+
let id = EventQueueId::from(id);
let queue = queues_mut(token.token())
.remove(&id)
@@ -59,6 +102,15 @@ impl KernelScheme for EventScheme {
_stored_flags: u32,
token: &mut CleanLockToken,
) -> Result<usize> {
+ if is_counter_id(id) {
+ let counter = {
+ let handles = counters(token.token());
+ let handle = handles.get(&id).ok_or(Error::new(EBADF))?;
+ handle.clone()
+ };
+ return counter.read(buf, flags & O_NONBLOCK as u32 == 0, token);
+ }
+
let id = EventQueueId::from(id);
let queue = {
@@ -74,10 +126,19 @@ impl KernelScheme for EventScheme {
&self,
id: usize,
buf: UserSliceRo,
- _flags: u32,
+ flags: u32,
_stored_flags: u32,
token: &mut CleanLockToken,
) -> Result<usize> {
+ if is_counter_id(id) {
+ let counter = {
+ let handles = counters(token.token());
+ let handle = handles.get(&id).ok_or(Error::new(EBADF))?;
+ handle.clone()
+ };
+ return counter.write(buf, flags & O_NONBLOCK as u32 == 0, token);
+ }
+
let id = EventQueueId::from(id);
let queue = {
@@ -98,8 +159,12 @@ impl KernelScheme for EventScheme {
Ok(events_written * size_of::<Event>())
}
- fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result<usize> {
- buf.copy_common_bytes_from_slice(b"/scheme/event/")
+ fn kfpath(&self, id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result<usize> {
+ if is_counter_id(id) {
+ buf.copy_common_bytes_from_slice(b"/scheme/event/eventfd")
+ } else {
+ buf.copy_common_bytes_from_slice(b"/scheme/event/")
+ }
}
fn fevent(
@@ -108,6 +173,23 @@ impl KernelScheme for EventScheme {
flags: EventFlags,
token: &mut CleanLockToken,
) -> Result<EventFlags> {
+ if is_counter_id(id) {
+ let counter = {
+ let handles = counters(token.token());
+ let handle = handles.get(&id).ok_or(Error::new(EBADF))?;
+ handle.clone()
+ };
+
+ let mut ready = EventFlags::empty();
+ if flags.contains(EventFlags::EVENT_READ) && counter.is_readable(token) {
+ ready |= EventFlags::EVENT_READ;
+ }
+ if flags.contains(EventFlags::EVENT_WRITE) && counter.is_writable(token) {
+ ready |= EventFlags::EVENT_WRITE;
+ }
+ return Ok(ready);
+ }
+
let id = EventQueueId::from(id);
let queue = {