diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs --- a/src/acpi/madt/arch/x86.rs +++ b/src/acpi/madt/arch/x86.rs @@ -1,154 +1,247 @@ use core::{ hint, sync::atomic::{AtomicU64, AtomicU8, Ordering}, }; use crate::{ arch::start::KernelArgsAp, cpu_set::LogicalCpuId, device::local_apic::the_local_apic, memory::{ allocate_p2frame, Frame, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch, VirtualAddress, PAGE_SIZE, }, start::kstart_ap, AP_READY, }; use super::{Madt, MadtEntry}; const TRAMPOLINE: usize = 0x8000; static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline")); pub(super) fn init(madt: Madt) { let local_apic = unsafe { the_local_apic() }; let me = local_apic.id(); if local_apic.x2 { debug!(" X2APIC {}", me.get()); } else { debug!(" XAPIC {}: {:>08X}", me.get(), local_apic.address); } if cfg!(not(feature = "multi_core")) { return; } - // Map trampoline + // Map trampoline writable and executable (trampoline page holds both code + // and AP argument data — AP writes ap_ready on the same page, so W^X is + // not possible without splitting code/data across pages). let trampoline_frame = Frame::containing(PhysicalAddress::new(TRAMPOLINE)); let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE)); let (result, page_table_physaddr) = unsafe { - //TODO: do not have writable and executable! let mut mapper = KernelMapper::lock_rw(); let result = mapper .map_phys( trampoline_page.start_address(), trampoline_frame.base(), - PageFlags::new().execute(true).write(true), + PageFlags::new().write(true).execute(true), ) .expect("failed to map trampoline"); (result, mapper.table().phys().data()) }; result.flush(); // Write trampoline, make sure TRAMPOLINE page is free for use for (i, val) in TRAMPOLINE_DATA.iter().enumerate() { unsafe { (*((TRAMPOLINE as *mut u8).add(i) as *const AtomicU8)).store(*val, Ordering::SeqCst); } } for madt_entry in madt.iter() { debug!(" {:x?}", madt_entry); if let MadtEntry::LocalApic(ap_local_apic) = madt_entry { if u32::from(ap_local_apic.id) == me.get() { debug!(" This is my local APIC"); } else if ap_local_apic.flags & 1 == 1 { let cpu_id = LogicalCpuId::next(); // Allocate a stack let stack_start = RmmA::phys_to_virt( allocate_p2frame(4) .expect("no more frames in acpi stack_start") .base(), ) .data(); let stack_end = stack_start + (PAGE_SIZE << 4); let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end); let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id); let args = KernelArgsAp { stack_end: stack_end as *mut u8, cpu_id, pcr_ptr, idt_ptr, }; let ap_ready = (TRAMPOLINE + 8) as *mut u64; let ap_args_ptr = unsafe { ap_ready.add(1) }; let ap_page_table = unsafe { ap_ready.add(2) }; let ap_code = unsafe { ap_ready.add(3) }; // Set the ap_ready to 0, volatile unsafe { ap_ready.write(0); ap_args_ptr.write(&args as *const _ as u64); ap_page_table.write(page_table_physaddr as u64); #[expect(clippy::fn_to_numeric_cast)] ap_code.write(kstart_ap as u64); // TODO: Is this necessary (this fence)?
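// (An empty asm!("") is only a compiler barrier: it keeps the writes above from being reordered past this point at compile time, but emits no CPU fence instruction.)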
core::arch::asm!(""); }; AP_READY.store(false, Ordering::SeqCst); // Send INIT IPI { let mut icr = 0x4500; if local_apic.x2 { icr |= u64::from(ap_local_apic.id) << 32; } else { icr |= u64::from(ap_local_apic.id) << 56; } local_apic.set_icr(icr); } // Send START IPI { let ap_segment = (TRAMPOLINE >> 12) & 0xFF; let mut icr = 0x4600 | ap_segment as u64; if local_apic.x2 { icr |= u64::from(ap_local_apic.id) << 32; } else { icr |= u64::from(ap_local_apic.id) << 56; } local_apic.set_icr(icr); } // Wait for trampoline ready while unsafe { (*ap_ready.cast::<AtomicU64>()).load(Ordering::SeqCst) } == 0 { hint::spin_loop(); } while !AP_READY.load(Ordering::SeqCst) { hint::spin_loop(); } RmmA::invalidate_all(); } + } else if let MadtEntry::LocalX2Apic(ap_x2apic) = madt_entry { + if ap_x2apic.x2apic_id == me.get() { + debug!(" This is my local x2APIC"); + } else if ap_x2apic.flags & 1 == 1 { + let cpu_id = LogicalCpuId::next(); + + let stack_start = RmmA::phys_to_virt( + allocate_p2frame(4) + .expect("no more frames in acpi stack_start") + .base(), + ) + .data(); + let stack_end = stack_start + (PAGE_SIZE << 4); + + let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end); + let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id); + + let args = KernelArgsAp { + stack_end: stack_end as *mut u8, + cpu_id, + pcr_ptr, + idt_ptr, + }; + + let ap_ready = (TRAMPOLINE + 8) as *mut u64; + let ap_args_ptr = unsafe { ap_ready.add(1) }; + let ap_page_table = unsafe { ap_ready.add(2) }; + let ap_code = unsafe { ap_ready.add(3) }; + + unsafe { + ap_ready.write(0); + ap_args_ptr.write(&args as *const _ as u64); + ap_page_table.write(page_table_physaddr as u64); + #[expect(clippy::fn_to_numeric_cast)] + ap_code.write(kstart_ap as u64); + core::arch::asm!(""); + }; + AP_READY.store(false, Ordering::SeqCst); + + // Send INIT IPI (x2APIC always uses 32-bit APIC ID in bits 32-63) + { + let mut icr = 0x4500u64; + icr |= u64::from(ap_x2apic.x2apic_id) << 32; + local_apic.set_icr(icr); + } + + // Wait for INIT delivery (~10 ms per the Intel SDM startup sequence) + for _ in 0..100_000 { + hint::spin_loop(); + } + + // Send STARTUP IPI + { + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; + let mut icr = 0x4600u64 | ap_segment as u64; + icr |= u64::from(ap_x2apic.x2apic_id) << 32; + local_apic.set_icr(icr); + } + + // Wait ~200 μs, then send second STARTUP IPI per the universal + // startup algorithm.
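+ // (The spin-loop counts here are rough, clock-frequency-dependent stand-ins + // for those delays; a calibrated timer would be more precise this early in boot.)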
+ for _ in 0..2_000_000 { + hint::spin_loop(); + } + { + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; + let mut icr = 0x4600u64 | ap_segment as u64; + icr |= u64::from(ap_x2apic.x2apic_id) << 32; + local_apic.set_icr(icr); + } + + let mut timeout = 100_000_000u32; + while unsafe { (*ap_ready.cast::<AtomicU64>()).load(Ordering::SeqCst) } == 0 { + hint::spin_loop(); + timeout -= 1; + if timeout == 0 { + debug!("x2APIC AP {} trampoline startup timed out", {ap_x2apic.x2apic_id}); + break; + } + } + let mut timeout = 100_000_000u32; + while !AP_READY.load(Ordering::SeqCst) { + hint::spin_loop(); + timeout -= 1; + if timeout == 0 { + debug!("x2APIC AP {} kernel startup timed out", {ap_x2apic.x2apic_id}); + break; + } + } + + RmmA::invalidate_all(); + } } } // Unmap trampoline let (_frame, _, flush) = unsafe { KernelMapper::lock_rw() .unmap_phys(trampoline_page.start_address()) .expect("failed to unmap trampoline page") }; flush.flush(); } diff --git a/src/acpi/madt/mod.rs b/src/acpi/madt/mod.rs --- a/src/acpi/madt/mod.rs +++ b/src/acpi/madt/mod.rs @@ -27,214 +27,240 @@ pub fn madt() -> Option<&'static Madt> { unsafe { &*MADT.get() }.as_ref() } pub const FLAG_PCAT: u32 = 1; impl Madt { pub fn init() { let madt = Madt::new(find_one_sdt!("APIC")); if let Some(madt) = madt { // Safe because no APs have been started yet. unsafe { MADT.get().write(Some(madt)) }; debug!(" APIC: {:>08X}: {}", madt.local_address, madt.flags); arch::init(madt); } } pub fn new(sdt: &'static Sdt) -> Option<Madt> { if &sdt.signature == b"APIC" && sdt.data_len() >= 8 { // Not valid if no local address and flags let local_address = unsafe { (sdt.data_address() as *const u32).read_unaligned() }; let flags = unsafe { (sdt.data_address() as *const u32) .offset(1) .read_unaligned() }; Some(Madt { sdt, local_address, flags, }) } else { None } } pub fn iter(&self) -> MadtIter { MadtIter { sdt: self.sdt, i: 8, // Skip local controller address and flags } } } /// MADT Local APIC #[derive(Clone, Copy, Debug)] #[repr(C, packed)] pub struct MadtLocalApic { /// Processor ID pub processor: u8, /// Local APIC ID pub id: u8, /// Flags.
1 means that the processor is enabled pub flags: u32, } /// MADT I/O APIC #[derive(Clone, Copy, Debug)] #[repr(C, packed)] pub struct MadtIoApic { /// I/O APIC ID pub id: u8, /// reserved _reserved: u8, /// I/O APIC address pub address: u32, /// Global system interrupt base pub gsi_base: u32, } /// MADT Interrupt Source Override #[derive(Clone, Copy, Debug)] #[repr(C, packed)] pub struct MadtIntSrcOverride { /// Bus Source pub bus_source: u8, /// IRQ Source pub irq_source: u8, /// Global system interrupt base pub gsi_base: u32, /// Flags pub flags: u16, } /// MADT GICC #[derive(Clone, Copy, Debug)] #[repr(C, packed)] pub struct MadtGicc { _reserved: u16, pub cpu_interface_number: u32, pub acpi_processor_uid: u32, pub flags: u32, pub parking_protocol_version: u32, pub performance_interrupt_gsiv: u32, pub parked_address: u64, pub physical_base_address: u64, pub gicv: u64, pub gich: u64, pub vgic_maintenance_interrupt: u32, pub gicr_base_address: u64, pub mpidr: u64, pub processor_power_efficiency_class: u8, _reserved2: u8, pub spe_overflow_interrupt: u16, //TODO: optional field introduced in ACPI 6.5: pub trbe_interrupt: u16, } /// MADT GICD #[derive(Clone, Copy, Debug)] #[repr(C, packed)] pub struct MadtGicd { _reserved: u16, pub gic_id: u32, pub physical_base_address: u64, pub system_vector_base: u32, pub gic_version: u8, _reserved2: [u8; 3], +} + +/// MADT Local x2APIC (entry type 0x9) +/// Used by modern AMD and Intel platforms with APIC IDs >= 255. +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct MadtLocalX2Apic { + _reserved: u16, + pub x2apic_id: u32, + pub flags: u32, + pub processor_uid: u32, } /// MADT Entries #[derive(Debug)] #[allow(dead_code)] pub enum MadtEntry { LocalApic(&'static MadtLocalApic), InvalidLocalApic(usize), IoApic(&'static MadtIoApic), InvalidIoApic(usize), IntSrcOverride(&'static MadtIntSrcOverride), InvalidIntSrcOverride(usize), Gicc(&'static MadtGicc), InvalidGicc(usize), Gicd(&'static MadtGicd), InvalidGicd(usize), + LocalX2Apic(&'static MadtLocalX2Apic), + InvalidLocalX2Apic(usize), Unknown(u8), } pub struct MadtIter { sdt: &'static Sdt, i: usize, } impl Iterator for MadtIter { type Item = MadtEntry; fn next(&mut self) -> Option<Self::Item> { if self.i + 1 < self.sdt.data_len() { let entry_type = unsafe { *(self.sdt.data_address() as *const u8).add(self.i) }; let entry_len = unsafe { *(self.sdt.data_address() as *const u8).add(self.i + 1) } as usize; + if entry_len < 2 { + return None; + } + if self.i + entry_len <= self.sdt.data_len() { let item = match entry_type { 0x0 => { if entry_len == size_of::<MadtLocalApic>() + 2 { MadtEntry::LocalApic(unsafe { &*((self.sdt.data_address() + self.i + 2) as *const MadtLocalApic) }) } else { MadtEntry::InvalidLocalApic(entry_len) } } 0x1 => { if entry_len == size_of::<MadtIoApic>() + 2 { MadtEntry::IoApic(unsafe { &*((self.sdt.data_address() + self.i + 2) as *const MadtIoApic) }) } else { MadtEntry::InvalidIoApic(entry_len) } } 0x2 => { if entry_len == size_of::<MadtIntSrcOverride>() + 2 { MadtEntry::IntSrcOverride(unsafe { &*((self.sdt.data_address() + self.i + 2) as *const MadtIntSrcOverride) }) } else { MadtEntry::InvalidIntSrcOverride(entry_len) } } 0xB => { if entry_len >= size_of::<MadtGicc>() + 2 { MadtEntry::Gicc(unsafe { &*((self.sdt.data_address() + self.i + 2) as *const MadtGicc) }) } else { MadtEntry::InvalidGicc(entry_len) } } 0xC => { if entry_len >= size_of::<MadtGicd>() + 2 { MadtEntry::Gicd(unsafe { &*((self.sdt.data_address() + self.i + 2) as *const MadtGicd) }) } else { MadtEntry::InvalidGicd(entry_len) } } + 0x9 => { + if entry_len == size_of::<MadtLocalX2Apic>() + 2 { +
MadtEntry::LocalX2Apic(unsafe { + &*((self.sdt.data_address() + self.i + 2) as *const MadtLocalX2Apic) + }) + } else { + MadtEntry::InvalidLocalX2Apic(entry_len) + } + } _ => MadtEntry::Unknown(entry_type), }; self.i += entry_len; Some(item) } else { None } } else { None } } } diff --git a/src/arch/x86_shared/cpuid.rs b/src/arch/x86_shared/cpuid.rs --- a/src/arch/x86_shared/cpuid.rs +++ b/src/arch/x86_shared/cpuid.rs @@ -1,29 +1,39 @@ use raw_cpuid::{CpuId, CpuIdResult, ExtendedFeatures, FeatureInfo}; +#[cfg(target_arch = "x86_64")] pub fn cpuid() -> CpuId { - // FIXME check for cpuid availability during early boot and error out if it doesn't exist. CpuId::with_cpuid_fn(|a, c| { - #[cfg(target_arch = "x86")] + let result = unsafe { core::arch::x86_64::__cpuid_count(a, c) }; + CpuIdResult { + eax: result.eax, + ebx: result.ebx, + ecx: result.ecx, + edx: result.edx, + } + }) +} + +#[cfg(target_arch = "x86")] +pub fn cpuid() -> CpuId { + CpuId::with_cpuid_fn(|a, c| { let result = unsafe { core::arch::x86::__cpuid_count(a, c) }; - #[cfg(target_arch = "x86_64")] - let result = unsafe { core::arch::x86_64::__cpuid_count(a, c) }; CpuIdResult { eax: result.eax, ebx: result.ebx, ecx: result.ecx, edx: result.edx, } }) } #[cfg_attr(not(target_arch = "x86_64"), expect(dead_code))] pub fn feature_info() -> FeatureInfo { cpuid() .get_feature_info() .expect("x86_64 requires CPUID leaf=0x01 to be present") } #[cfg_attr(not(target_arch = "x86_64"), expect(dead_code))] pub fn has_ext_feat(feat: impl FnOnce(ExtendedFeatures) -> bool) -> bool { cpuid().get_extended_feature_info().is_some_and(feat) } diff --git a/src/context/memory.rs b/src/context/memory.rs --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -890,112 +890,128 @@ .range(..=page) .next_back() .filter(|(base, info)| (**base..base.next_by(info.page_count)).contains(&page)) .map(|(base, info)| (*base, info)) } /// Returns an iterator over all grants that occupy some part of the /// requested region pub fn conflicts(&self, span: PageSpan) -> impl Iterator<Item = (Page, &GrantInfo)> + '_ { let start = self.contains(span.base); // If there is a grant that contains the base page, start searching at the base of that // grant, rather than the requested base here. let start_span = start .map(|(base, info)| PageSpan::new(base, info.page_count)) .unwrap_or(span); self.inner .range(start_span.base..) .take_while(move |(base, info)| PageSpan::new(**base, info.page_count).intersects(span)) .map(|(base, info)| (*base, info)) } // TODO: DEDUPLICATE CODE! pub fn conflicts_mut( &mut self, span: PageSpan, ) -> impl Iterator<Item = (Page, &mut GrantInfo)> + '_ { let start = self.contains(span.base); // If there is a grant that contains the base page, start searching at the base of that // grant, rather than the requested base here. let start_span = start .map(|(base, info)| PageSpan::new(base, info.page_count)) .unwrap_or(span); self.inner .range_mut(start_span.base..) .take_while(move |(base, info)| PageSpan::new(**base, info.page_count).intersects(span)) .map(|(base, info)| (*base, info)) } - /// Return a free region with the specified size - // TODO: Alignment (x86_64: 4 KiB, 2 MiB, or 1 GiB). + /// Return a free region with the specified size, optionally aligned to a power-of-two + /// boundary (x86_64 supports 4 KiB, 2 MiB, or 1 GiB pages). // TODO: Support finding grant close to a requested address?
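+ /// (Alignment is applied by `find_free_near_aligned` below: e.g. with 4 KiB + /// pages, `page_alignment = 512` rounds a hole base of 0x1ff000 up to the + /// 2 MiB boundary 0x200000 before checking that `page_count` pages still fit.)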
pub fn find_free_near( &self, min: usize, page_count: usize, _near: Option<Page>, ) -> Option<PageSpan> { - // Get first available hole, but do reserve the page starting from zero as most compiled - // languages cannot handle null pointers safely even if they point to valid memory. If an - // application absolutely needs to map the 0th page, they will have to do so explicitly via - // MAP_FIXED/MAP_FIXED_NOREPLACE. - // TODO: Allow explicitly allocating guard pages? Perhaps using mprotect or mmap with - // PROT_NONE? + self.find_free_near_aligned(min, page_count, _near, 0) + } + pub fn find_free_near_aligned( + &self, + min: usize, + page_count: usize, + _near: Option<Page>, + page_alignment: usize, + ) -> Option<PageSpan> { + let alignment = if page_alignment == 0 { + PAGE_SIZE + } else { + assert!( + page_alignment.is_power_of_two(), + "page_alignment must be a power of two" + ); + page_alignment * PAGE_SIZE + }; let (hole_start, _hole_size) = self .holes .iter() .skip_while(|(hole_offset, hole_size)| hole_offset.data() + **hole_size <= min) .find(|(hole_offset, hole_size)| { - let avail_size = - if hole_offset.data() <= min && min <= hole_offset.data() + **hole_size { - **hole_size - (min - hole_offset.data()) - } else { - **hole_size - }; + let base = cmp::max(hole_offset.data(), min); + let aligned_base = (base + alignment - 1) & !(alignment - 1); + let avail_size = if aligned_base <= hole_offset.data() + **hole_size { + hole_offset.data() + **hole_size - aligned_base + } else { + 0 + }; page_count * PAGE_SIZE <= avail_size })?; - // Create new region + + let base = cmp::max(hole_start.data(), min); + let aligned_base = (base + alignment - 1) & !(alignment - 1); + Some(PageSpan::new( - Page::containing_address(VirtualAddress::new(cmp::max(hole_start.data(), min))), + Page::containing_address(VirtualAddress::new(aligned_base)), page_count, )) } pub fn find_free(&self, min: usize, page_count: usize) -> Option<PageSpan> { self.find_free_near(min, page_count, None) } fn reserve(&mut self, base: Page, page_count: usize) { let start_address = base.start_address(); let size = page_count * PAGE_SIZE; let end_address = base.start_address().add(size); let previous_hole = self.holes.range_mut(..start_address).next_back(); if let Some((hole_offset, hole_size)) = previous_hole { let prev_hole_end = hole_offset.data() + *hole_size; // Note that prev_hole_end cannot exactly equal start_address, since that would imply // there is another grant at that position already, as it would otherwise have been // larger. if prev_hole_end > start_address.data() { // hole_offset must be below (but never equal to) the start address due to the // `..start_address()` limit; hence, all we have to do is to shrink the // previous offset. *hole_size = start_address.data() - hole_offset.data(); } if prev_hole_end > end_address.data() { // The grant is splitting this hole in two, so insert the new one at the end.
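// E.g. reserving [0x3000, 0x5000) from a hole [0x1000, 0x8000) shrinks that hole to [0x1000, 0x3000) above, then inserts the tail [0x5000, 0x8000) here.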
self.holes .insert(end_address, prev_hole_end - end_address.data()); } } // Next hole if let Some(hole_size) = self.holes.remove(&start_address) { let remainder = hole_size - size; if remainder > 0 { self.holes.insert(end_address, remainder); } } diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs --- a/src/arch/x86_shared/device/local_apic.rs +++ b/src/arch/x86_shared/device/local_apic.rs @@ -100,61 +100,68 @@ } } pub fn id(&self) -> ApicId { ApicId::new(if self.x2 { unsafe { rdmsr(IA32_X2APIC_APICID) as u32 } } else { unsafe { self.read(0x20) } }) } pub fn version(&self) -> u32 { if self.x2 { unsafe { rdmsr(IA32_X2APIC_VERSION) as u32 } } else { unsafe { self.read(0x30) } } } pub fn icr(&self) -> u64 { if self.x2 { unsafe { rdmsr(IA32_X2APIC_ICR) } } else { unsafe { ((self.read(0x310) as u64) << 32) | self.read(0x300) as u64 } } } pub fn set_icr(&mut self, value: u64) { if self.x2 { unsafe { + const PENDING: u32 = 1 << 12; + while (rdmsr(IA32_X2APIC_ICR) as u32) & PENDING == PENDING { + core::hint::spin_loop(); + } wrmsr(IA32_X2APIC_ICR, value); + while (rdmsr(IA32_X2APIC_ICR) as u32) & PENDING == PENDING { + core::hint::spin_loop(); + } } } else { unsafe { const PENDING: u32 = 1 << 12; while self.read(0x300) & PENDING == PENDING { core::hint::spin_loop(); } self.write(0x310, (value >> 32) as u32); self.write(0x300, value as u32); while self.read(0x300) & PENDING == PENDING { core::hint::spin_loop(); } } } } pub fn ipi(&mut self, apic_id: ApicId, kind: IpiKind) { let shift = if self.x2 { 32 } else { 56 }; self.set_icr((u64::from(apic_id.get()) << shift) | 0x40 | kind as u64); } pub fn ipi_nmi(&mut self, apic_id: ApicId) { let shift = if self.x2 { 32 } else { 56 }; self.set_icr((u64::from(apic_id.get()) << shift) | (1 << 14) | (0b100 << 8)); } pub unsafe fn eoi(&mut self) { unsafe { if self.x2 { wrmsr(IA32_X2APIC_EOI, 0); } else { diff --git a/src/acpi/rsdp.rs b/src/acpi/rsdp.rs index f10c5ac9..f3cf3175 100644 --- a/src/acpi/rsdp.rs +++ b/src/acpi/rsdp.rs @@ -17,9 +17,33 @@ pub struct Rsdp { impl Rsdp { pub unsafe fn get_rsdp(already_supplied_rsdp: Option<*const u8>) -> Option<Rsdp> { - already_supplied_rsdp.map(|rsdp_ptr| { - // TODO: Validate - unsafe { *(rsdp_ptr as *const Rsdp) } + already_supplied_rsdp.and_then(|rsdp_ptr| { + let rsdp = unsafe { *(rsdp_ptr as *const Rsdp) }; + + // Validate signature "RSD PTR " + if &rsdp.signature != b"RSD PTR " { + return None; + } + + // ACPI 1.0 checksum: sum of first 20 bytes must be zero + let bytes_v1 = unsafe { core::slice::from_raw_parts(rsdp_ptr, 20) }; + if bytes_v1.iter().fold(0u8, |sum, &b| sum.wrapping_add(b)) != 0 { + return None; + } + + // ACPI 2.0+ extended checksum: sum of entire table (length bytes) must be zero + if rsdp.revision >= 2 { + let full_len = rsdp._length as usize; + if full_len < 36 || full_len > 256 { + return None; + } + let bytes_full = unsafe { core::slice::from_raw_parts(rsdp_ptr, full_len) }; + if bytes_full.iter().fold(0u8, |sum, &b| sum.wrapping_add(b)) != 0 { + return None; + } + } + + Some(rsdp) }) } diff --git a/src/acpi/madt/arch/x86.rs b/src/acpi/madt/arch/x86.rs index 4dc23883..c52e0ab4 100644 --- a/src/acpi/madt/arch/x86.rs +++ b/src/acpi/madt/arch/x86.rs @@ -10,7 +10,8 @@ use crate::{ }, cpu_set::LogicalCpuId, memory::{ - allocate_p2frame, Frame, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch, + allocate_p2frame, map_device_memory, Frame, KernelMapper, Page, PageFlags, + PhysicalAddress, RmmA, RmmArch, VirtualAddress, PAGE_SIZE, },
startup::AP_READY, @@ -20,6 +21,55 @@ use super::{Madt, MadtEntry}; const TRAMPOLINE: usize = 0x8000; static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline")); +const AP_STARTUP_TIMEOUT: u32 = 100_000_000; + +fn wait_for_ap_flag(flag: *mut u64, description: &str, apic_id: u32) -> bool { + let mut timeout = AP_STARTUP_TIMEOUT; + while unsafe { (*flag.cast::<AtomicU64>()).load(Ordering::SeqCst) } == 0 { + hint::spin_loop(); + timeout -= 1; + if timeout == 0 { + debug!("AP {} {} timed out", apic_id, description); + return false; + } + } + true +} + +fn wait_for_kernel_ap_ready(apic_id: u32) -> bool { + let mut timeout = AP_STARTUP_TIMEOUT; + while !AP_READY.load(Ordering::SeqCst) { + hint::spin_loop(); + timeout -= 1; + if timeout == 0 { + debug!("AP {} kernel startup timed out", apic_id); + return false; + } + } + true +} + +fn current_x2apic_processor_uid(madt: &Madt, apic_id: u32) -> Option<u32> { + madt.iter().find_map(|entry| match entry { + MadtEntry::LocalX2Apic(x2apic) if x2apic.x2apic_id == apic_id => Some(x2apic.processor_uid), + _ => None, + }) +} + +fn apply_lapic_address_override(local_apic: &mut crate::arch::device::local_apic::LocalApic, addr: u64) { + if local_apic.x2 || addr == 0 { + return; + } + + let Ok(physaddr) = usize::try_from(addr) else { + warn!("Ignoring LAPIC address override {:#x}: does not fit host usize", addr); + return; + }; + + let mapped = unsafe { map_device_memory(PhysicalAddress::new(physaddr), 4096) }.data(); + local_apic.address = mapped; + debug!("Applied LAPIC address override: {:#x}", addr); +} pub(super) fn init(madt: Madt) { let local_apic = unsafe { the_local_apic() }; @@ -35,18 +85,19 @@ pub(super) fn init(madt: Madt) { return; } - // Map trampoline + // Map trampoline writable and executable (trampoline page holds both code + // and AP argument data — AP writes ap_ready on the same page, so W^X is + // not possible without splitting code/data across pages). let trampoline_frame = Frame::containing(PhysicalAddress::new(TRAMPOLINE)); let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE)); let (result, page_table_physaddr) = unsafe { - //TODO: do not have writable and executable!
let mut mapper = KernelMapper::lock_rw(); let result = mapper .map_phys( trampoline_page.start_address(), trampoline_frame.base(), - PageFlags::new().execute(true).write(true), + PageFlags::new().write(true).execute(true), ) .expect("failed to map trampoline"); @@ -75,12 +126,11 @@ pub(super) fn init(madt: Madt) { let cpu_id = LogicalCpuId::next(); // Allocate a stack - let stack_start = RmmA::phys_to_virt( - allocate_p2frame(4) - .expect("no more frames in acpi stack_start") - .base(), - ) - .data(); + let Some(stack_frame) = allocate_p2frame(4) else { + warn!("Unable to allocate AP bootstrap stack for local APIC {}", ap_local_apic.id); + continue; + }; + let stack_start = RmmA::phys_to_virt(stack_frame.base()).data(); let stack_end = stack_start + (PAGE_SIZE << 4); let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end); @@ -138,15 +188,168 @@ pub(super) fn init(madt: Madt) { } // Wait for trampoline ready - while unsafe { (*ap_ready.cast::<AtomicU64>()).load(Ordering::SeqCst) } == 0 { + let ready = wait_for_ap_flag(ap_ready, "trampoline startup", u32::from(ap_local_apic.id)); + let kernel_ready = ready && wait_for_kernel_ap_ready(u32::from(ap_local_apic.id)); + + if !kernel_ready { + warn!("Skipping local APIC {} after startup timeout", ap_local_apic.id); + } + + RmmA::invalidate_all(); + } + } else if let MadtEntry::LocalX2Apic(ap_x2apic) = madt_entry { + let x2id = ap_x2apic.x2apic_id; + let x2flags = ap_x2apic.flags; + if x2id == me.get() { + debug!(" This is my local x2APIC"); + } else if x2flags & 1 == 1 { + let cpu_id = LogicalCpuId::next(); + + let Some(stack_frame) = allocate_p2frame(4) else { + warn!( + "Unable to allocate AP bootstrap stack for x2APIC {}", + x2id + ); + continue; + }; + let stack_start = RmmA::phys_to_virt(stack_frame.base()).data(); + let stack_end = stack_start + (PAGE_SIZE << 4); + + let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end); + let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id); + + let args = KernelArgsAp { + stack_end: stack_end as *mut u8, + cpu_id, + pcr_ptr, + idt_ptr, + }; + + let ap_ready = (TRAMPOLINE + 8) as *mut u64; + let ap_args_ptr = unsafe { ap_ready.add(1) }; + let ap_page_table = unsafe { ap_ready.add(2) }; + let ap_code = unsafe { ap_ready.add(3) }; + + unsafe { + ap_ready.write(0); + ap_args_ptr.write(&args as *const _ as u64); + ap_page_table.write(page_table_physaddr as u64); + #[expect(clippy::fn_to_numeric_cast)] + ap_code.write(kstart_ap as u64); + core::arch::asm!(""); + }; + AP_READY.store(false, Ordering::SeqCst); + + // Same ICR delivery-mode bits are used by xAPIC and x2APIC; only the + // destination field encoding changes between the MMIO and MSR forms. + const ICR_INIT_ASSERT: u64 = 0x4500; + const ICR_STARTUP: u64 = 0x4600; + + // ICR bits 10:8 = 0b101 (INIT), bit 14 = level assert. + // Send INIT IPI (x2APIC always uses 32-bit APIC ID in bits 32-63) + { + let mut icr = ICR_INIT_ASSERT; + icr |= u64::from(x2id) << 32; + local_apic.set_icr(icr); + } + + // Wait for INIT delivery (~10 ms per the Intel SDM startup sequence) + for _ in 0..100_000 { hint::spin_loop(); } - while !AP_READY.load(Ordering::SeqCst) { + + // ICR bits 10:8 = 0b110 (STARTUP), bit 14 = level assert. + // Send STARTUP IPI + { + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; + let mut icr = ICR_STARTUP | ap_segment as u64; + icr |= u64::from(x2id) << 32; + local_apic.set_icr(icr); + } + + // Wait ~200 μs, then send second STARTUP IPI per the universal + // startup algorithm.
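+ // (Most modern CPUs come up after the first SIPI; the repeat is the + // defensive step from the classic two-SIPI startup sequence.)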
+ for _ in 0..2_000_000 { hint::spin_loop(); } + { + let ap_segment = (TRAMPOLINE >> 12) & 0xFF; + let mut icr = ICR_STARTUP | ap_segment as u64; + icr |= u64::from(x2id) << 32; + local_apic.set_icr(icr); + } + + // Known limitation: cpu_id and per-CPU bootstrap state are allocated + // before the timeout checks, so a timed-out AP still consumes a + // logical CPU slot until startup rollback/teardown is implemented. + let ready = wait_for_ap_flag(ap_ready, "trampoline startup", x2id); + let kernel_ready = ready && wait_for_kernel_ap_ready(x2id); + + if !kernel_ready { + warn!("Skipping x2APIC {} after startup timeout", x2id); + } RmmA::invalidate_all(); } + } else if let MadtEntry::LocalApicNmi(nmi) = madt_entry { + let target_id = nmi.processor; + let nmi_pin = nmi.nmi_pin; + let nmi_flags = nmi.flags; + if target_id == 0xFF { + debug!( + " NMI: all processors, pin={}, flags={:#x}", + nmi_pin, nmi_flags + ); + unsafe { + local_apic.set_lvt_nmi(nmi_pin, nmi_flags); + } + } else { + let my_apic_id = local_apic.id().get() as u8; + if target_id == my_apic_id { + debug!( + " NMI: processor {}, pin={}, flags={:#x}", + target_id, nmi_pin, nmi_flags + ); + unsafe { + local_apic.set_lvt_nmi(nmi_pin, nmi_flags); + } + } + } + } else if let MadtEntry::LocalX2ApicNmi(nmi) = madt_entry { + let target_uid = nmi.processor_uid; + let nmi_pin = nmi.nmi_pin; + let nmi_flags = nmi.flags; + if target_uid == 0xFFFFFFFF { + debug!( + " x2APIC NMI: all processors, pin={}, flags={:#x}", + nmi_pin, nmi_flags + ); + unsafe { + local_apic.set_lvt_nmi(nmi_pin, nmi_flags); + } + } else { + let current_uid = current_x2apic_processor_uid(&madt, me.get()); + if current_uid == Some(target_uid) { + debug!( + " x2APIC NMI: uid {}, pin={}, flags={:#x}", + target_uid, nmi_pin, nmi_flags + ); + unsafe { + local_apic.set_lvt_nmi(nmi_pin, nmi_flags); + } + } else { + debug!( + " x2APIC NMI: skipping uid {} on current uid {:?}", + target_uid, current_uid + ); + } + } + } else if let MadtEntry::LapicAddressOverride(addr) = madt_entry { + let lapic_addr = addr.local_apic_address; + if lapic_addr != 0 { + debug!(" LAPIC address override: {:#x}", lapic_addr); + apply_lapic_address_override(local_apic, lapic_addr); + } } } diff --git a/src/acpi/madt/mod.rs b/src/acpi/madt/mod.rs index 3159b9c4..23551c64 100644 --- a/src/acpi/madt/mod.rs +++ b/src/acpi/madt/mod.rs @@ -146,6 +146,52 @@ pub struct MadtGicd { _reserved2: [u8; 3], } +/// MADT Local x2APIC (entry type 0x9) +/// Used by modern AMD and Intel platforms with APIC IDs >= 255. +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct MadtLocalX2Apic { + _reserved: u16, + pub x2apic_id: u32, + pub flags: u32, + pub processor_uid: u32, +} + +/// MADT Local APIC NMI (entry type 0x4) +/// Configures NMI routing to a processor's LINT0/LINT1 pin. +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct MadtLocalApicNmi { + pub processor: u8, // 0xFF = all processors + pub flags: u16, // bits 0-1: polarity, bits 2-3: trigger mode + pub nmi_pin: u8, // 0 = LINT0, 1 = LINT1 +} + +/// MADT Local APIC Address Override (entry type 0x5) +/// Provides 64-bit override for the 32-bit local APIC address. +#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct MadtLapicAddressOverride { + _reserved: u16, + pub local_apic_address: u64, +} + +/// MADT Local x2APIC NMI (entry type 0xA) +/// x2APIC equivalent of type 0x4 for APIC IDs >= 255. 
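+/// Flags use the same MPS INTI encoding as type 0x4 (bits 0-1 polarity, +/// bits 2-3 trigger mode).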
+#[derive(Clone, Copy, Debug)] +#[repr(C, packed)] +pub struct MadtLocalX2ApicNmi { + _reserved: u16, + pub processor_uid: u32, // 0xFFFFFFFF = all processors + pub flags: u16, + pub nmi_pin: u8, // 0 = LINT0, 1 = LINT1 + _reserved2: u8, +} + +const _: () = assert!(size_of::<MadtLocalApicNmi>() == 4); +const _: () = assert!(size_of::<MadtLapicAddressOverride>() == 10); +const _: () = assert!(size_of::<MadtLocalX2ApicNmi>() == 10); + /// MADT Entries #[derive(Debug)] #[allow(dead_code)] @@ -160,6 +206,14 @@ pub enum MadtEntry { InvalidGicc(usize), Gicd(&'static MadtGicd), InvalidGicd(usize), + LocalX2Apic(&'static MadtLocalX2Apic), + InvalidLocalX2Apic(usize), + LocalApicNmi(&'static MadtLocalApicNmi), + InvalidLocalApicNmi(usize), + LapicAddressOverride(&'static MadtLapicAddressOverride), + InvalidLapicAddressOverride(usize), + LocalX2ApicNmi(&'static MadtLocalX2ApicNmi), + InvalidLocalX2ApicNmi(usize), Unknown(u8), } @@ -176,6 +230,10 @@ impl Iterator for MadtIter { let entry_len = unsafe { *(self.sdt.data_address() as *const u8).add(self.i + 1) } as usize; + if entry_len < 2 { + return None; + } + if self.i + entry_len <= self.sdt.data_len() { let item = match entry_type { 0x0 => { @@ -224,6 +282,44 @@ impl Iterator for MadtIter { MadtEntry::InvalidGicd(entry_len) } } + 0x9 => { + if entry_len == size_of::<MadtLocalX2Apic>() + 2 { + MadtEntry::LocalX2Apic(unsafe { + &*((self.sdt.data_address() + self.i + 2) as *const MadtLocalX2Apic) + }) + } else { + MadtEntry::InvalidLocalX2Apic(entry_len) + } + } + 0x4 => { + if entry_len == size_of::<MadtLocalApicNmi>() + 2 { + MadtEntry::LocalApicNmi(unsafe { + &*((self.sdt.data_address() + self.i + 2) as *const MadtLocalApicNmi) + }) + } else { + MadtEntry::InvalidLocalApicNmi(entry_len) + } + } + 0x5 => { + if entry_len == size_of::<MadtLapicAddressOverride>() + 2 { + MadtEntry::LapicAddressOverride(unsafe { + &*((self.sdt.data_address() + self.i + 2) + as *const MadtLapicAddressOverride) + }) + } else { + MadtEntry::InvalidLapicAddressOverride(entry_len) + } + } + 0xA => { + if entry_len == size_of::<MadtLocalX2ApicNmi>() + 2 { + MadtEntry::LocalX2ApicNmi(unsafe { + &*((self.sdt.data_address() + self.i + 2) + as *const MadtLocalX2ApicNmi) + }) + } else { + MadtEntry::InvalidLocalX2ApicNmi(entry_len) + } + } _ => MadtEntry::Unknown(entry_type), }; diff --git a/src/acpi/mod.rs b/src/acpi/mod.rs index 59e35265..d4c81f11 100644 --- a/src/acpi/mod.rs +++ b/src/acpi/mod.rs @@ -10,6 +10,8 @@ use crate::memory::{KernelMapper, PageFlags, PhysicalAddress, RmmA, RmmArch}; use self::{hpet::Hpet, madt::Madt, rsdp::Rsdp, rsdt::Rsdt, rxsdt::Rxsdt, sdt::Sdt, xsdt::Xsdt}; +const MAX_SDT_SIZE: usize = 16 * 1024 * 1024; + #[cfg(target_arch = "aarch64")] mod gtdt; pub mod hpet; @@ -22,39 +24,79 @@ pub mod sdt; mod spcr; mod xsdt; -unsafe fn map_linearly(addr: PhysicalAddress, len: usize, mapper: &mut crate::memory::PageMapper) { +unsafe fn map_linearly( + addr: PhysicalAddress, + len: usize, + mapper: &mut crate::memory::PageMapper, +) -> bool { + unsafe { let base = PhysicalAddress::new(crate::memory::round_down_pages(addr.data())); - let aligned_len = crate::memory::round_up_pages(len + (addr.data() - base.data())); + let Some(total_len) = len.checked_add(addr.data() - base.data()) else { + error!("ACPI table mapping length overflow at {:#x}", addr.data()); + return false; + }; + let aligned_len = crate::memory::round_up_pages(total_len); for page_idx in 0..aligned_len / crate::memory::PAGE_SIZE { - let (_, flush) = mapper + let Some((_virt, flush)) = mapper .map_linearly( base.add(page_idx * crate::memory::PAGE_SIZE), PageFlags::new(), ) - .expect("failed to linearly map SDT"); + else { + error!( + "failed
to linearly map ACPI table page at {:#x}", + base.add(page_idx * crate::memory::PAGE_SIZE).data() + ); + return false; + }; flush.flush(); } + + true } } -pub fn get_sdt(sdt_address: PhysicalAddress, mapper: &mut KernelMapper) -> &'static Sdt { +pub fn get_sdt(sdt_address: PhysicalAddress, mapper: &mut KernelMapper) -> Option<&'static Sdt> { let sdt; unsafe { const SDT_SIZE: usize = size_of::<Sdt>(); - map_linearly(sdt_address, SDT_SIZE, mapper); + if !map_linearly(sdt_address, SDT_SIZE, mapper) { + return None; + } sdt = &*(RmmA::phys_to_virt(sdt_address).data() as *const Sdt); - map_linearly( + let total_len = sdt.length as usize; + if total_len < SDT_SIZE { + warn!( + "ACPI table {:?} at {:#x} shorter than header ({})", + sdt.signature, + sdt_address.data(), + total_len + ); + return None; + } + if total_len > MAX_SDT_SIZE { + warn!( + "ACPI table {:?} at {:#x} exceeds max supported size ({})", + sdt.signature, + sdt_address.data(), + total_len + ); + return None; + } + + if !map_linearly( + sdt_address.add(SDT_SIZE), - sdt.length as usize - SDT_SIZE, + total_len - SDT_SIZE, mapper, - ); + ) { + return None; + } } - sdt + Some(sdt) } #[repr(C, packed)] @@ -95,7 +137,19 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) { if let Some(rsdp) = rsdp_opt { debug!("SDT address: {:#x}", rsdp.sdt_address().data()); - let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock_rw()); + let Some(rxsdt) = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock_rw()) else { + error!("Unable to map RSDT/XSDT header"); + return; + }; + + if !rxsdt.validate_checksum() { + warn!( + "Root ACPI table {:?} at {:#x} has invalid checksum; ignoring ACPI", + rxsdt.signature, + rsdp.sdt_address().data() + ); + return; + } let rxsdt = if let Some(rsdt) = Rsdt::new(rxsdt) { let mut initialized = false; @@ -132,12 +186,28 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) { // TODO: Don't touch ACPI tables in kernel? for sdt in rxsdt.iter() { - get_sdt(sdt, &mut KernelMapper::lock_rw()); + if get_sdt(sdt, &mut KernelMapper::lock_rw()).is_none() { + warn!("Skipping unreadable ACPI table at {:#x}", sdt.data()); + } } for sdt_address in rxsdt.iter() { + let Some(sdt) = get_sdt(sdt_address, &mut KernelMapper::lock_rw()) else { + warn!("Skipping ACPI table at {:#x}: unable to map safely", sdt_address.data()); + continue; + }; let sdt = &*(RmmA::phys_to_virt(sdt_address).data() as *const Sdt); + if !sdt.validate_checksum() { + let sig = &sdt.signature; + warn!( + "ACPI table {:?} at {:#x} has invalid checksum", + sig, + sdt_address.data() + ); + continue; + } + let signature = get_sdt_signature(sdt); if let Some(ref mut ptrs) = *(SDT_POINTERS.write()) { ptrs.insert(signature, sdt); @@ -198,8 +268,7 @@ macro_rules!
find_one_sdt { } pub fn get_sdt_signature(sdt: &'static Sdt) -> SdtSignature { - let signature = - String::from_utf8(sdt.signature.to_vec()).expect("Error converting signature to string"); + let signature = String::from_utf8_lossy(&sdt.signature).into_owned(); (signature, sdt.oem_id, sdt.oem_table_id) } diff --git a/src/acpi/rsdp.rs b/src/acpi/rsdp.rs index f10c5ac9..571aeeec 100644 --- a/src/acpi/rsdp.rs +++ b/src/acpi/rsdp.rs @@ -1,5 +1,8 @@ use rmm::PhysicalAddress; +const RSDP_V1_SIZE: usize = 20; +const RSDP_V2_MIN_SIZE: usize = size_of::(); + /// RSDP #[derive(Copy, Clone, Debug)] #[repr(C, packed)] @@ -17,10 +20,33 @@ pub struct Rsdp { impl Rsdp { pub unsafe fn get_rsdp(already_supplied_rsdp: Option<*const u8>) -> Option { - already_supplied_rsdp.map(|rsdp_ptr| { - // TODO: Validate - unsafe { *(rsdp_ptr as *const Rsdp) } - }) + let rsdp_ptr = already_supplied_rsdp?; + let rsdp = unsafe { *(rsdp_ptr as *const Rsdp) }; + + if rsdp.signature != *b"RSD PTR " { + warn!("RSDP signature invalid"); + return None; + } + + if !checksum_ok(rsdp_ptr, RSDP_V1_SIZE) { + warn!("RSDP base checksum invalid"); + return None; + } + + if rsdp.revision >= 2 { + let length = rsdp._length as usize; + if length < RSDP_V2_MIN_SIZE { + warn!("RSDP revision {} length {} too small", rsdp.revision, length); + return None; + } + + if !checksum_ok(rsdp_ptr, length) { + warn!("RSDP extended checksum invalid"); + return None; + } + } + + Some(rsdp) } /// Get the RSDT or XSDT address @@ -32,3 +58,8 @@ impl Rsdp { }) } } + +fn checksum_ok(ptr: *const u8, len: usize) -> bool { + let bytes = unsafe { core::slice::from_raw_parts(ptr, len) }; + bytes.iter().fold(0u8, |sum, &byte| sum.wrapping_add(byte)) == 0 +} diff --git a/src/acpi/sdt.rs b/src/acpi/sdt.rs index 83ff67da..f49b6212 100644 --- a/src/acpi/sdt.rs +++ b/src/acpi/sdt.rs @@ -24,4 +24,15 @@ impl Sdt { let header_size = size_of::(); total_size.saturating_sub(header_size) } + + /// Validate that the sum of all bytes in this table is zero (ACPI spec requirement). + /// Returns false if the length is too small or the checksum doesn't match. + pub fn validate_checksum(&self) -> bool { + let len = self.length as usize; + if len < size_of::() { + return false; + } + let bytes = unsafe { core::slice::from_raw_parts(self as *const _ as *const u8, len) }; + bytes.iter().fold(0u8, |sum, &b| sum.wrapping_add(b)) == 0 + } } diff --git a/src/arch/aarch64/start.rs b/src/arch/aarch64/start.rs index e1c8cfb4..65e3fe33 100644 --- a/src/arch/aarch64/start.rs +++ b/src/arch/aarch64/start.rs @@ -91,7 +91,7 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs) -> ! { dtb::serial::init_early(dtb); } - info!("Redox OS starting..."); + info!("RedBear OS starting..."); args.print(); // Initialize RMM diff --git a/src/arch/riscv64/start.rs b/src/arch/riscv64/start.rs index 2551968f..a825536a 100644 --- a/src/arch/riscv64/start.rs +++ b/src/arch/riscv64/start.rs @@ -97,7 +97,7 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs) -> ! { init_early(dtb); } - info!("Redox OS starting..."); + info!("RedBear OS starting..."); args.print(); if let Some(dtb) = &dtb { diff --git a/src/arch/x86_shared/cpuid.rs b/src/arch/x86_shared/cpuid.rs index b3683125..be7db1be 100644 --- a/src/arch/x86_shared/cpuid.rs +++ b/src/arch/x86_shared/cpuid.rs @@ -1,11 +1,8 @@ use raw_cpuid::{CpuId, CpuIdResult, ExtendedFeatures, FeatureInfo}; +#[cfg(target_arch = "x86_64")] pub fn cpuid() -> CpuId { - // FIXME check for cpuid availability during early boot and error out if it doesn't exist. 
CpuId::with_cpuid_fn(|a, c| { - #[cfg(target_arch = "x86")] - let result = unsafe { core::arch::x86::__cpuid_count(a, c) }; - #[cfg(target_arch = "x86_64")] let result = unsafe { core::arch::x86_64::__cpuid_count(a, c) }; CpuIdResult { eax: result.eax, @@ -16,6 +13,19 @@ pub fn cpuid() -> CpuId { }) } +#[cfg(target_arch = "x86")] +pub fn cpuid() -> CpuId { + CpuId::with_cpuid_fn(|a, c| { + let result = unsafe { core::arch::x86::__cpuid_count(a, c) }; + CpuIdResult { + eax: result.eax, + ebx: result.ebx, + ecx: result.ecx, + edx: result.edx, + } + }) +} + #[cfg_attr(not(target_arch = "x86_64"), expect(dead_code))] pub fn feature_info() -> FeatureInfo { cpuid() diff --git a/src/arch/x86_shared/device/ioapic.rs b/src/arch/x86_shared/device/ioapic.rs index fb66d3bf..5938540f 100644 --- a/src/arch/x86_shared/device/ioapic.rs +++ b/src/arch/x86_shared/device/ioapic.rs @@ -14,6 +14,9 @@ pub struct IoApicRegs { pointer: *const u32, } impl IoApicRegs { + fn redirection_index_valid(&mut self, idx: u8) -> bool { + idx <= self.max_redirection_table_entries() + } fn ioregsel(&self) -> *const u32 { self.pointer } @@ -44,21 +47,28 @@ impl IoApicRegs { pub fn read_ioapicver(&mut self) -> u32 { self.read_reg(0x01) } - pub fn read_ioredtbl(&mut self, idx: u8) -> u64 { - assert!(idx < 24); + pub fn read_ioredtbl(&mut self, idx: u8) -> Option<u64> { + if !self.redirection_index_valid(idx) { + warn!("IOAPIC read_ioredtbl index {} out of range", idx); + return None; + } let lo = self.read_reg(0x10 + idx * 2); let hi = self.read_reg(0x10 + idx * 2 + 1); - u64::from(lo) | (u64::from(hi) << 32) + Some(u64::from(lo) | (u64::from(hi) << 32)) } - pub fn write_ioredtbl(&mut self, idx: u8, value: u64) { - assert!(idx < 24); + pub fn write_ioredtbl(&mut self, idx: u8, value: u64) -> bool { + if !self.redirection_index_valid(idx) { + warn!("IOAPIC write_ioredtbl index {} out of range", idx); + return false; + } let lo = value as u32; let hi = (value >> 32) as u32; self.write_reg(0x10 + idx * 2, lo); self.write_reg(0x10 + idx * 2 + 1, hi); + true } pub fn max_redirection_table_entries(&mut self) -> u8 { @@ -92,17 +102,22 @@ impl IoApic { } /// Map an interrupt vector to a physical local APIC ID of a processor (thus physical mode). #[allow(dead_code)] - pub fn map(&self, idx: u8, info: MapInfo) { - self.regs.lock().write_ioredtbl(idx, info.as_raw()) + pub fn map(&self, idx: u8, info: MapInfo) -> bool { + let Some(raw) = info.as_raw() else { + return false; + }; + self.regs.lock().write_ioredtbl(idx, raw) } pub fn set_mask(&self, gsi: u32, mask: bool) { let idx = (gsi - self.gsi_start) as u8; let mut guard = self.regs.lock(); - let mut reg = guard.read_ioredtbl(idx); + let Some(mut reg) = guard.read_ioredtbl(idx) else { + return; + }; reg &= !(1 << 16); reg |= u64::from(mask) << 16; - guard.write_ioredtbl(idx, reg); + let _ = guard.write_ioredtbl(idx, reg); } } @@ -149,19 +164,21 @@ pub struct MapInfo { } impl MapInfo { - pub fn as_raw(&self) -> u64 { - assert!(self.vector >= 0x20); - assert!(self.vector <= 0xFE); + pub fn as_raw(&self) -> Option<u64> { + if !(0x20..=0xFE).contains(&self.vector) { + warn!("Refusing to map IOAPIC vector outside valid range: {:#x}", self.vector); + return None; + } // TODO: Check for reserved fields.
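+ // Redirection entry layout: vector 7:0, delivery mode 10:8, dest mode 11, + // polarity 13, trigger mode 15, mask 16, destination APIC ID 63:56.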
- (u64::from(self.dest.get()) << 56) + Some((u64::from(self.dest.get()) << 56) | (u64::from(self.mask) << 16) | ((self.trigger_mode as u64) << 15) | ((self.polarity as u64) << 13) | ((self.dest_mode as u64) << 11) | ((self.delivery_mode as u64) << 8) - | u64::from(self.vector) + | u64::from(self.vector)) } } @@ -175,7 +192,7 @@ impl fmt::Debug for IoApic { let count = guard.max_redirection_table_entries(); f.debug_list() - .entries((0..count).map(|i| guard.read_ioredtbl(i))) + .entries((0..=count).filter_map(|i| guard.read_ioredtbl(i))) .finish() } } @@ -237,11 +254,14 @@ pub unsafe fn handle_ioapic(madt_ioapic: &'static MadtIoApic) { let ioapic_registers = virt.data() as *const u32; let ioapic = IoApic::new(ioapic_registers, madt_ioapic.gsi_base); - assert_eq!( - ioapic.regs.lock().id(), - madt_ioapic.id, - "mismatched ACPI MADT I/O APIC ID, and the ID reported by the I/O APIC" - ); + let detected_id = ioapic.regs.lock().id(); + if detected_id != madt_ioapic.id { + warn!( + "mismatched ACPI MADT I/O APIC ID: MADT={}, IOAPIC={}; continuing with detected hardware", + madt_ioapic.id, + detected_id + ); + } (*IOAPICS.get()).get_or_insert_with(Vec::new).push(ioapic); } @@ -310,11 +330,14 @@ pub unsafe fn init() { } } } - println!( - "I/O APICs: {:?}, overrides: {:?}", - ioapics(), - src_overrides() - ); + // Sanitize all IOAPIC redirection entries: mask everything first to clear + // stale firmware/emulator defaults. Entries are selectively unmasked below.
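+ // (Masking sets bit 16 of each redirection entry: the routing is preserved + // but the line raises no interrupts until unmasked.)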
+ if legacy_irq == 0 && gsi != u32::from(legacy_irq) { + if let Some(apic0) = find_ioapic(u32::from(legacy_irq)) { + let idx0 = (u32::from(legacy_irq) - apic0.gsi_start) as u8; + apic0.map(idx0, MapInfo { + dest: bsp_apic_id, + dest_mode: DestinationMode::Physical, + delivery_mode: DeliveryMode::Fixed, + mask: false, + polarity: ApicPolarity::ActiveHigh, + trigger_mode: ApicTriggerMode::Edge, + vector: 32, + }); + } + } } println!( "I/O APICs: {:?}, overrides: {:?}", @@ -406,7 +451,7 @@ fn resolve(irq: u8) -> u32 { fn find_ioapic(gsi: u32) -> Option<&'static IoApic> { ioapics() .iter() - .find(|apic| gsi >= apic.gsi_start && gsi < apic.gsi_start + u32::from(apic.count)) + .find(|apic| gsi >= apic.gsi_start && gsi <= apic.gsi_start + u32::from(apic.count)) } pub unsafe fn mask(irq: u8) { diff --git a/src/arch/x86_shared/device/local_apic.rs b/src/arch/x86_shared/device/local_apic.rs index b6afe02a..e256d160 100644 --- a/src/arch/x86_shared/device/local_apic.rs +++ b/src/arch/x86_shared/device/local_apic.rs @@ -103,7 +103,7 @@ impl LocalApic { ApicId::new(if self.x2 { unsafe { rdmsr(IA32_X2APIC_APICID) as u32 } } else { - unsafe { self.read(0x20) } + unsafe { self.read(0x20) >> 24 } }) } @@ -126,7 +126,14 @@ impl LocalApic { pub fn set_icr(&mut self, value: u64) { if self.x2 { unsafe { + const PENDING: u32 = 1 << 12; + while (rdmsr(IA32_X2APIC_ICR) as u32) & PENDING == PENDING { + core::hint::spin_loop(); + } wrmsr(IA32_X2APIC_ICR, value); + while (rdmsr(IA32_X2APIC_ICR) as u32) & PENDING == PENDING { + core::hint::spin_loop(); + } } } else { unsafe { @@ -256,6 +263,30 @@ impl LocalApic { } } } + /// Configure LVT NMI entry. `pin` is 0 for LINT0, 1 for LINT1. + /// `flags` encodes polarity and trigger mode per MADT NMI spec. + pub unsafe fn set_lvt_nmi(&mut self, pin: u8, flags: u16) { + let lvt_value = (flags as u32) | 0x400; /* bit 10 = NMI delivery mode, masked off if flags don't set it */ + unsafe { + match pin { + 0 => { + if self.x2 { + wrmsr(IA32_X2APIC_LVT_LINT0, u64::from(lvt_value)); + } else { + self.write(0x350, lvt_value); + } + } + 1 => { + if self.x2 { + wrmsr(IA32_X2APIC_LVT_LINT1, u64::from(lvt_value)); + } else { + self.write(0x360, lvt_value); + } + } + _ => {} + } + } + } unsafe fn setup_error_int(&mut self) { unsafe { let vector = 49u32; diff --git a/src/arch/x86_shared/device/mod.rs b/src/arch/x86_shared/device/mod.rs index 6f417706..acb14d72 100644 --- a/src/arch/x86_shared/device/mod.rs +++ b/src/arch/x86_shared/device/mod.rs @@ -23,8 +23,7 @@ pub unsafe fn init() { } } pub unsafe fn init_after_acpi() { - // this will disable the IOAPIC if needed. 
- //ioapic::init(mapper); + unsafe { ioapic::init() }; } unsafe fn init_hpet() -> bool { diff --git a/src/arch/x86_shared/interrupt/exception.rs b/src/arch/x86_shared/interrupt/exception.rs index 7725a45d..fbba75c7 100644 --- a/src/arch/x86_shared/interrupt/exception.rs +++ b/src/arch/x86_shared/interrupt/exception.rs @@ -1,3 +1,5 @@ +use core::sync::atomic::{AtomicBool, Ordering}; + use syscall::Exception; use x86::irq::PageFaultError; @@ -10,6 +12,20 @@ use crate::{ syscall::flag::*, }; +static NMI_IN_PROGRESS: AtomicBool = AtomicBool::new(false); + +unsafe fn nmi_raw_serial_write(s: &[u8]) { + use crate::syscall::io::{Io, Pio}; + let mut com1 = Pio::<u8>::new(0x3F8); + let lsr = Pio::<u8>::new(0x3F8 + 5); + for &b in s { + while lsr.read() & (1 << 5) == 0 { + core::hint::spin_loop(); + } + com1.write(b); + } +} + interrupt_stack!(divide_by_zero, |stack| { println!("Divide by zero"); stack.trace(); @@ -55,9 +71,24 @@ interrupt_stack!(non_maskable, @paranoid, |stack| { #[cfg(not(all(target_arch = "x86_64", feature = "profiling")))] { - // TODO: This will likely deadlock - println!("Non-maskable interrupt"); - stack.dump(); + if NMI_IN_PROGRESS.swap(true, Ordering::SeqCst) { + return; + } + unsafe { + nmi_raw_serial_write(b"Non-maskable interrupt\n"); + nmi_raw_serial_write(b" RIP: "); + // Print RIP as hex manually to avoid formatting locks + let rip = stack.iret.rip; + let mut buf = [0u8; 19]; + buf[0] = b'0'; buf[1] = b'x'; + for i in 0..16 { + let nibble = ((rip >> (60 - i * 4)) & 0xf) as u8; + buf[2 + i] = if nibble < 10 { b'0' + nibble } else { b'a' + nibble - 10 }; + } + buf[18] = b'\n'; + nmi_raw_serial_write(&buf); + } + NMI_IN_PROGRESS.store(false, Ordering::SeqCst); } }); diff --git a/src/arch/x86_shared/start.rs b/src/arch/x86_shared/start.rs index 7a7c0ae8..62f9523c 100644 --- a/src/arch/x86_shared/start.rs +++ b/src/arch/x86_shared/start.rs @@ -91,7 +91,7 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { // Set up graphical debug graphical_debug::init(args.env()); - info!("Redox OS starting..."); + info!("RedBear OS starting..."); args.print(); // Set up GDT @@ -127,16 +127,21 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! { // Initialize devices device::init(); + info!("kernel: device init complete (PIC + LAPIC)"); // Read ACPI tables, starts APs if cfg!(feature = "acpi") { crate::acpi::init(args.acpi_rsdp()); + info!("kernel: ACPI tables parsed"); + device::init_after_acpi(); + info!("kernel: IOAPIC init complete"); } crate::profiling::init(); // Initialize all of the non-core devices not otherwise needed to complete initialization device::init_noncore(); + info!("kernel: timer init complete, entering userspace"); args.bootstrap() }; diff --git a/src/context/memory.rs b/src/context/memory.rs index 94519448..0db1de53 100644 --- a/src/context/memory.rs +++ b/src/context/memory.rs @@ -927,8 +927,8 @@ impl UserGrants { .take_while(move |(base, info)| PageSpan::new(**base, info.page_count).intersects(span)) .map(|(base, info)| (*base, info)) } - /// Return a free region with the specified size - // TODO: Alignment (x86_64: 4 KiB, 2 MiB, or 1 GiB). + /// Return a free region with the specified size, optionally aligned to a power-of-two + /// boundary (x86_64 supports 4 KiB, 2 MiB, or 1 GiB pages). // TODO: Support finding grant close to a requested address?
pub fn find_free_near( &self, @@ -936,29 +936,42 @@ page_count: usize, _near: Option<Page>, ) -> Option<PageSpan> { - // Get first available hole, but do reserve the page starting from zero as most compiled - // languages cannot handle null pointers safely even if they point to valid memory. If an - // application absolutely needs to map the 0th page, they will have to do so explicitly via - // MAP_FIXED/MAP_FIXED_NOREPLACE. - // TODO: Allow explicitly allocating guard pages? Perhaps using mprotect or mmap with - // PROT_NONE? + self.find_free_near_aligned(min, page_count, _near, 0) + } + pub fn find_free_near_aligned( + &self, + min: usize, + page_count: usize, + _near: Option<Page>, + page_alignment: usize, + ) -> Option<PageSpan> { + let alignment = if page_alignment == 0 { + PAGE_SIZE + } else { + assert!(page_alignment.is_power_of_two(), "page_alignment must be a power of two"); + page_alignment * PAGE_SIZE + }; let (hole_start, _hole_size) = self .holes .iter() .skip_while(|(hole_offset, hole_size)| hole_offset.data() + **hole_size <= min) .find(|(hole_offset, hole_size)| { - let avail_size = - if hole_offset.data() <= min && min <= hole_offset.data() + **hole_size { - **hole_size - (min - hole_offset.data()) - } else { - **hole_size - }; + let base = cmp::max(hole_offset.data(), min); + let aligned_base = (base + alignment - 1) & !(alignment - 1); + let avail_size = if aligned_base <= hole_offset.data() + **hole_size { + hole_offset.data() + **hole_size - aligned_base + } else { + 0 + }; page_count * PAGE_SIZE <= avail_size })?; - // Create new region + + let base = cmp::max(hole_start.data(), min); + let aligned_base = (base + alignment - 1) & !(alignment - 1); + Some(PageSpan::new( - Page::containing_address(VirtualAddress::new(cmp::max(hole_start.data(), min))), + Page::containing_address(VirtualAddress::new(aligned_base)), page_count, )) } diff --git a/src/event.rs b/src/event.rs index 7398145a..92e5793c 100644 --- a/src/event.rs +++ b/src/event.rs @@ -8,13 +8,14 @@ use crate::{ context, scheme::{self, SchemeExt, SchemeId}, sync::{ - CleanLockToken, LockToken, RwLock, RwLockReadGuard, RwLockWriteGuard, WaitQueue, L0, L1, L2, + CleanLockToken, LockToken, Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard, + WaitCondition, WaitQueue, L0, L1, L2, }, syscall::{ data::Event, - error::{Error, Result, EBADF}, - flag::EventFlags, - usercopy::UserSliceWo, + error::{Error, Result, EAGAIN, EBADF, EINTR, EINVAL}, + flag::{EVENT_READ, EVENT_WRITE, EventFlags}, + usercopy::{UserSliceRo, UserSliceWo}, }, }; @@ -25,6 +26,17 @@ pub struct EventQueue { queue: WaitQueue<Event>, } +const EVENTFD_COUNTER_MAX: u64 = u64::MAX - 1; +const EVENTFD_TAG_BIT: usize = 1usize << (usize::BITS - 1); + +pub struct EventCounter { + id: usize, + counter: Mutex<u64>, + read_condition: WaitCondition, + write_condition: WaitCondition, + semaphore: bool, +} + impl EventQueue { pub fn new(id: EventQueueId) -> EventQueue { EventQueue { @@ -91,19 +103,146 @@ } } +impl EventCounter { + pub fn new(id: usize, init: u64, semaphore: bool) -> EventCounter { + EventCounter { + id, + counter: Mutex::new(init), + read_condition: WaitCondition::new(), + write_condition: WaitCondition::new(), + semaphore, + } + } + + pub fn is_readable(&self, token: &mut CleanLockToken) -> bool { + *self.counter.lock(token.token()) > 0 + } + + pub fn is_writable(&self, token: &mut CleanLockToken) -> bool { + *self.counter.lock(token.token()) < EVENTFD_COUNTER_MAX + } + + pub fn read(&self, buf: UserSliceWo, block: bool, token: &mut CleanLockToken) -> Result<usize>
{ + if buf.len() < core::mem::size_of::<u64>() { + return Err(Error::new(EINVAL)); + } + + loop { + let counter = self.counter.lock(token.token()); + let (mut counter, mut token) = counter.into_split(); + + if *counter > 0 { + let value = if self.semaphore { + *counter -= 1; + 1 + } else { + let value = *counter; + *counter = 0; + value + }; + + buf.limit(core::mem::size_of::<u64>()) + .ok_or(Error::new(EINVAL))? + .copy_from_slice(&value.to_ne_bytes())?; + + trigger_locked( + GlobalSchemes::Event.scheme_id(), + self.id, + EVENT_WRITE, + token.token(), + ); + self.write_condition.notify_locked(token.token()); + + return Ok(core::mem::size_of::<u64>()); + } + + if !block { + return Err(Error::new(EAGAIN)); + } + + if !self + .read_condition + .wait(counter, "EventCounter::read", &mut token) + { + return Err(Error::new(EINTR)); + } + } + } + + pub fn write(&self, buf: UserSliceRo, block: bool, token: &mut CleanLockToken) -> Result<usize> { + if buf.len() != core::mem::size_of::<u64>() { + return Err(Error::new(EINVAL)); + } + + let value = unsafe { buf.read_exact::<u64>()? }; + if value == u64::MAX { + return Err(Error::new(EINVAL)); + } + + loop { + let counter = self.counter.lock(token.token()); + let (mut counter, mut token) = counter.into_split(); + + if EVENTFD_COUNTER_MAX - *counter >= value { + let was_zero = *counter == 0; + *counter += value; + + if was_zero && value != 0 { + trigger_locked( + GlobalSchemes::Event.scheme_id(), + self.id, + EVENT_READ, + token.token(), + ); + self.read_condition.notify_locked(token.token()); + } + + return Ok(core::mem::size_of::<u64>()); + } + + if !block { + return Err(Error::new(EAGAIN)); + } + + if !self + .write_condition + .wait(counter, "EventCounter::write", &mut token) + { + return Err(Error::new(EINTR)); + } + } + } + + pub fn into_drop(self, _token: LockToken<'_, L1>) { + drop(self); + } +} + pub type EventQueueList = HashMap<EventQueueId, Arc<EventQueue>>; +pub type EventCounterList = HashMap<usize, Arc<EventCounter>>; // Next queue id static NEXT_QUEUE_ID: AtomicUsize = AtomicUsize::new(0); +static NEXT_COUNTER_ID: AtomicUsize = AtomicUsize::new(0); /// Get next queue id pub fn next_queue_id() -> EventQueueId { EventQueueId::from(NEXT_QUEUE_ID.fetch_add(1, Ordering::SeqCst)) } +pub fn next_counter_id() -> usize { + EVENTFD_TAG_BIT | NEXT_COUNTER_ID.fetch_add(1, Ordering::SeqCst) +} + +pub fn is_counter_id(id: usize) -> bool { + id & EVENTFD_TAG_BIT != 0 +} + // Current event queues static QUEUES: RwLock<L2, EventQueueList> = RwLock::new(EventQueueList::with_hasher(DefaultHashBuilder::new())); +static COUNTERS: RwLock<L2, EventCounterList> = + RwLock::new(EventCounterList::with_hasher(DefaultHashBuilder::new())); /// Get the event queues list, const pub fn queues(token: LockToken<'_, L0>) -> RwLockReadGuard<'_, L2, EventQueueList> { @@ -115,6 +254,14 @@ pub fn queues_mut(token: LockToken<'_, L0>) -> RwLockWriteGuard<'_, L2, EventQue QUEUES.write(token) } +pub fn counters(token: LockToken<'_, L0>) -> RwLockReadGuard<'_, L2, EventCounterList> { + COUNTERS.read(token) +} + +pub fn counters_mut(token: LockToken<'_, L0>) -> RwLockWriteGuard<'_, L2, EventCounterList> { + COUNTERS.write(token) +} + #[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct RegKey { pub scheme: SchemeId, diff --git a/src/scheme/event.rs b/src/scheme/event.rs index 36efe5b2..c64b6bd0 100644 --- a/src/scheme/event.rs +++ b/src/scheme/event.rs @@ -1,9 +1,12 @@ -use alloc::sync::Arc; +use alloc::{sync::Arc, vec::Vec}; use syscall::{EventFlags, O_NONBLOCK}; use crate::{ context::file::InternalFlags, - event::{next_queue_id, queues, queues_mut, EventQueue, EventQueueId}, + event::{ +
EventCounter, EventQueue, EventQueueId, counters, counters_mut, is_counter_id, + next_counter_id, next_queue_id, queues, queues_mut, + }, sync::CleanLockToken, syscall::{ data::Event, @@ -25,7 +28,7 @@ impl KernelScheme for EventScheme { fn kopenat( &self, id: usize, - _user_buf: StrOrBytes, + user_buf: StrOrBytes, _flags: usize, _fcntl_flags: u32, _ctx: CallerCtx, @@ -34,13 +37,53 @@ if id != SCHEME_ROOT_ID { return Err(Error::new(EACCES)); } - let id = next_queue_id(); - queues_mut(token.token()).insert(id, Arc::new(EventQueue::new(id))); - Ok(OpenResult::SchemeLocal(id.get(), InternalFlags::empty())) + let path = user_buf.as_str().or(Err(Error::new(EINVAL)))?; + let path = path.trim_matches('/'); + + if path.is_empty() { + let id = next_queue_id(); + queues_mut(token.token()).insert(id, Arc::new(EventQueue::new(id))); + return Ok(OpenResult::SchemeLocal(id.get(), InternalFlags::empty())); + } + + let parts: Vec<&str> = path.split('/').collect(); + if matches!(parts.first(), Some(&"eventfd")) { + let init = match parts.get(1) { + Some(value) => value.parse::<u64>().map_err(|_| Error::new(EINVAL))?, + None => 0_u64, + }; + if init > u32::MAX as u64 { + return Err(Error::new(EINVAL)); + } + let semaphore = match parts.get(2) { + Some(value) => match *value { + "0" => Ok(false), + "1" => Ok(true), + _ => Err(Error::new(EINVAL)), + }?, + None => false, + }; + + let id = next_counter_id(); + counters_mut(token.token()).insert(id, Arc::new(EventCounter::new(id, init, semaphore))); + return Ok(OpenResult::SchemeLocal(id, InternalFlags::empty())); + } + + Err(Error::new(ENOENT)) } fn close(&self, id: usize, token: &mut CleanLockToken) -> Result<()> { + if is_counter_id(id) { + let counter = counters_mut(token.token()) + .remove(&id) + .ok_or(Error::new(EBADF))?; + if let Some(counter) = Arc::into_inner(counter) { + counter.into_drop(token.downgrade()); + } + return Ok(()); + } + let id = EventQueueId::from(id); let queue = queues_mut(token.token()) .remove(&id) @@ -59,6 +102,15 @@ _stored_flags: u32, token: &mut CleanLockToken, ) -> Result<usize> { + if is_counter_id(id) { + let counter = { + let handles = counters(token.token()); + let handle = handles.get(&id).ok_or(Error::new(EBADF))?; + handle.clone() + }; + return counter.read(buf, flags & O_NONBLOCK as u32 == 0, token); + } + let id = EventQueueId::from(id); let queue = { @@ -74,10 +126,19 @@ &self, id: usize, buf: UserSliceRo, - _flags: u32, + flags: u32, _stored_flags: u32, token: &mut CleanLockToken, ) -> Result<usize> { + if is_counter_id(id) { + let counter = { + let handles = counters(token.token()); + let handle = handles.get(&id).ok_or(Error::new(EBADF))?; + handle.clone() + }; + return counter.write(buf, flags & O_NONBLOCK as u32 == 0, token); + } + let id = EventQueueId::from(id); let queue = { @@ -98,8 +159,12 @@ Ok(events_written * size_of::<Event>()) } - fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result<usize> { - buf.copy_common_bytes_from_slice(b"/scheme/event/") + fn kfpath(&self, id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result<usize> { + if is_counter_id(id) { + buf.copy_common_bytes_from_slice(b"/scheme/event/eventfd") + } else { + buf.copy_common_bytes_from_slice(b"/scheme/event/") + } } fn fevent( &self, id: usize, flags: EventFlags, token: &mut CleanLockToken, ) -> Result<EventFlags> { + if is_counter_id(id) { + let counter = { + let
handles = counters(token.token()); + let handle = handles.get(&id).ok_or(Error::new(EBADF))?; + handle.clone() + }; + + let mut ready = EventFlags::empty(); + if flags.contains(EventFlags::EVENT_READ) && counter.is_readable(token) { + ready |= EventFlags::EVENT_READ; + } + if flags.contains(EventFlags::EVENT_WRITE) && counter.is_writable(token) { + ready |= EventFlags::EVENT_WRITE; + } + return Ok(ready); + } + let id = EventQueueId::from(id); let queue = {