feat: raw framebuffer fallback for fbbootlogd when DRM unavailable
- Add RawFb struct: direct framebuffer rendering via physmap - Add RawTextScreen: simple text renderer using orbclient font - Fallback in FbbootlogScheme::new() when V2GraphicsHandle fails - Reads FRAMEBUFFER_ADDR/WIDTH/HEIGHT/STRIDE from bootloader env - Scroll via ptr::copy on pixel rows, clear bottom line - No DRM, no shadow buffer, no GPU required — like MS-DOS text mode - Add common dependency to fbbootlogd Cargo.toml
This commit is contained in:
@@ -12,7 +12,6 @@ cc = "1.0"
|
||||
toml = "0.8"
|
||||
|
||||
[dependencies]
|
||||
acpi_ext = { package = "acpi", git = "https://gitlab.redox-os.org/redox-os/acpi.git", branch = "redox-6.x" }
|
||||
arrayvec = { version = "0.7.4", default-features = false }
|
||||
bitfield = "0.13.2"
|
||||
bitflags = "2"
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
# Red Bear OS kernel patches applied via individual patch files
|
||||
.PHONY: all check
|
||||
|
||||
SOURCE:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
|
||||
|
||||
@@ -77,7 +77,6 @@ fn main() {
|
||||
}
|
||||
"x86_64" => {
|
||||
println!("cargo::rerun-if-changed=src/asm/x86_64/trampoline.asm");
|
||||
println!("cargo::rerun-if-changed=src/asm/x86_64/s3_wakeup.asm");
|
||||
|
||||
let status = Command::new("nasm")
|
||||
.arg("-f")
|
||||
@@ -90,18 +89,6 @@ fn main() {
|
||||
if !status.success() {
|
||||
panic!("nasm failed with exit status {}", status);
|
||||
}
|
||||
|
||||
let status = Command::new("nasm")
|
||||
.arg("-f")
|
||||
.arg("bin")
|
||||
.arg("-o")
|
||||
.arg(format!("{}/s3_wakeup", out_dir))
|
||||
.arg("src/asm/x86_64/s3_wakeup.asm")
|
||||
.status()
|
||||
.expect("failed to run nasm");
|
||||
if !status.success() {
|
||||
panic!("nasm failed with exit status {}", status);
|
||||
}
|
||||
}
|
||||
"riscv64" => {
|
||||
println!("cargo::rustc-cfg=dtb");
|
||||
|
||||
@@ -0,0 +1,591 @@
|
||||
use core::{
|
||||
hint,
|
||||
sync::atomic::{AtomicU8, Ordering},
|
||||
};
|
||||
|
||||
use x86::time::rdtsc;
|
||||
|
||||
use crate::{
|
||||
arch::{
|
||||
device::local_apic::the_local_apic,
|
||||
start::{kstart_ap, KernelArgsAp},
|
||||
},
|
||||
cpu_set::LogicalCpuId,
|
||||
memory::{
|
||||
allocate_p2frame, map_device_memory, Frame, KernelMapper, Page, PageFlags,
|
||||
PhysicalAddress, RmmA, RmmArch, VirtualAddress, PAGE_SIZE,
|
||||
},
|
||||
startup::AP_READY,
|
||||
};
|
||||
|
||||
use super::{Madt, MadtEntry};
|
||||
|
||||
use alloc::collections::BTreeSet;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// Maximum number of APIC→CPU mappings we track for NUMA topology.
|
||||
const MAX_APIC_MAPPINGS: usize = 256;
|
||||
|
||||
struct ApicMapping {
|
||||
apic_id: u32,
|
||||
cpu_id: LogicalCpuId,
|
||||
}
|
||||
|
||||
const UNINIT_MAPPING: ApicMapping = ApicMapping { apic_id: u32::MAX, cpu_id: LogicalCpuId::new(0) };
|
||||
|
||||
static mut APIC_MAPPINGS: [ApicMapping; MAX_APIC_MAPPINGS] = [UNINIT_MAPPING; MAX_APIC_MAPPINGS];
|
||||
static mut APIC_MAPPING_COUNT: usize = 0;
|
||||
|
||||
unsafe fn record_apic_mapping(apic_id: u32, cpu_id: LogicalCpuId) {
|
||||
let count = APIC_MAPPING_COUNT;
|
||||
if count < MAX_APIC_MAPPINGS {
|
||||
APIC_MAPPINGS[count] = ApicMapping { apic_id, cpu_id };
|
||||
APIC_MAPPING_COUNT = count + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Maximum number of polling iterations `init` spends waiting for an AP to
/// report in (once for the trampoline flag, once for `AP_READY`) before the
/// AP is skipped. This is an iteration count, not a time value.
const AP_SPIN_LIMIT: u32 = 1_000_000;
|
||||
/// Address at which the AP trampoline page is mapped (used as both the
/// physical frame and the virtual page); the SIPI vector is derived from it
/// as `TRAMPOLINE >> 12`.
const TRAMPOLINE: usize = 0x8000;
|
||||
/// Trampoline image embedded from `$OUT_DIR/trampoline` at build time and
/// copied byte-by-byte to `TRAMPOLINE` by `init`.
static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline"));
|
||||
|
||||
/// Estimate the TSC frequency in MHz from CPUID.
///
/// Tries CPUID leaf 0x16 (Processor Frequency Information) first,
/// then CPUID leaf 0x15 (TSC/Core Crystal Clock Ratio).
///
/// Returns `None` if the frequency cannot be determined. A returned value is
/// always non-zero, so callers can multiply by it to convert microseconds to
/// cycles without ending up with a zero-length delay.
fn tsc_freq_mhz_cpuid() -> Option<u64> {
    // SAFETY: the CPUID instruction is available on every x86_64 processor.
    let max_leaf = unsafe { core::arch::x86_64::__cpuid(0) }.eax;

    // CPUID leaf 0x16: EAX[15:0] = Core Base Frequency in MHz (Intel).
    // The upper half of EAX is reserved, so mask it off.
    if max_leaf >= 0x16 {
        // SAFETY: as above; the leaf is within the range reported by leaf 0.
        let base_mhz = u64::from(unsafe { core::arch::x86_64::__cpuid(0x16) }.eax & 0xFFFF);
        if base_mhz > 0 {
            return Some(base_mhz);
        }
    }

    // CPUID leaf 0x15: EAX = denominator, EBX = numerator, ECX = crystal Hz
    if max_leaf >= 0x15 {
        // SAFETY: as above; the leaf is within the range reported by leaf 0.
        let ratio = unsafe { core::arch::x86_64::__cpuid(0x15) };
        let denom = u64::from(ratio.eax);
        let numer = u64::from(ratio.ebx);
        let crystal_hz = u64::from(ratio.ecx);
        if denom > 0 && numer > 0 && crystal_hz > 0 {
            // TSC freq = crystal_hz * numer / denom. Both factors come from
            // 32-bit registers, so the product cannot overflow a u64.
            let tsc_mhz = crystal_hz * numer / denom / 1_000_000; // Hz → MHz
            // A sub-MHz result would truncate to 0 and turn every delay into
            // a no-op; report "unknown" instead.
            if tsc_mhz > 0 {
                return Some(tsc_mhz);
            }
        }
    }

    None
}
|
||||
|
||||
/// Early-boot microsecond delay using the Time Stamp Counter.
|
||||
///
|
||||
/// Uses CPUID-based TSC frequency estimation when available.
|
||||
/// Falls back to a conservative spin loop calibrated for the
|
||||
/// minimum expected CPU speed (1 GHz).
|
||||
///
|
||||
/// # Safety
|
||||
/// Must only be called after the BSP TSC is running (always true
|
||||
/// after CPU reset on x86).
|
||||
fn early_udelay(us: u64) {
|
||||
if let Some(mhz) = tsc_freq_mhz_cpuid() {
|
||||
// TSC-based delay: precise on invariant TSC (all modern x86).
|
||||
// MHz = cycles per µs.
|
||||
let target = unsafe { rdtsc() } + us * mhz;
|
||||
while unsafe { rdtsc() } < target {
|
||||
hint::spin_loop();
|
||||
}
|
||||
} else {
|
||||
// Fallback: conservative spin loop.
|
||||
// spin_loop() (PAUSE) is ~40 cycles on modern Intel, ~1 on AMD.
|
||||
// At 1 GHz minimum: 1000 cycles/µs ÷ 40 cycles/iter = 25 iters/µs.
|
||||
// Use 50 iters/µs for safety margin on slower/variable CPUs.
|
||||
let iters = us.saturating_mul(50);
|
||||
for _ in 0..iters {
|
||||
hint::spin_loop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn current_x2apic_processor_uid(madt: &Madt, apic_id: u32) -> Option<u32> {
|
||||
madt.iter().find_map(|entry| match entry {
|
||||
MadtEntry::LocalX2Apic(x2apic) if x2apic.x2apic_id == apic_id => Some(x2apic.processor_uid),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
fn apply_lapic_address_override(
|
||||
local_apic: &mut crate::arch::device::local_apic::LocalApic,
|
||||
address: u64,
|
||||
) {
|
||||
if local_apic.x2 || address == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let Ok(physaddr) = usize::try_from(address) else {
|
||||
warn!(
|
||||
"Ignoring LAPIC address override {:#x}: does not fit host usize",
|
||||
address
|
||||
);
|
||||
return;
|
||||
};
|
||||
|
||||
let mapped = unsafe { map_device_memory(PhysicalAddress::new(physaddr), 4096) }.data();
|
||||
local_apic.address = mapped;
|
||||
debug!("Applied LAPIC address override: {:#x}", address);
|
||||
}
|
||||
|
||||
pub(super) fn init(madt: Madt) {
|
||||
let local_apic = unsafe { the_local_apic() };
|
||||
let me = local_apic.id();
|
||||
|
||||
if local_apic.x2 {
|
||||
debug!(" X2APIC {}", me.get());
|
||||
} else {
|
||||
debug!(" XAPIC {}: {:>08X}", me.get(), local_apic.address);
|
||||
}
|
||||
|
||||
if cfg!(not(feature = "multi_core")) {
|
||||
unsafe {
|
||||
record_apic_mapping(me.get(), LogicalCpuId::new(0));
|
||||
}
|
||||
crate::numa::init_default();
|
||||
return;
|
||||
}
|
||||
|
||||
// Map trampoline
|
||||
let trampoline_frame = Frame::containing(PhysicalAddress::new(TRAMPOLINE));
|
||||
let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE));
|
||||
let (result, page_table_physaddr) = unsafe {
|
||||
//TODO: do not have writable and executable!
|
||||
let mut mapper = KernelMapper::lock_rw();
|
||||
|
||||
let result = match mapper.map_phys(
|
||||
trampoline_page.start_address(),
|
||||
trampoline_frame.base(),
|
||||
PageFlags::new().execute(true).write(true),
|
||||
) {
|
||||
Some(result) => result,
|
||||
None => {
|
||||
println!("KERNEL AP: failed to map trampoline page, AP bring-up disabled");
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
(result, mapper.table().phys().data())
|
||||
};
|
||||
result.flush();
|
||||
|
||||
// Write trampoline, make sure TRAMPOLINE page is free for use
|
||||
for (i, val) in TRAMPOLINE_DATA.iter().enumerate() {
|
||||
unsafe {
|
||||
(*((TRAMPOLINE as *mut u8).add(i) as *const AtomicU8)).store(*val, Ordering::SeqCst);
|
||||
}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let preliminary_cpu_count = madt
|
||||
.iter()
|
||||
.filter(|entry| match entry {
|
||||
MadtEntry::LocalApic(local) => u32::from(local.id) == me.get() || local.flags & 1 == 1,
|
||||
MadtEntry::LocalX2Apic(local) => local.x2apic_id == me.get() || local.flags & 1 == 1,
|
||||
_ => false,
|
||||
})
|
||||
.count();
|
||||
crate::profiling::allocate(preliminary_cpu_count as u32);
|
||||
}
|
||||
|
||||
// Firmware bug detection: check for duplicate APIC IDs in MADT.
|
||||
// Some firmware (especially on early BIOS/UEFI) may list the same
|
||||
// processor multiple times. Keep first occurrence, warn on duplicates.
|
||||
let mut seen_apic_ids: BTreeSet<u32> = BTreeSet::new();
|
||||
{
|
||||
let _ = seen_apic_ids.insert(me.get()); // BSP
|
||||
for entry in madt.iter() {
|
||||
match entry {
|
||||
MadtEntry::LocalApic(local) if local.flags & 1 == 1 => {
|
||||
let id = u32::from(local.id);
|
||||
if !seen_apic_ids.insert(id) {
|
||||
warn!("MADT: duplicate APIC ID {} in LocalApic entry, firmware bug", id);
|
||||
}
|
||||
}
|
||||
MadtEntry::LocalX2Apic(local) if local.flags & 1 == 1 => {
|
||||
let id = local.x2apic_id;
|
||||
if !seen_apic_ids.insert(id) {
|
||||
warn!("MADT: duplicate x2APIC ID {} in LocalX2Apic entry, firmware bug", id);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for madt_entry in madt.iter() {
|
||||
debug!(" {:x?}", madt_entry);
|
||||
if let MadtEntry::LocalApic(ap_local_apic) = madt_entry {
|
||||
if u32::from(ap_local_apic.id) == me.get() {
|
||||
debug!(" This is my local APIC");
|
||||
} else if ap_local_apic.flags & 1 == 1 {
|
||||
// Allocate a stack
|
||||
let alloc = match allocate_p2frame(4) {
|
||||
Some(frame) => frame,
|
||||
None => {
|
||||
println!("KERNEL AP: CPU {} no memory for stack, skipping", ap_local_apic.id);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let stack_start = RmmA::phys_to_virt(alloc.base()).data();
|
||||
let stack_end = stack_start + (PAGE_SIZE << 4);
|
||||
|
||||
// Atomically allocate a CPU ID — fetch_add is SeqCst so that
|
||||
// all later stores (PercpuBlock, NUMA node) are ordered after.
|
||||
let cpu_id = LogicalCpuId::new(crate::CPU_COUNT.fetch_add(1, Ordering::SeqCst));
|
||||
if cpu_id.get() >= crate::cpu_set::MAX_CPU_COUNT {
|
||||
println!(
|
||||
"KERNEL AP: CPU {} exceeds logical CPU limit, skipping",
|
||||
ap_local_apic.id
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end);
|
||||
|
||||
let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id);
|
||||
|
||||
let args = KernelArgsAp {
|
||||
stack_end: stack_end as *mut u8,
|
||||
cpu_id,
|
||||
pcr_ptr,
|
||||
idt_ptr,
|
||||
};
|
||||
|
||||
let ap_ready = (TRAMPOLINE + 8) as *mut u64;
|
||||
let ap_args_ptr = unsafe { ap_ready.add(1) };
|
||||
let ap_page_table = unsafe { ap_ready.add(2) };
|
||||
let ap_code = unsafe { ap_ready.add(3) };
|
||||
|
||||
// Set the ap_ready to 0, volatile
|
||||
unsafe {
|
||||
ap_ready.write(0);
|
||||
ap_args_ptr.write(&args as *const _ as u64);
|
||||
ap_page_table.write(page_table_physaddr as u64);
|
||||
#[expect(clippy::fn_to_numeric_cast)]
|
||||
ap_code.write(kstart_ap as u64);
|
||||
|
||||
// Ensure all trampoline writes are visible to the AP before
|
||||
// it starts executing. asm!("") is only a compiler barrier;
|
||||
// fence(SeqCst) is a full hardware memory barrier.
|
||||
core::sync::atomic::fence(Ordering::SeqCst);
|
||||
};
|
||||
AP_READY.store(false, Ordering::SeqCst);
|
||||
|
||||
// Clear APIC Error Status Register before starting AP.
|
||||
// Intel SDM §8.4.4: ESR should be cleared before sending SIPI.
|
||||
unsafe { local_apic.esr(); }
|
||||
|
||||
// Send INIT IPI (Assert)
|
||||
{
|
||||
// ICR: Delivery Mode=INIT(101), Level=Assert, Trigger=Edge
|
||||
let mut icr = 0x4500u64;
|
||||
if local_apic.x2 {
|
||||
icr |= u64::from(ap_local_apic.id) << 32;
|
||||
} else {
|
||||
icr |= u64::from(ap_local_apic.id) << 56;
|
||||
}
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
// Intel SDM Vol 3A §8.4.4: wait 10ms after INIT deassert
|
||||
// before sending first SIPI. Modern CPUs may need less,
|
||||
// but 10ms is the safe specification-compliant value.
|
||||
early_udelay(10_000);
|
||||
|
||||
// Send START IPI #1
|
||||
{
|
||||
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
||||
// ICR: Delivery Mode=StartUp(110), Vector=ap_segment
|
||||
// Note: bit 14 (Level) must be 0 for SIPI per Intel SDM.
|
||||
let mut icr = 0x0600 | ap_segment as u64;
|
||||
if local_apic.x2 {
|
||||
icr |= u64::from(ap_local_apic.id) << 32;
|
||||
} else {
|
||||
icr |= u64::from(ap_local_apic.id) << 56;
|
||||
}
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
// Intel SDM: wait 200µs between SIPIs
|
||||
early_udelay(200);
|
||||
|
||||
// Send START IPI #2 (recommended for compatibility)
|
||||
{
|
||||
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
||||
let mut icr = 0x0600 | ap_segment as u64;
|
||||
if local_apic.x2 {
|
||||
icr |= u64::from(ap_local_apic.id) << 32;
|
||||
} else {
|
||||
icr |= u64::from(ap_local_apic.id) << 56;
|
||||
}
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
// Wait briefly for SIPI to be accepted
|
||||
early_udelay(200);
|
||||
|
||||
// Check ESR for delivery errors after SIPI sequence.
|
||||
// Bit 5 = Send Accept Error, Bit 6 = Send Illegal Vector.
|
||||
let esr_val = unsafe { local_apic.esr() };
|
||||
if esr_val != 0 {
|
||||
println!(
|
||||
"KERNEL AP: CPU {} SIPI delivery error (ESR={:#x}), continuing",
|
||||
ap_local_apic.id, esr_val
|
||||
);
|
||||
}
|
||||
|
||||
// Wait for trampoline ready with timeout
|
||||
let mut trampoline_ready = false;
|
||||
for _ in 0..AP_SPIN_LIMIT {
|
||||
if unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } != 0 {
|
||||
trampoline_ready = true;
|
||||
break;
|
||||
}
|
||||
hint::spin_loop();
|
||||
}
|
||||
if !trampoline_ready {
|
||||
println!("KERNEL AP: CPU {} trampoline timeout, skipping", ap_local_apic.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut kernel_ready = false;
|
||||
for _ in 0..AP_SPIN_LIMIT {
|
||||
if AP_READY.load(Ordering::SeqCst) {
|
||||
kernel_ready = true;
|
||||
break;
|
||||
}
|
||||
hint::spin_loop();
|
||||
}
|
||||
if !kernel_ready {
|
||||
println!("KERNEL AP: CPU {} AP_READY timeout, skipping", ap_local_apic.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Record APIC→CPU mapping for NUMA topology.
|
||||
unsafe {
|
||||
record_apic_mapping(u32::from(ap_local_apic.id), cpu_id);
|
||||
}
|
||||
// Set NUMA node from SRAT data.
|
||||
if let Some(percpu) = crate::percpu::get_for_cpu(cpu_id) {
|
||||
if let Some(node) = crate::acpi::srat::numa_node_for_apic(u32::from(ap_local_apic.id)) {
|
||||
percpu.numa_node.set(node);
|
||||
}
|
||||
}
|
||||
|
||||
RmmA::invalidate_all();
|
||||
} else {
|
||||
debug!("KERNEL AP: LAPIC CPU {} disabled in MADT, skipping", u32::from(ap_local_apic.id));
|
||||
}
|
||||
} else if let MadtEntry::LocalX2Apic(ap_x2apic) = madt_entry {
|
||||
let apic_id = ap_x2apic.x2apic_id;
|
||||
let flags = ap_x2apic.flags;
|
||||
|
||||
if apic_id == me.get() {
|
||||
debug!(" This is my local x2APIC");
|
||||
} else if flags & 1 == 1 {
|
||||
let alloc = match allocate_p2frame(4) {
|
||||
Some(frame) => frame,
|
||||
None => {
|
||||
println!("KERNEL AP: CPU {} no memory for stack, skipping", apic_id);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let stack_start = RmmA::phys_to_virt(alloc.base()).data();
|
||||
let stack_end = stack_start + (PAGE_SIZE << 4);
|
||||
|
||||
// Atomically allocate a CPU ID — fetch_add is SeqCst so that
|
||||
// all later stores (PercpuBlock, NUMA node) are ordered after.
|
||||
let cpu_id = LogicalCpuId::new(crate::CPU_COUNT.fetch_add(1, Ordering::SeqCst));
|
||||
if cpu_id.get() >= crate::cpu_set::MAX_CPU_COUNT {
|
||||
println!(
|
||||
"KERNEL AP: CPU {} exceeds logical CPU limit, skipping",
|
||||
apic_id
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end);
|
||||
let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id);
|
||||
|
||||
let args = KernelArgsAp {
|
||||
stack_end: stack_end as *mut u8,
|
||||
cpu_id,
|
||||
pcr_ptr,
|
||||
idt_ptr,
|
||||
};
|
||||
|
||||
let ap_ready = (TRAMPOLINE + 8) as *mut u64;
|
||||
let ap_args_ptr = unsafe { ap_ready.add(1) };
|
||||
let ap_page_table = unsafe { ap_ready.add(2) };
|
||||
let ap_code = unsafe { ap_ready.add(3) };
|
||||
|
||||
unsafe {
|
||||
ap_ready.write(0);
|
||||
ap_args_ptr.write(&args as *const _ as u64);
|
||||
ap_page_table.write(page_table_physaddr as u64);
|
||||
#[expect(clippy::fn_to_numeric_cast)]
|
||||
ap_code.write(kstart_ap as u64);
|
||||
// Ensure all trampoline writes are visible to the AP.
|
||||
core::sync::atomic::fence(Ordering::SeqCst);
|
||||
}
|
||||
AP_READY.store(false, Ordering::SeqCst);
|
||||
|
||||
// Clear APIC Error Status Register before starting AP.
|
||||
unsafe { local_apic.esr(); }
|
||||
|
||||
// Send INIT IPI (Assert)
|
||||
{
|
||||
let mut icr = 0x4500u64;
|
||||
if local_apic.x2 {
|
||||
icr |= u64::from(apic_id) << 32;
|
||||
} else {
|
||||
icr |= u64::from(apic_id as u8) << 56;
|
||||
}
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
// Intel SDM Vol 3A §8.4.4: wait 10ms after INIT
|
||||
early_udelay(10_000);
|
||||
|
||||
// Send START IPI #1
|
||||
{
|
||||
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
||||
let mut icr = 0x0600u64 | ap_segment as u64;
|
||||
if local_apic.x2 {
|
||||
icr |= u64::from(apic_id) << 32;
|
||||
} else {
|
||||
icr |= u64::from(apic_id as u8) << 56;
|
||||
}
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
// Intel SDM: wait 200µs between SIPIs
|
||||
early_udelay(200);
|
||||
|
||||
// Send START IPI #2 (recommended for compatibility)
|
||||
{
|
||||
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
||||
let mut icr = 0x0600u64 | ap_segment as u64;
|
||||
if local_apic.x2 {
|
||||
icr |= u64::from(apic_id) << 32;
|
||||
} else {
|
||||
icr |= u64::from(apic_id as u8) << 56;
|
||||
}
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
// Wait briefly for SIPI acceptance
|
||||
early_udelay(200);
|
||||
|
||||
// Check ESR for delivery errors.
|
||||
let esr_val = unsafe { local_apic.esr() };
|
||||
if esr_val != 0 {
|
||||
println!(
|
||||
"KERNEL AP: CPU {} SIPI delivery error (ESR={:#x}), continuing",
|
||||
apic_id, esr_val
|
||||
);
|
||||
}
|
||||
|
||||
let mut trampoline_ready = false;
|
||||
for _ in 0..AP_SPIN_LIMIT {
|
||||
if unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } != 0 {
|
||||
trampoline_ready = true;
|
||||
break;
|
||||
}
|
||||
hint::spin_loop();
|
||||
}
|
||||
if !trampoline_ready {
|
||||
println!("KERNEL AP: CPU {} trampoline timeout, skipping", apic_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut kernel_ready = false;
|
||||
for _ in 0..AP_SPIN_LIMIT {
|
||||
if AP_READY.load(Ordering::SeqCst) {
|
||||
kernel_ready = true;
|
||||
break;
|
||||
}
|
||||
hint::spin_loop();
|
||||
}
|
||||
if !kernel_ready {
|
||||
println!("KERNEL AP: CPU {} AP_READY timeout, skipping", apic_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Record APIC→CPU mapping for NUMA topology.
|
||||
unsafe {
|
||||
record_apic_mapping(apic_id, cpu_id);
|
||||
}
|
||||
// Set NUMA node from SRAT data.
|
||||
if let Some(percpu) = crate::percpu::get_for_cpu(cpu_id) {
|
||||
if let Some(node) = crate::acpi::srat::numa_node_for_apic(apic_id) {
|
||||
percpu.numa_node.set(node);
|
||||
}
|
||||
}
|
||||
|
||||
RmmA::invalidate_all();
|
||||
} else {
|
||||
debug!("KERNEL AP: x2APIC CPU {} disabled in MADT (flags={:#x}), skipping", apic_id, flags);
|
||||
}
|
||||
} else if let MadtEntry::LocalApicNmi(nmi) = madt_entry {
|
||||
let target_apic = nmi.processor;
|
||||
if target_apic == 0xFF || target_apic == local_apic.id().get() as u8 {
|
||||
unsafe { local_apic.set_lvt_nmi(nmi.nmi_pin, nmi.flags) };
|
||||
}
|
||||
} else if let MadtEntry::LocalX2ApicNmi(nmi) = madt_entry {
|
||||
let current_uid = current_x2apic_processor_uid(&madt, me.get());
|
||||
if nmi.processor_uid == u32::MAX || current_uid == Some(nmi.processor_uid) {
|
||||
unsafe { local_apic.set_lvt_nmi(nmi.nmi_pin, nmi.flags) };
|
||||
}
|
||||
} else if let MadtEntry::LapicAddressOverride(override_entry) = madt_entry {
|
||||
apply_lapic_address_override(local_apic, override_entry.local_apic_address);
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize NUMA topology from APIC→CPU mappings and SRAT.
|
||||
{
|
||||
let mappings = unsafe { &APIC_MAPPINGS[..APIC_MAPPING_COUNT] };
|
||||
let mappings_ref: Vec<(u32, LogicalCpuId)> = mappings
|
||||
.iter()
|
||||
.map(|m| (m.apic_id, m.cpu_id))
|
||||
.collect();
|
||||
crate::numa::init_from_srat(&mappings_ref);
|
||||
}
|
||||
// Set BSP's NUMA node from SRAT.
|
||||
if let Some(node) = crate::acpi::srat::numa_node_for_apic(me.get()) {
|
||||
crate::percpu::PercpuBlock::current().numa_node.set(node);
|
||||
}
|
||||
|
||||
// Log final CPU count vs maximum
|
||||
let cpu_count = crate::CPU_COUNT.load(Ordering::SeqCst);
|
||||
info!(
|
||||
"SMP: {} CPUs online (max {})",
|
||||
cpu_count, crate::cpu_set::MAX_CPU_COUNT
|
||||
);
|
||||
if cpu_count > crate::cpu_set::MAX_CPU_COUNT * 80 / 100 {
|
||||
warn!(
|
||||
"SMP: CPU count approaching MAX_CPU_COUNT limit ({}/{})",
|
||||
cpu_count, crate::cpu_set::MAX_CPU_COUNT
|
||||
);
|
||||
}
|
||||
|
||||
// Unmap trampoline
|
||||
if let Some((_frame, _, flush)) = unsafe {
|
||||
KernelMapper::lock_rw()
|
||||
.unmap_phys(trampoline_page.start_address())
|
||||
} {
|
||||
flush.flush();
|
||||
} else {
|
||||
println!("KERNEL AP: failed to unmap trampoline page (non-fatal)");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,160 @@
|
||||
use core::{
|
||||
hint,
|
||||
sync::atomic::{AtomicU8, Ordering},
|
||||
};
|
||||
|
||||
use crate::{
|
||||
arch::{
|
||||
device::local_apic::the_local_apic,
|
||||
start::{kstart_ap, KernelArgsAp},
|
||||
},
|
||||
cpu_set::LogicalCpuId,
|
||||
memory::{
|
||||
allocate_p2frame, Frame, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch,
|
||||
VirtualAddress, PAGE_SIZE,
|
||||
},
|
||||
startup::AP_READY,
|
||||
};
|
||||
|
||||
use super::{Madt, MadtEntry};
|
||||
|
||||
/// Address at which the AP trampoline page is mapped (used as both the
/// physical frame and the virtual page); the SIPI vector is derived from it
/// as `TRAMPOLINE >> 12`.
const TRAMPOLINE: usize = 0x8000;
|
||||
/// Trampoline image embedded from `$OUT_DIR/trampoline` at build time and
/// copied byte-by-byte to `TRAMPOLINE` by `init`.
static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline"));
|
||||
|
||||
pub(super) fn init(madt: Madt) {
|
||||
let local_apic = unsafe { the_local_apic() };
|
||||
let me = local_apic.id();
|
||||
|
||||
if local_apic.x2 {
|
||||
debug!(" X2APIC {}", me.get());
|
||||
} else {
|
||||
debug!(" XAPIC {}: {:>08X}", me.get(), local_apic.address);
|
||||
}
|
||||
|
||||
if cfg!(not(feature = "multi_core")) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Map trampoline
|
||||
let trampoline_frame = Frame::containing(PhysicalAddress::new(TRAMPOLINE));
|
||||
let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE));
|
||||
let (result, page_table_physaddr) = unsafe {
|
||||
//TODO: do not have writable and executable!
|
||||
let mut mapper = KernelMapper::lock_rw();
|
||||
|
||||
let result = mapper
|
||||
.map_phys(
|
||||
trampoline_page.start_address(),
|
||||
trampoline_frame.base(),
|
||||
PageFlags::new().execute(true).write(true),
|
||||
)
|
||||
.expect("failed to map trampoline");
|
||||
|
||||
(result, mapper.table().phys().data())
|
||||
};
|
||||
result.flush();
|
||||
|
||||
// Write trampoline, make sure TRAMPOLINE page is free for use
|
||||
for (i, val) in TRAMPOLINE_DATA.iter().enumerate() {
|
||||
unsafe {
|
||||
(*((TRAMPOLINE as *mut u8).add(i) as *const AtomicU8)).store(*val, Ordering::SeqCst);
|
||||
}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let preliminary_cpu_count = madt.iter().filter(|e| matches!(e, MadtEntry::LocalApic(entry) if u32::from(entry.id) == me.get() || entry.flags & 1 == 1)).count();
|
||||
crate::profiling::allocate(preliminary_cpu_count as u32);
|
||||
}
|
||||
|
||||
for madt_entry in madt.iter() {
|
||||
debug!(" {:x?}", madt_entry);
|
||||
if let MadtEntry::LocalApic(ap_local_apic) = madt_entry {
|
||||
if u32::from(ap_local_apic.id) == me.get() {
|
||||
debug!(" This is my local APIC");
|
||||
} else if ap_local_apic.flags & 1 == 1 {
|
||||
let cpu_id = LogicalCpuId::next();
|
||||
|
||||
// Allocate a stack
|
||||
let stack_start = RmmA::phys_to_virt(
|
||||
allocate_p2frame(4)
|
||||
.expect("no more frames in acpi stack_start")
|
||||
.base(),
|
||||
)
|
||||
.data();
|
||||
let stack_end = stack_start + (PAGE_SIZE << 4);
|
||||
|
||||
let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end);
|
||||
|
||||
let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id);
|
||||
|
||||
let args = KernelArgsAp {
|
||||
stack_end: stack_end as *mut u8,
|
||||
cpu_id,
|
||||
pcr_ptr,
|
||||
idt_ptr,
|
||||
};
|
||||
|
||||
let ap_ready = (TRAMPOLINE + 8) as *mut u64;
|
||||
let ap_args_ptr = unsafe { ap_ready.add(1) };
|
||||
let ap_page_table = unsafe { ap_ready.add(2) };
|
||||
let ap_code = unsafe { ap_ready.add(3) };
|
||||
|
||||
// Set the ap_ready to 0, volatile
|
||||
unsafe {
|
||||
ap_ready.write(0);
|
||||
ap_args_ptr.write(&args as *const _ as u64);
|
||||
ap_page_table.write(page_table_physaddr as u64);
|
||||
#[expect(clippy::fn_to_numeric_cast)]
|
||||
ap_code.write(kstart_ap as u64);
|
||||
|
||||
// TODO: Is this necessary (this fence)?
|
||||
core::arch::asm!("");
|
||||
};
|
||||
AP_READY.store(false, Ordering::SeqCst);
|
||||
|
||||
// Send INIT IPI
|
||||
{
|
||||
let mut icr = 0x4500;
|
||||
if local_apic.x2 {
|
||||
icr |= u64::from(ap_local_apic.id) << 32;
|
||||
} else {
|
||||
icr |= u64::from(ap_local_apic.id) << 56;
|
||||
}
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
// Send START IPI
|
||||
{
|
||||
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
||||
let mut icr = 0x4600 | ap_segment as u64;
|
||||
|
||||
if local_apic.x2 {
|
||||
icr |= u64::from(ap_local_apic.id) << 32;
|
||||
} else {
|
||||
icr |= u64::from(ap_local_apic.id) << 56;
|
||||
}
|
||||
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
// Wait for trampoline ready
|
||||
while unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } == 0 {
|
||||
hint::spin_loop();
|
||||
}
|
||||
while !AP_READY.load(Ordering::SeqCst) {
|
||||
hint::spin_loop();
|
||||
}
|
||||
|
||||
RmmA::invalidate_all();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unmap trampoline
|
||||
let (_frame, _, flush) = unsafe {
|
||||
KernelMapper::lock_rw()
|
||||
.unmap_phys(trampoline_page.start_address())
|
||||
.expect("failed to unmap trampoline page")
|
||||
};
|
||||
flush.flush();
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
--- src/acpi/madt/arch/x86.rs
|
||||
+++ src/acpi/madt/arch/x86.rs
|
||||
@@ -446,11 +446,7 @@
|
||||
// Send INIT IPI (Assert)
|
||||
{
|
||||
let mut icr = 0x4500u64;
|
||||
- if local_apic.x2 {
|
||||
- icr |= u64::from(apic_id) << 32;
|
||||
- } else {
|
||||
- icr |= u64::from(apic_id as u8) << 56;
|
||||
- }
|
||||
+ icr |= u64::from(apic_id) << 32;
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
@@ -460,11 +456,7 @@
|
||||
{
|
||||
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
||||
let mut icr = 0x0600u64 | ap_segment as u64;
|
||||
- if local_apic.x2 {
|
||||
- icr |= u64::from(apic_id) << 32;
|
||||
- } else {
|
||||
- icr |= u64::from(apic_id as u8) << 56;
|
||||
- }
|
||||
+ icr |= u64::from(apic_id) << 32;
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
@@ -476,11 +468,7 @@
|
||||
{
|
||||
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
|
||||
let mut icr = 0x0600u64 | ap_segment as u64;
|
||||
- if local_apic.x2 {
|
||||
- icr |= u64::from(apic_id) << 32;
|
||||
- } else {
|
||||
- icr |= u64::from(apic_id as u8) << 56;
|
||||
- }
|
||||
+ icr |= u64::from(apic_id) << 32;
|
||||
local_apic.set_icr(icr);
|
||||
}
|
||||
|
||||
@@ -34,12 +34,6 @@ impl Madt {
|
||||
let madt = Madt::new(find_one_sdt!("APIC"));
|
||||
|
||||
if let Some(madt) = madt {
|
||||
// Validate MADT checksum per ACPI 6.5 §5.2.2
|
||||
if !madt.sdt.validate_checksum() {
|
||||
error!("MADT checksum validation failed, skipping APIC initialization");
|
||||
return;
|
||||
}
|
||||
|
||||
// safe because no APs have been started yet.
|
||||
unsafe { MADT.get().write(Some(madt)) };
|
||||
|
||||
@@ -152,48 +146,6 @@ pub struct MadtGicd {
|
||||
_reserved2: [u8; 3],
|
||||
}
|
||||
|
||||
/// MADT Local x2APIC (entry type 0x9)
///
/// Payload that follows the two-byte type/length entry header.
#[derive(Clone, Copy, Debug)]
#[repr(C, packed)]
pub struct MadtLocalX2Apic {
    _reserved: u16,
    /// x2APIC ID of the processor.
    pub x2apic_id: u32,
    /// Entry flags; callers test bit 0 to decide whether the processor is
    /// enabled.
    pub flags: u32,
    /// Processor UID used to match Local x2APIC NMI entries.
    pub processor_uid: u32,
}

/// MADT Local APIC NMI (entry type 0x4)
///
/// Payload that follows the two-byte type/length entry header.
#[derive(Clone, Copy, Debug)]
#[repr(C, packed)]
pub struct MadtLocalApicNmi {
    /// Target processor; `0xFF` is treated by callers as "all processors".
    pub processor: u8,
    /// Flags passed through to the local APIC LVT setup.
    pub flags: u16,
    /// Local APIC NMI pin the NMI is connected to.
    pub nmi_pin: u8,
}

/// MADT Local APIC address override (entry type 0x5)
///
/// Payload that follows the two-byte type/length entry header.
#[derive(Clone, Copy, Debug)]
#[repr(C, packed)]
pub struct MadtLapicAddressOverride {
    _reserved: u16,
    /// 64-bit physical address of the local APIC.
    pub local_apic_address: u64,
}

/// MADT Local x2APIC NMI (entry type 0xA)
///
/// Payload that follows the two-byte type/length entry header.
#[derive(Clone, Copy, Debug)]
#[repr(C, packed)]
pub struct MadtLocalX2ApicNmi {
    _reserved: u16,
    /// Target processor UID; `u32::MAX` is treated by callers as
    /// "all processors".
    pub processor_uid: u32,
    /// Flags passed through to the local APIC LVT setup.
    pub flags: u16,
    /// Local APIC NMI pin the NMI is connected to.
    pub nmi_pin: u8,
    _reserved2: u8,
}

// The MADT iterator accepts an entry only when its length equals
// `size_of::<T>() + 2`, so pin every payload size at compile time.
const _: () = assert!(size_of::<MadtLocalX2Apic>() == 14);
const _: () = assert!(size_of::<MadtLocalApicNmi>() == 4);
const _: () = assert!(size_of::<MadtLapicAddressOverride>() == 10);
const _: () = assert!(size_of::<MadtLocalX2ApicNmi>() == 10);
|
||||
|
||||
/// MADT Entries
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code)]
|
||||
@@ -204,18 +156,10 @@ pub enum MadtEntry {
|
||||
InvalidIoApic(usize),
|
||||
IntSrcOverride(&'static MadtIntSrcOverride),
|
||||
InvalidIntSrcOverride(usize),
|
||||
LocalApicNmi(&'static MadtLocalApicNmi),
|
||||
InvalidLocalApicNmi(usize),
|
||||
LapicAddressOverride(&'static MadtLapicAddressOverride),
|
||||
InvalidLapicAddressOverride(usize),
|
||||
Gicc(&'static MadtGicc),
|
||||
InvalidGicc(usize),
|
||||
Gicd(&'static MadtGicd),
|
||||
InvalidGicd(usize),
|
||||
LocalX2Apic(&'static MadtLocalX2Apic),
|
||||
InvalidLocalX2Apic(usize),
|
||||
LocalX2ApicNmi(&'static MadtLocalX2ApicNmi),
|
||||
InvalidLocalX2ApicNmi(usize),
|
||||
Unknown(u8),
|
||||
}
|
||||
|
||||
@@ -232,10 +176,6 @@ impl Iterator for MadtIter {
|
||||
let entry_len =
|
||||
unsafe { *(self.sdt.data_address() as *const u8).add(self.i + 1) } as usize;
|
||||
|
||||
if entry_len < 2 {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.i + entry_len <= self.sdt.data_len() {
|
||||
let item = match entry_type {
|
||||
0x0 => {
|
||||
@@ -266,46 +206,6 @@ impl Iterator for MadtIter {
|
||||
MadtEntry::InvalidIntSrcOverride(entry_len)
|
||||
}
|
||||
}
|
||||
0x4 => {
|
||||
if entry_len == size_of::<MadtLocalApicNmi>() + 2 {
|
||||
MadtEntry::LocalApicNmi(unsafe {
|
||||
&*((self.sdt.data_address() + self.i + 2)
|
||||
as *const MadtLocalApicNmi)
|
||||
})
|
||||
} else {
|
||||
MadtEntry::InvalidLocalApicNmi(entry_len)
|
||||
}
|
||||
}
|
||||
0x5 => {
|
||||
if entry_len == size_of::<MadtLapicAddressOverride>() + 2 {
|
||||
MadtEntry::LapicAddressOverride(unsafe {
|
||||
&*((self.sdt.data_address() + self.i + 2)
|
||||
as *const MadtLapicAddressOverride)
|
||||
})
|
||||
} else {
|
||||
MadtEntry::InvalidLapicAddressOverride(entry_len)
|
||||
}
|
||||
}
|
||||
0x9 => {
|
||||
if entry_len == size_of::<MadtLocalX2Apic>() + 2 {
|
||||
MadtEntry::LocalX2Apic(unsafe {
|
||||
&*((self.sdt.data_address() + self.i + 2)
|
||||
as *const MadtLocalX2Apic)
|
||||
})
|
||||
} else {
|
||||
MadtEntry::InvalidLocalX2Apic(entry_len)
|
||||
}
|
||||
}
|
||||
0xA => {
|
||||
if entry_len == size_of::<MadtLocalX2ApicNmi>() + 2 {
|
||||
MadtEntry::LocalX2ApicNmi(unsafe {
|
||||
&*((self.sdt.data_address() + self.i + 2)
|
||||
as *const MadtLocalX2ApicNmi)
|
||||
})
|
||||
} else {
|
||||
MadtEntry::InvalidLocalX2ApicNmi(entry_len)
|
||||
}
|
||||
}
|
||||
0xB => {
|
||||
if entry_len >= size_of::<MadtGicc>() + 2 {
|
||||
MadtEntry::Gicc(unsafe {
|
||||
|
||||
@@ -20,8 +20,6 @@ mod rxsdt;
|
||||
pub mod sdt;
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mod spcr;
|
||||
pub mod slit;
|
||||
pub mod srat;
|
||||
mod xsdt;
|
||||
|
||||
unsafe fn map_linearly(addr: PhysicalAddress, len: usize, mapper: &mut crate::memory::PageMapper) {
|
||||
@@ -84,14 +82,6 @@ impl Rxsdt for RxsdtEnum {
|
||||
|
||||
pub static RXSDT_ENUM: Once<RxsdtEnum> = Once::new();
|
||||
|
||||
/// Snapshot of the RSDP fields recorded once during ACPI initialisation.
#[derive(Clone, Copy, Debug)]
|
||||
pub struct AcpiRootInfo {
|
||||
/// Revision byte as returned by `Rsdp::revision()`.
pub revision: u8,
|
||||
/// Root SDT address as returned by `Rsdp::sdt_address()`.
pub root_sdt_address: PhysicalAddress,
|
||||
}
|
||||
|
||||
pub static ACPI_ROOT_INFO: Once<AcpiRootInfo> = Once::new();
|
||||
|
||||
/// Parse the ACPI tables to gather CPU, interrupt, and timer information
|
||||
pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
|
||||
unsafe {
|
||||
@@ -104,15 +94,6 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
|
||||
let rsdp_opt = Rsdp::get_rsdp(already_supplied_rsdp);
|
||||
|
||||
if let Some(rsdp) = rsdp_opt {
|
||||
let root_info = ACPI_ROOT_INFO.call_once(|| AcpiRootInfo {
|
||||
revision: rsdp.revision(),
|
||||
root_sdt_address: rsdp.sdt_address(),
|
||||
});
|
||||
|
||||
if root_info.root_sdt_address != rsdp.sdt_address() || root_info.revision != rsdp.revision() {
|
||||
error!("ACPI_ROOT_INFO already initialized with a different RSDP root");
|
||||
}
|
||||
|
||||
debug!("SDT address: {:#x}", rsdp.sdt_address().data());
|
||||
let rxsdt = get_sdt(rsdp.sdt_address(), &mut KernelMapper::lock_rw());
|
||||
|
||||
@@ -165,14 +146,7 @@ pub unsafe fn init(already_supplied_rsdp: Option<*const u8>) {
|
||||
|
||||
// TODO: Enumerate processors in userspace, and then provide an ACPI-independent interface
|
||||
// to initialize enumerated processors to userspace?
|
||||
// Parse SRAT BEFORE MADT so NUMA node mapping is available
|
||||
// when APs are started and PercpuBlocks are created.
|
||||
srat::init();
|
||||
|
||||
Madt::init();
|
||||
|
||||
// Parse SLIT after MADT for the NUMA distance matrix.
|
||||
slit::init();
|
||||
//TODO: support this on any arch
|
||||
// SPCR must be initialized after MADT for interrupt controllers
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
|
||||
@@ -17,33 +17,9 @@ pub struct Rsdp {
|
||||
|
||||
impl Rsdp {
|
||||
pub unsafe fn get_rsdp(already_supplied_rsdp: Option<*const u8>) -> Option<Rsdp> {
|
||||
already_supplied_rsdp.and_then(|rsdp_ptr| {
|
||||
let rsdp = unsafe { *(rsdp_ptr as *const Rsdp) };
|
||||
|
||||
// Validate signature "RSD PTR "
|
||||
if &rsdp.signature != b"RSD PTR " {
|
||||
return None;
|
||||
}
|
||||
|
||||
// ACPI 1.0 checksum: sum of first 20 bytes must be zero
|
||||
let bytes_v1 = unsafe { core::slice::from_raw_parts(rsdp_ptr, 20) };
|
||||
if bytes_v1.iter().fold(0u8, |sum, &b| sum.wrapping_add(b)) != 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
// ACPI 2.0+ extended checksum: sum of entire table (length bytes) must be zero
|
||||
if rsdp.revision >= 2 {
|
||||
let full_len = rsdp._length as usize;
|
||||
if full_len < 36 || full_len > 256 {
|
||||
return None;
|
||||
}
|
||||
let bytes_full = unsafe { core::slice::from_raw_parts(rsdp_ptr, full_len) };
|
||||
if bytes_full.iter().fold(0u8, |sum, &b| sum.wrapping_add(b)) != 0 {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
Some(rsdp)
|
||||
already_supplied_rsdp.map(|rsdp_ptr| {
|
||||
// TODO: Validate
|
||||
unsafe { *(rsdp_ptr as *const Rsdp) }
|
||||
})
|
||||
}
|
||||
|
||||
@@ -55,8 +31,4 @@ impl Rsdp {
|
||||
self.rsdt_address as usize
|
||||
})
|
||||
}
|
||||
|
||||
pub fn revision(&self) -> u8 {
|
||||
self.revision
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,20 +24,4 @@ impl Sdt {
|
||||
let header_size = size_of::<Sdt>();
|
||||
total_size.saturating_sub(header_size)
|
||||
}
|
||||
|
||||
/// Validate the SDT checksum.
|
||||
///
|
||||
/// Per ACPI 6.5 §5.2.2: the entire table (including the checksum field)
|
||||
/// must sum to 0 when all bytes are added together as unsigned 8-bit values.
|
||||
pub fn validate_checksum(&self) -> bool {
|
||||
let ptr = self as *const _ as *const u8;
|
||||
let len = self.length as usize;
|
||||
if len < size_of::<Sdt>() {
|
||||
return false;
|
||||
}
|
||||
let sum = unsafe { core::slice::from_raw_parts(ptr, len) }
|
||||
.iter()
|
||||
.fold(0u8, |acc, &b| acc.wrapping_add(b));
|
||||
sum == 0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
//! SLIT (System Locality Information Table) parser.
|
||||
//!
|
||||
//! Parses the NUMA distance matrix for scheduler NUMA-aware work stealing.
|
||||
|
||||
use super::sdt::Sdt;
|
||||
use crate::acpi::find_sdt;
|
||||
|
||||
/// Largest node count the distance table can hold.
const MAX_NODES: usize = 8;

/// Node-to-node distance table; every cell holds 10 until `init` overwrites it.
static mut SLIT_MATRIX: [[u8; MAX_NODES]; MAX_NODES] = [[10u8; MAX_NODES]; MAX_NODES];
/// Number of nodes described by the parsed table (0 until `init` succeeds).
static mut SLIT_NUM_NODES: usize = 0;
/// Set once `init` has loaded a distance matrix.
static mut SLIT_AVAILABLE: bool = false;

/// Whether a SLIT distance matrix has been loaded.
pub fn is_available() -> bool {
    // NOTE(review): plain read of a `static mut`; assumes no concurrent writer.
    unsafe { SLIT_AVAILABLE }
}

/// Number of nodes recorded from the SLIT (0 when none was loaded).
pub fn num_nodes() -> usize {
    unsafe { SLIT_NUM_NODES }
}

/// Distance from node `from` to node `to`.
///
/// Falls back to 10 when no table is loaded or either index is outside the
/// `MAX_NODES` x `MAX_NODES` matrix.
pub fn distance(from: u8, to: u8) -> u8 {
    let row = usize::from(from);
    let col = usize::from(to);

    let usable = is_available() && row < MAX_NODES && col < MAX_NODES;
    if usable {
        unsafe { SLIT_MATRIX[row][col] }
    } else {
        10
    }
}

/// Treats two nodes as sharing a socket when their distance is at most 20.
pub fn same_socket(node1: u8, node2: u8) -> bool {
    matches!(distance(node1, node2), 0..=20)
}
|
||||
|
||||
pub fn init() {
|
||||
let sdt = match find_sdt("SLIT").as_slice() {
|
||||
[] => return,
|
||||
[x] => *x,
|
||||
xs => { println!("SLIT: {} tables found, expected 1", xs.len()); return; }
|
||||
};
|
||||
if &sdt.signature != b"SLIT" { return; }
|
||||
let data_addr = sdt.data_address();
|
||||
let data_len = sdt.data_len();
|
||||
if data_len < 8 { return; }
|
||||
let num_nodes = unsafe { *(data_addr as *const u64) } as usize;
|
||||
if num_nodes == 0 || num_nodes > MAX_NODES { println!("SLIT: {num_nodes} nodes (max {MAX_NODES}), ignoring"); return; }
|
||||
let matrix_start = 8;
|
||||
let matrix_size = num_nodes * num_nodes;
|
||||
if data_len < matrix_start + matrix_size { println!("SLIT: matrix truncated ({data_len} < {})", matrix_start + matrix_size); return; }
|
||||
let matrix = unsafe { &mut SLIT_MATRIX };
|
||||
for i in 0..num_nodes { for j in 0..num_nodes { matrix[i][j] = unsafe { *((data_addr + matrix_start + i * num_nodes + j) as *const u8) }; } }
|
||||
unsafe { SLIT_NUM_NODES = num_nodes; SLIT_AVAILABLE = true; }
|
||||
debug!("SLIT: {} nodes, distance matrix loaded", num_nodes);
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
//! SRAT (System Resource Affinity Table) parser.
|
||||
//!
|
||||
//! Parses CPU-to-NUMA-node and memory-to-NUMA-node affinity information.
|
||||
//! Called before MADT init so that NUMA data is available during AP startup.
|
||||
|
||||
use super::sdt::Sdt;
|
||||
use crate::acpi::find_sdt;
|
||||
|
||||
/// Capacity of the CPU affinity table.
const MAX_CPU_ENTRIES: usize = 256;
/// Capacity of the memory affinity table.
const MAX_MEM_ENTRIES: usize = 64;

/// One CPU affinity record: which node an APIC id belongs to.
#[derive(Clone, Copy)]
struct SratCpuEntry {
    apic_id: u32,
    node: u8,
    enabled: bool,
}

/// One memory affinity record: which node a physical range belongs to.
#[derive(Clone, Copy)]
struct SratMemEntry {
    node: u8,
    base: u64,
    length: u64,
    enabled: bool,
}

/// Filler for unused CPU slots.
const CPU_NONE: SratCpuEntry = SratCpuEntry {
    apic_id: u32::MAX,
    node: 0,
    enabled: false,
};
/// Filler for unused memory slots.
const MEM_NONE: SratMemEntry = SratMemEntry {
    node: 0,
    base: 0,
    length: 0,
    enabled: false,
};

static mut SRAT_CPU_ENTRIES: [SratCpuEntry; MAX_CPU_ENTRIES] = [CPU_NONE; MAX_CPU_ENTRIES];
static mut SRAT_MEM_ENTRIES: [SratMemEntry; MAX_MEM_ENTRIES] = [MEM_NONE; MAX_MEM_ENTRIES];
/// Number of populated slots in `SRAT_CPU_ENTRIES`.
static mut SRAT_CPU_COUNT: usize = 0;
/// Number of populated slots in `SRAT_MEM_ENTRIES`.
static mut SRAT_MEM_COUNT: usize = 0;
/// Set once `init` has parsed an SRAT.
static mut SRAT_AVAILABLE: bool = false;

/// Whether an SRAT has been parsed.
pub fn is_available() -> bool {
    // NOTE(review): plain read of a `static mut`; assumes no concurrent writer.
    unsafe { SRAT_AVAILABLE }
}

/// Copy out the populated CPU records one by one.
///
/// Each record is read by value through array indexing, so no reference to
/// the `static mut` table is ever created.
fn recorded_cpus() -> impl Iterator<Item = SratCpuEntry> {
    let count = unsafe { SRAT_CPU_COUNT };
    (0..count).map(|slot| unsafe { SRAT_CPU_ENTRIES[slot] })
}

/// Node of the first enabled record whose APIC id equals `apic_id`.
///
/// Returns `None` when no SRAT was parsed or no enabled record matches.
pub fn numa_node_for_apic(apic_id: u32) -> Option<u8> {
    if !is_available() {
        return None;
    }
    recorded_cpus()
        .find(|cpu| cpu.apic_id == apic_id && cpu.enabled)
        .map(|cpu| cpu.node)
}

/// Highest node id among enabled CPU records, plus one.
///
/// Returns 1 when no SRAT was parsed or no record is enabled.
pub fn numa_node_count() -> usize {
    if !is_available() {
        return 1;
    }
    let highest = recorded_cpus()
        .filter(|cpu| cpu.enabled)
        .map(|cpu| cpu.node)
        .max()
        .unwrap_or(0);
    usize::from(highest) + 1
}
|
||||
|
||||
/// Payload of an SRAT entry of type 0x0, i.e. the bytes after the two-byte
/// type/length prefix. The node id is assembled from `_proximity_lo` (low
/// byte) and `_proximity_hi` (upper three bytes); bit 0 of `flags` marks the
/// entry as enabled.
#[repr(C, packed)]
struct SratLocalApic {
    _proximity_lo: u8,
    apic_id: u8,
    flags: u32,
    _local_sapic_eid: u8,
    _proximity_hi: [u8; 3],
    _clock_domain: u32,
}

/// Payload of an SRAT entry of type 0x1. Base and length are each split into
/// a low and a high 32-bit half; bit 0 of `flags` marks the entry as enabled.
#[repr(C, packed)]
struct SratMemoryAffinity {
    proximity_domain: u32,
    _reserved1: u16,
    base_address_lo: u32,
    base_address_hi: u32,
    length_lo: u32,
    length_hi: u32,
    _reserved2: u32,
    flags: u32,
    _reserved3: u64,
}

/// Payload of an SRAT entry of type 0x2, carrying a 32-bit APIC id. Bit 0 of
/// `flags` marks the entry as enabled.
#[repr(C, packed)]
struct SratLocalX2Apic {
    _reserved: u16,
    proximity_domain: u32,
    x2apic_id: u32,
    flags: u32,
    _clock_domain: u32,
    _reserved2: u32,
}
|
||||
|
||||
pub fn init() {
|
||||
let sdt = match find_sdt("SRAT").as_slice() {
|
||||
[] => return,
|
||||
[x] => *x,
|
||||
xs => { println!("SRAT: {} tables found, expected 1", xs.len()); return; }
|
||||
};
|
||||
if &sdt.signature != b"SRAT" { return; }
|
||||
let data_addr = sdt.data_address();
|
||||
let data_len = sdt.data_len();
|
||||
if data_len < 12 { println!("SRAT: table too short ({data_len} bytes)"); return; }
|
||||
let mut offset: usize = 12;
|
||||
let cpu_entries = unsafe { &mut SRAT_CPU_ENTRIES };
|
||||
let mem_entries = unsafe { &mut SRAT_MEM_ENTRIES };
|
||||
let mut cpu_count: usize = 0;
|
||||
let mut mem_count: usize = 0;
|
||||
while offset + 2 <= data_len {
|
||||
let entry_type = unsafe { *((data_addr + offset) as *const u8) };
|
||||
let entry_len = unsafe { *((data_addr + offset + 1) as *const u8) } as usize;
|
||||
if entry_len < 2 || offset + entry_len > data_len { break; }
|
||||
let entry_data = data_addr + offset + 2;
|
||||
match entry_type {
|
||||
0x0 if entry_len >= size_of::<SratLocalApic>() + 2 => {
|
||||
let e = unsafe { &*(entry_data as *const SratLocalApic) };
|
||||
let enabled = (e.flags & 1) == 1;
|
||||
let node = (e._proximity_lo as u32) | ((e._proximity_hi[0] as u32) << 8) | ((e._proximity_hi[1] as u32) << 16) | ((e._proximity_hi[2] as u32) << 24);
|
||||
if cpu_count < MAX_CPU_ENTRIES { cpu_entries[cpu_count] = SratCpuEntry { apic_id: e.apic_id as u32, node: node as u8, enabled }; cpu_count += 1; }
|
||||
}
|
||||
0x1 if entry_len >= size_of::<SratMemoryAffinity>() + 2 => {
|
||||
let e = unsafe { &*(entry_data as *const SratMemoryAffinity) };
|
||||
let enabled = (e.flags & 1) == 1;
|
||||
let base = (e.base_address_hi as u64) << 32 | e.base_address_lo as u64;
|
||||
let length = (e.length_hi as u64) << 32 | e.length_lo as u64;
|
||||
if mem_count < MAX_MEM_ENTRIES { mem_entries[mem_count] = SratMemEntry { node: e.proximity_domain as u8, base, length, enabled }; mem_count += 1; }
|
||||
}
|
||||
0x2 if entry_len >= size_of::<SratLocalX2Apic>() + 2 => {
|
||||
let e = unsafe { &*(entry_data as *const SratLocalX2Apic) };
|
||||
let enabled = (e.flags & 1) == 1;
|
||||
if cpu_count < MAX_CPU_ENTRIES { cpu_entries[cpu_count] = SratCpuEntry { apic_id: e.x2apic_id, node: e.proximity_domain as u8, enabled }; cpu_count += 1; }
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
offset += entry_len;
|
||||
}
|
||||
unsafe { SRAT_CPU_COUNT = cpu_count; SRAT_MEM_COUNT = mem_count; SRAT_AVAILABLE = true; }
|
||||
debug!("SRAT: {} CPU entries, {} memory entries", cpu_count, mem_count);
|
||||
}
|
||||
@@ -7,40 +7,26 @@ mod linked_list;
|
||||
/// Size of kernel heap
|
||||
const KERNEL_HEAP_SIZE: usize = ::rmm::MEGABYTE;
|
||||
|
||||
/// Report a fatal kernel-heap setup failure and stop making progress.
///
/// Prints `message` verbatim (the caller supplies any trailing newline),
/// then a fixed halt notice, and spins forever without returning.
#[cold]
fn halt_kernel_heap_init(message: &str) -> ! {
    print!("{}", message);
    println!("Kernel heap initialization cannot continue. Halting.");

    // Nothing sensible can run without a heap; park this CPU in a spin loop.
    loop {
        core::hint::spin_loop()
    }
}
|
||||
|
||||
unsafe fn map_heap(mapper: &mut KernelMapper<true>, offset: usize, size: usize) {
|
||||
let mut flush_all = PageFlushAll::new();
|
||||
|
||||
let heap_start_page = Page::containing_address(VirtualAddress::new(offset));
|
||||
let heap_end_page = Page::containing_address(VirtualAddress::new(offset + size - 1));
|
||||
for page in Page::range_inclusive(heap_start_page, heap_end_page) {
|
||||
let phys = match mapper.allocator_mut().allocate_one() {
|
||||
Some(phys) => phys,
|
||||
None => halt_kernel_heap_init(
|
||||
"FATAL: failed to allocate physical frame for kernel heap\n",
|
||||
),
|
||||
};
|
||||
let phys = mapper
|
||||
.allocator_mut()
|
||||
.allocate_one()
|
||||
.expect("failed to allocate kernel heap");
|
||||
let flush = unsafe {
|
||||
match mapper.map_phys(
|
||||
page.start_address(),
|
||||
phys,
|
||||
PageFlags::new()
|
||||
.write(true)
|
||||
.global(cfg!(not(feature = "pti"))),
|
||||
) {
|
||||
Some(flush) => flush,
|
||||
None => halt_kernel_heap_init(
|
||||
"FATAL: failed to map kernel heap virtual page\n",
|
||||
),
|
||||
}
|
||||
mapper
|
||||
.map_phys(
|
||||
page.start_address(),
|
||||
phys,
|
||||
PageFlags::new()
|
||||
.write(true)
|
||||
.global(cfg!(not(feature = "pti"))),
|
||||
)
|
||||
.expect("failed to map kernel heap")
|
||||
};
|
||||
flush_all.consume(flush);
|
||||
}
|
||||
|
||||
@@ -91,7 +91,7 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs) -> ! {
|
||||
dtb::serial::init_early(dtb);
|
||||
}
|
||||
|
||||
info!("RedBear OS starting...");
|
||||
info!("Redox OS starting...");
|
||||
args.print();
|
||||
|
||||
// Initialize RMM
|
||||
|
||||
@@ -97,7 +97,7 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs) -> ! {
|
||||
init_early(dtb);
|
||||
}
|
||||
|
||||
info!("RedBear OS starting...");
|
||||
info!("Redox OS starting...");
|
||||
args.print();
|
||||
|
||||
if let Some(dtb) = &dtb {
|
||||
|
||||
@@ -14,10 +14,6 @@ pub struct IoApicRegs {
|
||||
pointer: *const u32,
|
||||
}
|
||||
impl IoApicRegs {
|
||||
/// Returns whether `idx` is less than or equal to the value reported by
/// `max_redirection_table_entries()` (the bound is inclusive).
fn redirection_index_valid(&mut self, idx: u8) -> bool {
|
||||
idx <= self.max_redirection_table_entries()
|
||||
}
|
||||
|
||||
/// Returns the stored `pointer` unchanged.
/// NOTE(review): presumably the address of the register-select window — confirm.
fn ioregsel(&self) -> *const u32 {
|
||||
self.pointer
|
||||
}
|
||||
@@ -48,28 +44,21 @@ impl IoApicRegs {
|
||||
/// Reads register index `0x01` through `read_reg` and returns the raw value.
pub fn read_ioapicver(&mut self) -> u32 {
|
||||
self.read_reg(0x01)
|
||||
}
|
||||
pub fn read_ioredtbl(&mut self, idx: u8) -> Option<u64> {
|
||||
if !self.redirection_index_valid(idx) {
|
||||
warn!("IOAPIC read_ioredtbl index {} out of range", idx);
|
||||
return None;
|
||||
}
|
||||
pub fn read_ioredtbl(&mut self, idx: u8) -> u64 {
|
||||
assert!(idx < 24);
|
||||
let lo = self.read_reg(0x10 + idx * 2);
|
||||
let hi = self.read_reg(0x10 + idx * 2 + 1);
|
||||
|
||||
Some(u64::from(lo) | (u64::from(hi) << 32))
|
||||
u64::from(lo) | (u64::from(hi) << 32)
|
||||
}
|
||||
pub fn write_ioredtbl(&mut self, idx: u8, value: u64) -> bool {
|
||||
if !self.redirection_index_valid(idx) {
|
||||
warn!("IOAPIC write_ioredtbl index {} out of range", idx);
|
||||
return false;
|
||||
}
|
||||
pub fn write_ioredtbl(&mut self, idx: u8, value: u64) {
|
||||
assert!(idx < 24);
|
||||
|
||||
let lo = value as u32;
|
||||
let hi = (value >> 32) as u32;
|
||||
|
||||
self.write_reg(0x10 + idx * 2, lo);
|
||||
self.write_reg(0x10 + idx * 2 + 1, hi);
|
||||
true
|
||||
}
|
||||
|
||||
pub fn max_redirection_table_entries(&mut self) -> u8 {
|
||||
@@ -103,37 +92,17 @@ impl IoApic {
|
||||
}
|
||||
/// Map an interrupt vector to a physical local APIC ID of a processor (thus physical mode).
|
||||
#[allow(dead_code)]
|
||||
pub fn map(&self, idx: u8, info: MapInfo) -> bool {
|
||||
let Some(raw) = info.as_raw() else {
|
||||
return false;
|
||||
};
|
||||
self.regs.lock().write_ioredtbl(idx, raw)
|
||||
pub fn map(&self, idx: u8, info: MapInfo) {
|
||||
self.regs.lock().write_ioredtbl(idx, info.as_raw())
|
||||
}
|
||||
pub fn set_mask(&self, gsi: u32, mask: bool) {
|
||||
let idx = (gsi - self.gsi_start) as u8;
|
||||
let mut guard = self.regs.lock();
|
||||
|
||||
let Some(mut reg) = guard.read_ioredtbl(idx) else {
|
||||
return;
|
||||
};
|
||||
let mut reg = guard.read_ioredtbl(idx);
|
||||
reg &= !(1 << 16);
|
||||
reg |= u64::from(mask) << 16;
|
||||
let _ = guard.write_ioredtbl(idx, reg);
|
||||
}
|
||||
/// Change the destination APIC for a GSI by reprogramming the redirection table entry.
|
||||
/// Preserves all other fields (vector, polarity, trigger mode, delivery mode, mask).
|
||||
/// Returns true if the entry was successfully updated.
|
||||
pub fn set_irq_affinity(&self, gsi: u32, dest: ApicId) -> bool {
|
||||
let idx = (gsi - self.gsi_start) as u8;
|
||||
let mut guard = self.regs.lock();
|
||||
let Some(mut entry) = guard.read_ioredtbl(idx) else {
|
||||
return false;
|
||||
};
|
||||
// Clear destination field (bits 63:56 for xAPIC physical mode)
|
||||
// and set new destination APIC ID
|
||||
entry &= !(0xFF_u64 << 56);
|
||||
entry |= u64::from(dest.get()) << 56;
|
||||
guard.write_ioredtbl(idx, entry)
|
||||
guard.write_ioredtbl(idx, reg);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -180,26 +149,19 @@ pub struct MapInfo {
|
||||
}
|
||||
|
||||
impl MapInfo {
|
||||
pub fn as_raw(&self) -> Option<u64> {
|
||||
if !(0x20..=0xFE).contains(&self.vector) {
|
||||
warn!(
|
||||
"Refusing to map IOAPIC vector outside valid range: {:#x}",
|
||||
self.vector
|
||||
);
|
||||
return None;
|
||||
}
|
||||
pub fn as_raw(&self) -> u64 {
|
||||
assert!(self.vector >= 0x20);
|
||||
assert!(self.vector <= 0xFE);
|
||||
|
||||
// TODO: Check for reserved fields.
|
||||
|
||||
Some(
|
||||
(u64::from(self.dest.get()) << 56)
|
||||
(u64::from(self.dest.get()) << 56)
|
||||
| (u64::from(self.mask) << 16)
|
||||
| ((self.trigger_mode as u64) << 15)
|
||||
| ((self.polarity as u64) << 13)
|
||||
| ((self.dest_mode as u64) << 11)
|
||||
| ((self.delivery_mode as u64) << 8)
|
||||
| u64::from(self.vector),
|
||||
)
|
||||
| u64::from(self.vector)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -213,7 +175,7 @@ impl fmt::Debug for IoApic {
|
||||
|
||||
let count = guard.max_redirection_table_entries();
|
||||
f.debug_list()
|
||||
.entries((0..=count).filter_map(|i| guard.read_ioredtbl(i)))
|
||||
.entries((0..count).map(|i| guard.read_ioredtbl(i)))
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
@@ -275,14 +237,11 @@ pub unsafe fn handle_ioapic(madt_ioapic: &'static MadtIoApic) {
|
||||
let ioapic_registers = virt.data() as *const u32;
|
||||
let ioapic = IoApic::new(ioapic_registers, madt_ioapic.gsi_base);
|
||||
|
||||
let detected_id = ioapic.regs.lock().id();
|
||||
if detected_id != madt_ioapic.id {
|
||||
warn!(
|
||||
"mismatched ACPI MADT I/O APIC ID: MADT={}, IOAPIC={}; continuing with detected hardware",
|
||||
madt_ioapic.id,
|
||||
detected_id
|
||||
);
|
||||
}
|
||||
assert_eq!(
|
||||
ioapic.regs.lock().id(),
|
||||
madt_ioapic.id,
|
||||
"mismatched ACPI MADT I/O APIC ID, and the ID reported by the I/O APIC"
|
||||
);
|
||||
|
||||
(*IOAPICS.get()).get_or_insert_with(Vec::new).push(ioapic);
|
||||
}
|
||||
@@ -351,11 +310,11 @@ pub unsafe fn init() {
|
||||
}
|
||||
}
|
||||
}
|
||||
for ioapic in ioapics() {
|
||||
for idx in 0..=ioapic.count {
|
||||
ioapic.set_mask(ioapic.gsi_start + u32::from(idx), true);
|
||||
}
|
||||
}
|
||||
println!(
|
||||
"I/O APICs: {:?}, overrides: {:?}",
|
||||
ioapics(),
|
||||
src_overrides()
|
||||
);
|
||||
|
||||
// map the legacy PC-compatible IRQs (0-15) to 32-47, just like we did with 8259 PIC (if it
|
||||
// wouldn't have been disabled due to this I/O APIC)
|
||||
@@ -370,6 +329,7 @@ pub unsafe fn init() {
|
||||
.iter()
|
||||
.any(|over| over.bus_irq == legacy_irq)
|
||||
{
|
||||
// there's an IRQ conflict, making this legacy IRQ inaccessible.
|
||||
continue;
|
||||
}
|
||||
(
|
||||
@@ -389,6 +349,7 @@ pub unsafe fn init() {
|
||||
let redir_tbl_index = (gsi - apic.gsi_start) as u8;
|
||||
|
||||
let map_info = MapInfo {
|
||||
// only send to the BSP
|
||||
dest: bsp_apic_id,
|
||||
dest_mode: DestinationMode::Physical,
|
||||
delivery_mode: DeliveryMode::Fixed,
|
||||
@@ -405,32 +366,7 @@ pub unsafe fn init() {
|
||||
},
|
||||
vector: 32 + legacy_irq,
|
||||
};
|
||||
if !apic.map(redir_tbl_index, map_info) {
|
||||
warn!(
|
||||
"Unable to map legacy IRQ {} (GSI {}) through IOAPIC index {}",
|
||||
legacy_irq,
|
||||
gsi,
|
||||
redir_tbl_index
|
||||
);
|
||||
}
|
||||
|
||||
if legacy_irq == 0 && gsi != u32::from(legacy_irq) {
|
||||
if let Some(apic0) = find_ioapic(u32::from(legacy_irq)) {
|
||||
let idx0 = (u32::from(legacy_irq) - apic0.gsi_start) as u8;
|
||||
let _ = apic0.map(
|
||||
idx0,
|
||||
MapInfo {
|
||||
dest: bsp_apic_id,
|
||||
dest_mode: DestinationMode::Physical,
|
||||
delivery_mode: DeliveryMode::Fixed,
|
||||
mask: false,
|
||||
polarity: ApicPolarity::ActiveHigh,
|
||||
trigger_mode: ApicTriggerMode::Edge,
|
||||
vector: 32,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
apic.map(redir_tbl_index, map_info);
|
||||
}
|
||||
println!(
|
||||
"I/O APICs: {:?}, overrides: {:?}",
|
||||
@@ -470,7 +406,7 @@ fn resolve(irq: u8) -> u32 {
|
||||
fn find_ioapic(gsi: u32) -> Option<&'static IoApic> {
|
||||
ioapics()
|
||||
.iter()
|
||||
.find(|apic| gsi >= apic.gsi_start && gsi <= apic.gsi_start + u32::from(apic.count))
|
||||
.find(|apic| gsi >= apic.gsi_start && gsi < apic.gsi_start + u32::from(apic.count))
|
||||
}
|
||||
|
||||
pub unsafe fn mask(irq: u8) {
|
||||
@@ -489,14 +425,3 @@ pub unsafe fn unmask(irq: u8) {
|
||||
};
|
||||
apic.set_mask(gsi, false);
|
||||
}
|
||||
|
||||
/// Change the destination CPU for an IRQ by reprogramming the IOAPIC redirection entry.
|
||||
/// Resolves the legacy IRQ to its GSI, finds the owning IOAPIC, and updates the destination
|
||||
/// APIC ID in the redirection table while preserving all other fields.
|
||||
pub unsafe fn set_affinity(irq: u8, dest: ApicId) -> bool {
|
||||
let gsi = resolve(irq);
|
||||
match find_ioapic(gsi) {
|
||||
Some(apic) => apic.set_irq_affinity(gsi, dest),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,312 @@
|
||||
use core::{
|
||||
cell::SyncUnsafeCell,
|
||||
ptr::{read_volatile, write_volatile},
|
||||
};
|
||||
use x86::msr::*;
|
||||
|
||||
use crate::{
|
||||
arch::{cpuid::cpuid, ipi::IpiKind},
|
||||
memory::{map_device_memory, PhysicalAddress},
|
||||
percpu::PercpuBlock,
|
||||
};
|
||||
|
||||
/// Newtype around the 32-bit identifier of a local APIC.
#[derive(Clone, Copy, Debug)]
pub struct ApicId(u32);

impl ApicId {
    /// Wrap a raw identifier.
    pub fn new(inner: u32) -> Self {
        ApicId(inner)
    }

    /// Return the raw identifier.
    pub fn get(&self) -> u32 {
        let ApicId(raw) = *self;
        raw
    }
}
|
||||
|
||||
static LOCAL_APIC: SyncUnsafeCell<LocalApic> = SyncUnsafeCell::new(LocalApic {
|
||||
address: 0,
|
||||
x2: false,
|
||||
});
|
||||
pub unsafe fn the_local_apic() -> &'static mut LocalApic {
|
||||
unsafe { &mut *LOCAL_APIC.get() }
|
||||
}
|
||||
|
||||
pub unsafe fn init() {
|
||||
unsafe {
|
||||
the_local_apic().init();
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn init_ap() {
|
||||
unsafe {
|
||||
the_local_apic().init_ap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Local APIC state: registers are reached either through a memory mapping
/// (`x2 == false`) or through MSRs (`x2 == true`).
|
||||
pub struct LocalApic {
|
||||
/// Address of the mapped register page; assigned by `init` only when `x2` is false.
pub address: usize,
|
||||
/// Set by `init` from the CPUID x2APIC feature bit; selects MSR access.
pub x2: bool,
|
||||
}
|
||||
|
||||
impl LocalApic {
|
||||
unsafe fn init(&mut self) {
|
||||
unsafe {
|
||||
let physaddr = PhysicalAddress::new(rdmsr(IA32_APIC_BASE) as usize & 0xFFFF_0000);
|
||||
|
||||
self.x2 = cpuid()
|
||||
.get_feature_info()
|
||||
.is_some_and(|feature_info| feature_info.has_x2apic());
|
||||
|
||||
if !self.x2 {
|
||||
info!("Detected xAPIC at {:#x}", physaddr.data());
|
||||
self.address = map_device_memory(physaddr, 4096).data();
|
||||
} else {
|
||||
info!("Detected x2APIC");
|
||||
}
|
||||
|
||||
self.init_ap();
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn init_ap(&mut self) {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
wrmsr(IA32_APIC_BASE, rdmsr(IA32_APIC_BASE) | (1 << 10));
|
||||
wrmsr(IA32_X2APIC_SIVR, 0x100);
|
||||
} else {
|
||||
self.write(0xF0, 0x100);
|
||||
}
|
||||
self.setup_error_int();
|
||||
//self.setup_timer();
|
||||
|
||||
PercpuBlock::current()
|
||||
.misc_arch_info
|
||||
.apic_id_opt
|
||||
.set(Some(self.id()));
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn read(&self, reg: u32) -> u32 {
|
||||
debug_assert!(!self.x2);
|
||||
unsafe { read_volatile((self.address + reg as usize) as *const u32) }
|
||||
}
|
||||
|
||||
unsafe fn write(&mut self, reg: u32, value: u32) {
|
||||
debug_assert!(!self.x2);
|
||||
unsafe {
|
||||
write_volatile((self.address + reg as usize) as *mut u32, value);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn id(&self) -> ApicId {
|
||||
ApicId::new(if self.x2 {
|
||||
unsafe { rdmsr(IA32_X2APIC_APICID) as u32 }
|
||||
} else {
|
||||
unsafe { self.read(0x20) >> 24 }
|
||||
})
|
||||
}
|
||||
|
||||
pub fn version(&self) -> u32 {
|
||||
if self.x2 {
|
||||
unsafe { rdmsr(IA32_X2APIC_VERSION) as u32 }
|
||||
} else {
|
||||
unsafe { self.read(0x30) }
|
||||
}
|
||||
}
|
||||
|
||||
pub fn icr(&self) -> u64 {
|
||||
if self.x2 {
|
||||
unsafe { rdmsr(IA32_X2APIC_ICR) }
|
||||
} else {
|
||||
unsafe { ((self.read(0x310) as u64) << 32) | self.read(0x300) as u64 }
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_icr(&mut self, value: u64) {
|
||||
if self.x2 {
|
||||
unsafe {
|
||||
const PENDING: u32 = 1 << 12;
|
||||
while (rdmsr(IA32_X2APIC_ICR) as u32) & PENDING == PENDING {
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
wrmsr(IA32_X2APIC_ICR, value);
|
||||
while (rdmsr(IA32_X2APIC_ICR) as u32) & PENDING == PENDING {
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
unsafe {
|
||||
const PENDING: u32 = 1 << 12;
|
||||
while self.read(0x300) & PENDING == PENDING {
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
self.write(0x310, (value >> 32) as u32);
|
||||
self.write(0x300, value as u32);
|
||||
while self.read(0x300) & PENDING == PENDING {
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ipi(&mut self, apic_id: ApicId, kind: IpiKind) {
|
||||
let shift = if self.x2 { 32 } else { 56 };
|
||||
self.set_icr((u64::from(apic_id.get()) << shift) | 0x40 | kind as u64);
|
||||
}
|
||||
pub fn ipi_nmi(&mut self, apic_id: ApicId) {
|
||||
let shift = if self.x2 { 32 } else { 56 };
|
||||
self.set_icr((u64::from(apic_id.get()) << shift) | (1 << 14) | (0b100 << 8));
|
||||
}
|
||||
|
||||
pub unsafe fn eoi(&mut self) {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
wrmsr(IA32_X2APIC_EOI, 0);
|
||||
} else {
|
||||
self.write(0xB0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Reads the Error Status Register.
|
||||
pub unsafe fn esr(&mut self) -> u32 {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
// update the ESR to the current state of the local apic.
|
||||
wrmsr(IA32_X2APIC_ESR, 0);
|
||||
// read the updated value
|
||||
rdmsr(IA32_X2APIC_ESR) as u32
|
||||
} else {
|
||||
self.write(0x280, 0);
|
||||
self.read(0x280)
|
||||
}
|
||||
}
|
||||
}
|
||||
pub unsafe fn lvt_timer(&mut self) -> u32 {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
rdmsr(IA32_X2APIC_LVT_TIMER) as u32
|
||||
} else {
|
||||
self.read(0x320)
|
||||
}
|
||||
}
|
||||
}
|
||||
pub unsafe fn set_lvt_timer(&mut self, value: u32) {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
wrmsr(IA32_X2APIC_LVT_TIMER, u64::from(value));
|
||||
} else {
|
||||
self.write(0x320, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
pub unsafe fn init_count(&mut self) -> u32 {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
rdmsr(IA32_X2APIC_INIT_COUNT) as u32
|
||||
} else {
|
||||
self.read(0x380)
|
||||
}
|
||||
}
|
||||
}
|
||||
pub unsafe fn set_init_count(&mut self, initial_count: u32) {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
wrmsr(IA32_X2APIC_INIT_COUNT, u64::from(initial_count));
|
||||
} else {
|
||||
self.write(0x380, initial_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
pub unsafe fn cur_count(&mut self) -> u32 {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
rdmsr(IA32_X2APIC_CUR_COUNT) as u32
|
||||
} else {
|
||||
self.read(0x390)
|
||||
}
|
||||
}
|
||||
}
|
||||
pub unsafe fn div_conf(&mut self) -> u32 {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
rdmsr(IA32_X2APIC_DIV_CONF) as u32
|
||||
} else {
|
||||
self.read(0x3E0)
|
||||
}
|
||||
}
|
||||
}
|
||||
pub unsafe fn set_div_conf(&mut self, div_conf: u32) {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
wrmsr(IA32_X2APIC_DIV_CONF, u64::from(div_conf));
|
||||
} else {
|
||||
self.write(0x3E0, div_conf);
|
||||
}
|
||||
}
|
||||
}
|
||||
pub unsafe fn lvt_error(&mut self) -> u32 {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
rdmsr(IA32_X2APIC_LVT_ERROR) as u32
|
||||
} else {
|
||||
self.read(0x370)
|
||||
}
|
||||
}
|
||||
}
|
||||
pub unsafe fn set_lvt_error(&mut self, lvt_error: u32) {
|
||||
unsafe {
|
||||
if self.x2 {
|
||||
wrmsr(IA32_X2APIC_LVT_ERROR, u64::from(lvt_error));
|
||||
} else {
|
||||
self.write(0x370, lvt_error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn set_lvt_nmi(&mut self, pin: u8, flags: u16) {
|
||||
let polarity = match flags & 0b11 {
|
||||
0b11 => 1 << 13,
|
||||
_ => 0,
|
||||
};
|
||||
let trigger_mode = match (flags >> 2) & 0b11 {
|
||||
0b11 => 1 << 15,
|
||||
_ => 0,
|
||||
};
|
||||
let lvt_value = (0b100 << 8) | polarity | trigger_mode;
|
||||
|
||||
unsafe {
|
||||
match pin {
|
||||
0 => {
|
||||
if self.x2 {
|
||||
wrmsr(IA32_X2APIC_LVT_LINT0, u64::from(lvt_value));
|
||||
} else {
|
||||
self.write(0x350, lvt_value);
|
||||
}
|
||||
}
|
||||
1 => {
|
||||
if self.x2 {
|
||||
wrmsr(IA32_X2APIC_LVT_LINT1, u64::from(lvt_value));
|
||||
} else {
|
||||
self.write(0x360, lvt_value);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn setup_error_int(&mut self) {
|
||||
unsafe {
|
||||
let vector = 49u32;
|
||||
self.set_lvt_error(vector);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Timer mode selector with explicit `u8` discriminants.
/// NOTE(review): presumably the mode field of the LVT timer register; no user
/// of this enum is visible here, so confirm the bit position at the call site.
#[repr(u8)]
|
||||
pub enum LvtTimerMode {
|
||||
/// Discriminant `0b00`.
OneShot = 0b00,
|
||||
/// Discriminant `0b01`.
Periodic = 0b01,
|
||||
/// Discriminant `0b10`.
TscDeadline = 0b10,
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
--- src/arch/x86_shared/device/local_apic.rs
|
||||
+++ src/arch/x86_shared/device/local_apic.rs
|
||||
@@ -61,9 +61,9 @@
|
||||
|
||||
if !self.x2 {
|
||||
- info!("Detected xAPIC at {:#x}", physaddr.data());
|
||||
+ debug!("Detected xAPIC at {:#x}", physaddr.data());
|
||||
self.address = map_device_memory(physaddr, 4096).data();
|
||||
} else {
|
||||
- info!("Detected x2APIC");
|
||||
+ debug!("Detected x2APIC");
|
||||
}
|
||||
|
||||
|
||||
@@ -4,11 +4,9 @@ pub mod cpu;
|
||||
pub mod hpet;
|
||||
pub mod ioapic;
|
||||
pub mod local_apic;
|
||||
pub mod msi;
|
||||
pub mod pic;
|
||||
pub mod pit;
|
||||
pub mod serial;
|
||||
pub mod vector;
|
||||
#[cfg(feature = "system76_ec_debug")]
|
||||
pub mod system76_ec;
|
||||
|
||||
@@ -25,7 +23,8 @@ pub unsafe fn init() {
|
||||
}
|
||||
}
|
||||
pub unsafe fn init_after_acpi() {
|
||||
unsafe { ioapic::init() };
|
||||
// this will disable the IOAPIC if needed.
|
||||
//ioapic::init(mapper);
|
||||
}
|
||||
|
||||
unsafe fn init_hpet() -> bool {
|
||||
|
||||
@@ -1,183 +0,0 @@
|
||||
// MSI/MSI-X support for x86 — kernel-level message composition and validation
|
||||
// Cross-referenced from Linux 7.0: arch/x86/kernel/apic/msi.c (391 lines)
|
||||
|
||||
use crate::arch::device::local_apic::ApicId;
|
||||
|
||||
/// Base of the MSI address window: bits 31:20 are `0xFEE`.
pub const MSI_ADDRESS_BASE: u64 = 0xFEE0_0000;
/// Bits that must equal `MSI_ADDRESS_BASE` for an address to lie in the MSI
/// window: everything above the low 20 bits.
///
/// The low 20 bits are deliberately excluded because they carry the
/// destination id (bits 19:12) and the flag bits below, none of which affect
/// whether the address is an MSI address.
pub const MSI_ADDRESS_MASK: u64 = 0xFFFF_FFFF_FFF0_0000;
/// Address bit 2: logical destination mode.
const MSI_DEST_MODE_LOGICAL: u64 = 1 << 2;
/// Address bit 3: redirection hint.
const MSI_REDIRECTION_HINT: u64 = 1 << 3;
|
||||
|
||||
/// Raw MSI message address.
#[derive(Debug, Clone, Copy)]
|
||||
pub struct MsiAddress {
|
||||
/// Address value; `MsiAddress::new` builds it as base | destination << 12 | flag bits.
pub raw: u64,
|
||||
}
|
||||
|
||||
/// Raw MSI message data word.
#[derive(Debug, Clone, Copy)]
|
||||
pub struct MsiData {
|
||||
/// Data value; `MsiData::new` builds it as vector | delivery mode << 8 | trigger mode << 15.
pub raw: u32,
|
||||
}
|
||||
|
||||
/// A complete MSI message: address plus data word.
#[derive(Debug, Clone)]
|
||||
pub struct MsiMessage {
|
||||
/// Address part of the message.
pub address: MsiAddress,
|
||||
/// Data part of the message.
pub data: MsiData,
|
||||
}
|
||||
|
||||
impl MsiAddress {
|
||||
pub fn new(dest_apic_id: u8, redirection_hint: bool, dest_mode_logical: bool) -> Self {
|
||||
let mut addr = MSI_ADDRESS_BASE;
|
||||
addr |= u64::from(dest_apic_id) << 12;
|
||||
if redirection_hint {
|
||||
addr |= MSI_REDIRECTION_HINT;
|
||||
}
|
||||
if dest_mode_logical {
|
||||
addr |= MSI_DEST_MODE_LOGICAL;
|
||||
}
|
||||
Self { raw: addr }
|
||||
}
|
||||
|
||||
pub fn validate(addr: u64) -> bool {
|
||||
(addr & MSI_ADDRESS_MASK) == MSI_ADDRESS_BASE
|
||||
}
|
||||
|
||||
pub fn dest_apic_id(&self) -> u8 {
|
||||
((self.raw >> 12) & 0xFF) as u8
|
||||
}
|
||||
}
|
||||
|
||||
impl MsiData {
|
||||
pub fn new(vector: u8, delivery_mode: u8, trigger_mode: u8) -> Self {
|
||||
let mut data = u32::from(vector);
|
||||
data |= u32::from(delivery_mode) << 8;
|
||||
data |= u32::from(trigger_mode) << 15;
|
||||
Self { raw: data }
|
||||
}
|
||||
|
||||
pub fn vector(&self) -> u8 {
|
||||
(self.raw & 0xFF) as u8
|
||||
}
|
||||
|
||||
pub fn delivery_mode(&self) -> u8 {
|
||||
((self.raw >> 8) & 0x7) as u8
|
||||
}
|
||||
|
||||
pub fn trigger_mode(&self) -> u8 {
|
||||
((self.raw >> 15) & 0x1) as u8
|
||||
}
|
||||
}
|
||||
|
||||
impl MsiMessage {
|
||||
pub fn compose(dest: ApicId, vector: u8, delivery_mode: u8, trigger_mode: u8) -> Self {
|
||||
let address = MsiAddress::new(dest.get() as u8, false, false);
|
||||
let data = MsiData::new(vector, delivery_mode, trigger_mode);
|
||||
Self { address, data }
|
||||
}
|
||||
|
||||
pub fn validate(&self) -> bool {
|
||||
MsiAddress::validate(self.address.raw)
|
||||
&& self.data.vector() >= 32
|
||||
&& self.data.vector() < 255
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_valid_msi_address(addr: u64) -> bool {
|
||||
MsiAddress::validate(addr)
|
||||
}
|
||||
|
||||
/// Returns `true` for vectors in `32..=254`.
pub fn is_valid_msi_vector(vector: u8) -> bool {
    matches!(vector, 32..=254)
}
|
||||
|
||||
/// Decoded view of a PCI MSI capability structure.
#[derive(Debug)]
pub struct MsiCapability {
    /// Message Control register as passed to [`MsiCapability::parse`].
    pub msg_ctl: u16,
    /// Lower 32 bits of the message address.
    pub msg_addr_lo: u32,
    /// Upper 32 bits of the message address; `0` for 32-bit capabilities.
    pub msg_addr_hi: u32,
    /// Message data (low 16 bits of the data dword).
    pub msg_data: u16,
    /// Per-vector mask bits; `0` when masking is not supported.
    pub mask_bits: u32,
    /// Per-vector pending bits; `0` when masking is not supported.
    pub pending_bits: u32,
    /// Message Control bit 7: the capability carries a 64-bit address.
    pub is_64bit: bool,
    /// Message Control bit 8: per-vector masking is supported.
    pub is_maskable: bool,
    /// Message Control bits 3:1 (Multiple Message Capable).
    pub multiple_message_capable: u8,
}

impl MsiCapability {
    /// Decodes the capability from its raw dwords.
    ///
    /// `raw[1]` is read as the lower message address, so `raw` is expected to
    /// start at the capability header dword. A 64-bit capability inserts the
    /// upper-address dword at `raw[2]`, which moves the data, mask and pending
    /// registers one dword further along:
    ///
    /// | register | 32-bit   | 64-bit   |
    /// |----------|----------|----------|
    /// | data     | `raw[2]` | `raw[3]` |
    /// | mask     | `raw[3]` | `raw[4]` |
    /// | pending  | `raw[4]` | `raw[5]` |
    pub fn parse(raw: &[u32; 6], msg_ctl: u16) -> Self {
        let is_64bit = msg_ctl & (1 << 7) != 0;
        let is_maskable = msg_ctl & (1 << 8) != 0;
        // Dword displacement caused by the optional upper-address register.
        let shift = usize::from(is_64bit);

        let (mask_bits, pending_bits) = if is_maskable {
            (raw[3 + shift], raw[4 + shift])
        } else {
            (0, 0)
        };

        Self {
            msg_ctl,
            msg_addr_lo: raw[1],
            msg_addr_hi: if is_64bit { raw[2] } else { 0 },
            msg_data: (raw[2 + shift] & 0xFFFF) as u16,
            mask_bits,
            pending_bits,
            is_64bit,
            is_maskable,
            multiple_message_capable: ((msg_ctl >> 1) & 0x7) as u8,
        }
    }
}
|
||||
|
||||
/// Decoded view of a PCI MSI-X capability structure.
#[derive(Debug)]
pub struct MsixCapability {
    /// Message Control register as passed to [`MsixCapability::parse`].
    pub msg_ctl: u16,
    /// Byte offset of the vector table inside its BAR (8-byte aligned).
    pub table_offset: u32,
    /// BAR index holding the vector table.
    pub table_bar: u8,
    /// Byte offset of the pending-bit array inside its BAR (8-byte aligned).
    pub pba_offset: u32,
    /// BAR index holding the pending-bit array.
    pub pba_bar: u8,
    /// Number of table entries (1..=2048).
    pub table_size: u16,
}

impl MsixCapability {
    /// Decodes the capability from its raw dwords.
    ///
    /// `raw[1]` is the Table Offset/BIR dword and `raw[2]` the PBA Offset/BIR
    /// dword; in both the low three bits select the BAR and the remaining bits
    /// are the offset.
    ///
    /// The Table Size field is bits 10:0 of Message Control and is encoded as
    /// N-1, so it is taken unshifted and incremented by one.
    pub fn parse(raw: &[u32; 3], msg_ctl: u16) -> Self {
        const BIR_MASK: u32 = 0x7;
        const TABLE_SIZE_MASK: u16 = 0x7FF;

        let (table, pba) = (raw[1], raw[2]);

        Self {
            msg_ctl,
            table_offset: table & !BIR_MASK,
            table_bar: (table & BIR_MASK) as u8,
            pba_offset: pba & !BIR_MASK,
            pba_bar: (pba & BIR_MASK) as u8,
            table_size: (msg_ctl & TABLE_SIZE_MASK) + 1,
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A composed message must validate and round-trip every field.
    #[test]
    fn test_compose_message() {
        let composed = MsiMessage::compose(ApicId::new(3), 48, 0b101, 1);

        assert!(composed.validate());
        assert_eq!(composed.address.dest_apic_id(), 3);

        let data = composed.data;
        assert_eq!(data.vector(), 48);
        assert_eq!(data.delivery_mode(), 0b101);
        assert_eq!(data.trigger_mode(), 1);
    }

    /// Only addresses inside the MSI window are accepted.
    #[test]
    fn test_invalid_address() {
        let outside_window = 0xDEAD_BEEF;
        let window_base = 0xFEE0_0000;

        assert!(!is_valid_msi_address(outside_window));
        assert!(is_valid_msi_address(window_base));
    }

    /// An all-zero capability is neither 64-bit nor maskable.
    #[test]
    fn test_msi_parse() {
        let zeroed = [0u32; 6];
        let parsed = MsiCapability::parse(&zeroed, 0);

        assert!(!parsed.is_64bit);
        assert!(!parsed.is_maskable);
    }
}
|
||||
@@ -1,53 +0,0 @@
|
||||
use crate::cpu_set::LogicalCpuId;
|
||||
|
||||
const VECTOR_COUNT: usize = 224;
|
||||
|
||||
static VECTORS: [core::sync::atomic::AtomicU32; 7] = [
|
||||
core::sync::atomic::AtomicU32::new(0),
|
||||
core::sync::atomic::AtomicU32::new(0),
|
||||
core::sync::atomic::AtomicU32::new(0),
|
||||
core::sync::atomic::AtomicU32::new(0),
|
||||
core::sync::atomic::AtomicU32::new(0),
|
||||
core::sync::atomic::AtomicU32::new(0),
|
||||
core::sync::atomic::AtomicU32::new(0),
|
||||
];
|
||||
|
||||
pub fn allocate_vector(_cpu: LogicalCpuId) -> Option<u8> {
|
||||
for (bank, slot) in VECTORS.iter().enumerate() {
|
||||
let mut bits = slot.load(core::sync::atomic::Ordering::Acquire);
|
||||
loop {
|
||||
let free = bits.trailing_ones() as usize;
|
||||
if free >= 32 {
|
||||
break;
|
||||
}
|
||||
let bit = 1u32 << free;
|
||||
match slot.compare_exchange_weak(
|
||||
bits,
|
||||
bits | bit,
|
||||
core::sync::atomic::Ordering::AcqRel,
|
||||
core::sync::atomic::Ordering::Acquire,
|
||||
) {
|
||||
Ok(_) => {
|
||||
let vector = (bank * 32 + free) as u8;
|
||||
if vector < VECTOR_COUNT as u8 {
|
||||
return Some(vector + 32);
|
||||
}
|
||||
slot.fetch_and(!bit, core::sync::atomic::Ordering::Release);
|
||||
return None;
|
||||
}
|
||||
Err(current) => bits = current,
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn free_vector(_cpu: LogicalCpuId, vector: u8) {
|
||||
if vector < 32 || (vector as usize) >= 32 + VECTOR_COUNT {
|
||||
return;
|
||||
}
|
||||
let idx = (vector - 32) as usize;
|
||||
let bank = idx / 32;
|
||||
let bit = 1u32 << (idx % 32);
|
||||
VECTORS[bank].fetch_and(!bit, core::sync::atomic::Ordering::Release);
|
||||
}
|
||||
@@ -192,15 +192,6 @@ impl ProcessorControlRegion {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports that the Processor Control Region could not be allocated and then
/// spins forever; never returns.
#[cold]
fn halt_pcr_init() -> ! {
    let messages = [
        "FATAL: failed to allocate physical memory for Processor Control Region",
        "Processor startup cannot continue. Halting.",
    ];
    for message in messages {
        println!("{}", message);
    }

    loop {
        core::hint::spin_loop();
    }
}
|
||||
|
||||
pub unsafe fn pcr() -> *mut ProcessorControlRegion {
|
||||
unsafe {
|
||||
// Primitive benchmarking of RDFSBASE and RDGSBASE in userspace, appears to indicate that
|
||||
@@ -384,10 +375,7 @@ pub fn allocate_and_init_pcr(
|
||||
.next_power_of_two()
|
||||
.trailing_zeros();
|
||||
|
||||
let pcr_frame = match crate::memory::allocate_p2frame(alloc_order) {
|
||||
Some(frame) => frame,
|
||||
None => halt_pcr_init(),
|
||||
};
|
||||
let pcr_frame = crate::memory::allocate_p2frame(alloc_order).expect("failed to allocate PCR");
|
||||
let pcr_ptr = RmmA::phys_to_virt(pcr_frame.base()).data() as *mut ProcessorControlRegion;
|
||||
unsafe { core::ptr::write(pcr_ptr, ProcessorControlRegion::new_partial_init(cpu_id)) };
|
||||
|
||||
|
||||
@@ -78,15 +78,6 @@ static INIT_BSP_IDT: SyncUnsafeCell<Idt> = SyncUnsafeCell::new(Idt::new());
|
||||
pub(crate) static IDTS: RwLock<HashMap<LogicalCpuId, &'static mut Idt>> =
|
||||
RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new()));
|
||||
|
||||
/// Reports that the backup interrupt stack could not be allocated and then
/// spins forever; never returns.
#[cold]
fn halt_idt_init() -> ! {
    let messages = [
        "FATAL: failed to allocate physical pages for backup interrupt stack",
        "Interrupt setup cannot continue. Halting.",
    ];
    for message in messages {
        println!("{}", message);
    }

    loop {
        core::hint::spin_loop();
    }
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_reserved(cpu_id: LogicalCpuId, index: u8) -> bool {
|
||||
if cpu_id == LogicalCpuId::BSP {
|
||||
@@ -110,8 +101,6 @@ pub fn set_reserved(cpu_id: LogicalCpuId, index: u8, reserved: bool) {
|
||||
}
|
||||
|
||||
pub fn available_irqs_iter(cpu_id: LogicalCpuId) -> impl Iterator<Item = u8> + 'static {
|
||||
let count = (32..=254).filter(|&index| !is_reserved(cpu_id, index)).count();
|
||||
info!("available_irqs_iter: cpu_id={} count={}", cpu_id.get(), count);
|
||||
(32..=254).filter(move |&index| !is_reserved(cpu_id, index))
|
||||
}
|
||||
|
||||
@@ -172,10 +161,8 @@ pub fn allocate_and_init_idt(cpu_id: LogicalCpuId) -> *mut Idt {
|
||||
.or_insert_with(|| Box::leak(Box::new(Idt::new())));
|
||||
|
||||
use crate::memory::{RmmA, RmmArch};
|
||||
let frames = match crate::memory::allocate_p2frame(4) {
|
||||
Some(frames) => frames,
|
||||
None => halt_idt_init(),
|
||||
};
|
||||
let frames = crate::memory::allocate_p2frame(4)
|
||||
.expect("failed to allocate pages for backup interrupt stack");
|
||||
|
||||
// Physical pages are mapped linearly. So is the linearly mapped virtual memory.
|
||||
let base_address = RmmA::phys_to_virt(frames.base());
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
use core::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
use syscall::Exception;
|
||||
use x86::irq::PageFaultError;
|
||||
|
||||
@@ -12,22 +10,6 @@ use crate::{
|
||||
syscall::flag::*,
|
||||
};
|
||||
|
||||
static NMI_IN_PROGRESS: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
unsafe fn nmi_raw_serial_write(bytes: &[u8]) {
|
||||
use crate::syscall::io::{Io, Pio};
|
||||
|
||||
let mut com1 = Pio::<u8>::new(0x3F8);
|
||||
let lsr = Pio::<u8>::new(0x3F8 + 5);
|
||||
|
||||
for &byte in bytes {
|
||||
while lsr.read() & (1 << 5) == 0 {
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
com1.write(byte);
|
||||
}
|
||||
}
|
||||
|
||||
interrupt_stack!(divide_by_zero, |stack| {
|
||||
println!("Divide by zero");
|
||||
stack.trace();
|
||||
@@ -73,35 +55,9 @@ interrupt_stack!(non_maskable, @paranoid, |stack| {
|
||||
|
||||
#[cfg(not(all(target_arch = "x86_64", feature = "profiling")))]
|
||||
{
|
||||
if NMI_IN_PROGRESS.swap(true, Ordering::SeqCst) {
|
||||
return;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
nmi_raw_serial_write(b"Non-maskable interrupt\n");
|
||||
nmi_raw_serial_write(b" RIP: ");
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
let instruction_pointer = u64::from(stack.iret.eip);
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
let instruction_pointer = stack.iret.rip;
|
||||
|
||||
let mut buf = [0u8; 19];
|
||||
buf[0] = b'0';
|
||||
buf[1] = b'x';
|
||||
for i in 0..16 {
|
||||
let nibble = ((instruction_pointer >> (60 - i * 4)) & 0xF) as u8;
|
||||
buf[2 + i] = if nibble < 10 {
|
||||
b'0' + nibble
|
||||
} else {
|
||||
b'a' + nibble - 10
|
||||
};
|
||||
}
|
||||
buf[18] = b'\n';
|
||||
nmi_raw_serial_write(&buf);
|
||||
}
|
||||
|
||||
NMI_IN_PROGRESS.store(false, Ordering::SeqCst);
|
||||
// TODO: This will likely deadlock
|
||||
println!("Non-maskable interrupt");
|
||||
stack.dump();
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -28,8 +28,6 @@ pub mod pti;
|
||||
/// Initialization and start function
|
||||
pub mod start;
|
||||
|
||||
pub mod sleep;
|
||||
|
||||
/// Stop function
|
||||
pub mod stop;
|
||||
|
||||
|
||||
@@ -1,712 +0,0 @@
|
||||
use alloc::{sync::Arc, vec::Vec};
|
||||
use core::{
|
||||
ptr::NonNull,
|
||||
str::FromStr,
|
||||
sync::atomic::{AtomicU32, Ordering},
|
||||
};
|
||||
|
||||
use acpi_ext::{
|
||||
aml::{namespace::AmlName, object::Object, Interpreter},
|
||||
registers::FixedRegisters,
|
||||
sdt::{facs::Facs, fadt::Fadt, SdtHeader},
|
||||
AcpiTables, Handle, Handler, PhysicalMapping,
|
||||
};
|
||||
use spin::Mutex;
|
||||
use syscall::error::{Error, EINVAL, EIO};
|
||||
use x86::{segmentation::SegmentSelector, task, Ring};
|
||||
|
||||
use crate::{
|
||||
acpi::ACPI_ROOT_INFO,
|
||||
arch::interrupt,
|
||||
memory::{
|
||||
round_down_pages, round_up_pages, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA,
|
||||
RmmArch, VirtualAddress, PAGE_SIZE,
|
||||
},
|
||||
syscall::io::{Io, Pio},
|
||||
};
|
||||
|
||||
/// Bit position of the SLP_TYP field in a PM1 control register value.
const ACPI_SLP_TYP_SHIFT: u16 = 10;
/// Three-bit SLP_TYP field mask (bits 12:10).
const ACPI_SLP_TYP_MASK: u16 = 0b111 << ACPI_SLP_TYP_SHIFT;
/// SLP_EN bit (bit 13).
const ACPI_SLP_EN: u16 = 0x2000;
/// Physical address of the page that receives the wake trampoline.
const WAKE_TRAMPOLINE_PHYS: usize = 0x8000;
/// Return value denoting a successful sleep/resume cycle.
const SLEEP_RETURN_OK: usize = 0;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
static WAKE_TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/s3_wakeup"));
|
||||
|
||||
/// Memory image used with `sgdt`/`sidt`/`lgdt`/`lidt`: a 16-bit limit
/// immediately followed by a 64-bit base, with no padding.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C, packed)]
struct DescriptorTableRegister {
    /// Table limit.
    limit: u16,
    /// Table base address.
    base: u64,
}
|
||||
|
||||
/// 64-byte-aligned, 4096-byte buffer used as the FXSAVE/FXRSTOR save area.
#[derive(Clone, Copy, Debug)]
#[repr(C, align(64))]
struct FpuState {
    /// Raw save-area bytes.
    bytes: [u8; 4096],
}

impl Default for FpuState {
    /// Returns an all-zero save area.
    fn default() -> Self {
        FpuState { bytes: [0u8; 4096] }
    }
}
|
||||
|
||||
/// ACPI sleep states this module can request.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SleepState {
    /// The `\_S3` sleep state.
    S3,
    /// The `\_S5` sleep state.
    S5,
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum SleepError {
|
||||
UnsupportedArch,
|
||||
MissingAcpi,
|
||||
MissingFadt,
|
||||
MissingFacs,
|
||||
MissingSleepObject,
|
||||
InvalidSleepObject,
|
||||
UnsupportedPmControl,
|
||||
UnsupportedAmlOperation,
|
||||
SleepDidNotEnter,
|
||||
}
|
||||
|
||||
impl SleepError {
|
||||
fn code(self) -> usize {
|
||||
match self {
|
||||
Self::UnsupportedArch => EINVAL as usize,
|
||||
Self::MissingAcpi
|
||||
| Self::MissingFadt
|
||||
| Self::MissingFacs
|
||||
| Self::MissingSleepObject
|
||||
| Self::UnsupportedAmlOperation => EIO as usize,
|
||||
Self::InvalidSleepObject | Self::UnsupportedPmControl | Self::SleepDidNotEnter => {
|
||||
EINVAL as usize
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn from_code(code: usize) -> Self {
|
||||
match code as i32 {
|
||||
x if x == EINVAL => Self::InvalidSleepObject,
|
||||
_ => Self::MissingAcpi,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
struct SavedCpuContext {
|
||||
entry_rsp: usize,
|
||||
runtime_rsp: usize,
|
||||
facs_address: usize,
|
||||
cr0: usize,
|
||||
cr2: usize,
|
||||
cr3: usize,
|
||||
cr4: usize,
|
||||
rflags: usize,
|
||||
gdtr: DescriptorTableRegister,
|
||||
idtr: DescriptorTableRegister,
|
||||
efer: u64,
|
||||
fs_base: u64,
|
||||
gs_base: u64,
|
||||
kernel_gs_base: u64,
|
||||
fpu: FpuState,
|
||||
}
|
||||
|
||||
static SAVED_CONTEXT: Mutex<Option<SavedCpuContext>> = Mutex::new(None);
|
||||
static AML_MUTEX_IDS: AtomicU32 = AtomicU32::new(1);
|
||||
|
||||
/// SLP_TYP values, already positioned for the PM1a (`a`) and PM1b (`b`)
/// control registers.
#[derive(Debug, Clone, Copy)]
struct SleepTypeData {
    /// Value for PM1a.
    a: u16,
    /// Value for PM1b.
    b: u16,
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct KernelAcpiHandler;
|
||||
|
||||
impl KernelAcpiHandler {
|
||||
fn map_range(physical_address: usize, size: usize) -> (*mut u8, usize) {
|
||||
let map_base = round_down_pages(physical_address);
|
||||
let map_offset = physical_address - map_base;
|
||||
let mapped_length = round_up_pages(size + map_offset);
|
||||
|
||||
// SAFETY: The ACPI interpreter only requests firmware-described physical regions.
|
||||
unsafe {
|
||||
let mut mapper = KernelMapper::lock_rw();
|
||||
for page_index in 0..mapped_length / PAGE_SIZE {
|
||||
let (_, flush) = mapper
|
||||
.map_linearly(
|
||||
PhysicalAddress::new(map_base + page_index * PAGE_SIZE),
|
||||
PageFlags::new(),
|
||||
)
|
||||
.expect("failed to linearly map ACPI physical region");
|
||||
flush.flush();
|
||||
}
|
||||
}
|
||||
|
||||
let virtual_base = RmmA::phys_to_virt(PhysicalAddress::new(map_base)).data();
|
||||
((virtual_base + map_offset) as *mut u8, mapped_length)
|
||||
}
|
||||
}
|
||||
|
||||
impl Handler for KernelAcpiHandler {
|
||||
unsafe fn map_physical_region<T>(&self, physical_address: usize, size: usize) -> PhysicalMapping<Self, T> {
|
||||
let (virtual_start, mapped_length) = Self::map_range(physical_address, size);
|
||||
PhysicalMapping {
|
||||
physical_start: physical_address,
|
||||
virtual_start: NonNull::new(virtual_start.cast::<T>())
|
||||
.expect("expected mapped ACPI virtual address to be non-null"),
|
||||
region_length: size,
|
||||
mapped_length,
|
||||
handler: *self,
|
||||
}
|
||||
}
|
||||
|
||||
fn unmap_physical_region<T>(_region: &PhysicalMapping<Self, T>) {}
|
||||
|
||||
fn read_u8(&self, address: usize) -> u8 {
|
||||
// SAFETY: AML system-memory accesses are byte-addressable firmware regions.
|
||||
unsafe { core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u8) }
|
||||
}
|
||||
|
||||
fn read_u16(&self, address: usize) -> u16 {
|
||||
// SAFETY: AML system-memory accesses are word-addressable firmware regions.
|
||||
unsafe {
|
||||
core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u16)
|
||||
}
|
||||
}
|
||||
|
||||
fn read_u32(&self, address: usize) -> u32 {
|
||||
// SAFETY: AML system-memory accesses are dword-addressable firmware regions.
|
||||
unsafe {
|
||||
core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u32)
|
||||
}
|
||||
}
|
||||
|
||||
fn read_u64(&self, address: usize) -> u64 {
|
||||
// SAFETY: AML system-memory accesses are qword-addressable firmware regions.
|
||||
unsafe {
|
||||
core::ptr::read_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *const u64)
|
||||
}
|
||||
}
|
||||
|
||||
fn write_u8(&self, address: usize, value: u8) {
|
||||
// SAFETY: AML system-memory accesses are byte-addressable firmware regions.
|
||||
unsafe {
|
||||
core::ptr::write_volatile(RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u8, value)
|
||||
}
|
||||
}
|
||||
|
||||
fn write_u16(&self, address: usize, value: u16) {
|
||||
// SAFETY: AML system-memory accesses are word-addressable firmware regions.
|
||||
unsafe {
|
||||
core::ptr::write_volatile(
|
||||
RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u16,
|
||||
value,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn write_u32(&self, address: usize, value: u32) {
|
||||
// SAFETY: AML system-memory accesses are dword-addressable firmware regions.
|
||||
unsafe {
|
||||
core::ptr::write_volatile(
|
||||
RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u32,
|
||||
value,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn write_u64(&self, address: usize, value: u64) {
|
||||
// SAFETY: AML system-memory accesses are qword-addressable firmware regions.
|
||||
unsafe {
|
||||
core::ptr::write_volatile(
|
||||
RmmA::phys_to_virt(PhysicalAddress::new(address)).data() as *mut u64,
|
||||
value,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn read_io_u8(&self, port: u16) -> u8 {
|
||||
Pio::<u8>::new(port).read()
|
||||
}
|
||||
|
||||
fn read_io_u16(&self, port: u16) -> u16 {
|
||||
Pio::<u16>::new(port).read()
|
||||
}
|
||||
|
||||
fn read_io_u32(&self, port: u16) -> u32 {
|
||||
Pio::<u32>::new(port).read()
|
||||
}
|
||||
|
||||
fn write_io_u8(&self, port: u16, value: u8) {
|
||||
Pio::<u8>::new(port).write(value)
|
||||
}
|
||||
|
||||
fn write_io_u16(&self, port: u16, value: u16) {
|
||||
Pio::<u16>::new(port).write(value)
|
||||
}
|
||||
|
||||
fn write_io_u32(&self, port: u16, value: u32) {
|
||||
Pio::<u32>::new(port).write(value)
|
||||
}
|
||||
|
||||
fn read_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u8 {
|
||||
0
|
||||
}
|
||||
|
||||
fn read_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u16 {
|
||||
0
|
||||
}
|
||||
|
||||
fn read_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16) -> u32 {
|
||||
0
|
||||
}
|
||||
|
||||
fn write_pci_u8(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u8) {}
|
||||
|
||||
fn write_pci_u16(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u16) {}
|
||||
|
||||
fn write_pci_u32(&self, _address: acpi_ext::PciAddress, _offset: u16, _value: u32) {}
|
||||
|
||||
fn nanos_since_boot(&self) -> u64 {
|
||||
0
|
||||
}
|
||||
|
||||
fn stall(&self, microseconds: u64) {
|
||||
for _ in 0..(microseconds.saturating_mul(64)) {
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
}
|
||||
|
||||
fn sleep(&self, milliseconds: u64) {
|
||||
for _ in 0..(milliseconds.saturating_mul(64_000)) {
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
}
|
||||
|
||||
fn create_mutex(&self) -> Handle {
|
||||
Handle(AML_MUTEX_IDS.fetch_add(1, Ordering::Relaxed))
|
||||
}
|
||||
|
||||
fn acquire(&self, _mutex: Handle, _timeout: u16) -> Result<(), acpi_ext::aml::AmlError> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn release(&self, _mutex: Handle) {}
|
||||
}
|
||||
|
||||
fn sleep_state_name(state: SleepState) -> &'static str {
|
||||
match state {
|
||||
SleepState::S3 => "\\_S3",
|
||||
SleepState::S5 => "\\_S5",
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_sleep_type(value: u16) -> u16 {
|
||||
if value <= 0x7 {
|
||||
value << ACPI_SLP_TYP_SHIFT
|
||||
} else {
|
||||
value & ACPI_SLP_TYP_MASK
|
||||
}
|
||||
}
|
||||
|
||||
fn load_interpreter() -> Result<(
|
||||
Arc<FixedRegisters<KernelAcpiHandler>>,
|
||||
PhysicalMapping<KernelAcpiHandler, Facs>,
|
||||
Interpreter<KernelAcpiHandler>,
|
||||
), SleepError> {
|
||||
let root = *ACPI_ROOT_INFO.get().ok_or(SleepError::MissingAcpi)?;
|
||||
let handler = KernelAcpiHandler;
|
||||
|
||||
// SAFETY: ACPI root info is captured from the firmware-provided, already validated root table.
|
||||
let tables = unsafe {
|
||||
AcpiTables::from_rsdt(handler, root.revision, root.root_sdt_address.data())
|
||||
.map_err(|_| SleepError::MissingAcpi)?
|
||||
};
|
||||
let fadt = tables.find_table::<Fadt>().ok_or(SleepError::MissingFadt)?;
|
||||
let registers = Arc::new(
|
||||
FixedRegisters::new(&fadt, handler).map_err(|_| SleepError::UnsupportedPmControl)?,
|
||||
);
|
||||
let facs_address = fadt.facs_address().map_err(|_| SleepError::MissingFacs)?;
|
||||
|
||||
// SAFETY: The FADT-supplied FACS address is used exactly as described by the ACPI spec.
|
||||
let facs = unsafe { handler.map_physical_region::<Facs>(facs_address, core::mem::size_of::<Facs>()) };
|
||||
// SAFETY: The AML interpreter only needs an owned mapping of the same firmware FACS table.
|
||||
let interpreter_facs = unsafe {
|
||||
handler.map_physical_region::<Facs>(facs_address, core::mem::size_of::<Facs>())
|
||||
};
|
||||
let dsdt = tables.dsdt().map_err(|_| SleepError::MissingFadt)?;
|
||||
let interpreter = Interpreter::new(handler, dsdt.revision, Arc::clone(®isters), Some(interpreter_facs));
|
||||
|
||||
// SAFETY: Each AML table mapping is owned by the interpreter during table loading.
|
||||
unsafe {
|
||||
let mapping = handler.map_physical_region::<SdtHeader>(dsdt.phys_address, dsdt.length as usize);
|
||||
let stream = core::slice::from_raw_parts(
|
||||
mapping.virtual_start.as_ptr().byte_add(core::mem::size_of::<SdtHeader>()) as *const u8,
|
||||
dsdt.length as usize - core::mem::size_of::<SdtHeader>(),
|
||||
);
|
||||
interpreter
|
||||
.load_table(stream)
|
||||
.map_err(|_| SleepError::UnsupportedAmlOperation)?;
|
||||
|
||||
for ssdt in tables.ssdts() {
|
||||
let mapping = handler.map_physical_region::<SdtHeader>(ssdt.phys_address, ssdt.length as usize);
|
||||
let stream = core::slice::from_raw_parts(
|
||||
mapping.virtual_start.as_ptr().byte_add(core::mem::size_of::<SdtHeader>()) as *const u8,
|
||||
ssdt.length as usize - core::mem::size_of::<SdtHeader>(),
|
||||
);
|
||||
interpreter
|
||||
.load_table(stream)
|
||||
.map_err(|_| SleepError::UnsupportedAmlOperation)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok((registers, facs, interpreter))
|
||||
}
|
||||
|
||||
fn sleep_type_data_from_interpreter(
|
||||
interpreter: &Interpreter<KernelAcpiHandler>,
|
||||
state: SleepState,
|
||||
) -> Result<SleepTypeData, SleepError> {
|
||||
let name = AmlName::from_str(sleep_state_name(state)).map_err(|_| SleepError::MissingSleepObject)?;
|
||||
let object = interpreter
|
||||
.evaluate(name, Vec::new())
|
||||
.map_err(|_| SleepError::MissingSleepObject)?;
|
||||
|
||||
let Object::Package(package) = &*object else {
|
||||
return Err(SleepError::InvalidSleepObject);
|
||||
};
|
||||
|
||||
let Some(typa_object) = package.first() else {
|
||||
return Err(SleepError::InvalidSleepObject);
|
||||
};
|
||||
let Some(typb_object) = package.get(1) else {
|
||||
return Err(SleepError::InvalidSleepObject);
|
||||
};
|
||||
|
||||
let Object::Integer(typa) = &**typa_object else {
|
||||
return Err(SleepError::InvalidSleepObject);
|
||||
};
|
||||
let Object::Integer(typb) = &**typb_object else {
|
||||
return Err(SleepError::InvalidSleepObject);
|
||||
};
|
||||
|
||||
Ok(SleepTypeData {
|
||||
a: encode_sleep_type(*typa as u16),
|
||||
b: encode_sleep_type(*typb as u16),
|
||||
})
|
||||
}
|
||||
|
||||
fn sleep_type_data(state: SleepState) -> Result<SleepTypeData, SleepError> {
|
||||
let (_registers, _facs, interpreter) = load_interpreter()?;
|
||||
sleep_type_data_from_interpreter(&interpreter, state)
|
||||
}
|
||||
|
||||
fn install_wake_trampoline(stack_rsp: usize, cr3: usize) {
|
||||
let trampoline_page = Page::containing_address(VirtualAddress::new(WAKE_TRAMPOLINE_PHYS));
|
||||
let trampoline_frame = PhysicalAddress::new(WAKE_TRAMPOLINE_PHYS);
|
||||
|
||||
// SAFETY: The 0x8000 low-memory trampoline page is reserved by the kernel for bootstrap stubs.
|
||||
let (result, _) = unsafe {
|
||||
let mut mapper = KernelMapper::lock_rw();
|
||||
let result = mapper
|
||||
.map_phys(
|
||||
trampoline_page.start_address(),
|
||||
trampoline_frame,
|
||||
PageFlags::new().execute(true).write(true),
|
||||
)
|
||||
.expect("failed to map S3 wake trampoline page");
|
||||
(result, mapper.table().phys().data())
|
||||
};
|
||||
result.flush();
|
||||
|
||||
for (index, value) in WAKE_TRAMPOLINE_DATA.iter().enumerate() {
|
||||
// SAFETY: The trampoline page is mapped writable at the same virtual address as the physical page.
|
||||
unsafe {
|
||||
core::ptr::write_volatile((WAKE_TRAMPOLINE_PHYS as *mut u8).add(index), *value);
|
||||
}
|
||||
}
|
||||
|
||||
// SAFETY: The wake trampoline layout reserves three qword fields immediately after the jump.
|
||||
unsafe {
|
||||
let stack_slot = (WAKE_TRAMPOLINE_PHYS + 8) as *mut u64;
|
||||
let page_table_slot = stack_slot.add(1);
|
||||
let code_slot = stack_slot.add(2);
|
||||
stack_slot.write(stack_rsp as u64);
|
||||
page_table_slot.write(cr3 as u64);
|
||||
#[expect(clippy::fn_to_numeric_cast)]
|
||||
code_slot.write(resume_from_s3_trampoline as usize as u64);
|
||||
}
|
||||
|
||||
// SAFETY: The trampoline mapping is no longer needed once the physical page has been populated.
|
||||
let (_frame, _, flush) = unsafe {
|
||||
KernelMapper::lock_rw()
|
||||
.unmap_phys(trampoline_page.start_address())
|
||||
.expect("failed to unmap S3 wake trampoline page")
|
||||
};
|
||||
flush.flush();
|
||||
}
|
||||
|
||||
fn save_descriptor_tables(context: &mut SavedCpuContext) {
|
||||
// SAFETY: SGDT/SIDT only read the current CPU descriptor-table registers into the provided storage.
|
||||
unsafe {
|
||||
core::arch::asm!("sgdt [{}]", in(reg) &mut context.gdtr, options(nostack, preserves_flags));
|
||||
core::arch::asm!("sidt [{}]", in(reg) &mut context.idtr, options(nostack, preserves_flags));
|
||||
}
|
||||
}
|
||||
|
||||
fn save_fpu_state(context: &mut SavedCpuContext) {
|
||||
// SAFETY: The kernel owns the current CPU at suspend entry and the FXSAVE buffer is 64-byte aligned.
|
||||
unsafe {
|
||||
core::arch::asm!(
|
||||
"fxsave64 [{}]",
|
||||
in(reg) context.fpu.bytes.as_mut_ptr(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn restore_fpu_state(context: &SavedCpuContext) {
|
||||
// SAFETY: The saved FXSAVE image belongs to the same CPU context and matches the restore instruction.
|
||||
unsafe {
|
||||
core::arch::asm!(
|
||||
"fxrstor64 [{}]",
|
||||
in(reg) context.fpu.bytes.as_ptr(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn save_cpu_context(entry_rsp: usize) -> SavedCpuContext {
|
||||
let mut context = SavedCpuContext {
|
||||
entry_rsp,
|
||||
..SavedCpuContext::default()
|
||||
};
|
||||
|
||||
// SAFETY: Reading control registers and MSRs is required to reconstruct the CPU execution state on wake.
|
||||
unsafe {
|
||||
core::arch::asm!(
|
||||
"mov {}, cr0",
|
||||
out(reg) context.cr0,
|
||||
options(nostack, preserves_flags)
|
||||
);
|
||||
core::arch::asm!(
|
||||
"mov {}, cr2",
|
||||
out(reg) context.cr2,
|
||||
options(nostack, preserves_flags)
|
||||
);
|
||||
core::arch::asm!(
|
||||
"mov {}, cr3",
|
||||
out(reg) context.cr3,
|
||||
options(nostack, preserves_flags)
|
||||
);
|
||||
core::arch::asm!(
|
||||
"mov {}, cr4",
|
||||
out(reg) context.cr4,
|
||||
options(nostack, preserves_flags)
|
||||
);
|
||||
core::arch::asm!(
|
||||
"pushfq",
|
||||
"pop {}",
|
||||
out(reg) context.rflags,
|
||||
options(preserves_flags)
|
||||
);
|
||||
core::arch::asm!("mov {}, rsp", out(reg) context.runtime_rsp, options(nostack, preserves_flags));
|
||||
|
||||
context.efer = x86::msr::rdmsr(x86::msr::IA32_EFER);
|
||||
context.fs_base = x86::msr::rdmsr(x86::msr::IA32_FS_BASE);
|
||||
context.gs_base = x86::msr::rdmsr(x86::msr::IA32_GS_BASE);
|
||||
context.kernel_gs_base = x86::msr::rdmsr(x86::msr::IA32_KERNEL_GSBASE);
|
||||
}
|
||||
|
||||
save_descriptor_tables(&mut context);
|
||||
save_fpu_state(&mut context);
|
||||
context
|
||||
}
|
||||
|
||||
/// Writes `vector` into both FACS waking-vector fields.
///
/// The 32-bit field receives the value truncated to 32 bits.
fn set_firmware_waking_vector(facs: &mut PhysicalMapping<KernelAcpiHandler, Facs>, vector: usize) {
    let (legacy, extended) = (vector as u32, vector as u64);
    facs.firmware_waking_vector = legacy;
    facs.x_firmware_waking_vector = extended;
}
|
||||
|
||||
fn write_pm1_control_block(
|
||||
registers: &FixedRegisters<KernelAcpiHandler>,
|
||||
sleep_type: SleepTypeData,
|
||||
) -> Result<(), SleepError> {
|
||||
let current_a = registers
|
||||
.pm1_control_registers
|
||||
.pm1a
|
||||
.read()
|
||||
.map_err(|_| SleepError::UnsupportedPmControl)? as u16;
|
||||
let armed_a = (current_a & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.a;
|
||||
|
||||
registers
|
||||
.pm1_control_registers
|
||||
.pm1a
|
||||
.write(u64::from(armed_a))
|
||||
.map_err(|_| SleepError::UnsupportedPmControl)?;
|
||||
|
||||
if let Some(pm1b) = ®isters.pm1_control_registers.pm1b {
|
||||
let current_b = pm1b.read().map_err(|_| SleepError::UnsupportedPmControl)? as u16;
|
||||
let armed_b = (current_b & !(ACPI_SLP_TYP_MASK | ACPI_SLP_EN)) | sleep_type.b;
|
||||
pm1b.write(u64::from(armed_b))
|
||||
.map_err(|_| SleepError::UnsupportedPmControl)?;
|
||||
pm1b.write(u64::from(armed_b | ACPI_SLP_EN))
|
||||
.map_err(|_| SleepError::UnsupportedPmControl)?;
|
||||
}
|
||||
|
||||
// SAFETY: WBINVD is required here to flush dirty cache lines before firmware powers down the CPU package.
|
||||
unsafe {
|
||||
core::arch::asm!("wbinvd", options(nostack, preserves_flags));
|
||||
}
|
||||
|
||||
registers
|
||||
.pm1_control_registers
|
||||
.pm1a
|
||||
.write(u64::from(armed_a | ACPI_SLP_EN))
|
||||
.map_err(|_| SleepError::UnsupportedPmControl)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[unsafe(naked)]
|
||||
unsafe extern "sysv64" fn enter_sleep_raw(state: usize) -> usize {
|
||||
core::arch::naked_asm!(
|
||||
"mov rsi, rsp",
|
||||
"jmp {inner}",
|
||||
inner = sym enter_sleep_raw_inner,
|
||||
);
|
||||
}
|
||||
|
||||
/// Rust body behind `enter_sleep_raw`.
///
/// `state` is the numeric sleep state (3 selects S3, 5 selects S5; any other
/// value returns the `InvalidSleepObject` code). `entry_rsp` is the stack
/// pointer captured by the naked shim and is handed to `save_cpu_context`.
///
/// Sequence: load the ACPI registers/FACS/interpreter, look up the sleep type
/// data, save the CPU context, install the wake trampoline, point the firmware
/// waking vector at `WAKE_TRAMPOLINE_PHYS`, publish the context in
/// `SAVED_CONTEXT`, disable interrupts, write the PM1 control block, and halt.
///
/// Returns a `SleepError` code on every path that returns from here; if the
/// halt falls through, the code is `SleepDidNotEnter`.
///
/// NOTE(review): the two returns after `interrupt::disable()` leave interrupts
/// disabled, the firmware waking vector set, and `SAVED_CONTEXT` populated.
/// Confirm whether callers rely on or must clean up that state.
extern "C" fn enter_sleep_raw_inner(state: usize, entry_rsp: usize) -> usize {
|
||||
// Translate the raw number into the typed sleep state.
let state = match state {
|
||||
3 => SleepState::S3,
|
||||
||||
5 => SleepState::S5,
|
||||
_ => return SleepError::InvalidSleepObject.code(),
|
||||
};
|
||||
|
||||
let (registers, mut facs, interpreter) = match load_interpreter() {
|
||||
Ok(tuple) => tuple,
|
||||
Err(error) => return error.code(),
|
||||
};
|
||||
let sleep_type = match sleep_type_data_from_interpreter(&interpreter, state) {
|
||||
Ok(data) => data,
|
||||
Err(error) => return error.code(),
|
||||
};
|
||||
|
||||
let mut context = save_cpu_context(entry_rsp);
|
||||
// Remember where the FACS lives so the resume path can clear the waking vector.
context.facs_address = facs.physical_start;
|
||||
install_wake_trampoline(context.runtime_rsp, context.cr3);
|
||||
set_firmware_waking_vector(&mut facs, WAKE_TRAMPOLINE_PHYS);
|
||||
|
||||
// Scoped so the lock guard is released before interrupts are disabled.
{
|
||||
let mut saved = SAVED_CONTEXT.lock();
|
||||
*saved = Some(context);
|
||||
}
|
||||
|
||||
// SAFETY: Suspend entry must not be interrupted while the wake vector and PM1 control block are being armed.
|
||||
unsafe {
|
||||
interrupt::disable();
|
||||
}
|
||||
|
||||
if let Err(error) = write_pm1_control_block(registers.as_ref(), sleep_type) {
|
||||
return error.code();
|
||||
}
|
||||
|
||||
// SAFETY: The final CLI+HLT sequence is the architectural handoff point after asserting SLP_EN.
|
||||
unsafe {
|
||||
core::arch::asm!("cli; hlt", options(nostack));
|
||||
}
|
||||
|
||||
// Only reached if execution continues past the halt.
SleepError::SleepDidNotEnter.code()
|
||||
}
|
||||
|
||||
/// Resume path after an S3 wake; never returns normally.
///
/// Takes the context stored in `SAVED_CONTEXT` (panicking if there is none),
/// clears the firmware waking vector when a FACS address was recorded, restores
/// EFER, CR3/CR4/CR2/CR0, GDTR, IDTR, the task register, the FS/GS base MSRs and
/// the FPU state, then switches to the saved entry stack, restores RFLAGS,
/// zeroes `eax` and executes `ret`. That makes the original call to
/// `enter_sleep_raw` appear to return 0.
///
/// NOTE(review): presumably this is the address written to the wake
/// trampoline's `.code` slot by `install_wake_trampoline` — not visible here.
/// NOTE(review): no general-purpose callee-saved registers are restored in this
/// function before the final `ret`; confirm they are handled elsewhere.
extern "C" fn resume_from_s3_trampoline() -> ! {
|
||||
let mut saved = SAVED_CONTEXT.lock();
|
||||
let context = saved.take().expect("S3 wake trampoline resumed without saved CPU context");
|
||||
// Release the lock before touching hardware state.
drop(saved);
|
||||
|
||||
// SAFETY: The saved FACS physical address was captured from the validated FADT during suspend entry.
|
||||
if context.facs_address != 0 {
|
||||
let mut facs = unsafe {
|
||||
KernelAcpiHandler.map_physical_region::<Facs>(
|
||||
context.facs_address,
|
||||
core::mem::size_of::<Facs>(),
|
||||
)
|
||||
};
|
||||
// Disarm the waking vector that suspend entry pointed at the trampoline.
set_firmware_waking_vector(&mut facs, 0);
|
||||
}
|
||||
|
||||
// SAFETY: The wake trampoline already switched to the saved kernel CR3 and long mode, so the remaining restores are architectural register state only.
|
||||
// NOTE(review): CR3 is nevertheless reloaded below from the saved context.
unsafe {
|
||||
x86::msr::wrmsr(x86::msr::IA32_EFER, context.efer);
|
||||
core::arch::asm!("mov cr3, {}", in(reg) context.cr3, options(nostack));
|
||||
core::arch::asm!("mov cr4, {}", in(reg) context.cr4, options(nostack));
|
||||
core::arch::asm!("mov cr2, {}", in(reg) context.cr2, options(nostack));
|
||||
core::arch::asm!("mov cr0, {}", in(reg) context.cr0, options(nostack));
|
||||
core::arch::asm!("lgdt [{}]", in(reg) &context.gdtr, options(nostack));
|
||||
core::arch::asm!("lidt [{}]", in(reg) &context.idtr, options(nostack));
|
||||
|
||||
task::load_tr(SegmentSelector::new(crate::arch::gdt::GDT_TSS as u16, Ring::Ring0));
|
||||
|
||||
x86::msr::wrmsr(x86::msr::IA32_FS_BASE, context.fs_base);
|
||||
x86::msr::wrmsr(x86::msr::IA32_GS_BASE, context.gs_base);
|
||||
x86::msr::wrmsr(x86::msr::IA32_KERNEL_GSBASE, context.kernel_gs_base);
|
||||
}
|
||||
|
||||
restore_fpu_state(&context);
|
||||
|
||||
// SAFETY: Returning with the original entry stack and RFLAGS completes the suspend call as a successful function return.
|
||||
unsafe {
|
||||
core::arch::asm!(
|
||||
// Back onto the stack as it was when `enter_sleep_raw` was entered.
"mov rsp, {entry_rsp}",
|
||||
// Restore RFLAGS through the stack; rsp equals entry_rsp again afterwards.
"push {rflags}",
|
||||
"popfq",
|
||||
// Return value 0 (writing eax zero-extends into rax).
"xor eax, eax",
|
||||
// Pops the return address the original caller pushed.
"ret",
|
||||
entry_rsp = in(reg) context.entry_rsp,
|
||||
rflags = in(reg) context.rflags,
|
||||
options(noreturn)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn enter_sleep_state(state: SleepState) -> core::result::Result<(), SleepError> {
|
||||
#[cfg(not(target_arch = "x86_64"))]
|
||||
{
|
||||
let _ = state;
|
||||
return Err(SleepError::UnsupportedArch);
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
let raw = unsafe {
|
||||
enter_sleep_raw(match state {
|
||||
SleepState::S3 => 3,
|
||||
SleepState::S5 => 5,
|
||||
})
|
||||
};
|
||||
if raw == SLEEP_RETURN_OK {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(SleepError::from_code(raw))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn available_sleep_states() -> &'static [u8] {
|
||||
if sleep_type_data(SleepState::S3).is_ok() {
|
||||
b"S3\nS5\n"
|
||||
} else {
|
||||
b"S5\n"
|
||||
}
|
||||
}
|
||||
|
||||
pub fn trigger_sleep_request(request: &str) -> Result<(), Error> {
|
||||
match request.trim() {
|
||||
"S3" => enter_sleep_state(SleepState::S3).map_err(|_| Error::new(EIO)),
|
||||
"S5" => enter_sleep_state(SleepState::S5).map_err(|_| Error::new(EIO)),
|
||||
_ => Err(Error::new(EINVAL)),
|
||||
}
|
||||
}
|
||||
@@ -82,15 +82,6 @@ extern "C" fn kstart() {
|
||||
/// The entry to Rust, all things must be initialized
|
||||
unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
|
||||
unsafe {
|
||||
// EARLY CANARY: write 'R' to COM1 before any kernel init.
|
||||
// This proves the serial hardware works and the kernel reached Rust entry.
|
||||
// If this character appears but "RedBear OS starting..." does not,
|
||||
// the hang is in args_ptr.read(), serial::init(), or graphical_debug::init().
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'R', options(nostack, preserves_flags));
|
||||
}
|
||||
|
||||
let bootstrap = {
|
||||
let args = args_ptr.read();
|
||||
|
||||
@@ -100,49 +91,27 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
|
||||
// Set up graphical debug
|
||||
graphical_debug::init(args.env());
|
||||
|
||||
// SECOND CANARY: write 'S' to COM1 after serial init.
|
||||
// If 'R' appears but 'S' does not, the hang is in serial::init() or graphical_debug::init().
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'S', options(nostack, preserves_flags));
|
||||
}
|
||||
|
||||
info!("RedBear OS starting...");
|
||||
info!("Redox OS starting...");
|
||||
args.print();
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'1', options(nostack, preserves_flags)); }
|
||||
|
||||
// Set up GDT
|
||||
gdt::init_bsp(stack_end);
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'2', options(nostack, preserves_flags)); }
|
||||
|
||||
// Set up IDT
|
||||
idt::init_bsp();
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'3', options(nostack, preserves_flags)); }
|
||||
|
||||
// Initialize RMM
|
||||
#[cfg(target_arch = "x86")]
|
||||
crate::startup::memory::init(&args, Some(0x100000), Some(0x40000000));
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
crate::startup::memory::init(&args, Some(0x100000), None);
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'4', options(nostack, preserves_flags)); }
|
||||
|
||||
// Initialize paging
|
||||
paging::init();
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
crate::arch::alternative::early_init(true);
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'5', options(nostack, preserves_flags)); }
|
||||
|
||||
// Set up syscall instruction
|
||||
interrupt::syscall::init();
|
||||
|
||||
@@ -152,9 +121,6 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
|
||||
// Activate memory logging
|
||||
crate::log::init();
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'6', options(nostack, preserves_flags)); }
|
||||
|
||||
// Initialize miscellaneous processor features
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
crate::arch::misc::init(LogicalCpuId::BSP);
|
||||
@@ -162,9 +128,6 @@ unsafe extern "C" fn start(args_ptr: *const KernelArgs, stack_end: usize) -> ! {
|
||||
// Initialize devices
|
||||
device::init();
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{ core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'7', options(nostack, preserves_flags)); }
|
||||
|
||||
// Read ACPI tables, starts APs
|
||||
if cfg!(feature = "acpi") {
|
||||
crate::acpi::init(args.acpi_rsdp());
|
||||
|
||||
@@ -1,110 +0,0 @@
|
||||
; ACPI S3 wake trampoline
|
||||
; compiled with nasm by build.rs, copied to physical 0x8000 before S3 entry
|
||||
|
||||
ORG 0x8000
|
||||
SECTION .text
|
||||
USE16
|
||||
|
||||
; Entry stub followed by three 8-byte parameter slots. startup_wake reads
; `.page_table`; long_mode_wake reads `.stack` and `.code`.
trampoline:
|
||||
jmp short startup_wake
|
||||
; Pad with NOPs so the parameter slots begin at offset 8 from `trampoline`.
times 8 - ($ - trampoline) nop
|
||||
; Value loaded into RSP once in 64-bit mode.
.stack: dq 0
|
||||
; Page-table root; only its low 32 bits are loaded into CR3 below.
.page_table: dq 0
|
||||
; Address jumped to once in 64-bit mode.
.code: dq 0
|
||||
|
||||
; 16-bit entry: set up flat real-mode segments, then enable paging and long
; mode in one go and far-jump into the 64-bit code segment.
startup_wake:
|
||||
cli
|
||||
|
||||
; Zero the data and stack segments; stack pointer starts at 0.
xor ax, ax
|
||||
mov ds, ax
|
||||
mov es, ax
|
||||
mov ss, ax
|
||||
mov sp, 0
|
||||
|
||||
; 32-bit load: the page-table root must therefore sit below 4 GiB.
mov edi, [trampoline.page_table]
|
||||
mov cr3, edi
|
||||
|
||||
; CR0: clear bits 2 and 3 (EM, TS), set bits 1 and 5 (MP, NE).
mov eax, cr0
|
||||
and al, 11110011b
|
||||
or al, 00100010b
|
||||
mov cr0, eax
|
||||
|
||||
; CR4: set bit 9 (OSFXSR), bit 7 (PGE), bit 5 (PAE), bit 4 (PSE).
mov eax, cr4
|
||||
or eax, 1 << 9 | 1 << 7 | 1 << 5 | 1 << 4
|
||||
mov cr4, eax
|
||||
|
||||
fninit
|
||||
|
||||
lgdt [gdtr]
|
||||
|
||||
; MSR 0xC0000080 (EFER): set bit 11 (NXE) and bit 8 (LME).
mov ecx, 0xC0000080
|
||||
rdmsr
|
||||
or eax, 1 << 11 | 1 << 8
|
||||
wrmsr
|
||||
|
||||
; CR0: set bit 31 (PG), bit 16 (WP) and bit 0 (PE) to activate long mode.
mov ebx, cr0
|
||||
or ebx, 1 << 31 | 1 << 16 | 1
|
||||
mov cr0, ebx
|
||||
|
||||
; Far jump reloads CS with the 64-bit code selector.
jmp gdt.kernel_code:long_mode_wake
|
||||
|
||||
USE64
|
||||
; 64-bit continuation: load the data selector into every data segment
; register, switch to the stack from `trampoline.stack`, and jump to the
; address stored in `trampoline.code`.
long_mode_wake:
|
||||
mov rax, gdt.kernel_data
|
||||
mov ds, rax
|
||||
mov es, rax
|
||||
mov fs, rax
|
||||
mov gs, rax
|
||||
mov ss, rax
|
||||
|
||||
mov rsp, [trampoline.stack]
|
||||
; Indirect jump through a register to the 64-bit target address.
mov rax, [trampoline.code]
|
||||
jmp rax
|
||||
|
||||
struc GDTEntry
|
||||
.limitl resw 1
|
||||
.basel resw 1
|
||||
.basem resb 1
|
||||
.attribute resb 1
|
||||
.flags__limith resb 1
|
||||
.baseh resb 1
|
||||
endstruc
|
||||
|
||||
attrib:
|
||||
.present equ 1 << 7
|
||||
.user equ 1 << 4
|
||||
.code equ 1 << 3
|
||||
.writable equ 1 << 1
|
||||
|
||||
flags:
|
||||
.long_mode equ 1 << 5
|
||||
|
||||
; Pseudo-descriptor consumed by `lgdt [gdtr]`: a 16-bit limit followed by the
; base address of the table.
gdtr:
    ; The limit is the offset of the last valid byte of the table, i.e. the
    ; table size minus one (`gdt.end` is the size in bytes).
    dw gdt.end - 1
    ; Base address of the descriptor table.
    dq gdt
|
||||
|
||||
gdt:
|
||||
.null equ $ - gdt
|
||||
dq 0
|
||||
|
||||
.kernel_code equ $ - gdt
|
||||
istruc GDTEntry
|
||||
at GDTEntry.limitl, dw 0
|
||||
at GDTEntry.basel, dw 0
|
||||
at GDTEntry.basem, db 0
|
||||
at GDTEntry.attribute, db attrib.present | attrib.user | attrib.code
|
||||
at GDTEntry.flags__limith, db flags.long_mode
|
||||
at GDTEntry.baseh, db 0
|
||||
iend
|
||||
|
||||
.kernel_data equ $ - gdt
|
||||
istruc GDTEntry
|
||||
at GDTEntry.limitl, dw 0
|
||||
at GDTEntry.basel, dw 0
|
||||
at GDTEntry.basem, db 0
|
||||
at GDTEntry.attribute, db attrib.present | attrib.user | attrib.writable
|
||||
at GDTEntry.flags__limith, db 0
|
||||
at GDTEntry.baseh, db 0
|
||||
iend
|
||||
|
||||
.end equ $ - gdt
|
||||
@@ -4,10 +4,16 @@ use crate::{
|
||||
percpu::PercpuBlock,
|
||||
syscall::FloatRegisters,
|
||||
};
|
||||
use core::{mem::offset_of, ptr};
|
||||
use core::{mem::offset_of, ptr, sync::atomic::AtomicBool};
|
||||
use spin::Once;
|
||||
use syscall::{EnvRegisters, Result};
|
||||
|
||||
/// This must be used by the kernel to ensure that context switches are done atomically
|
||||
/// Compare and exchange this to true when beginning a context switch on any CPU
|
||||
/// The `Context::switch_to` function will set it back to false, allowing other CPUs to switch
|
||||
/// This must be done, as no locks can be held on the stack during switch
|
||||
pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
// 512 bytes for registers, extra bytes for fpcr and fpsr
|
||||
pub const KFX_ALIGN: usize = 16;
|
||||
|
||||
|
||||
@@ -2,11 +2,13 @@ use crate::{
|
||||
arch::interrupt::InterruptStack, context::context::Kstack, memory::RmmA, percpu::PercpuBlock,
|
||||
syscall::FloatRegisters,
|
||||
};
|
||||
use core::mem::offset_of;
|
||||
use core::{mem::offset_of, sync::atomic::AtomicBool};
|
||||
use rmm::{Arch, VirtualAddress};
|
||||
use spin::Once;
|
||||
use syscall::{error::*, EnvRegisters};
|
||||
|
||||
pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
pub const KFX_ALIGN: usize = 16;
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use core::mem::offset_of;
|
||||
use core::{mem::offset_of, sync::atomic::AtomicBool};
|
||||
use rmm::{Arch, VirtualAddress};
|
||||
use spin::Once;
|
||||
use syscall::{error::*, EnvRegisters};
|
||||
@@ -14,6 +14,12 @@ use crate::{
|
||||
syscall::FloatRegisters,
|
||||
};
|
||||
|
||||
/// This must be used by the kernel to ensure that context switches are done atomically
|
||||
/// Compare and exchange this to true when beginning a context switch on any CPU
|
||||
/// The `Context::switch_to` function will set it back to false, allowing other CPUs to switch
|
||||
/// This must be done, as no locks can be held on the stack during switch
|
||||
pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
const ST_RESERVED: u128 = 0xFFFF_FFFF_FFFF_0000_0000_0000_0000_0000;
|
||||
|
||||
pub const KFX_ALIGN: usize = 16;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use core::{
|
||||
ptr::{addr_of, addr_of_mut},
|
||||
sync::atomic::AtomicBool,
|
||||
};
|
||||
|
||||
use crate::syscall::FloatRegisters;
|
||||
@@ -11,6 +12,12 @@ use spin::Once;
|
||||
use syscall::{error::*, EnvRegisters};
|
||||
use x86::msr;
|
||||
|
||||
/// This must be used by the kernel to ensure that context switches are done atomically
|
||||
/// Compare and exchange this to true when beginning a context switch on any CPU
|
||||
/// The `Context::switch_to` function will set it back to false, allowing other CPUs to switch
|
||||
/// This must be done, as no locks can be held on the stack during switch
|
||||
pub static CONTEXT_SWITCH_LOCK: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
const ST_RESERVED: u128 = 0xFFFF_FFFF_FFFF_0000_0000_0000_0000_0000;
|
||||
|
||||
#[cfg(cpu_feature_never = "xsave")]
|
||||
|
||||
@@ -148,8 +148,6 @@ pub struct Context {
|
||||
pub euid: u32,
|
||||
pub egid: u32,
|
||||
pub pid: usize,
|
||||
/// Supplementary group IDs for access control decisions.
|
||||
pub groups: Vec<u32>,
|
||||
|
||||
// See [`PreemptGuard`]
|
||||
//
|
||||
@@ -206,7 +204,6 @@ impl Context {
|
||||
euid: 0,
|
||||
egid: 0,
|
||||
pid: 0,
|
||||
groups: Vec::new(),
|
||||
|
||||
#[cfg(feature = "syscall_debug")]
|
||||
syscall_debug_info: crate::syscall::debug::SyscallDebugInfo::default(),
|
||||
@@ -482,7 +479,6 @@ impl Context {
|
||||
uid: self.euid,
|
||||
gid: self.egid,
|
||||
pid: self.pid,
|
||||
groups: self.groups.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ use crate::{
|
||||
event,
|
||||
scheme::{self, SchemeId},
|
||||
sync::{CleanLockToken, RwLock, L6},
|
||||
syscall::error::{Error, Result, ESTALE},
|
||||
syscall::error::Result,
|
||||
};
|
||||
use alloc::sync::Arc;
|
||||
use syscall::{schemev2::NewFdFlags, RwFlags, O_APPEND, O_NONBLOCK};
|
||||
@@ -18,7 +18,6 @@ pub struct FileDescription {
|
||||
pub offset: u64,
|
||||
/// The scheme that this file refers to
|
||||
pub scheme: SchemeId,
|
||||
pub scheme_generation: Option<u64>,
|
||||
/// The number the scheme uses to refer to this file
|
||||
pub number: usize,
|
||||
/// The flags passed to open or fcntl(SETFL)
|
||||
@@ -33,52 +32,6 @@ bitflags! {
|
||||
}
|
||||
}
|
||||
impl FileDescription {
|
||||
pub fn with_generation(
|
||||
scheme: SchemeId,
|
||||
scheme_generation: Option<u64>,
|
||||
number: usize,
|
||||
offset: u64,
|
||||
flags: u32,
|
||||
internal_flags: InternalFlags,
|
||||
) -> Self {
|
||||
Self {
|
||||
offset,
|
||||
scheme,
|
||||
scheme_generation,
|
||||
number,
|
||||
flags,
|
||||
internal_flags,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(
|
||||
scheme: SchemeId,
|
||||
number: usize,
|
||||
offset: u64,
|
||||
flags: u32,
|
||||
internal_flags: InternalFlags,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Self {
|
||||
Self::with_generation(
|
||||
scheme,
|
||||
Some(scheme::current_scheme_generation(token.token(), scheme)),
|
||||
number,
|
||||
offset,
|
||||
flags,
|
||||
internal_flags,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_scheme(&self, token: &mut CleanLockToken) -> Result<scheme::KernelSchemes> {
|
||||
if let Some(expected_generation) = self.scheme_generation
|
||||
&& expected_generation != scheme::current_scheme_generation(token.token(), self.scheme)
|
||||
{
|
||||
return Err(Error::new(ESTALE));
|
||||
}
|
||||
|
||||
scheme::get_scheme(token.token(), self.scheme)
|
||||
}
|
||||
|
||||
pub fn rw_flags(&self, rw: RwFlags) -> u32 {
|
||||
let mut ret = self.flags & !(O_NONBLOCK | O_APPEND) as u32;
|
||||
if rw.contains(RwFlags::APPEND) {
|
||||
@@ -123,7 +76,7 @@ impl FileDescription {
|
||||
pub fn try_close(self, token: &mut CleanLockToken) -> Result<()> {
|
||||
event::unregister_file(self.scheme, self.number, token);
|
||||
|
||||
let scheme = self.get_scheme(token)?;
|
||||
let scheme = scheme::get_scheme(token.token(), self.scheme)?;
|
||||
|
||||
scheme.close(self.number, token)
|
||||
}
|
||||
@@ -132,12 +85,12 @@ impl FileDescription {
|
||||
impl FileDescriptor {
|
||||
pub fn close(self, token: &mut CleanLockToken) -> Result<()> {
|
||||
{
|
||||
let (desc, number, internal_flags) = {
|
||||
let (scheme_id, number, internal_flags) = {
|
||||
let desc = self.description.read(token.token());
|
||||
(*desc, desc.number, desc.internal_flags)
|
||||
(desc.scheme, desc.number, desc.internal_flags)
|
||||
};
|
||||
if internal_flags.contains(InternalFlags::NOTIFY_ON_NEXT_DETACH) {
|
||||
let scheme = desc.get_scheme(token)?;
|
||||
let scheme = scheme::get_scheme(token.token(), scheme_id)?;
|
||||
scheme.detach(number, token)?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,13 +64,14 @@ impl UnmapResult {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let (scheme, number) = {
|
||||
let desc = *description.read(token.token());
|
||||
(desc.get_scheme(token)?, desc.number)
|
||||
let (scheme_id, number) = {
|
||||
let desc = description.write(token.token());
|
||||
(desc.scheme, desc.number)
|
||||
};
|
||||
|
||||
let funmap_result = scheme
|
||||
.kfunmap(number, base_offset, self.size, self.flags, token);
|
||||
let scheme_opt = scheme::get_scheme(token.token(), scheme_id);
|
||||
let funmap_result = scheme_opt
|
||||
.and_then(|scheme| scheme.kfunmap(number, base_offset, self.size, self.flags, token));
|
||||
|
||||
if let Ok(fd) = Arc::try_unwrap(description) {
|
||||
fd.into_inner().try_close(token)?;
|
||||
@@ -2686,13 +2687,20 @@ fn correct_inner<'l>(
|
||||
// XXX: This is cheating, but guaranteed we won't deadlock because we've dropped addr_space_guard
|
||||
let mut token = unsafe { CleanLockToken::new() };
|
||||
|
||||
let desc = *file_ref.description.read(token.token());
|
||||
let scheme = desc.get_scheme(&mut token).map_err(|_| PfError::Segv)?;
|
||||
let scheme_number = desc.number;
|
||||
let user_inner = match scheme {
|
||||
KernelSchemes::User(user) => user.inner,
|
||||
_ => return Err(PfError::Segv),
|
||||
let (scheme_id, scheme_number) = {
|
||||
let desc = &file_ref.description.read(token.token());
|
||||
(desc.scheme, desc.number)
|
||||
};
|
||||
let user_inner = scheme::get_scheme(token.token(), scheme_id)
|
||||
.ok()
|
||||
.and_then(|s| {
|
||||
if let KernelSchemes::User(user) = s {
|
||||
Some(user.inner)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.ok_or(PfError::Segv)?;
|
||||
|
||||
let offset = file_ref.base_offset as u64 + (pages_from_grant_start * PAGE_SIZE) as u64;
|
||||
user_inner
|
||||
|
||||
@@ -14,8 +14,8 @@ use crate::{
|
||||
memory::{RmmA, RmmArch, TableKind},
|
||||
percpu::PercpuBlock,
|
||||
sync::{
|
||||
ArcRwLockWriteGuard, CleanLockToken, LockToken, McsMutex, McsMutexGuard, Mutex,
|
||||
MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard, L0, L1, L2, L4,
|
||||
ArcRwLockWriteGuard, CleanLockToken, LockToken, Mutex, MutexGuard, RwLock, RwLockReadGuard,
|
||||
RwLockWriteGuard, L0, L1, L2, L4,
|
||||
},
|
||||
syscall::error::Result,
|
||||
};
|
||||
@@ -74,12 +74,10 @@ pub use self::arch::empty_cr3;
|
||||
// the context file descriptors.
|
||||
static CONTEXTS: RwLock<L2, BTreeSet<ContextRef>> = RwLock::new(BTreeSet::new());
|
||||
|
||||
// Actual context store for the scheduler — uses MCS fair spinlock to
|
||||
// eliminate cache-line bouncing under multi-CPU contention.
|
||||
static RUN_CONTEXTS: McsMutex<L1, RunContextData> = McsMutex::new(RunContextData::new());
|
||||
// Actual context store for the scheduler
|
||||
static RUN_CONTEXTS: Mutex<L1, RunContextData> = Mutex::new(RunContextData::new());
|
||||
|
||||
// Context that has been pushed out from RUN_CONTEXTS after being idle.
|
||||
// Uses regular Mutex (lower contention; wakeup_contexts uses try_lock).
|
||||
// Context that has been pushed out from RUN_CONTEXTS after being idle
|
||||
static IDLE_CONTEXTS: Mutex<L2, VecDeque<WeakContextRef>> = Mutex::new(VecDeque::new());
|
||||
|
||||
pub struct RunContextData {
|
||||
@@ -115,7 +113,7 @@ pub fn idle_contexts_try(
|
||||
IDLE_CONTEXTS.try_lock(token)
|
||||
}
|
||||
|
||||
pub fn run_contexts(token: LockToken<'_, L0>) -> McsMutexGuard<'_, L1, RunContextData> {
|
||||
pub fn run_contexts(token: LockToken<'_, L0>) -> MutexGuard<'_, L1, RunContextData> {
|
||||
RUN_CONTEXTS.lock(token)
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ use crate::{
|
||||
use alloc::{sync::Arc, vec::Vec};
|
||||
use core::{
|
||||
cell::{Cell, RefCell},
|
||||
mem,
|
||||
hint, mem,
|
||||
sync::atomic::Ordering,
|
||||
};
|
||||
use syscall::PtraceFlags;
|
||||
@@ -26,11 +26,6 @@ enum UpdateResult {
|
||||
Blocked,
|
||||
}
|
||||
|
||||
/// Default number of PIT ticks before triggering a context switch.
|
||||
/// At ~2.25 ms per tick, 3 ticks ≈ 6.75 ms timeslice.
|
||||
/// Configurable per-CPU via `ContextSwitchPercpu::preempt_interval`.
|
||||
const DEFAULT_PREEMPT_INTERVAL: usize = 3;
|
||||
|
||||
// A simple geometric series where value[i] ~= value[i - 1] * 1.25
|
||||
const SCHED_PRIO_TO_WEIGHT: [usize; 40] = [
|
||||
88761, 71755, 56483, 46273, 36291, 29154, 23254, 18705, 14949, 11916, 9548, 7620, 6100, 4904,
|
||||
@@ -95,15 +90,13 @@ struct SwitchResultInner {
|
||||
///
|
||||
/// The function also calls the signal handler after switching contexts.
|
||||
pub fn tick(token: &mut CleanLockToken) {
|
||||
let percpu = PercpuBlock::current();
|
||||
let ticks_cell = &percpu.switch_internals.pit_ticks;
|
||||
let ticks_cell = &PercpuBlock::current().switch_internals.pit_ticks;
|
||||
|
||||
let new_ticks = ticks_cell.get() + 1;
|
||||
ticks_cell.set(new_ticks);
|
||||
|
||||
// Trigger a context switch when the per-CPU preempt interval is reached.
|
||||
let interval = percpu.switch_internals.preempt_interval.get();
|
||||
if new_ticks >= interval {
|
||||
// Trigger a context switch after every 3 ticks (approx. 6.75 ms).
|
||||
if new_ticks >= 3 {
|
||||
switch(token);
|
||||
crate::context::signal::signal_handler(token);
|
||||
}
|
||||
@@ -127,10 +120,7 @@ pub unsafe extern "C" fn switch_finish_hook() {
|
||||
crate::arch::stop::emergency_reset();
|
||||
}
|
||||
}
|
||||
PercpuBlock::current()
|
||||
.switch_internals
|
||||
.in_context_switch
|
||||
.set(false);
|
||||
arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst);
|
||||
crate::percpu::switch_arch_hook();
|
||||
}
|
||||
}
|
||||
@@ -160,15 +150,16 @@ pub fn switch(token: &mut CleanLockToken) -> SwitchResult {
|
||||
//set PIT Interrupt counter to 0, giving each process same amount of PIT ticks
|
||||
percpu.switch_internals.pit_ticks.set(0);
|
||||
|
||||
// Acquire the per-CPU context switch flag. Each CPU can only be in one context
|
||||
// switch at a time. The per-context write locks provide cross-CPU safety; this
|
||||
// flag catches re-entrant switches on the same CPU (a kernel bug).
|
||||
debug_assert!(
|
||||
!percpu.switch_internals.in_context_switch.get(),
|
||||
"context switch re-entry on CPU {}",
|
||||
percpu.cpu_id
|
||||
);
|
||||
percpu.switch_internals.in_context_switch.set(true);
|
||||
// Acquire the global lock to ensure exclusive access during context switch and avoid
|
||||
// issues that would be caused by the unsafe operations below
|
||||
// TODO: Better memory orderings?
|
||||
while arch::CONTEXT_SWITCH_LOCK
|
||||
.compare_exchange_weak(false, true, Ordering::SeqCst, Ordering::Relaxed)
|
||||
.is_err()
|
||||
{
|
||||
hint::spin_loop();
|
||||
percpu.maybe_handle_tlb_shootdown();
|
||||
}
|
||||
|
||||
// Lock the previous context.
|
||||
let prev_context_lock = crate::context::current();
|
||||
@@ -176,8 +167,8 @@ pub fn switch(token: &mut CleanLockToken) -> SwitchResult {
|
||||
let mut prev_context_guard = unsafe { prev_context_lock.write_arc() };
|
||||
|
||||
if !prev_context_guard.is_preemptable() {
|
||||
// Unset per-CPU context switch flag
|
||||
percpu.switch_internals.in_context_switch.set(false);
|
||||
// Unset global lock
|
||||
arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst);
|
||||
|
||||
// Pretend to have finished switching, so CPU is not idled
|
||||
return SwitchResult::Switched;
|
||||
@@ -301,8 +292,8 @@ pub fn switch(token: &mut CleanLockToken) -> SwitchResult {
|
||||
SwitchResult::Switched
|
||||
}
|
||||
_ => {
|
||||
// No target was found, unset per-CPU context switch flag and return
|
||||
percpu.switch_internals.in_context_switch.set(false);
|
||||
// No target was found, unset global lock and return
|
||||
arch::CONTEXT_SWITCH_LOCK.store(false, Ordering::SeqCst);
|
||||
|
||||
percpu.stats.set_state(cpu_stats::CpuState::Idle);
|
||||
|
||||
@@ -361,7 +352,6 @@ fn wakeup_contexts(token: &mut CleanLockToken, switch_time: u128) -> Vec<(usize,
|
||||
}
|
||||
|
||||
/// This is the scheduler function which currently utilises Deficit Weighted Round Robin Scheduler
|
||||
/// with NUMA-aware context selection preference.
|
||||
fn select_next_context(
|
||||
token: &mut CleanLockToken,
|
||||
percpu: &PercpuBlock,
|
||||
@@ -387,10 +377,6 @@ fn select_next_context(
|
||||
let total_contexts: usize = contexts_list.iter().map(|q| q.len()).sum();
|
||||
let mut skipped_contexts = 0;
|
||||
|
||||
// NUMA-aware selection: remember cross-node fallback candidate.
|
||||
let my_numa_node = percpu.numa_node.get();
|
||||
let mut cross_node_fallback: Option<(usize, ArcContextLockWriteGuard)> = None;
|
||||
|
||||
'priority: loop {
|
||||
i = (i + 1) % 40;
|
||||
total_iters += 1;
|
||||
@@ -455,44 +441,9 @@ fn select_next_context(
|
||||
// Is this context runnable on this CPU?
|
||||
let sw = unsafe { update_runnable(&mut next_context_guard, cpu_id, switch_time) };
|
||||
if let UpdateResult::CanSwitch = sw {
|
||||
// NUMA-aware selection: check if this context's last CPU was on the same node.
|
||||
let same_node = if my_numa_node != u8::MAX {
|
||||
next_context_guard.cpu_id
|
||||
.map(|cid| {
|
||||
crate::percpu::get_for_cpu(cid)
|
||||
.map(|p| p.numa_node.get() == my_numa_node)
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.unwrap_or(true) // New context (no last CPU) — treat as same node
|
||||
} else {
|
||||
true // No NUMA info — treat all as same node
|
||||
};
|
||||
|
||||
if same_node {
|
||||
// Cache-warm: select immediately
|
||||
percpu.current_prio.set(next_context_guard.prio);
|
||||
next_context_guard_opt = Some(next_context_guard);
|
||||
balance[i] -= SCHED_PRIO_TO_WEIGHT[20];
|
||||
break 'priority;
|
||||
} else {
|
||||
// Cross-node candidate: save as fallback, keep scanning for same-node
|
||||
if cross_node_fallback.is_none() {
|
||||
// Cache the priority and balance for later
|
||||
cross_node_fallback =
|
||||
Some((next_context_guard.prio, next_context_guard));
|
||||
balance[i] -= SCHED_PRIO_TO_WEIGHT[20];
|
||||
// Don't break — keep looking for a same-node context
|
||||
continue;
|
||||
} else {
|
||||
// Already have a cross-node fallback; push this one back
|
||||
contexts.push_back(next_context_ref);
|
||||
skipped_contexts += 1;
|
||||
if skipped_contexts >= total_contexts {
|
||||
break 'priority;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
next_context_guard_opt = Some(next_context_guard);
|
||||
balance[i] -= SCHED_PRIO_TO_WEIGHT[20];
|
||||
break 'priority;
|
||||
} else {
|
||||
if matches!(sw, UpdateResult::Blocked) {
|
||||
idle_contexts(token.token()).push_back(next_context_ref);
|
||||
@@ -507,15 +458,6 @@ fn select_next_context(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we found a cross-node fallback but no same-node context, use it
|
||||
if next_context_guard_opt.is_none() {
|
||||
if let Some((prio, guard)) = cross_node_fallback {
|
||||
percpu.current_prio.set(prio);
|
||||
next_context_guard_opt = Some(guard);
|
||||
}
|
||||
}
|
||||
|
||||
percpu.balance.set(balance);
|
||||
percpu.last_queue.set(i);
|
||||
|
||||
@@ -523,10 +465,7 @@ fn select_next_context(
|
||||
// Send the old process to the back of the line (if it is still runnable)
|
||||
let prev_ctx = WeakContextRef(Arc::downgrade(&prev_context_lock));
|
||||
if prev_context_guard.status.is_runnable() {
|
||||
let raw_prio = prev_context_guard.prio;
|
||||
let prio = percpu.effective_prio(raw_prio);
|
||||
// Clear PI donation — previous context is being re-queued
|
||||
percpu.pi_donated_prio.store(u32::MAX, Ordering::Relaxed);
|
||||
let prio = prev_context_guard.prio;
|
||||
contexts_list[prio].push_back(prev_ctx);
|
||||
} else {
|
||||
idle_contexts(token.token()).push_back(prev_ctx);
|
||||
@@ -538,8 +477,7 @@ fn select_next_context(
|
||||
return Ok(Some(next_context_guard));
|
||||
} else {
|
||||
if !was_idle && !Arc::ptr_eq(&prev_context_lock, &idle_context) {
|
||||
// Switching to idle context — cache lowest priority
|
||||
percpu.current_prio.set(39);
|
||||
// We switch into the idle context
|
||||
Ok(Some(unsafe { idle_context.write_arc() }))
|
||||
} else {
|
||||
// We found no other process to run.
|
||||
@@ -556,13 +494,6 @@ pub struct ContextSwitchPercpu {
|
||||
switch_result: Cell<Option<SwitchResultInner>>,
|
||||
switch_time: Cell<u128>,
|
||||
pit_ticks: Cell<usize>,
|
||||
/// Per-CPU context switch flag. Set to true during a context switch on this CPU.
|
||||
/// Replaced the global CONTEXT_SWITCH_LOCK to eliminate cross-CPU serialization.
|
||||
in_context_switch: Cell<bool>,
|
||||
/// Number of PIT ticks before triggering a context switch.
|
||||
/// Default: 3 (≈6.75 ms). Lower values improve interactive responsiveness;
|
||||
/// higher values improve throughput for batch/compute workloads.
|
||||
preempt_interval: Cell<usize>,
|
||||
|
||||
current_ctxt: RefCell<Option<Arc<ContextLock>>>,
|
||||
|
||||
@@ -577,8 +508,6 @@ impl ContextSwitchPercpu {
|
||||
switch_result: Cell::new(None),
|
||||
switch_time: Cell::new(0),
|
||||
pit_ticks: Cell::new(0),
|
||||
in_context_switch: Cell::new(false),
|
||||
preempt_interval: Cell::new(DEFAULT_PREEMPT_INTERVAL),
|
||||
current_ctxt: RefCell::new(None),
|
||||
idle_ctxt: RefCell::new(None),
|
||||
being_sigkilled: Cell::new(false),
|
||||
|
||||
@@ -42,18 +42,17 @@ impl core::fmt::Display for LogicalCpuId {
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
pub const MAX_CPU_COUNT: u32 = 256;
|
||||
pub const MAX_CPU_COUNT: u32 = 128;
|
||||
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
pub const MAX_CPU_COUNT: u32 = 32;
|
||||
|
||||
const SET_WORDS: usize = (MAX_CPU_COUNT / usize::BITS) as usize;
|
||||
|
||||
// TODO: Support more than 256 CPUs.
|
||||
// TODO: Support more than 128 CPUs.
|
||||
// The maximum number of CPUs on Linux is configurable, and the type for LogicalCpuSet and
|
||||
// LogicalCpuId may be optimized accordingly. In that case, box the mask if it's larger than some
|
||||
// base size (probably 256 bytes). AMD EPYC has 128C/256T, Threadripper PRO 96C/192T —
|
||||
// 256 covers current hardware.
|
||||
// base size (probably 256 bytes).
|
||||
#[derive(Debug)]
|
||||
pub struct LogicalCpuSet([AtomicUsize; SET_WORDS]);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use alloc::sync::Arc;
|
||||
use core::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
|
||||
use core::sync::atomic::{AtomicUsize, Ordering};
|
||||
use hashbrown::{hash_map::DefaultHashBuilder, HashMap};
|
||||
use smallvec::SmallVec;
|
||||
use syscall::data::GlobalSchemes;
|
||||
@@ -23,7 +23,6 @@ int_like!(EventQueueId, AtomicEventQueueId, usize, AtomicUsize);
|
||||
pub struct EventQueue {
|
||||
id: EventQueueId,
|
||||
queue: WaitQueue<Event>,
|
||||
pub eventfd: Option<(AtomicU64, bool)>, // (counter, semaphore_mode)
|
||||
}
|
||||
|
||||
impl EventQueue {
|
||||
@@ -31,15 +30,6 @@ impl EventQueue {
|
||||
EventQueue {
|
||||
id,
|
||||
queue: WaitQueue::new(),
|
||||
eventfd: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_eventfd(id: EventQueueId, initval: u64, semaphore: bool) -> EventQueue {
|
||||
EventQueue {
|
||||
id,
|
||||
queue: WaitQueue::new(),
|
||||
eventfd: Some((AtomicU64::new(initval), semaphore)),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -70,9 +70,6 @@ mod log;
|
||||
/// Memory management
|
||||
mod memory;
|
||||
|
||||
/// NUMA topology
|
||||
mod numa;
|
||||
|
||||
/// Panic
|
||||
mod panic;
|
||||
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
/// NUMA topology hints for the kernel scheduler.
|
||||
///
|
||||
/// NUMA discovery (SRAT/SLIT parsing) is performed during kernel ACPI init
|
||||
/// (`acpi::init()`). The kernel stores a lightweight copy for O(1) scheduling
|
||||
/// lookups. If no SRAT is found, `init_default()` creates a single-node topology.
|
||||
use crate::acpi::srat;
|
||||
use crate::cpu_set::{LogicalCpuId, LogicalCpuSet};
|
||||
use core::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
const MAX_NUMA_NODES: usize = 8;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct NumaHint {
|
||||
pub node_id: u8,
|
||||
pub cpus: LogicalCpuSet,
|
||||
}
|
||||
|
||||
pub struct NumaTopology {
|
||||
pub nodes: [Option<NumaHint>; MAX_NUMA_NODES],
|
||||
pub initialized: AtomicBool,
|
||||
}
|
||||
|
||||
impl NumaTopology {
|
||||
pub const fn new() -> Self {
|
||||
const NONE: Option<NumaHint> = None;
|
||||
Self { nodes: [NONE; MAX_NUMA_NODES], initialized: AtomicBool::new(false) }
|
||||
}
|
||||
|
||||
pub fn node_for_cpu(&self, cpu: LogicalCpuId) -> Option<u8> {
|
||||
for node in self.nodes.iter().flatten() {
|
||||
if node.cpus.contains(cpu) { return Some(node.node_id); }
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn same_node(&self, cpu1: LogicalCpuId, cpu2: LogicalCpuId) -> bool {
|
||||
self.node_for_cpu(cpu1) == self.node_for_cpu(cpu2)
|
||||
}
|
||||
}
|
||||
|
||||
static mut NUMA_TOPOLOGY: NumaTopology = NumaTopology::new();
|
||||
|
||||
pub fn topology() -> &'static NumaTopology { unsafe { &NUMA_TOPOLOGY } }
|
||||
|
||||
/// Initialize NUMA topology from SRAT data parsed during ACPI init.
|
||||
pub fn init_from_srat(apic_ids: &[(u32, LogicalCpuId)]) {
|
||||
let topo = topology();
|
||||
if topo.initialized.swap(true, Ordering::AcqRel) { return; }
|
||||
if !srat::is_available() { init_default_inner(); return; }
|
||||
unsafe {
|
||||
let topo_mut = &mut *core::ptr::addr_of_mut!(NUMA_TOPOLOGY);
|
||||
for &(apic_id, cpu_id) in apic_ids {
|
||||
if let Some(node) = srat::numa_node_for_apic(apic_id) {
|
||||
let idx = node as usize;
|
||||
if idx < MAX_NUMA_NODES {
|
||||
topo_mut.nodes[idx].get_or_insert_with(|| NumaHint { node_id: node, cpus: LogicalCpuSet::empty() }).cpus.atomic_set(cpu_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
if topo_mut.nodes.iter().all(|n| n.is_none()) {
|
||||
topo_mut.nodes[0] = Some(NumaHint { node_id: 0, cpus: LogicalCpuSet::all() });
|
||||
}
|
||||
}
|
||||
let node_count = topology().nodes.iter().filter(|n| n.is_some()).count();
|
||||
debug!("NUMA: {node_count} node(s) from SRAT");
|
||||
}
|
||||
|
||||
/// Fallback: single-node topology.
|
||||
pub fn init_default() {
|
||||
let topo = topology();
|
||||
if topo.initialized.swap(true, Ordering::AcqRel) { return; }
|
||||
init_default_inner();
|
||||
}
|
||||
|
||||
fn init_default_inner() {
|
||||
unsafe {
|
||||
let topo_mut = &mut *core::ptr::addr_of_mut!(NUMA_TOPOLOGY);
|
||||
topo_mut.nodes[0] = Some(NumaHint { node_id: 0, cpus: LogicalCpuSet::all() });
|
||||
}
|
||||
debug!("NUMA: single-node topology (no SRAT)");
|
||||
}
|
||||
@@ -4,14 +4,9 @@ use alloc::{
|
||||
};
|
||||
use core::{
|
||||
cell::{Cell, RefCell},
|
||||
hint,
|
||||
sync::atomic::{AtomicBool, AtomicPtr, AtomicU32, AtomicU64, Ordering},
|
||||
sync::atomic::{AtomicBool, AtomicPtr, Ordering},
|
||||
};
|
||||
|
||||
/// Maximum number of pages to flush individually using INVLPG before falling
|
||||
/// back to a full TLB flush (CR3 reload).
|
||||
const TLB_RANGE_THRESHOLD: u32 = 32;
|
||||
|
||||
use rmm::Arch;
|
||||
use syscall::PtraceFlags;
|
||||
|
||||
@@ -21,7 +16,7 @@ use crate::{
|
||||
cpu_set::{LogicalCpuId, MAX_CPU_COUNT},
|
||||
cpu_stats::{CpuStats, CpuStatsData},
|
||||
ptrace::Session,
|
||||
sync::{mcs::McsNode, mcs::McsRawLock, CleanLockToken},
|
||||
sync::CleanLockToken,
|
||||
syscall::debug::SyscallDebugInfo,
|
||||
};
|
||||
|
||||
@@ -39,38 +34,6 @@ pub struct PercpuBlock {
|
||||
pub balance: Cell<[usize; 40]>,
|
||||
pub last_queue: Cell<usize>,
|
||||
|
||||
/// Per-CPU MCS node for the scheduler run-queue lock (RUN_CONTEXTS).
|
||||
pub mcs_sched_node: McsNode,
|
||||
|
||||
/// Counts how many times the scheduler MCS lock acquisition was contended.
|
||||
pub mcs_contention_count: Cell<u64>,
|
||||
|
||||
/// TLB shootdown range: start virtual address (page-aligned).
|
||||
/// Set to 0 for a full flush. Only valid when `wants_tlb_shootdown` is true.
|
||||
pub tlb_flush_start: AtomicU64,
|
||||
/// TLB shootdown range: number of pages to invalidate.
|
||||
pub tlb_flush_count: AtomicU32,
|
||||
|
||||
/// Priority inheritance donation. When another CPU is blocked waiting on a
|
||||
/// lock this CPU holds, the blocked CPU may donate its priority here.
|
||||
/// `u32::MAX` means no donation; otherwise it's a priority level (0-39).
|
||||
pub pi_donated_prio: AtomicU32,
|
||||
|
||||
/// Cached priority of the currently-running context on this CPU.
|
||||
/// Set by the scheduler when selecting a new context. Read by the MCS
|
||||
/// lock during priority donation — avoids acquiring the context RwLock
|
||||
/// from the spin loop. Default 39 (lowest priority).
|
||||
pub current_prio: Cell<usize>,
|
||||
|
||||
/// NUMA proximity domain for this CPU. Set during ACPI init from SRAT.
|
||||
/// `u8::MAX` means unknown (no SRAT or APIC ID not listed).
|
||||
pub numa_node: Cell<u8>,
|
||||
|
||||
/// Pointer to the MCS lock this CPU is currently spinning on (for transitive PI).
|
||||
/// `null` when not waiting on any lock. Set in McsRawLock::acquire() before
|
||||
/// entering the spin loop, cleared upon acquisition.
|
||||
pub waiting_on_lock: AtomicPtr<McsRawLock>,
|
||||
|
||||
// TODO: Put mailbox queues here, e.g. for TLB shootdown? Just be sure to 128-byte align it
|
||||
// first to avoid cache invalidation.
|
||||
pub profiling: Option<&'static crate::profiling::RingBuffer>,
|
||||
@@ -94,15 +57,6 @@ pub unsafe fn init_tlb_shootdown(id: LogicalCpuId, block: *mut PercpuBlock) {
|
||||
ALL_PERCPU_BLOCKS[id.get() as usize].store(block, Ordering::Release)
|
||||
}
|
||||
|
||||
/// Get a reference to another CPU's PercpuBlock by logical CPU ID.
|
||||
pub fn get_for_cpu(id: LogicalCpuId) -> Option<&'static PercpuBlock> {
|
||||
unsafe {
|
||||
ALL_PERCPU_BLOCKS[id.get() as usize]
|
||||
.load(Ordering::Acquire)
|
||||
.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_all_stats() -> Vec<(LogicalCpuId, CpuStatsData)> {
|
||||
let mut res = ALL_PERCPU_BLOCKS
|
||||
.iter()
|
||||
@@ -147,148 +101,25 @@ pub fn shootdown_tlb_ipi(target: Option<LogicalCpuId>) {
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
}
|
||||
// Full flush — clear range info (Release ordering ensures the flag
|
||||
// swap and these stores are visible to the handler before the IPI).
|
||||
percpublock.tlb_flush_start.store(0, Ordering::Release);
|
||||
percpublock.tlb_flush_count.store(0, Ordering::Release);
|
||||
|
||||
crate::ipi::ipi_single(crate::ipi::IpiKind::Tlb, percpublock);
|
||||
} else {
|
||||
// Broadcast TLB shootdown: set flag on all other CPUs, then send a single
|
||||
// IPI with "all except self" destination shorthand instead of N individual IPIs.
|
||||
let my_percpublock = PercpuBlock::current();
|
||||
for id in 0..crate::cpu_count() {
|
||||
let target_id = LogicalCpuId::new(id);
|
||||
if target_id == my_percpublock.cpu_id {
|
||||
continue;
|
||||
}
|
||||
let Some(percpublock) = (unsafe {
|
||||
ALL_PERCPU_BLOCKS[id as usize]
|
||||
.load(Ordering::Acquire)
|
||||
.as_ref()
|
||||
}) else {
|
||||
continue;
|
||||
};
|
||||
// Wait if this CPU still has a pending shootdown from a previous request
|
||||
#[expect(clippy::bool_comparison)]
|
||||
while percpublock
|
||||
.wants_tlb_shootdown
|
||||
.swap(true, Ordering::Release)
|
||||
== true
|
||||
{
|
||||
while percpublock.wants_tlb_shootdown.load(Ordering::Relaxed) == true {
|
||||
my_percpublock.maybe_handle_tlb_shootdown();
|
||||
hint::spin_loop();
|
||||
}
|
||||
}
|
||||
// Full flush — clear range info (Release ordering)
|
||||
percpublock.tlb_flush_start.store(0, Ordering::Release);
|
||||
percpublock.tlb_flush_count.store(0, Ordering::Release);
|
||||
// TODO: Optimize: use global counter and percpu ack counters, send IPI using
|
||||
// destination shorthand "all CPUs".
|
||||
shootdown_tlb_ipi(Some(LogicalCpuId::new(id)));
|
||||
}
|
||||
// Single broadcast IPI to all other CPUs using destination shorthand
|
||||
crate::ipi::ipi(crate::ipi::IpiKind::Tlb, crate::ipi::IpiTarget::Other);
|
||||
}
|
||||
}
|
||||
|
||||
/// Range-based TLB shootdown IPI. Only invalidates the specified virtual address
|
||||
/// range using INVLPG per page for ranges up to TLB_RANGE_THRESHOLD pages.
|
||||
/// Falls back to full flush for larger ranges.
|
||||
pub fn shootdown_tlb_ipi_range(target: Option<LogicalCpuId>, start: usize, count: usize) {
|
||||
if cfg!(not(feature = "multi_core")) {
|
||||
return;
|
||||
}
|
||||
|
||||
let start_aligned = start as u64 & !0xFFF;
|
||||
let count_u32 = count as u32;
|
||||
let use_range = count_u32 > 0 && count_u32 <= TLB_RANGE_THRESHOLD;
|
||||
|
||||
let set_range = |percpublock: &PercpuBlock| {
|
||||
if use_range {
|
||||
percpublock.tlb_flush_start.store(start_aligned, Ordering::Release);
|
||||
percpublock.tlb_flush_count.store(count_u32, Ordering::Release);
|
||||
} else {
|
||||
percpublock.tlb_flush_start.store(0, Ordering::Release);
|
||||
percpublock.tlb_flush_count.store(0, Ordering::Release);
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(target) = target {
|
||||
let my_percpublock = PercpuBlock::current();
|
||||
assert_ne!(target, my_percpublock.cpu_id);
|
||||
|
||||
let Some(percpublock) = (unsafe {
|
||||
ALL_PERCPU_BLOCKS[target.get() as usize]
|
||||
.load(Ordering::Acquire)
|
||||
.as_ref()
|
||||
}) else {
|
||||
return;
|
||||
};
|
||||
#[expect(clippy::bool_comparison)]
|
||||
while percpublock.wants_tlb_shootdown.swap(true, Ordering::Release) == true {
|
||||
while percpublock.wants_tlb_shootdown.load(Ordering::Relaxed) == true {
|
||||
my_percpublock.maybe_handle_tlb_shootdown();
|
||||
hint::spin_loop();
|
||||
}
|
||||
}
|
||||
set_range(percpublock);
|
||||
crate::ipi::ipi_single(crate::ipi::IpiKind::Tlb, percpublock);
|
||||
} else {
|
||||
let my_percpublock = PercpuBlock::current();
|
||||
for id in 0..crate::cpu_count() {
|
||||
let target_id = LogicalCpuId::new(id);
|
||||
if target_id == my_percpublock.cpu_id {
|
||||
continue;
|
||||
}
|
||||
let Some(percpublock) = (unsafe {
|
||||
ALL_PERCPU_BLOCKS[id as usize]
|
||||
.load(Ordering::Acquire)
|
||||
.as_ref()
|
||||
}) else {
|
||||
continue;
|
||||
};
|
||||
#[expect(clippy::bool_comparison)]
|
||||
while percpublock.wants_tlb_shootdown.swap(true, Ordering::Release) == true {
|
||||
while percpublock.wants_tlb_shootdown.load(Ordering::Relaxed) == true {
|
||||
my_percpublock.maybe_handle_tlb_shootdown();
|
||||
hint::spin_loop();
|
||||
}
|
||||
}
|
||||
set_range(percpublock);
|
||||
}
|
||||
crate::ipi::ipi(crate::ipi::IpiKind::Tlb, crate::ipi::IpiTarget::Other);
|
||||
}
|
||||
}
|
||||
impl PercpuBlock {
|
||||
/// Return the effective scheduling priority, accounting for priority inheritance.
|
||||
/// Lower number = higher priority (0-39 range).
|
||||
pub fn effective_prio(&self, context_prio: usize) -> usize {
|
||||
let donated = self.pi_donated_prio.load(Ordering::Relaxed);
|
||||
if donated < context_prio as u32 {
|
||||
donated as usize
|
||||
} else {
|
||||
context_prio
|
||||
}
|
||||
}
|
||||
|
||||
pub fn maybe_handle_tlb_shootdown(&self) {
|
||||
#[expect(clippy::bool_comparison)]
|
||||
if self.wants_tlb_shootdown.swap(false, Ordering::Relaxed) == false {
|
||||
return;
|
||||
}
|
||||
|
||||
let start = self.tlb_flush_start.load(Ordering::Acquire);
|
||||
let count = self.tlb_flush_count.load(Ordering::Acquire);
|
||||
|
||||
if start != 0 && count > 0 && count <= TLB_RANGE_THRESHOLD {
|
||||
// Range-based flush using INVLPG per page — cheaper than full CR3 reload.
|
||||
for i in 0..count {
|
||||
let addr = start + (i as u64) * 4096;
|
||||
crate::memory::RmmA::invalidate(rmm::VirtualAddress::new(addr as usize));
|
||||
}
|
||||
} else {
|
||||
// Full TLB flush (CR3 reload) for large ranges or global shootdowns.
|
||||
crate::memory::RmmA::invalidate_all();
|
||||
}
|
||||
// TODO: Finer-grained flush
|
||||
crate::memory::RmmA::invalidate_all();
|
||||
|
||||
if let Some(addrsp) = &*self.current_addrsp.borrow() {
|
||||
addrsp.tlb_ack.fetch_add(1, Ordering::Release);
|
||||
@@ -358,14 +189,6 @@ impl PercpuBlock {
|
||||
wants_tlb_shootdown: AtomicBool::new(false),
|
||||
balance: Cell::new([0; 40]),
|
||||
last_queue: Cell::new(39),
|
||||
mcs_sched_node: McsNode::new(),
|
||||
mcs_contention_count: Cell::new(0),
|
||||
tlb_flush_start: AtomicU64::new(0),
|
||||
tlb_flush_count: AtomicU32::new(0),
|
||||
pi_donated_prio: AtomicU32::new(u32::MAX),
|
||||
current_prio: Cell::new(39),
|
||||
numa_node: Cell::new(u8::MAX),
|
||||
waiting_on_lock: AtomicPtr::new(core::ptr::null_mut()),
|
||||
ptrace_flags: Cell::new(PtraceFlags::empty()),
|
||||
ptrace_session: RefCell::new(None),
|
||||
inside_syscall: Cell::new(false),
|
||||
|
||||
@@ -10,7 +10,6 @@ use syscall::{
|
||||
|
||||
use crate::{
|
||||
acpi::{RxsdtEnum, RXSDT_ENUM},
|
||||
arch::sleep,
|
||||
context::file::InternalFlags,
|
||||
event,
|
||||
sync::{CleanLockToken, RwLock, WaitCondition, L1},
|
||||
@@ -41,7 +40,6 @@ enum HandleKind {
|
||||
TopLevel,
|
||||
Rxsdt,
|
||||
ShutdownPipe,
|
||||
SleepControl,
|
||||
SchemeRoot,
|
||||
}
|
||||
|
||||
@@ -148,11 +146,11 @@ impl KernelScheme for AcpiScheme {
|
||||
if flags & O_EXCL == O_EXCL || flags & O_SYMLINK == O_SYMLINK {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
|
||||
return Err(Error::new(EROFS));
|
||||
}
|
||||
let (handle_kind, int_flags) = match path {
|
||||
"" => {
|
||||
if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
|
||||
return Err(Error::new(EROFS));
|
||||
}
|
||||
if flags & O_DIRECTORY != O_DIRECTORY && flags & O_STAT != O_STAT {
|
||||
return Err(Error::new(EISDIR));
|
||||
}
|
||||
@@ -160,36 +158,17 @@ impl KernelScheme for AcpiScheme {
|
||||
(HandleKind::TopLevel, InternalFlags::POSITIONED)
|
||||
}
|
||||
"rxsdt" => {
|
||||
if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
|
||||
return Err(Error::new(EROFS));
|
||||
}
|
||||
if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT {
|
||||
return Err(Error::new(ENOTDIR));
|
||||
}
|
||||
(HandleKind::Rxsdt, InternalFlags::POSITIONED)
|
||||
}
|
||||
"kstop" => {
|
||||
if flags & O_ACCMODE != O_RDONLY && flags & O_STAT != O_STAT {
|
||||
return Err(Error::new(EROFS));
|
||||
}
|
||||
if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT {
|
||||
return Err(Error::new(ENOTDIR));
|
||||
}
|
||||
(HandleKind::ShutdownPipe, InternalFlags::empty())
|
||||
}
|
||||
"sleep" => {
|
||||
if flags & O_ACCMODE == O_RDONLY || flags & O_STAT == O_STAT {
|
||||
// allowed
|
||||
} else if flags & O_ACCMODE != syscall::flag::O_WRONLY
|
||||
&& flags & O_ACCMODE != syscall::flag::O_RDWR
|
||||
{
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT {
|
||||
return Err(Error::new(ENOTDIR));
|
||||
}
|
||||
(HandleKind::SleepControl, InternalFlags::POSITIONED)
|
||||
}
|
||||
_ => return Err(Error::new(ENOENT)),
|
||||
};
|
||||
|
||||
@@ -212,7 +191,6 @@ impl KernelScheme for AcpiScheme {
|
||||
Ok(match handle.kind {
|
||||
HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?.len() as u64,
|
||||
HandleKind::ShutdownPipe => 1,
|
||||
HandleKind::SleepControl => sleep::available_sleep_states().len() as u64,
|
||||
HandleKind::TopLevel => 0,
|
||||
HandleKind::SchemeRoot => return Err(Error::new(EBADF))?,
|
||||
})
|
||||
@@ -275,7 +253,6 @@ impl KernelScheme for AcpiScheme {
|
||||
|
||||
return dst_buf.copy_exactly(&[0x42]).map(|()| 1);
|
||||
}
|
||||
HandleKind::SleepControl => sleep::available_sleep_states(),
|
||||
HandleKind::Rxsdt => DATA.get().ok_or(Error::new(EBADFD))?,
|
||||
HandleKind::TopLevel => return Err(Error::new(EISDIR)),
|
||||
HandleKind::SchemeRoot => return Err(Error::new(EBADF)),
|
||||
@@ -318,45 +295,11 @@ impl KernelScheme for AcpiScheme {
|
||||
kind: DirentKind::Socket,
|
||||
name: "kstop",
|
||||
inode: 0,
|
||||
next_opaque_id: 2,
|
||||
})?;
|
||||
}
|
||||
if opaque <= 2 {
|
||||
buf.entry(DirEntry {
|
||||
kind: DirentKind::Regular,
|
||||
name: "sleep",
|
||||
inode: 0,
|
||||
next_opaque_id: u64::MAX,
|
||||
})?;
|
||||
}
|
||||
Ok(buf.finalize())
|
||||
}
|
||||
fn kwrite(
|
||||
&self,
|
||||
id: usize,
|
||||
buf: crate::syscall::usercopy::UserSliceRo,
|
||||
_flags: u32,
|
||||
_stored_flags: u32,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
let handle = *HANDLES.read(token.token()).get(id)?;
|
||||
|
||||
if handle.stat {
|
||||
return Err(Error::new(EBADF));
|
||||
}
|
||||
|
||||
match handle.kind {
|
||||
HandleKind::SleepControl => {
|
||||
let mut tmp = [0_u8; 16];
|
||||
let len = buf.copy_common_bytes_to_slice(&mut tmp)?;
|
||||
let request = core::str::from_utf8(&tmp[..len]).map_err(|_| Error::new(EINVAL))?;
|
||||
sleep::trigger_sleep_request(request)?;
|
||||
Ok(len)
|
||||
}
|
||||
HandleKind::SchemeRoot => Err(Error::new(EBADF)),
|
||||
_ => Err(Error::new(EBADF)),
|
||||
}
|
||||
}
|
||||
fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result<usize> {
|
||||
//TODO: construct useful path?
|
||||
buf.copy_common_bytes_from_slice("/scheme/kernel.acpi/".as_bytes())
|
||||
@@ -385,11 +328,6 @@ impl KernelScheme for AcpiScheme {
|
||||
st_size: 1,
|
||||
..Default::default()
|
||||
},
|
||||
HandleKind::SleepControl => Stat {
|
||||
st_mode: MODE_FILE,
|
||||
st_size: sleep::available_sleep_states().len().try_into().unwrap_or(u64::MAX),
|
||||
..Default::default()
|
||||
},
|
||||
HandleKind::SchemeRoot => return Err(Error::new(EBADF)),
|
||||
})?;
|
||||
|
||||
|
||||
@@ -22,10 +22,9 @@ struct Handle {
|
||||
|
||||
static HANDLES: RwLock<L1, HandleMap<Handle>> = RwLock::new(HandleMap::new());
|
||||
|
||||
/// Add to the input queue, translating CR to NL (ICRNL) for serial console compatibility.
|
||||
/// Add to the input queue
|
||||
pub fn debug_input(data: u8, token: &mut CleanLockToken) {
|
||||
let translated = if data == b'\r' { b'\n' } else { data };
|
||||
INPUT.send(translated, token);
|
||||
INPUT.send(data, token);
|
||||
}
|
||||
|
||||
// Notify readers of input updates
|
||||
@@ -107,16 +106,12 @@ impl KernelScheme for DebugScheme {
|
||||
fn fevent(
|
||||
&self,
|
||||
id: usize,
|
||||
flags: EventFlags,
|
||||
_flags: EventFlags,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<EventFlags> {
|
||||
let _handle = *HANDLES.read(token.token()).get(id)?;
|
||||
|
||||
let mut ready = EventFlags::empty();
|
||||
if flags.contains(EventFlags::EVENT_READ) {
|
||||
ready |= EventFlags::EVENT_READ;
|
||||
}
|
||||
Ok(ready)
|
||||
Ok(EventFlags::empty())
|
||||
}
|
||||
|
||||
fn fsync(&self, id: usize, token: &mut CleanLockToken) -> Result<()> {
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use alloc::sync::Arc;
|
||||
use core::sync::atomic::Ordering;
|
||||
use syscall::{EventFlags, O_NONBLOCK};
|
||||
|
||||
use crate::{
|
||||
@@ -26,25 +25,12 @@ impl KernelScheme for EventScheme {
|
||||
fn kopenat(
|
||||
&self,
|
||||
id: usize,
|
||||
user_buf: StrOrBytes,
|
||||
_user_buf: StrOrBytes,
|
||||
_flags: usize,
|
||||
_fcntl_flags: u32,
|
||||
_ctx: CallerCtx,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<OpenResult> {
|
||||
let path = match &user_buf {
|
||||
StrOrBytes::Str(s) => s,
|
||||
StrOrBytes::Bytes(b) => core::str::from_utf8(b).unwrap_or(""),
|
||||
};
|
||||
if path.starts_with("eventfd/") {
|
||||
let rest = &path[8..]; // after "eventfd/"
|
||||
let mut parts = rest.split('/');
|
||||
let initval: u64 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
|
||||
let sem: bool = parts.next().and_then(|s| s.parse().ok()).unwrap_or(false);
|
||||
let id = next_queue_id();
|
||||
queues_mut(token.token()).insert(id, Arc::new(EventQueue::new_eventfd(id, initval, sem)));
|
||||
return Ok(OpenResult::SchemeLocal(id.get(), InternalFlags::empty()));
|
||||
}
|
||||
if id != SCHEME_ROOT_ID {
|
||||
return Err(Error::new(EACCES));
|
||||
}
|
||||
@@ -81,31 +67,6 @@ impl KernelScheme for EventScheme {
|
||||
handle.clone()
|
||||
};
|
||||
|
||||
if let Some((ref counter, semaphore)) = queue.eventfd {
|
||||
let is_nonblock = flags & O_NONBLOCK as u32 != 0;
|
||||
if semaphore {
|
||||
let val = counter.load(Ordering::Acquire);
|
||||
if val == 0 {
|
||||
if is_nonblock { return Err(Error::new(EAGAIN)); }
|
||||
// Blocking wait not implemented for eventfd in kernel
|
||||
return Err(Error::new(EAGAIN));
|
||||
}
|
||||
if counter.compare_exchange(val, val - 1, Ordering::AcqRel, Ordering::Relaxed).is_ok() {
|
||||
let one: u64 = 1;
|
||||
buf.copy_from_slice(unsafe { core::slice::from_raw_parts(&one as *const u64 as *const u8, 8) })?;
|
||||
return Ok(8);
|
||||
}
|
||||
return Err(Error::new(EAGAIN));
|
||||
} else {
|
||||
let val = counter.swap(0, Ordering::AcqRel);
|
||||
if val == 0 && is_nonblock {
|
||||
return Err(Error::new(EAGAIN));
|
||||
}
|
||||
buf.copy_from_slice(unsafe { core::slice::from_raw_parts(&val as *const u64 as *const u8, 8) })?;
|
||||
return Ok(8);
|
||||
}
|
||||
}
|
||||
|
||||
queue.read(buf, flags & O_NONBLOCK as u32 == 0, token)
|
||||
}
|
||||
|
||||
@@ -124,19 +85,6 @@ impl KernelScheme for EventScheme {
|
||||
let handle = handles.get(&id).ok_or(Error::new(EBADF))?;
|
||||
handle.clone()
|
||||
};
|
||||
|
||||
if let Some((ref counter, _semaphore)) = queue.eventfd {
|
||||
if buf.len() >= 8 {
|
||||
let mut bytes = [0u8; 8];
|
||||
buf.copy_to_slice(&mut bytes)?;
|
||||
let val = u64::from_ne_bytes(bytes);
|
||||
if val == u64::MAX { return Err(Error::new(EINVAL)); }
|
||||
counter.fetch_add(val, Ordering::AcqRel);
|
||||
return Ok(8);
|
||||
}
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
|
||||
let mut events_written = 0;
|
||||
|
||||
for chunk in buf.in_exact_chunks(size_of::<Event>()) {
|
||||
|
||||
@@ -18,9 +18,6 @@ use syscall::{
|
||||
use crate::context::file::InternalFlags;
|
||||
|
||||
use super::{CallerCtx, HandleMap, OpenResult, SchemeExt, StrOrBytes};
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
use crate::arch::device::{ioapic, local_apic::ApicId};
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
use crate::arch::interrupt::{available_irqs_iter, irq::acknowledge, is_reserved, set_reserved};
|
||||
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
|
||||
@@ -59,11 +56,8 @@ const INO_AVAIL: u64 = 0x8000_0000_0000_0000;
|
||||
const INO_BSP: u64 = 0x8001_0000_0000_0000;
|
||||
const INO_PHANDLE: u64 = 0x8003_0000_0000_0000;
|
||||
|
||||
/// Add to the input queue, with iommu validation gate for MSI vectors
|
||||
/// Add to the input queue
|
||||
pub fn irq_trigger(irq: u8, token: &mut CleanLockToken) {
|
||||
if irq >= 16 && !iommu_validate_msi_irq(irq) {
|
||||
return;
|
||||
}
|
||||
COUNTS.lock()[irq as usize] += 1;
|
||||
let fds: SmallVec<[usize; 8]> = {
|
||||
HANDLES
|
||||
@@ -83,17 +77,16 @@ pub fn irq_trigger(irq: u8, token: &mut CleanLockToken) {
|
||||
#[allow(dead_code)]
|
||||
enum Handle {
|
||||
SchemeRoot,
|
||||
Irq { ack: AtomicUsize, irq: u8, cpu_id: LogicalCpuId },
|
||||
Irq { ack: AtomicUsize, irq: u8 },
|
||||
Avail(LogicalCpuId),
|
||||
TopLevel,
|
||||
Phandle(u8, Vec<u8>),
|
||||
Bsp,
|
||||
IrqAffinity { irq: u8, mask: AtomicUsize },
|
||||
}
|
||||
impl Handle {
|
||||
fn as_irq_handle(&self) -> Option<(&AtomicUsize, u8)> {
|
||||
match self {
|
||||
&Self::Irq { ref ack, irq, cpu_id: _ } => Some((ack, irq)),
|
||||
&Self::Irq { ref ack, irq } => Some((ack, irq)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -147,7 +140,6 @@ impl IrqScheme {
|
||||
Handle::Irq {
|
||||
ack: AtomicUsize::new(0),
|
||||
irq: irq_number,
|
||||
cpu_id: LogicalCpuId::BSP,
|
||||
},
|
||||
InternalFlags::empty(),
|
||||
)
|
||||
@@ -166,7 +158,6 @@ impl IrqScheme {
|
||||
Handle::Irq {
|
||||
ack: AtomicUsize::new(0),
|
||||
irq: irq_number,
|
||||
cpu_id,
|
||||
},
|
||||
InternalFlags::empty(),
|
||||
)
|
||||
@@ -208,7 +199,6 @@ impl IrqScheme {
|
||||
Handle::Irq {
|
||||
ack: AtomicUsize::new(0),
|
||||
irq: irq_number as u8,
|
||||
cpu_id: LogicalCpuId::new(0),
|
||||
},
|
||||
InternalFlags::empty(),
|
||||
)
|
||||
@@ -224,14 +214,6 @@ const fn vector_to_irq(vector: u8) -> u8 {
|
||||
vector - 32
|
||||
}
|
||||
|
||||
const fn msi_vector_is_valid(vector: u8) -> bool {
|
||||
vector >= 32 && vector < 0xEF
|
||||
}
|
||||
|
||||
fn iommu_validate_msi_irq(_irq: u8) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
impl crate::scheme::KernelScheme for IrqScheme {
|
||||
fn scheme_root(&self, token: &mut CleanLockToken) -> Result<usize> {
|
||||
let id = HANDLES.write(token.token()).insert(Handle::SchemeRoot);
|
||||
@@ -298,21 +280,7 @@ impl crate::scheme::KernelScheme for IrqScheme {
|
||||
InternalFlags::POSITIONED,
|
||||
)
|
||||
} else if let Some(path_str) = path_str.strip_prefix('/') {
|
||||
let (irq_str, affinity) = path_str
|
||||
.trim_end_matches('/')
|
||||
.rsplit_once('/')
|
||||
.map(|(a, b)| (a, Some(b)))
|
||||
.unwrap_or((path_str.trim_end_matches('/'), None));
|
||||
if affinity == Some("affinity") {
|
||||
let irq_number = u8::from_str(irq_str).or(Err(Error::new(ENOENT)))?;
|
||||
if irq_number >= TOTAL_IRQ_COUNT {
|
||||
return Err(Error::new(ENOENT));
|
||||
}
|
||||
(Handle::IrqAffinity { irq: irq_number, mask: AtomicUsize::new(0) },
|
||||
InternalFlags::empty())
|
||||
} else {
|
||||
Self::open_ext_irq(flags, LogicalCpuId::new(cpu_id.into()), path_str)?
|
||||
}
|
||||
Self::open_ext_irq(flags, LogicalCpuId::new(cpu_id.into()), path_str)?
|
||||
} else {
|
||||
return Err(Error::new(ENOENT));
|
||||
}
|
||||
@@ -339,20 +307,12 @@ impl crate::scheme::KernelScheme for IrqScheme {
|
||||
}
|
||||
#[cfg(not(dtb))]
|
||||
panic!("")
|
||||
} else if let Some(rest) = path_str.strip_suffix("/affinity") {
|
||||
let irq_number = u8::from_str(rest).or(Err(Error::new(ENOENT)))?;
|
||||
if irq_number >= TOTAL_IRQ_COUNT {
|
||||
return Err(Error::new(ENOENT));
|
||||
}
|
||||
(Handle::IrqAffinity { irq: irq_number, mask: AtomicUsize::new(0) },
|
||||
InternalFlags::empty())
|
||||
} else if let Ok(plain_irq_number) = u8::from_str(path_str) {
|
||||
if plain_irq_number < BASE_IRQ_COUNT {
|
||||
(
|
||||
Handle::Irq {
|
||||
ack: AtomicUsize::new(0),
|
||||
irq: plain_irq_number,
|
||||
cpu_id: LogicalCpuId::BSP,
|
||||
},
|
||||
InternalFlags::empty(),
|
||||
)
|
||||
@@ -408,7 +368,6 @@ impl crate::scheme::KernelScheme for IrqScheme {
|
||||
}
|
||||
}
|
||||
Handle::Avail(cpu_id) => {
|
||||
let mut listed = 0;
|
||||
for vector in available_irqs_iter(cpu_id).skip(opaque) {
|
||||
let irq = vector_to_irq(vector);
|
||||
if cpu_id == LogicalCpuId::BSP && irq < BASE_IRQ_COUNT {
|
||||
@@ -422,9 +381,7 @@ impl crate::scheme::KernelScheme for IrqScheme {
|
||||
name: &intermediate,
|
||||
next_opaque_id: u64::from(vector) + 1,
|
||||
})?;
|
||||
listed += 1;
|
||||
}
|
||||
info!("irq getdents Avail: cpu_id={} opaque={} listed={}", cpu_id.get(), opaque, listed);
|
||||
}
|
||||
_ => return Err(Error::new(ENOTDIR)),
|
||||
}
|
||||
@@ -459,14 +416,11 @@ impl crate::scheme::KernelScheme for IrqScheme {
|
||||
let handle = handles_guard.get(id)?;
|
||||
|
||||
if let &Handle::Irq {
|
||||
irq: handle_irq,
|
||||
cpu_id: handle_cpu_id,
|
||||
..
|
||||
irq: handle_irq, ..
|
||||
} = handle
|
||||
&& handle_irq > BASE_IRQ_COUNT
|
||||
{
|
||||
info!("irq close: unreserving vector {} on cpu_id={}", irq_to_vector(handle_irq), handle_cpu_id.get());
|
||||
set_reserved(handle_cpu_id, irq_to_vector(handle_irq), false);
|
||||
set_reserved(LogicalCpuId::BSP, irq_to_vector(handle_irq), false);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -482,32 +436,9 @@ impl crate::scheme::KernelScheme for IrqScheme {
|
||||
let handle = handles_guard.get(file)?;
|
||||
|
||||
match handle {
|
||||
&Handle::IrqAffinity { irq: _handle_irq, ref mask } => {
|
||||
if buffer.len() < size_of::<u32>() {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
let mut raw = [0u8; size_of::<u32>()];
|
||||
buffer.copy_to_slice(&mut raw)?;
|
||||
let cpu_id = u32::from_ne_bytes(raw);
|
||||
let cpus = CPUS.get().ok_or(Error::new(EIO))?;
|
||||
if !cpus.contains(&(cpu_id as u8)) {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
// Reprogram the IOAPIC redirection entry for x86 targets.
|
||||
// Non-IOAPIC IRQs (e.g. MSI) will return false -> EIO.
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
{
|
||||
if !unsafe { ioapic::set_affinity(_handle_irq, ApicId::new(cpu_id)) } {
|
||||
return Err(Error::new(EIO));
|
||||
}
|
||||
}
|
||||
mask.store(cpu_id as usize, Ordering::Release);
|
||||
Ok(size_of::<u32>())
|
||||
}
|
||||
&Handle::Irq {
|
||||
irq: handle_irq,
|
||||
ack: ref handle_ack,
|
||||
cpu_id: _,
|
||||
} => {
|
||||
if buffer.len() < size_of::<usize>() {
|
||||
return Err(Error::new(EINVAL));
|
||||
@@ -544,15 +475,6 @@ impl crate::scheme::KernelScheme for IrqScheme {
|
||||
st_nlink: 1,
|
||||
..Default::default()
|
||||
},
|
||||
Handle::IrqAffinity { irq, .. } => Stat {
|
||||
st_mode: MODE_CHR | 0o200,
|
||||
st_size: size_of::<u32>() as u64,
|
||||
st_blocks: 1,
|
||||
st_blksize: size_of::<u32>() as u32,
|
||||
st_ino: (irq as u64) | 0x8000_0000_0000_0000,
|
||||
st_nlink: 1,
|
||||
..Default::default()
|
||||
},
|
||||
Handle::Bsp => Stat {
|
||||
st_mode: MODE_CHR | 0o400,
|
||||
st_size: size_of::<usize>() as u64,
|
||||
@@ -594,9 +516,8 @@ impl crate::scheme::KernelScheme for IrqScheme {
|
||||
|
||||
let scheme_path = match handle {
|
||||
Handle::Irq { irq, .. } => format!("irq:{}", irq),
|
||||
Handle::IrqAffinity { irq, .. } => format!("irq:{}/affinity", irq),
|
||||
Handle::Bsp => "irq:bsp".to_owned(),
|
||||
Handle::Avail(cpu_id) => format!("irq:cpu-{:02x}", cpu_id.get()),
|
||||
Handle::Avail(cpu_id) => format!("irq:cpu-{:2x}", cpu_id.get()),
|
||||
Handle::Phandle(phandle, _) => format!("irq:phandle-{}", phandle),
|
||||
Handle::TopLevel => "irq:".to_owned(),
|
||||
_ => return Err(Error::new(EBADF)),
|
||||
@@ -622,7 +543,6 @@ impl crate::scheme::KernelScheme for IrqScheme {
|
||||
Handle::Irq {
|
||||
irq: handle_irq,
|
||||
ack: ref handle_ack,
|
||||
cpu_id: _,
|
||||
} => {
|
||||
if buffer.len() < size_of::<usize>() {
|
||||
return Err(Error::new(EINVAL));
|
||||
@@ -642,7 +562,7 @@ impl crate::scheme::KernelScheme for IrqScheme {
|
||||
buffer.write_u32(LogicalCpuId::BSP.get())?;
|
||||
Ok(size_of::<usize>())
|
||||
}
|
||||
Handle::Avail(_) | Handle::TopLevel | Handle::Phandle(_, _) | Handle::SchemeRoot | Handle::IrqAffinity { .. } => {
|
||||
Handle::Avail(_) | Handle::TopLevel | Handle::Phandle(_, _) | Handle::SchemeRoot => {
|
||||
Err(Error::new(EISDIR))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ use alloc::{
|
||||
};
|
||||
use core::{
|
||||
str,
|
||||
sync::atomic::{AtomicU64, AtomicUsize, Ordering},
|
||||
sync::atomic::{AtomicUsize, Ordering},
|
||||
};
|
||||
use hashbrown::hash_map::{self, DefaultHashBuilder, HashMap};
|
||||
use spin::Once;
|
||||
@@ -169,7 +169,6 @@ enum Handle {
|
||||
|
||||
/// Schemes list
|
||||
static HANDLES: Once<RwLock<L1, HashMap<SchemeId, Handle>>> = Once::new();
|
||||
static SCHEME_GENERATIONS: Once<RwLock<L1, HashMap<SchemeId, AtomicU64>>> = Once::new();
|
||||
static SCHEME_LIST_NEXT_ID: AtomicUsize = AtomicUsize::new(MAX_GLOBAL_SCHEMES);
|
||||
static SCHEME_LIST_ID: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
@@ -205,10 +204,6 @@ fn init_schemes() -> RwLock<L1, HashMap<SchemeId, Handle>> {
|
||||
RwLock::new(handles)
|
||||
}
|
||||
|
||||
fn init_scheme_generations() -> RwLock<L1, HashMap<SchemeId, AtomicU64>> {
|
||||
RwLock::new(HashMap::new())
|
||||
}
|
||||
|
||||
/// Get a handle to a scheme.
|
||||
pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result<KernelSchemes> {
|
||||
match handles().read(token).get(&scheme_id) {
|
||||
@@ -217,33 +212,10 @@ pub fn get_scheme(token: LockToken<'_, L0>, scheme_id: SchemeId) -> Result<Kerne
|
||||
}
|
||||
}
|
||||
|
||||
pub fn current_scheme_generation(token: LockToken<'_, L0>, scheme_id: SchemeId) -> u64 {
|
||||
scheme_generations()
|
||||
.read(token)
|
||||
.get(&scheme_id)
|
||||
.map(|generation| generation.load(Ordering::Acquire))
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
fn handles<'a>() -> &'a RwLock<L1, HashMap<SchemeId, Handle>> {
|
||||
HANDLES.call_once(init_schemes)
|
||||
}
|
||||
|
||||
fn scheme_generations<'a>() -> &'a RwLock<L1, HashMap<SchemeId, AtomicU64>> {
|
||||
SCHEME_GENERATIONS.call_once(init_scheme_generations)
|
||||
}
|
||||
|
||||
fn increment_scheme_generation(scheme_id: SchemeId, token: &mut CleanLockToken) {
|
||||
match scheme_generations().write(token.token()).entry(scheme_id) {
|
||||
hash_map::Entry::Occupied(entry) => {
|
||||
entry.get().fetch_add(1, Ordering::AcqRel);
|
||||
}
|
||||
hash_map::Entry::Vacant(entry) => {
|
||||
entry.insert(AtomicU64::new(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Scheme list type
|
||||
pub struct SchemeList;
|
||||
|
||||
@@ -288,14 +260,9 @@ impl SchemeList {
|
||||
|
||||
/// Remove a scheme
|
||||
fn remove(&self, id: usize, token: &mut CleanLockToken) {
|
||||
let scheme_id = SchemeId(id);
|
||||
let scheme = handles().write(token.token()).remove(&scheme_id);
|
||||
let scheme = handles().write(token.token()).remove(&SchemeId(id));
|
||||
|
||||
assert!(scheme.is_some());
|
||||
if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme.as_ref() {
|
||||
user.inner.fail_pending_calls(token);
|
||||
}
|
||||
increment_scheme_generation(scheme_id, token);
|
||||
if let Some(Handle::Scheme(KernelSchemes::User(user))) = scheme
|
||||
&& let Some(user) = Arc::into_inner(user.inner)
|
||||
{
|
||||
@@ -320,32 +287,32 @@ impl KernelScheme for SchemeList {
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<OpenResult> {
|
||||
let scheme_id = SchemeId(scheme_id);
|
||||
let maybe_inner = {
|
||||
let handles = handles().read(token.token());
|
||||
match handles.get(&scheme_id).ok_or(Error::new(EBADF))? {
|
||||
Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => Some(inner.clone()),
|
||||
Handle::SchemeCreationCapability => None,
|
||||
_ => return Err(Error::new(EBADF)),
|
||||
match handles()
|
||||
.read(token.token())
|
||||
.get(&scheme_id)
|
||||
.ok_or(Error::new(EBADF))?
|
||||
{
|
||||
Handle::Scheme(KernelSchemes::User(UserScheme { inner })) => {
|
||||
let inner = inner.clone();
|
||||
assert!(scheme_id == inner.scheme_id);
|
||||
let scheme = scheme_id;
|
||||
let params = unsafe { user_buf.read_exact::<NewFdParams>()? };
|
||||
|
||||
return Ok(OpenResult::External(Arc::new(RwLock::new(
|
||||
FileDescription {
|
||||
scheme,
|
||||
number: params.number,
|
||||
offset: params.offset,
|
||||
flags: params.flags as u32,
|
||||
internal_flags: InternalFlags::from_extra0(params.internal_flags)
|
||||
.ok_or(Error::new(EINVAL))?,
|
||||
},
|
||||
))));
|
||||
}
|
||||
Handle::SchemeCreationCapability => (),
|
||||
_ => return Err(Error::new(EBADF)),
|
||||
};
|
||||
|
||||
if let Some(inner) = maybe_inner {
|
||||
assert!(scheme_id == inner.scheme_id);
|
||||
let params = unsafe { user_buf.read_exact::<NewFdParams>()? };
|
||||
|
||||
return Ok(OpenResult::External(Arc::new(RwLock::new(
|
||||
FileDescription::new(
|
||||
scheme_id,
|
||||
params.number,
|
||||
params.offset,
|
||||
params.flags as u32,
|
||||
InternalFlags::from_extra0(params.internal_flags)
|
||||
.ok_or(Error::new(EINVAL))?,
|
||||
token,
|
||||
),
|
||||
))));
|
||||
}
|
||||
|
||||
const EXPECTED: &[u8] = b"create-scheme";
|
||||
let mut buf = [0u8; EXPECTED.len()];
|
||||
|
||||
@@ -810,7 +777,6 @@ pub struct CallerCtx {
|
||||
pub pid: usize,
|
||||
pub uid: u32,
|
||||
pub gid: u32,
|
||||
pub groups: alloc::vec::Vec<u32>,
|
||||
}
|
||||
impl CallerCtx {
|
||||
pub fn filter_uid_gid(self, euid: u32, egid: u32) -> Self {
|
||||
@@ -819,7 +785,6 @@ impl CallerCtx {
|
||||
pid: self.pid,
|
||||
uid: euid,
|
||||
gid: egid,
|
||||
groups: self.groups,
|
||||
}
|
||||
} else {
|
||||
self
|
||||
|
||||
@@ -1,10 +1,5 @@
|
||||
use alloc::{
|
||||
collections::VecDeque,
|
||||
string::{String, ToString},
|
||||
sync::Arc,
|
||||
vec::Vec,
|
||||
};
|
||||
use core::sync::atomic::{AtomicUsize, Ordering};
|
||||
use alloc::{collections::VecDeque, sync::Arc, vec::Vec};
|
||||
use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
|
||||
use syscall::{data::GlobalSchemes, CallFlags};
|
||||
|
||||
@@ -19,228 +14,67 @@ use crate::{
|
||||
sync::{CleanLockToken, Mutex, RwLock, WaitCondition, L1},
|
||||
syscall::{
|
||||
data::Stat,
|
||||
error::{
|
||||
Error, Result, EAGAIN, EBADF, EEXIST, EINVAL, EINTR, ENOENT, ENOTDIR, EPIPE,
|
||||
},
|
||||
flag::{
|
||||
EventFlags, EVENT_READ, EVENT_WRITE, MODE_FIFO, O_ACCMODE, O_DIRECTORY,
|
||||
O_NONBLOCK, O_RDONLY, O_RDWR, O_STAT, O_WRONLY,
|
||||
},
|
||||
error::{Error, Result, EAGAIN, EBADF, EINTR, EINVAL, ENOENT, EPIPE},
|
||||
flag::{EventFlags, EVENT_READ, EVENT_WRITE, MODE_FIFO, O_NONBLOCK},
|
||||
usercopy::{UserSliceRo, UserSliceRw, UserSliceWo},
|
||||
},
|
||||
};
|
||||
|
||||
use super::{CallerCtx, KernelScheme, OpenResult, SchemeExt, StrOrBytes};
|
||||
|
||||
static PIPE_NEXT_ID: AtomicUsize = AtomicUsize::new(1);
|
||||
// TODO: Preallocate a number of scheme IDs, since there can only be *one* root namespace, and
|
||||
// therefore only *one* pipe scheme.
|
||||
static PIPE_NEXT_ID: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
#[derive(Clone)]
|
||||
enum Handle {
|
||||
Endpoint(EndpointHandle),
|
||||
Pipe(Arc<Pipe>),
|
||||
SchemeRoot,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
enum EndpointKind {
|
||||
Read,
|
||||
Write,
|
||||
ReadWrite,
|
||||
}
|
||||
|
||||
impl EndpointKind {
|
||||
fn can_read(self) -> bool {
|
||||
matches!(self, Self::Read | Self::ReadWrite)
|
||||
}
|
||||
|
||||
fn can_write(self) -> bool {
|
||||
matches!(self, Self::Write | Self::ReadWrite)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct EndpointHandle {
|
||||
pipe: Arc<Pipe>,
|
||||
kind: EndpointKind,
|
||||
named: Option<Arc<NamedPipe>>,
|
||||
}
|
||||
|
||||
struct NamedPipe {
|
||||
path: String,
|
||||
mode: u16,
|
||||
active: Mutex<L1, Option<Arc<Pipe>>>,
|
||||
}
|
||||
|
||||
static HANDLES: RwLock<L1, HashMap<usize, Handle>> =
|
||||
RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new()));
|
||||
static NAMED_PIPES: RwLock<L1, HashMap<String, Arc<NamedPipe>>> =
|
||||
// TODO: SLOB?
|
||||
static PIPES: RwLock<L1, HashMap<usize, Handle>> =
|
||||
RwLock::new(HashMap::with_hasher(DefaultHashBuilder::new()));
|
||||
|
||||
const MAX_QUEUE_SIZE: usize = 65536;
|
||||
|
||||
fn next_id() -> usize {
|
||||
PIPE_NEXT_ID.fetch_add(1, Ordering::Relaxed)
|
||||
}
|
||||
// In almost all places where Rust (and LLVM) uses pointers, they are limited to nonnegative isize,
|
||||
// so this is fine.
|
||||
const WRITE_NOT_READ_BIT: usize = 1;
|
||||
|
||||
fn endpoint_kind_from_flags(flags: usize) -> Result<EndpointKind> {
|
||||
match flags & O_ACCMODE {
|
||||
O_RDONLY => Ok(EndpointKind::Read),
|
||||
O_WRONLY => Ok(EndpointKind::Write),
|
||||
O_RDWR => Ok(EndpointKind::ReadWrite),
|
||||
_ => Err(Error::new(EINVAL)),
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_named_fifo_open(flags: usize) -> Result<()> {
|
||||
if flags & O_DIRECTORY == O_DIRECTORY && flags & O_STAT != O_STAT {
|
||||
return Err(Error::new(ENOTDIR));
|
||||
}
|
||||
|
||||
let _ = endpoint_kind_from_flags(flags)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn trigger_matching(
|
||||
pipe: &Arc<Pipe>,
|
||||
require_read: bool,
|
||||
require_write: bool,
|
||||
flags: EventFlags,
|
||||
token: &mut CleanLockToken,
|
||||
) {
|
||||
let ids = {
|
||||
let handles = HANDLES.read(token.token());
|
||||
handles
|
||||
.iter()
|
||||
.filter_map(|(id, handle)| match handle {
|
||||
Handle::Endpoint(endpoint)
|
||||
if Arc::ptr_eq(&endpoint.pipe, pipe)
|
||||
&& (!require_read || endpoint.kind.can_read())
|
||||
&& (!require_write || endpoint.kind.can_write()) =>
|
||||
{
|
||||
Some(*id)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
for id in ids {
|
||||
event::trigger(GlobalSchemes::Pipe.scheme_id(), id, flags, token);
|
||||
}
|
||||
}
|
||||
|
||||
fn open_endpoint(
|
||||
pipe: Arc<Pipe>,
|
||||
kind: EndpointKind,
|
||||
named: Option<Arc<NamedPipe>>,
|
||||
token: &mut CleanLockToken,
|
||||
) -> usize {
|
||||
if kind.can_read() {
|
||||
pipe.reader_count.fetch_add(1, Ordering::SeqCst);
|
||||
}
|
||||
if kind.can_write() {
|
||||
pipe.writer_count.fetch_add(1, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
let id = next_id();
|
||||
HANDLES.write(token.token()).insert(
|
||||
id,
|
||||
Handle::Endpoint(EndpointHandle { pipe, kind, named }),
|
||||
);
|
||||
id
|
||||
}
|
||||
|
||||
fn drop_wait_conditions_if_possible(pipe: Arc<Pipe>, token: &mut CleanLockToken) {
|
||||
if let Some(pipe) = Arc::into_inner(pipe) {
|
||||
{
|
||||
pipe.read_condition.into_drop(token);
|
||||
}
|
||||
{
|
||||
pipe.write_condition.into_drop(token);
|
||||
}
|
||||
}
|
||||
fn from_raw_id(id: usize) -> (bool, usize) {
|
||||
(id & WRITE_NOT_READ_BIT != 0, id & !WRITE_NOT_READ_BIT)
|
||||
}
|
||||
|
||||
pub fn pipe(token: &mut CleanLockToken) -> Result<(usize, usize)> {
|
||||
let pipe = Arc::new(Pipe::new());
|
||||
let read_id = open_endpoint(Arc::clone(&pipe), EndpointKind::Read, None, token);
|
||||
let write_id = open_endpoint(pipe, EndpointKind::Write, None, token);
|
||||
// Bit 0 is used for WRITE_NOT_READ_BIT
|
||||
let id = PIPE_NEXT_ID.fetch_add(2, Ordering::Relaxed);
|
||||
|
||||
Ok((read_id, write_id))
|
||||
}
|
||||
PIPES.write(token.token()).insert(
|
||||
id,
|
||||
Handle::Pipe(Arc::new(Pipe {
|
||||
queue: Mutex::new(VecDeque::new()),
|
||||
read_condition: WaitCondition::new(),
|
||||
write_condition: WaitCondition::new(),
|
||||
writer_is_alive: AtomicBool::new(true),
|
||||
reader_is_alive: AtomicBool::new(true),
|
||||
has_run_dup: AtomicBool::new(false),
|
||||
fd_queue: Mutex::new(VecDeque::new()),
|
||||
})),
|
||||
);
|
||||
|
||||
pub fn named_pipe_exists(path: &str, token: &mut CleanLockToken) -> bool {
|
||||
NAMED_PIPES.read(token.token()).contains_key(path)
|
||||
}
|
||||
|
||||
pub fn create_named_pipe(
|
||||
path: &str,
|
||||
display_path: &str,
|
||||
mode: u16,
|
||||
flags: usize,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
validate_named_fifo_open(flags)?;
|
||||
|
||||
let named = {
|
||||
let mut named_pipes = NAMED_PIPES.write(token.token());
|
||||
if named_pipes.contains_key(path) {
|
||||
return Err(Error::new(EEXIST));
|
||||
}
|
||||
|
||||
let named = Arc::new(NamedPipe {
|
||||
path: display_path.to_string(),
|
||||
mode,
|
||||
active: Mutex::new(None),
|
||||
});
|
||||
named_pipes.insert(path.to_string(), Arc::clone(&named));
|
||||
named
|
||||
};
|
||||
|
||||
let kind = endpoint_kind_from_flags(flags)?;
|
||||
let pipe = Arc::new(Pipe::new());
|
||||
*named.active.lock(token.token()) = Some(Arc::clone(&pipe));
|
||||
|
||||
Ok(open_endpoint(pipe, kind, Some(named), token))
|
||||
}
|
||||
|
||||
pub fn open_named_pipe(path: &str, flags: usize, token: &mut CleanLockToken) -> Result<Option<usize>> {
|
||||
validate_named_fifo_open(flags)?;
|
||||
|
||||
let named = match NAMED_PIPES.read(token.token()).get(path) {
|
||||
Some(named) => Arc::clone(named),
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
let kind = endpoint_kind_from_flags(flags)?;
|
||||
let pipe = {
|
||||
let mut active = named.active.lock(token.token());
|
||||
match active.as_ref() {
|
||||
Some(pipe) => Arc::clone(pipe),
|
||||
None => {
|
||||
let pipe = Arc::new(Pipe::new());
|
||||
*active = Some(Arc::clone(&pipe));
|
||||
pipe
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Some(open_endpoint(pipe, kind, Some(named), token)))
|
||||
}
|
||||
|
||||
pub fn unlink_named_pipe(path: &str, token: &mut CleanLockToken) -> bool {
|
||||
NAMED_PIPES.write(token.token()).remove(path).is_some()
|
||||
Ok((id, id | WRITE_NOT_READ_BIT))
|
||||
}
|
||||
|
||||
pub struct PipeScheme;
|
||||
|
||||
impl PipeScheme {
|
||||
fn get_endpoint(id: usize, token: &mut CleanLockToken) -> Result<EndpointHandle> {
|
||||
HANDLES
|
||||
fn get_pipe(key: usize, token: &mut CleanLockToken) -> Result<Arc<Pipe>> {
|
||||
PIPES
|
||||
.read(token.token())
|
||||
.get(&id)
|
||||
.get(&key)
|
||||
.and_then(|handle| match handle {
|
||||
Handle::Endpoint(endpoint) => Some(endpoint.clone()),
|
||||
Handle::SchemeRoot => None,
|
||||
Handle::Pipe(pipe) => Some(Arc::clone(pipe)),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or(Error::new(EBADF))
|
||||
}
|
||||
@@ -248,33 +82,32 @@ impl PipeScheme {
|
||||
|
||||
impl KernelScheme for PipeScheme {
|
||||
fn scheme_root(&self, token: &mut CleanLockToken) -> Result<usize> {
|
||||
let id = next_id();
|
||||
HANDLES.write(token.token()).insert(id, Handle::SchemeRoot);
|
||||
let id = PIPE_NEXT_ID.fetch_add(2, Ordering::Relaxed);
|
||||
PIPES.write(token.token()).insert(id, Handle::SchemeRoot);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
fn fevent(
|
||||
&self,
|
||||
id: usize,
|
||||
flags: EventFlags,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<EventFlags> {
|
||||
let endpoint = Self::get_endpoint(id, token)?;
|
||||
let (is_writer_not_reader, key) = from_raw_id(id);
|
||||
let pipe = Self::get_pipe(key, token)?;
|
||||
|
||||
let mut ready = EventFlags::empty();
|
||||
|
||||
if endpoint.kind.can_write()
|
||||
if is_writer_not_reader
|
||||
&& flags.contains(EVENT_WRITE)
|
||||
&& (endpoint.pipe.queue.lock(token.token()).len() <= MAX_QUEUE_SIZE
|
||||
|| endpoint.pipe.reader_count.load(Ordering::Acquire) == 0)
|
||||
&& (pipe.queue.lock(token.token()).len() <= MAX_QUEUE_SIZE
|
||||
|| !pipe.reader_is_alive.load(Ordering::Acquire))
|
||||
{
|
||||
ready |= EventFlags::EVENT_WRITE;
|
||||
}
|
||||
|
||||
if endpoint.kind.can_read()
|
||||
if !is_writer_not_reader
|
||||
&& flags.contains(EVENT_READ)
|
||||
&& (!endpoint.pipe.queue.lock(token.token()).is_empty()
|
||||
|| endpoint.pipe.writer_count.load(Ordering::Acquire) == 0)
|
||||
&& (!pipe.queue.lock(token.token()).is_empty()
|
||||
|| !pipe.writer_is_alive.load(Ordering::Acquire))
|
||||
{
|
||||
ready |= EventFlags::EVENT_READ;
|
||||
}
|
||||
@@ -283,48 +116,46 @@ impl KernelScheme for PipeScheme {
|
||||
}
|
||||
|
||||
fn close(&self, id: usize, token: &mut CleanLockToken) -> Result<()> {
|
||||
let handle = HANDLES
|
||||
.write(token.token())
|
||||
.remove(&id)
|
||||
.ok_or(Error::new(EBADF))?;
|
||||
let (is_write_not_read, key) = from_raw_id(id);
|
||||
|
||||
let Handle::Endpoint(endpoint) = handle else {
|
||||
return Ok(());
|
||||
let pipe = Self::get_pipe(key, token)?;
|
||||
let scheme_id = GlobalSchemes::Pipe.scheme_id();
|
||||
|
||||
let can_remove = if is_write_not_read {
|
||||
pipe.writer_is_alive.store(false, Ordering::SeqCst);
|
||||
event::trigger(scheme_id, key, EVENT_READ, token);
|
||||
pipe.read_condition.notify(token);
|
||||
|
||||
!pipe.reader_is_alive.load(Ordering::SeqCst)
|
||||
} else {
|
||||
pipe.reader_is_alive.store(false, Ordering::SeqCst);
|
||||
event::trigger(scheme_id, key | WRITE_NOT_READ_BIT, EVENT_WRITE, token);
|
||||
pipe.write_condition.notify(token);
|
||||
|
||||
!pipe.writer_is_alive.load(Ordering::SeqCst)
|
||||
};
|
||||
|
||||
let mut last_reader = false;
|
||||
let mut last_writer = false;
|
||||
|
||||
if endpoint.kind.can_read() {
|
||||
last_reader = endpoint.pipe.reader_count.fetch_sub(1, Ordering::SeqCst) == 1;
|
||||
}
|
||||
if endpoint.kind.can_write() {
|
||||
last_writer = endpoint.pipe.writer_count.fetch_sub(1, Ordering::SeqCst) == 1;
|
||||
}
|
||||
|
||||
if last_writer {
|
||||
trigger_matching(&endpoint.pipe, true, false, EVENT_READ, token);
|
||||
endpoint.pipe.read_condition.notify(token);
|
||||
}
|
||||
if last_reader {
|
||||
trigger_matching(&endpoint.pipe, false, true, EVENT_WRITE, token);
|
||||
endpoint.pipe.write_condition.notify(token);
|
||||
}
|
||||
|
||||
let no_readers = endpoint.pipe.reader_count.load(Ordering::SeqCst) == 0;
|
||||
let no_writers = endpoint.pipe.writer_count.load(Ordering::SeqCst) == 0;
|
||||
if no_readers && no_writers {
|
||||
if let Some(named) = endpoint.named {
|
||||
let mut active = named.active.lock(token.token());
|
||||
if active
|
||||
.as_ref()
|
||||
.is_some_and(|active_pipe| Arc::ptr_eq(active_pipe, &endpoint.pipe))
|
||||
if can_remove {
|
||||
let handle = PIPES.write(token.token()).remove(&key);
|
||||
if let Some(Handle::Pipe(pipe)) = handle
|
||||
&& let Some(pipe) = Arc::into_inner(pipe)
|
||||
{
|
||||
{
|
||||
*active = None;
|
||||
pipe.read_condition.into_drop(token);
|
||||
}
|
||||
{
|
||||
pipe.write_condition.into_drop(token);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
drop_wait_conditions_if_possible(endpoint.pipe, token);
|
||||
if let Some(pipe) = Arc::into_inner(pipe) {
|
||||
{
|
||||
pipe.read_condition.into_drop(token);
|
||||
}
|
||||
{
|
||||
pipe.write_condition.into_drop(token);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -337,9 +168,9 @@ impl KernelScheme for PipeScheme {
|
||||
_ctx: CallerCtx,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<OpenResult> {
|
||||
let endpoint = Self::get_endpoint(old_id, token)?;
|
||||
let (is_writer_not_reader, key) = from_raw_id(old_id);
|
||||
|
||||
if !endpoint.kind.can_read() {
|
||||
if is_writer_not_reader {
|
||||
return Err(Error::new(EBADF));
|
||||
}
|
||||
|
||||
@@ -349,17 +180,17 @@ impl KernelScheme for PipeScheme {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
|
||||
let pipe = Self::get_pipe(key, token)?;
|
||||
|
||||
if pipe.has_run_dup.swap(true, Ordering::SeqCst) {
|
||||
return Err(Error::new(EBADF));
|
||||
}
|
||||
|
||||
Ok(OpenResult::SchemeLocal(
|
||||
open_endpoint(
|
||||
Arc::clone(&endpoint.pipe),
|
||||
EndpointKind::Write,
|
||||
endpoint.named,
|
||||
token,
|
||||
),
|
||||
key | WRITE_NOT_READ_BIT,
|
||||
InternalFlags::empty(),
|
||||
))
|
||||
}
|
||||
|
||||
fn kopenat(
|
||||
&self,
|
||||
id: usize,
|
||||
@@ -369,47 +200,40 @@ impl KernelScheme for PipeScheme {
|
||||
_ctx: CallerCtx,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<OpenResult> {
|
||||
let is_scheme_root = {
|
||||
let handles = HANDLES.read(token.token());
|
||||
match handles.get(&id) {
|
||||
Some(Handle::SchemeRoot) => true,
|
||||
Some(Handle::Endpoint(_)) => false,
|
||||
None => return Err(Error::new(EBADF)),
|
||||
}
|
||||
};
|
||||
let (_, key) = from_raw_id(id);
|
||||
|
||||
if is_scheme_root {
|
||||
let path = user_buf.as_str().or(Err(Error::new(EINVAL)))?;
|
||||
if !path.trim_start_matches('/').is_empty() {
|
||||
return Err(Error::new(ENOENT));
|
||||
{
|
||||
let guard = PIPES.read(token.token());
|
||||
if let Some(Handle::SchemeRoot) = guard.get(&key) {
|
||||
} else if let Some(Handle::Pipe(pipe_arc)) = guard.get(&key) {
|
||||
let pipe = Arc::clone(pipe_arc);
|
||||
drop(guard);
|
||||
|
||||
if user_buf.as_bytes() == b"write" {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
|
||||
if pipe.has_run_dup.swap(true, Ordering::SeqCst) {
|
||||
return Err(Error::new(EBADF));
|
||||
}
|
||||
|
||||
let pipe = Arc::new(Pipe::new());
|
||||
return Ok(OpenResult::SchemeLocal(
|
||||
open_endpoint(pipe, EndpointKind::Read, None, token),
|
||||
key | WRITE_NOT_READ_BIT,
|
||||
InternalFlags::empty(),
|
||||
));
|
||||
} else {
|
||||
return Err(Error::new(EBADF));
|
||||
}
|
||||
}
|
||||
|
||||
let endpoint = Self::get_endpoint(id, token)?;
|
||||
if !endpoint.kind.can_read() {
|
||||
return Err(Error::new(EBADF));
|
||||
let path = user_buf.as_str().or(Err(Error::new(EINVAL)))?;
|
||||
if !path.trim_start_matches('/').is_empty() {
|
||||
return Err(Error::new(ENOENT));
|
||||
}
|
||||
|
||||
let path = user_buf.as_bytes();
|
||||
if !path.is_empty() && path != b"write" {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
let (read_id, _) = pipe(token)?;
|
||||
|
||||
Ok(OpenResult::SchemeLocal(
|
||||
open_endpoint(
|
||||
Arc::clone(&endpoint.pipe),
|
||||
EndpointKind::Write,
|
||||
endpoint.named,
|
||||
token,
|
||||
),
|
||||
InternalFlags::empty(),
|
||||
))
|
||||
Ok(OpenResult::SchemeLocal(read_id, InternalFlags::empty()))
|
||||
}
|
||||
|
||||
fn kread(
|
||||
@@ -420,15 +244,16 @@ impl KernelScheme for PipeScheme {
|
||||
_stored_flags: u32,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
let endpoint = Self::get_endpoint(id, token)?;
|
||||
let (is_write_not_read, key) = from_raw_id(id);
|
||||
|
||||
if !endpoint.kind.can_read() {
|
||||
if is_write_not_read {
|
||||
return Err(Error::new(EBADF));
|
||||
}
|
||||
let pipe = Self::get_pipe(key, token)?;
|
||||
|
||||
loop {
|
||||
let vec = endpoint.pipe.queue.lock(token.token());
|
||||
let (mut vec, mut lock_token) = vec.into_split();
|
||||
let vec = pipe.queue.lock(token.token());
|
||||
let (mut vec, mut token) = vec.into_split();
|
||||
|
||||
let (s1, s2) = vec.as_slices();
|
||||
let s1_count = core::cmp::min(user_buf.len(), s1.len());
|
||||
@@ -448,34 +273,28 @@ impl KernelScheme for PipeScheme {
|
||||
let _ = vec.drain(..bytes_read);
|
||||
|
||||
if bytes_read > 0 {
|
||||
drop(vec);
|
||||
drop(lock_token);
|
||||
trigger_matching(&endpoint.pipe, false, true, EVENT_WRITE, token);
|
||||
endpoint.pipe.write_condition.notify(token);
|
||||
event::trigger_locked(
|
||||
GlobalSchemes::Pipe.scheme_id(),
|
||||
key | WRITE_NOT_READ_BIT,
|
||||
EVENT_WRITE,
|
||||
token.token(),
|
||||
);
|
||||
pipe.write_condition.notify_locked(token.token());
|
||||
|
||||
return Ok(bytes_read);
|
||||
}
|
||||
|
||||
if user_buf.is_empty() {
|
||||
} else if user_buf.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
if endpoint.pipe.writer_count.load(Ordering::SeqCst) == 0 {
|
||||
if !pipe.writer_is_alive.load(Ordering::SeqCst) {
|
||||
return Ok(0);
|
||||
}
|
||||
if fcntl_flags & O_NONBLOCK as u32 != 0 {
|
||||
} else if fcntl_flags & O_NONBLOCK as u32 != 0 {
|
||||
return Err(Error::new(EAGAIN));
|
||||
}
|
||||
if !endpoint
|
||||
.pipe
|
||||
.read_condition
|
||||
.wait(vec, "PipeRead::read", &mut lock_token)
|
||||
{
|
||||
} else if !pipe.read_condition.wait(vec, "PipeRead::read", &mut token) {
|
||||
return Err(Error::new(EINTR));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn kwrite(
|
||||
&self,
|
||||
id: usize,
|
||||
@@ -484,17 +303,18 @@ impl KernelScheme for PipeScheme {
|
||||
_stored_flags: u32,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
let endpoint = Self::get_endpoint(id, token)?;
|
||||
let (is_write_not_read, key) = from_raw_id(id);
|
||||
|
||||
if !endpoint.kind.can_write() {
|
||||
if !is_write_not_read {
|
||||
return Err(Error::new(EBADF));
|
||||
}
|
||||
let pipe = Self::get_pipe(key, token)?;
|
||||
|
||||
loop {
|
||||
let vec = endpoint.pipe.queue.lock(token.token());
|
||||
let (mut vec, mut lock_token) = vec.into_split();
|
||||
let vec = pipe.queue.lock(token.token());
|
||||
let (mut vec, mut token) = vec.into_split();
|
||||
|
||||
if endpoint.pipe.reader_count.load(Ordering::Relaxed) == 0 {
|
||||
if !pipe.reader_is_alive.load(Ordering::Relaxed) {
|
||||
return Err(Error::new(EPIPE));
|
||||
}
|
||||
|
||||
@@ -509,6 +329,7 @@ impl KernelScheme for PipeScheme {
|
||||
|
||||
let mut bytes_written = 0;
|
||||
|
||||
// TODO: Modify VecDeque so that the unwritten portions can be accessed directly?
|
||||
for (idx, chunk) in src_buf.in_variable_chunks(TMPBUF_SIZE).enumerate() {
|
||||
let chunk_byte_count = match chunk.copy_common_bytes_to_slice(&mut tmp_buf) {
|
||||
Ok(c) => c,
|
||||
@@ -520,52 +341,41 @@ impl KernelScheme for PipeScheme {
|
||||
}
|
||||
|
||||
if bytes_written > 0 {
|
||||
drop(vec);
|
||||
drop(lock_token);
|
||||
trigger_matching(&endpoint.pipe, true, false, EVENT_READ, token);
|
||||
endpoint.pipe.read_condition.notify(token);
|
||||
event::trigger_locked(
|
||||
GlobalSchemes::Pipe.scheme_id(),
|
||||
key,
|
||||
EVENT_READ,
|
||||
token.token(),
|
||||
);
|
||||
pipe.read_condition.notify_locked(token.token());
|
||||
|
||||
return Ok(bytes_written);
|
||||
}
|
||||
|
||||
if user_buf.is_empty() {
|
||||
} else if user_buf.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
if fcntl_flags & O_NONBLOCK as u32 != 0 {
|
||||
return Err(Error::new(EAGAIN));
|
||||
}
|
||||
if !endpoint
|
||||
.pipe
|
||||
} else if !pipe
|
||||
.write_condition
|
||||
.wait(vec, "PipeWrite::write", &mut lock_token)
|
||||
.wait(vec, "PipeWrite::write", &mut token)
|
||||
{
|
||||
return Err(Error::new(EINTR));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn kfpath(&self, id: usize, buf: UserSliceWo, token: &mut CleanLockToken) -> Result<usize> {
|
||||
let endpoint = Self::get_endpoint(id, token)?;
|
||||
if let Some(named) = endpoint.named {
|
||||
buf.copy_common_bytes_from_slice(named.path.as_bytes())
|
||||
} else {
|
||||
buf.copy_common_bytes_from_slice("/scheme/pipe/".as_bytes())
|
||||
}
|
||||
fn kfpath(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result<usize> {
|
||||
//TODO: construct useful path?
|
||||
buf.copy_common_bytes_from_slice("/scheme/pipe/".as_bytes())
|
||||
}
|
||||
|
||||
fn kfstat(&self, id: usize, buf: UserSliceWo, token: &mut CleanLockToken) -> Result<()> {
|
||||
let endpoint = Self::get_endpoint(id, token)?;
|
||||
let mode = endpoint.named.map_or(0o666, |named| named.mode);
|
||||
|
||||
fn kfstat(&self, _id: usize, buf: UserSliceWo, _token: &mut CleanLockToken) -> Result<()> {
|
||||
buf.copy_exactly(&Stat {
|
||||
st_mode: MODE_FIFO | mode,
|
||||
st_mode: MODE_FIFO | 0o666,
|
||||
..Default::default()
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn kfdwrite(
|
||||
&self,
|
||||
id: usize,
|
||||
@@ -575,17 +385,23 @@ impl KernelScheme for PipeScheme {
|
||||
_metadata: &[u64],
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
let endpoint = Self::get_endpoint(id, token)?;
|
||||
let (is_write_not_read, key) = from_raw_id(id);
|
||||
|
||||
if !endpoint.kind.can_write() {
|
||||
if !is_write_not_read {
|
||||
return Err(Error::new(EBADF));
|
||||
}
|
||||
let pipe = match Self::get_pipe(key, token) {
|
||||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
|
||||
loop {
|
||||
let vec = endpoint.pipe.fd_queue.lock(token.token());
|
||||
let (mut vec, mut lock_token) = vec.into_split();
|
||||
let vec = pipe.fd_queue.lock(token.token());
|
||||
let (mut vec, mut token) = vec.into_split();
|
||||
|
||||
if endpoint.pipe.reader_count.load(Ordering::Relaxed) == 0 {
|
||||
if !pipe.reader_is_alive.load(Ordering::Relaxed) {
|
||||
return Err(Error::new(EPIPE));
|
||||
}
|
||||
if descs.is_empty() {
|
||||
@@ -605,24 +421,25 @@ impl KernelScheme for PipeScheme {
|
||||
let fds_written = vec.len() - before_len;
|
||||
|
||||
if fds_written > 0 {
|
||||
drop(vec);
|
||||
drop(lock_token);
|
||||
trigger_matching(&endpoint.pipe, true, false, EVENT_READ, token);
|
||||
endpoint.pipe.read_condition.notify(token);
|
||||
event::trigger_locked(
|
||||
GlobalSchemes::Pipe.scheme_id(),
|
||||
key,
|
||||
EVENT_READ,
|
||||
token.token(),
|
||||
);
|
||||
pipe.read_condition.notify_locked(token.token());
|
||||
|
||||
return Ok(fds_written);
|
||||
}
|
||||
|
||||
if !endpoint
|
||||
.pipe
|
||||
if !pipe
|
||||
.write_condition
|
||||
.wait(vec, "PipeWrite::write", &mut lock_token)
|
||||
.wait(vec, "PipeWrite::write", &mut token)
|
||||
{
|
||||
return Err(Error::new(EINTR));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn kfdread(
|
||||
&self,
|
||||
id: usize,
|
||||
@@ -631,19 +448,25 @@ impl KernelScheme for PipeScheme {
|
||||
_metadata: &[u64],
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
let endpoint = Self::get_endpoint(id, token)?;
|
||||
let (is_write_not_read, key) = from_raw_id(id);
|
||||
|
||||
if !endpoint.kind.can_read() {
|
||||
if is_write_not_read {
|
||||
return Err(Error::new(EBADF));
|
||||
}
|
||||
let pipe = match Self::get_pipe(key, token) {
|
||||
Ok(p) => p,
|
||||
Err(e) => {
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
|
||||
if payload.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
loop {
|
||||
let vec = endpoint.pipe.fd_queue.lock(token.token());
|
||||
let (mut vec, mut lock_token) = vec.into_split();
|
||||
let vec = pipe.fd_queue.lock(token.token());
|
||||
let (mut vec, mut token) = vec.into_split();
|
||||
|
||||
let fds_available = vec.len();
|
||||
let max_fds_read = payload.len() / size_of::<usize>();
|
||||
@@ -656,33 +479,31 @@ impl KernelScheme for PipeScheme {
|
||||
fds_to_transfer,
|
||||
payload,
|
||||
flags.contains(CallFlags::FD_CLOEXEC),
|
||||
&mut lock_token,
|
||||
&mut token,
|
||||
)?;
|
||||
} else {
|
||||
bulk_add_fds(
|
||||
fds_to_transfer,
|
||||
payload,
|
||||
flags.contains(CallFlags::FD_CLOEXEC),
|
||||
&mut lock_token,
|
||||
&mut token,
|
||||
)?;
|
||||
}
|
||||
|
||||
drop(vec);
|
||||
drop(lock_token);
|
||||
trigger_matching(&endpoint.pipe, false, true, EVENT_WRITE, token);
|
||||
endpoint.pipe.write_condition.notify(token);
|
||||
event::trigger_locked(
|
||||
GlobalSchemes::Pipe.scheme_id(),
|
||||
key | WRITE_NOT_READ_BIT,
|
||||
EVENT_WRITE,
|
||||
token.token(),
|
||||
);
|
||||
pipe.write_condition.notify_locked(token.token());
|
||||
|
||||
return Ok(fds_to_read);
|
||||
}
|
||||
|
||||
if endpoint.pipe.writer_count.load(Ordering::SeqCst) == 0 {
|
||||
if !pipe.writer_is_alive.load(Ordering::SeqCst) {
|
||||
return Ok(0);
|
||||
}
|
||||
if !endpoint
|
||||
.pipe
|
||||
.read_condition
|
||||
.wait(vec, "PipeRead::read", &mut lock_token)
|
||||
{
|
||||
} else if !pipe.read_condition.wait(vec, "PipeRead::read", &mut token) {
|
||||
return Err(Error::new(EINTR));
|
||||
}
|
||||
}
|
||||
@@ -690,23 +511,11 @@ impl KernelScheme for PipeScheme {
|
||||
}
|
||||
|
||||
pub struct Pipe {
|
||||
read_condition: WaitCondition,
|
||||
write_condition: WaitCondition,
|
||||
read_condition: WaitCondition, // signals whether there are available bytes to read
|
||||
write_condition: WaitCondition, // signals whether there is room for additional bytes
|
||||
queue: Mutex<L1, VecDeque<u8>>,
|
||||
reader_count: AtomicUsize,
|
||||
writer_count: AtomicUsize,
|
||||
reader_is_alive: AtomicBool, // starts set, unset when reader closes
|
||||
writer_is_alive: AtomicBool, // starts set, unset when writer closes
|
||||
has_run_dup: AtomicBool,
|
||||
fd_queue: Mutex<L1, VecDeque<Arc<LockedFileDescription>>>,
|
||||
}
|
||||
|
||||
impl Pipe {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
read_condition: WaitCondition::new(),
|
||||
write_condition: WaitCondition::new(),
|
||||
queue: Mutex::new(VecDeque::new()),
|
||||
reader_count: AtomicUsize::new(0),
|
||||
writer_count: AtomicUsize::new(0),
|
||||
fd_queue: Mutex::new(VecDeque::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,7 +105,6 @@ enum ContextHandle {
|
||||
// Attr handles, to set ens/euid/egid/pid.
|
||||
Authority,
|
||||
Attr,
|
||||
Groups,
|
||||
|
||||
Status {
|
||||
privileged: bool,
|
||||
@@ -262,7 +261,6 @@ impl ProcScheme {
|
||||
let handle = match actual_name {
|
||||
"attrs" => ContextHandle::Attr,
|
||||
"status" => ContextHandle::Status { privileged: true },
|
||||
"groups" => ContextHandle::Groups,
|
||||
_ => return Err(Error::new(ENOENT)),
|
||||
};
|
||||
|
||||
@@ -308,11 +306,6 @@ impl ProcScheme {
|
||||
let id = NonZeroUsize::new(NEXT_ID.fetch_add(1, Ordering::Relaxed))
|
||||
.ok_or(Error::new(EMFILE))?;
|
||||
let context = context::spawn(true, Some(id), ret, token)?;
|
||||
{
|
||||
let parent_groups =
|
||||
context::current().read(token.token()).groups.clone();
|
||||
context.write(token.token()).groups = parent_groups;
|
||||
}
|
||||
HANDLES.write(token.token()).insert(
|
||||
id.get(),
|
||||
Handle {
|
||||
@@ -432,7 +425,6 @@ impl KernelScheme for ProcScheme {
|
||||
}
|
||||
|
||||
fn close(&self, id: usize, token: &mut CleanLockToken) -> Result<()> {
|
||||
let mut inner_token = unsafe { CleanLockToken::new() };
|
||||
let handle = HANDLES
|
||||
.write(token.token())
|
||||
.remove(&id)
|
||||
@@ -460,7 +452,9 @@ impl KernelScheme for ProcScheme {
|
||||
))]
|
||||
regs.set_arg1(arg1);
|
||||
|
||||
Ok(context.set_addr_space(Some(new), inner_token.downgrade()))
|
||||
// TODO: Lock ordering violation
|
||||
let mut token = unsafe { CleanLockToken::new() };
|
||||
Ok(context.set_addr_space(Some(new), token.downgrade()))
|
||||
})?;
|
||||
if let Some(old_ctx) = old_ctx
|
||||
&& let Some(addrspace) = Arc::into_inner(old_ctx)
|
||||
@@ -499,7 +493,6 @@ impl KernelScheme for ProcScheme {
|
||||
consume: bool,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
let mut inner_token = unsafe { CleanLockToken::new() };
|
||||
let handle = HANDLES
|
||||
.read(token.token())
|
||||
.get(&id)
|
||||
@@ -590,7 +583,9 @@ impl KernelScheme for ProcScheme {
|
||||
};
|
||||
// TODO: Allocated or AllocatedShared?
|
||||
let addrsp = AddrSpace::current()?;
|
||||
let page = addrsp.acquire_write(inner_token.downgrade()).mmap_anywhere(
|
||||
// TODO: Lock ordering violation
|
||||
let mut token = unsafe { CleanLockToken::new() };
|
||||
let page = addrsp.acquire_write(token.downgrade()).mmap_anywhere(
|
||||
&addrsp,
|
||||
NonZeroUsize::new(1).unwrap(),
|
||||
MapFlags::PROT_READ | MapFlags::PROT_WRITE,
|
||||
@@ -854,17 +849,17 @@ impl KernelScheme for ProcScheme {
|
||||
}
|
||||
}
|
||||
fn extract_scheme_number(fd: usize, token: &mut CleanLockToken) -> Result<(KernelSchemes, usize)> {
|
||||
let desc = {
|
||||
let (scheme_id, number) = {
|
||||
let current_lock = context::current();
|
||||
let mut current = current_lock.read(token.token());
|
||||
let (context, mut context_token) = current.token_split();
|
||||
let (context, mut token) = current.token_split();
|
||||
let file_descriptor = context
|
||||
.get_file(FileHandle::from(fd), &mut context_token)
|
||||
.get_file(FileHandle::from(fd), &mut token)
|
||||
.ok_or(Error::new(EBADF))?;
|
||||
*file_descriptor.description.read(context_token.token())
|
||||
let desc = file_descriptor.description.read(token.token());
|
||||
(desc.scheme, desc.number)
|
||||
};
|
||||
let scheme = desc.get_scheme(token)?;
|
||||
let number = desc.number;
|
||||
let scheme = scheme::get_scheme(token.token(), scheme_id)?;
|
||||
|
||||
Ok((scheme, number))
|
||||
}
|
||||
@@ -1276,39 +1271,6 @@ impl ContextHandle {
|
||||
guard.prio = (info.prio as usize).min(39);
|
||||
Ok(size_of::<ProcSchemeAttrs>())
|
||||
}
|
||||
Self::Groups => {
|
||||
const NGROUPS_MAX: usize = 65536;
|
||||
if buf.len() % size_of::<u32>() != 0 {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
let count = buf.len() / size_of::<u32>();
|
||||
if count > NGROUPS_MAX {
|
||||
return Err(Error::new(EINVAL));
|
||||
}
|
||||
let mut groups = Vec::with_capacity(count);
|
||||
for chunk in buf.in_exact_chunks(size_of::<u32>()).take(count) {
|
||||
groups.push(chunk.read_u32()?);
|
||||
}
|
||||
let proc_id = {
|
||||
let guard = context.read(token.token());
|
||||
guard.owner_proc_id
|
||||
};
|
||||
{
|
||||
let mut guard = context.write(token.token());
|
||||
guard.groups = groups.clone();
|
||||
}
|
||||
if let Some(pid) = proc_id {
|
||||
let mut contexts = context::contexts(token.downgrade());
|
||||
let (contexts, mut t) = contexts.token_split();
|
||||
for context_ref in contexts.iter() {
|
||||
let mut ctx = context_ref.write(t.token());
|
||||
if ctx.owner_proc_id == Some(pid) {
|
||||
ctx.groups = groups.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(count * size_of::<u32>())
|
||||
}
|
||||
ContextHandle::OpenViaDup => {
|
||||
let mut args = buf.usizes();
|
||||
|
||||
@@ -1513,15 +1475,6 @@ impl ContextHandle {
|
||||
debug_name,
|
||||
})
|
||||
}
|
||||
Self::Groups => {
|
||||
let c = &context.read(token.token());
|
||||
let max = buf.len() / size_of::<u32>();
|
||||
let count = c.groups.len().min(max);
|
||||
for (chunk, gid) in buf.in_exact_chunks(size_of::<u32>()).zip(&c.groups).take(count) {
|
||||
chunk.copy_from_slice(&gid.to_ne_bytes())?;
|
||||
}
|
||||
Ok(count * size_of::<u32>())
|
||||
}
|
||||
ContextHandle::Sighandler => {
|
||||
let data = match context.read(token.token()).sig {
|
||||
Some(ref sig) => SetSighandlerData {
|
||||
|
||||
@@ -80,7 +80,6 @@ const ONE: NonZeroUsize = match NonZeroUsize::new(1) {
|
||||
Some(one) => one,
|
||||
None => unreachable!(),
|
||||
};
|
||||
const MAX_SPURIOUS_WAKEUPS: usize = 100;
|
||||
|
||||
enum ParsedCqe {
|
||||
TriggerFevent {
|
||||
@@ -210,8 +209,6 @@ impl UserInner {
|
||||
caller_responsible: &mut PageSpan,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<Response> {
|
||||
let mut remaining_spurious_wakeups = MAX_SPURIOUS_WAKEUPS;
|
||||
|
||||
{
|
||||
// Disable preemption to avoid context switches between setting the
|
||||
// process state and sending the scheme request. The process is made
|
||||
@@ -264,10 +261,7 @@ impl UserInner {
|
||||
};
|
||||
|
||||
let states = self.states.lock(token.token());
|
||||
let (mut states, mut state_token) = states.into_split();
|
||||
let mut timed_out_descriptions = None;
|
||||
let mut remove_state = false;
|
||||
let mut timed_out = false;
|
||||
let (mut states, mut token) = states.into_split();
|
||||
match states.get_mut(sqe.tag as usize) {
|
||||
// invalid state
|
||||
None => return Err(Error::new(EBADFD)),
|
||||
@@ -280,35 +274,24 @@ impl UserInner {
|
||||
fds,
|
||||
} => {
|
||||
let maybe_eintr =
|
||||
eintr_if_sigkill(&mut callee_responsible, &mut state_token.token());
|
||||
|
||||
if maybe_eintr.is_ok() {
|
||||
remaining_spurious_wakeups =
|
||||
remaining_spurious_wakeups.saturating_sub(1);
|
||||
}
|
||||
|
||||
if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 {
|
||||
timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds));
|
||||
remove_state = true;
|
||||
} else {
|
||||
*o = State::Waiting {
|
||||
canceling: true,
|
||||
callee_responsible,
|
||||
context,
|
||||
fds,
|
||||
};
|
||||
}
|
||||
eintr_if_sigkill(&mut callee_responsible, &mut token.token());
|
||||
*o = State::Waiting {
|
||||
canceling: true,
|
||||
callee_responsible,
|
||||
context,
|
||||
fds,
|
||||
};
|
||||
|
||||
maybe_eintr?;
|
||||
|
||||
if remove_state {
|
||||
states.remove(sqe.tag as usize);
|
||||
timed_out = true;
|
||||
} else {
|
||||
context::current()
|
||||
.write(state_token.token())
|
||||
.block("UserInner::call (woken up after cancelation request)");
|
||||
}
|
||||
context::current()
|
||||
.write(token.token())
|
||||
.block("UserInner::call (woken up after cancelation request)");
|
||||
|
||||
// We do not want to drop the lock before blocking
|
||||
// as if we get preempted in between we might miss a
|
||||
// wakeup.
|
||||
drop(states);
|
||||
}
|
||||
// spurious wakeup
|
||||
State::Waiting {
|
||||
@@ -317,76 +300,60 @@ impl UserInner {
|
||||
context,
|
||||
mut callee_responsible,
|
||||
} => {
|
||||
let maybe_eintr = eintr_if_sigkill(&mut callee_responsible, &mut token);
|
||||
let current_context = context::current();
|
||||
let maybe_eintr =
|
||||
eintr_if_sigkill(&mut callee_responsible, &mut state_token);
|
||||
|
||||
if maybe_eintr.is_ok() {
|
||||
remaining_spurious_wakeups =
|
||||
remaining_spurious_wakeups.saturating_sub(1);
|
||||
}
|
||||
|
||||
if maybe_eintr.is_ok() && remaining_spurious_wakeups == 0 {
|
||||
timed_out_descriptions = Some(Self::collect_descriptions_to_close(fds));
|
||||
remove_state = true;
|
||||
} else {
|
||||
*o = State::Waiting {
|
||||
// Currently we treat all spurious wakeups to have the same behavior
|
||||
// as signals (i.e., we send a cancellation request). It is not something
|
||||
// that should happen, but it certainly can happen, for example if a context
|
||||
// is awoken through its thread handle without setting any sig bits, or if the
|
||||
// caller clears its own sig bits. If it actually is a signal, then it is the
|
||||
// intended behavior.
|
||||
canceling: true,
|
||||
fds,
|
||||
context,
|
||||
callee_responsible,
|
||||
};
|
||||
}
|
||||
*o = State::Waiting {
|
||||
// Currently we treat all spurious wakeups to have the same behavior
|
||||
// as signals (i.e., we send a cancellation request). It is not something
|
||||
// that should happen, but it certainly can happen, for example if a context
|
||||
// is awoken through its thread handle without setting any sig bits, or if the
|
||||
// caller clears its own sig bits. If it actually is a signal, then it is the
|
||||
// intended behavior.
|
||||
canceling: true,
|
||||
fds,
|
||||
context,
|
||||
callee_responsible,
|
||||
};
|
||||
|
||||
maybe_eintr?;
|
||||
|
||||
if remove_state {
|
||||
states.remove(sqe.tag as usize);
|
||||
timed_out = true;
|
||||
} else {
|
||||
// We do not want to preempt between sending the
|
||||
// cancellation and blocking again where we might
|
||||
// miss a wakeup.
|
||||
let mut preempt =
|
||||
PreemptGuardL1::new(¤t_context, &mut state_token);
|
||||
let token = preempt.token();
|
||||
// We do not want to preempt between sending the
|
||||
// cancellation and blocking again where we might
|
||||
// miss a wakeup.
|
||||
let mut preempt = PreemptGuardL1::new(¤t_context, &mut token);
|
||||
let token = preempt.token();
|
||||
|
||||
self.todo.send_locked(
|
||||
Sqe {
|
||||
opcode: Opcode::Cancel as u8,
|
||||
sqe_flags: SqeFlags::ONEWAY,
|
||||
tag: sqe.tag,
|
||||
..Default::default()
|
||||
},
|
||||
token.token(),
|
||||
);
|
||||
event::trigger_locked(
|
||||
self.root_id,
|
||||
self.scheme_id.get(),
|
||||
EVENT_READ,
|
||||
token.token(),
|
||||
);
|
||||
self.todo.send_locked(
|
||||
Sqe {
|
||||
opcode: Opcode::Cancel as u8,
|
||||
sqe_flags: SqeFlags::ONEWAY,
|
||||
tag: sqe.tag,
|
||||
..Default::default()
|
||||
},
|
||||
token.token(),
|
||||
);
|
||||
event::trigger_locked(
|
||||
self.root_id,
|
||||
self.scheme_id.get(),
|
||||
EVENT_READ,
|
||||
token.token(),
|
||||
);
|
||||
|
||||
// 1. If cancellation was requested and arrived
|
||||
// before the scheme processed the request, an
|
||||
// acknowledgement will be sent back after the
|
||||
// cancellation is processed and we will be woken up
|
||||
// again. State will be State::Responded then.
|
||||
//
|
||||
// 2. If cancellation was requested but the scheme
|
||||
// already processed the request, we will receive
|
||||
// the actual response next and woken up again.
|
||||
// State will be State::Responded then.
|
||||
context::current()
|
||||
.write(token.token())
|
||||
.block("UserInner::call (spurious wakeup)");
|
||||
}
|
||||
// 1. If cancellation was requested and arrived
|
||||
// before the scheme processed the request, an
|
||||
// acknowledgement will be sent back after the
|
||||
// cancellation is processed and we will be woken up
|
||||
// again. State will be State::Responded then.
|
||||
//
|
||||
// 2. If cancellation was requested but the scheme
|
||||
// already processed the request, we will receive
|
||||
// the actual response next and woken up again.
|
||||
// State will be State::Responded then.
|
||||
context::current()
|
||||
.write(token.token())
|
||||
.block("UserInner::call (spurious wakeup)");
|
||||
drop(states);
|
||||
}
|
||||
|
||||
// invalid state
|
||||
@@ -401,70 +368,10 @@ impl UserInner {
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
if let Some(descriptions) = timed_out_descriptions {
|
||||
drop(states);
|
||||
for desc in descriptions {
|
||||
let _ = desc.try_close(token);
|
||||
}
|
||||
}
|
||||
|
||||
if timed_out {
|
||||
return Err(Error::new(ETIMEDOUT));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_descriptions_to_close(
|
||||
fds: Vec<Arc<LockedFileDescription>>,
|
||||
) -> Vec<FileDescription> {
|
||||
fds.into_iter()
|
||||
.filter_map(|fd| Arc::try_unwrap(fd).ok())
|
||||
.map(RwLock::into_inner)
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn fail_pending_calls(&self, token: &mut CleanLockToken) {
|
||||
let descriptions_to_close = {
|
||||
let mut states_lock = self.states.lock(token.token());
|
||||
let (states, mut lock_token) = states_lock.token_split();
|
||||
let mut descriptions_to_close = Vec::new();
|
||||
let mut states_to_remove = Vec::new();
|
||||
|
||||
for (id, state) in states.iter_mut() {
|
||||
match mem::replace(state, State::Placeholder) {
|
||||
State::Waiting { context, fds, .. } => {
|
||||
descriptions_to_close.extend(Self::collect_descriptions_to_close(fds));
|
||||
|
||||
match context.upgrade() {
|
||||
Some(context) => {
|
||||
*state = State::Responded(Response::Regular(
|
||||
Err(Error::new(ENODEV)),
|
||||
0,
|
||||
false,
|
||||
));
|
||||
context.write(lock_token.token()).unblock();
|
||||
}
|
||||
None => states_to_remove.push(id),
|
||||
}
|
||||
}
|
||||
old_state => *state = old_state,
|
||||
}
|
||||
}
|
||||
|
||||
for id in states_to_remove {
|
||||
states.remove(id);
|
||||
}
|
||||
|
||||
descriptions_to_close
|
||||
};
|
||||
|
||||
for desc in descriptions_to_close {
|
||||
let _ = desc.try_close(token);
|
||||
}
|
||||
}
|
||||
|
||||
/// Map a readable structure to the scheme's userspace and return the
|
||||
/// pointer
|
||||
#[must_use = "copying back to head/tail buffers can fail"]
|
||||
@@ -1376,7 +1283,6 @@ impl UserInner {
|
||||
}
|
||||
|
||||
pub fn into_drop(self, token: &mut CleanLockToken) {
|
||||
self.fail_pending_calls(token);
|
||||
self.todo.condition.into_drop(token);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,16 +74,14 @@ impl MemoryEntry {
|
||||
}
|
||||
|
||||
struct MemoryMap {
|
||||
entries: [MemoryEntry; 1024],
|
||||
entries: [MemoryEntry; 512],
|
||||
size: usize,
|
||||
}
|
||||
|
||||
impl MemoryMap {
|
||||
fn register(&mut self, base: usize, size: usize, kind: BootloaderMemoryKind) {
|
||||
if self.size >= self.entries.len() {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
unsafe { core::arch::asm!("out dx, al", in("dx") 0x3F8u16, in("al") b'!', options(nostack, preserves_flags)); }
|
||||
panic!("Early memory map overflow at entry {} (max {})", self.size, self.entries.len());
|
||||
panic!("Early memory map overflow!");
|
||||
}
|
||||
let start = if kind == BootloaderMemoryKind::Free {
|
||||
align_up(base)
|
||||
@@ -136,7 +134,7 @@ static MEMORY_MAP: SyncUnsafeCell<MemoryMap> = SyncUnsafeCell::new(MemoryMap {
|
||||
start: 0,
|
||||
end: 0,
|
||||
kind: BootloaderMemoryKind::Null,
|
||||
}; 1024],
|
||||
}; 512],
|
||||
size: 0,
|
||||
});
|
||||
|
||||
@@ -325,16 +323,7 @@ unsafe fn map_memory<A: Arch>(areas: &[MemoryArea], mut bump_allocator: &mut Bum
|
||||
}
|
||||
}
|
||||
|
||||
let kernel_area = match (*MEMORY_MAP.get()).kernel() {
|
||||
Some(area) => area,
|
||||
None => {
|
||||
println!("FATAL: kernel memory area not found in boot memory map");
|
||||
println!("Cannot determine kernel base address. Halting.");
|
||||
loop {
|
||||
core::hint::spin_loop();
|
||||
}
|
||||
}
|
||||
};
|
||||
let kernel_area = (*MEMORY_MAP.get()).kernel().unwrap();
|
||||
let kernel_base = kernel_area.start;
|
||||
let kernel_size = kernel_area.end.saturating_sub(kernel_area.start);
|
||||
// Map kernel at KERNEL_OFFSET
|
||||
|
||||
@@ -149,15 +149,6 @@ static BOOTSTRAP: spin::Once<Bootstrap> = spin::Once::new();
|
||||
pub(crate) static AP_READY: AtomicBool = AtomicBool::new(false);
|
||||
static BSP_READY: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
#[cold]
|
||||
fn halt_boot(message: &str) -> ! {
|
||||
print!("{message}");
|
||||
println!("Kernel boot cannot continue. Halting.");
|
||||
loop {
|
||||
hint::spin_loop();
|
||||
}
|
||||
}
|
||||
|
||||
/// This is the kernel entry point for the primary CPU. The arch crate is responsible for calling this
|
||||
pub(crate) fn kmain(bootstrap: Bootstrap) -> ! {
|
||||
let mut token = unsafe { CleanLockToken::new() };
|
||||
@@ -189,7 +180,9 @@ pub(crate) fn kmain(bootstrap: Bootstrap) -> ! {
|
||||
context.euid = 0;
|
||||
context.egid = 0;
|
||||
}
|
||||
Err(_err) => halt_boot("FATAL: failed to spawn first userspace process userspace_init\n"),
|
||||
Err(err) => {
|
||||
panic!("failed to spawn userspace_init: {:?}", err);
|
||||
}
|
||||
}
|
||||
|
||||
run_userspace(&mut token)
|
||||
|
||||
@@ -1,188 +0,0 @@
|
||||
//! MCS (Mellor-Crummey Scott) fair spinlock.
|
||||
//!
|
||||
//! Each waiter spins on its own local `locked` flag instead of a shared lock
|
||||
//! word, eliminating cache-line bouncing under contention. FIFO ordering
|
||||
//! guarantees fairness. O(1) cache-line transfers on unlock.
|
||||
//!
|
||||
//! Supports transitive priority inheritance: when CPU A waits on a lock held
|
||||
//! by CPU B, and CPU B waits on a lock held by CPU C, A's priority is
|
||||
//! propagated through the chain to C (up to MAX_PI_CHAIN_DEPTH hops).
|
||||
|
||||
use core::sync::atomic::{AtomicBool, AtomicPtr, AtomicU32, Ordering};
|
||||
use core::{hint, ptr};
|
||||
|
||||
use crate::percpu::PercpuBlock;
|
||||
|
||||
/// Maximum depth for transitive priority inheritance chain following.
|
||||
/// Prevents infinite loops from theoretical lock cycles and bounds latency.
|
||||
/// Linux uses 20; 8 is conservative for a microkernel with fewer nesting levels.
|
||||
const MAX_PI_CHAIN_DEPTH: u32 = 8;
|
||||
|
||||
/// A node in the MCS lock queue.
|
||||
pub struct McsNode {
|
||||
pub next: AtomicPtr<McsNode>,
|
||||
pub locked: AtomicBool,
|
||||
}
|
||||
|
||||
impl McsNode {
|
||||
pub const fn new() -> Self {
|
||||
Self {
|
||||
next: AtomicPtr::new(ptr::null_mut()),
|
||||
locked: AtomicBool::new(false),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Raw MCS spinlock primitive.
|
||||
pub struct McsRawLock {
|
||||
tail: AtomicPtr<McsNode>,
|
||||
/// CPU ID of the current lock holder (for priority inheritance).
|
||||
/// `u32::MAX` means no holder.
|
||||
holder_cpu: AtomicU32,
|
||||
}
|
||||
|
||||
impl McsRawLock {
|
||||
pub const fn new() -> Self {
|
||||
Self {
|
||||
tail: AtomicPtr::new(ptr::null_mut()),
|
||||
holder_cpu: AtomicU32::new(u32::MAX),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn acquire(&self, node: &McsNode) -> bool {
|
||||
node.next.store(ptr::null_mut(), Ordering::Relaxed);
|
||||
node.locked.store(true, Ordering::Relaxed);
|
||||
let prev = self.tail.swap((node as *const McsNode).cast_mut(), Ordering::AcqRel);
|
||||
if prev.is_null() {
|
||||
// Uncontended — record ourselves as holder
|
||||
let cpu_id = PercpuBlock::current().cpu_id.get();
|
||||
self.holder_cpu.store(cpu_id, Ordering::Release);
|
||||
return false;
|
||||
}
|
||||
unsafe {
|
||||
(*prev).next.store((node as *const McsNode).cast_mut(), Ordering::Release);
|
||||
}
|
||||
let percpu = PercpuBlock::current();
|
||||
// Record which lock we're spinning on (for transitive PI chain following)
|
||||
percpu.waiting_on_lock.store(
|
||||
(self as *const McsRawLock).cast_mut(),
|
||||
Ordering::Release,
|
||||
);
|
||||
let mut donated = false;
|
||||
while node.locked.load(Ordering::Acquire) {
|
||||
percpu.maybe_handle_tlb_shootdown();
|
||||
// Donate priority to the lock holder (transitively) once per acquisition
|
||||
if !donated {
|
||||
self.maybe_donate_priority(percpu);
|
||||
donated = true;
|
||||
}
|
||||
hint::spin_loop();
|
||||
}
|
||||
// Clear waiting_on_lock before proceeding — we now hold the lock
|
||||
percpu.waiting_on_lock.store(ptr::null_mut(), Ordering::Release);
|
||||
self.holder_cpu.store(percpu.cpu_id.get(), Ordering::Release);
|
||||
true
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn release(&self, node: &McsNode) {
|
||||
// Clear priority inheritance donation — we no longer hold the lock
|
||||
PercpuBlock::current().pi_donated_prio.store(u32::MAX, Ordering::Release);
|
||||
// Clear holder CPU
|
||||
self.holder_cpu.store(u32::MAX, Ordering::Release);
|
||||
|
||||
let next = node.next.load(Ordering::Acquire);
|
||||
if next.is_null() {
|
||||
if self
|
||||
.tail
|
||||
.compare_exchange(
|
||||
(node as *const McsNode).cast_mut(),
|
||||
ptr::null_mut(),
|
||||
Ordering::AcqRel,
|
||||
Ordering::Acquire,
|
||||
)
|
||||
.is_ok()
|
||||
{
|
||||
return;
|
||||
}
|
||||
while node.next.load(Ordering::Acquire).is_null() {
|
||||
hint::spin_loop();
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
(*node.next.load(Ordering::Acquire)).locked.store(false, Ordering::Release);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn try_acquire(&self, node: &McsNode) -> bool {
|
||||
node.next.store(ptr::null_mut(), Ordering::Relaxed);
|
||||
node.locked.store(true, Ordering::Relaxed);
|
||||
let ok = self
|
||||
.tail
|
||||
.compare_exchange(
|
||||
ptr::null_mut(),
|
||||
(node as *const McsNode).cast_mut(),
|
||||
Ordering::AcqRel,
|
||||
Ordering::Acquire,
|
||||
)
|
||||
.is_ok();
|
||||
if ok {
|
||||
let cpu_id = PercpuBlock::current().cpu_id.get();
|
||||
self.holder_cpu.store(cpu_id, Ordering::Release);
|
||||
}
|
||||
ok
|
||||
}
|
||||
|
||||
/// Donate current CPU's context priority to the lock holder's CPU,
|
||||
/// following the PI chain transitively (A→B→C).
|
||||
///
|
||||
/// Reads priority from PercpuBlock::current_prio (cached by the scheduler)
|
||||
/// to avoid acquiring any lock in the MCS spin loop.
|
||||
///
|
||||
/// Chain following: if the holder is itself waiting on another lock,
|
||||
/// we propagate our priority to that lock's holder too, up to
|
||||
/// MAX_PI_CHAIN_DEPTH hops.
|
||||
fn maybe_donate_priority(&self, my_percpu: &PercpuBlock) {
|
||||
let my_prio = my_percpu.current_prio.get() as u32;
|
||||
let mut current_holder_cpu = self.holder_cpu.load(Ordering::Relaxed);
|
||||
|
||||
for _ in 0..MAX_PI_CHAIN_DEPTH {
|
||||
if current_holder_cpu == u32::MAX {
|
||||
return;
|
||||
}
|
||||
let holder_percpu = crate::percpu::get_for_cpu(
|
||||
crate::cpu_set::LogicalCpuId::new(current_holder_cpu),
|
||||
);
|
||||
let Some(holder) = holder_percpu else {
|
||||
return;
|
||||
};
|
||||
|
||||
// Donate if our priority is higher (lower number) than current donation
|
||||
let current_donated = holder.pi_donated_prio.load(Ordering::Relaxed);
|
||||
if my_prio < current_donated {
|
||||
holder.pi_donated_prio.store(my_prio, Ordering::Release);
|
||||
}
|
||||
|
||||
// Follow the chain: is this holder also waiting on another lock?
|
||||
let next_lock_ptr = holder.waiting_on_lock.load(Ordering::Relaxed);
|
||||
if next_lock_ptr.is_null() {
|
||||
return;
|
||||
}
|
||||
// SAFETY: The pointed-to McsRawLock is a long-lived struct field
|
||||
// (e.g., part of the run queue). The holder is currently spinning
|
||||
// in acquire(), so the pointer is valid. We only read holder_cpu
|
||||
// (an atomic u32) — no mutable access needed.
|
||||
let next_holder_cpu =
|
||||
unsafe { (*next_lock_ptr).holder_cpu.load(Ordering::Relaxed) };
|
||||
|
||||
// Cycle detection: if the next holder is the same CPU we just visited, stop
|
||||
if next_holder_cpu == current_holder_cpu {
|
||||
return;
|
||||
}
|
||||
current_holder_cpu = next_holder_cpu;
|
||||
}
|
||||
// Chain depth exhausted — stop to bound latency
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
pub use self::{ordered::*, wait_condition::WaitCondition, wait_queue::WaitQueue};
|
||||
|
||||
pub mod mcs;
|
||||
pub mod ordered;
|
||||
pub mod wait_condition;
|
||||
pub mod wait_queue;
|
||||
|
||||
@@ -52,9 +52,7 @@
|
||||
//! *g1 = 12;
|
||||
//! ```
|
||||
use alloc::sync::Arc;
|
||||
use core::cell::UnsafeCell;
|
||||
use core::marker::PhantomData;
|
||||
use core::ptr;
|
||||
|
||||
use crate::percpu::PercpuBlock;
|
||||
|
||||
@@ -734,143 +732,3 @@ impl<L: Level, T> Drop for ArcRwLockWriteGuard<L, T> {
|
||||
/// This function can only be called if no lock is held by the calling thread/task
|
||||
#[inline]
|
||||
pub fn check_no_locks(_: LockToken<'_, L0>) {}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MCS-based fair mutex (McsMutex)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// A mutual exclusion lock using the MCS fair spinlock algorithm.
|
||||
///
|
||||
/// Unlike `Mutex<L, T>` which uses a simple spinlock (no fairness under
|
||||
/// contention), `McsMutex` uses Mellor-Crummey Scott queue-based spinning:
|
||||
///
|
||||
/// - Each waiter spins on its **own** local flag — no shared cache-line bouncing.
|
||||
/// - FIFO ordering prevents starvation.
|
||||
/// - O(1) cache-line transfers on unlock.
|
||||
///
|
||||
/// The MCS node is stored in [`crate::percpu::PercpuBlock::mcs_sched_node`], so
|
||||
/// this type is suitable for scheduler-internal locks where the holder is always
|
||||
/// the current CPU.
|
||||
pub struct McsMutex<L: Level, T> {
|
||||
raw: crate::sync::mcs::McsRawLock,
|
||||
data: UnsafeCell<T>,
|
||||
_phantom: PhantomData<L>,
|
||||
}
|
||||
|
||||
unsafe impl<L: Level, T: Send> Sync for McsMutex<L, T> {}
|
||||
unsafe impl<L: Level, T: Send> Send for McsMutex<L, T> {}
|
||||
|
||||
impl<L: Level, T> McsMutex<L, T> {
|
||||
pub const fn new(val: T) -> Self {
|
||||
Self {
|
||||
raw: crate::sync::mcs::McsRawLock::new(),
|
||||
data: UnsafeCell::new(val),
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<L: Level, T> McsMutex<L, T> {
|
||||
pub fn lock<'a, LP: Lower<L> + 'a>(
|
||||
&'a self,
|
||||
lock_token: LockToken<'a, LP>,
|
||||
) -> McsMutexGuard<'a, L, T> {
|
||||
let percpu = PercpuBlock::current();
|
||||
let contended = self.raw.acquire(&percpu.mcs_sched_node);
|
||||
if contended {
|
||||
percpu
|
||||
.mcs_contention_count
|
||||
.set(percpu.mcs_contention_count.get() + 1);
|
||||
}
|
||||
McsMutexGuard {
|
||||
lock: self,
|
||||
lock_token: LockToken::downgraded(lock_token),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_lock<'a, LP: Lower<L> + 'a>(
|
||||
&'a self,
|
||||
lock_token: LockToken<'a, LP>,
|
||||
) -> Option<McsMutexGuard<'a, L, T>> {
|
||||
let percpu = PercpuBlock::current();
|
||||
if self.raw.try_acquire(&percpu.mcs_sched_node) {
|
||||
Some(McsMutexGuard {
|
||||
lock: self,
|
||||
lock_token: LockToken::downgraded(lock_token),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct McsMutexGuard<'a, L: Level, T: 'a> {
|
||||
lock: &'a McsMutex<L, T>,
|
||||
lock_token: LockToken<'a, L>,
|
||||
}
|
||||
|
||||
impl<'a, L: Level, T: 'a> McsMutexGuard<'a, L, T> {
|
||||
pub fn token_split(&mut self) -> (&mut T, LockToken<'_, L>) {
|
||||
unsafe { (&mut *self.lock.data.get(), self.lock_token.token()) }
|
||||
}
|
||||
|
||||
pub fn into_split(self) -> (McsRawGuard<'a, L, T>, LockToken<'a, L>) {
|
||||
let lock_ref = self.lock;
|
||||
let token = unsafe { core::ptr::read(&self.lock_token) };
|
||||
core::mem::forget(self);
|
||||
(McsRawGuard { lock: lock_ref }, token)
|
||||
}
|
||||
|
||||
pub fn from_split(raw: McsRawGuard<'a, L, T>, token: LockToken<'a, L>) -> Self {
|
||||
let lock_ref = raw.lock;
|
||||
core::mem::forget(raw);
|
||||
Self {
|
||||
lock: lock_ref,
|
||||
lock_token: token,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<L: Level, T> core::ops::Deref for McsMutexGuard<'_, L, T> {
|
||||
type Target = T;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
unsafe { &*self.lock.data.get() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<L: Level, T> core::ops::DerefMut for McsMutexGuard<'_, L, T> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
unsafe { &mut *self.lock.data.get() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<L: Level, T> Drop for McsMutexGuard<'_, L, T> {
|
||||
fn drop(&mut self) {
|
||||
let percpu = PercpuBlock::current();
|
||||
self.lock.raw.release(&percpu.mcs_sched_node);
|
||||
}
|
||||
}
|
||||
|
||||
pub struct McsRawGuard<'a, L: Level, T: 'a> {
|
||||
lock: &'a McsMutex<L, T>,
|
||||
}
|
||||
|
||||
impl<L: Level, T> core::ops::Deref for McsRawGuard<'_, L, T> {
|
||||
type Target = T;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
unsafe { &*self.lock.data.get() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<L: Level, T> core::ops::DerefMut for McsRawGuard<'_, L, T> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
unsafe { &mut *self.lock.data.get() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<L: Level, T> Drop for McsRawGuard<'_, L, T> {
|
||||
fn drop(&mut self) {
|
||||
let percpu = PercpuBlock::current();
|
||||
self.lock.raw.release(&percpu.mcs_sched_node);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
use core::num::NonZeroUsize;
|
||||
|
||||
use alloc::{format, string::{String, ToString}, sync::Arc, vec::Vec};
|
||||
use alloc::{string::String, sync::Arc, vec::Vec};
|
||||
use redox_path::RedoxPath;
|
||||
|
||||
use crate::{
|
||||
@@ -12,9 +12,9 @@ use crate::{
|
||||
memory::{AddrSpace, GenericFlusher, Grant, PageSpan, TlbShootdownActions},
|
||||
},
|
||||
memory::{Page, VirtualAddress, PAGE_SIZE},
|
||||
scheme::{self, pipe, FileHandle, KernelScheme, OpenResult, SchemeExt, StrOrBytes},
|
||||
scheme::{self, FileHandle, KernelScheme, OpenResult, StrOrBytes},
|
||||
sync::{CleanLockToken, RwLock},
|
||||
syscall::{data::{GlobalSchemes, Stat}, error::*, flag::*},
|
||||
syscall::{data::Stat, error::*, flag::*},
|
||||
};
|
||||
|
||||
use super::usercopy::{UserSlice, UserSliceRo, UserSliceRw, UserSliceWo};
|
||||
@@ -45,7 +45,7 @@ pub fn file_op_generic_ext<T>(
|
||||
(file, desc)
|
||||
};
|
||||
|
||||
let scheme = desc.get_scheme(token)?;
|
||||
let scheme = scheme::get_scheme(token.token(), desc.scheme)?;
|
||||
|
||||
op(&*scheme, file.description, desc, token)
|
||||
}
|
||||
@@ -62,32 +62,55 @@ pub fn copy_path_to_buf(raw_path: UserSliceRo, max_len: usize) -> Result<String>
|
||||
// TODO: Define elsewhere
|
||||
const PATH_MAX: usize = PAGE_SIZE;
|
||||
|
||||
fn fifo_path_key(scheme_id: scheme::SchemeId, number: usize, path: &str) -> String {
|
||||
if path.starts_with('/') {
|
||||
path.to_string()
|
||||
} else {
|
||||
format!("@fifo:{}:{}:{}", scheme_id.get(), number, path)
|
||||
}
|
||||
}
|
||||
|
||||
fn install_open_result(
|
||||
scheme_id: scheme::SchemeId,
|
||||
pub fn openat(
|
||||
fh: FileHandle,
|
||||
raw_path: UserSliceRo,
|
||||
flags: usize,
|
||||
open_result: OpenResult,
|
||||
fcntl_flags: u32,
|
||||
euid: u32,
|
||||
egid: u32,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<FileHandle> {
|
||||
let new_description = match open_result {
|
||||
OpenResult::SchemeLocal(number, internal_flags) => Arc::new(RwLock::new(
|
||||
FileDescription::new(
|
||||
scheme_id,
|
||||
number,
|
||||
0,
|
||||
(flags & !O_CLOEXEC) as u32,
|
||||
internal_flags,
|
||||
token,
|
||||
),
|
||||
)),
|
||||
OpenResult::External(desc) => desc,
|
||||
let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?;
|
||||
|
||||
let (scheme_id, number) = {
|
||||
let current_lock = context::current();
|
||||
let mut current = current_lock.read(token.token());
|
||||
let (context, mut token) = current.token_split();
|
||||
let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?;
|
||||
let desc = pipe.description.read(token.token());
|
||||
(desc.scheme, desc.number)
|
||||
};
|
||||
|
||||
let caller_ctx = context::current()
|
||||
.read(token.token())
|
||||
.caller_ctx()
|
||||
.filter_uid_gid(euid, egid);
|
||||
|
||||
let new_description = {
|
||||
let scheme = scheme::get_scheme(token.token(), scheme_id)?;
|
||||
|
||||
let res = scheme.kopenat(
|
||||
number,
|
||||
StrOrBytes::from_str(&path_buf),
|
||||
flags,
|
||||
fcntl_flags,
|
||||
caller_ctx,
|
||||
token,
|
||||
);
|
||||
|
||||
match res? {
|
||||
OpenResult::SchemeLocal(number, internal_flags) => {
|
||||
Arc::new(RwLock::new(FileDescription {
|
||||
offset: 0,
|
||||
internal_flags,
|
||||
scheme: scheme_id,
|
||||
number,
|
||||
flags: (flags & !O_CLOEXEC) as u32,
|
||||
}))
|
||||
}
|
||||
OpenResult::External(desc) => desc,
|
||||
}
|
||||
};
|
||||
|
||||
let current_lock = context::current();
|
||||
@@ -103,102 +126,6 @@ fn install_open_result(
|
||||
)
|
||||
.ok_or(Error::new(EMFILE))
|
||||
}
|
||||
|
||||
fn path_exists_in_scheme(
|
||||
scheme: &dyn KernelScheme,
|
||||
number: usize,
|
||||
path: &str,
|
||||
caller_ctx: scheme::CallerCtx,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<bool> {
|
||||
match scheme.kopenat(number, StrOrBytes::from_str(path), O_STAT, 0, caller_ctx, token) {
|
||||
Ok(OpenResult::SchemeLocal(number, _)) => {
|
||||
let _ = scheme.close(number, token);
|
||||
Ok(true)
|
||||
}
|
||||
Ok(OpenResult::External(_)) => Ok(true),
|
||||
Err(err) if err.errno == ENOENT => Ok(false),
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn openat(
|
||||
fh: FileHandle,
|
||||
raw_path: UserSliceRo,
|
||||
flags: usize,
|
||||
fcntl_flags: u32,
|
||||
euid: u32,
|
||||
egid: u32,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<FileHandle> {
|
||||
let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?;
|
||||
|
||||
let desc = {
|
||||
let current_lock = context::current();
|
||||
let mut current = current_lock.read(token.token());
|
||||
let (context, mut context_token) = current.token_split();
|
||||
let pipe = context
|
||||
.get_file(fh, &mut context_token)
|
||||
.ok_or(Error::new(EBADF))?;
|
||||
*pipe.description.read(context_token.token())
|
||||
};
|
||||
let scheme = desc.get_scheme(token)?;
|
||||
let number = desc.number;
|
||||
let scheme_id = desc.scheme;
|
||||
|
||||
let caller_ctx = context::current()
|
||||
.read(token.token())
|
||||
.caller_ctx()
|
||||
.filter_uid_gid(euid, egid);
|
||||
|
||||
let fifo_mode_requested = flags & MODE_FIFO as usize == MODE_FIFO as usize;
|
||||
let fifo_key = fifo_path_key(scheme_id, number, &path_buf);
|
||||
|
||||
if pipe::named_pipe_exists(&fifo_key, token) {
|
||||
if flags & O_EXCL == O_EXCL && flags & O_CREAT == O_CREAT {
|
||||
return Err(Error::new(EEXIST));
|
||||
}
|
||||
if fifo_mode_requested && flags & O_CREAT == O_CREAT {
|
||||
return Err(Error::new(EEXIST));
|
||||
}
|
||||
|
||||
let pipe_number = pipe::open_named_pipe(&fifo_key, flags, token)?
|
||||
.ok_or(Error::new(ENOENT))?;
|
||||
return install_open_result(
|
||||
GlobalSchemes::Pipe.scheme_id(),
|
||||
flags,
|
||||
OpenResult::SchemeLocal(pipe_number, InternalFlags::empty()),
|
||||
token,
|
||||
);
|
||||
}
|
||||
|
||||
if fifo_mode_requested && flags & O_CREAT == O_CREAT {
|
||||
if path_exists_in_scheme(&*scheme, number, &path_buf, caller_ctx, token)? {
|
||||
return Err(Error::new(EEXIST));
|
||||
}
|
||||
|
||||
let mode = u16::try_from(flags & 0o7777).map_err(|_| Error::new(EINVAL))?;
|
||||
let pipe_number = pipe::create_named_pipe(&fifo_key, &path_buf, mode, flags, token)?;
|
||||
|
||||
return install_open_result(
|
||||
GlobalSchemes::Pipe.scheme_id(),
|
||||
flags,
|
||||
OpenResult::SchemeLocal(pipe_number, InternalFlags::empty()),
|
||||
token,
|
||||
);
|
||||
}
|
||||
|
||||
let open_result = scheme.kopenat(
|
||||
number,
|
||||
StrOrBytes::from_str(&path_buf),
|
||||
flags,
|
||||
fcntl_flags,
|
||||
caller_ctx,
|
||||
token,
|
||||
)?;
|
||||
|
||||
install_open_result(scheme_id, flags, open_result, token)
|
||||
}
|
||||
/// Unlinkat syscall
|
||||
pub fn unlinkat(
|
||||
fh: FileHandle,
|
||||
@@ -210,27 +137,22 @@ pub fn unlinkat(
|
||||
) -> Result<()> {
|
||||
let path_buf = copy_path_to_buf(raw_path, PATH_MAX)?;
|
||||
|
||||
let desc = {
|
||||
let (number, scheme_id) = {
|
||||
let current_lock = context::current();
|
||||
let mut current = current_lock.read(token.token());
|
||||
let (context, mut context_token) = current.token_split();
|
||||
let pipe = context
|
||||
.get_file(fh, &mut context_token)
|
||||
.ok_or(Error::new(EBADF))?;
|
||||
*pipe.description.read(context_token.token())
|
||||
let (context, mut token) = current.token_split();
|
||||
let pipe = context.get_file(fh, &mut token).ok_or(Error::new(EBADF))?;
|
||||
let desc = pipe.description.read(token.token());
|
||||
(desc.number, desc.scheme)
|
||||
};
|
||||
let number = desc.number;
|
||||
let scheme = desc.get_scheme(token)?;
|
||||
|
||||
let scheme = scheme::get_scheme(token.token(), scheme_id)?;
|
||||
|
||||
let caller_ctx = context::current()
|
||||
.read(token.token())
|
||||
.caller_ctx()
|
||||
.filter_uid_gid(euid, egid);
|
||||
|
||||
if pipe::unlink_named_pipe(&fifo_path_key(desc.scheme, number, &path_buf), token) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
/*
|
||||
let mut path_buf = BorrowedHtBuf::head()?;
|
||||
let path = path_buf.use_for_string(raw_path)?;
|
||||
@@ -277,18 +199,17 @@ fn duplicate_file(
|
||||
let description = { *file.description.read(token.token()) };
|
||||
|
||||
let new_description = {
|
||||
let scheme = description.get_scheme(token)?;
|
||||
let scheme = scheme::get_scheme(token.token(), description.scheme)?;
|
||||
|
||||
match scheme.kdup(description.number, user_buf, caller_ctx, token)? {
|
||||
OpenResult::SchemeLocal(number, internal_flags) => {
|
||||
Arc::new(RwLock::new(FileDescription::new(
|
||||
description.scheme,
|
||||
number,
|
||||
0,
|
||||
description.flags,
|
||||
Arc::new(RwLock::new(FileDescription {
|
||||
offset: 0,
|
||||
internal_flags,
|
||||
token,
|
||||
)))
|
||||
scheme: description.scheme,
|
||||
number,
|
||||
flags: description.flags,
|
||||
}))
|
||||
}
|
||||
OpenResult::External(desc) => desc,
|
||||
}
|
||||
@@ -375,10 +296,11 @@ fn call_normal(
|
||||
}
|
||||
.ok_or(Error::new(EBADF))?;
|
||||
|
||||
let (scheme, number) = {
|
||||
let desc = *file.description.read(token.token());
|
||||
(desc.get_scheme(token)?, desc.number)
|
||||
let (scheme_id, number) = {
|
||||
let desc = file.description.read(token.token());
|
||||
(desc.scheme, desc.number)
|
||||
};
|
||||
let scheme = scheme::get_scheme(token.token(), scheme_id)?;
|
||||
|
||||
if flags.contains(CallFlags::STD_FS) {
|
||||
scheme.translate_std_fs_call(number, file.description, payload, flags, metadata, token)
|
||||
@@ -419,28 +341,28 @@ fn fdwrite_inner(
|
||||
) -> Result<usize> {
|
||||
// TODO: Ensure deadlocks can't happen
|
||||
let (scheme, number, descs_to_send) = {
|
||||
let desc = {
|
||||
let (scheme, number) = {
|
||||
let current_lock = context::current();
|
||||
let mut current = current_lock.read(token.token());
|
||||
let (context, mut context_token) = current.token_split();
|
||||
let (context, mut token) = current.token_split();
|
||||
let file_descriptor = context
|
||||
.get_file(socket, &mut context_token)
|
||||
.get_file(socket, &mut token)
|
||||
.ok_or(Error::new(EBADF))?;
|
||||
*file_descriptor.description.read(context_token.token())
|
||||
let desc = &file_descriptor.description.read(token.token());
|
||||
(desc.scheme, desc.number)
|
||||
};
|
||||
let scheme = desc.get_scheme(token)?;
|
||||
let number = desc.number;
|
||||
let scheme = scheme::get_scheme(token.token(), scheme)?;
|
||||
|
||||
let current_lock = context::current();
|
||||
let mut current = current_lock.read(token.token());
|
||||
let (context, mut context_token) = current.token_split();
|
||||
let (context, mut token) = current.token_split();
|
||||
(
|
||||
scheme,
|
||||
number,
|
||||
if flags.contains(CallFlags::FD_CLONE) {
|
||||
context.bulk_get_files(&target_fds, &mut context_token)
|
||||
context.bulk_get_files(&target_fds, &mut token)
|
||||
} else {
|
||||
context.bulk_remove_files(&target_fds, &mut context_token)
|
||||
context.bulk_remove_files(&target_fds, &mut token)
|
||||
}?
|
||||
.into_iter()
|
||||
.map(|f| f.description)
|
||||
@@ -473,22 +395,18 @@ fn call_fdread(
|
||||
metadata: &[u64],
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
let desc = {
|
||||
let current_lock = context::current();
|
||||
let mut current = current_lock.read(token.token());
|
||||
let (context, mut context_token) = current.token_split();
|
||||
let file_descriptor = context
|
||||
.get_file(fd, &mut context_token)
|
||||
.ok_or(Error::new(EBADF))?;
|
||||
*file_descriptor.description.read(context_token.token())
|
||||
};
|
||||
let (scheme, number) = {
|
||||
let scheme = desc.get_scheme(token)?;
|
||||
let number = desc.number;
|
||||
(
|
||||
scheme,
|
||||
number,
|
||||
)
|
||||
let (scheme, number) = {
|
||||
let current_lock = context::current();
|
||||
let mut current = current_lock.read(token.token());
|
||||
let (context, mut token) = current.token_split();
|
||||
let file_descriptor = context.get_file(fd, &mut token).ok_or(Error::new(EBADF))?;
|
||||
let desc = file_descriptor.description.read(token.token());
|
||||
(desc.scheme, desc.number)
|
||||
};
|
||||
let scheme = scheme::get_scheme(token.token(), scheme)?;
|
||||
|
||||
(scheme, number)
|
||||
};
|
||||
|
||||
scheme.kfdread(number, payload, flags, metadata, token)
|
||||
@@ -522,9 +440,9 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken)
|
||||
}
|
||||
.ok_or(Error::new(EBADF))?;
|
||||
|
||||
let (number, flags, desc) = {
|
||||
let desc = *file.description.read(token.token());
|
||||
(desc.number, desc.flags, desc)
|
||||
let (scheme_id, number, flags) = {
|
||||
let desc = file.description.write(token.token());
|
||||
(desc.scheme, desc.number, desc.flags)
|
||||
};
|
||||
|
||||
if cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC {
|
||||
@@ -542,7 +460,7 @@ pub fn fcntl(fd: FileHandle, cmd: usize, arg: usize, token: &mut CleanLockToken)
|
||||
|
||||
// Communicate fcntl with scheme
|
||||
if cmd != F_GETFD && cmd != F_SETFD {
|
||||
let scheme = desc.get_scheme(token)?;
|
||||
let scheme = scheme::get_scheme(token.token(), scheme_id)?;
|
||||
|
||||
scheme.fcntl(number, cmd, arg, token)?;
|
||||
};
|
||||
@@ -600,11 +518,13 @@ pub fn flink(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken)
|
||||
let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?;
|
||||
let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?;
|
||||
|
||||
let (number, scheme) = {
|
||||
let desc = *file.description.read(token.token());
|
||||
(desc.number, desc.get_scheme(token)?)
|
||||
let (number, scheme_id) = {
|
||||
let desc = file.description.read(token.token());
|
||||
(desc.number, desc.scheme)
|
||||
};
|
||||
|
||||
let scheme = scheme::get_scheme(token.token(), scheme_id)?;
|
||||
|
||||
// TODO: Check EXDEV.
|
||||
/*
|
||||
if scheme_id != description.scheme {
|
||||
@@ -634,11 +554,13 @@ pub fn frename(fd: FileHandle, raw_path: UserSliceRo, token: &mut CleanLockToken
|
||||
let path = RedoxPath::from_absolute(&path_buf).ok_or(Error::new(EINVAL))?;
|
||||
let (_, reference) = path.as_parts().ok_or(Error::new(EINVAL))?;
|
||||
|
||||
let (number, scheme) = {
|
||||
let desc = *file.description.read(token.token());
|
||||
(desc.number, desc.get_scheme(token)?)
|
||||
let (number, scheme_id) = {
|
||||
let desc = file.description.read(token.token());
|
||||
(desc.number, desc.scheme)
|
||||
};
|
||||
|
||||
let scheme = scheme::get_scheme(token.token(), scheme_id)?;
|
||||
|
||||
// TODO: Check EXDEV.
|
||||
/*
|
||||
if scheme_id != description.scheme {
|
||||
|
||||
@@ -28,11 +28,6 @@ use crate::{
|
||||
sync::CleanLockToken,
|
||||
};
|
||||
|
||||
/// Local syscall numbers not yet in the redox_syscall crate.
|
||||
/// These are allocated from the 987+ range to avoid collisions with crate numbers.
|
||||
pub const SYS_SCHED_SETAFFINITY: usize = 987;
|
||||
pub const SYS_SCHED_GETAFFINITY: usize = 988;
|
||||
|
||||
/// Debug
|
||||
pub mod debug;
|
||||
|
||||
@@ -225,10 +220,6 @@ pub fn syscall(
|
||||
unlinkat(fd, UserSlice::ro(c, d)?, e, f as _, g as _, token).map(|()| 0)
|
||||
}
|
||||
SYS_YIELD => sched_yield(token).map(|()| 0),
|
||||
|
||||
// P17-3: CPU affinity syscalls. Numbers allocated locally (not yet in redox_syscall crate).
|
||||
SYS_SCHED_SETAFFINITY => sched_setaffinity(b, UserSlice::ro(c, d)?, token),
|
||||
SYS_SCHED_GETAFFINITY => sched_getaffinity(b, UserSlice::wo(c, d)?, token),
|
||||
SYS_NANOSLEEP => nanosleep(
|
||||
UserSlice::ro(b, size_of::<TimeSpec>())?,
|
||||
UserSlice::wo(c, size_of::<TimeSpec>())?.none_if_null(),
|
||||
|
||||
@@ -11,7 +11,6 @@ use crate::{
|
||||
memory::{AddrSpace, Grant, PageSpan},
|
||||
ContextRef,
|
||||
},
|
||||
cpu_set::RawMask,
|
||||
event,
|
||||
sync::{CleanLockToken, RwLock},
|
||||
syscall::flag::{EventFlags, O_CREAT, O_RDWR},
|
||||
@@ -272,95 +271,24 @@ unsafe fn bootstrap_mem(bootstrap: &crate::startup::Bootstrap) -> &'static [u8]
|
||||
}
|
||||
|
||||
fn insert_fd(scheme: SchemeId, number: usize, cloexec: bool, token: &mut CleanLockToken) -> usize {
|
||||
let description = Arc::new(RwLock::new(FileDescription::new(
|
||||
scheme,
|
||||
number,
|
||||
0,
|
||||
(O_CREAT | O_RDWR) as u32,
|
||||
InternalFlags::empty(),
|
||||
token,
|
||||
)));
|
||||
|
||||
let current_lock = context::current();
|
||||
let mut current = current_lock.read(token.token());
|
||||
let (context, mut context_token) = current.token_split();
|
||||
let (context, mut token) = current.token_split();
|
||||
context
|
||||
.add_file_min(
|
||||
FileDescriptor {
|
||||
description,
|
||||
description: Arc::new(RwLock::new(FileDescription {
|
||||
scheme,
|
||||
number,
|
||||
offset: 0,
|
||||
flags: (O_CREAT | O_RDWR) as u32,
|
||||
internal_flags: InternalFlags::empty(),
|
||||
})),
|
||||
cloexec,
|
||||
},
|
||||
syscall::flag::UPPER_FDTBL_TAG + scheme.get(),
|
||||
&mut context_token,
|
||||
&mut token,
|
||||
)
|
||||
.expect("failed to insert fd to current context")
|
||||
.get()
|
||||
}
|
||||
|
||||
/// Set CPU affinity mask for a process.
|
||||
///
|
||||
/// # Arguments (syscall ABI)
|
||||
/// - `pid`: Process ID (0 = current process; other PIDs not yet supported)
|
||||
/// - `mask_ptr`: Pointer to a `RawMask` (32 bytes on 64-bit, 256-bit bitmap)
|
||||
/// - `mask_len`: Length of mask in bytes (must equal `size_of::<RawMask>()`)
|
||||
pub fn sched_setaffinity(
|
||||
pid: usize,
|
||||
mask_ptr: super::usercopy::UserSliceRo,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
// Validate mask size
|
||||
if mask_ptr.len() != core::mem::size_of::<RawMask>() {
|
||||
return Err(Error::new(super::error::EINVAL));
|
||||
}
|
||||
|
||||
// pid == 0 means current process
|
||||
let target = if pid == 0 {
|
||||
context::current()
|
||||
} else {
|
||||
// TODO: Support PID-based lookup (requires context list iteration
|
||||
// with lock token downgrades). For now, only pid=0 is supported.
|
||||
return Err(Error::new(super::error::ESRCH));
|
||||
};
|
||||
|
||||
// Read mask from userspace
|
||||
let raw_mask: RawMask = unsafe { mask_ptr.read_exact() }?;
|
||||
|
||||
// Apply to context's affinity mask
|
||||
let mut ctx = target.write(token.token());
|
||||
ctx.sched_affinity.override_from(&raw_mask);
|
||||
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
/// Get CPU affinity mask for a process.
|
||||
///
|
||||
/// # Arguments (syscall ABI)
|
||||
/// - `pid`: Process ID (0 = current process; other PIDs not yet supported)
|
||||
/// - `mask_ptr`: Pointer to a `RawMask` buffer (32 bytes on 64-bit)
|
||||
/// - `mask_len`: Length of buffer in bytes (must equal `size_of::<RawMask>()`)
|
||||
///
|
||||
/// # Returns
|
||||
/// Number of bytes written to mask_ptr on success.
|
||||
pub fn sched_getaffinity(
|
||||
pid: usize,
|
||||
mask_ptr: super::usercopy::UserSliceWo,
|
||||
token: &mut CleanLockToken,
|
||||
) -> Result<usize> {
|
||||
// Validate mask size
|
||||
if mask_ptr.len() != core::mem::size_of::<RawMask>() {
|
||||
return Err(Error::new(super::error::EINVAL));
|
||||
}
|
||||
|
||||
// pid == 0 means current process
|
||||
let target = if pid == 0 {
|
||||
context::current()
|
||||
} else {
|
||||
return Err(Error::new(super::error::ESRCH));
|
||||
};
|
||||
|
||||
let ctx = target.read(token.token());
|
||||
let raw_mask = ctx.sched_affinity.to_raw();
|
||||
mask_ptr.copy_common_bytes_from_slice(crate::cpu_set::mask_as_bytes(&raw_mask))?;
|
||||
|
||||
Ok(core::mem::size_of::<RawMask>())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user