chore: remove patch leftovers (.orig/.rej)

This commit is contained in:
2026-05-17 13:57:56 +03:00
parent 081ed10a8b
commit ef999ebc8f
4 changed files with 0 additions and 454 deletions
@@ -1,162 +0,0 @@
use core::{
hint,
sync::atomic::{AtomicU8, Ordering},
};
use crate::{
arch::{
device::local_apic::the_local_apic,
start::{kstart_ap, KernelArgsAp},
},
cpu_set::LogicalCpuId,
memory::{
allocate_p2frame, Frame, KernelMapper, Page, PageFlags, PhysicalAddress, RmmA, RmmArch,
VirtualAddress, PAGE_SIZE,
},
startup::AP_READY,
};
use super::{Madt, MadtEntry};
const TRAMPOLINE: usize = 0x8000;
static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline"));
/// Reports this CPU's local APIC and, when the `multi_core` feature is enabled,
/// starts the other processors listed in the MADT.
///
/// The steps, in order:
/// 1. Map the page at `TRAMPOLINE` and copy `TRAMPOLINE_DATA` into it.
/// 2. Call `crate::profiling::allocate` with a preliminary CPU count.
/// 3. For every `MadtEntry::LocalApic` that is not this CPU and has flag bit 0
///    set: allocate a stack, PCR and IDT, fill in the trampoline's argument
///    slots, send an INIT IPI followed by a START IPI, and spin until both
///    ready flags have been raised.
/// 4. Unmap the trampoline page.
///
/// # Panics
/// Panics if the trampoline page cannot be mapped or unmapped, or if no frames
/// are left for an AP stack.
///
/// NOTE(review): both wait loops spin without a timeout, so an AP that never
/// raises its flags blocks this function forever.
pub(super) fn init(madt: Madt) {
// NOTE(review): the safety contract of `the_local_apic` is not visible here.
let local_apic = unsafe { the_local_apic() };
// APIC ID of the CPU running this function; used below to skip our own entry.
let me = local_apic.id();
if local_apic.x2 {
debug!(" X2APIC {}", me.get());
} else {
debug!(" XAPIC {}: {:>08X}", me.get(), local_apic.address);
}
// Without the `multi_core` feature nothing below runs: no trampoline is
// mapped and no other CPU is started.
if cfg!(not(feature = "multi_core")) {
return;
}
// Map trampoline
// The same numeric address is used for the frame and the page (identity map).
let trampoline_frame = Frame::containing(PhysicalAddress::new(TRAMPOLINE));
let trampoline_page = Page::containing_address(VirtualAddress::new(TRAMPOLINE));
// The mapper lock lives only inside this block; the physical address of the
// page table is captured here so it can be handed to each AP later.
let (result, page_table_physaddr) = unsafe {
//TODO: do not have writable and executable!
let mut mapper = KernelMapper::lock_rw();
let result = mapper
.map_phys(
trampoline_page.start_address(),
trampoline_frame.base(),
PageFlags::new().execute(true).write(true),
)
.expect("failed to map trampoline");
(result, mapper.table().phys().data())
};
result.flush();
// Write trampoline, make sure TRAMPOLINE page is free for use
// Copied one byte at a time through an `AtomicU8` view of the destination.
for (i, val) in TRAMPOLINE_DATA.iter().enumerate() {
unsafe {
(*((TRAMPOLINE as *mut u8).add(i) as *const AtomicU8)).store(*val, Ordering::SeqCst);
}
}
unsafe {
// Counts `LocalApic` entries that are either this CPU or have flag bit 0 set
// (presumably the ACPI "enabled" bit — confirm against the MADT definition).
let preliminary_cpu_count = madt.iter().filter(|e| matches!(e, MadtEntry::LocalApic(entry) if u32::from(entry.id) == me.get() || entry.flags & 1 == 1)).count();
crate::profiling::allocate(preliminary_cpu_count as u32);
}
// Start the application processors one at a time; each iteration waits for
// the AP it just started before moving on to the next entry.
for madt_entry in madt.iter() {
debug!(" {:x?}", madt_entry);
if let MadtEntry::LocalApic(ap_local_apic) = madt_entry {
if u32::from(ap_local_apic.id) == me.get() {
debug!(" This is my local APIC");
} else if ap_local_apic.flags & 1 == 1 {
let cpu_id = LogicalCpuId::next();
// Allocate a stack
// `stack_end` lies `PAGE_SIZE << 4` bytes above `stack_start`; presumably
// `allocate_p2frame(4)` returns 2^4 contiguous frames — TODO confirm.
let stack_start = RmmA::phys_to_virt(
allocate_p2frame(4)
.expect("no more frames in acpi stack_start")
.base(),
)
.data();
let stack_end = stack_start + (PAGE_SIZE << 4);
let pcr_ptr = crate::arch::gdt::allocate_and_init_pcr(cpu_id, stack_end);
let idt_ptr = crate::arch::idt::allocate_and_init_idt(cpu_id);
// `args` lives on this stack frame and only its address is handed to the AP.
// It stays alive because this iteration blocks on `AP_READY` below;
// presumably the AP is done with it by then — verify in `kstart_ap`.
let args = KernelArgsAp {
stack_end: stack_end as *mut u8,
cpu_id,
pcr_ptr,
idt_ptr,
};
// Four consecutive u64 slots starting at TRAMPOLINE + 8:
// ready flag, pointer to `args`, page table address, entry point.
let ap_ready = (TRAMPOLINE + 8) as *mut u64;
let ap_args_ptr = unsafe { ap_ready.add(1) };
let ap_page_table = unsafe { ap_ready.add(2) };
let ap_code = unsafe { ap_ready.add(3) };
// Clear the ready word and fill in the argument slots. These are plain
// `write`s (not `write_volatile`); the fence below orders them.
unsafe {
ap_ready.write(0);
ap_args_ptr.write(&args as *const _ as u64);
ap_page_table.write(page_table_physaddr as u64);
#[expect(clippy::fn_to_numeric_cast)]
ap_code.write(kstart_ap as u64);
// Ensure all trampoline writes are visible to the AP before
// it starts executing. asm!("") is only a compiler barrier;
// fence(SeqCst) is a full hardware memory barrier.
core::sync::atomic::fence(Ordering::SeqCst);
};
// Reset the shared flag that the wait loop further down polls.
AP_READY.store(false, Ordering::SeqCst);
// Send INIT IPI
// The destination APIC ID goes in bits 32.. when `local_apic.x2` is set and
// in bits 56.. otherwise.
{
let mut icr = 0x4500;
if local_apic.x2 {
icr |= u64::from(ap_local_apic.id) << 32;
} else {
icr |= u64::from(ap_local_apic.id) << 56;
}
local_apic.set_icr(icr);
}
// Send START IPI
// The low byte carries the trampoline's page number (0x08 for
// TRAMPOLINE = 0x8000). No delay is inserted between INIT and START.
{
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
let mut icr = 0x4600 | ap_segment as u64;
if local_apic.x2 {
icr |= u64::from(ap_local_apic.id) << 32;
} else {
icr |= u64::from(ap_local_apic.id) << 56;
}
local_apic.set_icr(icr);
}
// Wait for trampoline ready
// Only the lowest byte of the ready word is polled (via an `AtomicU8` view).
while unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } == 0 {
hint::spin_loop();
}
// Second handshake on the `AP_READY` flag imported from `crate::startup`.
while !AP_READY.load(Ordering::SeqCst) {
hint::spin_loop();
}
// NOTE(review): presumably flushes this CPU's TLB — confirm in `RmmArch`.
RmmA::invalidate_all();
}
}
}
// Unmap trampoline
// The returned frame is discarded; only the flush handle is used.
let (_frame, _, flush) = unsafe {
KernelMapper::lock_rw()
.unmap_phys(trampoline_page.start_address())
.expect("failed to unmap trampoline page")
};
flush.flush();
}
@@ -1,194 +0,0 @@
--- src/acpi/madt/arch/x86.rs
+++ src/acpi/madt/arch/x86.rs
@@ -20,6 +22,7 @@
use super::{Madt, MadtEntry};
+use alloc::collections::BTreeSet;
use alloc::vec::Vec;
/// Maximum number of APIC→CPU mappings we track for NUMA topology.
@@ -47,6 +50,67 @@
const TRAMPOLINE: usize = 0x8000;
static TRAMPOLINE_DATA: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/trampoline"));
+/// Estimate TSC frequency in MHz from CPUID.
+///
+/// Tries CPUID leaf 0x16 (Processor Frequency Information) first,
+/// then CPUID leaf 0x15 (TSC/Core Crystal Clock Ratio).
+/// Returns None if frequency cannot be determined.
+fn tsc_freq_mhz_cpuid() -> Option<u64> {
+ let max_leaf = unsafe { core::arch::x86_64::__cpuid(0).eax as u32 };
+
+ // CPUID leaf 0x16: EAX = Core Base Frequency in MHz (Intel)
+ if max_leaf >= 0x16 {
+ let mhz = unsafe { core::arch::x86_64::__cpuid(0x16) }.eax as u64;
+ if mhz > 0 {
+ return Some(mhz);
+ }
+ }
+
+ // CPUID leaf 0x15: EAX = denominator, EBX = numerator, ECX = crystal Hz
+ if max_leaf >= 0x15 {
+ let res = unsafe { core::arch::x86_64::__cpuid(0x15) };
+ let denom = res.eax as u64;
+ let numer = res.ebx as u64;
+ let crystal_hz = res.ecx as u64;
+ if denom > 0 && numer > 0 && crystal_hz > 0 {
+ // TSC freq = crystal_hz * numer / denom
+ let tsc_hz = crystal_hz * numer / denom;
+ return Some(tsc_hz / 1_000_000); // Hz → MHz
+ }
+ }
+
+ None
+}
+
+/// Early-boot microsecond delay using the Time Stamp Counter.
+///
+/// Uses CPUID-based TSC frequency estimation when available.
+/// Falls back to a conservative spin loop calibrated for the
+/// minimum expected CPU speed (1 GHz).
+///
+/// # Safety
+/// Must only be called after the BSP TSC is running (always true
+/// after CPU reset on x86).
+fn early_udelay(us: u64) {
+ if let Some(mhz) = tsc_freq_mhz_cpuid() {
+ // TSC-based delay: precise on invariant TSC (all modern x86).
+ // MHz = cycles per µs.
+ let target = unsafe { rdtsc() } + us * mhz;
+ while unsafe { rdtsc() } < target {
+ hint::spin_loop();
+ }
+ } else {
+ // Fallback: conservative spin loop.
+ // spin_loop() (PAUSE) is ~40 cycles on modern Intel, ~1 on AMD.
+ // At 1 GHz minimum: 1000 cycles/µs ÷ 40 cycles/iter = 25 iters/µs.
+ // Use 50 iters/µs for safety margin on slower/variable CPUs.
+ let iters = us.saturating_mul(50);
+ for _ in 0..iters {
+ hint::spin_loop();
+ }
+ }
+}
+
fn current_x2apic_processor_uid(madt: &Madt, apic_id: u32) -> Option<u32> {
madt.iter().find_map(|entry| match entry {
MadtEntry::LocalX2Apic(x2apic) if x2apic.x2apic_id == apic_id => Some(x2apic.processor_uid),
@@ -235,20 +329,53 @@
local_apic.set_icr(icr);
}
- // Send START IPI
+ // Intel SDM Vol 3A §8.4.4: wait 10ms after INIT deassert
+ // before sending first SIPI. Modern CPUs may need less,
+ // but 10ms is the safe specification-compliant value.
+ early_udelay(10_000);
+
+ // Send START IPI #1
{
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
- let mut icr = 0x4600 | ap_segment as u64;
-
+ // ICR: Delivery Mode=StartUp(110), Vector=ap_segment
+ // Note: bit 14 (Level) must be 0 for SIPI per Intel SDM.
+ let mut icr = 0x0600 | ap_segment as u64;
if local_apic.x2 {
icr |= u64::from(ap_local_apic.id) << 32;
} else {
icr |= u64::from(ap_local_apic.id) << 56;
}
+ local_apic.set_icr(icr);
+ }
+
+ // Intel SDM: wait 200µs between SIPIs
+ early_udelay(200);
+ // Send START IPI #2 (recommended for compatibility)
+ {
+ let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
+ let mut icr = 0x0600 | ap_segment as u64;
+ if local_apic.x2 {
+ icr |= u64::from(ap_local_apic.id) << 32;
+ } else {
+ icr |= u64::from(ap_local_apic.id) << 56;
+ }
local_apic.set_icr(icr);
}
+ // Wait briefly for SIPI to be accepted
+ early_udelay(200);
+
+ // Check ESR for delivery errors after SIPI sequence.
+ // Bit 5 = Send Accept Error, Bit 6 = Send Illegal Vector.
+ let esr_val = unsafe { local_apic.esr() };
+ if esr_val != 0 {
+ println!(
+ "KERNEL AP: CPU {} SIPI delivery error (ESR={:#x}), continuing",
+ ap_local_apic.id, esr_val
+ );
+ }
+
// Wait for trampoline ready with timeout
let mut trampoline_ready = false;
for _ in 0..AP_SPIN_LIMIT {
@@ -343,34 +470,50 @@
}
AP_READY.store(false, Ordering::SeqCst);
+ // Clear APIC Error Status Register before starting AP.
+ unsafe { local_apic.esr(); }
+
+ // Send INIT IPI (Assert)
{
let mut icr = 0x4500u64;
icr |= u64::from(apic_id) << 32;
local_apic.set_icr(icr);
}
- for _ in 0..100_000 {
- hint::spin_loop();
- }
+ // Intel SDM Vol 3A §8.4.4: wait 10ms after INIT
+ early_udelay(10_000);
+ // Send START IPI #1
{
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
- let mut icr = 0x4600u64 | ap_segment as u64;
+ let mut icr = 0x0600u64 | ap_segment as u64;
icr |= u64::from(apic_id) << 32;
local_apic.set_icr(icr);
}
- for _ in 0..2_000_000 {
- hint::spin_loop();
- }
+ // Intel SDM: wait 200µs between SIPIs
+ early_udelay(200);
+ // Send START IPI #2 (recommended for compatibility)
{
let ap_segment = (TRAMPOLINE >> 12) & 0xFF;
- let mut icr = 0x4600u64 | ap_segment as u64;
+ let mut icr = 0x0600u64 | ap_segment as u64;
icr |= u64::from(apic_id) << 32;
local_apic.set_icr(icr);
}
+ // Wait briefly for SIPI acceptance
+ early_udelay(200);
+
+ // Check ESR for delivery errors.
+ let esr_val = unsafe { local_apic.esr() };
+ if esr_val != 0 {
+ println!(
+ "KERNEL AP: CPU {} SIPI delivery error (ESR={:#x}), continuing",
+ apic_id, esr_val
+ );
+ }
+
let mut trampoline_ready = false;
for _ in 0..AP_SPIN_LIMIT {
if unsafe { (*ap_ready.cast::<AtomicU8>()).load(Ordering::SeqCst) } != 0 {
@@ -1,11 +0,0 @@
--- src/arch/x86_shared/idt.rs
+++ src/arch/x86_shared/idt.rs
@@ -110,6 +110,8 @@
}
pub fn available_irqs_iter(cpu_id: LogicalCpuId) -> impl Iterator<Item = u8> + 'static {
+ let count = (32..=254).filter(|&index| !is_reserved(cpu_id, index)).count();
+ info!("available_irqs_iter: cpu_id={} count={}", cpu_id.get(), count);
(32..=254).filter(move |&index| !is_reserved(cpu_id, index))
}
@@ -1,87 +0,0 @@
--- src/context/switch.rs
+++ src/context/switch.rs
@@ -361,6 +361,7 @@
}
/// This is the scheduler function which currently utilises Deficit Weighted Round Robin Scheduler
+/// with NUMA-aware context selection preference.
fn select_next_context(
token: &mut CleanLockToken,
percpu: &PercpuBlock,
@@ -386,6 +387,10 @@
let total_contexts: usize = contexts_list.iter().map(|q| q.len()).sum();
let mut skipped_contexts = 0;
+ // NUMA-aware selection: remember cross-node fallback candidate.
+ let my_numa_node = percpu.numa_node.get();
+ let mut cross_node_fallback: Option<(usize, ArcContextLockWriteGuard)> = None;
+
'priority: loop {
i = (i + 1) % 40;
total_iters += 1;
@@ -450,11 +455,44 @@
// Is this context runnable on this CPU?
let sw = unsafe { update_runnable(&mut next_context_guard, cpu_id, switch_time) };
if let UpdateResult::CanSwitch = sw {
- // Cache the new context's priority for MCS lock priority donation.
- percpu.current_prio.set(next_context_guard.prio);
- next_context_guard_opt = Some(next_context_guard);
- balance[i] -= SCHED_PRIO_TO_WEIGHT[20];
- break 'priority;
+ // NUMA-aware selection: check if this context's last CPU was on the same node.
+ let same_node = if my_numa_node != u8::MAX {
+ next_context_guard.cpu_id
+ .map(|cid| {
+ crate::percpu::get_for_cpu(cid)
+ .map(|p| p.numa_node.get() == my_numa_node)
+ .unwrap_or(false)
+ })
+ .unwrap_or(true) // New context (no last CPU) — treat as same node
+ } else {
+ true // No NUMA info — treat all as same node
+ };
+
+ if same_node {
+ // Cache-warm: select immediately
+ percpu.current_prio.set(next_context_guard.prio);
+ next_context_guard_opt = Some(next_context_guard);
+ balance[i] -= SCHED_PRIO_TO_WEIGHT[20];
+ break 'priority;
+ } else {
+ // Cross-node candidate: save as fallback, keep scanning for same-node
+ if cross_node_fallback.is_none() {
+ // Cache the priority and balance for later
+ cross_node_fallback =
+ Some((next_context_guard.prio, next_context_guard));
+ balance[i] -= SCHED_PRIO_TO_WEIGHT[20];
+ // Don't break — keep looking for a same-node context
+ continue;
+ } else {
+ // Already have a cross-node fallback; push this one back
+ contexts.push_back(next_context_ref);
+ skipped_contexts += 1;
+ if skipped_contexts >= total_contexts {
+ break 'priority;
+ }
+ continue;
+ }
+ }
} else {
if matches!(sw, UpdateResult::Blocked) {
idle_contexts(token.token()).push_back(next_context_ref);
@@ -469,6 +507,15 @@
}
}
}
+
+ // If we found a cross-node fallback but no same-node context, use it
+ if next_context_guard_opt.is_none() {
+ if let Some((prio, guard)) = cross_node_fallback {
+ percpu.current_prio.set(prio);
+ next_context_guard_opt = Some(guard);
+ }
+ }
+
percpu.balance.set(balance);
percpu.last_queue.set(i);