029472d5e3
dma.rs: IommuDmaAllocator (145 lines) - New struct wires existing IOMMU daemon (1003 lines) to existing DmaBuffer (261) - allocate(): phys-contiguous alloc via scheme:memory, then MAP through IOMMU domain - unmap(): sends UNMAP to IOMMU domain, releases IOVA - Inlined IOMMU protocol constants — no new crate dependency - encode_iommu_request/decode_iommu_response for scheme write/read cycle Documentation updates: - IMPLEMENTATION-MASTER-PLAN.md: K2 DMA/IOMMU section expanded from 3-line gap list to full audit with component inventory, gap analysis, implementation plan (D2.1-D2.5), Linux reference table. Added K2b thread/fork audit. - CPU-DMA-IRQ-MSI-SCHEDULER-FIX-PLAN.md: Phase 1 (MSI) marked complete with per-task status. Phase 2 (DMA) re-scoped from 'create' to 'wire' based on audit. Phase 3 (scheduler) marked mostly done. - IRQ-AND-LOWLEVEL-CONTROLLERS-ENHANCEMENT-PLAN.md: kernel MSI support noted as materially strong with P8-msi.patch reference. Audit findings: - IOMMU daemon is solid: 1003-line lib.rs with full scheme protocol, 427-line amd_vi.rs, host-runnable tests. Needs wiring, not rewriting. - DmaBuffer exists but is IOMMU-unaware — IommuDmaAllocator bridges this. - relibc rlct_clone is correct for threads (shares addr space implicitly). '3 IPC hops' claim is microkernel-architectural, not a real perf issue. - No stale docs to archive at this time.
407 lines
13 KiB
Rust
407 lines
13 KiB
Rust
use core::ptr::NonNull;
|
|
use std::sync::atomic::{AtomicI32, Ordering};
|
|
|
|
use redox_syscall::data::Map;
|
|
use redox_syscall::flag::{MapFlags, MAP_PRIVATE, O_CLOEXEC, PROT_READ, PROT_WRITE};
|
|
use redox_syscall::PAGE_SIZE;
|
|
use syscall as redox_syscall;
|
|
|
|
use crate::{DriverError, Result};
|
|
|
|
// Wire protocol shared with the IOMMU daemon's scheme interface. The values
// are inlined here (mirrored from the daemon) so this crate does not grow a
// dependency on the daemon's crate.

/// Fixed size of an encoded IOMMU request, in bytes.
const IOMMU_REQ_SIZE: usize = 32;
/// Fixed size of an encoded IOMMU response, in bytes.
const IOMMU_RSP_SIZE: usize = 36;
/// Protocol version stamped into every request header.
const IOMMU_VERSION: u16 = 1;
/// Opcode: map a physical range into the domain, returning an IOVA.
const IOMMU_OP_MAP: u16 = 0x0010;
/// Opcode: unmap a previously mapped IOVA range.
const IOMMU_OP_UNMAP: u16 = 0x0011;

/// Serialize one IOMMU request into its fixed little-endian wire layout:
/// `[opcode:u16][version:u16][arg0:u32][arg1:u64][arg2:u64][arg3:u64]`.
fn encode_iommu_request(opcode: u16, arg0: u32, arg1: u64, arg2: u64, arg3: u64) -> [u8; IOMMU_REQ_SIZE] {
    // Tiny helper: copy one little-endian field at a fixed offset.
    fn put(dst: &mut [u8], at: usize, field: &[u8]) {
        dst[at..at + field.len()].copy_from_slice(field);
    }

    let mut out = [0u8; IOMMU_REQ_SIZE];
    put(&mut out, 0, &opcode.to_le_bytes());
    put(&mut out, 2, &IOMMU_VERSION.to_le_bytes());
    put(&mut out, 4, &arg0.to_le_bytes());
    put(&mut out, 8, &arg1.to_le_bytes());
    put(&mut out, 16, &arg2.to_le_bytes());
    put(&mut out, 24, &arg3.to_le_bytes());
    out
}

/// Decoded subset of an IOMMU daemon response.
#[derive(Debug)]
struct IommuResponse {
    /// Daemon status code; 0 means success.
    status: i32,
    /// First result argument (e.g. the allocated IOVA for MAP).
    arg1: u64,
}

/// Decode a daemon response. Only the status word (bytes 0..4) and the first
/// result argument (bytes 12..20) are consumed; the remaining header bytes
/// are ignored here. Returns `None` if `bytes` is shorter than a full
/// response.
fn decode_iommu_response(bytes: &[u8]) -> Option<IommuResponse> {
    if bytes.len() < IOMMU_RSP_SIZE {
        return None;
    }
    let status_raw: [u8; 4] = bytes[0..4].try_into().ok()?;
    let arg1_raw: [u8; 8] = bytes[12..20].try_into().ok()?;
    Some(IommuResponse {
        status: i32::from_le_bytes(status_raw),
        arg1: u64::from_le_bytes(arg1_raw),
    })
}
|
|
|
|
fn write_iommu_request(fd: i32, opcode: u16, arg0: u32, arg1: u64, arg2: u64, arg3: u64) -> Result<IommuResponse> {
|
|
let req_bytes = encode_iommu_request(opcode, arg0, arg1, arg2, arg3);
|
|
let written = libredox::call::write(fd as usize, &req_bytes)
|
|
.map_err(|e| DriverError::Io(std::io::Error::from_raw_os_error(e.errno())))?;
|
|
if written < IOMMU_REQ_SIZE {
|
|
return Err(DriverError::Other(format!("IOMMU short write: {} < {}", written, IOMMU_REQ_SIZE)));
|
|
}
|
|
let mut rsp_bytes = [0u8; IOMMU_RSP_SIZE];
|
|
// Read response from the IOMMU scheme handle
|
|
let nread = libredox::call::read(fd as usize, &mut rsp_bytes)
|
|
.map_err(|e| DriverError::Io(std::io::Error::from_raw_os_error(e.errno())))?;
|
|
if nread == 0 {
|
|
return Err(DriverError::Other("IOMMU empty response".into()));
|
|
}
|
|
decode_iommu_response(&rsp_bytes[..nread])
|
|
.ok_or_else(|| DriverError::Other("IOMMU malformed response".into()))
|
|
}
|
|
|
|
/// Cacheability attribute requested from scheme:memory for DMA buffers.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum DmaMemoryType {
    /// Write-back cached memory.
    Writeback,
    /// Uncacheable memory.
    Uncacheable,
}

impl DmaMemoryType {
    /// Scheme-path suffix selecting this memory type ("wb" or "uc").
    const fn suffix(self) -> &'static str {
        if matches!(self, Self::Writeback) {
            "wb"
        } else {
            "uc"
        }
    }
}
|
|
|
|
/// Memory type requested from scheme:memory for DMA buffers on this target.
///
/// x86/x86_64 use write-back ("wb") memory; every other architecture falls
/// back to uncacheable ("uc") mappings.
/// NOTE(review): assumes non-x86 targets lack cache-coherent DMA — confirm
/// per platform before relying on "uc" being required there.
const DMA_MEMORY_TYPE: DmaMemoryType = if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
    DmaMemoryType::Writeback
} else {
    DmaMemoryType::Uncacheable
};

/// Cached FD for `/scheme/memory/scheme-root`. -1 means uninitialized.
///
/// This FD is process-lifetime cached for performance. If scheme:memory
/// restarts (which should never happen — it's a kernel scheme), all
/// in-flight DMA operations are already undefined behavior.
static DMA_MEMORY_FD: AtomicI32 = AtomicI32::new(-1);
|
|
|
|
fn get_dma_memory_fd() -> Result<i32> {
|
|
let current = DMA_MEMORY_FD.load(Ordering::Acquire);
|
|
if current >= 0 {
|
|
return Ok(current);
|
|
}
|
|
|
|
let fd = libredox::call::open("/scheme/memory/scheme-root", O_CLOEXEC as i32, 0)
|
|
.map_err(|e| DriverError::Io(std::io::Error::from_raw_os_error(e.errno())))?;
|
|
|
|
let raw = fd as i32;
|
|
// Try to store; if another thread won the race, close ours and use theirs.
|
|
match DMA_MEMORY_FD.compare_exchange(-1, raw, Ordering::AcqRel, Ordering::Acquire) {
|
|
Ok(_) => Ok(raw),
|
|
Err(existing) => {
|
|
let _ = libredox::call::close(fd as usize);
|
|
Ok(existing)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Translate a virtual address to its physical address via the
/// `/scheme/memory/translation` scheme.
///
/// The translation fd is opened once per process and cached in a local
/// static; losers of the initialization race close their own fd and adopt
/// the winner's. The fd is deliberately never closed for the life of the
/// process.
fn virt_to_phys_cached(virt: usize) -> Result<usize> {
    // Use a cached fd for address translation
    static TRANSLATION_FD: AtomicI32 = AtomicI32::new(-1);

    let raw = match TRANSLATION_FD.load(Ordering::Acquire) {
        fd if fd >= 0 => fd,
        _ => {
            let fd = libredox::Fd::open("/scheme/memory/translation", O_CLOEXEC as i32, 0)
                .map_err(|e| DriverError::Io(std::io::Error::from_raw_os_error(e.errno())))?;
            let raw = fd.raw() as i32;
            // Leak the fd intentionally — it's a global cache. `forget`
            // prevents the `Fd` wrapper's Drop from closing it.
            std::mem::forget(fd);
            match TRANSLATION_FD.compare_exchange(-1, raw, Ordering::AcqRel, Ordering::Acquire) {
                Ok(_) => raw,
                Err(existing) => {
                    // Another thread initialized the cache first; close the
                    // fd we just opened (safe: it was forgotten above, so
                    // this is the only close) and use theirs.
                    let _ = libredox::call::close(raw as usize);
                    existing
                }
            }
        }
    };

    // The scheme call uses `buf` in-place: we pass the virtual address in and
    // it is overwritten with the physical address on return.
    let mut buf = virt.to_ne_bytes();
    libredox::call::call_ro(
        raw as usize,
        &mut buf,
        redox_syscall::CallFlags::empty(),
        &[],
    )
    .map_err(DriverError::from)?;
    Ok(usize::from_ne_bytes(buf))
}
|
|
|
|
/// Backing storage for a `DmaBuffer`, determining how it is released on Drop.
enum DmaStorage {
    /// Allocated via scheme:memory — freed via munmap
    SchemeMapped {
        /// Start of the mapped region in our address space.
        ptr: NonNull<u8>,
        /// Mapped length in bytes (page-rounded at allocation time).
        size: usize,
        /// Fd of the scheme:memory region backing the mapping; closed on Drop.
        region_fd: i32,
    },
    /// Allocated via heap — freed via dealloc
    Heap {
        /// Start of the heap block.
        ptr: NonNull<u8>,
        /// Layout the block was allocated with; required for dealloc.
        layout: std::alloc::Layout,
    },
}
|
|
|
|
/// A buffer intended for device DMA, with a known physical address.
///
/// Preferably backed by a physically contiguous scheme:memory mapping; see
/// `DmaBuffer::is_physically_contiguous` for the heap fallback caveat.
pub struct DmaBuffer {
    // How the memory was obtained, and therefore how it is released.
    storage: DmaStorage,
    // Physical address of the first byte (of the first page, for the
    // scheme-mapped case).
    phys_addr: usize,
    // Usable length in bytes as reported by `len()`.
    size: usize,
}
|
|
|
|
impl DmaBuffer {
|
|
/// Allocate a physically contiguous DMA buffer.
|
|
///
|
|
/// Uses scheme:memory to allocate real physical pages, ensuring the buffer
|
|
/// is safe for DMA hardware access. Falls back to heap allocation only in
|
|
/// non-Redox environments (e.g., Linux host for testing), logging a warning.
|
|
pub fn allocate(size: usize, align: usize) -> Result<Self> {
|
|
let align = align.max(64);
|
|
let aligned_size = size.next_multiple_of(PAGE_SIZE).max(align);
|
|
|
|
// Attempt 1: Allocate via scheme:memory (physically contiguous)
|
|
if let Ok(mem_fd) = get_dma_memory_fd() {
|
|
if let Ok(mapped) = Self::allocate_via_scheme(mem_fd, aligned_size, align) {
|
|
return Ok(mapped);
|
|
}
|
|
}
|
|
|
|
// Fallback: heap allocation (NOT physically contiguous — log warning)
|
|
log::warn!(
|
|
"DmaBuffer: falling back to heap allocation ({} bytes) — NOT physically contiguous!",
|
|
size
|
|
);
|
|
let layout = std::alloc::Layout::from_size_align(size, align)
|
|
.map_err(|e| DriverError::Other(format!("invalid DMA layout: {e}")))?;
|
|
|
|
let ptr = unsafe { std::alloc::alloc_zeroed(layout) };
|
|
let ptr = NonNull::new(ptr).ok_or_else(|| {
|
|
DriverError::Other(format!(
|
|
"DMA allocation failed: {size} bytes aligned to {align}"
|
|
))
|
|
})?;
|
|
|
|
let phys_addr = virt_to_phys_cached(ptr.as_ptr() as usize)?;
|
|
|
|
Ok(Self {
|
|
storage: DmaStorage::Heap { ptr, layout },
|
|
phys_addr,
|
|
size,
|
|
})
|
|
}
|
|
|
|
/// Allocate physically contiguous memory via scheme:memory/physical.
|
|
fn allocate_via_scheme(mem_fd: i32, size: usize, _align: usize) -> Result<Self> {
|
|
// Open a physical memory region of the requested size
|
|
let path = format!("zeroed@{}?phys_contiguous", DMA_MEMORY_TYPE.suffix());
|
|
let region_fd = libredox::call::openat(mem_fd as usize, &path, O_CLOEXEC as i32, 0)
|
|
.map_err(|e| DriverError::Io(std::io::Error::from_raw_os_error(e.errno())))?;
|
|
|
|
let map = Map {
|
|
offset: 0,
|
|
size,
|
|
flags: MapFlags::from_bits_truncate((MAP_PRIVATE | PROT_READ | PROT_WRITE).bits()),
|
|
address: 0,
|
|
};
|
|
|
|
// Map it into our address space through SYS_FMAP with combined map+prot flags.
|
|
let ptr = unsafe { redox_syscall::call::fmap(region_fd as usize, &map) }.map_err(|e| {
|
|
let _ = libredox::call::close(region_fd as usize);
|
|
DriverError::MappingFailed {
|
|
phys: 0,
|
|
size,
|
|
reason: format!("DMA mmap failed: {e:?}"),
|
|
}
|
|
})?;
|
|
|
|
let _ = libredox::call::close(region_fd as usize);
|
|
|
|
let phys_addr = virt_to_phys_cached(ptr as usize)?;
|
|
for page in 1..size.div_ceil(PAGE_SIZE) {
|
|
let translated = virt_to_phys_cached(ptr as usize + page * PAGE_SIZE)?;
|
|
if translated != phys_addr + page * PAGE_SIZE {
|
|
return Err(DriverError::Other(format!(
|
|
"DMA mapping is not physically contiguous across page {}: expected {:#x}, got {:#x}",
|
|
page,
|
|
phys_addr + page * PAGE_SIZE,
|
|
translated
|
|
)));
|
|
}
|
|
}
|
|
let ptr = NonNull::new(ptr as *mut u8)
|
|
.ok_or_else(|| DriverError::Other("DMA mmap returned null".into()))?;
|
|
|
|
log::debug!(
|
|
"DmaBuffer: {} bytes at virt={:#x} phys={:#x} (physically contiguous)",
|
|
size,
|
|
ptr.as_ptr() as usize,
|
|
phys_addr
|
|
);
|
|
|
|
Ok(Self {
|
|
storage: DmaStorage::SchemeMapped {
|
|
ptr,
|
|
size,
|
|
region_fd: region_fd as i32,
|
|
},
|
|
phys_addr,
|
|
size,
|
|
})
|
|
}
|
|
|
|
pub fn as_ptr(&self) -> *const u8 {
|
|
match &self.storage {
|
|
DmaStorage::SchemeMapped { ptr, .. } | DmaStorage::Heap { ptr, .. } => ptr.as_ptr(),
|
|
}
|
|
}
|
|
|
|
pub fn as_mut_ptr(&mut self) -> *mut u8 {
|
|
match &mut self.storage {
|
|
DmaStorage::SchemeMapped { ptr, .. } | DmaStorage::Heap { ptr, .. } => ptr.as_ptr(),
|
|
}
|
|
}
|
|
|
|
pub fn physical_address(&self) -> usize {
|
|
self.phys_addr
|
|
}
|
|
|
|
pub fn len(&self) -> usize {
|
|
self.size
|
|
}
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
self.size == 0
|
|
}
|
|
|
|
/// Returns true if this buffer is guaranteed physically contiguous.
|
|
/// On real hardware, this must be true for DMA to work safely.
|
|
pub fn is_physically_contiguous(&self) -> bool {
|
|
matches!(self.storage, DmaStorage::SchemeMapped { .. })
|
|
}
|
|
}
|
|
|
|
impl Drop for DmaBuffer {
    /// Releases the backing storage according to how it was allocated.
    fn drop(&mut self) {
        match &self.storage {
            DmaStorage::SchemeMapped {
                ptr,
                size,
                region_fd,
            } => {
                // Unmap the scheme-backed region, then release the region fd.
                // Errors are deliberately ignored: there is no useful recovery
                // inside drop.
                let _ = unsafe { libredox::call::munmap(ptr.as_ptr() as *mut (), *size) };
                let _ = libredox::call::close(*region_fd as usize);
            }
            DmaStorage::Heap { ptr, layout } => {
                // SAFETY: `ptr` was produced by `alloc_zeroed` with exactly
                // this `layout` in `DmaBuffer::allocate`.
                unsafe { std::alloc::dealloc(ptr.as_ptr(), *layout) };
            }
        }
    }
}
|
|
|
|
// SAFETY: DmaBuffer exclusively owns its backing storage (a scheme mapping or
// a heap block); the raw pointers inside are not shared elsewhere, so moving
// the buffer between threads is sound.
// NOTE(review): `Sync` additionally assumes callers synchronize any writes
// performed through `as_mut_ptr`/device DMA — confirm at the call sites.
unsafe impl Send for DmaBuffer {}
unsafe impl Sync for DmaBuffer {}
|
|
|
|
/// IOMMU-backed DMA allocator.
///
/// Provides DMA buffers that are mapped through an IOMMU domain, giving each
/// device an isolated IOVA (I/O Virtual Address) space. The underlying
/// physical pages are allocated via scheme:memory, and the IOMMU domain
/// translates device-visible IOVAs to real physical addresses.
pub struct IommuDmaAllocator {
    // Handle to `iommu:domain/N`; owned by this allocator and closed on Drop.
    domain_fd: i32,
    // Total number of successful allocations performed through this
    // allocator. Note: `unmap` never decrements it.
    alloc_count: usize,
}
|
|
|
|
impl IommuDmaAllocator {
|
|
/// Create a new IOMMU-backed DMA allocator.
|
|
///
|
|
/// `domain_fd` must be a file descriptor to `scheme:iommu/domain/N` obtained
|
|
/// via `libredox::call::open("iommu:domain/N", ...)`.
|
|
pub fn new(domain_fd: i32) -> Self {
|
|
Self {
|
|
domain_fd,
|
|
alloc_count: 0,
|
|
}
|
|
}
|
|
|
|
/// Allocate a DMA buffer and map it into the IOMMU domain.
|
|
///
|
|
/// Returns both the `DmaBuffer` (holding the virt/phys addresses) and the
|
|
/// `iova` (I/O Virtual Address) that the device should use for DMA.
|
|
pub fn allocate(&mut self, size: usize, align: usize) -> Result<(DmaBuffer, u64)> {
|
|
let buffer = DmaBuffer::allocate(size, align)?;
|
|
let phys = buffer.physical_address();
|
|
|
|
let iova = self.map_to_iommu(phys as u64, buffer.len() as u64)?;
|
|
self.alloc_count += 1;
|
|
|
|
log::debug!(
|
|
"IommuDmaAllocator: alloc #{}: phys={:#x} iova={:#x} size={}",
|
|
self.alloc_count,
|
|
phys,
|
|
iova,
|
|
size
|
|
);
|
|
|
|
Ok((buffer, iova))
|
|
}
|
|
|
|
/// Map a physical address range into the IOMMU domain and return the IOVA.
|
|
fn map_to_iommu(&self, phys: u64, size: u64) -> Result<u64> {
|
|
let response = write_iommu_request(
|
|
self.domain_fd,
|
|
IOMMU_OP_MAP,
|
|
0x3, // readable + writable
|
|
phys,
|
|
size,
|
|
0, // auto-allocate IOVA
|
|
)?;
|
|
if response.status != 0 {
|
|
return Err(DriverError::Other(format!(
|
|
"IOMMU MAP failed: phys={:#x} size={} status={}",
|
|
phys, size, response.status
|
|
)));
|
|
}
|
|
Ok(response.arg1)
|
|
}
|
|
|
|
/// Unmap an IOVA range from the IOMMU domain.
|
|
pub fn unmap(&self, iova: u64) {
|
|
if let Err(e) = write_iommu_request(
|
|
self.domain_fd,
|
|
IOMMU_OP_UNMAP,
|
|
0,
|
|
iova,
|
|
0,
|
|
0,
|
|
) {
|
|
log::warn!("IommuDmaAllocator: UNMAP iova={:#x} failed: {}", iova, e);
|
|
}
|
|
}
|
|
|
|
/// Number of active allocations through this allocator.
|
|
pub fn alloc_count(&self) -> usize {
|
|
self.alloc_count
|
|
}
|
|
}
|
|
|
|
impl Drop for IommuDmaAllocator {
|
|
fn drop(&mut self) {
|
|
if self.alloc_count > 0 {
|
|
log::info!(
|
|
"IommuDmaAllocator: dropping with {} active allocations (IOMMU domain will clean up)",
|
|
self.alloc_count
|
|
);
|
|
}
|
|
let _ = libredox::call::close(self.domain_fd as usize);
|
|
}
|
|
}
|