Add runtime tools and Red Bear service wiring

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-04-14 10:50:42 +01:00
parent fd60edc823
commit 51f3c21121
62 changed files with 9613 additions and 881 deletions
@@ -0,0 +1,524 @@
use std::error::Error as StdError;
use std::fmt;
const ACPI_HEADER_BYTES: usize = 36;
const IVRS_HEADER_BYTES: usize = ACPI_HEADER_BYTES + 4;
const IVHD_HEADER_BYTES: usize = 0x18;
const IVHD_TYPE_10: u8 = 0x10;
const IVHD_TYPE_11: u8 = 0x11;
const IVMD_TYPE_20: u8 = 0x20;
const IVMD_TYPE_21: u8 = 0x21;
const IVHD_ALL: u8 = 0x00;
const IVHD_SEL: u8 = 0x01;
const IVHD_SOR: u8 = 0x02;
const IVHD_EOR: u8 = 0x03;
const IVHD_PAD4: u8 = 0x42;
const IVHD_PAD8: u8 = 0x43;
const IVHD_VAR: u8 = 0x44;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Bdf(pub u16);
impl Bdf {
pub const fn new(bus: u8, device: u8, function: u8) -> Self {
Self(((bus as u16) << 8) | (((device as u16) & 0x1F) << 3) | ((function as u16) & 0x7))
}
pub const fn raw(self) -> u16 {
self.0
}
pub const fn bus(self) -> u8 {
(self.0 >> 8) as u8
}
pub const fn device(self) -> u8 {
((self.0 >> 3) & 0x1F) as u8
}
pub const fn function(self) -> u8 {
(self.0 & 0x7) as u8
}
}
impl fmt::Display for Bdf {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{:02x}:{:02x}.{}",
self.bus(),
self.device(),
self.function()
)
}
}
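/// Parses a PCI address from text. Accepted forms, per the tests below:
/// `bus:device.function` ("00:14.0"), the segment-prefixed form
/// `seg:bus:device.function` ("0000:02:00.1", leading segment ignored),
/// a "0x"-prefixed raw devid ("0x1234"), or a bare hex devid.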
pub fn parse_bdf(text: &str) -> Option<Bdf> {
let trimmed = text.trim();
if trimmed.is_empty() {
return None;
}
if let Some(raw) = trimmed.strip_prefix("0x") {
return u16::from_str_radix(raw, 16).ok().map(Bdf);
}
if trimmed.contains('.') {
let (head, function) = trimmed.rsplit_once('.')?;
let function = u8::from_str_radix(function, 16)
.or_else(|_| function.parse::<u8>())
.ok()?;
let parts: Vec<&str> = head.split(':').collect();
let (bus, device) = match parts.as_slice() {
[bus, device] => (*bus, *device),
[_, bus, device] => (*bus, *device),
_ => return None,
};
let bus = u8::from_str_radix(bus, 16).ok()?;
let device = u8::from_str_radix(device, 16).ok()?;
return Some(Bdf::new(bus, device, function));
}
u16::from_str_radix(trimmed, 16).ok().map(Bdf)
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IvhdEntry {
All { flags: u8 },
Select { bdf: Bdf, flags: u8 },
StartRange { bdf: Bdf, flags: u8 },
EndRange { bdf: Bdf },
Padding { kind: u8, length: usize },
Variable { kind: u8, payload: Vec<u8> },
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct IommuUnitInfo {
pub entry_type: u8,
pub flags: u8,
pub length: u16,
pub iommu_bdf: Bdf,
pub capability_offset: u16,
pub mmio_base: u64,
pub pci_segment_group: u16,
pub iommu_info: u16,
pub iommu_efr: u32,
pub device_entries: Vec<IvhdEntry>,
}
impl IommuUnitInfo {
pub fn unit_id(&self) -> u8 {
((self.iommu_info >> 6) & 0x7F) as u8
}
pub fn msi_number(&self) -> u8 {
(self.iommu_info & 0x3F) as u8
}
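/// Returns true when this unit translates for `bdf`: an `All` entry covers
/// every device, a matching `Select` entry hits directly, and a
/// `StartRange`/`EndRange` pair covers the inclusive devid range between them.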
pub fn handles_device(&self, bdf: Bdf) -> bool {
let mut all = false;
let mut range_start: Option<u16> = None;
for entry in &self.device_entries {
match *entry {
IvhdEntry::All { .. } => all = true,
IvhdEntry::Select { bdf: selected, .. } if selected == bdf => return true,
IvhdEntry::StartRange { bdf: start, .. } => range_start = Some(start.raw()),
IvhdEntry::EndRange { bdf: end } => {
if let Some(start) = range_start.take() {
let raw = bdf.raw();
if (start..=end.raw()).contains(&raw) {
return true;
}
}
}
_ => {}
}
}
all
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct IvrsInfo {
pub revision: u8,
pub iv_info: u32,
pub units: Vec<IommuUnitInfo>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IvrsError {
TooShort,
InvalidSignature([u8; 4]),
InvalidLength(u32),
InvalidChecksum,
TruncatedEntry { offset: usize },
InvalidEntryLength { offset: usize, length: usize },
InvalidIvhdLength { offset: usize, length: usize },
InvalidVariableLength { offset: usize, length: usize },
}
impl fmt::Display for IvrsError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::TooShort => write!(f, "IVRS table is shorter than the ACPI header"),
Self::InvalidSignature(sig) => write!(
f,
"invalid IVRS signature {:?}",
String::from_utf8_lossy(sig)
),
Self::InvalidLength(length) => write!(f, "invalid IVRS table length {length}"),
Self::InvalidChecksum => write!(f, "IVRS checksum validation failed"),
Self::TruncatedEntry { offset } => {
write!(f, "truncated IVRS entry at offset {offset:#x}")
}
Self::InvalidEntryLength { offset, length } => {
write!(
f,
"invalid IVRS entry length {length} at offset {offset:#x}"
)
}
Self::InvalidIvhdLength { offset, length } => {
write!(
f,
"invalid IVHD entry length {length} at offset {offset:#x}"
)
}
Self::InvalidVariableLength { offset, length } => {
write!(
f,
"invalid IVHD variable-length entry {length} at offset {offset:#x}"
)
}
}
}
}
impl StdError for IvrsError {}
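/// Parses an IVRS table: validates the "IVRS" signature, length, and
/// whole-table checksum, reads IVInfo at offset 36, then walks the
/// type/flags/length-framed entries, decoding IVHD blocks (types 0x10/0x11)
/// and skipping everything else by length.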
pub fn parse_ivrs(bytes: &[u8]) -> Result<IvrsInfo, IvrsError> {
if bytes.len() < IVRS_HEADER_BYTES {
return Err(IvrsError::TooShort);
}
let signature = bytes[0..4].try_into().map_err(|_| IvrsError::TooShort)?;
if signature != *b"IVRS" {
return Err(IvrsError::InvalidSignature(signature));
}
let length = read_u32(bytes, 4).ok_or(IvrsError::TooShort)?;
if length < IVRS_HEADER_BYTES as u32 {
return Err(IvrsError::InvalidLength(length));
}
if bytes.len() < length as usize {
return Err(IvrsError::TooShort);
}
let table = &bytes[..length as usize];
if table.iter().fold(0u8, |sum, byte| sum.wrapping_add(*byte)) != 0 {
return Err(IvrsError::InvalidChecksum);
}
let revision = table[8];
let iv_info = read_u32(table, ACPI_HEADER_BYTES).ok_or(IvrsError::TooShort)?;
let mut units = Vec::new();
let mut offset = IVRS_HEADER_BYTES;
while offset < table.len() {
if offset + 4 > table.len() {
return Err(IvrsError::TruncatedEntry { offset });
}
let entry_type = table[offset];
let entry_length =
read_u16(table, offset + 2).ok_or(IvrsError::TruncatedEntry { offset })? as usize;
if entry_length < 4 {
return Err(IvrsError::InvalidEntryLength {
offset,
length: entry_length,
});
}
if offset + entry_length > table.len() {
return Err(IvrsError::TruncatedEntry { offset });
}
let entry = &table[offset..offset + entry_length];
if matches!(entry_type, IVHD_TYPE_10 | IVHD_TYPE_11) {
units.push(parse_ivhd(entry, offset)?);
} else if matches!(entry_type, IVMD_TYPE_20 | IVMD_TYPE_21) {
// IVMD memory-definition entries are recognized but not consumed here.
}
offset += entry_length;
}
Ok(IvrsInfo {
revision,
iv_info,
units,
})
}
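/// Decodes one IVHD block. The fixed 0x18-byte header is laid out as:
/// type @0, flags @1, length @2 (u16), IOMMU devid @4, capability offset @6,
/// MMIO base @8 (u64), PCI segment group @16, IOMMU info @18, and EFR @20
/// (u32); 4-byte and variable-length device entries follow.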
fn parse_ivhd(entry: &[u8], table_offset: usize) -> Result<IommuUnitInfo, IvrsError> {
if entry.len() < IVHD_HEADER_BYTES {
return Err(IvrsError::InvalidIvhdLength {
offset: table_offset,
length: entry.len(),
});
}
let mut device_entries = Vec::new();
let mut offset = IVHD_HEADER_BYTES;
while offset < entry.len() {
let kind = entry[offset];
match kind {
IVHD_ALL => {
ensure_remaining(entry, offset, 4, table_offset)?;
device_entries.push(IvhdEntry::All {
flags: entry[offset + 1],
});
offset += 4;
}
IVHD_SEL => {
ensure_remaining(entry, offset, 4, table_offset)?;
device_entries.push(IvhdEntry::Select {
bdf: Bdf(
read_u16(entry, offset + 2).ok_or(IvrsError::TruncatedEntry {
offset: table_offset + offset,
})?,
),
flags: entry[offset + 1],
});
offset += 4;
}
IVHD_SOR => {
ensure_remaining(entry, offset, 4, table_offset)?;
device_entries.push(IvhdEntry::StartRange {
bdf: Bdf(
read_u16(entry, offset + 2).ok_or(IvrsError::TruncatedEntry {
offset: table_offset + offset,
})?,
),
flags: entry[offset + 1],
});
offset += 4;
}
IVHD_EOR => {
ensure_remaining(entry, offset, 4, table_offset)?;
device_entries.push(IvhdEntry::EndRange {
bdf: Bdf(
read_u16(entry, offset + 2).ok_or(IvrsError::TruncatedEntry {
offset: table_offset + offset,
})?,
),
});
offset += 4;
}
IVHD_PAD4 => {
ensure_remaining(entry, offset, 8, table_offset)?;
device_entries.push(IvhdEntry::Padding { kind, length: 8 });
offset += 8;
}
IVHD_PAD8 => {
ensure_remaining(entry, offset, 12, table_offset)?;
device_entries.push(IvhdEntry::Padding { kind, length: 12 });
offset += 12;
}
IVHD_VAR => {
ensure_remaining(entry, offset, 2, table_offset)?;
let variable_length = entry[offset + 1] as usize;
if variable_length < 2 {
return Err(IvrsError::InvalidVariableLength {
offset: table_offset + offset,
length: variable_length,
});
}
ensure_remaining(entry, offset, variable_length, table_offset)?;
device_entries.push(IvhdEntry::Variable {
kind,
payload: entry[offset + 2..offset + variable_length].to_vec(),
});
offset += variable_length;
}
_ => {
ensure_remaining(entry, offset, 4, table_offset)?;
device_entries.push(IvhdEntry::Variable {
kind,
payload: entry[offset + 1..offset + 4].to_vec(),
});
offset += 4;
}
}
}
Ok(IommuUnitInfo {
entry_type: entry[0],
flags: entry[1],
length: read_u16(entry, 2).ok_or(IvrsError::TruncatedEntry {
offset: table_offset,
})?,
iommu_bdf: Bdf(read_u16(entry, 4).ok_or(IvrsError::TruncatedEntry {
offset: table_offset,
})?),
capability_offset: read_u16(entry, 6).ok_or(IvrsError::TruncatedEntry {
offset: table_offset,
})?,
mmio_base: read_u64(entry, 8).ok_or(IvrsError::TruncatedEntry {
offset: table_offset,
})?,
pci_segment_group: read_u16(entry, 16).ok_or(IvrsError::TruncatedEntry {
offset: table_offset,
})?,
iommu_info: read_u16(entry, 18).ok_or(IvrsError::TruncatedEntry {
offset: table_offset,
})?,
iommu_efr: read_u32(entry, 20).ok_or(IvrsError::TruncatedEntry {
offset: table_offset,
})?,
device_entries,
})
}
fn ensure_remaining(
entry: &[u8],
offset: usize,
length: usize,
table_offset: usize,
) -> Result<(), IvrsError> {
if offset + length > entry.len() {
return Err(IvrsError::TruncatedEntry {
offset: table_offset + offset,
});
}
Ok(())
}
fn read_u16(bytes: &[u8], offset: usize) -> Option<u16> {
bytes
.get(offset..offset + 2)?
.try_into()
.ok()
.map(u16::from_le_bytes)
}
fn read_u32(bytes: &[u8], offset: usize) -> Option<u32> {
bytes
.get(offset..offset + 4)?
.try_into()
.ok()
.map(u32::from_le_bytes)
}
fn read_u64(bytes: &[u8], offset: usize) -> Option<u64> {
bytes
.get(offset..offset + 8)?
.try_into()
.ok()
.map(u64::from_le_bytes)
}
#[cfg(test)]
mod tests {
use super::{parse_bdf, parse_ivrs, Bdf, IommuUnitInfo, IvhdEntry, IVRS_HEADER_BYTES};
fn build_ivrs(units: &[Vec<u8>]) -> Vec<u8> {
let length = (IVRS_HEADER_BYTES + units.iter().map(Vec::len).sum::<usize>()) as u32;
let mut bytes = vec![0u8; length as usize];
bytes[0..4].copy_from_slice(b"IVRS");
bytes[4..8].copy_from_slice(&length.to_le_bytes());
bytes[8] = 3;
bytes[10..16].copy_from_slice(b"RDBEAR");
bytes[16..24].copy_from_slice(b"AMDVI ");
bytes[36..40].copy_from_slice(&0x0123_4567u32.to_le_bytes());
let mut offset = IVRS_HEADER_BYTES;
for unit in units {
bytes[offset..offset + unit.len()].copy_from_slice(unit);
offset += unit.len();
}
let checksum =
(!bytes.iter().fold(0u8, |sum, byte| sum.wrapping_add(*byte))).wrapping_add(1);
bytes[9] = checksum;
bytes
}
fn build_ivhd(mmio_base: u64, iommu_bdf: Bdf, entries: &[u8]) -> Vec<u8> {
let length = (0x18 + entries.len()) as u16;
let mut bytes = vec![0u8; length as usize];
bytes[0] = 0x11;
bytes[1] = 0xA0;
bytes[2..4].copy_from_slice(&length.to_le_bytes());
bytes[4..6].copy_from_slice(&iommu_bdf.raw().to_le_bytes());
bytes[6..8].copy_from_slice(&0x0040u16.to_le_bytes());
bytes[8..16].copy_from_slice(&mmio_base.to_le_bytes());
bytes[16..18].copy_from_slice(&0u16.to_le_bytes());
bytes[18..20].copy_from_slice(&0x01c2u16.to_le_bytes());
bytes[20..24].copy_from_slice(&0x00aa_5500u32.to_le_bytes());
bytes[24..].copy_from_slice(entries);
bytes
}
#[test]
fn parses_bdf_text_forms() {
assert_eq!(parse_bdf("00:14.0"), Some(Bdf::new(0x00, 0x14, 0x0)));
assert_eq!(parse_bdf("0000:02:00.1"), Some(Bdf::new(0x02, 0x00, 0x1)));
assert_eq!(parse_bdf("0x1234"), Some(Bdf(0x1234)));
assert_eq!(parse_bdf("zz:zz.z"), None);
}
#[test]
fn parses_ivrs_with_multiple_units() {
let unit0_entries = [
0x01, 0x11, 0x08, 0x00, // select 00:01.0
0x02, 0x22, 0x10, 0x00, // start range 00:02.0
0x03, 0x00, 0x17, 0x00, // end range 00:02.7
];
let unit1_entries = [0x00, 0x00, 0x00, 0x00];
let table = build_ivrs(&[
build_ivhd(0xfee0_0000, Bdf::new(0, 0x18, 2), &unit0_entries),
build_ivhd(0xfee1_0000, Bdf::new(0, 0x18, 3), &unit1_entries),
]);
let parsed = parse_ivrs(&table).unwrap_or_else(|err| panic!("IVRS parse failed: {err}"));
assert_eq!(parsed.units.len(), 2);
assert_eq!(parsed.units[0].mmio_base, 0xfee0_0000);
assert_eq!(parsed.units[1].iommu_bdf, Bdf::new(0, 0x18, 3));
let unit = &parsed.units[0];
assert!(unit.handles_device(Bdf::new(0, 1, 0)));
assert!(unit.handles_device(Bdf::new(0, 2, 3)));
assert!(!unit.handles_device(Bdf::new(0, 3, 0)));
assert_eq!(unit.unit_id(), 7);
assert_eq!(unit.msi_number(), 2);
}
#[test]
fn all_entry_covers_entire_bus_space() {
let unit = IommuUnitInfo {
entry_type: 0x11,
flags: 0,
length: 0x1c,
iommu_bdf: Bdf::new(0, 0x18, 2),
capability_offset: 0x40,
mmio_base: 0xfee0_0000,
pci_segment_group: 0,
iommu_info: 0,
iommu_efr: 0,
device_entries: vec![IvhdEntry::All { flags: 0 }],
};
assert!(unit.handles_device(Bdf::new(0x80, 0x1f, 7)));
}
}
@@ -0,0 +1,416 @@
use core::ptr::{read_volatile, write_volatile};
use log::{debug, warn};
use redox_driver_sys::memory::{CacheType, MmioProt, MmioRegion};
use crate::acpi::{parse_ivrs, Bdf, IommuUnitInfo, IvrsError};
use crate::command_buffer::{CommandBuffer, CommandEntry, EventLog, EventLogEntry};
use crate::device_table::{DeviceTable, DeviceTableEntry, DEVICE_TABLE_ENTRIES};
use crate::interrupt::InterruptRemapTable;
use crate::mmio::{control, ext_feature, status, AmdViMmio, AMD_VI_MMIO_BYTES};
use crate::page_table::DomainPageTables;
const CMD_BUF_LEN_ENCODING: u64 = 0x09;
const EVT_LOG_LEN_ENCODING: u64 = 0x09;
const DEV_TABLE_SIZE_ENCODING: u64 = 0x0F;
const DEFAULT_CMD_ENTRIES: usize = 512;
const DEFAULT_EVT_ENTRIES: usize = 512;
const DEFAULT_IRT_ENTRIES: usize = 4096;
const COMPLETION_TOKEN: u32 = 0xA11D_F00D;
struct MmioMapping {
region: MmioRegion,
base: *mut AmdViMmio,
}
pub struct AmdViUnit {
info: IommuUnitInfo,
mmio: Option<MmioMapping>,
device_table: Option<DeviceTable>,
command_buffer: Option<CommandBuffer>,
event_log: Option<EventLog>,
interrupt_table: Option<InterruptRemapTable>,
completion_store: Option<redox_driver_sys::dma::DmaBuffer>,
command_tail: usize,
event_head: usize,
initialized: bool,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AmdViEvent {
pub unit_id: u8,
pub event_code: u16,
pub event_flags: u16,
pub device_id: Bdf,
pub address: u64,
}
impl AmdViUnit {
pub fn detect(ivrs: &[u8]) -> Result<Vec<Self>, IvrsError> {
let parsed = parse_ivrs(ivrs)?;
Ok(parsed.units.into_iter().map(Self::from_info).collect())
}
pub fn from_info(info: IommuUnitInfo) -> Self {
Self {
info,
mmio: None,
device_table: None,
command_buffer: None,
event_log: None,
interrupt_table: None,
completion_store: None,
command_tail: 0,
event_head: 0,
initialized: false,
}
}
pub fn info(&self) -> &IommuUnitInfo {
&self.info
}
pub fn initialized(&self) -> bool {
self.initialized
}
pub fn handles_device(&self, bdf: Bdf) -> bool {
self.info.handles_device(bdf)
}
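/// Brings the unit up: maps MMIO and quiesces the IOMMU, allocates the
/// device table, command buffer, event log, and interrupt remap table,
/// programs their base registers and resets the ring pointers, enables the
/// command buffer and event log (plus XT/NX when the extended-feature
/// register advertises them), flushes stale cached state, then sets
/// IOMMU_ENABLE and waits for the running status bit.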
pub fn init(&mut self) -> Result<(), String> {
if self.initialized {
return Ok(());
}
let region = MmioRegion::map(
self.info.mmio_base,
AMD_VI_MMIO_BYTES,
CacheType::DeviceMemory,
MmioProt::READ_WRITE,
)
.map_err(|err| {
format!(
"failed to map AMD-Vi MMIO {:#x}: {err}",
self.info.mmio_base
)
})?;
let base = region.as_ptr() as *mut AmdViMmio;
self.mmio = Some(MmioMapping { region, base });
self.disable_unit()?;
let device_table = DeviceTable::new().map_err(|err| err.to_string())?;
let command_buffer =
CommandBuffer::new(DEFAULT_CMD_ENTRIES).map_err(|err| err.to_string())?;
let event_log = EventLog::new(DEFAULT_EVT_ENTRIES).map_err(|err| err.to_string())?;
let interrupt_table =
InterruptRemapTable::new(DEFAULT_IRT_ENTRIES).map_err(|err| err.to_string())?;
self.program_bars(&device_table, &command_buffer, &event_log)?;
self.reset_ring_pointers()?;
self.device_table = Some(device_table);
self.command_buffer = Some(command_buffer);
self.event_log = Some(event_log);
self.interrupt_table = Some(interrupt_table);
let ext = self.mmio_read_extended_feature()?;
let mut control_value = control::EVENT_LOG_EN | control::CMD_BUF_EN;
if ext & ext_feature::XT_SUP != 0 {
control_value |= control::XT_EN;
}
if ext & ext_feature::NX_SUP != 0 {
control_value |= control::NX_EN;
}
unsafe {
AmdViMmio::write_control(self.mmio_base()?, control_value);
}
self.flush_configuration()?;
unsafe {
AmdViMmio::write_control(self.mmio_base()?, control_value | control::IOMMU_ENABLE);
}
self.wait_for_running(true)?;
self.initialized = true;
Ok(())
}
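/// Attaches `bdf` to `domain`: builds a device-table entry pointing at the
/// domain's page-table root and the shared interrupt remap table, writes it,
/// then invalidates the cached DTE and interrupt table for the device and
/// waits for the invalidations to complete.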
pub fn assign_device(&mut self, bdf: Bdf, domain: &DomainPageTables) -> Result<(), String> {
if !self.initialized {
return Err("AMD-Vi unit is not initialized".to_string());
}
if !self.handles_device(bdf) {
return Err(format!(
"AMD-Vi unit {} does not cover device {bdf}",
self.info.unit_id()
));
}
let interrupt_table = self
.interrupt_table
.as_ref()
.ok_or_else(|| "interrupt remap table not initialized".to_string())?;
let device_table = self
.device_table
.as_mut()
.ok_or_else(|| "device table not initialized".to_string())?;
let mut entry = DeviceTableEntry::new();
entry.set_valid(true);
entry.set_translation_valid(true);
entry.set_read_permission(true);
entry.set_write_permission(true);
entry.set_mode(domain.levels());
entry.set_page_table_root(domain.root_address());
entry.set_interrupt_remap(true);
entry.set_interrupt_write(true);
entry.set_interrupt_control(0x02);
entry.set_int_table_len(interrupt_table.len_encoding());
entry.set_int_remap_table_ptr(interrupt_table.physical_address() as u64);
device_table.set_entry(bdf.raw(), &entry);
self.submit_command(CommandEntry::invalidate_devtab_entry(bdf.raw()))?;
self.submit_command(CommandEntry::invalidate_interrupt_table(bdf.raw()))?;
self.wait_for_completion()?;
Ok(())
}
pub fn drain_events(&mut self) -> Result<Vec<AmdViEvent>, String> {
let mut drained = Vec::new();
if !self.initialized {
return Ok(drained);
}
let base = self.mmio_base()?;
let event_log = self
.event_log
.as_ref()
.ok_or_else(|| "event log not initialized".to_string())?;
let tail = unsafe { AmdViMmio::read_evt_log_tail(base) as usize % event_log.capacity() };
while self.event_head != tail {
let event = event_log.read_entry(self.event_head);
drained.push(self.decode_event(event));
self.event_head = (self.event_head + 1) % event_log.capacity();
}
unsafe {
AmdViMmio::write_evt_log_head(base, self.event_head as u64);
}
Ok(drained)
}
fn decode_event(&self, event: EventLogEntry) -> AmdViEvent {
AmdViEvent {
unit_id: self.info.unit_id(),
event_code: event.event_type() as u16,
event_flags: event.event_flags(),
device_id: Bdf(event.device_id()),
address: event.virtual_address(),
}
}
fn disable_unit(&mut self) -> Result<(), String> {
let base = self.mmio_base()?;
unsafe {
AmdViMmio::write_control(base, 0);
}
self.wait_for_running(false)
}
fn wait_for_running(&self, expected: bool) -> Result<(), String> {
let base = self.mmio_base()?;
for _ in 0..100_000 {
let running = unsafe { AmdViMmio::read_status(base) } & status::IOMMU_RUNNING != 0;
if running == expected {
return Ok(());
}
std::hint::spin_loop();
}
Err(format!(
"timed out waiting for AMD-Vi unit {} running={expected}",
self.info.unit_id()
))
}
fn program_bars(
&mut self,
device_table: &DeviceTable,
command_buffer: &CommandBuffer,
event_log: &EventLog,
) -> Result<(), String> {
let base = self.mmio_base()?;
unsafe {
AmdViMmio::write_dev_table_bar(
base,
(device_table.physical_address() as u64 & !0xFFF) | DEV_TABLE_SIZE_ENCODING,
);
AmdViMmio::write_cmd_buf_bar(
base,
(command_buffer.physical_address() as u64 & !0xFFF) | CMD_BUF_LEN_ENCODING,
);
AmdViMmio::write_evt_log_bar(
base,
(event_log.physical_address() as u64 & !0xFFF) | EVT_LOG_LEN_ENCODING,
);
AmdViMmio::write_exclusion_base(base, 0);
AmdViMmio::write_exclusion_limit(base, 0);
}
Ok(())
}
fn reset_ring_pointers(&mut self) -> Result<(), String> {
let base = self.mmio_base()?;
unsafe {
AmdViMmio::write_cmd_buf_head(base, 0);
AmdViMmio::write_cmd_buf_tail(base, 0);
AmdViMmio::write_evt_log_head(base, 0);
}
self.command_tail = 0;
self.event_head = 0;
Ok(())
}
fn flush_configuration(&mut self) -> Result<(), String> {
let ext = self.mmio_read_extended_feature()?;
if ext & ext_feature::IA_SUP != 0 {
self.submit_command(CommandEntry::invalidate_all())?;
} else if let Some(table) = self.device_table.as_ref() {
let mut pending_invalidations = Vec::new();
for device_id in 0..DEVICE_TABLE_ENTRIES {
let entry = table.get_entry(device_id as u16);
if entry.valid() {
pending_invalidations.push(device_id as u16);
}
}
for device_id in pending_invalidations {
self.submit_command(CommandEntry::invalidate_devtab_entry(device_id))?;
}
} else {
warn!("amd-vi: device table not yet allocated while flushing configuration");
}
self.wait_for_completion()
}
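/// Enqueues one command into the ring. The buffer keeps one slot open to
/// distinguish full from empty: if advancing the software tail would land on
/// the hardware head, the ring is full and the command is rejected.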
fn submit_command(&mut self, command: CommandEntry) -> Result<(), String> {
let base = self.mmio_base()?;
let command_buffer = self
.command_buffer
.as_mut()
.ok_or_else(|| "command buffer not initialized".to_string())?;
let head =
unsafe { AmdViMmio::read_cmd_buf_head(base) as usize % command_buffer.capacity() };
let next_tail = (self.command_tail + 1) % command_buffer.capacity();
if next_tail == head {
return Err("AMD-Vi command buffer is full".to_string());
}
command_buffer.write_command(self.command_tail, &command);
self.command_tail = next_tail;
unsafe {
AmdViMmio::write_cmd_buf_tail(base, self.command_tail as u64);
}
Ok(())
}
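/// Fences all prior commands: zeroes the DMA completion store, submits a
/// COMPLETION_WAIT that writes `COMPLETION_TOKEN` to it, and spins until the
/// token appears or the retry budget is exhausted.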
fn wait_for_completion(&mut self) -> Result<(), String> {
let mut completion_store = match self.completion_store.take() {
Some(buffer) => buffer,
None => redox_driver_sys::dma::DmaBuffer::allocate(8, 8)
.map_err(|err| format!("failed to allocate completion wait store: {err}"))?,
};
// Derive the writable pointer from as_mut_ptr(); writing through a *mut
// cast from as_ptr()'s *const would be unsound under strict provenance.
let completion_mut = completion_store.as_mut_ptr() as *mut u32;
let completion_ptr = completion_mut as *const u32;
unsafe {
write_volatile(completion_mut, 0);
}
let completion_phys = completion_store.physical_address() as u64;
self.submit_command(CommandEntry::completion_wait(
completion_phys,
COMPLETION_TOKEN,
))?;
for _ in 0..100_000 {
if unsafe { read_volatile(completion_ptr) } == COMPLETION_TOKEN {
self.completion_store = Some(completion_store);
return Ok(());
}
std::hint::spin_loop();
}
self.completion_store = Some(completion_store);
Err("timed out waiting for AMD-Vi command completion".to_string())
}
fn mmio_read_extended_feature(&self) -> Result<u64, String> {
let base = self.mmio_base()?;
Ok(unsafe { AmdViMmio::read_extended_feature(base) })
}
fn mmio_base(&self) -> Result<*mut AmdViMmio, String> {
self.mmio
.as_ref()
.map(|mapping| mapping.base)
.ok_or_else(|| "AMD-Vi MMIO is not mapped".to_string())
}
}
impl Drop for AmdViUnit {
fn drop(&mut self) {
if let Some(mapping) = &self.mmio {
debug!(
"amd-vi: dropping unit {} mapped at {:#x} ({:#x} bytes)",
self.info.unit_id(),
self.info.mmio_base,
mapping.region.size()
);
}
}
}
#[cfg(test)]
mod tests {
use crate::acpi::Bdf;
use super::AmdViUnit;
fn build_ivrs_with_unit() -> Vec<u8> {
let mut table = vec![0u8; 40 + 28];
table[0..4].copy_from_slice(b"IVRS");
table[4..8].copy_from_slice(&(68u32).to_le_bytes());
table[8] = 3;
table[10..16].copy_from_slice(b"RDBEAR");
table[16..24].copy_from_slice(b"AMDVI ");
let offset = 40;
table[offset] = 0x11;
table[offset + 1] = 0x20;
table[offset + 2..offset + 4].copy_from_slice(&(28u16).to_le_bytes());
table[offset + 4..offset + 6].copy_from_slice(&Bdf::new(0, 0x18, 2).raw().to_le_bytes());
table[offset + 6..offset + 8].copy_from_slice(&0x40u16.to_le_bytes());
table[offset + 8..offset + 16].copy_from_slice(&0xfee0_0000u64.to_le_bytes());
table[offset + 16..offset + 18].copy_from_slice(&0u16.to_le_bytes());
table[offset + 18..offset + 20].copy_from_slice(&0x0081u16.to_le_bytes());
table[offset + 20..offset + 24].copy_from_slice(&0u32.to_le_bytes());
table[offset + 24..offset + 28].copy_from_slice(&[0x00, 0, 0, 0]);
let checksum =
(!table.iter().fold(0u8, |sum, byte| sum.wrapping_add(*byte))).wrapping_add(1);
table[9] = checksum;
table
}
#[test]
fn detect_builds_units_from_ivrs() {
let units = AmdViUnit::detect(&build_ivrs_with_unit())
.unwrap_or_else(|err| panic!("amd-vi detect failed: {err}"));
assert_eq!(units.len(), 1);
assert_eq!(units[0].info().mmio_base, 0xfee0_0000);
assert!(units[0].handles_device(Bdf::new(0x80, 0x1f, 7)));
}
}
@@ -0,0 +1,371 @@
use core::mem::size_of;
use core::slice;
use redox_driver_sys::dma::DmaBuffer;
pub const COMMAND_ENTRY_SIZE: usize = 16;
pub const EVENT_LOG_ENTRY_SIZE: usize = 16;
const DMA_ALIGNMENT: usize = 4096;
pub const CMD_COMPLETION_WAIT: u32 = 0x01;
pub const CMD_INVALIDATE_DEVTAB_ENTRY: u32 = 0x02;
pub const CMD_INVALIDATE_IOMMU_PAGES: u32 = 0x03;
pub const CMD_INVALIDATE_INTERRUPT_TABLE: u32 = 0x04;
pub const CMD_INVALIDATE_IOMMU_ALL: u32 = 0x05;
pub const EVENT_IO_PAGE_FAULT: u32 = 0x01;
pub const EVENT_INVALIDATE_DEVICE_TABLE: u32 = 0x02;
const COMPLETION_WAIT_STORE_BIT: u32 = 1 << 4;
const COMPLETION_WAIT_INTERRUPT_BIT: u32 = 1 << 5;
const INVALIDATE_PAGES_PDE_BIT: u32 = 1 << 12;
const INVALIDATE_PAGES_SIZE_BIT: u32 = 1 << 13;
/// Command buffer entry (128 bits = 16 bytes = 4 × u32).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(C)]
pub struct CommandEntry {
words: [u32; 4],
}
impl CommandEntry {
pub const fn new() -> Self {
Self { words: [0; 4] }
}
pub const fn from_words(words: [u32; 4]) -> Self {
Self { words }
}
pub fn words(&self) -> [u32; 4] {
self.words
}
pub fn opcode(&self) -> u32 {
self.words[0] & 0xF
}
/// COMPLETION_WAIT (opcode 0x01).
pub fn completion_wait(store_addr: u64, store_data: u32) -> Self {
debug_assert_eq!(
store_addr & 0x7,
0,
"completion wait store address must be 8-byte aligned"
);
Self {
words: [
CMD_COMPLETION_WAIT | COMPLETION_WAIT_STORE_BIT,
store_addr as u32,
(store_addr >> 32) as u32,
store_data,
],
}
}
/// INVALIDATE_DEVTAB_ENTRY (opcode 0x02).
pub fn invalidate_devtab_entry(device_id: u16) -> Self {
Self {
words: [CMD_INVALIDATE_DEVTAB_ENTRY, device_id as u32, 0, 0],
}
}
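/// INVALIDATE_IOMMU_PAGES (opcode 0x03) for `addr` in `domain_id`, with the
/// PDE and SIZE bits clear.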
pub fn invalidate_pages(domain_id: u16, addr: u64) -> Self {
Self::invalidate_pages_with_flags(domain_id, addr, false, false)
}
pub fn invalidate_pages_with_flags(domain_id: u16, addr: u64, pde: bool, size: bool) -> Self {
let mut word0 = CMD_INVALIDATE_IOMMU_PAGES;
if pde {
word0 |= INVALIDATE_PAGES_PDE_BIT;
}
if size {
word0 |= INVALIDATE_PAGES_SIZE_BIT;
}
Self {
words: [word0, domain_id as u32, addr as u32, (addr >> 32) as u32],
}
}
pub fn invalidate_interrupt_table(device_id: u16) -> Self {
Self {
words: [CMD_INVALIDATE_INTERRUPT_TABLE, device_id as u32, 0, 0],
}
}
/// INVALIDATE_IOMMU_ALL (opcode 0x05).
pub fn invalidate_all() -> Self {
Self {
words: [CMD_INVALIDATE_IOMMU_ALL, 0, 0, 0],
}
}
pub fn completion_wait_store(&self) -> bool {
self.words[0] & COMPLETION_WAIT_STORE_BIT != 0
}
pub fn completion_wait_interrupt(&self) -> bool {
self.words[0] & COMPLETION_WAIT_INTERRUPT_BIT != 0
}
pub fn completion_wait_store_address(&self) -> u64 {
(self.words[1] as u64) | ((self.words[2] as u64) << 32)
}
pub fn completion_wait_store_data(&self) -> u32 {
self.words[3]
}
pub fn invalidate_device_id(&self) -> u16 {
self.words[1] as u16
}
pub fn invalidate_pages_pde(&self) -> bool {
self.words[0] & INVALIDATE_PAGES_PDE_BIT != 0
}
pub fn invalidate_pages_size(&self) -> bool {
self.words[0] & INVALIDATE_PAGES_SIZE_BIT != 0
}
pub fn invalidate_pages_address(&self) -> u64 {
(self.words[2] as u64) | ((self.words[3] as u64) << 32)
}
}
const _: () = assert!(size_of::<CommandEntry>() == COMMAND_ENTRY_SIZE);
pub struct CommandBuffer {
buffer: DmaBuffer,
capacity: usize,
}
impl CommandBuffer {
pub fn new(entry_count: usize) -> Result<Self, &'static str> {
if entry_count == 0 {
return Err("IOMMU command buffer entry count must be non-zero");
}
let byte_len = entry_count
.checked_mul(COMMAND_ENTRY_SIZE)
.ok_or("IOMMU command buffer size overflow")?;
let buffer = DmaBuffer::allocate(byte_len, DMA_ALIGNMENT)
.map_err(|_| "failed to allocate IOMMU command buffer")?;
if buffer.len() < byte_len {
return Err("IOMMU command buffer allocation was smaller than requested");
}
if !buffer.is_physically_contiguous() {
return Err("IOMMU command buffer allocation is not physically contiguous");
}
if buffer.physical_address() & (DMA_ALIGNMENT - 1) != 0 {
return Err("IOMMU command buffer allocation is not 4KiB-aligned");
}
Ok(Self {
buffer,
capacity: entry_count,
})
}
pub fn physical_address(&self) -> usize {
self.buffer.physical_address()
}
/// Write a command at the given index.
pub fn write_command(&mut self, index: usize, cmd: &CommandEntry) {
assert!(index < self.capacity, "IOMMU command index out of bounds");
self.commands_mut()[index] = *cmd;
}
pub fn capacity(&self) -> usize {
self.capacity
}
fn commands_mut(&mut self) -> &mut [CommandEntry] {
unsafe {
slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as *mut CommandEntry, self.capacity)
}
}
}
/// Event log entry (128 bits = 16 bytes).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(C)]
pub struct EventLogEntry {
words: [u32; 4],
}
impl EventLogEntry {
pub const fn new() -> Self {
Self { words: [0; 4] }
}
pub const fn from_words(words: [u32; 4]) -> Self {
Self { words }
}
pub fn words(&self) -> [u32; 4] {
self.words
}
pub fn event_type(&self) -> u32 {
self.words[0] & 0xFFFF
}
pub fn event_flags(&self) -> u16 {
((self.words[0] >> 16) & 0xFFFF) as u16
}
pub fn device_id(&self) -> u16 {
self.words[1] as u16
}
pub fn virtual_address(&self) -> u64 {
((self.words[3] as u64) << 32) | (self.words[2] as u64)
}
}
const _: () = assert!(size_of::<EventLogEntry>() == EVENT_LOG_ENTRY_SIZE);
pub struct EventLog {
buffer: DmaBuffer,
capacity: usize,
}
impl EventLog {
pub fn new(entry_count: usize) -> Result<Self, &'static str> {
if entry_count == 0 {
return Err("IOMMU event log entry count must be non-zero");
}
let byte_len = entry_count
.checked_mul(EVENT_LOG_ENTRY_SIZE)
.ok_or("IOMMU event log size overflow")?;
let buffer = DmaBuffer::allocate(byte_len, DMA_ALIGNMENT)
.map_err(|_| "failed to allocate IOMMU event log")?;
if buffer.len() < byte_len {
return Err("IOMMU event log allocation was smaller than requested");
}
if !buffer.is_physically_contiguous() {
return Err("IOMMU event log allocation is not physically contiguous");
}
if buffer.physical_address() & (DMA_ALIGNMENT - 1) != 0 {
return Err("IOMMU event log allocation is not 4KiB-aligned");
}
Ok(Self {
buffer,
capacity: entry_count,
})
}
pub fn physical_address(&self) -> usize {
self.buffer.physical_address()
}
pub fn read_entry(&self, index: usize) -> EventLogEntry {
assert!(index < self.capacity, "IOMMU event log index out of bounds");
self.entries()[index]
}
pub fn capacity(&self) -> usize {
self.capacity
}
fn entries(&self) -> &[EventLogEntry] {
unsafe {
slice::from_raw_parts(self.buffer.as_ptr() as *const EventLogEntry, self.capacity)
}
}
}
#[cfg(test)]
mod tests {
use super::{
CommandEntry, EventLogEntry, CMD_COMPLETION_WAIT, CMD_INVALIDATE_DEVTAB_ENTRY,
CMD_INVALIDATE_IOMMU_ALL, CMD_INVALIDATE_IOMMU_PAGES, EVENT_IO_PAGE_FAULT,
};
#[test]
fn test_completion_wait_command() {
let store_addr = 0x1234_5000_0000_1000;
let store_data = 0xabcdefff;
let cmd = CommandEntry::completion_wait(store_addr, store_data);
let words = cmd.words();
assert_eq!(cmd.opcode(), CMD_COMPLETION_WAIT);
assert!(cmd.completion_wait_store());
assert!(!cmd.completion_wait_interrupt());
assert_eq!(words[1], store_addr as u32);
assert_eq!(words[2], (store_addr >> 32) as u32);
assert_eq!(words[3], store_data);
assert_eq!(cmd.completion_wait_store_address(), store_addr);
assert_eq!(cmd.completion_wait_store_data(), store_data);
}
#[test]
fn test_invalidate_devtab_command() {
let device_id = 0x1234;
let cmd = CommandEntry::invalidate_devtab_entry(device_id);
let words = cmd.words();
assert_eq!(cmd.opcode(), CMD_INVALIDATE_DEVTAB_ENTRY);
assert_eq!(cmd.invalidate_device_id(), device_id);
assert_eq!(words[1], device_id as u32);
assert_eq!(words[2], 0);
assert_eq!(words[3], 0);
}
#[test]
fn test_invalidate_pages_command() {
let device_id = 0x4321;
let addr = 0xfeed_cafe_b000;
let cmd = CommandEntry::invalidate_pages(device_id, addr);
let words = cmd.words();
assert_eq!(cmd.opcode(), CMD_INVALIDATE_IOMMU_PAGES);
assert_eq!(cmd.invalidate_device_id(), device_id);
assert!(!cmd.invalidate_pages_pde());
assert!(!cmd.invalidate_pages_size());
assert_eq!(words[1], device_id as u32);
assert_eq!(cmd.invalidate_pages_address(), addr);
}
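// Illustrative addition: PDE and SIZE travel in bits 12 and 13 of word 0
// and must leave the opcode nibble untouched.
#[test]
fn test_invalidate_pages_flags_round_trip() {
let cmd = CommandEntry::invalidate_pages_with_flags(0x0007, 0x8000_0000, true, true);
assert_eq!(cmd.opcode(), CMD_INVALIDATE_IOMMU_PAGES);
assert_eq!(cmd.invalidate_device_id(), 0x0007);
assert!(cmd.invalidate_pages_pde());
assert!(cmd.invalidate_pages_size());
assert_eq!(cmd.invalidate_pages_address(), 0x8000_0000);
}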
#[test]
fn test_invalidate_all_command() {
let cmd = CommandEntry::invalidate_all();
let words = cmd.words();
assert_eq!(cmd.opcode(), CMD_INVALIDATE_IOMMU_ALL);
assert_eq!(words[1], 0);
assert_eq!(words[2], 0);
assert_eq!(words[3], 0);
}
#[test]
fn test_event_entry_parsing() {
let device_id = 0x2468;
let address = 0x0123_4567_89ab_cdef;
let entry = EventLogEntry::from_words([
EVENT_IO_PAGE_FAULT | ((0x5a as u32) << 16),
device_id as u32,
address as u32,
(address >> 32) as u32,
]);
assert_eq!(entry.event_type(), EVENT_IO_PAGE_FAULT);
assert_eq!(entry.event_flags(), 0x5a);
assert_eq!(entry.device_id(), device_id);
assert_eq!(entry.virtual_address(), address);
}
}
@@ -0,0 +1,337 @@
use core::mem::size_of;
use core::slice;
use redox_driver_sys::dma::DmaBuffer;
/// AMD-Vi Device Table: 65536 entries × 32 bytes = 2 MiB.
pub const DEVICE_TABLE_ENTRIES: usize = 65_536;
pub const DTE_SIZE: usize = 32;
const DEVICE_TABLE_BYTES: usize = DEVICE_TABLE_ENTRIES * DTE_SIZE;
const DTE_VALID_BIT: u64 = 1 << 0;
const DTE_TRANSLATION_VALID_BIT: u64 = 1 << 1;
const DTE_WRITE_PERMISSION_BIT: u64 = 1 << 4;
const DTE_READ_PERMISSION_BIT: u64 = 1 << 5;
const DTE_SNOOP_ENABLE_BIT: u64 = 1 << 8;
const DTE_MODE_SHIFT: u32 = 9;
const DTE_MODE_MASK: u64 = 0x7 << DTE_MODE_SHIFT;
const DTE_PAGE_TABLE_ROOT_MASK: u64 = ((1u64 << 40) - 1) << 12;
const DTE_INTERRUPT_REMAP_BIT: u64 = 1 << 61;
const DTE_INTERRUPT_WRITE_BIT: u64 = 1 << 62;
const DTE_INT_TABLE_LEN_MASK: u64 = 0xF;
const DTE_INT_CONTROL_SHIFT: u32 = 4;
const DTE_INT_CONTROL_MASK: u64 = 0x3 << DTE_INT_CONTROL_SHIFT;
const DTE_INT_REMAP_TABLE_PTR_SHIFT: u32 = 6;
const DTE_INT_REMAP_TABLE_PTR_MASK: u64 = ((1u64 << 46) - 1) << DTE_INT_REMAP_TABLE_PTR_SHIFT;
/// Device Table Entry (DTE) — 256 bits (32 bytes = 4 × u64).
///
/// Layout follows AMD IOMMU Spec 48882 Rev 3.10, Section 3.2.2.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(C)]
pub struct DeviceTableEntry {
data: [u64; 4],
}
impl DeviceTableEntry {
pub const fn new() -> Self {
Self { data: [0; 4] }
}
pub fn valid(&self) -> bool {
self.data[0] & DTE_VALID_BIT != 0
}
pub fn set_valid(&mut self, value: bool) {
if value {
self.data[0] |= DTE_VALID_BIT;
} else {
self.data[0] &= !DTE_VALID_BIT;
}
}
pub fn translation_valid(&self) -> bool {
self.data[0] & DTE_TRANSLATION_VALID_BIT != 0
}
pub fn set_translation_valid(&mut self, value: bool) {
if value {
self.data[0] |= DTE_TRANSLATION_VALID_BIT;
} else {
self.data[0] &= !DTE_TRANSLATION_VALID_BIT;
}
}
pub fn write_permission(&self) -> bool {
self.data[0] & DTE_WRITE_PERMISSION_BIT != 0
}
pub fn set_write_permission(&mut self, value: bool) {
if value {
self.data[0] |= DTE_WRITE_PERMISSION_BIT;
} else {
self.data[0] &= !DTE_WRITE_PERMISSION_BIT;
}
}
pub fn read_permission(&self) -> bool {
self.data[0] & DTE_READ_PERMISSION_BIT != 0
}
pub fn set_read_permission(&mut self, value: bool) {
if value {
self.data[0] |= DTE_READ_PERMISSION_BIT;
} else {
self.data[0] &= !DTE_READ_PERMISSION_BIT;
}
}
pub fn snoop_enable(&self) -> bool {
self.data[0] & DTE_SNOOP_ENABLE_BIT != 0
}
pub fn set_snoop_enable(&mut self, value: bool) {
if value {
self.data[0] |= DTE_SNOOP_ENABLE_BIT;
} else {
self.data[0] &= !DTE_SNOOP_ENABLE_BIT;
}
}
pub fn mode(&self) -> u8 {
((self.data[0] & DTE_MODE_MASK) >> DTE_MODE_SHIFT) as u8
}
pub fn set_mode(&mut self, mode: u8) {
self.data[0] = (self.data[0] & !DTE_MODE_MASK) | (((mode as u64) & 0x7) << DTE_MODE_SHIFT);
}
/// Returns the full, 4KiB-aligned physical address stored in bits 12:51.
pub fn page_table_root(&self) -> u64 {
self.data[0] & DTE_PAGE_TABLE_ROOT_MASK
}
pub fn set_page_table_root(&mut self, phys: u64) {
self.data[0] =
(self.data[0] & !DTE_PAGE_TABLE_ROOT_MASK) | (phys & DTE_PAGE_TABLE_ROOT_MASK);
}
/// Interrupt remapping enable (bit 61 of word 0 in the AMD-Vi DTE).
pub fn interrupt_remap(&self) -> bool {
self.data[0] & DTE_INTERRUPT_REMAP_BIT != 0
}
pub fn set_interrupt_remap(&mut self, value: bool) {
if value {
self.data[0] |= DTE_INTERRUPT_REMAP_BIT;
} else {
self.data[0] &= !DTE_INTERRUPT_REMAP_BIT;
}
}
/// Interrupt write permission (bit 62 of word 0 in the AMD-Vi DTE).
pub fn interrupt_write(&self) -> bool {
self.data[0] & DTE_INTERRUPT_WRITE_BIT != 0
}
pub fn set_interrupt_write(&mut self, value: bool) {
if value {
self.data[0] |= DTE_INTERRUPT_WRITE_BIT;
} else {
self.data[0] &= !DTE_INTERRUPT_WRITE_BIT;
}
}
pub fn int_table_len(&self) -> u8 {
(self.data[1] & DTE_INT_TABLE_LEN_MASK) as u8
}
pub fn set_int_table_len(&mut self, len: u8) {
self.data[1] =
(self.data[1] & !DTE_INT_TABLE_LEN_MASK) | ((len as u64) & DTE_INT_TABLE_LEN_MASK);
}
pub fn interrupt_control(&self) -> u8 {
((self.data[1] & DTE_INT_CONTROL_MASK) >> DTE_INT_CONTROL_SHIFT) as u8
}
pub fn set_interrupt_control(&mut self, control: u8) {
self.data[1] = (self.data[1] & !DTE_INT_CONTROL_MASK)
| (((control as u64) & 0x3) << DTE_INT_CONTROL_SHIFT);
}
/// Returns the interrupt remap table pointer bits stored in word 1.
pub fn int_remap_table_ptr(&self) -> u64 {
self.data[1] & DTE_INT_REMAP_TABLE_PTR_MASK
}
pub fn set_int_remap_table_ptr(&mut self, phys: u64) {
self.data[1] =
(self.data[1] & !DTE_INT_REMAP_TABLE_PTR_MASK) | (phys & DTE_INT_REMAP_TABLE_PTR_MASK);
}
}
const _: () = assert!(size_of::<DeviceTableEntry>() == DTE_SIZE);
/// Device Table — manages the 65536-entry device table.
pub struct DeviceTable {
buffer: DmaBuffer,
}
impl DeviceTable {
/// Allocate a new device table (65536 × 32 bytes = 2 MiB).
pub fn new() -> Result<Self, &'static str> {
let buffer = DmaBuffer::allocate(DEVICE_TABLE_BYTES, 4096)
.map_err(|_| "failed to allocate IOMMU device table")?;
if buffer.len() < DEVICE_TABLE_BYTES {
return Err("IOMMU device table allocation was smaller than requested");
}
if !buffer.is_physically_contiguous() {
return Err("IOMMU device table allocation is not physically contiguous");
}
Ok(Self { buffer })
}
pub fn get_entry(&self, device_id: u16) -> DeviceTableEntry {
self.entries()[device_id as usize]
}
pub fn set_entry(&mut self, device_id: u16, entry: &DeviceTableEntry) {
self.entries_mut()[device_id as usize] = *entry;
}
pub fn clear_entry(&mut self, device_id: u16) {
self.entries_mut()[device_id as usize] = DeviceTableEntry::new();
}
pub fn physical_address(&self) -> usize {
self.buffer.physical_address()
}
/// Convert PCI BDF to device ID.
/// Bus: bits 8:15, Device: bits 3:7, Function: bits 0:2.
pub fn bdf_to_device_id(bus: u8, device: u8, function: u8) -> u16 {
((bus as u16) << 8) | ((device as u16) << 3) | (function as u16)
}
fn entries(&self) -> &[DeviceTableEntry] {
unsafe {
slice::from_raw_parts(
self.buffer.as_ptr() as *const DeviceTableEntry,
DEVICE_TABLE_ENTRIES,
)
}
}
fn entries_mut(&mut self) -> &mut [DeviceTableEntry] {
unsafe {
slice::from_raw_parts_mut(
self.buffer.as_mut_ptr() as *mut DeviceTableEntry,
DEVICE_TABLE_ENTRIES,
)
}
}
}
#[cfg(test)]
mod tests {
use super::{DeviceTable, DeviceTableEntry, DTE_PAGE_TABLE_ROOT_MASK};
fn try_allocate_table() -> Option<DeviceTable> {
match DeviceTable::new() {
Ok(table) => Some(table),
Err(err) => {
eprintln!("skipping DeviceTable allocation-dependent test: {err}");
None
}
}
}
#[test]
fn test_dte_valid_bit() {
let mut entry = DeviceTableEntry::new();
assert!(!entry.valid());
entry.set_valid(true);
assert!(entry.valid());
entry.set_valid(false);
assert!(!entry.valid());
}
#[test]
fn test_dte_translation_valid() {
let mut entry = DeviceTableEntry::new();
assert!(!entry.translation_valid());
entry.set_translation_valid(true);
assert!(entry.translation_valid());
entry.set_translation_valid(false);
assert!(!entry.translation_valid());
}
#[test]
fn test_dte_mode_4level() {
let mut entry = DeviceTableEntry::new();
entry.set_mode(4);
assert_eq!(entry.mode(), 4);
}
#[test]
fn test_dte_permissions_and_interrupt_control() {
let mut entry = DeviceTableEntry::new();
entry.set_read_permission(true);
entry.set_write_permission(true);
entry.set_snoop_enable(true);
entry.set_interrupt_control(0x02);
assert!(entry.read_permission());
assert!(entry.write_permission());
assert!(entry.snoop_enable());
assert_eq!(entry.interrupt_control(), 0x02);
}
#[test]
fn test_dte_page_table_root() {
let mut entry = DeviceTableEntry::new();
entry.set_page_table_root(0x1234_5000);
assert_eq!(entry.page_table_root(), 0x1234_5000);
assert_eq!(entry.data[0] & DTE_PAGE_TABLE_ROOT_MASK, 0x1234_5000);
}
#[test]
fn test_bdf_encoding() {
assert_eq!(DeviceTable::bdf_to_device_id(0x12, 0x05, 0x03), 0x122b);
assert_eq!(DeviceTable::bdf_to_device_id(0xff, 0x1f, 0x07), 0xffff);
}
#[test]
fn test_clear_entry() -> Result<(), &'static str> {
let Some(mut table) = try_allocate_table() else {
return Ok(());
};
let device_id = DeviceTable::bdf_to_device_id(0x02, 0x00, 0x00);
let mut entry = DeviceTableEntry::new();
entry.set_valid(true);
entry.set_translation_valid(true);
entry.set_mode(4);
entry.set_page_table_root(0x1234_5000);
table.set_entry(device_id, &entry);
assert_eq!(table.get_entry(device_id), entry);
table.clear_entry(device_id);
assert_eq!(table.get_entry(device_id), DeviceTableEntry::new());
Ok(())
}
}
@@ -0,0 +1,215 @@
use core::mem::size_of;
use core::slice;
use redox_driver_sys::dma::DmaBuffer;
pub const IRTE_SIZE: usize = 16;
pub const MAX_INTERRUPT_REMAP_ENTRIES: usize = 4096;
const DMA_ALIGNMENT: usize = 4096;
const IRTE_REMAP_ENABLE: u64 = 1 << 0;
const IRTE_SUPPRESS_IOPF: u64 = 1 << 1;
const IRTE_INT_TYPE_SHIFT: u64 = 2;
const IRTE_INT_TYPE_MASK: u64 = 0x7 << IRTE_INT_TYPE_SHIFT;
const IRTE_DEST_MODE: u64 = 1 << 8;
const IRTE_DEST_LOW_SHIFT: u64 = 16;
const IRTE_DEST_LOW_MASK: u64 = 0xFFFF << IRTE_DEST_LOW_SHIFT;
const IRTE_VECTOR_SHIFT: u64 = 32;
const IRTE_VECTOR_MASK: u64 = 0xFF << IRTE_VECTOR_SHIFT;
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(C)]
pub struct AmdIrte {
data: [u64; 2],
}
impl AmdIrte {
pub const fn new() -> Self {
Self { data: [0; 2] }
}
pub fn remap_enabled(&self) -> bool {
self.data[0] & IRTE_REMAP_ENABLE != 0
}
pub fn set_remap_enabled(&mut self, value: bool) {
if value {
self.data[0] |= IRTE_REMAP_ENABLE;
} else {
self.data[0] &= !IRTE_REMAP_ENABLE;
}
}
pub fn suppress_io_page_faults(&self) -> bool {
self.data[0] & IRTE_SUPPRESS_IOPF != 0
}
pub fn set_suppress_io_page_faults(&mut self, value: bool) {
if value {
self.data[0] |= IRTE_SUPPRESS_IOPF;
} else {
self.data[0] &= !IRTE_SUPPRESS_IOPF;
}
}
pub fn interrupt_type(&self) -> u8 {
((self.data[0] & IRTE_INT_TYPE_MASK) >> IRTE_INT_TYPE_SHIFT) as u8
}
pub fn set_interrupt_type(&mut self, value: u8) {
self.data[0] = (self.data[0] & !IRTE_INT_TYPE_MASK)
| ((u64::from(value) & 0x7) << IRTE_INT_TYPE_SHIFT);
}
pub fn destination_mode(&self) -> bool {
self.data[0] & IRTE_DEST_MODE != 0
}
pub fn set_destination_mode(&mut self, logical: bool) {
if logical {
self.data[0] |= IRTE_DEST_MODE;
} else {
self.data[0] &= !IRTE_DEST_MODE;
}
}
pub fn destination(&self) -> u32 {
(((self.data[1] & 0xFFFF_FFFF) as u32) << 16)
| (((self.data[0] & IRTE_DEST_LOW_MASK) >> IRTE_DEST_LOW_SHIFT) as u32)
}
pub fn set_destination(&mut self, apic_id: u32) {
self.data[0] = (self.data[0] & !IRTE_DEST_LOW_MASK)
| ((u64::from(apic_id & 0xFFFF)) << IRTE_DEST_LOW_SHIFT);
self.data[1] = (self.data[1] & !0xFFFF_FFFF) | u64::from(apic_id >> 16);
}
pub fn vector(&self) -> u8 {
((self.data[0] & IRTE_VECTOR_MASK) >> IRTE_VECTOR_SHIFT) as u8
}
pub fn set_vector(&mut self, vector: u8) {
self.data[0] =
(self.data[0] & !IRTE_VECTOR_MASK) | (u64::from(vector) << IRTE_VECTOR_SHIFT);
}
}
const _: () = assert!(size_of::<AmdIrte>() == IRTE_SIZE);
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct IrteConfig {
pub vector: u8,
pub destination: u32,
pub logical_destination: bool,
pub interrupt_type: u8,
pub suppress_io_page_faults: bool,
}
pub struct InterruptRemapTable {
buffer: DmaBuffer,
capacity: usize,
}
impl InterruptRemapTable {
pub fn new(entry_count: usize) -> Result<Self, &'static str> {
if !(2..=MAX_INTERRUPT_REMAP_ENTRIES).contains(&entry_count) {
return Err("interrupt remap table entry count must be between 2 and 4096");
}
if !entry_count.is_power_of_two() {
return Err("interrupt remap table entry count must be a power of two");
}
let byte_len = entry_count
.checked_mul(IRTE_SIZE)
.ok_or("interrupt remap table size overflow")?;
let buffer = DmaBuffer::allocate(byte_len, DMA_ALIGNMENT)
.map_err(|_| "failed to allocate interrupt remap table")?;
if buffer.len() < byte_len {
return Err("interrupt remap table allocation was smaller than requested");
}
if !buffer.is_physically_contiguous() {
return Err("interrupt remap table allocation is not physically contiguous");
}
Ok(Self {
buffer,
capacity: entry_count,
})
}
pub fn capacity(&self) -> usize {
self.capacity
}
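/// Length encoding stored in the DTE's IntTabLen field by
/// `DeviceTableEntry::set_int_table_len`: this driver encodes a table of
/// 2^(n+1) entries as `n`, i.e. log2(capacity) - 1.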
pub fn len_encoding(&self) -> u8 {
self.capacity.ilog2() as u8 - 1
}
pub fn physical_address(&self) -> usize {
self.buffer.physical_address()
}
pub fn entry(&self, index: usize) -> AmdIrte {
assert!(
index < self.capacity,
"interrupt remap table index out of bounds"
);
self.entries()[index]
}
pub fn set_entry(&mut self, index: usize, entry: AmdIrte) {
assert!(
index < self.capacity,
"interrupt remap table index out of bounds"
);
self.entries_mut()[index] = entry;
}
pub fn clear_entry(&mut self, index: usize) {
self.set_entry(index, AmdIrte::new());
}
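/// Builds and installs a remap-enabled IRTE at `index` from `config`,
/// overwriting whatever entry was there before.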
pub fn configure(&mut self, index: usize, config: IrteConfig) {
let mut entry = AmdIrte::new();
entry.set_remap_enabled(true);
entry.set_suppress_io_page_faults(config.suppress_io_page_faults);
entry.set_interrupt_type(config.interrupt_type);
entry.set_destination_mode(config.logical_destination);
entry.set_destination(config.destination);
entry.set_vector(config.vector);
self.set_entry(index, entry);
}
fn entries(&self) -> &[AmdIrte] {
unsafe { slice::from_raw_parts(self.buffer.as_ptr().cast::<AmdIrte>(), self.capacity) }
}
fn entries_mut(&mut self) -> &mut [AmdIrte] {
unsafe {
slice::from_raw_parts_mut(self.buffer.as_mut_ptr().cast::<AmdIrte>(), self.capacity)
}
}
}
#[cfg(test)]
mod tests {
use super::AmdIrte;
#[test]
fn irte_accessors_round_trip() {
let mut irte = AmdIrte::new();
irte.set_remap_enabled(true);
irte.set_suppress_io_page_faults(true);
irte.set_interrupt_type(3);
irte.set_destination_mode(true);
irte.set_destination(0x1234_5678);
irte.set_vector(0x52);
assert!(irte.remap_enabled());
assert!(irte.suppress_io_page_faults());
assert_eq!(irte.interrupt_type(), 3);
assert!(irte.destination_mode());
assert_eq!(irte.destination(), 0x1234_5678);
assert_eq!(irte.vector(), 0x52);
}
}
@@ -0,0 +1,868 @@
//! AMD-Vi-backed scheme:iommu implementation.
pub mod acpi;
pub mod amd_vi;
pub mod command_buffer;
pub mod device_table;
pub mod interrupt;
pub mod mmio;
pub mod page_table;
use std::collections::BTreeMap;
use acpi::{parse_bdf, Bdf};
use amd_vi::AmdViUnit;
use page_table::{DomainPageTables, MappingFlags};
use redox_scheme::SchemeBlockMut;
use syscall::data::Stat;
use syscall::error::{Error, Result, EBADF, EINVAL, EIO, EISDIR, ENODEV, ENOENT};
use syscall::flag::{EventFlags, MODE_DIR, MODE_FILE, SEEK_CUR, SEEK_END, SEEK_SET};
pub const IOMMU_PROTOCOL_VERSION: u16 = 1;
pub mod opcode {
pub const QUERY: u16 = 0x0000;
pub const CREATE_DOMAIN: u16 = 0x0001;
pub const DESTROY_DOMAIN: u16 = 0x0002;
pub const MAP: u16 = 0x0010;
pub const UNMAP: u16 = 0x0011;
pub const ASSIGN_DEVICE: u16 = 0x0020;
pub const UNASSIGN_DEVICE: u16 = 0x0021;
pub const DRAIN_EVENTS: u16 = 0x0030;
}
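/// Fixed 32-byte little-endian request: opcode u16, version u16, arg0 u32,
/// then arg1..arg3 as u64 (see `to_bytes`); `from_bytes` applied to the
/// output of `to_bytes` round-trips the struct.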
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct IommuRequest {
pub opcode: u16,
pub version: u16,
pub arg0: u32,
pub arg1: u64,
pub arg2: u64,
pub arg3: u64,
}
impl IommuRequest {
pub const SIZE: usize = 32;
pub const fn new(opcode: u16, arg0: u32, arg1: u64, arg2: u64, arg3: u64) -> Self {
Self {
opcode,
version: IOMMU_PROTOCOL_VERSION,
arg0,
arg1,
arg2,
arg3,
}
}
pub fn from_bytes(bytes: &[u8]) -> Option<Self> {
let header = bytes.get(..Self::SIZE)?;
Some(Self {
opcode: u16::from_le_bytes(header.get(0..2)?.try_into().ok()?),
version: u16::from_le_bytes(header.get(2..4)?.try_into().ok()?),
arg0: u32::from_le_bytes(header.get(4..8)?.try_into().ok()?),
arg1: u64::from_le_bytes(header.get(8..16)?.try_into().ok()?),
arg2: u64::from_le_bytes(header.get(16..24)?.try_into().ok()?),
arg3: u64::from_le_bytes(header.get(24..32)?.try_into().ok()?),
})
}
pub fn to_bytes(self) -> [u8; Self::SIZE] {
let mut bytes = [0u8; Self::SIZE];
bytes[0..2].copy_from_slice(&self.opcode.to_le_bytes());
bytes[2..4].copy_from_slice(&self.version.to_le_bytes());
bytes[4..8].copy_from_slice(&self.arg0.to_le_bytes());
bytes[8..16].copy_from_slice(&self.arg1.to_le_bytes());
bytes[16..24].copy_from_slice(&self.arg2.to_le_bytes());
bytes[24..32].copy_from_slice(&self.arg3.to_le_bytes());
bytes
}
}
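/// Fixed 36-byte little-endian response: status i32 (0 on success, negated
/// errno on failure), kind u16 echoing the request opcode, version u16,
/// arg0 u32, then arg1..arg3 as u64.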
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct IommuResponse {
pub status: i32,
pub kind: u16,
pub version: u16,
pub arg0: u32,
pub arg1: u64,
pub arg2: u64,
pub arg3: u64,
}
impl IommuResponse {
pub const SIZE: usize = 36;
pub const fn success(kind: u16, arg0: u32, arg1: u64, arg2: u64, arg3: u64) -> Self {
Self {
status: 0,
kind,
version: IOMMU_PROTOCOL_VERSION,
arg0,
arg1,
arg2,
arg3,
}
}
pub const fn error(kind: u16, errno: i32) -> Self {
Self {
status: -errno,
kind,
version: IOMMU_PROTOCOL_VERSION,
arg0: 0,
arg1: 0,
arg2: 0,
arg3: 0,
}
}
pub fn from_bytes(bytes: &[u8]) -> Option<Self> {
let header = bytes.get(..Self::SIZE)?;
Some(Self {
status: i32::from_le_bytes(header.get(0..4)?.try_into().ok()?),
kind: u16::from_le_bytes(header.get(4..6)?.try_into().ok()?),
version: u16::from_le_bytes(header.get(6..8)?.try_into().ok()?),
arg0: u32::from_le_bytes(header.get(8..12)?.try_into().ok()?),
arg1: u64::from_le_bytes(header.get(12..20)?.try_into().ok()?),
arg2: u64::from_le_bytes(header.get(20..28)?.try_into().ok()?),
arg3: u64::from_le_bytes(header.get(28..36)?.try_into().ok()?),
})
}
pub fn to_bytes(self) -> [u8; Self::SIZE] {
let mut bytes = [0u8; Self::SIZE];
bytes[0..4].copy_from_slice(&self.status.to_le_bytes());
bytes[4..6].copy_from_slice(&self.kind.to_le_bytes());
bytes[6..8].copy_from_slice(&self.version.to_le_bytes());
bytes[8..12].copy_from_slice(&self.arg0.to_le_bytes());
bytes[12..20].copy_from_slice(&self.arg1.to_le_bytes());
bytes[20..28].copy_from_slice(&self.arg2.to_le_bytes());
bytes[28..36].copy_from_slice(&self.arg3.to_le_bytes());
bytes
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum HandleKind {
Root,
Control,
Domain(u16),
Device(Bdf),
}
#[derive(Clone, Debug)]
struct Handle {
kind: HandleKind,
offset: usize,
response: Vec<u8>,
}
pub struct IommuScheme {
units: Vec<AmdViUnit>,
next_id: usize,
handles: BTreeMap<usize, Handle>,
domains: BTreeMap<u16, DomainPageTables>,
device_assignments: BTreeMap<Bdf, (u16, usize)>,
}
impl IommuScheme {
pub fn new() -> Self {
Self::with_units(Vec::new())
}
pub fn with_units(units: Vec<AmdViUnit>) -> Self {
Self {
units,
next_id: 0,
handles: BTreeMap::new(),
domains: BTreeMap::new(),
device_assignments: BTreeMap::new(),
}
}
pub fn unit_count(&self) -> usize {
self.units.len()
}
fn insert_handle(&mut self, kind: HandleKind) -> usize {
let id = self.next_id;
self.next_id = self.next_id.saturating_add(1);
self.handles.insert(
id,
Handle {
kind,
offset: 0,
response: Vec::new(),
},
);
id
}
fn ensure_domain_exists(&mut self, domain_id: u16) -> core::result::Result<(), i32> {
if self.domains.contains_key(&domain_id) {
return Ok(());
}
let domain = DomainPageTables::new(domain_id).map_err(|_| EIO as i32)?;
self.domains.insert(domain_id, domain);
Ok(())
}
fn next_domain_id(&self) -> Option<u16> {
(1..u16::MAX).find(|domain_id| !self.domains.contains_key(domain_id))
}
fn root_listing(&self) -> Vec<u8> {
let mut listing = String::from("control\n");
for (index, unit) in self.units.iter().enumerate() {
let state = if unit.initialized() {
"initialized"
} else {
"detected"
};
listing.push_str(&format!(
"unit/{index} {} mmio={:#x} state={}\n",
unit.info().iommu_bdf,
unit.info().mmio_base,
state
));
}
for domain_id in self.domains.keys() {
listing.push_str(&format!("domain/{domain_id}\n"));
}
for bdf in self.device_assignments.keys() {
listing.push_str(&format!("device/{bdf}\n"));
}
listing.into_bytes()
}
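/// Parses a domain id from a path component: "0x"-prefixed hex first, then
/// decimal, then bare hex as a last resort (so "1f" is read as 0x1f).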
fn parse_domain_id(path: &str) -> Option<u16> {
let trimmed = path.trim();
trimmed
.strip_prefix("0x")
.and_then(|hex| u16::from_str_radix(hex, 16).ok())
.or_else(|| trimmed.parse::<u16>().ok())
.or_else(|| u16::from_str_radix(trimmed, 16).ok())
}
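/// Decodes request flag bits: 0x1 read, 0x2 write, 0x4 execute, 0x8 force
/// coherent, 0x10 user. An all-zero mask falls back to read/write so a
/// default request still produces a usable mapping.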
fn map_flags(bits: u32) -> MappingFlags {
let flags = MappingFlags {
readable: bits & 0x1 != 0,
writable: bits & 0x2 != 0,
executable: bits & 0x4 != 0,
force_coherent: bits & 0x8 != 0,
user: bits & 0x10 != 0,
};
if !flags.readable
&& !flags.writable
&& !flags.executable
&& !flags.force_coherent
&& !flags.user
{
MappingFlags::read_write()
} else {
flags
}
}
fn choose_unit_for_device(
&self,
bdf: Bdf,
requested_unit: Option<usize>,
) -> core::result::Result<usize, i32> {
if let Some(index) = requested_unit {
let Some(unit) = self.units.get(index) else {
return Err(ENODEV as i32);
};
if unit.handles_device(bdf) {
return Ok(index);
}
return Err(ENODEV as i32);
}
self.units
.iter()
.position(|unit| unit.handles_device(bdf))
.ok_or(ENODEV as i32)
}
fn dispatch_request(&mut self, kind: HandleKind, request: IommuRequest) -> IommuResponse {
if request.version != IOMMU_PROTOCOL_VERSION {
return IommuResponse::error(request.opcode, EINVAL as i32);
}
match kind {
HandleKind::Root => IommuResponse::error(request.opcode, EISDIR as i32),
HandleKind::Control => self.handle_control_request(request),
HandleKind::Domain(domain_id) => self.handle_domain_request(domain_id, request),
HandleKind::Device(bdf) => self.handle_device_request(bdf, request),
}
}
fn handle_control_request(&mut self, request: IommuRequest) -> IommuResponse {
match request.opcode {
opcode::QUERY => IommuResponse::success(
request.opcode,
self.units.len() as u32,
self.domains.len() as u64,
self.device_assignments.len() as u64,
self.units.iter().filter(|unit| unit.initialized()).count() as u64,
),
opcode::CREATE_DOMAIN => {
let domain_id = if request.arg0 == 0 {
match self.next_domain_id() {
Some(domain_id) => domain_id,
None => return IommuResponse::error(request.opcode, EIO as i32),
}
} else {
request.arg0 as u16
};
if let Err(errno) = self.ensure_domain_exists(domain_id) {
return IommuResponse::error(request.opcode, errno);
}
let Some(domain) = self.domains.get(&domain_id) else {
return IommuResponse::error(request.opcode, EIO as i32);
};
IommuResponse::success(
request.opcode,
domain_id as u32,
domain.root_address(),
domain.levels() as u64,
domain.mapping_count() as u64,
)
}
opcode::DESTROY_DOMAIN => {
let domain_id = request.arg0 as u16;
if self
.device_assignments
.values()
.any(|(assigned_domain, _)| *assigned_domain == domain_id)
{
return IommuResponse::error(request.opcode, EINVAL as i32);
}
if self.domains.remove(&domain_id).is_none() {
return IommuResponse::error(request.opcode, ENOENT as i32);
}
IommuResponse::success(request.opcode, domain_id as u32, 0, 0, 0)
}
opcode::DRAIN_EVENTS => {
let requested_index = if request.arg0 == u32::MAX {
None
} else {
Some(request.arg0 as usize)
};
let mut count = 0u32;
let mut first_code = 0u64;
let mut first_device = 0u64;
let mut first_address = 0u64;
for (index, unit) in self.units.iter_mut().enumerate() {
if requested_index.is_some() && requested_index != Some(index) {
continue;
}
match unit.drain_events() {
Ok(events) => {
if let Some(event) = events.first() {
if count == 0 {
first_code = u64::from(event.event_code);
first_device = u64::from(event.device_id.raw());
first_address = event.address;
}
count = count.saturating_add(events.len() as u32);
}
}
Err(_) => return IommuResponse::error(request.opcode, EIO as i32),
}
}
IommuResponse::success(
request.opcode,
count,
first_code,
first_device,
first_address,
)
}
_ => IommuResponse::error(request.opcode, EINVAL as i32),
}
}
fn handle_domain_request(&mut self, domain_id: u16, request: IommuRequest) -> IommuResponse {
if let Err(errno) = self.ensure_domain_exists(domain_id) {
return IommuResponse::error(request.opcode, errno);
}
match request.opcode {
opcode::QUERY => {
let Some(domain) = self.domains.get(&domain_id) else {
return IommuResponse::error(request.opcode, ENOENT as i32);
};
IommuResponse::success(
request.opcode,
domain_id as u32,
domain.root_address(),
domain.levels() as u64,
domain.mapping_count() as u64,
)
}
opcode::MAP => {
let flags = Self::map_flags(request.arg0);
let preferred_iova = if request.arg3 == 0 {
None
} else {
Some(request.arg3)
};
let Some(domain) = self.domains.get_mut(&domain_id) else {
return IommuResponse::error(request.opcode, ENOENT as i32);
};
match domain.map_range(request.arg1, request.arg2, flags, preferred_iova) {
Ok(iova) => IommuResponse::success(
request.opcode,
domain_id as u32,
iova,
request.arg2,
0,
),
Err(_) => IommuResponse::error(request.opcode, EIO as i32),
}
}
opcode::UNMAP => {
let Some(domain) = self.domains.get_mut(&domain_id) else {
return IommuResponse::error(request.opcode, ENOENT as i32);
};
match domain.unmap_range(request.arg1) {
Ok(size) => IommuResponse::success(
request.opcode,
domain_id as u32,
request.arg1,
size,
0,
),
Err(_) => IommuResponse::error(request.opcode, ENOENT as i32),
}
}
_ => IommuResponse::error(request.opcode, EINVAL as i32),
}
}
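    /// Device-endpoint requests manage the bdf -> (domain, unit) assignment
    /// table. ASSIGN_DEVICE validates the domain, picks an IOMMU unit, and
    /// programs the device table entry before recording the assignment.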
fn handle_device_request(&mut self, bdf: Bdf, request: IommuRequest) -> IommuResponse {
match request.opcode {
opcode::QUERY => {
let (domain_id, unit_index) = self
.device_assignments
.get(&bdf)
.copied()
.unwrap_or((0, usize::MAX));
IommuResponse::success(
request.opcode,
domain_id as u32,
if unit_index == usize::MAX {
u64::MAX
} else {
unit_index as u64
},
u64::from(bdf.raw()),
0,
)
}
opcode::ASSIGN_DEVICE => {
let domain_id = request.arg0 as u16;
if let Err(errno) = self.ensure_domain_exists(domain_id) {
return IommuResponse::error(request.opcode, errno);
}
let requested_unit = if request.arg1 == u64::MAX {
None
} else {
Some(request.arg1 as usize)
};
let unit_index = match self.choose_unit_for_device(bdf, requested_unit) {
Ok(index) => index,
Err(errno) => return IommuResponse::error(request.opcode, errno),
};
let Some(domain) = self.domains.get(&domain_id) else {
return IommuResponse::error(request.opcode, ENOENT as i32);
};
let Some(unit) = self.units.get_mut(unit_index) else {
return IommuResponse::error(request.opcode, ENODEV as i32);
};
match unit.assign_device(bdf, domain) {
Ok(()) => {
self.device_assignments.insert(bdf, (domain_id, unit_index));
IommuResponse::success(
request.opcode,
domain_id as u32,
unit_index as u64,
u64::from(bdf.raw()),
0,
)
}
Err(_) => IommuResponse::error(request.opcode, EIO as i32),
}
}
opcode::UNASSIGN_DEVICE => {
if self.device_assignments.remove(&bdf).is_none() {
return IommuResponse::error(request.opcode, ENOENT as i32);
}
IommuResponse::success(request.opcode, 0, u64::from(bdf.raw()), 0, 0)
}
_ => IommuResponse::error(request.opcode, EINVAL as i32),
}
}
}
impl Default for IommuScheme {
fn default() -> Self {
Self::new()
}
}
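// The scheme speaks a write-then-read protocol: a client writes one serialized
// IommuRequest, then reads back one serialized IommuResponse on the same
// handle. A minimal client-side sketch (hypothetical client code; the exact
// path syntax depends on the Redox release, and error handling is elided):
//
//     use std::fs::OpenOptions;
//     use std::io::{Read, Write};
//
//     let mut control = OpenOptions::new()
//         .read(true)
//         .write(true)
//         .open("iommu:control")?;
//     let request = IommuRequest::new(opcode::CREATE_DOMAIN, 0, 0, 0, 0);
//     control.write_all(&request.to_bytes())?;
//     let mut buf = [0u8; IommuResponse::SIZE];
//     let count = control.read(&mut buf)?;
//     let response = IommuResponse::from_bytes(&buf[..count]);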
impl SchemeBlockMut for IommuScheme {
fn open(&mut self, path: &str, _flags: usize, _uid: u32, _gid: u32) -> Result<Option<usize>> {
let cleaned = path.trim_matches('/');
let kind = if cleaned.is_empty() {
HandleKind::Root
} else if cleaned == "control" {
HandleKind::Control
} else if let Some(rest) = cleaned.strip_prefix("domain/") {
let domain_id = Self::parse_domain_id(rest).ok_or(Error::new(ENOENT))?;
self.ensure_domain_exists(domain_id).map_err(Error::new)?;
HandleKind::Domain(domain_id)
} else if let Some(rest) = cleaned.strip_prefix("device/") {
let bdf = parse_bdf(rest).ok_or(Error::new(ENOENT))?;
HandleKind::Device(bdf)
} else {
return Err(Error::new(ENOENT));
};
Ok(Some(self.insert_handle(kind)))
}
fn read(&mut self, id: usize, buf: &mut [u8]) -> Result<Option<usize>> {
let (kind, offset, response) = {
let handle = self.handles.get(&id).ok_or(Error::new(EBADF))?;
(handle.kind, handle.offset, handle.response.clone())
};
let content = match kind {
HandleKind::Root => self.root_listing(),
_ => response,
};
if offset >= content.len() {
return Ok(Some(0));
}
let to_copy = (content.len() - offset).min(buf.len());
buf[..to_copy].copy_from_slice(&content[offset..offset + to_copy]);
let handle = self.handles.get_mut(&id).ok_or(Error::new(EBADF))?;
handle.offset = offset + to_copy;
Ok(Some(to_copy))
}
fn write(&mut self, id: usize, buf: &[u8]) -> Result<Option<usize>> {
let kind = self
.handles
.get(&id)
.map(|handle| handle.kind)
.ok_or(Error::new(EBADF))?;
if kind == HandleKind::Root {
return Err(Error::new(EISDIR));
}
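        // A malformed request still consumes the write; the parse failure is
        // surfaced through the queued EINVAL response on the next read rather
        // than as a write error.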
let response = match IommuRequest::from_bytes(buf) {
Some(request) => self.dispatch_request(kind, request),
None => IommuResponse::error(0, EINVAL as i32),
};
let handle = self.handles.get_mut(&id).ok_or(Error::new(EBADF))?;
handle.response = response.to_bytes().to_vec();
handle.offset = 0;
Ok(Some(buf.len()))
}
fn seek(&mut self, id: usize, pos: isize, whence: usize) -> Result<Option<isize>> {
let (kind, current_offset, response_len) = {
let handle = self.handles.get(&id).ok_or(Error::new(EBADF))?;
(handle.kind, handle.offset, handle.response.len())
};
let content_len = match kind {
HandleKind::Root => self.root_listing().len(),
_ => response_len,
};
let new_offset = match whence {
SEEK_SET => pos,
SEEK_CUR => current_offset as isize + pos,
SEEK_END => content_len as isize + pos,
_ => return Err(Error::new(EINVAL)),
};
if new_offset < 0 {
return Err(Error::new(EINVAL));
}
let handle = self.handles.get_mut(&id).ok_or(Error::new(EBADF))?;
handle.offset = new_offset as usize;
Ok(Some(new_offset))
}
fn fpath(&mut self, id: usize, buf: &mut [u8]) -> Result<Option<usize>> {
let kind = self
.handles
.get(&id)
.map(|handle| handle.kind)
.ok_or(Error::new(EBADF))?;
let path = match kind {
HandleKind::Root => "iommu:".to_string(),
HandleKind::Control => "iommu:control".to_string(),
HandleKind::Domain(domain_id) => format!("iommu:domain/{domain_id}"),
HandleKind::Device(bdf) => format!("iommu:device/{bdf}"),
};
let bytes = path.as_bytes();
let to_copy = bytes.len().min(buf.len());
buf[..to_copy].copy_from_slice(&bytes[..to_copy]);
Ok(Some(to_copy))
}
fn fstat(&mut self, id: usize, stat: &mut Stat) -> Result<Option<usize>> {
let kind = self
.handles
.get(&id)
.map(|handle| handle.kind)
.ok_or(Error::new(EBADF))?;
match kind {
HandleKind::Root => {
stat.st_mode = MODE_DIR | 0o555;
stat.st_size = self.root_listing().len() as u64;
}
_ => {
let response_len = self
.handles
.get(&id)
.map(|handle| handle.response.len())
.ok_or(Error::new(EBADF))?;
stat.st_mode = MODE_FILE | 0o666;
stat.st_size = response_len as u64;
}
}
stat.st_blksize = 4096;
stat.st_blocks = stat.st_size.div_ceil(512);
Ok(Some(0))
}
fn fevent(&mut self, id: usize, _flags: EventFlags) -> Result<Option<EventFlags>> {
let _ = self.handles.get(&id).ok_or(Error::new(EBADF))?;
Ok(Some(EventFlags::empty()))
}
fn close(&mut self, id: usize) -> Result<Option<usize>> {
if self.handles.remove(&id).is_none() {
return Err(Error::new(EBADF));
}
Ok(Some(0))
}
}
#[cfg(all(test, not(target_os = "redox")))]
mod host_redox_stubs {
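    //! Host (non-Redox) test builds still link against the Redox libc
    //! veneers; these stubs satisfy the linker, answering most calls with
    //! ENOSYS (close and munmap report trivial success) so host tests never
    //! reach a real kernel interface.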
use core::ptr;
use syscall::error::{EINVAL, ENOSYS};
fn error_result(errno: i32) -> usize {
        (errno as usize).wrapping_neg()
}
#[no_mangle]
pub extern "C" fn redox_open_v1(
_path_base: *const u8,
_path_len: usize,
_flags: u32,
_mode: u16,
) -> usize {
error_result(ENOSYS)
}
#[no_mangle]
pub extern "C" fn redox_openat_v1(
_fd: usize,
_buf: *const u8,
_path_len: usize,
_flags: u32,
_fcntl_flags: u32,
) -> usize {
error_result(ENOSYS)
}
#[no_mangle]
pub extern "C" fn redox_close_v1(_fd: usize) -> usize {
0
}
#[no_mangle]
pub extern "C" fn redox_mmap_v1(
_addr: *mut (),
_unaligned_len: usize,
_prot: u32,
_flags: u32,
_fd: usize,
_offset: u64,
) -> usize {
error_result(ENOSYS)
}
#[no_mangle]
pub extern "C" fn redox_munmap_v1(_addr: *mut (), _unaligned_len: usize) -> usize {
0
}
#[no_mangle]
pub extern "C" fn redox_sys_call_v0(
_fd: usize,
_payload: *mut u8,
_payload_len: usize,
_flags: usize,
_metadata: *const u64,
_metadata_len: usize,
) -> usize {
error_result(ENOSYS)
}
#[no_mangle]
pub extern "C" fn redox_strerror_v1(dst: *mut u8, dst_len: *mut usize, _error: u32) -> usize {
if dst.is_null() || dst_len.is_null() {
return error_result(EINVAL);
}
let message = b"host test stub";
unsafe {
let writable = *dst_len;
let count = writable.min(message.len());
ptr::copy_nonoverlapping(message.as_ptr(), dst, count);
*dst_len = count;
}
0
}
}
#[cfg(test)]
mod tests {
use super::{opcode, IommuRequest, IommuResponse, IommuScheme};
use crate::page_table::PAGE_SIZE;
use redox_scheme::SchemeBlockMut;
fn read_response(scheme: &mut IommuScheme, id: usize) -> IommuResponse {
let mut bytes = [0u8; IommuResponse::SIZE];
let count = scheme
.read(id, &mut bytes)
.unwrap_or_else(|err| panic!("read failed: {err}"))
.unwrap_or_else(|| panic!("expected response bytes"));
IommuResponse::from_bytes(&bytes[..count])
.unwrap_or_else(|| panic!("invalid response bytes"))
}
#[test]
fn request_round_trip_serialization() {
let request = IommuRequest::new(opcode::MAP, 7, 0x1000, 0x2000, 0x3000);
let encoded = request.to_bytes();
let decoded = IommuRequest::from_bytes(&encoded)
.unwrap_or_else(|| panic!("failed to deserialize request"));
assert_eq!(decoded, request);
}
#[test]
fn root_lists_control_endpoint() {
let mut scheme = IommuScheme::new();
let root = scheme
.open("", 0, 0, 0)
.unwrap_or_else(|err| panic!("open root failed: {err}"))
.unwrap_or_else(|| panic!("root open returned no handle"));
let mut bytes = [0u8; 128];
let count = scheme
.read(root, &mut bytes)
.unwrap_or_else(|err| panic!("read root failed: {err}"))
.unwrap_or_else(|| panic!("expected root bytes"));
let listing = String::from_utf8_lossy(&bytes[..count]);
assert!(listing.contains("control"));
}
#[test]
fn control_can_create_and_query_domain() {
let mut scheme = IommuScheme::new();
let control = scheme
.open("control", 0, 0, 0)
.unwrap_or_else(|err| panic!("open control failed: {err}"))
.unwrap_or_else(|| panic!("control open returned no handle"));
let request = IommuRequest::new(opcode::CREATE_DOMAIN, 7, 0, 0, 0);
scheme
.write(control, &request.to_bytes())
.unwrap_or_else(|err| panic!("create domain write failed: {err}"));
let response = read_response(&mut scheme, control);
assert_eq!(response.status, 0);
assert_eq!(response.arg0, 7);
assert_ne!(response.arg1, 0);
let query = IommuRequest::new(opcode::QUERY, 0, 0, 0, 0);
scheme
.write(control, &query.to_bytes())
.unwrap_or_else(|err| panic!("control query failed: {err}"));
let query_response = read_response(&mut scheme, control);
assert_eq!(query_response.status, 0);
assert_eq!(query_response.arg0, 0);
assert_eq!(query_response.arg1, 1);
}
#[test]
fn domain_handle_can_map_pages() {
let mut scheme = IommuScheme::new();
let domain = scheme
.open("domain/5", 0, 0, 0)
.unwrap_or_else(|err| panic!("open domain failed: {err}"))
.unwrap_or_else(|| panic!("domain open returned no handle"));
let map = IommuRequest::new(opcode::MAP, 0x3, 0x4000_0000, PAGE_SIZE * 2, 0);
scheme
.write(domain, &map.to_bytes())
.unwrap_or_else(|err| panic!("domain map write failed: {err}"));
let response = read_response(&mut scheme, domain);
assert_eq!(response.status, 0);
assert_eq!(response.arg0, 5);
assert_ne!(response.arg1, 0);
let unmap = IommuRequest::new(opcode::UNMAP, 0, response.arg1, 0, 0);
scheme
.write(domain, &unmap.to_bytes())
.unwrap_or_else(|err| panic!("domain unmap write failed: {err}"));
let unmap_response = read_response(&mut scheme, domain);
assert_eq!(unmap_response.status, 0);
assert_eq!(unmap_response.arg2, PAGE_SIZE * 2);
}
#[test]
fn assigning_without_detected_units_returns_error_response() {
let mut scheme = IommuScheme::new();
let device = scheme
.open("device/00:14.0", 0, 0, 0)
.unwrap_or_else(|err| panic!("open device failed: {err}"))
.unwrap_or_else(|| panic!("device open returned no handle"));
let assign = IommuRequest::new(opcode::ASSIGN_DEVICE, 1, u64::MAX, 0, 0);
scheme
.write(device, &assign.to_bytes())
.unwrap_or_else(|err| panic!("device assign write failed: {err}"));
let response = read_response(&mut scheme, device);
assert!(response.status < 0);
}
}
@@ -0,0 +1,135 @@
//! IOMMU daemon: provides the `iommu:` scheme for DMA remapping.
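//!
//! Endpoints exposed by the scheme:
//! - `iommu:` lists the available endpoints
//! - `iommu:control` global queries, domain create/destroy, event draining
//! - `iommu:domain/<id>` per-domain map and unmap requests
//! - `iommu:device/<bdf>` device assignment and queries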
use std::env;
use std::fs;
use std::process;
use iommu::amd_vi::AmdViUnit;
#[cfg(target_os = "redox")]
use iommu::IommuScheme;
use log::{error, info, LevelFilter, Metadata, Record};
#[cfg(target_os = "redox")]
use redox_scheme::{SignalBehavior, Socket};
struct StderrLogger {
level: LevelFilter,
}
impl log::Log for StderrLogger {
fn enabled(&self, metadata: &Metadata) -> bool {
metadata.level() <= self.level
}
fn log(&self, record: &Record) {
if self.enabled(record.metadata()) {
eprintln!("[{}] {}", record.level(), record.args());
}
}
fn flush(&self) {}
}
fn init_logging(level: LevelFilter) {
if log::set_boxed_logger(Box::new(StderrLogger { level })).is_err() {
return;
}
log::set_max_level(level);
}
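// Unit detection is driven by the IOMMU_IVRS_PATH environment variable, which
// names a file containing a raw IVRS ACPI table. One way to exercise the
// parser on a Linux development host (assuming the usual ACPI sysfs layout;
// not something this crate guarantees) is:
//
//   IOMMU_IVRS_PATH=/sys/firmware/acpi/tables/IVRS cargo run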
fn detect_units_from_env() -> Result<Vec<AmdViUnit>, String> {
let Some(path) = env::var_os("IOMMU_IVRS_PATH") else {
return Ok(Vec::new());
};
let bytes = fs::read(&path).map_err(|err| {
format!(
"failed to read IVRS table from {}: {err}",
path.to_string_lossy()
)
})?;
let units = AmdViUnit::detect(&bytes).map_err(|err| format!("failed to parse IVRS: {err}"))?;
Ok(units)
}
#[cfg(target_os = "redox")]
fn run() -> Result<(), String> {
let mut units = detect_units_from_env()?;
info!("iommu: detected {} AMD-Vi unit(s)", units.len());
for (index, unit) in units.iter_mut().enumerate() {
match unit.init() {
Ok(()) => info!(
"iommu: initialized unit {} at MMIO {:#x}",
index,
unit.info().mmio_base
),
Err(err) => error!(
"iommu: failed to initialize unit {} at MMIO {:#x}: {}",
index,
unit.info().mmio_base,
err
),
}
}
let socket =
Socket::create("iommu").map_err(|e| format!("failed to register iommu scheme: {e}"))?;
info!("iommu: registered scheme:iommu");
let mut scheme = IommuScheme::with_units(units);
loop {
let request = match socket.next_request(SignalBehavior::Restart) {
Ok(Some(request)) => request,
Ok(None) => {
info!("iommu: scheme unmounted, exiting");
break;
}
Err(e) => {
error!("iommu: failed to read scheme request: {e}");
continue;
}
};
let response = match request.handle_scheme_block_mut(&mut scheme) {
Ok(response) => response,
Err(_request) => {
error!("iommu: failed to handle request");
continue;
}
};
if let Err(e) = socket.write_response(response, SignalBehavior::Restart) {
error!("iommu: failed to write response: {e}");
}
}
Ok(())
}
#[cfg(not(target_os = "redox"))]
fn run() -> Result<(), String> {
let units = detect_units_from_env()?;
info!(
"iommu: host build stub active; parsed {} AMD-Vi unit(s) from IOMMU_IVRS_PATH",
units.len()
);
Ok(())
}
fn main() {
let log_level = match env::var("IOMMU_LOG").as_deref() {
Ok("debug") => LevelFilter::Debug,
Ok("trace") => LevelFilter::Trace,
Ok("warn") => LevelFilter::Warn,
Ok("error") => LevelFilter::Error,
_ => LevelFilter::Info,
};
init_logging(log_level);
if let Err(e) = run() {
error!("iommu: fatal error: {e}");
process::exit(1);
}
}
@@ -0,0 +1,241 @@
use core::mem::{offset_of, size_of};
use core::ptr::{addr_of, addr_of_mut, read_volatile, write_volatile};
pub const AMD_VI_MMIO_BYTES: usize = 0x2038;
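/// Byte offsets of the AMD-Vi MMIO registers, mirrored by the `#[repr(C)]`
/// layout of [`AmdViMmio`] below and checked by the const assertions.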
pub mod offsets {
pub const DEV_TABLE_BAR: usize = 0x0000;
pub const CMD_BUF_BAR: usize = 0x0008;
pub const EVT_LOG_BAR: usize = 0x0010;
pub const CONTROL: usize = 0x0018;
pub const EXCLUSION_BASE: usize = 0x0020;
pub const EXCLUSION_LIMIT: usize = 0x0028;
pub const EXTENDED_FEATURE: usize = 0x0030;
pub const PPR_LOG_BAR: usize = 0x0038;
pub const CMD_BUF_HEAD: usize = 0x2000;
pub const CMD_BUF_TAIL: usize = 0x2008;
pub const EVT_LOG_HEAD: usize = 0x2010;
pub const EVT_LOG_TAIL: usize = 0x2018;
pub const STATUS: usize = 0x2020;
pub const PPR_LOG_HEAD: usize = 0x2028;
pub const PPR_LOG_TAIL: usize = 0x2030;
}
pub mod control {
pub const IOMMU_ENABLE: u32 = 1 << 0;
pub const HT_TUN_EN: u32 = 1 << 1;
pub const EVENT_LOG_EN: u32 = 1 << 2;
pub const EVENT_INT_EN: u32 = 1 << 3;
pub const COM_WAIT_INT_EN: u32 = 1 << 4;
pub const CMD_BUF_EN: u32 = 1 << 5;
pub const PPR_LOG_EN: u32 = 1 << 6;
pub const PPR_INT_EN: u32 = 1 << 7;
pub const PPR_EN: u32 = 1 << 8;
pub const GT_EN: u32 = 1 << 9;
pub const GA_EN: u32 = 1 << 10;
pub const CRW: u32 = 1 << 12;
pub const SMIF_EN: u32 = 1 << 13;
pub const SLFW_EN: u32 = 1 << 14;
pub const SMIF_LOG_EN: u32 = 1 << 15;
pub const GAM_EN_0: u32 = 1 << 16;
pub const GAM_EN_1: u32 = 1 << 17;
pub const GAM_EN_2: u32 = 1 << 18;
pub const XT_EN: u32 = 1 << 22;
pub const NX_EN: u32 = 1 << 23;
pub const IRQ_TABLE_LEN_EN: u32 = 1 << 24;
}
pub mod status {
pub const IOMMU_RUNNING: u32 = 1 << 0;
pub const EVENT_OVERFLOW: u32 = 1 << 1;
pub const EVENT_LOG_INT: u32 = 1 << 2;
pub const COM_WAIT_INT: u32 = 1 << 3;
pub const PPR_OVERFLOW: u32 = 1 << 4;
pub const PPR_INT: u32 = 1 << 5;
}
pub mod ext_feature {
pub const PREF_SUP: u64 = 1 << 0;
pub const PPR_SUP: u64 = 1 << 1;
pub const XT_SUP: u64 = 1 << 2;
pub const NX_SUP: u64 = 1 << 3;
pub const GT_SUP: u64 = 1 << 4;
pub const IA_SUP: u64 = 1 << 6;
pub const GA_SUP: u64 = 1 << 7;
pub const HE_SUP: u64 = 1 << 8;
pub const PC_SUP: u64 = 1 << 9;
pub const GI_SUP: u64 = 1 << 57;
pub const HA_SUP: u64 = 1 << 58;
}
#[repr(C)]
pub struct AmdViMmio {
pub dev_table_bar: u64,
pub cmd_buf_bar: u64,
pub evt_log_bar: u64,
pub control: u32,
_reserved0: u32,
pub exclusion_base: u64,
pub exclusion_limit: u64,
pub extended_feature: u64,
pub ppr_log_bar: u64,
_reserved1: [u8; 0x2000 - 0x40],
pub cmd_buf_head: u64,
pub cmd_buf_tail: u64,
pub evt_log_head: u64,
pub evt_log_tail: u64,
pub status: u32,
_reserved2: u32,
pub ppr_log_head: u64,
pub ppr_log_tail: u64,
}
const _: () = assert!(size_of::<AmdViMmio>() == AMD_VI_MMIO_BYTES);
const _: () = assert!(offset_of!(AmdViMmio, dev_table_bar) == offsets::DEV_TABLE_BAR);
const _: () = assert!(offset_of!(AmdViMmio, cmd_buf_bar) == offsets::CMD_BUF_BAR);
const _: () = assert!(offset_of!(AmdViMmio, evt_log_bar) == offsets::EVT_LOG_BAR);
const _: () = assert!(offset_of!(AmdViMmio, control) == offsets::CONTROL);
const _: () = assert!(offset_of!(AmdViMmio, extended_feature) == offsets::EXTENDED_FEATURE);
const _: () = assert!(offset_of!(AmdViMmio, cmd_buf_head) == offsets::CMD_BUF_HEAD);
const _: () = assert!(offset_of!(AmdViMmio, cmd_buf_tail) == offsets::CMD_BUF_TAIL);
const _: () = assert!(offset_of!(AmdViMmio, evt_log_head) == offsets::EVT_LOG_HEAD);
const _: () = assert!(offset_of!(AmdViMmio, evt_log_tail) == offsets::EVT_LOG_TAIL);
const _: () = assert!(offset_of!(AmdViMmio, status) == offsets::STATUS);
const _: () = assert!(offset_of!(AmdViMmio, ppr_log_head) == offsets::PPR_LOG_HEAD);
const _: () = assert!(offset_of!(AmdViMmio, ppr_log_tail) == offsets::PPR_LOG_TAIL);
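// Every accessor takes the raw MMIO base pointer and goes through
// read_volatile/write_volatile so the compiler can neither elide nor reorder
// register traffic. A minimal sketch of enabling a unit (assuming `base` maps
// the full 0x2038-byte register window):
//
//     let ctrl = unsafe { AmdViMmio::read_control(base) };
//     unsafe { AmdViMmio::write_control(base, ctrl | control::IOMMU_ENABLE) };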
impl AmdViMmio {
pub unsafe fn read_dev_table_bar(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).dev_table_bar))
}
pub unsafe fn write_dev_table_bar(base: *mut Self, value: u64) {
write_volatile(addr_of_mut!((*base).dev_table_bar), value);
}
pub unsafe fn read_cmd_buf_bar(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).cmd_buf_bar))
}
pub unsafe fn write_cmd_buf_bar(base: *mut Self, value: u64) {
write_volatile(addr_of_mut!((*base).cmd_buf_bar), value);
}
pub unsafe fn read_evt_log_bar(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).evt_log_bar))
}
pub unsafe fn write_evt_log_bar(base: *mut Self, value: u64) {
write_volatile(addr_of_mut!((*base).evt_log_bar), value);
}
pub unsafe fn read_control(base: *mut Self) -> u32 {
read_volatile(addr_of!((*base).control))
}
pub unsafe fn write_control(base: *mut Self, value: u32) {
write_volatile(addr_of_mut!((*base).control), value);
}
pub unsafe fn read_exclusion_base(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).exclusion_base))
}
pub unsafe fn write_exclusion_base(base: *mut Self, value: u64) {
write_volatile(addr_of_mut!((*base).exclusion_base), value);
}
pub unsafe fn read_exclusion_limit(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).exclusion_limit))
}
pub unsafe fn write_exclusion_limit(base: *mut Self, value: u64) {
write_volatile(addr_of_mut!((*base).exclusion_limit), value);
}
pub unsafe fn read_extended_feature(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).extended_feature))
}
pub unsafe fn read_ppr_log_bar(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).ppr_log_bar))
}
pub unsafe fn write_ppr_log_bar(base: *mut Self, value: u64) {
write_volatile(addr_of_mut!((*base).ppr_log_bar), value);
}
pub unsafe fn read_cmd_buf_head(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).cmd_buf_head))
}
pub unsafe fn write_cmd_buf_head(base: *mut Self, value: u64) {
write_volatile(addr_of_mut!((*base).cmd_buf_head), value);
}
pub unsafe fn read_cmd_buf_tail(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).cmd_buf_tail))
}
pub unsafe fn write_cmd_buf_tail(base: *mut Self, value: u64) {
write_volatile(addr_of_mut!((*base).cmd_buf_tail), value);
}
pub unsafe fn read_evt_log_head(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).evt_log_head))
}
pub unsafe fn write_evt_log_head(base: *mut Self, value: u64) {
write_volatile(addr_of_mut!((*base).evt_log_head), value);
}
pub unsafe fn read_evt_log_tail(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).evt_log_tail))
}
pub unsafe fn read_status(base: *mut Self) -> u32 {
read_volatile(addr_of!((*base).status))
}
pub unsafe fn read_ppr_log_head(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).ppr_log_head))
}
pub unsafe fn write_ppr_log_head(base: *mut Self, value: u64) {
write_volatile(addr_of_mut!((*base).ppr_log_head), value);
}
pub unsafe fn read_ppr_log_tail(base: *mut Self) -> u64 {
read_volatile(addr_of!((*base).ppr_log_tail))
}
}
#[cfg(test)]
mod tests {
use core::mem::MaybeUninit;
use super::{offsets, AmdViMmio};
#[test]
fn register_accessors_use_expected_offsets() {
let mut mmio = MaybeUninit::<AmdViMmio>::zeroed();
let base = mmio.as_mut_ptr();
unsafe {
AmdViMmio::write_control(base, 0xdead_beef);
AmdViMmio::write_cmd_buf_head(base, 0x1122_3344_5566_7788);
AmdViMmio::write_dev_table_bar(base, 0x2000);
assert_eq!(AmdViMmio::read_control(base), 0xdead_beef);
assert_eq!(AmdViMmio::read_cmd_buf_head(base), 0x1122_3344_5566_7788);
assert_eq!(AmdViMmio::read_dev_table_bar(base), 0x2000);
let byte_base = base.cast::<u8>();
let control_ptr = byte_base.add(offsets::CONTROL).cast::<u32>();
let head_ptr = byte_base.add(offsets::CMD_BUF_HEAD).cast::<u64>();
assert_eq!(core::ptr::read_volatile(control_ptr), 0xdead_beef);
assert_eq!(core::ptr::read_volatile(head_ptr), 0x1122_3344_5566_7788);
}
}
}
@@ -0,0 +1,690 @@
use core::alloc::Layout;
use core::mem::size_of;
use core::ptr::NonNull;
use core::slice;
use std::collections::BTreeMap;
use redox_driver_sys::dma::DmaBuffer;
pub const PAGE_SIZE: u64 = 4096;
pub const PTES_PER_PAGE: usize = 512;
pub const DEFAULT_IOMMU_LEVELS: u8 = 4;
pub const DEFAULT_IOVA_BASE: u64 = 0x1_0000_0000;
pub const DEFAULT_IOVA_LIMIT: u64 = 0x0000_FFFF_FFFF_F000;
const PTE_PRESENT: u64 = 1 << 0;
const PTE_USER: u64 = 1 << 1;
const PTE_WRITE: u64 = 1 << 2;
const PTE_READ: u64 = 1 << 3;
const PTE_NEXT_LEVEL_SHIFT: u64 = 9;
const PTE_NEXT_LEVEL_MASK: u64 = 0x7 << PTE_NEXT_LEVEL_SHIFT;
const PTE_OUTPUT_ADDR_MASK: u64 = 0x000F_FFFF_FFFF_F000;
const PTE_FORCE_COHERENT: u64 = 1 << 59;
const PTE_IRQ_REMAP: u64 = 1 << 61;
const PTE_IRQ_WRITE: u64 = 1 << 62;
const PTE_NO_EXECUTE: u64 = 1 << 63;
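// Bit layout assumed by these constants: bit 0 marks the entry present, bits
// 9..=11 hold the next-level field (0 denotes a leaf), bits 12..=51 hold the
// output address (PTE_OUTPUT_ADDR_MASK), and the permission and coherency
// bits live in the high word.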
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[repr(transparent)]
pub struct AmdPte(pub u64);
impl AmdPte {
pub const fn new() -> Self {
Self(0)
}
pub fn present(&self) -> bool {
self.0 & PTE_PRESENT != 0
}
pub fn set_present(&mut self, value: bool) {
if value {
self.0 |= PTE_PRESENT;
} else {
self.0 &= !PTE_PRESENT;
}
}
pub fn user(&self) -> bool {
self.0 & PTE_USER != 0
}
pub fn set_user(&mut self, value: bool) {
if value {
self.0 |= PTE_USER;
} else {
self.0 &= !PTE_USER;
}
}
pub fn writable(&self) -> bool {
self.0 & PTE_WRITE != 0
}
pub fn set_writable(&mut self, value: bool) {
if value {
self.0 |= PTE_WRITE;
} else {
self.0 &= !PTE_WRITE;
}
}
pub fn readable(&self) -> bool {
self.0 & PTE_READ != 0
}
pub fn set_readable(&mut self, value: bool) {
if value {
self.0 |= PTE_READ;
} else {
self.0 &= !PTE_READ;
}
}
pub fn next_level(&self) -> u8 {
((self.0 & PTE_NEXT_LEVEL_MASK) >> PTE_NEXT_LEVEL_SHIFT) as u8
}
pub fn set_next_level(&mut self, level: u8) {
self.0 =
(self.0 & !PTE_NEXT_LEVEL_MASK) | ((u64::from(level) & 0x7) << PTE_NEXT_LEVEL_SHIFT);
}
pub fn output_addr(&self) -> u64 {
self.0 & PTE_OUTPUT_ADDR_MASK
}
pub fn set_output_addr(&mut self, addr: u64) {
self.0 = (self.0 & !PTE_OUTPUT_ADDR_MASK) | (addr & PTE_OUTPUT_ADDR_MASK);
}
pub fn force_coherent(&self) -> bool {
self.0 & PTE_FORCE_COHERENT != 0
}
pub fn set_force_coherent(&mut self, value: bool) {
if value {
self.0 |= PTE_FORCE_COHERENT;
} else {
self.0 &= !PTE_FORCE_COHERENT;
}
}
pub fn interrupt_remap(&self) -> bool {
self.0 & PTE_IRQ_REMAP != 0
}
pub fn set_interrupt_remap(&mut self, value: bool) {
if value {
self.0 |= PTE_IRQ_REMAP;
} else {
self.0 &= !PTE_IRQ_REMAP;
}
}
pub fn interrupt_write(&self) -> bool {
self.0 & PTE_IRQ_WRITE != 0
}
pub fn set_interrupt_write(&mut self, value: bool) {
if value {
self.0 |= PTE_IRQ_WRITE;
} else {
self.0 &= !PTE_IRQ_WRITE;
}
}
pub fn no_execute(&self) -> bool {
self.0 & PTE_NO_EXECUTE != 0
}
pub fn set_no_execute(&mut self, value: bool) {
if value {
self.0 |= PTE_NO_EXECUTE;
} else {
self.0 &= !PTE_NO_EXECUTE;
}
}
pub fn leaf(addr: u64, flags: MappingFlags) -> Self {
let mut entry = Self::new();
entry.set_present(true);
entry.set_output_addr(addr);
entry.set_readable(flags.readable);
entry.set_writable(flags.writable);
entry.set_user(flags.user);
entry.set_force_coherent(flags.force_coherent);
entry.set_no_execute(!flags.executable);
entry
}
pub fn pointer(addr: u64, next_level: u8) -> Self {
let mut entry = Self::new();
entry.set_present(true);
entry.set_next_level(next_level);
entry.set_output_addr(addr);
entry
}
}
const _: () = assert!(size_of::<AmdPte>() == 8);
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct MappingFlags {
pub readable: bool,
pub writable: bool,
pub executable: bool,
pub force_coherent: bool,
pub user: bool,
}
impl Default for MappingFlags {
fn default() -> Self {
Self::read_write()
}
}
impl MappingFlags {
pub const fn read_write() -> Self {
Self {
readable: true,
writable: true,
executable: false,
force_coherent: false,
user: false,
}
}
}
enum PageStorage {
Dma(DmaBuffer),
Host {
ptr: NonNull<u8>,
layout: Layout,
len: usize,
},
}
struct PageBuffer {
storage: PageStorage,
phys_addr: usize,
}
impl PageBuffer {
fn allocate(len: usize, align: usize) -> Result<Self, &'static str> {
match DmaBuffer::allocate(len, align) {
Ok(buffer) => Ok(Self {
phys_addr: buffer.physical_address(),
storage: PageStorage::Dma(buffer),
}),
Err(_) => {
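                // Host/test fallback: with no DMA allocator available, take
                // zeroed memory from the global allocator and use its virtual
                // address as a stand-in physical address. Sufficient for unit
                // tests; hardware paths get a real DmaBuffer above.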
let layout = Layout::from_size_align(len, align)
.map_err(|_| "invalid page-table allocation layout")?;
let ptr = unsafe { std::alloc::alloc_zeroed(layout) };
let ptr = NonNull::new(ptr).ok_or("failed to allocate host page-table memory")?;
Ok(Self {
phys_addr: ptr.as_ptr() as usize,
storage: PageStorage::Host { ptr, layout, len },
})
}
}
}
fn as_ptr(&self) -> *const u8 {
match &self.storage {
PageStorage::Dma(buffer) => buffer.as_ptr(),
PageStorage::Host { ptr, .. } => ptr.as_ptr(),
}
}
fn as_mut_ptr(&mut self) -> *mut u8 {
match &mut self.storage {
PageStorage::Dma(buffer) => buffer.as_mut_ptr(),
PageStorage::Host { ptr, .. } => ptr.as_ptr(),
}
}
fn physical_address(&self) -> usize {
self.phys_addr
}
fn len(&self) -> usize {
match &self.storage {
PageStorage::Dma(buffer) => buffer.len(),
PageStorage::Host { len, .. } => *len,
}
}
}
impl Drop for PageBuffer {
fn drop(&mut self) {
if let PageStorage::Host { ptr, layout, .. } = &self.storage {
unsafe {
std::alloc::dealloc(ptr.as_ptr(), *layout);
}
}
}
}
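// SAFETY: PageBuffer exclusively owns its backing memory and only exposes it
// through &self and &mut self methods; this also assumes the underlying
// DmaBuffer may be moved and shared across threads.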
unsafe impl Send for PageBuffer {}
unsafe impl Sync for PageBuffer {}
struct PageTablePage {
buffer: PageBuffer,
}
impl PageTablePage {
fn new() -> Result<Self, &'static str> {
let buffer = PageBuffer::allocate(PAGE_SIZE as usize, PAGE_SIZE as usize)?;
if buffer.len() < PAGE_SIZE as usize {
return Err("page-table allocation smaller than one page");
}
Ok(Self { buffer })
}
fn physical_address(&self) -> u64 {
self.buffer.physical_address() as u64
}
fn entry(&self, index: usize) -> AmdPte {
self.entries()[index]
}
fn set_entry(&mut self, index: usize, entry: AmdPte) {
self.entries_mut()[index] = entry;
}
fn entries(&self) -> &[AmdPte] {
unsafe { slice::from_raw_parts(self.buffer.as_ptr().cast::<AmdPte>(), PTES_PER_PAGE) }
}
fn entries_mut(&mut self) -> &mut [AmdPte] {
unsafe {
slice::from_raw_parts_mut(self.buffer.as_mut_ptr().cast::<AmdPte>(), PTES_PER_PAGE)
}
}
}
struct PageTableNode {
page: PageTablePage,
children: BTreeMap<usize, Box<PageTableNode>>,
}
impl PageTableNode {
fn new() -> Result<Self, &'static str> {
Ok(Self {
page: PageTablePage::new()?,
children: BTreeMap::new(),
})
}
fn phys_addr(&self) -> u64 {
self.page.physical_address()
}
}
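/// Software model of an AMD-Vi I/O page table. Intermediate tables are owned
/// as `PageTableNode` children, so dropping the table releases every level.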
pub struct PageTable {
levels: u8,
root: Box<PageTableNode>,
}
impl PageTable {
pub fn new(levels: u8) -> Result<Self, &'static str> {
if !(1..=6).contains(&levels) {
return Err("AMD-Vi page tables support between 1 and 6 levels");
}
Ok(Self {
levels,
root: Box::new(PageTableNode::new()?),
})
}
pub fn levels(&self) -> u8 {
self.levels
}
pub fn root_address(&self) -> u64 {
self.root.phys_addr()
}
pub fn map_page(
&mut self,
iova: u64,
phys: u64,
flags: MappingFlags,
) -> Result<(), &'static str> {
if iova & (PAGE_SIZE - 1) != 0 || phys & (PAGE_SIZE - 1) != 0 {
return Err("IOMMU mappings must be 4KiB-aligned");
}
let mut node = self.root.as_mut();
for level in (2..=self.levels).rev() {
let index = page_table_index(level, iova);
if !node.children.contains_key(&index) {
let child = Box::new(PageTableNode::new()?);
let child_phys = child.phys_addr();
node.page
.set_entry(index, AmdPte::pointer(child_phys, level - 1));
node.children.insert(index, child);
}
let child = node
.children
.get_mut(&index)
.ok_or("failed to descend page table")?;
node = child.as_mut();
}
let leaf_index = page_table_index(1, iova);
node.page.set_entry(leaf_index, AmdPte::leaf(phys, flags));
Ok(())
}
pub fn unmap_page(&mut self, iova: u64) -> bool {
Self::unmap_in_node(self.root.as_mut(), self.levels, iova)
}
pub fn translate(&self, iova: u64) -> Option<u64> {
let page_base = iova & !(PAGE_SIZE - 1);
let page_offset = iova & (PAGE_SIZE - 1);
let mut node = self.root.as_ref();
for level in (2..=self.levels).rev() {
let index = page_table_index(level, page_base);
let entry = node.page.entry(index);
if !entry.present() {
return None;
}
node = node.children.get(&index)?.as_ref();
}
let leaf = node.page.entry(page_table_index(1, page_base));
if !leaf.present() {
return None;
}
Some(leaf.output_addr() + page_offset)
}
fn unmap_in_node(node: &mut PageTableNode, level: u8, iova: u64) -> bool {
if level == 1 {
let index = page_table_index(1, iova);
let present = node.page.entry(index).present();
if present {
node.page.set_entry(index, AmdPte::new());
}
return present;
}
let index = page_table_index(level, iova);
let Some(child) = node.children.get_mut(&index) else {
return false;
};
Self::unmap_in_node(child.as_mut(), level - 1, iova)
}
}
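/// Each level consumes nine bits of the IOVA above the 12-bit page offset:
/// level 1 selects bits 12..=20, level 2 bits 21..=29, level 3 bits 30..=38,
/// and level 4 bits 39..=47.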
fn page_table_index(level: u8, address: u64) -> usize {
((address >> (12 + ((u64::from(level) - 1) * 9))) & 0x1FF) as usize
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DomainMapping {
pub iova: u64,
pub phys: u64,
pub size: u64,
pub flags: MappingFlags,
}
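/// First-fit IOVA allocator over `[base, limit)`. Allocations are keyed by
/// start address in a `BTreeMap`, so finding a gap is one in-order walk and a
/// freed range becomes immediately reusable.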
pub struct IovaAllocator {
base: u64,
limit: u64,
allocations: BTreeMap<u64, u64>,
}
impl IovaAllocator {
pub fn new(base: u64, limit: u64) -> Self {
Self {
base,
limit,
allocations: BTreeMap::new(),
}
}
    pub fn allocate(&mut self, size: u64, align: u64) -> Option<u64> {
        let size = align_up(size.max(PAGE_SIZE), PAGE_SIZE)?;
        let align = align.max(PAGE_SIZE).next_power_of_two();
        let mut cursor = align_up(self.base, align)?;
        for (&start, &length) in &self.allocations {
            if cursor.checked_add(size)? <= start {
                break;
            }
            cursor = align_up(start.checked_add(length)?, align)?;
        }
        if cursor.checked_add(size)? > self.limit {
            return None;
        }
        self.allocations.insert(cursor, size);
        Some(cursor)
    }
pub fn reserve(&mut self, start: u64, size: u64) -> bool {
let Some(end) = start.checked_add(size) else {
return false;
};
if start < self.base || end > self.limit {
return false;
}
let prev = self.allocations.range(..=start).next_back();
if let Some((&prev_start, &prev_len)) = prev {
let Some(prev_end) = prev_start.checked_add(prev_len) else {
return false;
};
if prev_end > start {
return false;
}
}
let next = self.allocations.range(start..).next();
if let Some((&next_start, _)) = next {
if next_start < end {
return false;
}
}
self.allocations.insert(start, size);
true
}
pub fn free(&mut self, start: u64) -> bool {
self.allocations.remove(&start).is_some()
}
pub fn allocated_size(&self, start: u64) -> Option<u64> {
self.allocations.get(&start).copied()
}
pub fn allocation_count(&self) -> usize {
self.allocations.len()
}
}
pub struct DomainPageTables {
domain_id: u16,
page_table: PageTable,
allocator: IovaAllocator,
mappings: BTreeMap<u64, DomainMapping>,
}
impl DomainPageTables {
pub fn new(domain_id: u16) -> Result<Self, &'static str> {
Self::with_range(domain_id, DEFAULT_IOVA_BASE, DEFAULT_IOVA_LIMIT)
}
pub fn with_range(domain_id: u16, base: u64, limit: u64) -> Result<Self, &'static str> {
Ok(Self {
domain_id,
page_table: PageTable::new(DEFAULT_IOMMU_LEVELS)?,
allocator: IovaAllocator::new(base, limit),
mappings: BTreeMap::new(),
})
}
pub fn domain_id(&self) -> u16 {
self.domain_id
}
pub fn root_address(&self) -> u64 {
self.page_table.root_address()
}
pub fn levels(&self) -> u8 {
self.page_table.levels()
}
pub fn map_range(
&mut self,
phys: u64,
size: u64,
flags: MappingFlags,
preferred_iova: Option<u64>,
) -> Result<u64, &'static str> {
if size == 0 {
return Err("IOMMU map size must be non-zero");
}
if phys & (PAGE_SIZE - 1) != 0 {
return Err("IOMMU physical mappings must be page-aligned");
}
let size = align_up(size, PAGE_SIZE).ok_or("IOMMU map size overflow")?;
let iova = if let Some(requested) = preferred_iova {
if requested & (PAGE_SIZE - 1) != 0 {
return Err("IOMMU IOVA mappings must be page-aligned");
}
if !self.allocator.reserve(requested, size) {
return Err("requested IOVA range is unavailable");
}
requested
} else {
self.allocator
.allocate(size, PAGE_SIZE)
.ok_or("unable to allocate an IOVA range")?
};
let mut mapped = 0u64;
while mapped < size {
if let Err(err) = self
.page_table
.map_page(iova + mapped, phys + mapped, flags)
{
let mut rollback = 0u64;
while rollback < mapped {
let _ = self.page_table.unmap_page(iova + rollback);
rollback += PAGE_SIZE;
}
let _ = self.allocator.free(iova);
return Err(err);
}
mapped += PAGE_SIZE;
}
self.mappings.insert(
iova,
DomainMapping {
iova,
phys,
size,
flags,
},
);
Ok(iova)
}
pub fn unmap_range(&mut self, iova: u64) -> Result<u64, &'static str> {
let mapping = self
.mappings
.remove(&iova)
.ok_or("IOMMU mapping does not exist")?;
let mut offset = 0u64;
while offset < mapping.size {
let _ = self.page_table.unmap_page(mapping.iova + offset);
offset += PAGE_SIZE;
}
let _ = self.allocator.free(mapping.iova);
Ok(mapping.size)
}
pub fn mapping(&self, iova: u64) -> Option<&DomainMapping> {
self.mappings.get(&iova)
}
pub fn mapping_count(&self) -> usize {
self.mappings.len()
}
}
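/// Rounds `value` up to the next multiple of `align` (callers pass powers of
/// two), returning `None` on overflow: `align_up(0x1001, 0x1000)` is
/// `Some(0x2000)`.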
fn align_up(value: u64, align: u64) -> Option<u64> {
let mask = align.checked_sub(1)?;
value.checked_add(mask).map(|rounded| rounded & !mask)
}
#[cfg(test)]
mod tests {
use super::{AmdPte, DomainPageTables, IovaAllocator, MappingFlags, PageTable, PAGE_SIZE};
#[test]
fn amd_pte_leaf_sets_permissions() {
let pte = AmdPte::leaf(0x1234_5000, MappingFlags::read_write());
assert!(pte.present());
assert!(pte.readable());
assert!(pte.writable());
assert!(pte.no_execute());
assert_eq!(pte.output_addr(), 0x1234_5000);
}
#[test]
fn iova_allocator_finds_gap_and_reuses_freed_ranges() {
let mut allocator = IovaAllocator::new(0x1000, 0x10_0000);
        let first = allocator
            .allocate(PAGE_SIZE, PAGE_SIZE)
            .unwrap_or_else(|| panic!("first allocation failed"));
        let second = allocator
            .allocate(PAGE_SIZE * 2, PAGE_SIZE)
            .unwrap_or_else(|| panic!("second allocation failed"));
        assert_eq!(first, 0x1000);
        assert_eq!(second, 0x2000);
        assert!(allocator.free(first));
        let reused = allocator
            .allocate(PAGE_SIZE, PAGE_SIZE)
            .unwrap_or_else(|| panic!("reallocation after free failed"));
        assert_eq!(reused, first);
}
#[test]
fn page_table_translate_round_trips_mapping() {
let mut table =
PageTable::new(4).unwrap_or_else(|err| panic!("page table create failed: {err}"));
table
.map_page(0x4000, 0x2000_0000, MappingFlags::read_write())
.unwrap_or_else(|err| panic!("page table map failed: {err}"));
assert_eq!(table.translate(0x4123), Some(0x2000_0123));
assert!(table.unmap_page(0x4000));
assert_eq!(table.translate(0x4123), None);
}
#[test]
fn domain_page_tables_allocate_iova_and_unmap() {
let mut domain = DomainPageTables::new(7)
.unwrap_or_else(|err| panic!("domain page table create failed: {err}"));
let iova = domain
.map_range(0x3000_0000, PAGE_SIZE * 2, MappingFlags::read_write(), None)
.unwrap_or_else(|err| panic!("domain mapping failed: {err}"));
let mapping = domain
.mapping(iova)
.unwrap_or_else(|| panic!("mapping missing"));
assert_eq!(mapping.size, PAGE_SIZE * 2);
assert!(domain.unmap_range(iova).is_ok());
assert!(domain.mapping(iova).is_none());
}
}