Red Bear OS base baseline from 0.1.0 pre-patched archive

This commit is contained in:
Red Bear OS
2026-06-27 09:21:43 +03:00
commit dd08b76a39
433 changed files with 78493 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
/target
+28
View File
@@ -0,0 +1,28 @@
[package]
name = "nvmed"
description = "NVM Express (NVMe) driver"
version = "0.1.0"
edition = "2021"
[dependencies]
bitflags.workspace = true
futures = "0.3"
libredox.workspace = true
log.workspace = true
parking_lot.workspace = true
redox_event.workspace = true
redox_syscall = { workspace = true, features = ["std"] }
smallvec.workspace = true
executor = { path = "../../executor" }
common = { path = "../../common" }
daemon = { path = "../../../daemon" }
driver-block = { path = "../driver-block" }
partitionlib = { path = "../partitionlib" }
pcid = { path = "../../pcid" }
[features]
default = []
[lints]
workspace = true
+154
View File
@@ -0,0 +1,154 @@
use std::cell::RefCell;
use std::fs::File;
use std::io::{self, Read, Write};
use std::os::fd::AsRawFd;
use std::rc::Rc;
use std::sync::Arc;
use std::usize;
use driver_block::{Disk, DiskScheme};
use pcid_interface::{irq_helpers, PciFunctionHandle};
use crate::nvme::NvmeNamespace;
use self::nvme::Nvme;
mod nvme;
/// One NVMe namespace exposed as a block device through the `Disk` trait.
struct NvmeDisk {
/// Shared controller driver that carries out the reads and writes.
nvme: Arc<Nvme>,
/// Namespace (id, block count, block size) this disk operates on.
ns: NvmeNamespace,
}
impl Disk for NvmeDisk {
fn block_size(&self) -> u32 {
self.ns.block_size.try_into().unwrap()
}
fn size(&self) -> u64 {
self.ns.blocks * self.ns.block_size
}
async fn read(&mut self, block: u64, buffer: &mut [u8]) -> syscall::Result<usize> {
self.nvme.namespace_read(&self.ns, block, buffer).await
}
async fn write(&mut self, block: u64, buffer: &[u8]) -> syscall::Result<usize> {
self.nvme.namespace_write(&self.ns, block, buffer).await
}
}
/// Re-arms the timer behind `time_handle` to `secs` seconds after the time it currently reports.
///
/// A `TimeSpec` is read from the handle, its `tv_sec` is advanced by `secs`, and the updated
/// value is written back to the same handle.
///
/// # Errors
/// Returns any I/O error from the read or the write. A read shorter than a `TimeSpec` yields
/// `InvalidData`; a write shorter than a `TimeSpec` yields `WriteZero`.
fn time_arm(time_handle: &mut File, secs: i64) -> io::Result<()> {
    let mut time_buf = [0_u8; core::mem::size_of::<libredox::data::TimeSpec>()];

    if time_handle.read(&mut time_buf)? < time_buf.len() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "time read too small",
        ));
    }

    // View the raw bytes as a `TimeSpec` and push the deadline forward in place.
    let time = libredox::data::timespec_from_mut_bytes(&mut time_buf);
    time.tv_sec += secs;

    // The byte count returned by `write` was previously discarded, so a truncated deadline
    // would have gone unnoticed. Treat it like the short-read case above.
    if time_handle.write(&time_buf)? < time_buf.len() {
        return Err(io::Error::new(
            io::ErrorKind::WriteZero,
            "time write too small",
        ));
    }

    Ok(())
}
/// Process entry point: hands control to `pcid_interface::pci_daemon`, passing `daemon` as the
/// driver body.
fn main() {
pcid_interface::pci_daemon(daemon);
}
/// Driver body: brings up the NVMe controller behind `pcid_handle`, discovers its namespaces,
/// publishes them as a `disk.<pci-name>-nvme` scheme and then serves scheme requests forever.
///
/// # Panics
/// Panics if any bring-up step fails (driver allocation, controller init, opening or arming the
/// timer, entering the null namespace) or if namespace discovery does not finish before the
/// 5 second timer fires.
fn daemon(daemon: daemon::Daemon, mut pcid_handle: PciFunctionHandle) -> ! {
let pci_config = pcid_handle.config();
// The scheme name is derived from the PCI function name so each controller gets its own scheme.
let scheme_name = format!("disk.{}-nvme", pci_config.func.name());
common::setup_logging(
"disk",
"pci",
&scheme_name,
common::output_level(),
common::file_level(),
);
log::debug!("NVME PCI CONFIG: {:?}", pci_config);
// BAR 0 holds the controller registers; its address is handed to `Nvme::new` below.
let address = unsafe { pcid_handle.map_bar(0).ptr };
let interrupt_vector = irq_helpers::pci_allocate_interrupt_vector(&mut pcid_handle, "nvmed");
let iv = interrupt_vector.vector();
// Keep a separate clone of the IRQ handle for the executor; `Nvme` takes ownership of the vector.
let irq_handle = interrupt_vector.irq_handle().try_clone().unwrap();
let mut nvme = Nvme::new(address.as_ptr() as usize, interrupt_vector, pcid_handle)
.expect("nvmed: failed to allocate driver data");
unsafe { nvme.init().expect("nvmed: failed to init") }
log::debug!("Finished base initialization");
// From here on the driver is shared between the executor and every `NvmeDisk`.
let nvme = Arc::new(nvme);
let executor = nvme::executor::init(Arc::clone(&nvme), iv, false /* FIXME */, irq_handle);
// The monotonic-clock handle is registered with the executor and used as an init timeout.
let mut time_handle = File::open(&format!("/scheme/time/{}", libredox::flag::CLOCK_MONOTONIC))
.expect("failed to open time handle");
let mut time_events = Box::pin(
executor.register_external_event(time_handle.as_raw_fd() as usize, event::EventFlags::READ),
);
// Try to init namespaces for 5 seconds
time_arm(&mut time_handle, 5).expect("failed to arm timer");
// Race namespace discovery against the timer event; whichever completes first wins.
let namespaces = executor.block_on(async {
let namespaces_future = nvme.init_with_queues();
let time_future = time_events.as_mut().next();
futures::pin_mut!(namespaces_future);
futures::pin_mut!(time_future);
match futures::future::select(namespaces_future, time_future).await {
futures::future::Either::Left((namespaces, _)) => namespaces,
futures::future::Either::Right(_) => panic!("timeout on init"),
}
});
log::debug!("Initialized!");
// Wrap every discovered namespace in an `NvmeDisk`, keyed by its namespace id.
let scheme = Rc::new(RefCell::new(DiskScheme::new(
Some(daemon),
scheme_name,
namespaces
.into_iter()
.map(|(k, ns)| {
(
k,
NvmeDisk {
nvme: nvme.clone(),
ns,
},
)
})
.collect(),
&*executor,
)));
let mut scheme_events = Box::pin(executor.register_external_event(
scheme.borrow().event_handle().raw(),
event::EventFlags::READ,
));
// All handles are open by now, so the process can enter the null namespace.
libredox::call::setrens(0, 0).expect("nvmed: failed to enter null namespace");
log::debug!("Starting to listen for scheme events");
// Main loop: service the scheme once, then wait for the next readiness event on its handle.
executor.block_on(async {
loop {
log::trace!("new event iteration");
if let Err(err) = scheme.borrow_mut().tick().await {
log::error!("scheme error: {err}");
}
let _ = scheme_events.as_mut().next().await;
}
});
//TODO: destroy NVMe stuff
std::process::exit(0);
}
+162
View File
@@ -0,0 +1,162 @@
use super::NvmeCmd;
/// Constructors for the admin and I/O commands this driver issues.
///
/// Every constructor fills only the fields its command uses; all remaining fields come from
/// `NvmeCmd::default()` and are therefore zero.
impl NvmeCmd {
    /// Builds a Create I/O Completion Queue admin command (opcode `0x05`).
    ///
    /// * `cid` - command identifier stored in the entry.
    /// * `qid` - identifier of the queue to create (low half of dword 10).
    /// * `ptr` - address of the queue memory, stored in the first data-pointer slot. Callers in
    ///   this driver pass the physical address of the queue's DMA buffer.
    /// * `size` - value for the high half of dword 10. Callers in this driver pass the number of
    ///   entries minus one.
    /// * `iv` - interrupt vector. `Some` sets the interrupt-enable bit and stores the vector in
    ///   the high half of dword 11; `None` leaves interrupts disabled for the queue.
    pub fn create_io_completion_queue(
        cid: u16,
        qid: u16,
        ptr: usize,
        size: u16,
        iv: Option<u16>,
    ) -> Self {
        const DW11_PHYSICALLY_CONTIGUOUS_BIT: u32 = 0x0000_0001;
        const DW11_ENABLE_INTERRUPTS_BIT: u32 = 0x0000_0002;
        const DW11_INTERRUPT_VECTOR_SHIFT: u8 = 16;

        // enable interrupts only if a vector is present
        let interrupt_bits = iv.map_or(0, |iv| {
            DW11_ENABLE_INTERRUPTS_BIT | (u32::from(iv) << DW11_INTERRUPT_VECTOR_SHIFT)
        });

        Self {
            opcode: 5,
            cid,
            dptr: [ptr as u64, 0],
            cdw10: (u32::from(size) << 16) | u32::from(qid),
            cdw11: DW11_PHYSICALLY_CONTIGUOUS_BIT | interrupt_bits,
            ..Default::default()
        }
    }

    /// Builds a Create I/O Submission Queue admin command (opcode `0x01`).
    ///
    /// * `cid` - command identifier stored in the entry.
    /// * `qid` - identifier of the submission queue to create (low half of dword 10).
    /// * `ptr` - address of the queue memory, stored in the first data-pointer slot.
    /// * `size` - value for the high half of dword 10. Callers in this driver pass the number of
    ///   entries minus one.
    /// * `cqid` - identifier of the completion queue paired with this submission queue (high
    ///   half of dword 11).
    pub fn create_io_submission_queue(
        cid: u16,
        qid: u16,
        ptr: usize,
        size: u16,
        cqid: u16,
    ) -> Self {
        Self {
            opcode: 1,
            cid,
            dptr: [ptr as u64, 0],
            cdw10: (u32::from(size) << 16) | u32::from(qid),
            // Bit 0: physically contiguous.
            //TODO: QPRIO
            cdw11: (u32::from(cqid) << 16) | 1,
            //TODO: NVMSETID (dword 12)
            ..Default::default()
        }
    }

    /// Builds an Identify admin command (opcode `0x06`) with dword 10 set to `0`, addressed to
    /// namespace `nsid`. The result is written to the buffer at `ptr`.
    pub fn identify_namespace(cid: u16, ptr: usize, nsid: u32) -> Self {
        Self {
            opcode: 6,
            cid,
            nsid,
            dptr: [ptr as u64, 0],
            cdw10: 0,
            ..Default::default()
        }
    }

    /// Builds an Identify admin command (opcode `0x06`) with dword 10 set to `1`, which this
    /// driver uses to fetch the controller data structure into the buffer at `ptr`.
    pub fn identify_controller(cid: u16, ptr: usize) -> Self {
        Self {
            opcode: 6,
            cid,
            dptr: [ptr as u64, 0],
            cdw10: 1,
            ..Default::default()
        }
    }

    /// Builds an Identify admin command (opcode `0x06`) with dword 10 set to `2`, which this
    /// driver uses to fetch a namespace ID list into the buffer at `ptr`.
    ///
    /// `base` is stored in the namespace-ID field of the command.
    pub fn identify_namespace_list(cid: u16, ptr: usize, base: u32) -> Self {
        Self {
            opcode: 6,
            cid,
            nsid: base,
            dptr: [ptr as u64, 0],
            cdw10: 2,
            ..Default::default()
        }
    }

    /// Builds a Get Features admin command (opcode `0x0A`) for feature identifier `fid`, with
    /// `ptr` as the data buffer.
    ///
    /// `cid` is stored in the entry like in every other constructor; it was previously dropped,
    /// leaving the command identifier at zero regardless of the argument.
    pub fn get_features(cid: u16, ptr: usize, fid: u8) -> Self {
        Self {
            opcode: 0xA,
            cid,
            dptr: [ptr as u64, 0],
            cdw10: u32::from(fid), // TODO: SEL
            ..Default::default()
        }
    }

    /// Builds a Read I/O command (opcode `0x02`).
    ///
    /// * `nsid` - namespace to read from.
    /// * `lba` - starting logical block; split into dword 10 (low 32 bits) and dword 11 (high
    ///   32 bits).
    /// * `blocks_1` - stored in dword 12. Callers in this driver pass the block count minus one.
    /// * `ptr0`, `ptr1` - the two data-pointer slots.
    pub fn io_read(cid: u16, nsid: u32, lba: u64, blocks_1: u16, ptr0: u64, ptr1: u64) -> Self {
        Self {
            opcode: 2,
            cid,
            nsid,
            dptr: [ptr0, ptr1],
            // Truncation is intentional: the upper half goes into dword 11.
            cdw10: lba as u32,
            cdw11: (lba >> 32) as u32,
            cdw12: u32::from(blocks_1),
            ..Default::default()
        }
    }

    /// Builds a Write I/O command (opcode `0x01`).
    ///
    /// Parameters have the same meaning as for [`NvmeCmd::io_read`].
    pub fn io_write(cid: u16, nsid: u32, lba: u64, blocks_1: u16, ptr0: u64, ptr1: u64) -> Self {
        Self {
            opcode: 1,
            cid,
            nsid,
            dptr: [ptr0, ptr1],
            // Truncation is intentional: the upper half goes into dword 11.
            cdw10: lba as u32,
            cdw11: (lba >> 32) as u32,
            cdw12: u32::from(blocks_1),
            ..Default::default()
        }
    }
}
@@ -0,0 +1,82 @@
use std::cell::RefCell;
use std::fs::File;
use std::rc::Rc;
use std::sync::Arc;
use executor::{Hardware, LocalExecutor};
use super::{CmdId, CqId, Nvme, NvmeCmd, NvmeComp, SqId};
pub struct NvmeHw;
impl Hardware for NvmeHw {
type Iv = u16;
type Sqe = NvmeCmd;
type Cqe = NvmeComp;
type CmdId = CmdId;
type CqId = CqId;
type SqId = SqId;
type GlobalCtxt = Arc<Nvme>;
fn mask_vector(ctxt: &Arc<Nvme>, iv: Self::Iv) {
ctxt.set_vector_masked(iv, true)
}
fn unmask_vector(ctxt: &Arc<Nvme>, iv: Self::Iv) {
ctxt.set_vector_masked(iv, false)
}
fn set_sqe_cmdid(sqe: &mut NvmeCmd, id: CmdId) {
sqe.cid = id;
}
fn get_cqe_cmdid(cqe: &Self::Cqe) -> Self::CmdId {
cqe.cid
}
fn vtable() -> &'static std::task::RawWakerVTable {
&VTABLE
}
fn current() -> std::rc::Rc<executor::LocalExecutor<Self>> {
THE_EXECUTOR.with(|exec| Rc::clone(exec.borrow().as_ref().unwrap()))
}
fn try_submit(
nvme: &Arc<Nvme>,
sq_id: Self::SqId,
success: impl FnOnce(Self::CmdId) -> Self::Sqe,
fail: impl FnOnce(),
) -> Option<(Self::CqId, Self::CmdId)> {
let ctxt = nvme.cur_thread_ctxt();
let ctxt = ctxt.lock();
nvme.try_submit_raw(&*ctxt, sq_id, success, fail)
}
fn poll_cqes(nvme: &Arc<Nvme>, mut handle: impl FnMut(Self::CqId, Self::Cqe)) {
let ctxt = nvme.cur_thread_ctxt();
let ctxt = ctxt.lock();
for (sq_cq_id, (sq, cq)) in ctxt.queues.borrow_mut().iter_mut() {
while let Some((new_head, cqe)) = cq.complete() {
unsafe {
nvme.completion_queue_head(*sq_cq_id, new_head);
}
sq.head = cqe.sq_head;
log::trace!("new head {new_head} cqe {cqe:?}");
handle(*sq_cq_id, cqe);
}
}
}
fn sq_cq(_ctxt: &Arc<Nvme>, id: Self::CqId) -> Self::SqId {
id
}
}
// Waker vtable built by the `executor` crate for `NvmeHw`; returned by `NvmeHw::vtable`.
static VTABLE: std::task::RawWakerVTable = executor::vtable::<NvmeHw>();
thread_local! {
// Per-thread executor slot: filled by `init`, read by `NvmeHw::current`.
static THE_EXECUTOR: RefCell<Option<Rc<LocalExecutor<NvmeHw>>>> = RefCell::new(None);
}
/// The `executor` crate's local executor specialised for `NvmeHw`.
pub type NvmeExecutor = LocalExecutor<NvmeHw>;
/// Creates the executor for this thread and records it in the thread-local slot so that
/// `NvmeHw::current` can find it later.
///
/// The arguments are forwarded unchanged to `executor::init_raw`.
pub fn init(nvme: Arc<Nvme>, iv: u16, intx: bool, irq_handle: File) -> Rc<LocalExecutor<NvmeHw>> {
    let local_executor = Rc::new(executor::init_raw(nvme, iv, intx, irq_handle));
    THE_EXECUTOR.with(|slot| {
        slot.replace(Some(Rc::clone(&local_executor)));
    });
    local_executor
}
+228
View File
@@ -0,0 +1,228 @@
use super::{Nvme, NvmeCmd, NvmeNamespace};
use common::dma::Dma;
/// Data returned by the Identify Controller command. See NVME spec section 5.15.2.2.
///
/// The structure is exactly 4096 bytes. Only the leading 72 bytes are broken out into fields;
/// the rest is opaque padding.
#[derive(Clone, Copy)]
#[repr(C, packed)]
pub struct IdentifyControllerData {
    /// PCI vendor ID, always the same as in the PCI function header.
    pub vid: u16,
    /// PCI subsystem vendor ID.
    pub ssvid: u16,
    /// Serial number, ASCII (bytes 4..24).
    pub serial_no: [u8; 20],
    /// Model number, ASCII (bytes 24..64).
    ///
    /// This field is 40 bytes long. It was previously declared as 48 bytes, which shifted
    /// `firmware_rev` to offset 72 and grew the structure to 4104 bytes.
    pub model_no: [u8; 40],
    /// Firmware revision, ASCII (bytes 64..72).
    pub firmware_rev: [u8; 8],
    // TODO: Lots of fields
    pub _4k_pad: [u8; 4096 - 72],
}

// The controller writes one 4 KiB structure; make any future layout drift a compile error.
const _: () = assert!(core::mem::size_of::<IdentifyControllerData>() == 4096);
/// Data returned by the Identify Namespace command. See NVME spec section 5.15.2.1.
///
/// The layout is packed and the field sizes add up to 4096 bytes. The bare `// N` comments
/// between fields give the byte offset of the field that follows.
#[derive(Clone, Copy)]
#[repr(C, packed)]
pub struct IdentifyNamespaceData {
/// Namespace size in logical blocks (returned by `size_in_blocks`).
pub nsze: u64,
/// Namespace capacity in logical blocks (returned by `capacity_in_blocks`).
pub ncap: u64,
pub nuse: u64,
pub nsfeat: u8,
pub nlbaf: u8,
/// Low four bits select the entry of `lba_format_support` currently in use; bit 4 is what
/// `has_metadata_after_data` reports.
pub flbas: u8,
pub mc: u8,
pub dpc: u8,
pub dps: u8,
pub nmic: u8,
pub rescap: u8,
// 32
pub fpi: u8,
pub dlfeat: u8,
pub nawun: u16,
pub nawupf: u16,
pub nacwu: u16,
// 40
pub nabsn: u16,
pub nabo: u16,
pub nabspf: u16,
pub noiob: u16,
// 48
pub nvmcap: u128,
// 64
pub npwg: u16,
pub npwa: u16,
pub npdg: u16,
pub npda: u16,
// 72
pub nows: u16,
pub _rsvd1: [u8; 18],
// 92
pub anagrpid: u32,
pub _rsvd2: [u8; 3],
pub nsattr: u8,
// 100
pub nvmsetid: u16,
pub endgid: u16,
pub nguid: [u8; 16],
pub eui64: u64,
/// The sixteen LBA format descriptors, indexed by `formatted_lba_size_idx`.
pub lba_format_support: [LbaFormat; 16],
pub _rsvd3: [u8; 192],
pub vendor_specific: [u8; 3712],
}
impl IdentifyNamespaceData {
    /// Namespace size in logical blocks (the `nsze` field).
    pub fn size_in_blocks(&self) -> u64 {
        self.nsze
    }

    /// Namespace capacity in logical blocks (the `ncap` field).
    pub fn capacity_in_blocks(&self) -> u64 {
        self.ncap
    }

    /// Index of the LBA format currently in use, taken from the low nibble of `flbas`.
    ///
    /// Guaranteed to be within 0..=15
    pub fn formatted_lba_size_idx(&self) -> usize {
        usize::from(self.flbas & 0x0F)
    }

    /// The LBA format descriptor selected by [`Self::formatted_lba_size_idx`].
    pub fn formatted_lba_size(&self) -> &LbaFormat {
        let idx = self.formatted_lba_size_idx();
        &self.lba_format_support[idx]
    }

    /// Reports whether bit 4 of `flbas` is set.
    pub fn has_metadata_after_data(&self) -> bool {
        (self.flbas >> 4) & 1 == 1
    }
}
/// One raw 32-bit LBA format descriptor from the Identify Namespace data.
///
/// Bit layout as decoded by the accessors below: bits 0..16 metadata size, bits 16..24 log2 of
/// the LBA data size, bits 24..26 relative performance.
#[derive(Clone, Copy)]
#[repr(C, packed)]
pub struct LbaFormat(pub u32);

/// Relative performance of an LBA format; `Best` orders highest.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum RelativePerformance {
    Best = 0b00,
    Better,
    Good,
    Degraded,
}

impl Ord for RelativePerformance {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // A smaller encoding means better performance, so compare with the operands swapped.
        let (lhs, rhs) = (*self as u8, *other as u8);
        rhs.cmp(&lhs)
    }
}

impl PartialOrd for RelativePerformance {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl LbaFormat {
    /// Decodes the two-bit relative performance field (bits 24..26).
    pub fn relative_performance(&self) -> RelativePerformance {
        const LEVELS: [RelativePerformance; 4] = [
            RelativePerformance::Best,
            RelativePerformance::Better,
            RelativePerformance::Good,
            RelativePerformance::Degraded,
        ];
        LEVELS[((self.0 >> 24) & 0b11) as usize]
    }

    /// A format is considered available when its log2 data size is non-zero.
    pub fn is_available(&self) -> bool {
        self.log_lba_data_size() > 0
    }

    /// Base-2 logarithm of the LBA data size (bits 16..24).
    pub fn log_lba_data_size(&self) -> u8 {
        // The truncating cast keeps exactly the low eight bits of the shifted value.
        (self.0 >> 16) as u8
    }

    /// LBA data size in bytes, or `None` when the encoded exponent is below 9 (512 bytes) or
    /// is 32 or above.
    pub fn lba_data_size(&self) -> Option<u64> {
        let exponent = self.log_lba_data_size();
        (9..32).contains(&exponent).then(|| 1u64 << exponent)
    }

    /// Metadata size field (bits 0..16).
    pub fn metadata_size(&self) -> u16 {
        self.0 as u16
    }
}
impl Nvme {
    /// Issues an Identify Controller admin command and logs the reported model, serial number
    /// and firmware revision.
    ///
    /// Nothing is returned; the strings are only written to the log.
    ///
    /// # Panics
    /// Panics if the DMA buffer cannot be allocated.
    pub async fn identify_controller(&self) {
        // TODO: Use same buffer
        let data: Dma<IdentifyControllerData> = unsafe { Dma::zeroed().unwrap().assume_init() };

        let comp = self
            .submit_and_complete_admin_command(|cid| {
                NvmeCmd::identify_controller(cid, data.physical())
            })
            .await;
        log::trace!("Completion: {:?}", comp);

        // The fields are fixed-width ASCII; trim the surrounding whitespace for display.
        let model_cow = String::from_utf8_lossy(&data.model_no);
        let serial_cow = String::from_utf8_lossy(&data.serial_no);
        let fw_cow = String::from_utf8_lossy(&data.firmware_rev);

        let model = model_cow.trim();
        let serial = serial_cow.trim();
        let firmware = fw_cow.trim();

        log::info!(
            " - Model: {} Serial: {} Firmware: {}",
            model,
            serial,
            firmware,
        );
    }

    /// Issues an Identify command for the namespace ID list, passing `base` in the command's
    /// namespace-ID field.
    ///
    /// Returns the IDs from the start of the returned list up to (not including) the first
    /// zero entry.
    ///
    /// # Panics
    /// Panics if the DMA buffer cannot be allocated.
    pub async fn identify_namespace_list(&self, base: u32) -> Vec<u32> {
        // TODO: Use buffer
        let data: Dma<[u32; 1024]> = unsafe { Dma::zeroed().unwrap().assume_init() };

        let comp = self
            .submit_and_complete_admin_command(|cid| {
                NvmeCmd::identify_namespace_list(cid, data.physical(), base)
            })
            .await;
        log::trace!("Completion2: {:?}", comp);

        data.iter().copied().take_while(|&nsid| nsid != 0).collect()
    }

    /// Issues an Identify Namespace command for `nsid` and returns the namespace's id, size in
    /// blocks and block size.
    ///
    /// # Panics
    /// Panics if the DMA buffer cannot be allocated, or if the active LBA format reports a data
    /// size that `LbaFormat::lba_data_size` rejects.
    pub async fn identify_namespace(&self, nsid: u32) -> NvmeNamespace {
        //TODO: Use buffer
        let data: Dma<IdentifyNamespaceData> = unsafe { Dma::zeroed().unwrap().assume_init() };

        log::debug!("Attempting to identify namespace {nsid}");
        let comp = self
            .submit_and_complete_admin_command(|cid| {
                NvmeCmd::identify_namespace(cid, data.physical(), nsid)
            })
            .await;
        // Trace the completion like the sibling identify commands do instead of discarding it.
        log::trace!("Completion: {:?}", comp);

        log::debug!("Dumping identify namespace");

        let size = data.size_in_blocks();
        let capacity = data.capacity_in_blocks();
        log::info!("NSID: {} Size: {} Capacity: {}", nsid, size, capacity);

        let block_size = data
            .formatted_lba_size()
            .lba_data_size()
            .expect("nvmed: error: size outside 512-2^64 range");

        log::debug!("NVME block size: {}", block_size);

        NvmeNamespace {
            id: nsid,
            blocks: size,
            block_size,
        }
    }
}
+541
View File
@@ -0,0 +1,541 @@
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom;
use std::iter;
use std::sync::atomic::AtomicU16;
use std::sync::Arc;
use parking_lot::{Mutex, ReentrantMutex, RwLock};
use pcid_interface::irq_helpers::InterruptVector;
use common::io::{Io, Mmio};
use common::timeout::Timeout;
use syscall::error::{Error, Result, EIO};
use common::dma::Dma;
pub mod cmd;
pub mod executor;
pub mod identify;
pub mod queues;
use self::executor::NvmeExecutor;
pub use self::queues::{NvmeCmd, NvmeCmdQueue, NvmeComp, NvmeCompQueue};
use pcid_interface::PciFunctionHandle;
/// Memory-mapped controller registers.
///
/// `Nvme::new` reinterprets the address it is given as this structure, and `doorbell_write`
/// locates the doorbell registers at `0x1000` bytes past its start.
#[repr(C, packed)]
pub struct NvmeRegs {
/// Controller Capabilities
cap_low: Mmio<u32>,
cap_high: Mmio<u32>,
/// Version
vs: Mmio<u32>,
/// Interrupt mask set
intms: Mmio<u32>,
/// Interrupt mask clear
intmc: Mmio<u32>,
/// Controller configuration
cc: Mmio<u32>,
/// Reserved
_rsvd: Mmio<u32>,
/// Controller status
csts: Mmio<u32>,
/// NVM subsystem reset
nssr: Mmio<u32>,
/// Admin queue attributes
aqa: Mmio<u32>,
/// Admin submission queue base address
asq_low: Mmio<u32>,
asq_high: Mmio<u32>,
/// Admin completion queue base address
acq_low: Mmio<u32>,
acq_high: Mmio<u32>,
/// Controller memory buffer location
cmbloc: Mmio<u32>,
/// Controller memory buffer size
cmbsz: Mmio<u32>,
}
/// Summary of one namespace as filled in by `Nvme::identify_namespace`.
#[derive(Copy, Clone, Debug)]
pub struct NvmeNamespace {
/// Namespace identifier used in I/O commands.
pub id: u32,
/// Namespace size in logical blocks.
pub blocks: u64,
/// Size of one logical block in bytes.
pub block_size: u64,
}
/// Completion queue identifier.
pub type CqId = u16;
/// Submission queue identifier.
pub type SqId = u16;
/// Command identifier carried in submission and completion entries.
pub type CmdId = u16;
/// Atomic counterpart of `CqId`.
pub type AtomicCqId = AtomicU16;
/// Atomic counterpart of `SqId`.
pub type AtomicSqId = AtomicU16;
/// Atomic counterpart of `CmdId`.
pub type AtomicCmdId = AtomicU16;
/// Interrupt vector number.
pub type Iv = u16;
/// Driver state for one NVMe controller.
pub struct Nvme {
// Interrupt vector allocated for the device; used by `set_vector_masked`.
interrupt_vector: Mutex<InterruptVector>,
// Handle to the PCI function this controller lives on.
pcid_interface: Mutex<PciFunctionHandle>,
// Mapped controller registers.
regs: RwLock<&'static mut NvmeRegs>,
// Interrupt vector recorded for each submission queue id.
sq_ivs: RwLock<HashMap<SqId, Iv>>,
// Interrupt vector recorded for each completion queue id.
cq_ivs: RwLock<HashMap<CqId, Iv>>,
// maps interrupt vectors with the completion queues they have
thread_ctxts: RwLock<HashMap<Iv, Arc<ReentrantMutex<ThreadCtxt>>>>,
// Initialised to 2 in `new`; not read anywhere in the code visible here.
next_sqid: AtomicSqId,
// Initialised to 2 in `new`; not read anywhere in the code visible here.
next_cqid: AtomicCqId,
}
/// Per-context I/O state: a bounce buffer, the PRP list describing it, and the queue pairs.
pub struct ThreadCtxt {
// Data is staged here for every read and write (see `namespace_read` / `namespace_write`).
buffer: RefCell<Dma<[u8; 512 * 4096]>>, // 2MB of buffer
// Entry `i` holds the physical address of the `i`-th 4096-byte page of `buffer` (set in `init`).
buffer_prp: RefCell<Dma<[u64; 512]>>, // 4KB of PRP for the buffer
// Yes, technically NVME allows multiple submission queues to be mapped to the same completion
// queue, but we don't use that feature.
// Keyed by queue id; each value is the (submission, completion) pair for that id.
queues: RefCell<HashMap<u16, (NvmeCmdQueue, NvmeCompQueue)>>,
}
// NOTE(review): these impls are asserted manually; the compiler cannot derive them because
// `ThreadCtxt` contains `RefCell`s. Presumably sound because the driver currently runs a single
// thread context (see the multi-threading TODO in `cur_thread_ctxt`) — confirm before adding
// threads.
unsafe impl Send for Nvme {}
unsafe impl Sync for Nvme {}
/// How to handle full submission queues.
///
/// NOTE(review): no use of this enum is visible in this part of the file.
pub enum FullSqHandling {
/// Return an error immediately prior to posting the command.
ErrorDirectly,
/// Tell the executor that we want to be notified when a command on the same submission queue
/// has been completed.
Wait,
}
impl Nvme {
pub fn new(
address: usize,
interrupt_vector: InterruptVector,
pcid_interface: PciFunctionHandle,
) -> Result<Self> {
Ok(Nvme {
regs: RwLock::new(unsafe { &mut *(address as *mut NvmeRegs) }),
thread_ctxts: RwLock::new(
iter::once((
0_u16,
Arc::new(ReentrantMutex::new(ThreadCtxt {
buffer: RefCell::new(unsafe { Dma::zeroed()?.assume_init() }),
buffer_prp: RefCell::new(unsafe { Dma::zeroed()?.assume_init() }),
queues: RefCell::new(
iter::once((0, (NvmeCmdQueue::new()?, NvmeCompQueue::new()?)))
.collect(),
),
})),
))
.collect(),
),
cq_ivs: RwLock::new(iter::once((0, 0)).collect()),
sq_ivs: RwLock::new(iter::once((0, 0)).collect()),
interrupt_vector: Mutex::new(interrupt_vector),
pcid_interface: Mutex::new(pcid_interface),
// TODO
next_sqid: AtomicSqId::new(2),
next_cqid: AtomicCqId::new(2),
})
}
/// Writes `value` to the doorbell register number `index`.
///
/// Doorbells start `0x1000` bytes past the register base and are spaced `4 << DSTRD` bytes
/// apart, where DSTRD is the low four bits of the upper capabilities dword.
///
/// # Locking
/// Takes the write lock on `regs` for the duration of the call.
///
/// # Safety
/// Performs a raw MMIO write at an address computed from `index`; the caller must pass an index
/// that designates an existing doorbell of this controller.
unsafe fn doorbell_write(&self, index: usize, value: u32) {
    let mut regs = self.regs.write();
    let stride = 4_usize << (regs.cap_high.read() & 0b1111);
    let base = &mut **regs as *mut NvmeRegs as usize;
    let doorbell = (base + 0x1000 + index * stride) as *mut Mmio<u32>;
    (*doorbell).write(value);
}
/// Returns the context used for the current thread. At present this is always context 0.
///
/// # Panics
/// Panics if context 0 is missing from the map.
fn cur_thread_ctxt(&self) -> Arc<ReentrantMutex<ThreadCtxt>> {
    // TODO: multi-threading
    let ctxts = self.thread_ctxts.read();
    let main = ctxts.get(&0).unwrap();
    Arc::clone(main)
}
/// Rings the submission-queue tail doorbell of queue `qid` with the new `tail` index.
///
/// Submission doorbells occupy the even doorbell slots (`2 * qid`).
///
/// # Safety
/// Same contract as `doorbell_write`: `qid` must designate a queue of this controller.
pub unsafe fn submission_queue_tail(&self, qid: u16, tail: u16) {
    let doorbell = usize::from(qid) * 2;
    self.doorbell_write(doorbell, tail.into());
}

/// Rings the completion-queue head doorbell of queue `qid` with the new `head` index.
///
/// Completion doorbells occupy the odd doorbell slots (`2 * qid + 1`).
///
/// # Safety
/// Same contract as `doorbell_write`: `qid` must designate a queue of this controller.
pub unsafe fn completion_queue_head(&self, qid: u16, head: u16) {
    let doorbell = usize::from(qid) * 2 + 1;
    self.doorbell_write(doorbell, head.into());
}
/// Resets the controller and brings it up with the admin queue pair installed.
///
/// Steps, in order: disable the controller and wait for it to report not-ready, unmask the
/// interrupt, fill the PRP list for the bounce buffer, program the admin queue attributes and
/// base addresses, set the queue entry sizes in the configuration register, enable the
/// controller and wait for it to report ready.
///
/// # Errors
/// Returns `EIO` if the controller does not become not-ready, or ready, within one second each.
///
/// # Safety
/// Writes to the controller's memory-mapped registers; `self` must have been created from the
/// address of a correctly mapped NVMe register block.
pub unsafe fn init(&mut self) -> Result<()> {
    let thread_ctxts = self.thread_ctxts.get_mut();

    {
        let regs = self.regs.read();
        log::debug!("CAP_LOW: {:X}", regs.cap_low.read());
        log::debug!("CAP_HIGH: {:X}", regs.cap_high.read());
        log::debug!("VS: {:X}", regs.vs.read());
        log::debug!("CC: {:X}", regs.cc.read());
        log::debug!("CSTS: {:X}", regs.csts.read());
    }

    log::debug!("Disabling controller.");
    self.regs.get_mut().cc.writef(1, false);

    {
        log::trace!("Waiting for not ready.");
        let timeout = Timeout::from_secs(1);
        loop {
            let csts = self.regs.get_mut().csts.read();
            log::trace!("CSTS: {:X}", csts);
            if csts & 1 == 1 {
                timeout.run().map_err(|()| {
                    log::error!("failed to wait for not ready");
                    Error::new(EIO)
                })?;
            } else {
                break;
            }
        }
    }

    // Fall back to the controller's own mask registers when the vector has no fast path.
    if !self.interrupt_vector.get_mut().set_masked_if_fast(false) {
        self.regs.get_mut().intms.write(0xFFFF_FFFF);
        self.regs.get_mut().intmc.write(0x0000_0001);
    }

    for (qid, iv) in self.cq_ivs.get_mut().iter() {
        let ctxt = thread_ctxts.get(&0).unwrap().lock();
        let queues = ctxt.queues.borrow();
        // Pairs are stored as (submission, completion). They used to be destructured the other
        // way round here, which swapped the labels in the log line below.
        let (sq, cq) = queues.get(qid).unwrap();
        log::debug!(
            "iv {iv} [cq {qid}: {:X}, {}] [sq {qid}: {:X}, {}]",
            cq.data.physical(),
            cq.data.len(),
            sq.data.physical(),
            sq.data.len()
        );
    }

    {
        let main_ctxt = thread_ctxts.get(&0).unwrap().lock();

        // One PRP entry per 4096-byte page of the bounce buffer.
        for (i, prp) in main_ctxt.buffer_prp.borrow_mut().iter_mut().enumerate() {
            *prp = (main_ctxt.buffer.borrow_mut().physical() + i * 4096) as u64;
        }

        let regs = self.regs.get_mut();
        let mut queues = main_ctxt.queues.borrow_mut();
        let (asq, acq) = queues.get_mut(&0).unwrap();

        // Admin queue sizes are programmed as entry count minus one.
        regs.aqa
            .write(((acq.data.len() as u32 - 1) << 16) | (asq.data.len() as u32 - 1));
        regs.asq_low.write(asq.data.physical() as u32);
        regs.asq_high
            .write((asq.data.physical() as u64 >> 32) as u32);
        regs.acq_low.write(acq.data.physical() as u32);
        regs.acq_high
            .write((acq.data.physical() as u64 >> 32) as u32);

        // Set IOCQES, IOSQES, AMS, MPS, and CSS
        let mut cc = regs.cc.read();
        cc &= 0xFF00000F;
        cc |= (4 << 20) | (6 << 16);
        regs.cc.write(cc);
    }

    log::debug!("Enabling controller.");
    self.regs.get_mut().cc.writef(1, true);

    {
        log::debug!("Waiting for ready");
        let timeout = Timeout::from_secs(1);
        loop {
            let csts = self.regs.get_mut().csts.read();
            log::debug!("CSTS: {:X}", csts);
            if csts & 1 == 0 {
                timeout.run().map_err(|()| {
                    log::error!("failed to wait for ready");
                    Error::new(EIO)
                })?;
            } else {
                break;
            }
        }
    }

    Ok(())
}
/// Masks or unmasks interrupt vector `vector`.
///
/// The allocated interrupt vector is asked first; if it cannot do the change itself
/// (`set_masked_if_fast` returns `false`), the corresponding bit is written to the controller's
/// interrupt-mask-set register (when masking) or interrupt-mask-clear register (when unmasking).
pub fn set_vector_masked(&self, vector: u16, masked: bool) {
    let mut irq = self.interrupt_vector.lock();
    if irq.set_masked_if_fast(masked) {
        return;
    }

    let bit: u32 = 1 << (vector as u8);
    let mut regs = self.regs.write();
    if masked {
        regs.intms.write(bit);
    } else {
        regs.intmc.write(bit);
    }
}
/// Submits a command on submission queue `sq_id` through the current thread's executor and
/// waits for its completion entry.
///
/// `cmd_init` is called with a command identifier of 0.
/// NOTE(review): presumably the executor assigns the real identifier afterwards via
/// `Hardware::set_sqe_cmdid` — confirm against the `executor` crate.
pub async fn submit_and_complete_command(
    &self,
    sq_id: SqId,
    cmd_init: impl FnOnce(CmdId) -> NvmeCmd,
) -> NvmeComp {
    let executor = NvmeExecutor::current();
    let cmd = cmd_init(0);
    executor.submit(sq_id, cmd).await
}

/// Same as [`Self::submit_and_complete_command`] for the admin queue (submission queue 0).
pub async fn submit_and_complete_admin_command(
    &self,
    cmd_init: impl FnOnce(CmdId) -> NvmeCmd,
) -> NvmeComp {
    const ADMIN_SQ: SqId = 0;
    self.submit_and_complete_command(ADMIN_SQ, cmd_init).await
}
/// Tries to append one command to submission queue `sq_id` of `ctxt` and ring its doorbell.
///
/// When the queue is full, `fail` is called and `None` is returned without building the
/// command. Otherwise `cmd_init` receives the current tail index as command identifier, the
/// resulting entry is written, the tail doorbell is rung, and the queue id together with the
/// command identifier is returned.
///
/// # Panics
/// Panics if `sq_id` is not a known queue of `ctxt`.
pub fn try_submit_raw(
    &self,
    ctxt: &ThreadCtxt,
    sq_id: SqId,
    cmd_init: impl FnOnce(CmdId) -> NvmeCmd,
    fail: impl FnOnce(),
) -> Option<(CqId, CmdId)> {
    let mut queues = ctxt.queues.borrow_mut();
    let (sq, _cq) = queues.get_mut(&sq_id).unwrap();

    if sq.is_full() {
        fail();
        return None;
    }

    let cmd_id = sq.tail;
    let new_tail = sq.submit_unchecked(cmd_init(cmd_id));

    // TODO: Submit in bulk
    unsafe {
        self.submission_queue_tail(sq_id, new_tail);
    }

    Some((sq_id, cmd_id))
}
pub async fn create_io_completion_queue(
&self,
io_cq_id: CqId,
vector: Option<Iv>,
) -> NvmeCompQueue {
let queue = NvmeCompQueue::new().expect("nvmed: failed to allocate I/O completion queue");
let len = u16::try_from(queue.data.len())
.expect("nvmed: internal error: I/O CQ longer than 2^16 entries");
let raw_len = len
.checked_sub(1)
.expect("nvmed: internal error: CQID 0 for I/O CQ");
let comp = self
.submit_and_complete_admin_command(|cid| {
NvmeCmd::create_io_completion_queue(
cid,
io_cq_id,
queue.data.physical(),
raw_len,
vector,
)
})
.await;
/*match comp.status.specific {
1 => panic!("invalid queue identifier"),
2 => panic!("invalid queue size"),
8 => panic!("invalid interrupt vector"),
_ => (),
}*/
queue
}
pub async fn create_io_submission_queue(&self, io_sq_id: SqId, io_cq_id: CqId) -> NvmeCmdQueue {
let q = NvmeCmdQueue::new().expect("failed to create submission queue");
let len = u16::try_from(q.data.len())
.expect("nvmed: internal error: I/O SQ longer than 2^16 entries");
let raw_len = len
.checked_sub(1)
.expect("nvmed: internal error: SQID 0 for I/O SQ");
let comp = self
.submit_and_complete_admin_command(|cid| {
NvmeCmd::create_io_submission_queue(
cid,
io_sq_id,
q.data.physical(),
raw_len,
io_cq_id,
)
})
.await;
/*match comp.status.specific {
0 => panic!("completion queue invalid"),
1 => panic!("invalid queue identifier"),
2 => panic!("invalid queue size"),
_ => (),
}*/
q
}
/// Second initialisation stage, run on the executor: identifies the controller and its
/// namespaces, then creates I/O queue pair 1 and registers it with thread context 0.
///
/// Returns the discovered namespaces keyed by namespace id.
pub async fn init_with_queues(&self) -> BTreeMap<u32, NvmeNamespace> {
    log::trace!("preinit");

    self.identify_controller().await;
    let nsids = self.identify_namespace_list(0).await;

    log::debug!("first commands");

    let mut namespaces = BTreeMap::new();
    for nsid in nsids {
        let namespace = self.identify_namespace(nsid).await;
        namespaces.insert(nsid, namespace);
    }

    // TODO: Multiple queues
    let cq = self.create_io_completion_queue(1, Some(0)).await;
    log::trace!("created compq");
    let sq = self.create_io_submission_queue(1, 1).await;
    log::trace!("created subq");

    {
        // Store the new pair under queue id 1 of the main context.
        let ctxts = self.thread_ctxts.read();
        let main = ctxts.get(&0).unwrap().lock();
        main.queues.borrow_mut().insert(1, (sq, cq));
    }
    // Both new queues use interrupt vector 0.
    self.sq_ivs.write().insert(1, 0);
    self.cq_ivs.write().insert(1, 0);

    namespaces
}
/// Issues one read or write of `blocks_1 + 1` blocks at `lba` between the namespace and the
/// bounce buffer of `ctxt`, on I/O queue 1.
///
/// The data pointers are chosen by transfer size: a single PRP entry up to 4096 bytes, two
/// entries up to 8192 bytes, and beyond that the first entry plus the address of the second
/// entry of the PRP list.
///
/// # Errors
/// Returns `EIO` (after logging the command) when the completion carries a non-zero status.
async fn namespace_rw(
    &self,
    ctxt: &ThreadCtxt,
    namespace: &NvmeNamespace,
    lba: u64,
    blocks_1: u16,
    write: bool,
) -> Result<()> {
    let bytes = (u64::from(blocks_1) + 1) * namespace.block_size;

    // The PRP list is only read here, so borrow it immutably and release the borrow before
    // awaiting. Holding a mutable `RefCell` borrow across the await would make any other access
    // to the list on this thread panic while the command is in flight.
    let (ptr0, ptr1) = {
        let prp = ctxt.buffer_prp.borrow();
        let ptrs = if bytes <= 4096 {
            (prp[0], 0)
        } else if bytes <= 8192 {
            (prp[0], prp[1])
        } else {
            (prp[0], (prp.physical() + 8) as u64)
        };
        ptrs
    };

    // Keep a copy of the submitted command so it can be logged on failure.
    let mut cmd = NvmeCmd::default();
    let comp = self
        .submit_and_complete_command(1, |cid| {
            cmd = if write {
                NvmeCmd::io_write(cid, namespace.id, lba, blocks_1, ptr0, ptr1)
            } else {
                NvmeCmd::io_read(cid, namespace.id, lba, blocks_1, ptr0, ptr1)
            };
            cmd
        })
        .await;

    // Bit 0 of the status word is the phase tag; the remainder is the status proper.
    let status = comp.status >> 1;
    if status == 0 {
        Ok(())
    } else {
        log::error!("command {:#x?} failed with status {:#x}", cmd, status);
        Err(Error::new(EIO))
    }
}
/// Reads `buf.len()` bytes from `namespace` starting at logical block `lba`.
///
/// The request is split into chunks of at most 8192 bytes. Each chunk is read into the bounce
/// buffer in whole blocks and then copied into `buf`.
///
/// Returns `buf.len()` on success.
///
/// # Errors
/// Propagates the error of the first chunk that fails.
pub async fn namespace_read(
    &self,
    namespace: &NvmeNamespace,
    mut lba: u64,
    buf: &mut [u8],
) -> Result<usize> {
    // TODO: buf len
    const CHUNK_BYTES: usize = 8192;

    let thread_ctxt = self.cur_thread_ctxt();
    let guard = thread_ctxt.lock();
    let block_size = namespace.block_size as usize;

    for chunk in buf.chunks_mut(CHUNK_BYTES) {
        let wanted = chunk.len();
        // Round up to whole blocks.
        let blocks = (wanted + block_size - 1) / block_size;
        assert!(blocks > 0);
        assert!(blocks <= 0x1_0000);
        let blocks_1 = (blocks - 1) as u16;

        self.namespace_rw(&*guard, namespace, lba, blocks_1, false)
            .await?;

        let bounce = guard.buffer.borrow();
        chunk.copy_from_slice(&bounce[..wanted]);
        lba += blocks as u64;
    }

    Ok(buf.len())
}
/// Writes `buf` to `namespace` starting at logical block `lba`.
///
/// The request is split into chunks of at most 8192 bytes. Each chunk is copied into the bounce
/// buffer and then written out in whole blocks.
///
/// NOTE(review): when a chunk is not a multiple of the block size, the rest of its last block
/// is whatever the bounce buffer held before — presumably callers only pass block-aligned
/// buffers; confirm.
///
/// Returns `buf.len()` on success.
///
/// # Errors
/// Propagates the error of the first chunk that fails.
pub async fn namespace_write(
    &self,
    namespace: &NvmeNamespace,
    mut lba: u64,
    buf: &[u8],
) -> Result<usize> {
    // TODO: buf len
    const CHUNK_BYTES: usize = 8192;

    let thread_ctxt = self.cur_thread_ctxt();
    let guard = thread_ctxt.lock();
    let block_size = namespace.block_size as usize;

    for chunk in buf.chunks(CHUNK_BYTES) {
        let pending = chunk.len();
        // Round up to whole blocks.
        let blocks = (pending + block_size - 1) / block_size;
        assert!(blocks > 0);
        assert!(blocks <= 0x1_0000);
        let blocks_1 = (blocks - 1) as u16;

        guard.buffer.borrow_mut()[..pending].copy_from_slice(chunk);

        self.namespace_rw(&*guard, namespace, lba, blocks_1, true)
            .await?;

        lba += blocks as u64;
    }

    Ok(buf.len())
}
}
+151
View File
@@ -0,0 +1,151 @@
use std::cell::UnsafeCell;
use std::ptr;
use syscall::Result;
use common::dma::Dma;
/// A submission queue entry.
///
/// With this packed layout the fields add up to 64 bytes.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C, packed)]
pub struct NvmeCmd {
/// Opcode
pub opcode: u8,
/// Flags
pub flags: u8,
/// Command ID
pub cid: u16,
/// Namespace identifier
pub nsid: u32,
/// Reserved
pub _rsvd: u64,
/// Metadata pointer
pub mptr: u64,
/// Data pointer
pub dptr: [u64; 2],
/// Command dword 10
pub cdw10: u32,
/// Command dword 11
pub cdw11: u32,
/// Command dword 12
pub cdw12: u32,
/// Command dword 13
pub cdw13: u32,
/// Command dword 14
pub cdw14: u32,
/// Command dword 15
pub cdw15: u32,
}
/// A completion queue entry.
///
/// With this packed layout the fields add up to 16 bytes.
#[derive(Clone, Copy, Debug)]
#[repr(C, packed)]
pub struct NvmeComp {
/// Command-specific result dword.
pub command_specific: u32,
pub _rsvd: u32,
/// Submission queue head reported with this completion.
pub sq_head: u16,
/// Identifier of the submission queue the command was issued on.
pub sq_id: u16,
/// Identifier of the completed command.
pub cid: u16,
/// Bit 0 is the phase tag checked by `NvmeCompQueue::complete`; the remaining bits carry the
/// status (see `Status::parse`).
pub status: u16,
}
/// Completion queue
pub struct NvmeCompQueue {
/// DMA memory holding the queue entries.
pub data: Dma<[UnsafeCell<NvmeComp>]>,
/// Index of the next entry to examine.
pub head: u16,
/// Phase tag value that marks an entry as new; flipped each time `head` wraps to 0.
pub phase: bool,
}
impl NvmeCompQueue {
    /// Allocates a zeroed completion queue with 256 entries.
    ///
    /// # Errors
    /// Returns the DMA allocation error, if any.
    pub fn new() -> Result<Self> {
        Ok(Self {
            data: unsafe { Dma::zeroed_slice(256)?.assume_init() },
            head: 0,
            phase: true,
        })
    }

    /// Get a new completion queue entry, or return None if no entry is available yet.
    ///
    /// An entry counts as new when its phase bit (bit 0 of `status`) matches the queue's current
    /// phase. On success the head advances (flipping the phase when it wraps to 0) and the new
    /// head is returned together with the entry.
    pub(crate) fn complete(&mut self) -> Option<(u16, NvmeComp)> {
        // Volatile: the device writes this memory behind the compiler's back.
        let entry = unsafe { ptr::read_volatile(self.data[usize::from(self.head)].get()) };

        if ((entry.status & 1) == 1) == self.phase {
            self.head = (self.head + 1) % (self.data.len() as u16);
            if self.head == 0 {
                self.phase = !self.phase;
            }
            Some((self.head, entry))
        } else {
            None
        }
    }

    /// Get a new CQ entry, busy waiting until an entry appears.
    pub fn complete_spin(&mut self) -> (u16, NvmeComp) {
        log::debug!("Waiting for new CQ entry");
        loop {
            if let Some(some) = self.complete() {
                return some;
            }
            // `spin_loop` is a safe function; no `unsafe` block is needed around it.
            std::hint::spin_loop();
        }
    }
}
/// Submission queue
pub struct NvmeCmdQueue {
/// DMA memory holding the queue entries.
pub data: Dma<[UnsafeCell<NvmeCmd>]>,
/// Index of the slot the next submitted command is written to.
pub tail: u16,
/// Head index as last reported by the controller in a completion entry.
pub head: u16,
}
impl NvmeCmdQueue {
    /// Allocates a zeroed submission queue with 64 entries.
    ///
    /// # Errors
    /// Returns the DMA allocation error, if any.
    pub fn new() -> Result<Self> {
        Ok(Self {
            data: unsafe { Dma::zeroed_slice(64)?.assume_init() },
            tail: 0,
            head: 0,
        })
    }

    /// Returns `true` when no submitted entry is outstanding (head equals tail).
    pub fn is_empty(&self) -> bool {
        self.head == self.tail
    }

    /// Returns `true` when the slot after the tail is the head, i.e. writing one more entry
    /// would make the ring look empty.
    ///
    /// The successor of the tail is computed modulo the queue length. Without the wrap-around a
    /// tail in the last slot with the head at 0 was not recognised as full.
    pub fn is_full(&self) -> bool {
        let next_tail = (self.tail + 1) % (self.data.len() as u16);
        self.head == next_tail
    }

    /// Add a new submission command entry to the queue. The caller must ensure that the queue have free
    /// entries; this can be checked using `is_full`.
    ///
    /// Returns the new tail index, which is the value to write to the tail doorbell.
    pub fn submit_unchecked(&mut self, entry: NvmeCmd) -> u16 {
        // Volatile: the device reads this memory behind the compiler's back.
        unsafe { ptr::write_volatile(self.data[usize::from(self.tail)].get(), entry) }
        self.tail = (self.tail + 1) % (self.data.len() as u16);
        self.tail
    }
}
/// Decoded status of a completion entry: the status-code type selects the variant, the payload
/// is the eight-bit status code.
#[derive(Debug)]
pub enum Status {
    GenericCmdStatus(u8),
    CommandSpecificStatus(u8),
    IntegrityError(u8),
    PathRelatedStatus(u8),
    Rsvd(u8),
    Vendor(u8),
}

impl Status {
    /// Decodes the raw 16-bit status word of a completion entry.
    ///
    /// Bit 0 (the phase tag) is ignored, bits 1..9 become the status code and bits 9..12 select
    /// the variant: 0 generic, 1 command specific, 2 integrity error, 3 path related,
    /// 4 through 6 reserved, 7 vendor specific.
    pub fn parse(raw: u16) -> Self {
        // One constructor per possible three-bit type value.
        const BY_TYPE: [fn(u8) -> Status; 8] = [
            Status::GenericCmdStatus,
            Status::CommandSpecificStatus,
            Status::IntegrityError,
            Status::PathRelatedStatus,
            Status::Rsvd,
            Status::Rsvd,
            Status::Rsvd,
            Status::Vendor,
        ];

        let code = (raw >> 1) as u8;
        let kind = usize::from((raw >> 9) & 0b111);
        BY_TYPE[kind](code)
    }
}