fix: harden driver-manager lifecycle

This commit is contained in:
2026-05-09 01:32:57 +01:00
parent 2f11cc39e2
commit 84416f679e
4 changed files with 209 additions and 64 deletions
@@ -1,8 +1,8 @@
use std::collections::HashMap;
use std::fs::{self, File, OpenOptions};
use std::fs;
use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
use std::path::Path;
use std::process::Command;
use std::process::{Child, Command};
use std::string::String;
use std::sync::Mutex;
use std::vec::Vec;
@@ -17,8 +17,8 @@ use serde::Deserialize;
#[derive(Debug)]
struct SpawnedDriver {
pid: u32,
bind_handle: File,
child: Child,
channel_fd: OwnedFd,
}
#[derive(Debug)]
@@ -95,6 +95,14 @@ impl DriverConfig {
let matches: Vec<DriverMatch> =
driver.r#match.into_iter().map(DriverMatch::from).collect();
if matches.is_empty() {
log::warn!(
"driver-manager: config {} driver={} has no PCI match entries and will not bind from PCI enumeration",
path.display(),
driver.name
);
}
configs.push(DriverConfig {
name: driver.name,
description: driver.description,
@@ -120,27 +128,13 @@ fn pci_device_path(info: &DeviceInfo) -> String {
}
}
/// Claims a PCI device by opening its `bind` node for reading and writing.
///
/// On success returns the device path together with the open bind handle.
///
/// # Errors
///
/// * `ProbeResult::NotSupported` when the open fails with an OS error code equal to
///   `syscall::EALREADY` or `114`.
/// * `ProbeResult::Deferred` for every other open failure; the reason carries the bind
///   path and the underlying error.
fn claim_pci_device(info: &DeviceInfo) -> Result<(String, File), ProbeResult> {
    let device_path = pci_device_path(info);
    let bind_path = format!("{}/bind", device_path);

    let open_err = match OpenOptions::new().read(true).write(true).open(&bind_path) {
        Ok(bind_handle) => return Ok((device_path, bind_handle)),
        Err(open_err) => open_err,
    };

    // NOTE(review): 114 is presumably the Linux numeric value of EALREADY, accepted
    // alongside the syscall crate's constant — confirm before relying on it.
    let already_claimed = matches!(
        open_err.raw_os_error(),
        Some(code) if code == syscall::EALREADY as i32 || code == 114
    );

    if already_claimed {
        log::debug!("device {} already claimed via {}", info.id.path, bind_path);
        return Err(ProbeResult::NotSupported);
    }

    Err(ProbeResult::Deferred {
        reason: format!("bind {} failed: {}", bind_path, open_err),
    })
}
fn open_pcid_channel(device_path: &str) -> Result<OwnedFd, ProbeResult> {
let mut handle = match PciFunctionHandle::connect_by_path(Path::new(device_path)) {
Ok(handle) => handle,
Err(err) if err.raw_os_error() == Some(syscall::ENOLCK) => {
log::debug!("device {} already has an active pcid client", device_path);
return Err(ProbeResult::NotSupported);
}
Err(err) => {
return Err(ProbeResult::Deferred {
reason: format!("open channel for {} failed: {}", device_path, err),
@@ -148,6 +142,10 @@ fn open_pcid_channel(device_path: &str) -> Result<OwnedFd, ProbeResult> {
}
};
log::info!(
"driver-manager: enabling PCI device channel {}",
device_path
);
handle.enable_device();
let channel_fd = handle.into_inner_fd();
@@ -183,7 +181,14 @@ impl Driver for DriverConfig {
let device_key = info.id.path.clone();
{
let spawned = self.spawned.lock().unwrap();
let spawned = match self.spawned.lock() {
Ok(spawned) => spawned,
Err(err) => {
return ProbeResult::Fatal {
reason: format!("spawn state lock poisoned: {err}"),
};
}
};
if spawned.contains_key(&device_key) {
log::debug!("driver {} already bound to {}", self.name, device_key);
return ProbeResult::Bound;
@@ -223,10 +228,7 @@ impl Driver for DriverConfig {
log::info!("probing {} with driver {}", device_key, self.name);
let (device_path, bind_handle) = match claim_pci_device(info) {
Ok(claimed) => claimed,
Err(result) => return result,
};
let device_path = pci_device_path(info);
let channel_fd = match open_pcid_channel(&device_path) {
Ok(channel_fd) => channel_fd,
@@ -250,8 +252,15 @@ impl Driver for DriverConfig {
pid,
device_key
);
let mut spawned = self.spawned.lock().unwrap();
spawned.insert(device_key, SpawnedDriver { pid, bind_handle });
let mut spawned = match self.spawned.lock() {
Ok(spawned) => spawned,
Err(err) => {
return ProbeResult::Fatal {
reason: format!("spawn state lock poisoned after spawn: {err}"),
};
}
};
spawned.insert(device_key, SpawnedDriver { child, channel_fd });
ProbeResult::Bound
}
Err(e) => ProbeResult::Fatal {
@@ -262,20 +271,47 @@ impl Driver for DriverConfig {
fn remove(&self, info: &DeviceInfo) -> Result<(), DriverError> {
let device_key = info.id.path.clone();
let binding = {
let mut spawned = self.spawned.lock().unwrap();
let mut binding = {
let mut spawned = self.spawned.lock().map_err(|err| {
log::error!(
"failed to unbind driver {} from {}: spawn state lock poisoned: {}",
self.name,
device_key,
err
);
DriverError::Other("spawn state lock poisoned")
})?;
spawned.remove(&device_key)
};
match binding {
match binding.as_mut() {
Some(binding) => {
let bind_fd = binding.bind_handle.as_raw_fd();
let pid = binding.child.id();
let channel_fd = binding.channel_fd.as_raw_fd();
if let Err(err) = binding.child.kill() {
log::warn!(
"failed to terminate driver {} pid {} for device {}: {}",
self.name,
pid,
device_key,
err
);
}
if let Err(err) = binding.child.wait() {
log::warn!(
"failed to reap driver {} pid {} for device {}: {}",
self.name,
pid,
device_key,
err
);
}
log::info!(
"unbound: device {} from driver {} (pid {}, bind fd {})",
"unbound: device {} from driver {} (pid {}, pcid channel fd {})",
device_key,
self.name,
binding.pid,
bind_fd
pid,
channel_fd
);
Ok(())
}
@@ -49,7 +49,6 @@ pub fn run_hotplug_loop(
driver_name,
} => {
track_pci_device(device, &mut seen_pci_devices);
notify_bound_device(scheme.as_ref(), device, driver_name);
log::debug!("hotplug: already bound {} -> {}", device.path, driver_name);
}
ProbeEvent::ProbeCompleted {
@@ -93,8 +92,40 @@ pub fn run_hotplug_loop(
if pci_enumerated {
for pci_addr in scheme.bound_device_addresses() {
if !seen_pci_devices.contains(&pci_addr) {
log::info!("hotplug: removed {}", pci_addr);
notify_unbind(scheme.as_ref(), &pci_addr);
let device_id = DeviceId {
bus: String::from("pci"),
path: pci_addr.clone(),
};
match manager.lock() {
Ok(mut mgr) => match mgr.remove_device(&device_id) {
Ok(Some(driver_name)) => {
log::info!("hotplug: removed {} from {}", pci_addr, driver_name);
notify_unbind(scheme.as_ref(), &pci_addr);
}
Ok(core::option::Option::None) => {
log::warn!(
"hotplug: {} disappeared but had no manager binding",
pci_addr
);
notify_unbind(scheme.as_ref(), &pci_addr);
}
Err(err) => {
log::error!(
"hotplug: failed to detach removed device {}: {:?}",
pci_addr,
err
);
}
},
Err(err) => {
log::error!(
"hotplug: failed to detach removed device {}: manager lock poisoned: {}",
pci_addr,
err
);
break;
}
}
}
}
}
@@ -58,36 +58,55 @@ fn run_enumeration(
device,
driver_name,
result,
} => {
match result {
ProbeResult::Bound => {
log::info!("bound: {} -> {}", device.path, driver_name);
notify_bound_device(scheme, device, driver_name);
bound += 1;
}
ProbeResult::Deferred { reason } => {
log::info!("deferred: {} -> {} ({})", device.path, driver_name, reason);
deferred += 1;
}
ProbeResult::Fatal { reason } => {
log::error!("fatal: {} -> {} ({})", device.path, driver_name, reason);
}
_ => {}
} => match result {
ProbeResult::Bound => {
log::info!("bound: {} -> {}", device.path, driver_name);
notify_bound_device(scheme, device, driver_name);
bound += 1;
}
}
ProbeResult::Deferred { reason } => {
log::info!("deferred: {} -> {} ({})", device.path, driver_name, reason);
deferred += 1;
}
ProbeResult::Fatal { reason } => {
log::error!("fatal: {} -> {} ({})", device.path, driver_name, reason);
}
_ => {}
},
ProbeEvent::BusEnumerated { bus, device_count } => {
log::info!("bus {} enumerated {} device(s)", bus, device_count);
}
ProbeEvent::BusEnumerationFailed { bus, error } => {
log::error!("bus {} enumeration failed: {:?}", bus, error);
}
_ => {}
ProbeEvent::AlreadyBound {
device,
driver_name,
} => {
log::debug!("already bound: {} -> {}", device.path, driver_name);
}
ProbeEvent::NoDriverFound { device } => {
log::info!("no driver found for {} device {}", device.bus, device.path);
}
ProbeEvent::MissingDriver {
device,
driver_name,
} => {
log::error!(
"deferred probe lost driver: {} device {} wanted {}",
device.bus,
device.path,
driver_name
);
}
}
}
log::info!(
"enumeration complete: {} bound, {} deferred ({}ms total)",
bound, deferred, enum_duration.as_millis()
bound,
deferred,
enum_duration.as_millis()
);
(bound, deferred)
@@ -105,6 +124,22 @@ fn reset_timeline_log() {
}
}
/// Escapes `value` so it can be placed between double quotes in a JSON document.
///
/// Backslash, double quote, newline, carriage return and tab get their two-character
/// escapes; any other control character is written as a `\uXXXX` sequence with
/// lowercase hex digits. All remaining characters are copied through unchanged.
fn json_escape(value: &str) -> String {
    let mut out = String::with_capacity(value.len());

    for c in value.chars() {
        // Characters that have a dedicated short escape.
        let short_form = match c {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };

        if let Some(seq) = short_form {
            out.push_str(seq);
        } else if c.is_control() {
            out += &format!("\\u{:04x}", u32::from(c));
        } else {
            out.push(c);
        }
    }

    out
}
fn log_timeline(event: &ProbeEvent) {
let timestamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
@@ -114,7 +149,15 @@ fn log_timeline(event: &ProbeEvent) {
let entry = match event {
ProbeEvent::BusEnumerated { bus, device_count } => format!(
r#"{{"ts":{},"event":"bus_enumerated","bus":"{}","count":{}}}"#,
timestamp, bus, device_count
timestamp,
json_escape(bus),
device_count
),
ProbeEvent::BusEnumerationFailed { bus, error } => format!(
r#"{{"ts":{},"event":"bus_enumeration_failed","bus":"{}","error":"{}"}}"#,
timestamp,
json_escape(bus),
json_escape(&format!("{error:?}"))
),
ProbeEvent::ProbeCompleted {
device,
@@ -129,10 +172,38 @@ fn log_timeline(event: &ProbeEvent) {
};
format!(
r#"{{"ts":{},"event":"probe","device":"{}","driver":"{}","status":"{}"}}"#,
timestamp, device.path, driver_name, status
timestamp,
json_escape(&device.path),
json_escape(driver_name),
status
)
}
_ => return,
ProbeEvent::NoDriverFound { device } => format!(
r#"{{"ts":{},"event":"no_driver","bus":"{}","device":"{}"}}"#,
timestamp,
json_escape(&device.bus),
json_escape(&device.path)
),
ProbeEvent::AlreadyBound {
device,
driver_name,
} => format!(
r#"{{"ts":{},"event":"already_bound","bus":"{}","device":"{}","driver":"{}"}}"#,
timestamp,
json_escape(&device.bus),
json_escape(&device.path),
json_escape(driver_name)
),
ProbeEvent::MissingDriver {
device,
driver_name,
} => format!(
r#"{{"ts":{},"event":"missing_driver","bus":"{}","device":"{}","driver":"{}"}}"#,
timestamp,
json_escape(&device.bus),
json_escape(&device.path),
json_escape(driver_name)
),
};
match OpenOptions::new()
@@ -230,7 +301,7 @@ fn main() {
if hotplug_mode {
log::info!("entering hotplug event loop");
hotplug::run_hotplug_loop(manager.clone(), scheme.clone(), 2000);
return;
idle_forever();
}
let max_retries = 30u32;
@@ -269,7 +340,7 @@ fn main() {
if remaining == 0 {
log::info!("all deferred resolved after {} retries", retry);
return;
idle_forever();
}
if newly_bound > 0 {
@@ -283,5 +354,12 @@ fn main() {
}
log::warn!("deferred probe retry limit reached");
process::exit(0);
idle_forever();
}
/// Blocks the calling thread forever instead of letting the process exit.
///
/// Emits one info-level log line, then sleeps in one-hour slices in an endless loop.
/// This function never returns.
fn idle_forever() -> ! {
    // Length of each sleep slice, in seconds.
    const IDLE_SLICE: Duration = Duration::from_secs(3600);

    log::info!("driver-manager: entering persistent idle loop");
    loop {
        thread::sleep(IDLE_SLICE);
    }
}
@@ -183,7 +183,7 @@ impl DriverManagerScheme {
#[cfg(target_os = "redox")]
fn read_handle_string(&self, kind: &HandleKind) -> Result<String> {
match kind {
HandleKind::Root => Ok("devices\nevents\n".to_string()),
HandleKind::Root => Ok("devices\nbound\nevents\n".to_string()),
HandleKind::Devices => {
let addresses = self.sorted_bound_addresses().map_err(|err| {
log::error!("driver-manager: failed to read bound device list: {err}");