fix: harden driver-manager lifecycle
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::{self, File, OpenOptions};
|
||||
use std::fs;
|
||||
use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::process::{Child, Command};
|
||||
use std::string::String;
|
||||
use std::sync::Mutex;
|
||||
use std::vec::Vec;
|
||||
@@ -17,8 +17,8 @@ use serde::Deserialize;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct SpawnedDriver {
|
||||
pid: u32,
|
||||
bind_handle: File,
|
||||
child: Child,
|
||||
channel_fd: OwnedFd,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -95,6 +95,14 @@ impl DriverConfig {
|
||||
let matches: Vec<DriverMatch> =
|
||||
driver.r#match.into_iter().map(DriverMatch::from).collect();
|
||||
|
||||
if matches.is_empty() {
|
||||
log::warn!(
|
||||
"driver-manager: config {} driver={} has no PCI match entries and will not bind from PCI enumeration",
|
||||
path.display(),
|
||||
driver.name
|
||||
);
|
||||
}
|
||||
|
||||
configs.push(DriverConfig {
|
||||
name: driver.name,
|
||||
description: driver.description,
|
||||
@@ -120,27 +128,13 @@ fn pci_device_path(info: &DeviceInfo) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
fn claim_pci_device(info: &DeviceInfo) -> Result<(String, File), ProbeResult> {
|
||||
let device_path = pci_device_path(info);
|
||||
let bind_path = format!("{}/bind", device_path);
|
||||
|
||||
match OpenOptions::new().read(true).write(true).open(&bind_path) {
|
||||
Ok(bind_handle) => Ok((device_path, bind_handle)),
|
||||
Err(err) => match err.raw_os_error() {
|
||||
Some(code) if code == syscall::EALREADY as i32 || code == 114 => {
|
||||
log::debug!("device {} already claimed via {}", info.id.path, bind_path);
|
||||
Err(ProbeResult::NotSupported)
|
||||
}
|
||||
_ => Err(ProbeResult::Deferred {
|
||||
reason: format!("bind {} failed: {}", bind_path, err),
|
||||
}),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn open_pcid_channel(device_path: &str) -> Result<OwnedFd, ProbeResult> {
|
||||
let mut handle = match PciFunctionHandle::connect_by_path(Path::new(device_path)) {
|
||||
Ok(handle) => handle,
|
||||
Err(err) if err.raw_os_error() == Some(syscall::ENOLCK) => {
|
||||
log::debug!("device {} already has an active pcid client", device_path);
|
||||
return Err(ProbeResult::NotSupported);
|
||||
}
|
||||
Err(err) => {
|
||||
return Err(ProbeResult::Deferred {
|
||||
reason: format!("open channel for {} failed: {}", device_path, err),
|
||||
@@ -148,6 +142,10 @@ fn open_pcid_channel(device_path: &str) -> Result<OwnedFd, ProbeResult> {
|
||||
}
|
||||
};
|
||||
|
||||
log::info!(
|
||||
"driver-manager: enabling PCI device channel {}",
|
||||
device_path
|
||||
);
|
||||
handle.enable_device();
|
||||
|
||||
let channel_fd = handle.into_inner_fd();
|
||||
@@ -183,7 +181,14 @@ impl Driver for DriverConfig {
|
||||
let device_key = info.id.path.clone();
|
||||
|
||||
{
|
||||
let spawned = self.spawned.lock().unwrap();
|
||||
let spawned = match self.spawned.lock() {
|
||||
Ok(spawned) => spawned,
|
||||
Err(err) => {
|
||||
return ProbeResult::Fatal {
|
||||
reason: format!("spawn state lock poisoned: {err}"),
|
||||
};
|
||||
}
|
||||
};
|
||||
if spawned.contains_key(&device_key) {
|
||||
log::debug!("driver {} already bound to {}", self.name, device_key);
|
||||
return ProbeResult::Bound;
|
||||
@@ -223,10 +228,7 @@ impl Driver for DriverConfig {
|
||||
|
||||
log::info!("probing {} with driver {}", device_key, self.name);
|
||||
|
||||
let (device_path, bind_handle) = match claim_pci_device(info) {
|
||||
Ok(claimed) => claimed,
|
||||
Err(result) => return result,
|
||||
};
|
||||
let device_path = pci_device_path(info);
|
||||
|
||||
let channel_fd = match open_pcid_channel(&device_path) {
|
||||
Ok(channel_fd) => channel_fd,
|
||||
@@ -250,8 +252,15 @@ impl Driver for DriverConfig {
|
||||
pid,
|
||||
device_key
|
||||
);
|
||||
let mut spawned = self.spawned.lock().unwrap();
|
||||
spawned.insert(device_key, SpawnedDriver { pid, bind_handle });
|
||||
let mut spawned = match self.spawned.lock() {
|
||||
Ok(spawned) => spawned,
|
||||
Err(err) => {
|
||||
return ProbeResult::Fatal {
|
||||
reason: format!("spawn state lock poisoned after spawn: {err}"),
|
||||
};
|
||||
}
|
||||
};
|
||||
spawned.insert(device_key, SpawnedDriver { child, channel_fd });
|
||||
ProbeResult::Bound
|
||||
}
|
||||
Err(e) => ProbeResult::Fatal {
|
||||
@@ -262,20 +271,47 @@ impl Driver for DriverConfig {
|
||||
|
||||
fn remove(&self, info: &DeviceInfo) -> Result<(), DriverError> {
|
||||
let device_key = info.id.path.clone();
|
||||
let binding = {
|
||||
let mut spawned = self.spawned.lock().unwrap();
|
||||
let mut binding = {
|
||||
let mut spawned = self.spawned.lock().map_err(|err| {
|
||||
log::error!(
|
||||
"failed to unbind driver {} from {}: spawn state lock poisoned: {}",
|
||||
self.name,
|
||||
device_key,
|
||||
err
|
||||
);
|
||||
DriverError::Other("spawn state lock poisoned")
|
||||
})?;
|
||||
spawned.remove(&device_key)
|
||||
};
|
||||
|
||||
match binding {
|
||||
match binding.as_mut() {
|
||||
Some(binding) => {
|
||||
let bind_fd = binding.bind_handle.as_raw_fd();
|
||||
let pid = binding.child.id();
|
||||
let channel_fd = binding.channel_fd.as_raw_fd();
|
||||
if let Err(err) = binding.child.kill() {
|
||||
log::warn!(
|
||||
"failed to terminate driver {} pid {} for device {}: {}",
|
||||
self.name,
|
||||
pid,
|
||||
device_key,
|
||||
err
|
||||
);
|
||||
}
|
||||
if let Err(err) = binding.child.wait() {
|
||||
log::warn!(
|
||||
"failed to reap driver {} pid {} for device {}: {}",
|
||||
self.name,
|
||||
pid,
|
||||
device_key,
|
||||
err
|
||||
);
|
||||
}
|
||||
log::info!(
|
||||
"unbound: device {} from driver {} (pid {}, bind fd {})",
|
||||
"unbound: device {} from driver {} (pid {}, pcid channel fd {})",
|
||||
device_key,
|
||||
self.name,
|
||||
binding.pid,
|
||||
bind_fd
|
||||
pid,
|
||||
channel_fd
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -49,7 +49,6 @@ pub fn run_hotplug_loop(
|
||||
driver_name,
|
||||
} => {
|
||||
track_pci_device(device, &mut seen_pci_devices);
|
||||
notify_bound_device(scheme.as_ref(), device, driver_name);
|
||||
log::debug!("hotplug: already bound {} -> {}", device.path, driver_name);
|
||||
}
|
||||
ProbeEvent::ProbeCompleted {
|
||||
@@ -93,8 +92,40 @@ pub fn run_hotplug_loop(
|
||||
if pci_enumerated {
|
||||
for pci_addr in scheme.bound_device_addresses() {
|
||||
if !seen_pci_devices.contains(&pci_addr) {
|
||||
log::info!("hotplug: removed {}", pci_addr);
|
||||
notify_unbind(scheme.as_ref(), &pci_addr);
|
||||
let device_id = DeviceId {
|
||||
bus: String::from("pci"),
|
||||
path: pci_addr.clone(),
|
||||
};
|
||||
match manager.lock() {
|
||||
Ok(mut mgr) => match mgr.remove_device(&device_id) {
|
||||
Ok(Some(driver_name)) => {
|
||||
log::info!("hotplug: removed {} from {}", pci_addr, driver_name);
|
||||
notify_unbind(scheme.as_ref(), &pci_addr);
|
||||
}
|
||||
Ok(core::option::Option::None) => {
|
||||
log::warn!(
|
||||
"hotplug: {} disappeared but had no manager binding",
|
||||
pci_addr
|
||||
);
|
||||
notify_unbind(scheme.as_ref(), &pci_addr);
|
||||
}
|
||||
Err(err) => {
|
||||
log::error!(
|
||||
"hotplug: failed to detach removed device {}: {:?}",
|
||||
pci_addr,
|
||||
err
|
||||
);
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
log::error!(
|
||||
"hotplug: failed to detach removed device {}: manager lock poisoned: {}",
|
||||
pci_addr,
|
||||
err
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,36 +58,55 @@ fn run_enumeration(
|
||||
device,
|
||||
driver_name,
|
||||
result,
|
||||
} => {
|
||||
match result {
|
||||
ProbeResult::Bound => {
|
||||
log::info!("bound: {} -> {}", device.path, driver_name);
|
||||
notify_bound_device(scheme, device, driver_name);
|
||||
bound += 1;
|
||||
}
|
||||
ProbeResult::Deferred { reason } => {
|
||||
log::info!("deferred: {} -> {} ({})", device.path, driver_name, reason);
|
||||
deferred += 1;
|
||||
}
|
||||
ProbeResult::Fatal { reason } => {
|
||||
log::error!("fatal: {} -> {} ({})", device.path, driver_name, reason);
|
||||
}
|
||||
_ => {}
|
||||
} => match result {
|
||||
ProbeResult::Bound => {
|
||||
log::info!("bound: {} -> {}", device.path, driver_name);
|
||||
notify_bound_device(scheme, device, driver_name);
|
||||
bound += 1;
|
||||
}
|
||||
}
|
||||
ProbeResult::Deferred { reason } => {
|
||||
log::info!("deferred: {} -> {} ({})", device.path, driver_name, reason);
|
||||
deferred += 1;
|
||||
}
|
||||
ProbeResult::Fatal { reason } => {
|
||||
log::error!("fatal: {} -> {} ({})", device.path, driver_name, reason);
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
ProbeEvent::BusEnumerated { bus, device_count } => {
|
||||
log::info!("bus {} enumerated {} device(s)", bus, device_count);
|
||||
}
|
||||
ProbeEvent::BusEnumerationFailed { bus, error } => {
|
||||
log::error!("bus {} enumeration failed: {:?}", bus, error);
|
||||
}
|
||||
_ => {}
|
||||
ProbeEvent::AlreadyBound {
|
||||
device,
|
||||
driver_name,
|
||||
} => {
|
||||
log::debug!("already bound: {} -> {}", device.path, driver_name);
|
||||
}
|
||||
ProbeEvent::NoDriverFound { device } => {
|
||||
log::info!("no driver found for {} device {}", device.bus, device.path);
|
||||
}
|
||||
ProbeEvent::MissingDriver {
|
||||
device,
|
||||
driver_name,
|
||||
} => {
|
||||
log::error!(
|
||||
"deferred probe lost driver: {} device {} wanted {}",
|
||||
device.bus,
|
||||
device.path,
|
||||
driver_name
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log::info!(
|
||||
"enumeration complete: {} bound, {} deferred ({}ms total)",
|
||||
bound, deferred, enum_duration.as_millis()
|
||||
bound,
|
||||
deferred,
|
||||
enum_duration.as_millis()
|
||||
);
|
||||
|
||||
(bound, deferred)
|
||||
@@ -105,6 +124,22 @@ fn reset_timeline_log() {
|
||||
}
|
||||
}
|
||||
|
||||
fn json_escape(value: &str) -> String {
|
||||
let mut escaped = String::with_capacity(value.len());
|
||||
for ch in value.chars() {
|
||||
match ch {
|
||||
'\\' => escaped.push_str("\\\\"),
|
||||
'"' => escaped.push_str("\\\""),
|
||||
'\n' => escaped.push_str("\\n"),
|
||||
'\r' => escaped.push_str("\\r"),
|
||||
'\t' => escaped.push_str("\\t"),
|
||||
ch if ch.is_control() => escaped.push_str(&format!("\\u{:04x}", ch as u32)),
|
||||
ch => escaped.push(ch),
|
||||
}
|
||||
}
|
||||
escaped
|
||||
}
|
||||
|
||||
fn log_timeline(event: &ProbeEvent) {
|
||||
let timestamp = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
@@ -114,7 +149,15 @@ fn log_timeline(event: &ProbeEvent) {
|
||||
let entry = match event {
|
||||
ProbeEvent::BusEnumerated { bus, device_count } => format!(
|
||||
r#"{{"ts":{},"event":"bus_enumerated","bus":"{}","count":{}}}"#,
|
||||
timestamp, bus, device_count
|
||||
timestamp,
|
||||
json_escape(bus),
|
||||
device_count
|
||||
),
|
||||
ProbeEvent::BusEnumerationFailed { bus, error } => format!(
|
||||
r#"{{"ts":{},"event":"bus_enumeration_failed","bus":"{}","error":"{}"}}"#,
|
||||
timestamp,
|
||||
json_escape(bus),
|
||||
json_escape(&format!("{error:?}"))
|
||||
),
|
||||
ProbeEvent::ProbeCompleted {
|
||||
device,
|
||||
@@ -129,10 +172,38 @@ fn log_timeline(event: &ProbeEvent) {
|
||||
};
|
||||
format!(
|
||||
r#"{{"ts":{},"event":"probe","device":"{}","driver":"{}","status":"{}"}}"#,
|
||||
timestamp, device.path, driver_name, status
|
||||
timestamp,
|
||||
json_escape(&device.path),
|
||||
json_escape(driver_name),
|
||||
status
|
||||
)
|
||||
}
|
||||
_ => return,
|
||||
ProbeEvent::NoDriverFound { device } => format!(
|
||||
r#"{{"ts":{},"event":"no_driver","bus":"{}","device":"{}"}}"#,
|
||||
timestamp,
|
||||
json_escape(&device.bus),
|
||||
json_escape(&device.path)
|
||||
),
|
||||
ProbeEvent::AlreadyBound {
|
||||
device,
|
||||
driver_name,
|
||||
} => format!(
|
||||
r#"{{"ts":{},"event":"already_bound","bus":"{}","device":"{}","driver":"{}"}}"#,
|
||||
timestamp,
|
||||
json_escape(&device.bus),
|
||||
json_escape(&device.path),
|
||||
json_escape(driver_name)
|
||||
),
|
||||
ProbeEvent::MissingDriver {
|
||||
device,
|
||||
driver_name,
|
||||
} => format!(
|
||||
r#"{{"ts":{},"event":"missing_driver","bus":"{}","device":"{}","driver":"{}"}}"#,
|
||||
timestamp,
|
||||
json_escape(&device.bus),
|
||||
json_escape(&device.path),
|
||||
json_escape(driver_name)
|
||||
),
|
||||
};
|
||||
|
||||
match OpenOptions::new()
|
||||
@@ -230,7 +301,7 @@ fn main() {
|
||||
if hotplug_mode {
|
||||
log::info!("entering hotplug event loop");
|
||||
hotplug::run_hotplug_loop(manager.clone(), scheme.clone(), 2000);
|
||||
return;
|
||||
idle_forever();
|
||||
}
|
||||
|
||||
let max_retries = 30u32;
|
||||
@@ -269,7 +340,7 @@ fn main() {
|
||||
|
||||
if remaining == 0 {
|
||||
log::info!("all deferred resolved after {} retries", retry);
|
||||
return;
|
||||
idle_forever();
|
||||
}
|
||||
|
||||
if newly_bound > 0 {
|
||||
@@ -283,5 +354,12 @@ fn main() {
|
||||
}
|
||||
|
||||
log::warn!("deferred probe retry limit reached");
|
||||
process::exit(0);
|
||||
idle_forever();
|
||||
}
|
||||
|
||||
fn idle_forever() -> ! {
|
||||
log::info!("driver-manager: entering persistent idle loop");
|
||||
loop {
|
||||
thread::sleep(Duration::from_secs(3600));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -183,7 +183,7 @@ impl DriverManagerScheme {
|
||||
#[cfg(target_os = "redox")]
|
||||
fn read_handle_string(&self, kind: &HandleKind) -> Result<String> {
|
||||
match kind {
|
||||
HandleKind::Root => Ok("devices\nevents\n".to_string()),
|
||||
HandleKind::Root => Ok("devices\nbound\nevents\n".to_string()),
|
||||
HandleKind::Devices => {
|
||||
let addresses = self.sorted_bound_addresses().map_err(|err| {
|
||||
log::error!("driver-manager: failed to read bound device list: {err}");
|
||||
|
||||
Reference in New Issue
Block a user