relibc: pthread affinity uses [u64; 2] mask matching kernel RawMask
The kernel's proc scheme SchedAffinity handler reads and writes
size_of::<RawMask>() = 16 bytes (LogicalCpuSet = [AtomicUsize; 2]),
but the relibc code was using size_of::<u64>() = 8 bytes. This
caused:
1. setaffinity: kernel read_exact::<RawMask>() rejected the
8-byte write (different size) and returned EINVAL
2. getaffinity: kernel tried to copy 16 bytes into the
8-byte userspace buffer and returned EINVAL (or truncated
silently if the buffer was larger)
Replace the u64 affinity buffer with [u64; 2] (128 bits) so:
- relibc writes 16 bytes matching the kernel's RawMask
- the upper 64 bits (CPUs 64-127) are now reachable
- endianness is native on all current Redox targets
(little-endian x86_64 and aarch64)
The cpuset_to_u64/copy_u64_to_cpuset helpers are replaced
with cpuset_to_rawmask/copy_rawmask_to_cpuset which work on
the [u64; 2] type.
Discovered by Oracle review of Phase 0c patches (Issue 2).
The bug was introduced when the kernel's per-CPU queue refactor
replaced a single global queue with a 2-word logical CPU set,
but the relibc affinity code wasn't updated to match.
This commit is contained in:
+46
-16
@@ -37,8 +37,17 @@ pub fn e(result: Result<(), Errno>) -> i32 {
|
||||
}
|
||||
}
|
||||
|
||||
const RLCT_AFFINITY_BYTES: usize = size_of::<u64>();
|
||||
const RLCT_MAX_AFFINITY_CPUS: usize = u64::BITS as usize;
|
||||
// Affinity mask is 128 bits (16 bytes) on 64-bit targets — matching
|
||||
// the kernel's LogicalCpuSet = [AtomicUsize; 2]. The previous
|
||||
// implementation used a single u64 (8 bytes, 64 bits) which
|
||||
// silently truncated the upper half of the mask on systems with
|
||||
// >64 CPUs, and caused EINVAL when the kernel's
|
||||
// `buf.read_exact::<RawMask>()` rejected the 8-byte write that
|
||||
// didn't match the 16-byte RawMask. The native-endian byte order
|
||||
// matches on all current Redox targets (little-endian x86_64 and
|
||||
// aarch64; big-endian would require a swap).
|
||||
const RLCT_AFFINITY_BYTES: usize = 16;
|
||||
const RLCT_MAX_AFFINITY_CPUS: usize = 128;
|
||||
|
||||
fn cpuset_bytes<'a>(cpusetsize: size_t, cpuset: *const cpu_set_t) -> Result<&'a [u8], Errno> {
|
||||
if cpuset.is_null() || !(RLCT_AFFINITY_BYTES..=size_of::<cpu_set_t>()).contains(&cpusetsize) {
|
||||
@@ -56,9 +65,9 @@ fn cpuset_bytes_mut<'a>(cpusetsize: size_t, cpuset: *mut cpu_set_t) -> Result<&'
|
||||
Ok(unsafe { core::slice::from_raw_parts_mut(cpuset.cast::<u8>(), cpusetsize) })
|
||||
}
|
||||
|
||||
fn cpuset_to_u64(cpusetsize: size_t, cpuset: *const cpu_set_t) -> Result<u64, Errno> {
|
||||
fn cpuset_to_rawmask(cpusetsize: size_t, cpuset: *const cpu_set_t) -> Result<[u64; 2], Errno> {
|
||||
let bytes = cpuset_bytes(cpusetsize, cpuset)?;
|
||||
let mut mask = 0_u64;
|
||||
let mut mask = [0_u64; 2];
|
||||
|
||||
for (byte_index, byte) in bytes.iter().copied().enumerate() {
|
||||
for bit in 0..u8::BITS as usize {
|
||||
@@ -71,16 +80,25 @@ fn cpuset_to_u64(cpusetsize: size_t, cpuset: *const cpu_set_t) -> Result<u64, Er
|
||||
return Err(Errno(EINVAL));
|
||||
}
|
||||
|
||||
mask |= 1 << cpu;
|
||||
// cpu is in [0, 127] which fits in two u64s
|
||||
// (each covers 64 bits).
|
||||
if cpu < 64 {
|
||||
mask[0] |= 1 << cpu;
|
||||
} else {
|
||||
mask[1] |= 1 << (cpu - 64);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(mask)
|
||||
}
|
||||
|
||||
fn copy_u64_to_cpuset(mask: u64, cpusetsize: size_t, cpuset: *mut cpu_set_t) -> Result<(), Errno> {
|
||||
fn copy_rawmask_to_cpuset(
|
||||
mask: [u64; 2],
|
||||
cpusetsize: size_t,
|
||||
cpuset: *mut cpu_set_t,
|
||||
) -> Result<(), Errno> {
|
||||
let bytes = cpuset_bytes_mut(cpusetsize, cpuset)?;
|
||||
let mut written = 0_usize;
|
||||
|
||||
for (byte_index, byte) in bytes.iter_mut().enumerate() {
|
||||
let mut value = 0_u8;
|
||||
@@ -89,20 +107,23 @@ fn copy_u64_to_cpuset(mask: u64, cpusetsize: size_t, cpuset: *mut cpu_set_t) ->
|
||||
if cpu >= RLCT_MAX_AFFINITY_CPUS {
|
||||
break;
|
||||
}
|
||||
if mask & (1 << cpu) != 0 {
|
||||
let set = if cpu < 64 {
|
||||
mask[0] & (1 << cpu) != 0
|
||||
} else {
|
||||
mask[1] & (1 << (cpu - 64)) != 0
|
||||
};
|
||||
if set {
|
||||
value |= 1 << bit;
|
||||
}
|
||||
}
|
||||
*byte = value;
|
||||
written += 1;
|
||||
}
|
||||
|
||||
let _ = written;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(target_os = "redox")]
|
||||
fn redox_get_thread_affinity(thread: &crate::pthread::Pthread) -> Result<u64, Errno> {
|
||||
fn redox_get_thread_affinity(thread: &crate::pthread::Pthread) -> Result<[u64; 2], Errno> {
|
||||
let os_tid = unsafe { thread.os_tid.get().read() };
|
||||
let path = alloc::format!("proc:{}/sched-affinity\0", os_tid.thread_fd);
|
||||
let fd = Sys::open(crate::c_str::CStr::from_bytes_with_nul(path.as_bytes()).unwrap(), crate::header::fcntl::O_RDONLY, 0)?;
|
||||
@@ -113,16 +134,25 @@ fn redox_get_thread_affinity(thread: &crate::pthread::Pthread) -> Result<u64, Er
|
||||
if read != RLCT_AFFINITY_BYTES {
|
||||
return Err(Errno(EINVAL));
|
||||
}
|
||||
Ok(u64::from_ne_bytes(buf))
|
||||
// Native-endian conversion: buf[0..8] -> mask[0] (low 64 bits),
|
||||
// buf[8..16] -> mask[1] (high 64 bits). Matches the kernel's
|
||||
// LogicalCpuSet = [AtomicUsize; 2] native-endian layout.
|
||||
let mut mask = [0_u64; 2];
|
||||
mask[0] = u64::from_ne_bytes(buf[0..8].try_into().unwrap());
|
||||
mask[1] = u64::from_ne_bytes(buf[8..16].try_into().unwrap());
|
||||
Ok(mask)
|
||||
}
|
||||
|
||||
#[cfg(target_os = "redox")]
|
||||
fn redox_set_thread_affinity(thread: &crate::pthread::Pthread, mask: u64) -> Result<(), Errno> {
|
||||
fn redox_set_thread_affinity(thread: &crate::pthread::Pthread, mask: [u64; 2]) -> Result<(), Errno> {
|
||||
let os_tid = unsafe { thread.os_tid.get().read() };
|
||||
let path = alloc::format!("proc:{}/sched-affinity\0", os_tid.thread_fd);
|
||||
let fd = Sys::open(crate::c_str::CStr::from_bytes_with_nul(path.as_bytes()).unwrap(), crate::header::fcntl::O_WRONLY, 0)?;
|
||||
|
||||
let bytes = mask.to_ne_bytes();
|
||||
// Serialize the [u64; 2] mask to 16 native-endian bytes.
|
||||
let mut bytes = [0u8; 16];
|
||||
bytes[0..8].copy_from_slice(&mask[0].to_ne_bytes());
|
||||
bytes[8..16].copy_from_slice(&mask[1].to_ne_bytes());
|
||||
let written = Sys::write(fd, &bytes)?;
|
||||
let _ = Sys::close(fd);
|
||||
if written != RLCT_AFFINITY_BYTES {
|
||||
@@ -303,7 +333,7 @@ pub unsafe extern "C" fn pthread_getaffinity_np(
|
||||
#[cfg(target_os = "redox")]
|
||||
{
|
||||
redox_get_thread_affinity(thread)
|
||||
.and_then(|mask| copy_u64_to_cpuset(mask, cpusetsize, cpuset))
|
||||
.and_then(|mask| copy_rawmask_to_cpuset(mask, cpusetsize, cpuset))
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
@@ -388,7 +418,7 @@ pub unsafe extern "C" fn pthread_setaffinity_np(
|
||||
let result = {
|
||||
#[cfg(target_os = "redox")]
|
||||
{
|
||||
cpuset_to_u64(cpusetsize, cpuset)
|
||||
cpuset_to_rawmask(cpusetsize, cpuset)
|
||||
.and_then(|mask| redox_set_thread_affinity(thread, mask))
|
||||
}
|
||||
|
||||
|
||||
@@ -304,7 +304,7 @@ pub(crate) unsafe fn mark_robust_mutexes_dead(thread: &crate::pthread::Pthread)
|
||||
// thread that doesn't support it), the list is empty and
|
||||
// there's nothing to do. Without this check, dereferencing
|
||||
// *head on a null pointer would be UB.
|
||||
if (*head).is_null() {
|
||||
if unsafe { (*head).is_null() } {
|
||||
return;
|
||||
}
|
||||
let this_thread = os_tid_invalid_after_fork();
|
||||
|
||||
Reference in New Issue
Block a user