intel: batch buffer + fence timeline (Phase 4 render path)
Add batch.rs — GPU command buffer construction helpers.
- BatchBuffer: append-style command builder for MI_BATCH_BUFFER_START, MI_FLUSH_DW, MI_STORE_DWORD_IMM, MI_STORE_DATA_IMM, MI_USER_INTERRUPT, MI_ARB_CHECK, MI_NOOP, and PIPE_CONTROL (flush L3 + CS stall + global GTT write + store data index).
- mi_flush_dw_cmd() and mi_batch_buffer_end_cmd() helpers.
- PPGTT directory init helper.

Add fence.rs — GPU/CPU synchronization.
- FenceTimeline: atomic seqno allocation and signal tracking; uses compare_exchange for lock-free concurrent signaling.
- Fence: per-submission fence with signal() and wait_timeout(); spin-waits with a configurable timeout.
- Send + Sync for cross-thread fence passing.

The modules are declared but not yet wired into the IntelDriver struct.

Linux reference: i915_sw_fence.c, i915_gem_execbuffer.c
This commit is contained in:
@@ -0,0 +1,155 @@
|
||||
use crate::driver::Result;
|
||||
|
||||
/// Builds the header dword of an MI (memory interface) command.
///
/// Mirrors the `MI_INSTR(opcode, flags)` macro of the Linux i915 driver with
/// `flags == 0`: the command opcode is placed at bit 23 and the low bits are
/// left clear so callers can OR in the dword-length field themselves.
const fn mi_instr(opcode: u32) -> u32 {
    opcode << 23
}

// MI command opcodes. Every opcode below is distinct except for
// `MI_STORE_DWORD_IMM` / `MI_STORE_DATA_IMM`, which are two names for the
// same store-immediate command (opcode 0x20).
const MI_BATCH_BUFFER_START: u32 = mi_instr(0x31);
const MI_BATCH_BUFFER_END: u32 = mi_instr(0x0A);
const MI_NOOP: u32 = mi_instr(0x00);
const MI_STORE_DWORD_IMM: u32 = mi_instr(0x20);
const MI_STORE_DATA_IMM: u32 = mi_instr(0x20);
const MI_USER_INTERRUPT: u32 = mi_instr(0x02);
const MI_FLUSH_DW: u32 = mi_instr(0x26);
const MI_ARB_CHECK: u32 = mi_instr(0x05);

/// Header of the render-engine PIPE_CONTROL command, without its length field.
const PIPE_CONTROL: u32 = 0x7A00_0000;

// PIPE_CONTROL flag bits.
const PIPE_CONTROL_FLUSH_L3: u32 = 1 << 27;
const PIPE_CONTROL_CS_STALL: u32 = 1 << 20;
// NOTE(review): i915 documents its `(1 << 2)` global-GTT bit as living in the
// address dword rather than the flags dword — confirm where this is meant to go.
const PIPE_CONTROL_GLOBAL_GTT_WRITE: u32 = 1 << 2;
const PIPE_CONTROL_STORE_DATA_INDEX: u32 = 1 << 21;
|
||||
|
||||
pub struct BatchBuffer {
|
||||
commands: Vec<u32>,
|
||||
}
|
||||
|
||||
impl BatchBuffer {
|
||||
pub fn new(capacity: usize) -> Self {
|
||||
Self { commands: Vec::with_capacity(capacity) }
|
||||
}
|
||||
|
||||
pub fn add_noop(&mut self) {
|
||||
self.commands.push(MI_NOOP);
|
||||
}
|
||||
|
||||
pub fn add_batch_buffer_start(&mut self, gtt_addr: u64, secondary: bool) {
|
||||
let mut cmd = MI_BATCH_BUFFER_START | (2 - 2);
|
||||
if secondary { cmd |= 1 << 22; }
|
||||
self.commands.push(cmd);
|
||||
self.commands.push(gtt_addr as u32);
|
||||
self.commands.push((gtt_addr >> 32) as u32);
|
||||
}
|
||||
|
||||
pub fn add_batch_buffer_end(&mut self) {
|
||||
self.commands.push(MI_BATCH_BUFFER_END);
|
||||
}
|
||||
|
||||
pub fn add_flush(&mut self) {
|
||||
self.commands.push(MI_FLUSH_DW);
|
||||
self.commands.push(0);
|
||||
self.commands.push(0);
|
||||
self.commands.push(0);
|
||||
}
|
||||
|
||||
pub fn add_mi_flush_dw(&mut self) {
|
||||
self.commands.push(MI_FLUSH_DW);
|
||||
self.commands.push(0);
|
||||
self.commands.push(0);
|
||||
self.commands.push(0);
|
||||
}
|
||||
|
||||
pub fn add_store_dword(&mut self, gtt_addr: u64, value: u32) {
|
||||
let cmd = MI_STORE_DWORD_IMM | (4 - 2);
|
||||
self.commands.push(cmd);
|
||||
self.commands.push(gtt_addr as u32);
|
||||
self.commands.push((gtt_addr >> 32) as u32);
|
||||
self.commands.push(value);
|
||||
}
|
||||
|
||||
pub fn add_store_data_imm(&mut self, gtt_addr: u64, value: u32) {
|
||||
let cmd = MI_STORE_DATA_IMM | (5 - 2);
|
||||
self.commands.push(cmd);
|
||||
self.commands.push(gtt_addr as u32);
|
||||
self.commands.push((gtt_addr >> 32) as u32);
|
||||
self.commands.push(value);
|
||||
self.commands.push(0);
|
||||
}
|
||||
|
||||
pub fn add_user_interrupt(&mut self) {
|
||||
self.commands.push(MI_USER_INTERRUPT | (1 - 2));
|
||||
}
|
||||
|
||||
pub fn add_arb_check(&mut self) {
|
||||
self.commands.push(MI_ARB_CHECK);
|
||||
}
|
||||
|
||||
pub fn add_pipe_control_flush(&mut self) {
|
||||
self.commands.push(PIPE_CONTROL | (6 - 2));
|
||||
self.commands.push(PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_CS_STALL);
|
||||
self.commands.push(0);
|
||||
self.commands.push(0);
|
||||
self.commands.push(0);
|
||||
self.commands.push(0);
|
||||
}
|
||||
|
||||
pub fn add_pipe_control_write(&mut self, gtt_addr: u64, value: u64) {
|
||||
self.commands.push(PIPE_CONTROL | (6 - 2));
|
||||
self.commands.push(
|
||||
PIPE_CONTROL_FLUSH_L3
|
||||
| PIPE_CONTROL_CS_STALL
|
||||
| PIPE_CONTROL_GLOBAL_GTT_WRITE
|
||||
| PIPE_CONTROL_STORE_DATA_INDEX,
|
||||
);
|
||||
self.commands.push(gtt_addr as u32);
|
||||
self.commands.push((gtt_addr >> 32) as u32);
|
||||
self.commands.push(value as u32);
|
||||
self.commands.push((value >> 32) as u32);
|
||||
}
|
||||
|
||||
pub fn as_slice(&self) -> &[u32] {
|
||||
&self.commands
|
||||
}
|
||||
|
||||
pub fn len_dwords(&self) -> usize {
|
||||
self.commands.len()
|
||||
}
|
||||
|
||||
pub fn into_vec(self) -> Vec<u32> {
|
||||
self.commands
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mi_flush_dw_cmd() -> Vec<u32> {
|
||||
vec![MI_FLUSH_DW, 0, 0, 0]
|
||||
}
|
||||
|
||||
pub fn mi_batch_buffer_end_cmd() -> Vec<u32> {
|
||||
vec![MI_BATCH_BUFFER_END]
|
||||
}
|
||||
|
||||
/// Returns a directory of `n_entries` 64-bit entries, all initialised to zero.
///
/// An `n_entries` of zero yields an empty vector.
pub fn make_ppgtt_directory(n_entries: usize) -> Vec<u64> {
    // `vec!` allocates and zero-fills in one step instead of pushing
    // entries one at a time.
    vec![0; n_entries]
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A flush (four dwords) followed by the end marker (one dword) yields
    /// five dwords in total.
    #[test]
    fn test_batch_buffer_basic() {
        let mut batch = BatchBuffer::new(16);
        batch.add_flush();
        batch.add_batch_buffer_end();

        let dword_count = batch.into_vec().len();
        assert_eq!(dword_count, 5);
    }

    /// The store command starts with a header advertising four dwords and
    /// ends with the stored value.
    #[test]
    fn test_store_dword() {
        let mut batch = BatchBuffer::new(16);
        batch.add_store_dword(0x1000, 42);

        let dwords = batch.into_vec();
        let expected_header = MI_STORE_DWORD_IMM | (4 - 2);
        assert_eq!(dwords[0], expected_header);
        assert_eq!(dwords[3], 42);
    }
}
|
||||
@@ -0,0 +1,114 @@
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::driver::Result;
|
||||
use crate::driver::DriverError;
|
||||
|
||||
pub struct FenceTimeline {
|
||||
next_seqno: AtomicU64,
|
||||
last_completed: AtomicU64,
|
||||
}
|
||||
|
||||
impl FenceTimeline {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
next_seqno: AtomicU64::new(1),
|
||||
last_completed: AtomicU64::new(0),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn allocate_seqno(&self) -> u64 {
|
||||
self.next_seqno.fetch_add(1, Ordering::SeqCst)
|
||||
}
|
||||
|
||||
pub fn signal(&self, seqno: u64) {
|
||||
let mut current = self.last_completed.load(Ordering::Acquire);
|
||||
while seqno > current {
|
||||
match self.last_completed.compare_exchange_weak(
|
||||
current, seqno,
|
||||
Ordering::Release, Ordering::Acquire,
|
||||
) {
|
||||
Ok(_) => break,
|
||||
Err(actual) => current = actual,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_completed(&self, seqno: u64) -> bool {
|
||||
self.last_completed.load(Ordering::Acquire) >= seqno
|
||||
}
|
||||
|
||||
pub fn wait(&self, seqno: u64, timeout_ms: u64) -> Result<()> {
|
||||
let deadline = std::time::Instant::now() + std::time::Duration::from_millis(timeout_ms);
|
||||
loop {
|
||||
if self.is_completed(seqno) { return Ok(()); }
|
||||
if std::time::Instant::now() > deadline {
|
||||
return Err(DriverError::Other(format!("fence wait timeout: seqno {}", seqno)));
|
||||
}
|
||||
std::hint::spin_loop();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn last_completed_seqno(&self) -> u64 {
|
||||
self.last_completed.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
pub fn next_seqno(&self) -> u64 {
|
||||
self.next_seqno.load(Ordering::SeqCst)
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl Send for FenceTimeline {}
|
||||
unsafe impl Sync for FenceTimeline {}
|
||||
|
||||
pub struct Fence {
|
||||
timeline: Arc<FenceTimeline>,
|
||||
seqno: u64,
|
||||
}
|
||||
|
||||
impl Fence {
|
||||
pub fn new(timeline: Arc<FenceTimeline>) -> Self {
|
||||
let seqno = timeline.allocate_seqno();
|
||||
Self { timeline, seqno }
|
||||
}
|
||||
|
||||
pub fn seqno(&self) -> u64 {
|
||||
self.seqno
|
||||
}
|
||||
|
||||
pub fn signal(&self) {
|
||||
self.timeline.signal(self.seqno);
|
||||
}
|
||||
|
||||
pub fn is_completed(&self) -> bool {
|
||||
self.timeline.is_completed(self.seqno)
|
||||
}
|
||||
|
||||
pub fn wait_timeout(&self, timeout_ms: u64) -> Result<()> {
|
||||
self.timeline.wait(self.seqno, timeout_ms)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh fence is incomplete until it is signalled.
    #[test]
    fn test_fence_basic() {
        let shared = Arc::new(FenceTimeline::new());
        let fence = Fence::new(shared.clone());

        assert!(!fence.is_completed());
        fence.signal();
        assert!(fence.is_completed());
    }

    /// Signalling a later fence also completes every earlier one.
    #[test]
    fn test_timeline_ordering() {
        let shared = Arc::new(FenceTimeline::new());
        let earlier = Fence::new(shared.clone());
        let later = Fence::new(shared.clone());
        assert!(earlier.seqno() < later.seqno());

        later.signal();
        assert!(earlier.is_completed());
    }
}
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod batch;
|
||||
pub mod cursor;
|
||||
pub mod display;
|
||||
pub mod display_cdclk;
|
||||
@@ -8,6 +9,7 @@ pub mod display_power;
|
||||
pub mod display_watermark;
|
||||
pub mod dp_aux;
|
||||
pub mod dp_link;
|
||||
pub mod fence;
|
||||
pub mod gmbus;
|
||||
pub mod gtt;
|
||||
pub mod hotplug;
|
||||
|
||||
Reference in New Issue
Block a user