intel: batch buffer + fence timeline (Phase 4 render path)

Add batch.rs — GPU command buffer construction helpers.
- BatchBuffer: append-style command builder for MI_BATCH_BUFFER_START,
  MI_FLUSH_DW, MI_STORE_DWORD_IMM, MI_STORE_DATA_IMM,
  MI_USER_INTERRUPT, MI_ARB_CHECK, MI_NOOP, PIPE_CONTROL
  (flush L3 + CS stall + global GTT write + store data index)
- mi_flush_dw_cmd(), mi_batch_buffer_end_cmd() helpers
- PPGTT directory init helper

Add fence.rs — GPU/CPU synchronization.
- FenceTimeline: atomic seqno allocation and signal tracking;
  uses compare_exchange for lock-free concurrent signaling
- Fence: per-submission fence with signal() and wait_timeout();
  wait_timeout() spin-waits with a configurable timeout
- Send + Sync for cross-thread fence passing

Modules declared but not yet wired into IntelDriver struct.
Linux reference: i915_sw_fence.c, i915_gem_execbuffer.c
This commit is contained in:
2026-05-30 09:08:56 +03:00
parent 901fc44b6f
commit 493555b105
3 changed files with 271 additions and 0 deletions
@@ -0,0 +1,155 @@
use crate::driver::Result;
// MI_* command headers place the opcode in bits 28:23 of the first dword
// (Linux i915: `MI_INSTR(opcode, flags) = (opcode << 23) | flags`). Commands
// that take operands also carry a DWord length of `total_dwords - 2` in the
// low bits of the header; the emitters below OR that in.
const MI_BATCH_BUFFER_START: u32 = 0x31 << 23;
const MI_BATCH_BUFFER_END: u32 = 0x0A << 23;
const MI_NOOP: u32 = 0;
const MI_STORE_DWORD_IMM: u32 = 0x20 << 23;
// MI_STORE_DATA_IMM is the PRM name for the same opcode as MI_STORE_DWORD_IMM.
const MI_STORE_DATA_IMM: u32 = 0x20 << 23;
// Header bit selecting a 64-bit (qword) immediate store for opcode 0x20.
const MI_STORE_QWORD: u32 = 1 << 21;
const MI_USER_INTERRUPT: u32 = 0x02 << 23;
const MI_FLUSH_DW: u32 = 0x26 << 23;
const MI_ARB_CHECK: u32 = 0x05 << 23;
// Header bit marking an MI_BATCH_BUFFER_START as a second-level batch.
const MI_BATCH_SECOND_LEVEL: u32 = 1 << 22;
const PIPE_CONTROL: u32 = 0x7A00_0000;
const PIPE_CONTROL_FLUSH_L3: u32 = 1 << 27;
const PIPE_CONTROL_CS_STALL: u32 = 1 << 20;
// Destination-address-type bit in the PIPE_CONTROL flags dword (gen7+).
// Bit 2 is only a GTT selector inside the *address* dword on gen4-6.
const PIPE_CONTROL_GLOBAL_GTT_WRITE: u32 = 1 << 24;
const PIPE_CONTROL_STORE_DATA_INDEX: u32 = 1 << 21;
// Post-sync operation: write the immediate qword to the given address.
const PIPE_CONTROL_QW_WRITE: u32 = 1 << 14;

/// Append-only builder for a GPU command stream, stored as 32-bit dwords.
#[derive(Debug, Clone, Default)]
pub struct BatchBuffer {
    commands: Vec<u32>,
}

impl BatchBuffer {
    /// Creates an empty buffer with room for `capacity` dwords pre-allocated.
    pub fn new(capacity: usize) -> Self {
        Self {
            commands: Vec::with_capacity(capacity),
        }
    }

    /// Appends a 64-bit address as two dwords, low half first.
    fn push_u64(&mut self, value: u64) {
        // Truncation is intentional: the two casts select the low/high halves.
        self.commands
            .extend_from_slice(&[value as u32, (value >> 32) as u32]);
    }

    /// Appends a single `MI_NOOP` dword.
    pub fn add_noop(&mut self) {
        self.commands.push(MI_NOOP);
    }

    /// Appends a 3-dword `MI_BATCH_BUFFER_START` jumping to `gtt_addr`.
    ///
    /// When `secondary` is true the second-level-batch bit (bit 22) is set.
    pub fn add_batch_buffer_start(&mut self, gtt_addr: u64, secondary: bool) {
        // Header + 64-bit address = 3 dwords, so the length field is 3 - 2.
        let mut header = MI_BATCH_BUFFER_START | (3 - 2);
        if secondary {
            header |= MI_BATCH_SECOND_LEVEL;
        }
        self.commands.push(header);
        self.push_u64(gtt_addr);
    }

    /// Appends `MI_BATCH_BUFFER_END`, terminating the batch.
    pub fn add_batch_buffer_end(&mut self) {
        self.commands.push(MI_BATCH_BUFFER_END);
    }

    /// Appends a flush; identical to [`BatchBuffer::add_mi_flush_dw`].
    pub fn add_flush(&mut self) {
        self.add_mi_flush_dw();
    }

    /// Appends a 4-dword `MI_FLUSH_DW` with no post-sync write
    /// (address and immediate data are all zero).
    pub fn add_mi_flush_dw(&mut self) {
        self.commands
            .extend_from_slice(&[MI_FLUSH_DW | (4 - 2), 0, 0, 0]);
    }

    /// Appends a 4-dword `MI_STORE_DWORD_IMM` writing `value` to `gtt_addr`.
    pub fn add_store_dword(&mut self, gtt_addr: u64, value: u32) {
        self.commands.push(MI_STORE_DWORD_IMM | (4 - 2));
        self.push_u64(gtt_addr);
        self.commands.push(value);
    }

    /// Appends a 5-dword `MI_STORE_DATA_IMM` qword store to `gtt_addr`.
    ///
    /// The stored qword is `value` zero-extended to 64 bits.
    pub fn add_store_data_imm(&mut self, gtt_addr: u64, value: u32) {
        // A 5-dword packet carries a 64-bit payload, which requires the
        // store-qword bit alongside the length of 5 - 2.
        self.commands
            .push(MI_STORE_DATA_IMM | MI_STORE_QWORD | (5 - 2));
        self.push_u64(gtt_addr);
        self.push_u64(u64::from(value));
    }

    /// Appends `MI_USER_INTERRUPT`.
    ///
    /// This is a single-dword command with no length field; computing
    /// `1 - 2` for one would underflow `u32`.
    pub fn add_user_interrupt(&mut self) {
        self.commands.push(MI_USER_INTERRUPT);
    }

    /// Appends `MI_ARB_CHECK` (single dword).
    pub fn add_arb_check(&mut self) {
        self.commands.push(MI_ARB_CHECK);
    }

    /// Appends a 6-dword `PIPE_CONTROL` requesting an L3 flush and a CS stall,
    /// with no post-sync write.
    pub fn add_pipe_control_flush(&mut self) {
        self.commands.extend_from_slice(&[
            PIPE_CONTROL | (6 - 2),
            PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_CS_STALL,
            0,
            0,
            0,
            0,
        ]);
    }

    /// Appends a 6-dword `PIPE_CONTROL` that flushes L3, stalls the CS and
    /// then writes the 64-bit `value` as a post-sync operation.
    ///
    /// NOTE(review): `PIPE_CONTROL_STORE_DATA_INDEX` is kept from the original
    /// flag set. With it set the hardware is expected to interpret `gtt_addr`
    /// as an offset into the hardware status page rather than a full GTT
    /// address; confirm against callers.
    pub fn add_pipe_control_write(&mut self, gtt_addr: u64, value: u64) {
        self.commands.push(PIPE_CONTROL | (6 - 2));
        self.commands.push(
            PIPE_CONTROL_FLUSH_L3
                | PIPE_CONTROL_CS_STALL
                | PIPE_CONTROL_GLOBAL_GTT_WRITE
                | PIPE_CONTROL_STORE_DATA_INDEX
                // Without a post-sync op the address/data dwords are ignored.
                | PIPE_CONTROL_QW_WRITE,
        );
        self.push_u64(gtt_addr);
        self.push_u64(value);
    }

    /// Borrows the dwords emitted so far.
    pub fn as_slice(&self) -> &[u32] {
        &self.commands
    }

    /// Number of dwords emitted so far.
    pub fn len_dwords(&self) -> usize {
        self.commands.len()
    }

    /// Consumes the builder and returns the emitted dwords.
    pub fn into_vec(self) -> Vec<u32> {
        self.commands
    }
}
/// Returns a standalone 4-dword `MI_FLUSH_DW` packet with all operands zero.
///
/// The header's DWord length field is set to `4 - 2` so that it accounts for
/// every dword returned (header plus three operand dwords).
pub fn mi_flush_dw_cmd() -> Vec<u32> {
    vec![MI_FLUSH_DW | (4 - 2), 0, 0, 0]
}
/// Returns a one-dword packet holding only `MI_BATCH_BUFFER_END`.
pub fn mi_batch_buffer_end_cmd() -> Vec<u32> {
    [MI_BATCH_BUFFER_END].to_vec()
}
/// Returns a directory of `n_entries` 64-bit entries, all initialised to zero.
///
/// An `n_entries` of 0 yields an empty vector.
pub fn make_ppgtt_directory(n_entries: usize) -> Vec<u64> {
    vec![0; n_entries]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A flush followed by a terminator is five dwords, ending in the terminator.
    #[test]
    fn test_batch_buffer_basic() {
        let mut bb = BatchBuffer::new(16);
        bb.add_flush();
        bb.add_batch_buffer_end();
        assert_eq!(bb.len_dwords(), 5);
        let cmds = bb.into_vec();
        assert_eq!(cmds.len(), 5);
        assert_eq!(cmds.last().copied(), Some(MI_BATCH_BUFFER_END));
    }

    /// The store packet carries header, address (low then high half) and value.
    #[test]
    fn test_store_dword() {
        let mut bb = BatchBuffer::new(16);
        // Use an address above 4 GiB so the high dword is exercised too.
        bb.add_store_dword(0x1_0000_1000, 42);
        let cmds = bb.into_vec();
        assert_eq!(cmds.len(), 4);
        assert_eq!(cmds[0], MI_STORE_DWORD_IMM | (4 - 2));
        assert_eq!(cmds[1], 0x0000_1000);
        assert_eq!(cmds[2], 0x1);
        assert_eq!(cmds[3], 42);
    }
}
@@ -0,0 +1,114 @@
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use crate::driver::Result;
use crate::driver::DriverError;
/// Monotonic sequence-number timeline shared between fence producers and
/// waiters.
///
/// `next_seqno` hands out sequence numbers starting at 1; `last_completed`
/// holds the highest sequence number signalled so far (0 = none yet).
#[derive(Debug)]
pub struct FenceTimeline {
    next_seqno: AtomicU64,
    last_completed: AtomicU64,
}

impl FenceTimeline {
    /// Creates a timeline with nothing completed and the first seqno set to 1.
    pub fn new() -> Self {
        Self {
            next_seqno: AtomicU64::new(1),
            last_completed: AtomicU64::new(0),
        }
    }

    /// Reserves and returns the next sequence number.
    pub fn allocate_seqno(&self) -> u64 {
        self.next_seqno.fetch_add(1, Ordering::SeqCst)
    }

    /// Marks `seqno` (and therefore every lower seqno) as completed.
    ///
    /// The completed value only ever moves forward: signalling a seqno that is
    /// not greater than the current value leaves it unchanged.
    pub fn signal(&self, seqno: u64) {
        // An atomic max is the lock-free "advance if greater" operation.
        self.last_completed.fetch_max(seqno, Ordering::AcqRel);
    }

    /// Returns true once a seqno greater than or equal to `seqno` was signalled.
    pub fn is_completed(&self, seqno: u64) -> bool {
        self.last_completed.load(Ordering::Acquire) >= seqno
    }

    /// Busy-waits until `seqno` completes or `timeout_ms` milliseconds elapse.
    ///
    /// Completion is checked before the timeout, so an already-completed seqno
    /// returns `Ok(())` even with a zero timeout.
    ///
    /// # Errors
    ///
    /// Returns `DriverError::Other` if the timeout elapses first.
    pub fn wait(&self, seqno: u64, timeout_ms: u64) -> Result<()> {
        let timeout = std::time::Duration::from_millis(timeout_ms);
        // Compare elapsed time instead of computing `now + timeout`, which can
        // panic on overflow for very large timeouts.
        let started = std::time::Instant::now();
        loop {
            if self.is_completed(seqno) {
                return Ok(());
            }
            if started.elapsed() > timeout {
                return Err(DriverError::Other(format!("fence wait timeout: seqno {}", seqno)));
            }
            std::hint::spin_loop();
        }
    }

    /// Highest sequence number signalled so far (0 if none).
    pub fn last_completed_seqno(&self) -> u64 {
        self.last_completed.load(Ordering::Acquire)
    }

    /// The sequence number the next call to `allocate_seqno` will return.
    pub fn next_seqno(&self) -> u64 {
        self.next_seqno.load(Ordering::SeqCst)
    }
}

impl Default for FenceTimeline {
    fn default() -> Self {
        Self::new()
    }
}

// `FenceTimeline` is `Send + Sync` automatically because both fields are
// `AtomicU64`, so no `unsafe impl` is needed. This compile-time check keeps
// that guarantee from regressing if a field is added later.
const _: fn() = || {
    fn assert_send_sync<T: Send + Sync>() {}
    assert_send_sync::<FenceTimeline>();
};
/// One point on a [`FenceTimeline`], identified by the sequence number that
/// was allocated for it at construction.
pub struct Fence {
    timeline: Arc<FenceTimeline>,
    seqno: u64,
}

impl Fence {
    /// Creates a fence on `timeline`, reserving the timeline's next sequence
    /// number for it.
    pub fn new(timeline: Arc<FenceTimeline>) -> Self {
        // Fields are evaluated in written order, so the seqno is allocated
        // before `timeline` is moved into the struct.
        Self {
            seqno: timeline.allocate_seqno(),
            timeline,
        }
    }

    /// The sequence number this fence occupies on its timeline.
    pub fn seqno(&self) -> u64 {
        self.seqno
    }

    /// Signals this fence's sequence number on the timeline.
    pub fn signal(&self) {
        let Self { timeline, seqno } = self;
        timeline.signal(*seqno);
    }

    /// Returns whether the timeline reports this fence's seqno as completed.
    pub fn is_completed(&self) -> bool {
        let Self { timeline, seqno } = self;
        timeline.is_completed(*seqno)
    }

    /// Waits on the timeline for this fence's seqno, for at most `timeout_ms`
    /// milliseconds, forwarding the timeline's result.
    pub fn wait_timeout(&self, timeout_ms: u64) -> Result<()> {
        let Self { timeline, seqno } = self;
        timeline.wait(*seqno, timeout_ms)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh fence is pending and becomes completed once signalled.
    #[test]
    fn test_fence_basic() {
        let timeline = Arc::new(FenceTimeline::new());
        let fence = Fence::new(timeline.clone());
        assert!(!fence.is_completed());
        fence.signal();
        assert!(fence.is_completed());
    }

    /// Signalling a later fence completes earlier ones, and a late signal of
    /// an earlier fence never moves the timeline backwards.
    #[test]
    fn test_timeline_ordering() {
        let timeline = Arc::new(FenceTimeline::new());
        let f1 = Fence::new(timeline.clone());
        let f2 = Fence::new(timeline.clone());
        assert!(f1.seqno() < f2.seqno());
        f2.signal();
        assert!(f1.is_completed());
        assert!(f2.is_completed());
        f1.signal();
        assert!(f2.is_completed());
        assert_eq!(timeline.last_completed_seqno(), f2.seqno());
    }

    /// Waiting on an unsignalled fence times out; after signalling it succeeds.
    #[test]
    fn test_wait_timeout() {
        let timeline = Arc::new(FenceTimeline::new());
        let fence = Fence::new(timeline);
        assert!(fence.wait_timeout(1).is_err());
        fence.signal();
        assert!(fence.wait_timeout(1).is_ok());
    }
}
@@ -1,3 +1,4 @@
pub mod batch;
pub mod cursor;
pub mod display;
pub mod display_cdclk;
@@ -8,6 +9,7 @@ pub mod display_power;
pub mod display_watermark;
pub mod dp_aux;
pub mod dp_link;
pub mod fence;
pub mod gmbus;
pub mod gtt;
pub mod hotplug;