[PATCH v13 1/2] rust: gpu: Add GPU buddy allocator bindings

Joel Fernandes posted 2 patches 2 weeks, 5 days ago
There is a newer version of this series
[PATCH v13 1/2] rust: gpu: Add GPU buddy allocator bindings
Posted by Joel Fernandes 2 weeks, 5 days ago
Add safe Rust abstractions over the Linux kernel's GPU buddy
allocator for physical memory management. The GPU buddy allocator
implements a binary buddy system useful for GPU physical memory
allocation. nova-core will use it for physical memory allocation.

Cc: Nikola Djukic <ndjukic@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
 MAINTAINERS                     |   6 +
 rust/bindings/bindings_helper.h |  11 +
 rust/helpers/gpu.c              |  23 ++
 rust/helpers/helpers.c          |   1 +
 rust/kernel/gpu.rs              |   6 +
 rust/kernel/gpu/buddy.rs        | 622 ++++++++++++++++++++++++++++++++
 rust/kernel/lib.rs              |   2 +
 7 files changed, 671 insertions(+)
 create mode 100644 rust/helpers/gpu.c
 create mode 100644 rust/kernel/gpu.rs
 create mode 100644 rust/kernel/gpu/buddy.rs

diff --git a/MAINTAINERS b/MAINTAINERS
index e847099efcc2..cd9505d3be60 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8531,7 +8531,10 @@ T:	git https://gitlab.freedesktop.org/drm/rust/kernel.git
 F:	drivers/gpu/drm/nova/
 F:	drivers/gpu/drm/tyr/
 F:	drivers/gpu/nova-core/
+F:	rust/helpers/gpu.c
 F:	rust/kernel/drm/
+F:	rust/kernel/gpu.rs
+F:	rust/kernel/gpu/
 
 DRM DRIVERS FOR ALLWINNER A10
 M:	Chen-Yu Tsai <wens@kernel.org>
@@ -8952,6 +8955,9 @@ F:	drivers/gpu/drm/drm_buddy.c
 F:	drivers/gpu/tests/gpu_buddy_test.c
 F:	include/drm/drm_buddy.h
 F:	include/linux/gpu_buddy.h
+F:	rust/helpers/gpu.c
+F:	rust/kernel/gpu.rs
+F:	rust/kernel/gpu/
 
 DRM AUTOMATED TESTING
 M:	Helen Koike <helen.fornazier@gmail.com>
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index 083cc44aa952..dbb765a9fdbd 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -29,6 +29,7 @@
 #include <linux/hrtimer_types.h>
 
 #include <linux/acpi.h>
+#include <linux/gpu_buddy.h>
 #include <drm/drm_device.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
@@ -146,6 +147,16 @@ const vm_flags_t RUST_CONST_HELPER_VM_MIXEDMAP = VM_MIXEDMAP;
 const vm_flags_t RUST_CONST_HELPER_VM_HUGEPAGE = VM_HUGEPAGE;
 const vm_flags_t RUST_CONST_HELPER_VM_NOHUGEPAGE = VM_NOHUGEPAGE;
 
+#if IS_ENABLED(CONFIG_GPU_BUDDY)
+const unsigned long RUST_CONST_HELPER_GPU_BUDDY_RANGE_ALLOCATION = GPU_BUDDY_RANGE_ALLOCATION;
+const unsigned long RUST_CONST_HELPER_GPU_BUDDY_TOPDOWN_ALLOCATION = GPU_BUDDY_TOPDOWN_ALLOCATION;
+const unsigned long RUST_CONST_HELPER_GPU_BUDDY_CONTIGUOUS_ALLOCATION =
+								GPU_BUDDY_CONTIGUOUS_ALLOCATION;
+const unsigned long RUST_CONST_HELPER_GPU_BUDDY_CLEAR_ALLOCATION = GPU_BUDDY_CLEAR_ALLOCATION;
+const unsigned long RUST_CONST_HELPER_GPU_BUDDY_CLEARED = GPU_BUDDY_CLEARED;
+const unsigned long RUST_CONST_HELPER_GPU_BUDDY_TRIM_DISABLE = GPU_BUDDY_TRIM_DISABLE;
+#endif
+
 #if IS_ENABLED(CONFIG_ANDROID_BINDER_IPC_RUST)
 #include "../../drivers/android/binder/rust_binder.h"
 #include "../../drivers/android/binder/rust_binder_events.h"
diff --git a/rust/helpers/gpu.c b/rust/helpers/gpu.c
new file mode 100644
index 000000000000..38b1a4e6bef8
--- /dev/null
+++ b/rust/helpers/gpu.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/gpu_buddy.h>
+
+#ifdef CONFIG_GPU_BUDDY
+
+__rust_helper u64 rust_helper_gpu_buddy_block_offset(const struct gpu_buddy_block *block)
+{
+	return gpu_buddy_block_offset(block);
+}
+
+__rust_helper unsigned int rust_helper_gpu_buddy_block_order(struct gpu_buddy_block *block)
+{
+	return gpu_buddy_block_order(block);
+}
+
+__rust_helper u64 rust_helper_gpu_buddy_block_size(struct gpu_buddy *mm,
+						   struct gpu_buddy_block *block)
+{
+	return gpu_buddy_block_size(mm, block);
+}
+
+#endif /* CONFIG_GPU_BUDDY */
diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c
index 724fcb8240ac..a53929ce52a3 100644
--- a/rust/helpers/helpers.c
+++ b/rust/helpers/helpers.c
@@ -32,6 +32,7 @@
 #include "err.c"
 #include "irq.c"
 #include "fs.c"
+#include "gpu.c"
 #include "io.c"
 #include "jump_label.c"
 #include "kunit.c"
diff --git a/rust/kernel/gpu.rs b/rust/kernel/gpu.rs
new file mode 100644
index 000000000000..1dc5d0c8c09d
--- /dev/null
+++ b/rust/kernel/gpu.rs
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! GPU subsystem abstractions.
+
+#[cfg(CONFIG_GPU_BUDDY = "y")]
+pub mod buddy;
diff --git a/rust/kernel/gpu/buddy.rs b/rust/kernel/gpu/buddy.rs
new file mode 100644
index 000000000000..af99d66832fd
--- /dev/null
+++ b/rust/kernel/gpu/buddy.rs
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! GPU buddy allocator bindings.
+//!
+//! C header: [`include/linux/gpu_buddy.h`](srctree/include/linux/gpu_buddy.h)
+//!
+//! This module provides Rust abstractions over the Linux kernel's GPU buddy
+//! allocator, which implements a binary buddy memory allocator.
+//!
+//! The buddy allocator manages a contiguous address space and allocates blocks
+//! in power-of-two sizes, useful for GPU physical memory management.
+//!
+//! # Examples
+//!
+//! Create a buddy allocator and perform a basic range allocation:
+//!
+//! ```
+//! use kernel::{
+//!     gpu::buddy::{
+//!         GpuBuddy,
+//!         GpuBuddyAllocFlags,
+//!         GpuBuddyAllocMode,
+//!         GpuBuddyParams, //
+//!     },
+//!     prelude::*,
+//!     ptr::Alignment,
+//!     sizes::*, //
+//! };
+//!
+//! // Create a 1GB buddy allocator with 4KB minimum chunk size.
+//! let buddy = GpuBuddy::new(GpuBuddyParams {
+//!     base_offset: 0,
+//!     physical_memory_size: SZ_1G as u64,
+//!     chunk_size: Alignment::new::<SZ_4K>(),
+//! })?;
+//!
+//! assert_eq!(buddy.size(), SZ_1G as u64);
+//! assert_eq!(buddy.chunk_size(), Alignment::new::<SZ_4K>());
+//! let initial_free = buddy.free_memory();
+//!
+//! // Allocate 16MB. Block lands at the top of the address range.
+//! let allocated = KBox::pin_init(
+//!     buddy.alloc_blocks(
+//!         GpuBuddyAllocMode::Simple,
+//!         SZ_16M as u64,
+//!         Alignment::new::<SZ_16M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//! assert_eq!(buddy.free_memory(), initial_free - SZ_16M as u64);
+//!
+//! let block = allocated.iter().next().expect("expected one block");
+//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64);
+//! assert_eq!(block.order(), 12); // 2^12 pages = 16MB
+//! assert_eq!(block.size(), SZ_16M as u64);
+//! assert_eq!(allocated.iter().count(), 1);
+//!
+//! // Dropping the allocation returns the range to the buddy allocator.
+//! drop(allocated);
+//! assert_eq!(buddy.free_memory(), initial_free);
+//! # Ok::<(), Error>(())
+//! ```
+//!
+//! Top-down allocation allocates from the highest addresses:
+//!
+//! ```
+//! # use kernel::{
+//! #     gpu::buddy::{GpuBuddy, GpuBuddyAllocMode, GpuBuddyAllocFlags, GpuBuddyParams},
+//! #     prelude::*,
+//! #     ptr::Alignment,
+//! #     sizes::*, //
+//! # };
+//! # let buddy = GpuBuddy::new(GpuBuddyParams {
+//! #     base_offset: 0,
+//! #     physical_memory_size: SZ_1G as u64,
+//! #     chunk_size: Alignment::new::<SZ_4K>(),
+//! # })?;
+//! # let initial_free = buddy.free_memory();
+//! let topdown = KBox::pin_init(
+//!     buddy.alloc_blocks(
+//!         GpuBuddyAllocMode::TopDown,
+//!         SZ_16M as u64,
+//!         Alignment::new::<SZ_16M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//! assert_eq!(buddy.free_memory(), initial_free - SZ_16M as u64);
+//!
+//! let block = topdown.iter().next().expect("expected one block");
+//! assert_eq!(block.offset(), (SZ_1G - SZ_16M) as u64);
+//! assert_eq!(block.order(), 12);
+//! assert_eq!(block.size(), SZ_16M as u64);
+//!
+//! // Dropping the allocation returns the range to the buddy allocator.
+//! drop(topdown);
+//! assert_eq!(buddy.free_memory(), initial_free);
+//! # Ok::<(), Error>(())
+//! ```
+//!
+//! Non-contiguous allocation can fill fragmented memory by returning multiple
+//! blocks:
+//!
+//! ```
+//! # use kernel::{
+//! #     gpu::buddy::{
+//! #         GpuBuddy, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams,
+//! #     },
+//! #     prelude::*,
+//! #     ptr::Alignment,
+//! #     sizes::*, //
+//! # };
+//! # let buddy = GpuBuddy::new(GpuBuddyParams {
+//! #     base_offset: 0,
+//! #     physical_memory_size: SZ_1G as u64,
+//! #     chunk_size: Alignment::new::<SZ_4K>(),
+//! # })?;
+//! # let initial_free = buddy.free_memory();
+//! // Create fragmentation by allocating 4MB blocks at [0,4M) and [8M,12M).
+//! let frag1 = KBox::pin_init(
+//!     buddy.alloc_blocks(
+//!         GpuBuddyAllocMode::Range { start: 0, end: SZ_4M as u64 },
+//!         SZ_4M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//! assert_eq!(buddy.free_memory(), initial_free - SZ_4M as u64);
+//!
+//! let frag2 = KBox::pin_init(
+//!     buddy.alloc_blocks(
+//!         GpuBuddyAllocMode::Range {
+//!             start: SZ_8M as u64,
+//!             end: (SZ_8M + SZ_4M) as u64,
+//!         },
+//!         SZ_4M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//! assert_eq!(buddy.free_memory(), initial_free - SZ_8M as u64);
+//!
+//! // Allocate 8MB, this returns 2 blocks from the holes.
+//! let fragmented = KBox::pin_init(
+//!     buddy.alloc_blocks(
+//!         GpuBuddyAllocMode::Range { start: 0, end: SZ_16M as u64 },
+//!         SZ_8M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//! assert_eq!(buddy.free_memory(), initial_free - SZ_16M as u64);
+//!
+//! let (mut count, mut total) = (0u32, 0u64);
+//! for block in fragmented.iter() {
+//!     assert_eq!(block.size(), SZ_4M as u64);
+//!     total += block.size();
+//!     count += 1;
+//! }
+//! assert_eq!(total, SZ_8M as u64);
+//! assert_eq!(count, 2);
+//! # Ok::<(), Error>(())
+//! ```
+//!
+//! Contiguous allocation fails when only fragmented space is available:
+//!
+//! ```
+//! # use kernel::{
+//! #     gpu::buddy::{
+//! #         GpuBuddy, GpuBuddyAllocFlag, GpuBuddyAllocFlags, GpuBuddyAllocMode, GpuBuddyParams,
+//! #     },
+//! #     prelude::*,
+//! #     ptr::Alignment,
+//! #     sizes::*, //
+//! # };
+//! // Create a small 16MB buddy allocator with fragmented memory.
+//! let small = GpuBuddy::new(GpuBuddyParams {
+//!     base_offset: 0,
+//!     physical_memory_size: SZ_16M as u64,
+//!     chunk_size: Alignment::new::<SZ_4K>(),
+//! })?;
+//!
+//! let _hole1 = KBox::pin_init(
+//!     small.alloc_blocks(
+//!         GpuBuddyAllocMode::Range { start: 0, end: SZ_4M as u64 },
+//!         SZ_4M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//!
+//! let _hole2 = KBox::pin_init(
+//!     small.alloc_blocks(
+//!         GpuBuddyAllocMode::Range {
+//!             start: SZ_8M as u64,
+//!             end: (SZ_8M + SZ_4M) as u64,
+//!         },
+//!         SZ_4M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlags::default(),
+//!     ),
+//!     GFP_KERNEL,
+//! )?;
+//!
+//! // 8MB contiguous should fail, only two non-contiguous 4MB holes exist.
+//! let result = KBox::pin_init(
+//!     small.alloc_blocks(
+//!         GpuBuddyAllocMode::Simple,
+//!         SZ_8M as u64,
+//!         Alignment::new::<SZ_4M>(),
+//!         GpuBuddyAllocFlag::Contiguous,
+//!     ),
+//!     GFP_KERNEL,
+//! );
+//! assert!(result.is_err());
+//! # Ok::<(), Error>(())
+//! ```
+
+use crate::{
+    bindings,
+    clist_create,
+    error::to_result,
+    interop::list::CListHead,
+    new_mutex,
+    prelude::*,
+    ptr::Alignment,
+    sync::{
+        lock::mutex::MutexGuard,
+        Arc,
+        Mutex, //
+    },
+    types::Opaque, //
+};
+
+/// Allocation mode for the GPU buddy allocator.
+///
+/// The mode determines the primary allocation strategy. Modes are mutually
+/// exclusive: an allocation is either simple, range-constrained, or top-down.
+///
+/// Orthogonal modifier flags (e.g., contiguous, clear) are specified separately
+/// via [`GpuBuddyAllocFlags`].
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum GpuBuddyAllocMode {
+    /// Simple allocation without constraints.
+    Simple,
+    /// Range-based allocation between `start` and `end` addresses.
+    Range {
+        /// Start of the allocation range.
+        start: u64,
+        /// End of the allocation range.
+        end: u64,
+    },
+    /// Allocate from top of address space downward.
+    TopDown,
+}
+
+impl GpuBuddyAllocMode {
+    // Returns the C flags corresponding to the allocation mode.
+    fn into_flags(self) -> usize {
+        match self {
+            Self::Simple => 0,
+            Self::Range { .. } => bindings::GPU_BUDDY_RANGE_ALLOCATION,
+            Self::TopDown => bindings::GPU_BUDDY_TOPDOWN_ALLOCATION,
+        }
+    }
+
+    // Extracts the range start/end, defaulting to (0, 0) for non-range modes.
+    fn range(self) -> (u64, u64) {
+        match self {
+            Self::Range { start, end } => (start, end),
+            _ => (0, 0),
+        }
+    }
+}
+
+crate::impl_flags!(
+    /// Modifier flags for GPU buddy allocation.
+    ///
+    /// These flags can be combined with any [`GpuBuddyAllocMode`] to control
+    /// additional allocation behavior.
+    #[derive(Clone, Copy, Default, PartialEq, Eq)]
+    pub struct GpuBuddyAllocFlags(u32);
+
+    /// Individual modifier flag for GPU buddy allocation.
+    #[derive(Clone, Copy, PartialEq, Eq)]
+    pub enum GpuBuddyAllocFlag {
+        /// Allocate physically contiguous blocks.
+        Contiguous = bindings::GPU_BUDDY_CONTIGUOUS_ALLOCATION as u32,
+
+        /// Request allocation from cleared (zeroed) memory.
+        Clear = bindings::GPU_BUDDY_CLEAR_ALLOCATION as u32,
+
+        /// Disable trimming of partially used blocks.
+        TrimDisable = bindings::GPU_BUDDY_TRIM_DISABLE as u32,
+    }
+);
+
+/// Parameters for creating a GPU buddy allocator.
+pub struct GpuBuddyParams {
+    /// Base offset (in bytes) where the managed memory region starts.
+    /// Allocations will be offset by this value.
+    pub base_offset: u64,
+    /// Total physical memory size (in bytes) managed by the allocator.
+    pub physical_memory_size: u64,
+    /// Minimum allocation unit / chunk size, must be >= 4KB.
+    pub chunk_size: Alignment,
+}
+
+/// Inner structure holding the actual buddy allocator.
+///
+/// # Synchronization
+///
+/// The C `gpu_buddy` API requires synchronization (see `include/linux/gpu_buddy.h`).
+/// Internal locking ensures all allocator and free operations are properly
+/// synchronized, preventing races between concurrent allocations and the
+/// freeing that occurs when [`AllocatedBlocks`] is dropped.
+///
+/// # Invariants
+///
+/// The inner [`Opaque`] contains an initialized buddy allocator.
+#[pin_data(PinnedDrop)]
+struct GpuBuddyInner {
+    #[pin]
+    inner: Opaque<bindings::gpu_buddy>,
+
+    // TODO: Replace `Mutex<()>` with `Mutex<Opaque<..>>` once `Mutex::new()`
+    // accepts `impl PinInit<T>`.
+    #[pin]
+    lock: Mutex<()>,
+    /// Cached creation parameters (do not change after init).
+    params: GpuBuddyParams,
+}
+
+impl GpuBuddyInner {
+    /// Create a pin-initializer for the buddy allocator.
+    fn new(params: GpuBuddyParams) -> impl PinInit<Self, Error> {
+        let size = params.physical_memory_size;
+        let chunk_size = params.chunk_size;
+
+        // INVARIANT: `gpu_buddy_init` returns 0 on success, at which point the
+        // `gpu_buddy` structure is initialized and ready for use with all
+        // `gpu_buddy_*` APIs. `try_pin_init!` only completes if all fields succeed,
+        // so the invariant holds when construction finishes.
+        try_pin_init!(Self {
+            inner <- Opaque::try_ffi_init(|ptr| {
+                // SAFETY: `ptr` points to valid uninitialized memory from the pin-init
+                // infrastructure. `gpu_buddy_init` will initialize the structure.
+                to_result(unsafe { bindings::gpu_buddy_init(ptr, size, chunk_size.as_usize() as u64) })
+            }),
+            lock <- new_mutex!(()),
+            params,
+        })
+    }
+
+    /// Lock the mutex and return a guard for accessing the allocator.
+    fn lock(&self) -> GpuBuddyGuard<'_> {
+        GpuBuddyGuard {
+            inner: self,
+            _guard: self.lock.lock(),
+        }
+    }
+}
+
+#[pinned_drop]
+impl PinnedDrop for GpuBuddyInner {
+    fn drop(self: Pin<&mut Self>) {
+        let guard = self.lock();
+
+        // SAFETY: Per the type invariant, `inner` contains an initialized
+        // allocator. `guard` provides exclusive access.
+        unsafe {
+            bindings::gpu_buddy_fini(guard.as_raw());
+        }
+    }
+}
+
+// SAFETY: `GpuBuddyInner` can be sent between threads.
+unsafe impl Send for GpuBuddyInner {}
+
+// SAFETY: `GpuBuddyInner` is `Sync` because `GpuBuddyInner::lock`
+// serializes all access to the C allocator, preventing data races.
+unsafe impl Sync for GpuBuddyInner {}
+
+// Guard that proves the lock is held, enabling access to the allocator.
+// The `_guard` holds the lock for the duration of this guard's lifetime.
+struct GpuBuddyGuard<'a> {
+    inner: &'a GpuBuddyInner,
+    _guard: MutexGuard<'a, ()>,
+}
+
+impl GpuBuddyGuard<'_> {
+    /// Get a raw pointer to the underlying C `gpu_buddy` structure.
+    fn as_raw(&self) -> *mut bindings::gpu_buddy {
+        self.inner.inner.get()
+    }
+}
+
+/// GPU buddy allocator instance.
+///
+/// This structure wraps the C `gpu_buddy` allocator using reference counting.
+/// The allocator is automatically cleaned up when all references are dropped.
+///
+/// Refer to the module-level documentation for usage examples.
+pub struct GpuBuddy(Arc<GpuBuddyInner>);
+
+impl GpuBuddy {
+    /// Create a new buddy allocator.
+    ///
+    /// Creates a buddy allocator that manages a contiguous address space of the given
+    /// size, with the specified minimum allocation unit (chunk_size must be at least 4KB).
+    pub fn new(params: GpuBuddyParams) -> Result<Self> {
+        Arc::pin_init(GpuBuddyInner::new(params), GFP_KERNEL).map(Self)
+    }
+
+    /// Get the base offset for allocations.
+    pub fn base_offset(&self) -> u64 {
+        self.0.params.base_offset
+    }
+
+    /// Get the chunk size (minimum allocation unit).
+    pub fn chunk_size(&self) -> Alignment {
+        self.0.params.chunk_size
+    }
+
+    /// Get the total managed size.
+    pub fn size(&self) -> u64 {
+        self.0.params.physical_memory_size
+    }
+
+    /// Get the available (free) memory in bytes.
+    pub fn free_memory(&self) -> u64 {
+        let guard = self.0.lock();
+
+        // SAFETY: Per the type invariant, `inner` contains an initialized allocator.
+        // `guard` provides exclusive access.
+        unsafe { (*guard.as_raw()).avail }
+    }
+
+    /// Allocate blocks from the buddy allocator.
+    ///
+    /// Returns a pin-initializer for [`AllocatedBlocks`].
+    pub fn alloc_blocks(
+        &self,
+        mode: GpuBuddyAllocMode,
+        size: u64,
+        min_block_size: Alignment,
+        flags: impl Into<GpuBuddyAllocFlags>,
+    ) -> impl PinInit<AllocatedBlocks, Error> {
+        let buddy_arc = Arc::clone(&self.0);
+        let (start, end) = mode.range();
+        let mode_flags = mode.into_flags();
+        let modifier_flags = u32::from(flags.into()) as usize;
+
+        // Create pin-initializer that initializes list and allocates blocks.
+        try_pin_init!(AllocatedBlocks {
+            buddy: buddy_arc,
+            list <- CListHead::new(),
+            _: {
+                // Reject zero-sized or inverted ranges.
+                if let GpuBuddyAllocMode::Range { start, end } = mode {
+                    if end <= start {
+                        Err::<(), Error>(EINVAL)?;
+                    }
+                }
+
+                // Lock while allocating to serialize with concurrent frees.
+                let guard = buddy.lock();
+
+                // SAFETY: Per the type invariant, `inner` contains an initialized
+                // allocator. `guard` provides exclusive access.
+                to_result(unsafe {
+                    bindings::gpu_buddy_alloc_blocks(
+                        guard.as_raw(),
+                        start,
+                        end,
+                        size,
+                        min_block_size.as_usize() as u64,
+                        list.as_raw(),
+                        mode_flags | modifier_flags,
+                    )
+                })?
+            }
+        })
+    }
+}
+
+/// Allocated blocks from the buddy allocator with automatic cleanup.
+///
+/// This structure owns a list of allocated blocks and ensures they are
+/// automatically freed when dropped. Use `iter()` to iterate over all
+/// allocated blocks.
+///
+/// # Invariants
+///
+/// - `list` is an initialized, valid list head containing allocated blocks.
+#[pin_data(PinnedDrop)]
+pub struct AllocatedBlocks {
+    #[pin]
+    list: CListHead,
+    buddy: Arc<GpuBuddyInner>,
+}
+
+impl AllocatedBlocks {
+    /// Check if the block list is empty.
+    pub fn is_empty(&self) -> bool {
+        // An empty list head points to itself.
+        !self.list.is_linked()
+    }
+
+    /// Iterate over allocated blocks.
+    ///
+    /// Returns an iterator yielding [`AllocatedBlock`] values. Each [`AllocatedBlock`]
+    /// borrows `self` and is only valid for the duration of that borrow.
+    pub fn iter(&self) -> impl Iterator<Item = AllocatedBlock<'_>> + '_ {
+        // SAFETY:
+        // - Per the type invariant, `list` is an initialized sentinel `list_head`
+        //   and is not concurrently modified (we hold a `&self` borrow).
+        // - The list contains `gpu_buddy_block` items linked via
+        //   `__bindgen_anon_1.link`.
+        // - `Block` is `#[repr(transparent)]` over `gpu_buddy_block`.
+        let head = self.list.as_raw();
+        let clist = clist_create!(unsafe {
+            head,
+            Block,
+            bindings::gpu_buddy_block,
+            __bindgen_anon_1.link
+        });
+
+        clist
+            .iter()
+            .map(|this| AllocatedBlock { this, blocks: self })
+    }
+}
+
+#[pinned_drop]
+impl PinnedDrop for AllocatedBlocks {
+    fn drop(self: Pin<&mut Self>) {
+        let guard = self.buddy.lock();
+
+        // SAFETY:
+        // - list is valid per the type's invariants.
+        // - guard provides exclusive access to the allocator.
+        unsafe {
+            bindings::gpu_buddy_free_list(guard.as_raw(), self.list.as_raw(), 0);
+        }
+    }
+}
+
+/// A GPU buddy block.
+///
+/// Transparent wrapper over C `gpu_buddy_block` structure. This type is returned
+/// as references during iteration over [`AllocatedBlocks`].
+///
+/// # Invariants
+///
+/// The inner [`Opaque`] contains a valid, allocated `gpu_buddy_block`.
+#[repr(transparent)]
+struct Block(Opaque<bindings::gpu_buddy_block>);
+
+impl Block {
+    /// Get a raw pointer to the underlying C block.
+    fn as_raw(&self) -> *mut bindings::gpu_buddy_block {
+        self.0.get()
+    }
+
+    /// Get the block's raw offset in the buddy address space (without base offset).
+    fn offset(&self) -> u64 {
+        // SAFETY: `self.as_raw()` is valid per the type's invariants.
+        unsafe { bindings::gpu_buddy_block_offset(self.as_raw()) }
+    }
+
+    /// Get the block order.
+    fn order(&self) -> u32 {
+        // SAFETY: `self.as_raw()` is valid per the type's invariants.
+        unsafe { bindings::gpu_buddy_block_order(self.as_raw()) }
+    }
+}
+
+// SAFETY: `Block` is a wrapper around `gpu_buddy_block` which can be
+// sent across threads safely.
+unsafe impl Send for Block {}
+
+// SAFETY: `Block` is only accessed through shared references after
+// allocation, and thus safe to access concurrently across threads.
+unsafe impl Sync for Block {}
+
+/// A buddy block paired with its owning [`AllocatedBlocks`] context.
+///
+/// Unlike a raw block, which only knows its offset within the buddy address
+/// space, an [`AllocatedBlock`] also has access to the allocator's `base_offset`
+/// and `chunk_size`, enabling it to compute absolute offsets and byte sizes.
+///
+/// Returned by [`AllocatedBlocks::iter()`].
+pub struct AllocatedBlock<'a> {
+    this: &'a Block,
+    blocks: &'a AllocatedBlocks,
+}
+
+impl AllocatedBlock<'_> {
+    /// Get the block's offset in the address space.
+    ///
+    /// Returns the absolute offset including the allocator's base offset.
+    /// This is the actual address to use for accessing the allocated memory.
+    pub fn offset(&self) -> u64 {
+        self.blocks.buddy.params.base_offset + self.this.offset()
+    }
+
+    /// Get the block order (size = chunk_size << order).
+    pub fn order(&self) -> u32 {
+        self.this.order()
+    }
+
+    /// Get the block's size in bytes.
+    pub fn size(&self) -> u64 {
+        (self.blocks.buddy.params.chunk_size.as_usize() as u64) << self.this.order()
+    }
+}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index bdcf632050ee..2652933e585f 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -102,6 +102,8 @@
 pub mod firmware;
 pub mod fmt;
 pub mod fs;
+#[cfg(CONFIG_GPU_BUDDY = "y")]
+pub mod gpu;
 #[cfg(CONFIG_I2C = "y")]
 pub mod i2c;
 pub mod id_pool;
-- 
2.34.1
Re: [PATCH v13 1/2] rust: gpu: Add GPU buddy allocator bindings
Posted by Alexandre Courbot 2 weeks, 4 days ago
Hi Joel,

On Wed Mar 18, 2026 at 7:03 AM JST, Joel Fernandes wrote:
> Add safe Rust abstractions over the Linux kernel's GPU buddy
> allocator for physical memory management. The GPU buddy allocator
> implements a binary buddy system useful for GPU physical memory
> allocation. nova-core will use it for physical memory allocation.
>
> Cc: Nikola Djukic <ndjukic@nvidia.com>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>

A few things came to mind when re-reading this again. I believe these
will be my final comments on this patch though (famous last words).

<snip>
> +//! # Examples
> +//!
> +//! Create a buddy allocator and perform a basic range allocation:
> +//!
> +//! ```
> +//! use kernel::{
> +//!     gpu::buddy::{
> +//!         GpuBuddy,
> +//!         GpuBuddyAllocFlags,
> +//!         GpuBuddyAllocMode,
> +//!         GpuBuddyParams, //
> +//!     },
> +//!     prelude::*,
> +//!     ptr::Alignment,
> +//!     sizes::*, //
> +//! };
> +//!
> +//! // Create a 1GB buddy allocator with 4KB minimum chunk size.
> +//! let buddy = GpuBuddy::new(GpuBuddyParams {
> +//!     base_offset: 0,
> +//!     physical_memory_size: SZ_1G as u64,
> +//!     chunk_size: Alignment::new::<SZ_4K>(),
> +//! })?;
> +//!
> +//! assert_eq!(buddy.size(), SZ_1G as u64);
> +//! assert_eq!(buddy.chunk_size(), Alignment::new::<SZ_4K>());

Note that you can also use

  assert_eq!(buddy.chunk_size().as_usize(), SZ_4K);

to avoid the `Alignment` constructor.

<snip>
> +impl GpuBuddyAllocMode {
> +    // Returns the C flags corresponding to the allocation mode.
> +    fn into_flags(self) -> usize {
> +        match self {
> +            Self::Simple => 0,
> +            Self::Range { .. } => bindings::GPU_BUDDY_RANGE_ALLOCATION,
> +            Self::TopDown => bindings::GPU_BUDDY_TOPDOWN_ALLOCATION,
> +        }
> +    }
> +
> +    // Extracts the range start/end, defaulting to (0, 0) for non-range modes.

Let's write it as `(0, 0)` so the tuple is properly formatted (I know it's not a
doc comment, but the convention also applies to regular comments).

> +    fn range(self) -> (u64, u64) {
> +        match self {
> +            Self::Range { start, end } => (start, end),
> +            _ => (0, 0),
> +        }
> +    }
> +}
> +
> +crate::impl_flags!(
> +    /// Modifier flags for GPU buddy allocation.
> +    ///
> +    /// These flags can be combined with any [`GpuBuddyAllocMode`] to control
> +    /// additional allocation behavior.
> +    #[derive(Clone, Copy, Default, PartialEq, Eq)]
> +    pub struct GpuBuddyAllocFlags(u32);

I've realized a bit late that this should actually be
`GpuBuddyAllocFlags(usize)`.

The values are defined in the bindings as `usize`, and we convert them
to `u32`, only to convert them back into `usize` in `alloc_blocks`. I
know it goes against the idea that flags should not have a size
dependent on the architecture, but in this case it's just a consequence
of the C API not doing it - and in the end we have to conform, so there
is no point in resisting. Actually, `GpuBuddyAllocMode::into_flags`
already returns a `usize`, so we're already halfway there.

Just going with the flow and using `usize` removes quite a few `as` in
the code. Ideally we would fix the C API and switch back to `u32` in the
near future but for now that's the best course of action imho.

I've checked whether it worked, and it does - here is a diff for reference:

https://github.com/Gnurou/linux/commit/2e1bfc2d8e1f93a76343c7c563b1f4b85a69ab8b

<snip>
> +    /// Get the base offset for allocations.
> +    pub fn base_offset(&self) -> u64 {
> +        self.0.params.base_offset
> +    }
> +
> +    /// Get the chunk size (minimum allocation unit).
> +    pub fn chunk_size(&self) -> Alignment {
> +        self.0.params.chunk_size
> +    }
> +
> +    /// Get the total managed size.
> +    pub fn size(&self) -> u64 {
> +        self.0.params.physical_memory_size
> +    }
> +
> +    /// Get the available (free) memory in bytes.
> +    pub fn free_memory(&self) -> u64 {

This name is a bit confusing, as it can be read as implying that this method
frees memory, whereas it doesn't free anything — and doesn't even deal with
memory as such, only with an address space that may or may not represent memory.

In the C `struct gpu_buddy`, the member representing the chunk size is
named `chunk_size`, and the total size `size`, making the two methods
above this one adopt the same name (by a happy coincidence maybe :)).

Let's do the same here - since we are querying `avail`, this method can
just be called `avail` to align with the C API.

In the same spirit, we should rename
`GpuBuddyParams::physical_memory_size` to just `size`, because that's the
name of the corresponding field in `struct gpu_buddy`, and again, we are
not limited to managing physical memory with this allocator.

> +        let guard = self.0.lock();
> +
> +        // SAFETY: Per the type invariant, `inner` contains an initialized allocator.
> +        // `guard` provides exclusive access.
> +        unsafe { (*guard.as_raw()).avail }
> +    }
> +
> +    /// Allocate blocks from the buddy allocator.
> +    ///
> +    /// Returns a pin-initializer for [`AllocatedBlocks`].
> +    pub fn alloc_blocks(
> +        &self,
> +        mode: GpuBuddyAllocMode,
> +        size: u64,
> +        min_block_size: Alignment,
> +        flags: impl Into<GpuBuddyAllocFlags>,
> +    ) -> impl PinInit<AllocatedBlocks, Error> {
> +        let buddy_arc = Arc::clone(&self.0);
> +        let (start, end) = mode.range();
> +        let mode_flags = mode.into_flags();
> +        let modifier_flags = u32::from(flags.into()) as usize;
> +
> +        // Create pin-initializer that initializes list and allocates blocks.
> +        try_pin_init!(AllocatedBlocks {
> +            buddy: buddy_arc,
> +            list <- CListHead::new(),
> +            _: {
> +                // Reject zero-sized or inverted ranges.
> +                if let GpuBuddyAllocMode::Range { start, end } = mode {
> +                    if end <= start {
> +                        Err::<(), Error>(EINVAL)?;
> +                    }
> +                }

Ah, indeed we want to disallow decreasing ranges. Actually, why not
prevent them from even being expressed by using an actual Rust `Range`?

This lets you turn this test into an `is_empty()` and removes 10 LoCs
overall. You lose the ability to copy `GpuBuddyAllocMode`, but we don't
need it in the first place.

Here is a diff showing what it looks like, feel free to pick it:

https://github.com/Gnurou/linux/commit/7f9348f6a64d0fbec7ddf99b78ca727a1ac1cd06
Re: [PATCH v13 1/2] rust: gpu: Add GPU buddy allocator bindings
Posted by Joel Fernandes 2 weeks, 5 days ago

On 3/17/2026 6:03 PM, Joel Fernandes wrote:
> +impl AllocatedBlocks {
> +    /// Check if the block list is empty.
> +    pub fn is_empty(&self) -> bool {
> +        // An empty list head points to itself.
> +        !self.list.is_linked()
> +    }
> +
> +    /// Iterate over allocated blocks.
> +    ///
> +    /// Returns an iterator yielding [`AllocatedBlock`] values. Each [`AllocatedBlock`]
> +    /// borrows `self` and is only valid for the duration of that borrow.
> +    pub fn iter(&self) -> impl Iterator<Item = AllocatedBlock<'_>> + '_ {
> +        // SAFETY:
> +        // - Per the type invariant, `list` is an initialized sentinel `list_head`

Sorry, there is a clippy error here: this SAFETY needs to be SAFETY* per:
https://lore.kernel.org/all/CANiq72kEnDyUpnWMZmheJytjioeiJUK_C-yQJk77dPid89LExw@mail.gmail.com/
https://lore.kernel.org/all/71b6a115-98f1-4b09-9c04-a99349f51e49@nvidia.com/

here is a fixup:
https://git.kernel.org/pub/scm/linux/kernel/git/jfern/linux.git/commit/?h=nova/mm&id=8ee92de5549324cba602c0e7a491596056f55153