[PATCH v2 8/8] gpu: nova-core: convert to new dma::Coherent API

Danilo Krummrich posted 8 patches 2 weeks ago
[PATCH v2 8/8] gpu: nova-core: convert to new dma::Coherent API
Posted by Danilo Krummrich 2 weeks ago
From: Gary Guo <gary@garyguo.net>

Remove all usages of dma::CoherentAllocation and use the new
dma::Coherent type instead.

Note that there are still remnants of the old dma::CoherentAllocation
API, such as as_slice() and as_slice_mut().

Signed-off-by: Gary Guo <gary@garyguo.net>
Co-developed-by: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: Danilo Krummrich <dakr@kernel.org>
---
 drivers/gpu/nova-core/dma.rs      | 19 ++++++-------
 drivers/gpu/nova-core/falcon.rs   |  5 ++--
 drivers/gpu/nova-core/gsp.rs      | 21 ++++++++------
 drivers/gpu/nova-core/gsp/cmdq.rs | 21 ++++++--------
 drivers/gpu/nova-core/gsp/fw.rs   | 46 ++++++++++---------------------
 5 files changed, 47 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs
index 7215398969da..3c19d5ffcfe8 100644
--- a/drivers/gpu/nova-core/dma.rs
+++ b/drivers/gpu/nova-core/dma.rs
@@ -9,13 +9,13 @@
 
 use kernel::{
     device,
-    dma::CoherentAllocation,
+    dma::Coherent,
     page::PAGE_SIZE,
     prelude::*, //
 };
 
 pub(crate) struct DmaObject {
-    dma: CoherentAllocation<u8>,
+    dma: Coherent<[u8]>,
 }
 
 impl DmaObject {
@@ -24,23 +24,22 @@ pub(crate) fn new(dev: &device::Device<device::Bound>, len: usize) -> Result<Sel
             .map_err(|_| EINVAL)?
             .pad_to_align()
             .size();
-        let dma = CoherentAllocation::alloc_coherent(dev, len, GFP_KERNEL | __GFP_ZERO)?;
+        let dma = Coherent::zeroed_slice(dev, len, GFP_KERNEL)?;
 
         Ok(Self { dma })
     }
 
     pub(crate) fn from_data(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> {
-        Self::new(dev, data.len()).and_then(|mut dma_obj| {
-            // SAFETY: We have just allocated the DMA memory, we are the only users and
-            // we haven't made the device aware of the handle yet.
-            unsafe { dma_obj.write(data, 0)? }
-            Ok(dma_obj)
-        })
+        let dma_obj = Self::new(dev, data.len())?;
+        // SAFETY: We have just allocated the DMA memory, we are the only users and
+        // we haven't made the device aware of the handle yet.
+        unsafe { dma_obj.as_mut()[..data.len()].copy_from_slice(data) };
+        Ok(dma_obj)
     }
 }
 
 impl Deref for DmaObject {
-    type Target = CoherentAllocation<u8>;
+    type Target = Coherent<[u8]>;
 
     fn deref(&self) -> &Self::Target {
         &self.dma
diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
index 7097a206ec3c..5bf8da8760bf 100644
--- a/drivers/gpu/nova-core/falcon.rs
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -26,8 +26,7 @@
     gpu::Chipset,
     num::{
         self,
-        FromSafeCast,
-        IntoSafeCast, //
+        FromSafeCast, //
     },
     regs,
     regs::macros::RegisterBase, //
@@ -653,7 +652,7 @@ fn dma_wr(
             }
             FalconMem::Dmem => (
                 0,
-                dma_obj.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?,
+                dma_obj.dma_handle() + DmaAddress::from(load_offsets.src_start),
             ),
         };
         if dma_start % DmaAddress::from(DMA_LEN) > 0 {
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index f0a50bdc4c00..a045c4189989 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -6,13 +6,15 @@
     device,
     dma::{
         Coherent,
-        CoherentAllocation,
         CoherentBox,
         DmaAddress, //
     },
     pci,
     prelude::*,
-    transmute::AsBytes, //
+    transmute::{
+        AsBytes,
+        FromBytes, //
+    }, //
 };
 
 pub(crate) mod cmdq;
@@ -44,6 +46,9 @@
 #[repr(C)]
 struct PteArray<const NUM_ENTRIES: usize>([u64; NUM_ENTRIES]);
 
+/// SAFETY: arrays of `u64` implement `FromBytes` and we are but a wrapper around one.
+unsafe impl<const NUM_ENTRIES: usize> FromBytes for PteArray<NUM_ENTRIES> {}
+
 /// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around one.
 unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
 
@@ -71,26 +76,24 @@ fn entry(start: DmaAddress, index: usize) -> Result<u64> {
 /// then pp points to index into the buffer where the next logging entry will
 /// be written. Therefore, the logging data is valid if:
 ///   1 <= pp < sizeof(buffer)/sizeof(u64)
-struct LogBuffer(CoherentAllocation<u8>);
+struct LogBuffer(Coherent<[u8]>);
 
 impl LogBuffer {
     /// Creates a new `LogBuffer` mapped on `dev`.
     fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
         const NUM_PAGES: usize = RM_LOG_BUFFER_NUM_PAGES;
 
-        let mut obj = Self(CoherentAllocation::<u8>::alloc_coherent(
+        let obj = Self(Coherent::<u8>::zeroed_slice(
             dev,
             NUM_PAGES * GSP_PAGE_SIZE,
-            GFP_KERNEL | __GFP_ZERO,
+            GFP_KERNEL,
         )?);
 
         let start_addr = obj.0.dma_handle();
 
         // SAFETY: `obj` has just been created and we are its sole user.
-        let pte_region = unsafe {
-            obj.0
-                .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())?
-        };
+        let pte_region =
+            unsafe { &mut obj.0.as_mut()[size_of::<u64>()..][..NUM_PAGES * size_of::<u64>()] };
 
         // Write values one by one to avoid an on-stack instance of `PteArray`.
         for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index d36a62ba1c60..f38790601a0f 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -7,7 +7,7 @@
 use kernel::{
     device,
     dma::{
-        CoherentAllocation,
+        Coherent,
         DmaAddress, //
     },
     dma_write,
@@ -207,7 +207,7 @@ unsafe impl AsBytes for GspMem {}
 // that is not a problem because they are not used outside the kernel.
 unsafe impl FromBytes for GspMem {}
 
-/// Wrapper around [`GspMem`] to share it with the GPU using a [`CoherentAllocation`].
+/// Wrapper around [`GspMem`] to share it with the GPU using a [`Coherent`].
 ///
 /// This provides the low-level functionality to communicate with the GSP, including allocation of
 /// queue space to write messages to and management of read/write pointers.
@@ -218,7 +218,7 @@ unsafe impl FromBytes for GspMem {}
 ///   pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`].
 /// * The driver owns (i.e. can read from) the part of the GSP message queue between the CPU read
 ///   pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`].
-struct DmaGspMem(CoherentAllocation<GspMem>);
+struct DmaGspMem(Coherent<GspMem>);
 
 impl DmaGspMem {
     /// Allocate a new instance and map it for `dev`.
@@ -226,21 +226,20 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
         const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::<Msgq>() }>();
         const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>();
 
-        let gsp_mem =
-            CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
+        let gsp_mem = Coherent::<GspMem>::zeroed(dev, GFP_KERNEL)?;
 
         let start = gsp_mem.dma_handle();
         // Write values one by one to avoid an on-stack instance of `PteArray`.
         for i in 0..GspMem::PTE_ARRAY_SIZE {
-            dma_write!(gsp_mem, [0]?.ptes.0[i], PteArray::<0>::entry(start, i)?);
+            dma_write!(gsp_mem, .ptes.0[i], PteArray::<0>::entry(start, i)?);
         }
 
         dma_write!(
             gsp_mem,
-            [0]?.cpuq.tx,
+            .cpuq.tx,
             MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES)
         );
-        dma_write!(gsp_mem, [0]?.cpuq.rx, MsgqRxHeader::new());
+        dma_write!(gsp_mem, .cpuq.rx, MsgqRxHeader::new());
 
         Ok(Self(gsp_mem))
     }
@@ -255,10 +254,9 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
         let rx = self.gsp_read_ptr() as usize;
 
         // SAFETY:
-        // - The `CoherentAllocation` contains exactly one object.
         // - We will only access the driver-owned part of the shared memory.
         // - Per the safety statement of the function, no concurrent access will be performed.
-        let gsp_mem = &mut unsafe { self.0.as_slice_mut(0, 1) }.unwrap()[0];
+        let gsp_mem = unsafe { &mut *self.0.as_mut() };
         // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `< MSGQ_NUM_PAGES`.
         let (before_tx, after_tx) = gsp_mem.cpuq.msgq.data.split_at_mut(tx);
 
@@ -309,10 +307,9 @@ fn driver_write_area_size(&self) -> usize {
         let rx = self.cpu_read_ptr() as usize;
 
         // SAFETY:
-        // - The `CoherentAllocation` contains exactly one object.
         // - We will only access the driver-owned part of the shared memory.
         // - Per the safety statement of the function, no concurrent access will be performed.
-        let gsp_mem = &unsafe { self.0.as_slice(0, 1) }.unwrap()[0];
+        let gsp_mem = unsafe { &*self.0.as_ptr() };
         let data = &gsp_mem.gspq.msgq.data;
 
         // The area starting at `rx` and ending at `tx - 1` modulo MSGQ_NUM_PAGES, inclusive,
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index 0d8daf6a80b7..847b5eb215d4 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -40,8 +40,7 @@
     },
 };
 
-// TODO: Replace with `IoView` projections once available; the `unwrap()` calls go away once we
-// switch to the new `dma::Coherent` API.
+// TODO: Replace with `IoView` projections once available.
 pub(super) mod gsp_mem {
     use core::sync::atomic::{
         fence,
@@ -49,10 +48,9 @@ pub(super) mod gsp_mem {
     };
 
     use kernel::{
-        dma::CoherentAllocation,
+        dma::Coherent,
         dma_read,
-        dma_write,
-        prelude::*, //
+        dma_write, //
     };
 
     use crate::gsp::cmdq::{
@@ -60,49 +58,35 @@ pub(super) mod gsp_mem {
         MSGQ_NUM_PAGES, //
     };
 
-    pub(in crate::gsp) fn gsp_write_ptr(qs: &CoherentAllocation<GspMem>) -> u32 {
-        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
-        || -> Result<u32> { Ok(dma_read!(qs, [0]?.gspq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap()
+    pub(in crate::gsp) fn gsp_write_ptr(qs: &Coherent<GspMem>) -> u32 {
+        dma_read!(qs, .gspq.tx.0.writePtr) % MSGQ_NUM_PAGES
     }
 
-    pub(in crate::gsp) fn gsp_read_ptr(qs: &CoherentAllocation<GspMem>) -> u32 {
-        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
-        || -> Result<u32> { Ok(dma_read!(qs, [0]?.gspq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap()
+    pub(in crate::gsp) fn gsp_read_ptr(qs: &Coherent<GspMem>) -> u32 {
+        dma_read!(qs, .gspq.rx.0.readPtr) % MSGQ_NUM_PAGES
     }
 
-    pub(in crate::gsp) fn cpu_read_ptr(qs: &CoherentAllocation<GspMem>) -> u32 {
-        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
-        || -> Result<u32> { Ok(dma_read!(qs, [0]?.cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap()
+    pub(in crate::gsp) fn cpu_read_ptr(qs: &Coherent<GspMem>) -> u32 {
+        dma_read!(qs, .cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES
     }
 
-    pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &CoherentAllocation<GspMem>, count: u32) {
+    pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &Coherent<GspMem>, count: u32) {
         let rptr = cpu_read_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES;
 
         // Ensure read pointer is properly ordered.
         fence(Ordering::SeqCst);
 
-        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
-        || -> Result {
-            dma_write!(qs, [0]?.cpuq.rx.0.readPtr, rptr);
-            Ok(())
-        }()
-        .unwrap()
+        dma_write!(qs, .cpuq.rx.0.readPtr, rptr);
     }
 
-    pub(in crate::gsp) fn cpu_write_ptr(qs: &CoherentAllocation<GspMem>) -> u32 {
-        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
-        || -> Result<u32> { Ok(dma_read!(qs, [0]?.cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap()
+    pub(in crate::gsp) fn cpu_write_ptr(qs: &Coherent<GspMem>) -> u32 {
+        dma_read!(qs, .cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES
     }
 
-    pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &CoherentAllocation<GspMem>, count: u32) {
+    pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &Coherent<GspMem>, count: u32) {
         let wptr = cpu_write_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES;
 
-        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
-        || -> Result {
-            dma_write!(qs, [0]?.cpuq.tx.0.writePtr, wptr);
-            Ok(())
-        }()
-        .unwrap();
+        dma_write!(qs, .cpuq.tx.0.writePtr, wptr);
 
         // Ensure all command data is visible before triggering the GSP read.
         fence(Ordering::SeqCst);
-- 
2.53.0
Re: [PATCH v2 8/8] gpu: nova-core: convert to new dma::Coherent API
Posted by Gary Guo 1 week, 6 days ago
On Fri Mar 20, 2026 at 7:45 PM GMT, Danilo Krummrich wrote:
> From: Gary Guo <gary@garyguo.net>
>
> Remove all usages of dma::CoherentAllocation and use the new
> dma::Coherent type instead.
>
> Note that there are still remainders of the old dma::CoherentAllocation
> API, such as as_slice() and as_slice_mut().

Hi Danilo,

It looks like, with Alex's "gpu: nova-core: create falcon firmware DMA objects
lazily" landed, all other users of the old API are now gone.

So this line could be dropped, and the `impl CoherentAllocation` block and the
type alias can be removed after this patch.

Best,
Gary

>
> Signed-off-by: Gary Guo <gary@garyguo.net>
> Co-developed-by: Danilo Krummrich <dakr@kernel.org>
> Signed-off-by: Danilo Krummrich <dakr@kernel.org>
> ---
>  drivers/gpu/nova-core/dma.rs      | 19 ++++++-------
>  drivers/gpu/nova-core/falcon.rs   |  5 ++--
>  drivers/gpu/nova-core/gsp.rs      | 21 ++++++++------
>  drivers/gpu/nova-core/gsp/cmdq.rs | 21 ++++++--------
>  drivers/gpu/nova-core/gsp/fw.rs   | 46 ++++++++++---------------------
>  5 files changed, 47 insertions(+), 65 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs
> index 7215398969da..3c19d5ffcfe8 100644
> --- a/drivers/gpu/nova-core/dma.rs
> +++ b/drivers/gpu/nova-core/dma.rs
> @@ -9,13 +9,13 @@
>  
>  use kernel::{
>      device,
> -    dma::CoherentAllocation,
> +    dma::Coherent,
>      page::PAGE_SIZE,
>      prelude::*, //
>  };
>  
>  pub(crate) struct DmaObject {
> -    dma: CoherentAllocation<u8>,
> +    dma: Coherent<[u8]>,
>  }
>  
>  impl DmaObject {
> @@ -24,23 +24,22 @@ pub(crate) fn new(dev: &device::Device<device::Bound>, len: usize) -> Result<Sel
>              .map_err(|_| EINVAL)?
>              .pad_to_align()
>              .size();
> -        let dma = CoherentAllocation::alloc_coherent(dev, len, GFP_KERNEL | __GFP_ZERO)?;
> +        let dma = Coherent::zeroed_slice(dev, len, GFP_KERNEL)?;
>  
>          Ok(Self { dma })
>      }
>  
>      pub(crate) fn from_data(dev: &device::Device<device::Bound>, data: &[u8]) -> Result<Self> {
> -        Self::new(dev, data.len()).and_then(|mut dma_obj| {
> -            // SAFETY: We have just allocated the DMA memory, we are the only users and
> -            // we haven't made the device aware of the handle yet.
> -            unsafe { dma_obj.write(data, 0)? }
> -            Ok(dma_obj)
> -        })
> +        let dma_obj = Self::new(dev, data.len())?;
> +        // SAFETY: We have just allocated the DMA memory, we are the only users and
> +        // we haven't made the device aware of the handle yet.
> +        unsafe { dma_obj.as_mut()[..data.len()].copy_from_slice(data) };
> +        Ok(dma_obj)
>      }
>  }
>  
>  impl Deref for DmaObject {
> -    type Target = CoherentAllocation<u8>;
> +    type Target = Coherent<[u8]>;
>  
>      fn deref(&self) -> &Self::Target {
>          &self.dma
> diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
> index 7097a206ec3c..5bf8da8760bf 100644
> --- a/drivers/gpu/nova-core/falcon.rs
> +++ b/drivers/gpu/nova-core/falcon.rs
> @@ -26,8 +26,7 @@
>      gpu::Chipset,
>      num::{
>          self,
> -        FromSafeCast,
> -        IntoSafeCast, //
> +        FromSafeCast, //
>      },
>      regs,
>      regs::macros::RegisterBase, //
> @@ -653,7 +652,7 @@ fn dma_wr(
>              }
>              FalconMem::Dmem => (
>                  0,
> -                dma_obj.dma_handle_with_offset(load_offsets.src_start.into_safe_cast())?,
> +                dma_obj.dma_handle() + DmaAddress::from(load_offsets.src_start),
>              ),
>          };
>          if dma_start % DmaAddress::from(DMA_LEN) > 0 {
> diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
> index f0a50bdc4c00..a045c4189989 100644
> --- a/drivers/gpu/nova-core/gsp.rs
> +++ b/drivers/gpu/nova-core/gsp.rs
> @@ -6,13 +6,15 @@
>      device,
>      dma::{
>          Coherent,
> -        CoherentAllocation,
>          CoherentBox,
>          DmaAddress, //
>      },
>      pci,
>      prelude::*,
> -    transmute::AsBytes, //
> +    transmute::{
> +        AsBytes,
> +        FromBytes, //
> +    }, //
>  };
>  
>  pub(crate) mod cmdq;
> @@ -44,6 +46,9 @@
>  #[repr(C)]
>  struct PteArray<const NUM_ENTRIES: usize>([u64; NUM_ENTRIES]);
>  
> +/// SAFETY: arrays of `u64` implement `FromBytes` and we are but a wrapper around one.
> +unsafe impl<const NUM_ENTRIES: usize> FromBytes for PteArray<NUM_ENTRIES> {}
> +
>  /// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around one.
>  unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
>  
> @@ -71,26 +76,24 @@ fn entry(start: DmaAddress, index: usize) -> Result<u64> {
>  /// then pp points to index into the buffer where the next logging entry will
>  /// be written. Therefore, the logging data is valid if:
>  ///   1 <= pp < sizeof(buffer)/sizeof(u64)
> -struct LogBuffer(CoherentAllocation<u8>);
> +struct LogBuffer(Coherent<[u8]>);
>  
>  impl LogBuffer {
>      /// Creates a new `LogBuffer` mapped on `dev`.
>      fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
>          const NUM_PAGES: usize = RM_LOG_BUFFER_NUM_PAGES;
>  
> -        let mut obj = Self(CoherentAllocation::<u8>::alloc_coherent(
> +        let obj = Self(Coherent::<u8>::zeroed_slice(
>              dev,
>              NUM_PAGES * GSP_PAGE_SIZE,
> -            GFP_KERNEL | __GFP_ZERO,
> +            GFP_KERNEL,
>          )?);
>  
>          let start_addr = obj.0.dma_handle();
>  
>          // SAFETY: `obj` has just been created and we are its sole user.
> -        let pte_region = unsafe {
> -            obj.0
> -                .as_slice_mut(size_of::<u64>(), NUM_PAGES * size_of::<u64>())?
> -        };
> +        let pte_region =
> +            unsafe { &mut obj.0.as_mut()[size_of::<u64>()..][..NUM_PAGES * size_of::<u64>()] };
>  
>          // Write values one by one to avoid an on-stack instance of `PteArray`.
>          for (i, chunk) in pte_region.chunks_exact_mut(size_of::<u64>()).enumerate() {
> diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> index d36a62ba1c60..f38790601a0f 100644
> --- a/drivers/gpu/nova-core/gsp/cmdq.rs
> +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> @@ -7,7 +7,7 @@
>  use kernel::{
>      device,
>      dma::{
> -        CoherentAllocation,
> +        Coherent,
>          DmaAddress, //
>      },
>      dma_write,
> @@ -207,7 +207,7 @@ unsafe impl AsBytes for GspMem {}
>  // that is not a problem because they are not used outside the kernel.
>  unsafe impl FromBytes for GspMem {}
>  
> -/// Wrapper around [`GspMem`] to share it with the GPU using a [`CoherentAllocation`].
> +/// Wrapper around [`GspMem`] to share it with the GPU using a [`Coherent`].
>  ///
>  /// This provides the low-level functionality to communicate with the GSP, including allocation of
>  /// queue space to write messages to and management of read/write pointers.
> @@ -218,7 +218,7 @@ unsafe impl FromBytes for GspMem {}
>  ///   pointer and the GSP read pointer. This region is returned by [`Self::driver_write_area`].
>  /// * The driver owns (i.e. can read from) the part of the GSP message queue between the CPU read
>  ///   pointer and the GSP write pointer. This region is returned by [`Self::driver_read_area`].
> -struct DmaGspMem(CoherentAllocation<GspMem>);
> +struct DmaGspMem(Coherent<GspMem>);
>  
>  impl DmaGspMem {
>      /// Allocate a new instance and map it for `dev`.
> @@ -226,21 +226,20 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
>          const MSGQ_SIZE: u32 = num::usize_into_u32::<{ size_of::<Msgq>() }>();
>          const RX_HDR_OFF: u32 = num::usize_into_u32::<{ mem::offset_of!(Msgq, rx) }>();
>  
> -        let gsp_mem =
> -            CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
> +        let gsp_mem = Coherent::<GspMem>::zeroed(dev, GFP_KERNEL)?;
>  
>          let start = gsp_mem.dma_handle();
>          // Write values one by one to avoid an on-stack instance of `PteArray`.
>          for i in 0..GspMem::PTE_ARRAY_SIZE {
> -            dma_write!(gsp_mem, [0]?.ptes.0[i], PteArray::<0>::entry(start, i)?);
> +            dma_write!(gsp_mem, .ptes.0[i], PteArray::<0>::entry(start, i)?);
>          }
>  
>          dma_write!(
>              gsp_mem,
> -            [0]?.cpuq.tx,
> +            .cpuq.tx,
>              MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES)
>          );
> -        dma_write!(gsp_mem, [0]?.cpuq.rx, MsgqRxHeader::new());
> +        dma_write!(gsp_mem, .cpuq.rx, MsgqRxHeader::new());
>  
>          Ok(Self(gsp_mem))
>      }
> @@ -255,10 +254,9 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
>          let rx = self.gsp_read_ptr() as usize;
>  
>          // SAFETY:
> -        // - The `CoherentAllocation` contains exactly one object.
>          // - We will only access the driver-owned part of the shared memory.
>          // - Per the safety statement of the function, no concurrent access will be performed.
> -        let gsp_mem = &mut unsafe { self.0.as_slice_mut(0, 1) }.unwrap()[0];
> +        let gsp_mem = unsafe { &mut *self.0.as_mut() };
>          // PANIC: per the invariant of `cpu_write_ptr`, `tx` is `< MSGQ_NUM_PAGES`.
>          let (before_tx, after_tx) = gsp_mem.cpuq.msgq.data.split_at_mut(tx);
>  
> @@ -309,10 +307,9 @@ fn driver_write_area_size(&self) -> usize {
>          let rx = self.cpu_read_ptr() as usize;
>  
>          // SAFETY:
> -        // - The `CoherentAllocation` contains exactly one object.
>          // - We will only access the driver-owned part of the shared memory.
>          // - Per the safety statement of the function, no concurrent access will be performed.
> -        let gsp_mem = &unsafe { self.0.as_slice(0, 1) }.unwrap()[0];
> +        let gsp_mem = unsafe { &*self.0.as_ptr() };
>          let data = &gsp_mem.gspq.msgq.data;
>  
>          // The area starting at `rx` and ending at `tx - 1` modulo MSGQ_NUM_PAGES, inclusive,
> diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
> index 0d8daf6a80b7..847b5eb215d4 100644
> --- a/drivers/gpu/nova-core/gsp/fw.rs
> +++ b/drivers/gpu/nova-core/gsp/fw.rs
> @@ -40,8 +40,7 @@
>      },
>  };
>  
> -// TODO: Replace with `IoView` projections once available; the `unwrap()` calls go away once we
> -// switch to the new `dma::Coherent` API.
> +// TODO: Replace with `IoView` projections once available.
>  pub(super) mod gsp_mem {
>      use core::sync::atomic::{
>          fence,
> @@ -49,10 +48,9 @@ pub(super) mod gsp_mem {
>      };
>  
>      use kernel::{
> -        dma::CoherentAllocation,
> +        dma::Coherent,
>          dma_read,
> -        dma_write,
> -        prelude::*, //
> +        dma_write, //
>      };
>  
>      use crate::gsp::cmdq::{
> @@ -60,49 +58,35 @@ pub(super) mod gsp_mem {
>          MSGQ_NUM_PAGES, //
>      };
>  
> -    pub(in crate::gsp) fn gsp_write_ptr(qs: &CoherentAllocation<GspMem>) -> u32 {
> -        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
> -        || -> Result<u32> { Ok(dma_read!(qs, [0]?.gspq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap()
> +    pub(in crate::gsp) fn gsp_write_ptr(qs: &Coherent<GspMem>) -> u32 {
> +        dma_read!(qs, .gspq.tx.0.writePtr) % MSGQ_NUM_PAGES
>      }
>  
> -    pub(in crate::gsp) fn gsp_read_ptr(qs: &CoherentAllocation<GspMem>) -> u32 {
> -        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
> -        || -> Result<u32> { Ok(dma_read!(qs, [0]?.gspq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap()
> +    pub(in crate::gsp) fn gsp_read_ptr(qs: &Coherent<GspMem>) -> u32 {
> +        dma_read!(qs, .gspq.rx.0.readPtr) % MSGQ_NUM_PAGES
>      }
>  
> -    pub(in crate::gsp) fn cpu_read_ptr(qs: &CoherentAllocation<GspMem>) -> u32 {
> -        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
> -        || -> Result<u32> { Ok(dma_read!(qs, [0]?.cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES) }().unwrap()
> +    pub(in crate::gsp) fn cpu_read_ptr(qs: &Coherent<GspMem>) -> u32 {
> +        dma_read!(qs, .cpuq.rx.0.readPtr) % MSGQ_NUM_PAGES
>      }
>  
> -    pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &CoherentAllocation<GspMem>, count: u32) {
> +    pub(in crate::gsp) fn advance_cpu_read_ptr(qs: &Coherent<GspMem>, count: u32) {
>          let rptr = cpu_read_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES;
>  
>          // Ensure read pointer is properly ordered.
>          fence(Ordering::SeqCst);
>  
> -        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
> -        || -> Result {
> -            dma_write!(qs, [0]?.cpuq.rx.0.readPtr, rptr);
> -            Ok(())
> -        }()
> -        .unwrap()
> +        dma_write!(qs, .cpuq.rx.0.readPtr, rptr);
>      }
>  
> -    pub(in crate::gsp) fn cpu_write_ptr(qs: &CoherentAllocation<GspMem>) -> u32 {
> -        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
> -        || -> Result<u32> { Ok(dma_read!(qs, [0]?.cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES) }().unwrap()
> +    pub(in crate::gsp) fn cpu_write_ptr(qs: &Coherent<GspMem>) -> u32 {
> +        dma_read!(qs, .cpuq.tx.0.writePtr) % MSGQ_NUM_PAGES
>      }
>  
> -    pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &CoherentAllocation<GspMem>, count: u32) {
> +    pub(in crate::gsp) fn advance_cpu_write_ptr(qs: &Coherent<GspMem>, count: u32) {
>          let wptr = cpu_write_ptr(qs).wrapping_add(count) % MSGQ_NUM_PAGES;
>  
> -        // PANIC: A `dma::CoherentAllocation` always contains at least one element.
> -        || -> Result {
> -            dma_write!(qs, [0]?.cpuq.tx.0.writePtr, wptr);
> -            Ok(())
> -        }()
> -        .unwrap();
> +        dma_write!(qs, .cpuq.tx.0.writePtr, wptr);
>  
>          // Ensure all command data is visible before triggering the GSP read.
>          fence(Ordering::SeqCst);
Re: [PATCH v2 8/8] gpu: nova-core: convert to new dma::Coherent API
Posted by Danilo Krummrich 1 week, 6 days ago
On 3/21/26 5:50 PM, Gary Guo wrote:
> It looks like with Alex's "gpu: nova-core: create falcon firmware DMA objects
> lazily" landed, all others users of the old API are now gone.

Good catch.

> So this line could be dropped and `impl CoherentAllocation` and the type alias
> can be removed after this patch.

I will drop the line on apply and add the below patch to remove
CoherentAllocation.

- Danilo

commit e96bc0b65bec48ac0f1cf2fc15b39c1b26b9d973 (HEAD -> drm-rust-next)
Author: Danilo Krummrich <dakr@kernel.org>
Date:   Sat Mar 21 19:18:08 2026 +0100

    rust: dma: remove dma::CoherentAllocation<T>

    Now that everything has been converted to the new dma::Coherent<T> API,
    remove dma::CoherentAllocation<T>.

    Suggested-by: Gary Guo <gary@garyguo.net>
    Signed-off-by: Danilo Krummrich <dakr@kernel.org>

diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs
index 6d2bec52806b..779d4babab9a 100644
--- a/rust/kernel/dma.rs
+++ b/rust/kernel/dma.rs
@@ -841,156 +841,6 @@ pub fn len(&self) -> usize {
     }
 }

-// Type alias for compatibility.
-#[doc(hidden)]
-pub type CoherentAllocation<T> = Coherent<[T]>;
-
-impl<T: AsBytes + FromBytes> CoherentAllocation<T> {
-    /// Allocates a region of `size_of::<T> * count` of coherent memory.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// # use kernel::device::{Bound, Device};
-    /// use kernel::dma::{attrs::*, CoherentAllocation};
-    ///
-    /// # fn test(dev: &Device<Bound>) -> Result {
-    /// let c: CoherentAllocation<u64> =
-    ///     CoherentAllocation::alloc_attrs(dev, 4, GFP_KERNEL, DMA_ATTR_NO_WARN)?;
-    /// # Ok::<(), Error>(()) }
-    /// ```
-    pub fn alloc_attrs(
-        dev: &device::Device<Bound>,
-        count: usize,
-        gfp_flags: kernel::alloc::Flags,
-        dma_attrs: Attrs,
-    ) -> Result<CoherentAllocation<T>> {
-        Coherent::alloc_slice_with_attrs(dev, count, gfp_flags, dma_attrs)
-    }
-
-    /// Performs the same functionality as [`CoherentAllocation::alloc_attrs`], except the
-    /// `dma_attrs` is 0 by default.
-    pub fn alloc_coherent(
-        dev: &device::Device<Bound>,
-        count: usize,
-        gfp_flags: kernel::alloc::Flags,
-    ) -> Result<CoherentAllocation<T>> {
-        CoherentAllocation::alloc_attrs(dev, count, gfp_flags, Attrs(0))
-    }
-
-    /// Returns the base address to the allocated region in the CPU's virtual address space.
-    pub fn start_ptr(&self) -> *const T {
-        self.as_ptr().cast()
-    }
-
-    /// Returns the base address to the allocated region in the CPU's virtual address space as
-    /// a mutable pointer.
-    pub fn start_ptr_mut(&mut self) -> *mut T {
-        self.as_mut_ptr().cast()
-    }
-
-    /// Returns a DMA handle starting at `offset` (in units of `T`) which may be given to the
-    /// device as the DMA address base of the region.
-    ///
-    /// Returns `EINVAL` if `offset` is not within the bounds of the allocation.
-    pub fn dma_handle_with_offset(&self, offset: usize) -> Result<DmaAddress> {
-        if offset >= self.len() {
-            Err(EINVAL)
-        } else {
-            Ok(self.dma_handle + (offset * core::mem::size_of::<T>()) as DmaAddress)
-        }
-    }
-
-    /// Common helper to validate a range applied from the allocated region in the CPU's virtual
-    /// address space.
-    fn validate_range(&self, offset: usize, count: usize) -> Result {
-        if offset.checked_add(count).ok_or(EOVERFLOW)? > self.len() {
-            return Err(EINVAL);
-        }
-        Ok(())
-    }
-
-    /// Returns the data from the region starting from `offset` as a slice.
-    /// `offset` and `count` are in units of `T`, not the number of bytes.
-    ///
-    /// For ringbuffer type of r/w access or use-cases where the pointer to the live data is needed,
-    /// [`CoherentAllocation::start_ptr`] or [`CoherentAllocation::start_ptr_mut`] could be used
-    /// instead.
-    ///
-    /// # Safety
-    ///
-    /// * Callers must ensure that the device does not read/write to/from memory while the returned
-    ///   slice is live.
-    /// * Callers must ensure that this call does not race with a write to the same region while
-    ///   the returned slice is live.
-    pub unsafe fn as_slice(&self, offset: usize, count: usize) -> Result<&[T]> {
-        self.validate_range(offset, count)?;
-        // SAFETY:
-        // - The pointer is valid due to type invariant on `CoherentAllocation`,
-        //   we've just checked that the range and index is within bounds. The immutability of the
-        //   data is also guaranteed by the safety requirements of the function.
-        // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked
-        //   that `self.count` won't overflow early in the constructor.
-        Ok(unsafe { core::slice::from_raw_parts(self.start_ptr().add(offset), count) })
-    }
-
-    /// Performs the same functionality as [`CoherentAllocation::as_slice`], except that a mutable
-    /// slice is returned.
-    ///
-    /// # Safety
-    ///
-    /// * Callers must ensure that the device does not read/write to/from memory while the returned
-    ///   slice is live.
-    /// * Callers must ensure that this call does not race with a read or write to the same region
-    ///   while the returned slice is live.
-    pub unsafe fn as_slice_mut(&mut self, offset: usize, count: usize) -> Result<&mut [T]> {
-        self.validate_range(offset, count)?;
-        // SAFETY:
-        // - The pointer is valid due to type invariant on `CoherentAllocation`,
-        //   we've just checked that the range and index is within bounds. The immutability of the
-        //   data is also guaranteed by the safety requirements of the function.
-        // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked
-        //   that `self.count` won't overflow early in the constructor.
-        Ok(unsafe { core::slice::from_raw_parts_mut(self.start_ptr_mut().add(offset), count) })
-    }
-
-    /// Writes data to the region starting from `offset`. `offset` is in units of `T`, not the
-    /// number of bytes.
-    ///
-    /// # Safety
-    ///
-    /// * Callers must ensure that this call does not race with a read or write to the same region
-    ///   that overlaps with this write.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// # fn test(alloc: &mut kernel::dma::CoherentAllocation<u8>) -> Result {
-    /// let somedata: [u8; 4] = [0xf; 4];
-    /// let buf: &[u8] = &somedata;
-    /// // SAFETY: There is no concurrent HW operation on the device and no other R/W access to the
-    /// // region.
-    /// unsafe { alloc.write(buf, 0)?; }
-    /// # Ok::<(), Error>(()) }
-    /// ```
-    pub unsafe fn write(&mut self, src: &[T], offset: usize) -> Result {
-        self.validate_range(offset, src.len())?;
-        // SAFETY:
-        // - The pointer is valid due to type invariant on `CoherentAllocation`
-        //   and we've just checked that the range and index is within bounds.
-        // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked
-        //   that `self.count` won't overflow early in the constructor.
-        unsafe {
-            core::ptr::copy_nonoverlapping(
-                src.as_ptr(),
-                self.start_ptr_mut().add(offset),
-                src.len(),
-            )
-        };
-        Ok(())
-    }
-}
-
 /// Note that the device configured to do DMA must be halted before this object is dropped.
 impl<T: KnownSize + ?Sized> Drop for Coherent<T> {
     fn drop(&mut self) {
Re: [PATCH v2 8/8] gpu: nova-core: convert to new dma::Coherent API
Posted by Gary Guo 1 week, 6 days ago
On Sat Mar 21, 2026 at 6:22 PM GMT, Danilo Krummrich wrote:
> On 3/21/26 5:50 PM, Gary Guo wrote:
> > It looks like with Alex's "gpu: nova-core: create falcon firmware DMA objects
> > lazily" landed, all other users of the old API are now gone.
> 
> Good catch.
> 
> > So this line could be dropped and `impl CoherentAllocation` and the type alias
> > can be removed after this patch.
> 
> I will drop the line on apply and add the below patch to remove
> CoherentAllocation.
> 
> - Danilo
> 
> commit e96bc0b65bec48ac0f1cf2fc15b39c1b26b9d973 (HEAD -> drm-rust-next)
> Author: Danilo Krummrich <dakr@kernel.org>
> Date:   Sat Mar 21 19:18:08 2026 +0100
> 
>     rust: dma: remove dma::CoherentAllocation<T>
> 
>     Now that everything has been converted to the new dma::Coherent<T> API,
>     remove dma::CoherentAllocation<T>.
> 
>     Suggested-by: Gary Guo <gary@garyguo.net>
>     Signed-off-by: Danilo Krummrich <dakr@kernel.org>

Reviewed-by: Gary Guo <gary@garyguo.net>