[PATCH v5 08/14] gpu: nova-core: gsp: Create rmargs

Alistair Popple posted 14 patches 2 months, 1 week ago
There is a newer version of this series
[PATCH v5 08/14] gpu: nova-core: gsp: Create rmargs
Posted by Alistair Popple 2 months, 1 week ago
Initialise the GSP resource manager arguments (rmargs) which provide
initialisation parameters to the GSP firmware during boot. The rmargs
structure contains arguments to configure the GSP message/command queue
location.

These are mapped for coherent DMA and added to the libos data structure
for access when booting GSP.

Signed-off-by: Alistair Popple <apopple@nvidia.com>

---

Changes for v5:
 - Derive Zeroable trait

Changes for v2:
 - Rebased on Alex's latest series
---
 drivers/gpu/nova-core/gsp.rs                  |  16 +++
 drivers/gpu/nova-core/gsp/cmdq.rs             |  24 +++-
 drivers/gpu/nova-core/gsp/fw.rs               |  60 ++++++++
 .../gpu/nova-core/gsp/fw/r570_144/bindings.rs | 132 ------------------
 4 files changed, 97 insertions(+), 135 deletions(-)

diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index 1d472c5fad7a..58b595b8badd 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -19,6 +19,7 @@
 mod fw;
 
 use fw::LibosMemoryRegionInitArgument;
+use fw::GspArgumentsCached;
 
 pub(crate) mod cmdq;
 
@@ -36,6 +37,7 @@ pub(crate) struct Gsp {
     logintr: LogBuffer,
     logrm: LogBuffer,
     pub(crate) cmdq: Cmdq,
+    rmargs: CoherentAllocation<GspArgumentsCached>,
 }
 
 #[repr(C)]
@@ -117,12 +119,26 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self
 
         // Creates its own PTE array.
         let cmdq = Cmdq::new(dev)?;
+        let rmargs = CoherentAllocation::<GspArgumentsCached>::alloc_coherent(
+            dev,
+            1,
+            GFP_KERNEL | __GFP_ZERO,
+        )?;
+        dma_write!(libos[3] = LibosMemoryRegionInitArgument::new("RMARGS", &rmargs)?)?;
+
+        dma_write!(
+            rmargs[0] = fw::GspArgumentsCached::new(
+                fw::MessageQueueInitArguments::new(&cmdq),
+                fw::GspSrInitArguments::new()
+            )
+        )?;
 
         Ok(try_pin_init!(Self {
             libos,
             loginit,
             logintr,
             logrm,
+            rmargs,
             cmdq,
         }))
     }
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 3f8cb7a35922..da074a2ed0d9 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -6,7 +6,7 @@
 
 use kernel::alloc::flags::GFP_KERNEL;
 use kernel::device;
-use kernel::dma::CoherentAllocation;
+use kernel::dma::{CoherentAllocation, DmaAddress};
 use kernel::dma_write;
 use kernel::io::poll::read_poll_timeout;
 use kernel::prelude::*;
@@ -247,10 +247,25 @@ pub(crate) struct Cmdq {
     dev: ARef<device::Device>,
     seq: u32,
     gsp_mem: DmaGspMem,
-    pub _nr_ptes: u32,
 }
 
 impl Cmdq {
+    /// Offset of the data after the PTEs.
+    const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq);
+
+    /// Offset of command queue ring buffer.
+    pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq)
+        + core::mem::offset_of!(Msgq, msgq)
+        - Self::POST_PTE_OFFSET;
+
+    /// Offset of message queue ring buffer.
+    pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq)
+        + core::mem::offset_of!(Msgq, msgq)
+        - Self::POST_PTE_OFFSET;
+
+    /// Number of page table entries for the GSP shared region.
+    pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
+
     pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<Cmdq> {
         let gsp_mem = DmaGspMem::new(dev)?;
         let nr_ptes = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
@@ -260,7 +275,6 @@ pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<Cmdq> {
             dev: dev.into(),
             seq: 0,
             gsp_mem,
-            _nr_ptes: nr_ptes as u32,
         })
     }
 
@@ -490,4 +504,8 @@ pub(crate) fn receive_msg_from_gsp<M: MessageFromGsp, R>(
             .advance_cpu_read_ptr(msg_header.length().div_ceil(GSP_PAGE_SIZE as u32));
         result
     }
+
+    pub(crate) fn dma_handle(&self) -> DmaAddress {
+        self.gsp_mem.0.dma_handle()
+    }
 }
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index a2ce570ddfaf..70abda1c2af8 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -16,6 +16,7 @@
 
 use crate::firmware::gsp::GspFirmware;
 use crate::gpu::Chipset;
+use crate::gsp::cmdq::Cmdq;
 use crate::gsp::FbLayout;
 use crate::gsp::GSP_PAGE_SIZE;
 
@@ -483,3 +484,62 @@ unsafe impl AsBytes for GspMsgElement {}
 // SAFETY: This struct only contains integer types for which all bit patterns
 // are valid.
 unsafe impl FromBytes for GspMsgElement {}
+
+#[repr(transparent)]
+pub(crate) struct GspArgumentsCached(bindings::GSP_ARGUMENTS_CACHED);
+
+impl GspArgumentsCached {
+    pub(crate) fn new(
+        queue_arguments: MessageQueueInitArguments,
+        sr_arguments: GspSrInitArguments,
+    ) -> Self {
+        Self(bindings::GSP_ARGUMENTS_CACHED {
+            messageQueueInitArguments: queue_arguments.0,
+            srInitArguments: sr_arguments.0,
+            bDmemStack: 1,
+            ..Default::default()
+        })
+    }
+}
+
+impl From<GspArgumentsCached> for bindings::GSP_ARGUMENTS_CACHED {
+    fn from(value: GspArgumentsCached) -> Self {
+        value.0
+    }
+}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for GspArgumentsCached {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
+// are valid.
+unsafe impl FromBytes for GspArgumentsCached {}
+
+#[repr(transparent)]
+pub(crate) struct MessageQueueInitArguments(bindings::MESSAGE_QUEUE_INIT_ARGUMENTS);
+
+impl MessageQueueInitArguments {
+    pub(crate) fn new(cmdq: &Cmdq) -> Self {
+        Self(bindings::MESSAGE_QUEUE_INIT_ARGUMENTS {
+            sharedMemPhysAddr: cmdq.dma_handle(),
+            pageTableEntryCount: Cmdq::NUM_PTES as u32,
+            cmdQueueOffset: Cmdq::CMDQ_OFFSET as u64,
+            statQueueOffset: Cmdq::STATQ_OFFSET as u64,
+            ..Default::default()
+        })
+    }
+}
+
+#[repr(transparent)]
+pub(crate) struct GspSrInitArguments(bindings::GSP_SR_INIT_ARGUMENTS);
+
+impl GspSrInitArguments {
+    pub(crate) fn new() -> Self {
+        Self(bindings::GSP_SR_INIT_ARGUMENTS {
+            oldLevel: 0,
+            flags: 0,
+            bInPMTransition: 0,
+            ..Default::default()
+        })
+    }
+}
diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
index 1251b0c313ce..17fb2392ec3c 100644
--- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
+++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
@@ -321,138 +321,6 @@ fn fmt(&self, fmt: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
 pub type _bindgen_ty_3 = ffi::c_uint;
 #[repr(C)]
 #[derive(Debug, Default, Copy, Clone, Zeroable)]
-pub struct DOD_METHOD_DATA {
-    pub status: u32_,
-    pub acpiIdListLen: u32_,
-    pub acpiIdList: [u32_; 16usize],
-}
-#[repr(C)]
-#[derive(Debug, Default, Copy, Clone, Zeroable)]
-pub struct JT_METHOD_DATA {
-    pub status: u32_,
-    pub jtCaps: u32_,
-    pub jtRevId: u16_,
-    pub bSBIOSCaps: u8_,
-    pub __bindgen_padding_0: u8,
-}
-#[repr(C)]
-#[derive(Debug, Default, Copy, Clone, Zeroable)]
-pub struct MUX_METHOD_DATA_ELEMENT {
-    pub acpiId: u32_,
-    pub mode: u32_,
-    pub status: u32_,
-}
-#[repr(C)]
-#[derive(Debug, Default, Copy, Clone, Zeroable)]
-pub struct MUX_METHOD_DATA {
-    pub tableLen: u32_,
-    pub acpiIdMuxModeTable: [MUX_METHOD_DATA_ELEMENT; 16usize],
-    pub acpiIdMuxPartTable: [MUX_METHOD_DATA_ELEMENT; 16usize],
-    pub acpiIdMuxStateTable: [MUX_METHOD_DATA_ELEMENT; 16usize],
-}
-#[repr(C)]
-#[derive(Debug, Default, Copy, Clone, Zeroable)]
-pub struct CAPS_METHOD_DATA {
-    pub status: u32_,
-    pub optimusCaps: u32_,
-}
-#[repr(C)]
-#[derive(Debug, Default, Copy, Clone, Zeroable)]
-pub struct ACPI_METHOD_DATA {
-    pub bValid: u8_,
-    pub __bindgen_padding_0: [u8; 3usize],
-    pub dodMethodData: DOD_METHOD_DATA,
-    pub jtMethodData: JT_METHOD_DATA,
-    pub muxMethodData: MUX_METHOD_DATA,
-    pub capsMethodData: CAPS_METHOD_DATA,
-}
-#[repr(C)]
-#[derive(Debug, Default, Copy, Clone, Zeroable)]
-pub struct BUSINFO {
-    pub deviceID: u16_,
-    pub vendorID: u16_,
-    pub subdeviceID: u16_,
-    pub subvendorID: u16_,
-    pub revisionID: u8_,
-    pub __bindgen_padding_0: u8,
-}
-#[repr(C)]
-#[derive(Debug, Default, Copy, Clone, Zeroable)]
-pub struct GSP_VF_INFO {
-    pub totalVFs: u32_,
-    pub firstVFOffset: u32_,
-    pub FirstVFBar0Address: u64_,
-    pub FirstVFBar1Address: u64_,
-    pub FirstVFBar2Address: u64_,
-    pub b64bitBar0: u8_,
-    pub b64bitBar1: u8_,
-    pub b64bitBar2: u8_,
-    pub __bindgen_padding_0: [u8; 5usize],
-}
-#[repr(C)]
-#[derive(Debug, Default, Copy, Clone, Zeroable)]
-pub struct GSP_PCIE_CONFIG_REG {
-    pub linkCap: u32_,
-}
-#[repr(C)]
-#[derive(Debug, Default, Copy, Clone, Zeroable)]
-pub struct GspSystemInfo {
-    pub gpuPhysAddr: u64_,
-    pub gpuPhysFbAddr: u64_,
-    pub gpuPhysInstAddr: u64_,
-    pub gpuPhysIoAddr: u64_,
-    pub nvDomainBusDeviceFunc: u64_,
-    pub simAccessBufPhysAddr: u64_,
-    pub notifyOpSharedSurfacePhysAddr: u64_,
-    pub pcieAtomicsOpMask: u64_,
-    pub consoleMemSize: u64_,
-    pub maxUserVa: u64_,
-    pub pciConfigMirrorBase: u32_,
-    pub pciConfigMirrorSize: u32_,
-    pub PCIDeviceID: u32_,
-    pub PCISubDeviceID: u32_,
-    pub PCIRevisionID: u32_,
-    pub pcieAtomicsCplDeviceCapMask: u32_,
-    pub oorArch: u8_,
-    pub __bindgen_padding_0: [u8; 7usize],
-    pub clPdbProperties: u64_,
-    pub Chipset: u32_,
-    pub bGpuBehindBridge: u8_,
-    pub bFlrSupported: u8_,
-    pub b64bBar0Supported: u8_,
-    pub bMnocAvailable: u8_,
-    pub chipsetL1ssEnable: u32_,
-    pub bUpstreamL0sUnsupported: u8_,
-    pub bUpstreamL1Unsupported: u8_,
-    pub bUpstreamL1PorSupported: u8_,
-    pub bUpstreamL1PorMobileOnly: u8_,
-    pub bSystemHasMux: u8_,
-    pub upstreamAddressValid: u8_,
-    pub FHBBusInfo: BUSINFO,
-    pub chipsetIDInfo: BUSINFO,
-    pub __bindgen_padding_1: [u8; 2usize],
-    pub acpiMethodData: ACPI_METHOD_DATA,
-    pub hypervisorType: u32_,
-    pub bIsPassthru: u8_,
-    pub __bindgen_padding_2: [u8; 7usize],
-    pub sysTimerOffsetNs: u64_,
-    pub gspVFInfo: GSP_VF_INFO,
-    pub bIsPrimary: u8_,
-    pub isGridBuild: u8_,
-    pub __bindgen_padding_3: [u8; 2usize],
-    pub pcieConfigReg: GSP_PCIE_CONFIG_REG,
-    pub gridBuildCsp: u32_,
-    pub bPreserveVideoMemoryAllocations: u8_,
-    pub bTdrEventSupported: u8_,
-    pub bFeatureStretchVblankCapable: u8_,
-    pub bEnableDynamicGranularityPageArrays: u8_,
-    pub bClockBoostSupported: u8_,
-    pub bRouteDispIntrsToCPU: u8_,
-    pub __bindgen_padding_4: [u8; 6usize],
-    pub hostPageSize: u64_,
-}
-#[repr(C)]
-#[derive(Debug, Default, Copy, Clone, Zeroable)]
 pub struct MESSAGE_QUEUE_INIT_ARGUMENTS {
     pub sharedMemPhysAddr: u64_,
     pub pageTableEntryCount: u32_,
-- 
2.50.1
Re: [PATCH v5 08/14] gpu: nova-core: gsp: Create rmargs
Posted by Alexandre Courbot 2 months ago
On Mon Oct 13, 2025 at 3:20 PM JST, Alistair Popple wrote:
> Initialise the GSP resource manager arguments (rmargs) which provide
> initialisation parameters to the GSP firmware during boot. The rmargs
> structure contains arguments to configure the GSP message/command queue
> location.
>
> These are mapped for coherent DMA and added to the libos data structure
> for access when booting GSP.
>
> Signed-off-by: Alistair Popple <apopple@nvidia.com>
>
> ---
>
> Changes for v5:
>  - Derive Zeroable trait
>
> Changes for v2:
>  - Rebased on Alex's latest series
> ---
>  drivers/gpu/nova-core/gsp.rs                  |  16 +++
>  drivers/gpu/nova-core/gsp/cmdq.rs             |  24 +++-
>  drivers/gpu/nova-core/gsp/fw.rs               |  60 ++++++++
>  .../gpu/nova-core/gsp/fw/r570_144/bindings.rs | 132 ------------------

Mmm, looks like we are removing bindings. Can we not add them in the
first place? :)

>  4 files changed, 97 insertions(+), 135 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
> index 1d472c5fad7a..58b595b8badd 100644
> --- a/drivers/gpu/nova-core/gsp.rs
> +++ b/drivers/gpu/nova-core/gsp.rs
> @@ -19,6 +19,7 @@
>  mod fw;
>  
>  use fw::LibosMemoryRegionInitArgument;
> +use fw::GspArgumentsCached;
>  
>  pub(crate) mod cmdq;
>  
> @@ -36,6 +37,7 @@ pub(crate) struct Gsp {
>      logintr: LogBuffer,
>      logrm: LogBuffer,
>      pub(crate) cmdq: Cmdq,
> +    rmargs: CoherentAllocation<GspArgumentsCached>,
>  }
>  
>  #[repr(C)]
> @@ -117,12 +119,26 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self
>  
>          // Creates its own PTE array.
>          let cmdq = Cmdq::new(dev)?;
> +        let rmargs = CoherentAllocation::<GspArgumentsCached>::alloc_coherent(

Let's add a space between the declaration of `cmdq` and `rmargs`.

> +            dev,
> +            1,
> +            GFP_KERNEL | __GFP_ZERO,
> +        )?;
> +        dma_write!(libos[3] = LibosMemoryRegionInitArgument::new("RMARGS", &rmargs)?)?;
> +
> +        dma_write!(
> +            rmargs[0] = fw::GspArgumentsCached::new(
> +                fw::MessageQueueInitArguments::new(&cmdq),
> +                fw::GspSrInitArguments::new()
> +            )
> +        )?;
>  
>          Ok(try_pin_init!(Self {
>              libos,
>              loginit,
>              logintr,
>              logrm,
> +            rmargs,
>              cmdq,
>          }))
>      }
> diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> index 3f8cb7a35922..da074a2ed0d9 100644
> --- a/drivers/gpu/nova-core/gsp/cmdq.rs
> +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> @@ -6,7 +6,7 @@
>  
>  use kernel::alloc::flags::GFP_KERNEL;
>  use kernel::device;
> -use kernel::dma::CoherentAllocation;
> +use kernel::dma::{CoherentAllocation, DmaAddress};
>  use kernel::dma_write;
>  use kernel::io::poll::read_poll_timeout;
>  use kernel::prelude::*;
> @@ -247,10 +247,25 @@ pub(crate) struct Cmdq {
>      dev: ARef<device::Device>,
>      seq: u32,
>      gsp_mem: DmaGspMem,
> -    pub _nr_ptes: u32,

We probably shouldn't have introduced this unused member in the first place.

>  }
>  
>  impl Cmdq {
> +    /// Offset of the data after the PTEs.
> +    const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq);
> +
> +    /// Offset of command queue ring buffer.
> +    pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq)
> +        + core::mem::offset_of!(Msgq, msgq)
> +        - Self::POST_PTE_OFFSET;
> +
> +    /// Offset of message queue ring buffer.
> +    pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq)
> +        + core::mem::offset_of!(Msgq, msgq)
> +        - Self::POST_PTE_OFFSET;
> +
> +    /// Number of page table entries for the GSP shared region.
> +    pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
> +
>      pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<Cmdq> {
>          let gsp_mem = DmaGspMem::new(dev)?;
>          let nr_ptes = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
> @@ -260,7 +275,6 @@ pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<Cmdq> {
>              dev: dev.into(),
>              seq: 0,
>              gsp_mem,
> -            _nr_ptes: nr_ptes as u32,
>          })
>      }
>  
> @@ -490,4 +504,8 @@ pub(crate) fn receive_msg_from_gsp<M: MessageFromGsp, R>(
>              .advance_cpu_read_ptr(msg_header.length().div_ceil(GSP_PAGE_SIZE as u32));
>          result
>      }
> +
> +    pub(crate) fn dma_handle(&self) -> DmaAddress {
> +        self.gsp_mem.0.dma_handle()
> +    }
>  }
> diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
> index a2ce570ddfaf..70abda1c2af8 100644
> --- a/drivers/gpu/nova-core/gsp/fw.rs
> +++ b/drivers/gpu/nova-core/gsp/fw.rs
> @@ -16,6 +16,7 @@
>  
>  use crate::firmware::gsp::GspFirmware;
>  use crate::gpu::Chipset;
> +use crate::gsp::cmdq::Cmdq;
>  use crate::gsp::FbLayout;
>  use crate::gsp::GSP_PAGE_SIZE;
>  
> @@ -483,3 +484,62 @@ unsafe impl AsBytes for GspMsgElement {}
>  // SAFETY: This struct only contains integer types for which all bit patterns
>  // are valid.
>  unsafe impl FromBytes for GspMsgElement {}
> +
> +#[repr(transparent)]
> +pub(crate) struct GspArgumentsCached(bindings::GSP_ARGUMENTS_CACHED);
> +
> +impl GspArgumentsCached {
> +    pub(crate) fn new(
> +        queue_arguments: MessageQueueInitArguments,
> +        sr_arguments: GspSrInitArguments,
> +    ) -> Self {
> +        Self(bindings::GSP_ARGUMENTS_CACHED {
> +            messageQueueInitArguments: queue_arguments.0,
> +            srInitArguments: sr_arguments.0,
> +            bDmemStack: 1,
> +            ..Default::default()
> +        })
> +    }
> +}
> +
> +impl From<GspArgumentsCached> for bindings::GSP_ARGUMENTS_CACHED {
> +    fn from(value: GspArgumentsCached) -> Self {
> +        value.0
> +    }
> +}

This `From` impl seems unneeded?
Re: [PATCH v5 08/14] gpu: nova-core: gsp: Create rmargs
Posted by Alistair Popple 2 months ago
On 2025-10-16 at 17:24 +1100, Alexandre Courbot <acourbot@nvidia.com> wrote...
> On Mon Oct 13, 2025 at 3:20 PM JST, Alistair Popple wrote:
> > Initialise the GSP resource manager arguments (rmargs) which provide
> > initialisation parameters to the GSP firmware during boot. The rmargs
> > structure contains arguments to configure the GSP message/command queue
> > location.
> >
> > These are mapped for coherent DMA and added to the libos data structure
> > for access when booting GSP.
> >
> > Signed-off-by: Alistair Popple <apopple@nvidia.com>
> >
> > ---
> >
> > Changes for v5:
> >  - Derive Zeroable trait
> >
> > Changes for v2:
> >  - Rebased on Alex's latest series
> > ---
> >  drivers/gpu/nova-core/gsp.rs                  |  16 +++
> >  drivers/gpu/nova-core/gsp/cmdq.rs             |  24 +++-
> >  drivers/gpu/nova-core/gsp/fw.rs               |  60 ++++++++
> >  .../gpu/nova-core/gsp/fw/r570_144/bindings.rs | 132 ------------------
> 
> Mmm, looks like we are removing bindings. Can we not add them in the
> first place? :)

Bah. That's just bad rebasing - updating the bindings to add Zeroable to them
was as much of a pain as I thought it would be :-)

> >  4 files changed, 97 insertions(+), 135 deletions(-)
> >
> > diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
> > index 1d472c5fad7a..58b595b8badd 100644
> > --- a/drivers/gpu/nova-core/gsp.rs
> > +++ b/drivers/gpu/nova-core/gsp.rs
> > @@ -19,6 +19,7 @@
> >  mod fw;
> >  
> >  use fw::LibosMemoryRegionInitArgument;
> > +use fw::GspArgumentsCached;
> >  
> >  pub(crate) mod cmdq;
> >  
> > @@ -36,6 +37,7 @@ pub(crate) struct Gsp {
> >      logintr: LogBuffer,
> >      logrm: LogBuffer,
> >      pub(crate) cmdq: Cmdq,
> > +    rmargs: CoherentAllocation<GspArgumentsCached>,
> >  }
> >  
> >  #[repr(C)]
> > @@ -117,12 +119,26 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self
> >  
> >          // Creates its own PTE array.
> >          let cmdq = Cmdq::new(dev)?;
> > +        let rmargs = CoherentAllocation::<GspArgumentsCached>::alloc_coherent(
> 
> Let's add a space between the declaration of `cmdq` and `rmargs`.

Ok.

> 
> > +            dev,
> > +            1,
> > +            GFP_KERNEL | __GFP_ZERO,
> > +        )?;
> > +        dma_write!(libos[3] = LibosMemoryRegionInitArgument::new("RMARGS", &rmargs)?)?;
> > +
> > +        dma_write!(
> > +            rmargs[0] = fw::GspArgumentsCached::new(
> > +                fw::MessageQueueInitArguments::new(&cmdq),
> > +                fw::GspSrInitArguments::new()
> > +            )
> > +        )?;
> >  
> >          Ok(try_pin_init!(Self {
> >              libos,
> >              loginit,
> >              logintr,
> >              logrm,
> > +            rmargs,
> >              cmdq,
> >          }))
> >      }
> > diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> > index 3f8cb7a35922..da074a2ed0d9 100644
> > --- a/drivers/gpu/nova-core/gsp/cmdq.rs
> > +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> > @@ -6,7 +6,7 @@
> >  
> >  use kernel::alloc::flags::GFP_KERNEL;
> >  use kernel::device;
> > -use kernel::dma::CoherentAllocation;
> > +use kernel::dma::{CoherentAllocation, DmaAddress};
> >  use kernel::dma_write;
> >  use kernel::io::poll::read_poll_timeout;
> >  use kernel::prelude::*;
> > @@ -247,10 +247,25 @@ pub(crate) struct Cmdq {
> >      dev: ARef<device::Device>,
> >      seq: u32,
> >      gsp_mem: DmaGspMem,
> > -    pub _nr_ptes: u32,
> 
> We probably shouldn't have introduced this unused member in the first place.

Good point, it's a hangover from the implementations in previous versions.

> 
> >  }
> >  
> >  impl Cmdq {
> > +    /// Offset of the data after the PTEs.
> > +    const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq);
> > +
> > +    /// Offset of command queue ring buffer.
> > +    pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq)
> > +        + core::mem::offset_of!(Msgq, msgq)
> > +        - Self::POST_PTE_OFFSET;
> > +
> > +    /// Offset of message queue ring buffer.
> > +    pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq)
> > +        + core::mem::offset_of!(Msgq, msgq)
> > +        - Self::POST_PTE_OFFSET;
> > +
> > +    /// Number of page table entries for the GSP shared region.
> > +    pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
> > +
> >      pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<Cmdq> {
> >          let gsp_mem = DmaGspMem::new(dev)?;
> >          let nr_ptes = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
> > @@ -260,7 +275,6 @@ pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<Cmdq> {
> >              dev: dev.into(),
> >              seq: 0,
> >              gsp_mem,
> > -            _nr_ptes: nr_ptes as u32,
> >          })
> >      }
> >  
> > @@ -490,4 +504,8 @@ pub(crate) fn receive_msg_from_gsp<M: MessageFromGsp, R>(
> >              .advance_cpu_read_ptr(msg_header.length().div_ceil(GSP_PAGE_SIZE as u32));
> >          result
> >      }
> > +
> > +    pub(crate) fn dma_handle(&self) -> DmaAddress {
> > +        self.gsp_mem.0.dma_handle()
> > +    }
> >  }
> > diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
> > index a2ce570ddfaf..70abda1c2af8 100644
> > --- a/drivers/gpu/nova-core/gsp/fw.rs
> > +++ b/drivers/gpu/nova-core/gsp/fw.rs
> > @@ -16,6 +16,7 @@
> >  
> >  use crate::firmware::gsp::GspFirmware;
> >  use crate::gpu::Chipset;
> > +use crate::gsp::cmdq::Cmdq;
> >  use crate::gsp::FbLayout;
> >  use crate::gsp::GSP_PAGE_SIZE;
> >  
> > @@ -483,3 +484,62 @@ unsafe impl AsBytes for GspMsgElement {}
> >  // SAFETY: This struct only contains integer types for which all bit patterns
> >  // are valid.
> >  unsafe impl FromBytes for GspMsgElement {}
> > +
> > +#[repr(transparent)]
> > +pub(crate) struct GspArgumentsCached(bindings::GSP_ARGUMENTS_CACHED);
> > +
> > +impl GspArgumentsCached {
> > +    pub(crate) fn new(
> > +        queue_arguments: MessageQueueInitArguments,
> > +        sr_arguments: GspSrInitArguments,
> > +    ) -> Self {
> > +        Self(bindings::GSP_ARGUMENTS_CACHED {
> > +            messageQueueInitArguments: queue_arguments.0,
> > +            srInitArguments: sr_arguments.0,
> > +            bDmemStack: 1,
> > +            ..Default::default()
> > +        })
> > +    }
> > +}
> > +
> > +impl From<GspArgumentsCached> for bindings::GSP_ARGUMENTS_CACHED {
> > +    fn from(value: GspArgumentsCached) -> Self {
> > +        value.0
> > +    }
> > +}
> 
> This `From` impl seems unneeded?

Indeed. I don't remember why I added it, must have been needed in an earlier
version. I'm surprised clippy doesn't complain.