[v2] gpu: nova-core: Boot GSP to RISC-V active

[PATCH v2 06/10] gpu: nova-core: gsp: Create rmargs

Posted by Alistair Popple 4 months, 2 weeks ago

Initialise the GSP resource manager arguments (rmargs) which provide
initialisation parameters to the GSP firmware during boot. The rmargs
structure contains arguments to configure the GSP message/command queue
location.

These are mapped for coherent DMA and added to the libos data structure
for access when booting GSP.

Signed-off-by: Alistair Popple <apopple@nvidia.com>

---

Changes for v2:
 - Rebased on Alex's latest series
---
 drivers/gpu/nova-core/gsp.rs                  | 29 +++++++++++++++-
 drivers/gpu/nova-core/gsp/cmdq.rs             | 14 ++++++--
 drivers/gpu/nova-core/gsp/fw.rs               | 19 +++++++++++
 .../gpu/nova-core/gsp/fw/r570_144/bindings.rs | 33 +++++++++++++++++++
 4 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index 3d4028d67d2e..bb08bd537ec4 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -17,7 +17,10 @@
 use crate::fb::FbLayout;
 use crate::gsp::cmdq::GspCmdq;
 
-use fw::LibosMemoryRegionInitArgument;
+use fw::{
+    LibosMemoryRegionInitArgument, GSP_ARGUMENTS_CACHED, GSP_SR_INIT_ARGUMENTS,
+    MESSAGE_QUEUE_INIT_ARGUMENTS,
+};
 
 pub(crate) mod cmdq;
 
@@ -33,6 +36,7 @@ pub(crate) struct Gsp {
     pub logintr: CoherentAllocation<u8>,
     pub logrm: CoherentAllocation<u8>,
     pub cmdq: GspCmdq,
+    rmargs: CoherentAllocation<GSP_ARGUMENTS_CACHED>,
 }
 
 /// Creates a self-mapping page table for `obj` at its beginning.
@@ -90,12 +94,35 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self
 
         // Creates its own PTE array
         let cmdq = GspCmdq::new(dev)?;
+        let rmargs =
+            create_coherent_dma_object::<GSP_ARGUMENTS_CACHED>(dev, "RMARGS", 1, &mut libos, 3)?;
+        let (shared_mem_phys_addr, cmd_queue_offset, stat_queue_offset) = cmdq.get_cmdq_offsets();
+
+        dma_write!(
+            rmargs[0].messageQueueInitArguments = MESSAGE_QUEUE_INIT_ARGUMENTS {
+                sharedMemPhysAddr: shared_mem_phys_addr,
+                pageTableEntryCount: cmdq.nr_ptes,
+                cmdQueueOffset: cmd_queue_offset,
+                statQueueOffset: stat_queue_offset,
+                ..Default::default()
+            }
+        )?;
+        dma_write!(
+            rmargs[0].srInitArguments = GSP_SR_INIT_ARGUMENTS {
+                oldLevel: 0,
+                flags: 0,
+                bInPMTransition: 0,
+                ..Default::default()
+            }
+        )?;
+        dma_write!(rmargs[0].bDmemStack = 1)?;
 
         Ok(try_pin_init!(Self {
             libos,
             loginit,
             logintr,
             logrm,
+            rmargs,
             cmdq,
         }))
     }
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index a9ba1a4c73d8..9170ccf4a064 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -99,7 +99,6 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
         Ok(Self(gsp_mem))
     }
 
-    #[expect(unused)]
     fn dma_handle(&self) -> DmaAddress {
         self.0.dma_handle()
     }
@@ -218,7 +217,7 @@ pub(crate) struct GspCmdq {
     dev: ARef<device::Device>,
     seq: u32,
     gsp_mem: DmaGspMem,
-    pub _nr_ptes: u32,
+    pub nr_ptes: u32,
 }
 
 impl GspCmdq {
@@ -231,7 +230,7 @@ pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<GspCmdq> {
             dev: dev.into(),
             seq: 0,
             gsp_mem,
-            _nr_ptes: nr_ptes as u32,
+            nr_ptes: nr_ptes as u32,
         })
     }
 
@@ -382,6 +381,15 @@ pub(crate) fn receive_msg_from_gsp<M: GspMessageFromGsp, R>(
             .advance_cpu_read_ptr(msg_header.rpc.length.div_ceil(GSP_PAGE_SIZE as u32));
         result
     }
+
+    pub(crate) fn get_cmdq_offsets(&self) -> (u64, u64, u64) {
+        (
+            self.gsp_mem.dma_handle(),
+            core::mem::offset_of!(Msgq, msgq) as u64,
+            (core::mem::offset_of!(GspMem, gspq) - core::mem::offset_of!(GspMem, cpuq)
+                + core::mem::offset_of!(Msgq, msgq)) as u64,
+        )
+    }
 }
 
 fn decode_gsp_function(function: u32) -> &'static str {
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index 2e4255301e58..06841b103328 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -158,9 +158,15 @@ pub(crate) fn new(gsp_firmware: &GspFirmware, fb_layout: &FbLayout) -> Self {
 }
 
 pub(crate) use r570_144::{
+    GSP_ARGUMENTS_CACHED,
+
     // GSP firmware constants
     GSP_FW_WPR_META_MAGIC,
     GSP_FW_WPR_META_REVISION,
+    GSP_SR_INIT_ARGUMENTS,
+
+    // RM message queue parameters
+    MESSAGE_QUEUE_INIT_ARGUMENTS,
 
     // GSP events
     NV_VGPU_MSG_EVENT_GSP_INIT_DONE,
@@ -313,3 +319,16 @@ pub(crate) fn new(sequence: u32, cmd_size: usize, function: u32) -> Self {
         }
     }
 }
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for GSP_ARGUMENTS_CACHED {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
+// are valid.
+unsafe impl FromBytes for GSP_ARGUMENTS_CACHED {}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for MESSAGE_QUEUE_INIT_ARGUMENTS {}
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for GSP_SR_INIT_ARGUMENTS {}
diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
index 3d96d91e5b12..b87c4e6cb857 100644
--- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
+++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
@@ -319,6 +319,39 @@ fn fmt(&self, fmt: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
 pub const NV_VGPU_MSG_EVENT_NUM_EVENTS: _bindgen_ty_3 = 4131;
 pub type _bindgen_ty_3 = ffi::c_uint;
 #[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct MESSAGE_QUEUE_INIT_ARGUMENTS {
+    pub sharedMemPhysAddr: u64_,
+    pub pageTableEntryCount: u32_,
+    pub __bindgen_padding_0: [u8; 4usize],
+    pub cmdQueueOffset: u64_,
+    pub statQueueOffset: u64_,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct GSP_SR_INIT_ARGUMENTS {
+    pub oldLevel: u32_,
+    pub flags: u32_,
+    pub bInPMTransition: u8_,
+    pub __bindgen_padding_0: [u8; 3usize],
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct GSP_ARGUMENTS_CACHED {
+    pub messageQueueInitArguments: MESSAGE_QUEUE_INIT_ARGUMENTS,
+    pub srInitArguments: GSP_SR_INIT_ARGUMENTS,
+    pub gpuInstance: u32_,
+    pub bDmemStack: u8_,
+    pub __bindgen_padding_0: [u8; 7usize],
+    pub profilerArgs: GSP_ARGUMENTS_CACHED__bindgen_ty_1,
+}
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct GSP_ARGUMENTS_CACHED__bindgen_ty_1 {
+    pub pa: u64_,
+    pub size: u64_,
+}
+#[repr(C)]
 #[derive(Copy, Clone)]
 pub union rpc_message_rpc_union_field_v03_00 {
     pub spare: u32_,
-- 
2.50.1

Re: [PATCH v2 06/10] gpu: nova-core: gsp: Create rmargs

Posted by Alexandre Courbot 4 months, 2 weeks ago

On Mon Sep 22, 2025 at 8:30 PM JST, Alistair Popple wrote:
> Initialise the GSP resource manager arguments (rmargs) which provide
> initialisation parameters to the GSP firmware during boot. The rmargs
> structure contains arguments to configure the GSP message/command queue
> location.
>
> These are mapped for coherent DMA and added to the libos data structure
> for access when booting GSP.
>
> Signed-off-by: Alistair Popple <apopple@nvidia.com>
>
> ---
>
> Changes for v2:
>  - Rebased on Alex's latest series
> ---
>  drivers/gpu/nova-core/gsp.rs                  | 29 +++++++++++++++-
>  drivers/gpu/nova-core/gsp/cmdq.rs             | 14 ++++++--
>  drivers/gpu/nova-core/gsp/fw.rs               | 19 +++++++++++
>  .../gpu/nova-core/gsp/fw/r570_144/bindings.rs | 33 +++++++++++++++++++
>  4 files changed, 91 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
> index 3d4028d67d2e..bb08bd537ec4 100644
> --- a/drivers/gpu/nova-core/gsp.rs
> +++ b/drivers/gpu/nova-core/gsp.rs
> @@ -17,7 +17,10 @@
>  use crate::fb::FbLayout;
>  use crate::gsp::cmdq::GspCmdq;
>  
> -use fw::LibosMemoryRegionInitArgument;
> +use fw::{
> +    LibosMemoryRegionInitArgument, GSP_ARGUMENTS_CACHED, GSP_SR_INIT_ARGUMENTS,
> +    MESSAGE_QUEUE_INIT_ARGUMENTS,
> +};
>  
>  pub(crate) mod cmdq;
>  
> @@ -33,6 +36,7 @@ pub(crate) struct Gsp {
>      pub logintr: CoherentAllocation<u8>,
>      pub logrm: CoherentAllocation<u8>,
>      pub cmdq: GspCmdq,
> +    rmargs: CoherentAllocation<GSP_ARGUMENTS_CACHED>,
>  }
>  
>  /// Creates a self-mapping page table for `obj` at its beginning.
> @@ -90,12 +94,35 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self
>  
>          // Creates its own PTE array
>          let cmdq = GspCmdq::new(dev)?;
> +        let rmargs =
> +            create_coherent_dma_object::<GSP_ARGUMENTS_CACHED>(dev, "RMARGS", 1, &mut libos, 3)?;
> +        let (shared_mem_phys_addr, cmd_queue_offset, stat_queue_offset) = cmdq.get_cmdq_offsets();
> +
> +        dma_write!(
> +            rmargs[0].messageQueueInitArguments = MESSAGE_QUEUE_INIT_ARGUMENTS {
> +                sharedMemPhysAddr: shared_mem_phys_addr,
> +                pageTableEntryCount: cmdq.nr_ptes,
> +                cmdQueueOffset: cmd_queue_offset,
> +                statQueueOffset: stat_queue_offset,
> +                ..Default::default()
> +            }
> +        )?;
> +        dma_write!(
> +            rmargs[0].srInitArguments = GSP_SR_INIT_ARGUMENTS {
> +                oldLevel: 0,
> +                flags: 0,
> +                bInPMTransition: 0,
> +                ..Default::default()
> +            }
> +        )?;
> +        dma_write!(rmargs[0].bDmemStack = 1)?;

Wrapping our bindings is going to help clean up this code as well.

First, types named in CAPITALS_SNAKE_CASE are not idiomatic Rust and
look like constants. And it's not even like the bindings types are
consistently named that way, since we also have e.g. `GspFwWprMeta` - so
let's give them a proper public name and bring some consistency at the
same time.

This will make all the fields from `GSP_ARGUMENTS_CACHED` invisible to
this module as they should be, so the wrapping `GspArgumentsCached` type
should then have a constructor that receives a referene to the command
queue and takes the information is needs from it, similarly to
`GspFwWprMeta`. This will reduce the 3 `dma_write!` into a single one.

Then we should remove `get_cmdq_offsets`, which is super confusing. I am
also not fond of `cmdq.nr_ptes`. More on them below.

>  
>          Ok(try_pin_init!(Self {
>              libos,
>              loginit,
>              logintr,
>              logrm,
> +            rmargs,
>              cmdq,
>          }))
>      }
> diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> index a9ba1a4c73d8..9170ccf4a064 100644
> --- a/drivers/gpu/nova-core/gsp/cmdq.rs
> +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> @@ -99,7 +99,6 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
>          Ok(Self(gsp_mem))
>      }
>  
> -    #[expect(unused)]
>      fn dma_handle(&self) -> DmaAddress {
>          self.0.dma_handle()
>      }
> @@ -218,7 +217,7 @@ pub(crate) struct GspCmdq {
>      dev: ARef<device::Device>,
>      seq: u32,
>      gsp_mem: DmaGspMem,
> -    pub _nr_ptes: u32,
> +    pub nr_ptes: u32,
>  }
>  
>  impl GspCmdq {
> @@ -231,7 +230,7 @@ pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<GspCmdq> {
>              dev: dev.into(),
>              seq: 0,
>              gsp_mem,
> -            _nr_ptes: nr_ptes as u32,
> +            nr_ptes: nr_ptes as u32,
>          })
>      }
>  
> @@ -382,6 +381,15 @@ pub(crate) fn receive_msg_from_gsp<M: GspMessageFromGsp, R>(
>              .advance_cpu_read_ptr(msg_header.rpc.length.div_ceil(GSP_PAGE_SIZE as u32));
>          result
>      }
> +
> +    pub(crate) fn get_cmdq_offsets(&self) -> (u64, u64, u64) {
> +        (
> +            self.gsp_mem.dma_handle(),
> +            core::mem::offset_of!(Msgq, msgq) as u64,
> +            (core::mem::offset_of!(GspMem, gspq) - core::mem::offset_of!(GspMem, cpuq)
> +                + core::mem::offset_of!(Msgq, msgq)) as u64,
> +        )
> +    }

So this thing returns 3 u64s, one of which is actually a DMA handle,
while the two others are technically constants. The only thing that
needs to be inferred at runtime is the DMA handle - all the rest is
static.

So we can make the two last returned values associated constants of
`GspCmdq`:

  impl GspCmdq {
      /// Offset of the data after the PTEs.
      const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq);

      /// Offset of command queue ring buffer.
      pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq)
          + core::mem::offset_of!(Msgq, msgq)
          - Self::POST_PTE_OFFSET;

      /// Offset of message queue ring buffer.
      pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq)
          + core::mem::offset_of!(Msgq, msgq)
          - Self::POST_PTE_OFFSET;

`GspArgumentsCached::new` can then import `GspCmdq` and use these to
initialize its corresponding members.

Remains `nr_ptes`. It was introduced in the previous patch as follows:

    let nr_ptes = size_of::<GspMem>() >> GSP_PAGE_SHIFT;

Which turns out to also be a constant! So let's add it next to the others:

impl GspCmdq {
    ...
    /// Number of page table entries for the GSP shared region.
    pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT;

And you can remove `GspCmdq::nr_ptes` altogether.

With this, `GspArgumentsCached::new` can take a reference to the
`GspCmdq` to initialize from, grab its DMA handle, and initialize
everything else using the constants we defined above. We remove a bunch
of inconsistently-named imports from `gsp.rs`, and replace
firmware-dependent incantations to initialize our GSP arguments with a
single constructor call that tells exactly what it does in a single
line.

Re: [PATCH v2 06/10] gpu: nova-core: gsp: Create rmargs

Posted by Alistair Popple 4 months, 1 week ago

On 2025-09-26 at 17:27 +1000, Alexandre Courbot <acourbot@nvidia.com> wrote...
> On Mon Sep 22, 2025 at 8:30 PM JST, Alistair Popple wrote:
> > Initialise the GSP resource manager arguments (rmargs) which provide
> > initialisation parameters to the GSP firmware during boot. The rmargs
> > structure contains arguments to configure the GSP message/command queue
> > location.
> >
> > These are mapped for coherent DMA and added to the libos data structure
> > for access when booting GSP.
> >
> > Signed-off-by: Alistair Popple <apopple@nvidia.com>
> >
> > ---
> >
> > Changes for v2:
> >  - Rebased on Alex's latest series
> > ---
> >  drivers/gpu/nova-core/gsp.rs                  | 29 +++++++++++++++-
> >  drivers/gpu/nova-core/gsp/cmdq.rs             | 14 ++++++--
> >  drivers/gpu/nova-core/gsp/fw.rs               | 19 +++++++++++
> >  .../gpu/nova-core/gsp/fw/r570_144/bindings.rs | 33 +++++++++++++++++++
> >  4 files changed, 91 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
> > index 3d4028d67d2e..bb08bd537ec4 100644
> > --- a/drivers/gpu/nova-core/gsp.rs
> > +++ b/drivers/gpu/nova-core/gsp.rs
> > @@ -17,7 +17,10 @@
> >  use crate::fb::FbLayout;
> >  use crate::gsp::cmdq::GspCmdq;
> >  
> > -use fw::LibosMemoryRegionInitArgument;
> > +use fw::{
> > +    LibosMemoryRegionInitArgument, GSP_ARGUMENTS_CACHED, GSP_SR_INIT_ARGUMENTS,
> > +    MESSAGE_QUEUE_INIT_ARGUMENTS,
> > +};
> >  
> >  pub(crate) mod cmdq;
> >  
> > @@ -33,6 +36,7 @@ pub(crate) struct Gsp {
> >      pub logintr: CoherentAllocation<u8>,
> >      pub logrm: CoherentAllocation<u8>,
> >      pub cmdq: GspCmdq,
> > +    rmargs: CoherentAllocation<GSP_ARGUMENTS_CACHED>,
> >  }
> >  
> >  /// Creates a self-mapping page table for `obj` at its beginning.
> > @@ -90,12 +94,35 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self
> >  
> >          // Creates its own PTE array
> >          let cmdq = GspCmdq::new(dev)?;
> > +        let rmargs =
> > +            create_coherent_dma_object::<GSP_ARGUMENTS_CACHED>(dev, "RMARGS", 1, &mut libos, 3)?;
> > +        let (shared_mem_phys_addr, cmd_queue_offset, stat_queue_offset) = cmdq.get_cmdq_offsets();
> > +
> > +        dma_write!(
> > +            rmargs[0].messageQueueInitArguments = MESSAGE_QUEUE_INIT_ARGUMENTS {
> > +                sharedMemPhysAddr: shared_mem_phys_addr,
> > +                pageTableEntryCount: cmdq.nr_ptes,
> > +                cmdQueueOffset: cmd_queue_offset,
> > +                statQueueOffset: stat_queue_offset,
> > +                ..Default::default()
> > +            }
> > +        )?;
> > +        dma_write!(
> > +            rmargs[0].srInitArguments = GSP_SR_INIT_ARGUMENTS {
> > +                oldLevel: 0,
> > +                flags: 0,
> > +                bInPMTransition: 0,
> > +                ..Default::default()
> > +            }
> > +        )?;
> > +        dma_write!(rmargs[0].bDmemStack = 1)?;
> 
> Wrapping our bindings is going to help clean up this code as well.
> 
> First, types named in CAPITALS_SNAKE_CASE are not idiomatic Rust and
> look like constants. And it's not even like the bindings types are
> consistently named that way, since we also have e.g. `GspFwWprMeta` - so
> let's give them a proper public name and bring some consistency at the
> same time.

I think there are two aspects to the bindings - one which was addressed in
the comments for patch 5 is how to abstract them. The other is that the way we
currently generate them results in some  ugly name.

Given we want to generate these from our internal IDL eventually I would favour
fixing this naming ugliness by touching up the currently generated bindings. So
maybe I will do that for the next revision.

> This will make all the fields from `GSP_ARGUMENTS_CACHED` invisible to
> this module as they should be, so the wrapping `GspArgumentsCached` type
> should then have a constructor that receives a referene to the command
> queue and takes the information is needs from it, similarly to
> `GspFwWprMeta`. This will reduce the 3 `dma_write!` into a single one.
> 
> Then we should remove `get_cmdq_offsets`, which is super confusing. I am
> also not fond of `cmdq.nr_ptes`. More on them below.

I will admit that was a bit of a hack.

> >  
> >          Ok(try_pin_init!(Self {
> >              libos,
> >              loginit,
> >              logintr,
> >              logrm,
> > +            rmargs,
> >              cmdq,
> >          }))
> >      }
> > diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> > index a9ba1a4c73d8..9170ccf4a064 100644
> > --- a/drivers/gpu/nova-core/gsp/cmdq.rs
> > +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> > @@ -99,7 +99,6 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
> >          Ok(Self(gsp_mem))
> >      }
> >  
> > -    #[expect(unused)]
> >      fn dma_handle(&self) -> DmaAddress {
> >          self.0.dma_handle()
> >      }
> > @@ -218,7 +217,7 @@ pub(crate) struct GspCmdq {
> >      dev: ARef<device::Device>,
> >      seq: u32,
> >      gsp_mem: DmaGspMem,
> > -    pub _nr_ptes: u32,
> > +    pub nr_ptes: u32,
> >  }
> >  
> >  impl GspCmdq {
> > @@ -231,7 +230,7 @@ pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<GspCmdq> {
> >              dev: dev.into(),
> >              seq: 0,
> >              gsp_mem,
> > -            _nr_ptes: nr_ptes as u32,
> > +            nr_ptes: nr_ptes as u32,
> >          })
> >      }
> >  
> > @@ -382,6 +381,15 @@ pub(crate) fn receive_msg_from_gsp<M: GspMessageFromGsp, R>(
> >              .advance_cpu_read_ptr(msg_header.rpc.length.div_ceil(GSP_PAGE_SIZE as u32));
> >          result
> >      }
> > +
> > +    pub(crate) fn get_cmdq_offsets(&self) -> (u64, u64, u64) {
> > +        (
> > +            self.gsp_mem.dma_handle(),
> > +            core::mem::offset_of!(Msgq, msgq) as u64,
> > +            (core::mem::offset_of!(GspMem, gspq) - core::mem::offset_of!(GspMem, cpuq)
> > +                + core::mem::offset_of!(Msgq, msgq)) as u64,
> > +        )
> > +    }
> 
> So this thing returns 3 u64s, one of which is actually a DMA handle,
> while the two others are technically constants. The only thing that
> needs to be inferred at runtime is the DMA handle - all the rest is
> static.

Thanks! That is a useful observation for cleaning these up.

> So we can make the two last returned values associated constants of
> `GspCmdq`:
> 
>   impl GspCmdq {
>       /// Offset of the data after the PTEs.
>       const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq);
> 
>       /// Offset of command queue ring buffer.
>       pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq)
>           + core::mem::offset_of!(Msgq, msgq)
>           - Self::POST_PTE_OFFSET;
> 
>       /// Offset of message queue ring buffer.
>       pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq)
>           + core::mem::offset_of!(Msgq, msgq)
>           - Self::POST_PTE_OFFSET;
> 
> `GspArgumentsCached::new` can then import `GspCmdq` and use these to
> initialize its corresponding members.
> 
> Remains `nr_ptes`. It was introduced in the previous patch as follows:
> 
>     let nr_ptes = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
> 
> Which turns out to also be a constant! So let's add it next to the others:
> 
> impl GspCmdq {
>     ...
>     /// Number of page table entries for the GSP shared region.
>     pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
> 
> And you can remove `GspCmdq::nr_ptes` altogether.
> 
> With this, `GspArgumentsCached::new` can take a reference to the
> `GspCmdq` to initialize from, grab its DMA handle, and initialize
> everything else using the constants we defined above. We remove a bunch
> of inconsistently-named imports from `gsp.rs`, and replace
> firmware-dependent incantations to initialize our GSP arguments with a
> single constructor call that tells exactly what it does in a single
> line.

So this would also live in `fw.rs`? What I'm really concerned about is that if
we're not allowed access the C bindings outside of `fw.rs` then everything ends
up in `fw.rs`, and worse still `fw.rs` basically ends up importing everything as
well, tightly coupling everything into one big blob.

So ugly naming aside, I really can't see the advantage of this. It would seem
much cleaner to have to have gsp.rs import the C bindings it needs and tie that
togeather with the other higher level Gsp abstractions.

Although I agree with the overall sentiment - the code could be better here -
we could still create GspArgumentsCached::new() as you suggest just there's no
reason for it to be in `fw.rs`. The type aliases I had were just to hide the
ugly names really.

Re: [PATCH v2 06/10] gpu: nova-core: gsp: Create rmargs

Posted by Danilo Krummrich 4 months, 1 week ago

On Mon Sep 29, 2025 at 8:36 AM CEST, Alistair Popple wrote:
> On 2025-09-26 at 17:27 +1000, Alexandre Courbot <acourbot@nvidia.com> wrote...
>> On Mon Sep 22, 2025 at 8:30 PM JST, Alistair Popple wrote:
>> > @@ -33,6 +36,7 @@ pub(crate) struct Gsp {
>> >      pub logintr: CoherentAllocation<u8>,
>> >      pub logrm: CoherentAllocation<u8>,
>> >      pub cmdq: GspCmdq,
>> > +    rmargs: CoherentAllocation<GSP_ARGUMENTS_CACHED>,
>> >  }
>> >  
>> >  /// Creates a self-mapping page table for `obj` at its beginning.
>> > @@ -90,12 +94,35 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self
>> >  
>> >          // Creates its own PTE array
>> >          let cmdq = GspCmdq::new(dev)?;
>> > +        let rmargs =
>> > +            create_coherent_dma_object::<GSP_ARGUMENTS_CACHED>(dev, "RMARGS", 1, &mut libos, 3)?;
>> > +        let (shared_mem_phys_addr, cmd_queue_offset, stat_queue_offset) = cmdq.get_cmdq_offsets();
>> > +
>> > +        dma_write!(
>> > +            rmargs[0].messageQueueInitArguments = MESSAGE_QUEUE_INIT_ARGUMENTS {
>> > +                sharedMemPhysAddr: shared_mem_phys_addr,
>> > +                pageTableEntryCount: cmdq.nr_ptes,
>> > +                cmdQueueOffset: cmd_queue_offset,
>> > +                statQueueOffset: stat_queue_offset,
>> > +                ..Default::default()
>> > +            }
>> > +        )?;
>> > +        dma_write!(
>> > +            rmargs[0].srInitArguments = GSP_SR_INIT_ARGUMENTS {
>> > +                oldLevel: 0,
>> > +                flags: 0,
>> > +                bInPMTransition: 0,
>> > +                ..Default::default()
>> > +            }
>> > +        )?;
>> > +        dma_write!(rmargs[0].bDmemStack = 1)?;
>> 
>> Wrapping our bindings is going to help clean up this code as well.
>> 
>> First, types named in CAPITALS_SNAKE_CASE are not idiomatic Rust and
>> look like constants. And it's not even like the bindings types are
>> consistently named that way, since we also have e.g. `GspFwWprMeta` - so
>> let's give them a proper public name and bring some consistency at the
>> same time.
>
> I think there are two aspects to the bindings - one which was addressed in
> the comments for patch 5 is how to abstract them. The other is that the way we
> currently generate them results in some  ugly name.
>
> Given we want to generate these from our internal IDL eventually I would favour
> fixing this naming ugliness by touching up the currently generated bindings. So
> maybe I will do that for the next revision.

It's not about fixing the name of the generated C bindings, it's about not
leaking firmware specific structures into core code of the driver.

Please hide it in an abstraction that can deal with differences between firmware
version internally; see also [1].

[1] https://lore.kernel.org/all/DCUAYNNP97QI.1VOX5XUS9KP7K@kernel.org/

>> This will make all the fields from `GSP_ARGUMENTS_CACHED` invisible to
>> this module as they should be, so the wrapping `GspArgumentsCached` type
>> should then have a constructor that receives a referene to the command
>> queue and takes the information is needs from it, similarly to
>> `GspFwWprMeta`. This will reduce the 3 `dma_write!` into a single one.
>> 
>> Then we should remove `get_cmdq_offsets`, which is super confusing. I am
>> also not fond of `cmdq.nr_ptes`. More on them below.
>
> I will admit that was a bit of a hack.
>
>> >  
>> >          Ok(try_pin_init!(Self {
>> >              libos,
>> >              loginit,
>> >              logintr,
>> >              logrm,
>> > +            rmargs,
>> >              cmdq,
>> >          }))
>> >      }
>> > diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
>> > index a9ba1a4c73d8..9170ccf4a064 100644
>> > --- a/drivers/gpu/nova-core/gsp/cmdq.rs
>> > +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
>> > @@ -99,7 +99,6 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
>> >          Ok(Self(gsp_mem))
>> >      }
>> >  
>> > -    #[expect(unused)]
>> >      fn dma_handle(&self) -> DmaAddress {
>> >          self.0.dma_handle()
>> >      }
>> > @@ -218,7 +217,7 @@ pub(crate) struct GspCmdq {
>> >      dev: ARef<device::Device>,
>> >      seq: u32,
>> >      gsp_mem: DmaGspMem,
>> > -    pub _nr_ptes: u32,
>> > +    pub nr_ptes: u32,
>> >  }
>> >  
>> >  impl GspCmdq {
>> > @@ -231,7 +230,7 @@ pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<GspCmdq> {
>> >              dev: dev.into(),
>> >              seq: 0,
>> >              gsp_mem,
>> > -            _nr_ptes: nr_ptes as u32,
>> > +            nr_ptes: nr_ptes as u32,
>> >          })
>> >      }
>> >  
>> > @@ -382,6 +381,15 @@ pub(crate) fn receive_msg_from_gsp<M: GspMessageFromGsp, R>(
>> >              .advance_cpu_read_ptr(msg_header.rpc.length.div_ceil(GSP_PAGE_SIZE as u32));
>> >          result
>> >      }
>> > +
>> > +    pub(crate) fn get_cmdq_offsets(&self) -> (u64, u64, u64) {
>> > +        (
>> > +            self.gsp_mem.dma_handle(),
>> > +            core::mem::offset_of!(Msgq, msgq) as u64,
>> > +            (core::mem::offset_of!(GspMem, gspq) - core::mem::offset_of!(GspMem, cpuq)
>> > +                + core::mem::offset_of!(Msgq, msgq)) as u64,
>> > +        )
>> > +    }
>> 
>> So this thing returns 3 u64s, one of which is actually a DMA handle,
>> while the two others are technically constants. The only thing that
>> needs to be inferred at runtime is the DMA handle - all the rest is
>> static.
>
> Thanks! That is a useful observation for cleaning these up.

Please also make sure to use the DmaAddress type instead of a raw u64 for DMA
addresses.

>> So we can make the two last returned values associated constants of
>> `GspCmdq`:
>> 
>>   impl GspCmdq {
>>       /// Offset of the data after the PTEs.
>>       const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq);
>> 
>>       /// Offset of command queue ring buffer.
>>       pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq)
>>           + core::mem::offset_of!(Msgq, msgq)
>>           - Self::POST_PTE_OFFSET;
>> 
>>       /// Offset of message queue ring buffer.
>>       pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq)
>>           + core::mem::offset_of!(Msgq, msgq)
>>           - Self::POST_PTE_OFFSET;
>> 
>> `GspArgumentsCached::new` can then import `GspCmdq` and use these to
>> initialize its corresponding members.
>> 
>> Remains `nr_ptes`. It was introduced in the previous patch as follows:
>> 
>>     let nr_ptes = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
>> 
>> Which turns out to also be a constant! So let's add it next to the others:
>> 
>> impl GspCmdq {
>>     ...
>>     /// Number of page table entries for the GSP shared region.
>>     pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
>> 
>> And you can remove `GspCmdq::nr_ptes` altogether.
>> 
>> With this, `GspArgumentsCached::new` can take a reference to the
>> `GspCmdq` to initialize from, grab its DMA handle, and initialize
>> everything else using the constants we defined above. We remove a bunch
>> of inconsistently-named imports from `gsp.rs`, and replace
>> firmware-dependent incantations to initialize our GSP arguments with a
>> single constructor call that tells exactly what it does in a single
>> line.
>
> So this would also live in `fw.rs`? What I'm really concerned about is that if
> we're not allowed access the C bindings outside of `fw.rs` then everything ends
> up in `fw.rs`, and worse still `fw.rs` basically ends up importing everything as
> well, tightly coupling everything into one big blob.

You can (and probably should) extend the module structure, i.e. add a
sub-directory ./gsp/fw/ and structure things accordingly.

Re: [PATCH v2 06/10] gpu: nova-core: gsp: Create rmargs

Posted by Alistair Popple 4 months, 1 week ago

On 2025-09-29 at 17:18 +1000, Danilo Krummrich <dakr@kernel.org> wrote...
> On Mon Sep 29, 2025 at 8:36 AM CEST, Alistair Popple wrote:
> > On 2025-09-26 at 17:27 +1000, Alexandre Courbot <acourbot@nvidia.com> wrote...
> >> On Mon Sep 22, 2025 at 8:30 PM JST, Alistair Popple wrote:
> >> > @@ -33,6 +36,7 @@ pub(crate) struct Gsp {
> >> >      pub logintr: CoherentAllocation<u8>,
> >> >      pub logrm: CoherentAllocation<u8>,
> >> >      pub cmdq: GspCmdq,
> >> > +    rmargs: CoherentAllocation<GSP_ARGUMENTS_CACHED>,
> >> >  }
> >> >  
> >> >  /// Creates a self-mapping page table for `obj` at its beginning.
> >> > @@ -90,12 +94,35 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self
> >> >  
> >> >          // Creates its own PTE array
> >> >          let cmdq = GspCmdq::new(dev)?;
> >> > +        let rmargs =
> >> > +            create_coherent_dma_object::<GSP_ARGUMENTS_CACHED>(dev, "RMARGS", 1, &mut libos, 3)?;
> >> > +        let (shared_mem_phys_addr, cmd_queue_offset, stat_queue_offset) = cmdq.get_cmdq_offsets();
> >> > +
> >> > +        dma_write!(
> >> > +            rmargs[0].messageQueueInitArguments = MESSAGE_QUEUE_INIT_ARGUMENTS {
> >> > +                sharedMemPhysAddr: shared_mem_phys_addr,
> >> > +                pageTableEntryCount: cmdq.nr_ptes,
> >> > +                cmdQueueOffset: cmd_queue_offset,
> >> > +                statQueueOffset: stat_queue_offset,
> >> > +                ..Default::default()
> >> > +            }
> >> > +        )?;
> >> > +        dma_write!(
> >> > +            rmargs[0].srInitArguments = GSP_SR_INIT_ARGUMENTS {
> >> > +                oldLevel: 0,
> >> > +                flags: 0,
> >> > +                bInPMTransition: 0,
> >> > +                ..Default::default()
> >> > +            }
> >> > +        )?;
> >> > +        dma_write!(rmargs[0].bDmemStack = 1)?;
> >> 
> >> Wrapping our bindings is going to help clean up this code as well.
> >> 
> >> First, types named in CAPITALS_SNAKE_CASE are not idiomatic Rust and
> >> look like constants. And it's not even like the bindings types are
> >> consistently named that way, since we also have e.g. `GspFwWprMeta` - so
> >> let's give them a proper public name and bring some consistency at the
> >> same time.
> >
> > I think there are two aspects to the bindings - one which was addressed in
> > the comments for patch 5 is how to abstract them. The other is that the way we
> > currently generate them results in some  ugly name.
> >
> > Given we want to generate these from our internal IDL eventually I would favour
> > fixing this naming ugliness by touching up the currently generated bindings. So
> > maybe I will do that for the next revision.
> 
> It's not about fixing the name of the generated C bindings, it's about not
> leaking firmware specific structures into core code of the driver.

I don't disagree. Please see my comments on patch 5, which deals more with the
type of abstraction we want to provide.

> Please hide it in an abstraction that can deal with differences between firmware
> version internally; see also [1].
> 
> [1] https://lore.kernel.org/all/DCUAYNNP97QI.1VOX5XUS9KP7K@kernel.org/
> 
> >> This will make all the fields from `GSP_ARGUMENTS_CACHED` invisible to
> >> this module as they should be, so the wrapping `GspArgumentsCached` type
> >> should then have a constructor that receives a referene to the command
> >> queue and takes the information is needs from it, similarly to
> >> `GspFwWprMeta`. This will reduce the 3 `dma_write!` into a single one.
> >> 
> >> Then we should remove `get_cmdq_offsets`, which is super confusing. I am
> >> also not fond of `cmdq.nr_ptes`. More on them below.
> >
> > I will admit that was a bit of a hack.
> >
> >> >  
> >> >          Ok(try_pin_init!(Self {
> >> >              libos,
> >> >              loginit,
> >> >              logintr,
> >> >              logrm,
> >> > +            rmargs,
> >> >              cmdq,
> >> >          }))
> >> >      }
> >> > diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> >> > index a9ba1a4c73d8..9170ccf4a064 100644
> >> > --- a/drivers/gpu/nova-core/gsp/cmdq.rs
> >> > +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> >> > @@ -99,7 +99,6 @@ fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
> >> >          Ok(Self(gsp_mem))
> >> >      }
> >> >  
> >> > -    #[expect(unused)]
> >> >      fn dma_handle(&self) -> DmaAddress {
> >> >          self.0.dma_handle()
> >> >      }
> >> > @@ -218,7 +217,7 @@ pub(crate) struct GspCmdq {
> >> >      dev: ARef<device::Device>,
> >> >      seq: u32,
> >> >      gsp_mem: DmaGspMem,
> >> > -    pub _nr_ptes: u32,
> >> > +    pub nr_ptes: u32,
> >> >  }
> >> >  
> >> >  impl GspCmdq {
> >> > @@ -231,7 +230,7 @@ pub(crate) fn new(dev: &device::Device<device::Bound>) -> Result<GspCmdq> {
> >> >              dev: dev.into(),
> >> >              seq: 0,
> >> >              gsp_mem,
> >> > -            _nr_ptes: nr_ptes as u32,
> >> > +            nr_ptes: nr_ptes as u32,
> >> >          })
> >> >      }
> >> >  
> >> > @@ -382,6 +381,15 @@ pub(crate) fn receive_msg_from_gsp<M: GspMessageFromGsp, R>(
> >> >              .advance_cpu_read_ptr(msg_header.rpc.length.div_ceil(GSP_PAGE_SIZE as u32));
> >> >          result
> >> >      }
> >> > +
> >> > +    pub(crate) fn get_cmdq_offsets(&self) -> (u64, u64, u64) {
> >> > +        (
> >> > +            self.gsp_mem.dma_handle(),
> >> > +            core::mem::offset_of!(Msgq, msgq) as u64,
> >> > +            (core::mem::offset_of!(GspMem, gspq) - core::mem::offset_of!(GspMem, cpuq)
> >> > +                + core::mem::offset_of!(Msgq, msgq)) as u64,
> >> > +        )
> >> > +    }
> >> 
> >> So this thing returns 3 u64s, one of which is actually a DMA handle,
> >> while the two others are technically constants. The only thing that
> >> needs to be inferred at runtime is the DMA handle - all the rest is
> >> static.
> >
> > Thanks! That is a useful observation for cleaning these up.
> 
> Please also make sure to use the DmaAddress type instead of a raw u64 for DMA
> addresses.
> 
> >> So we can make the two last returned values associated constants of
> >> `GspCmdq`:
> >> 
> >>   impl GspCmdq {
> >>       /// Offset of the data after the PTEs.
> >>       const POST_PTE_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq);
> >> 
> >>       /// Offset of command queue ring buffer.
> >>       pub(crate) const CMDQ_OFFSET: usize = core::mem::offset_of!(GspMem, cpuq)
> >>           + core::mem::offset_of!(Msgq, msgq)
> >>           - Self::POST_PTE_OFFSET;
> >> 
> >>       /// Offset of message queue ring buffer.
> >>       pub(crate) const STATQ_OFFSET: usize = core::mem::offset_of!(GspMem, gspq)
> >>           + core::mem::offset_of!(Msgq, msgq)
> >>           - Self::POST_PTE_OFFSET;
> >> 
> >> `GspArgumentsCached::new` can then import `GspCmdq` and use these to
> >> initialize its corresponding members.
> >> 
> >> Remains `nr_ptes`. It was introduced in the previous patch as follows:
> >> 
> >>     let nr_ptes = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
> >> 
> >> Which turns out to also be a constant! So let's add it next to the others:
> >> 
> >> impl GspCmdq {
> >>     ...
> >>     /// Number of page table entries for the GSP shared region.
> >>     pub(crate) const NUM_PTES: usize = size_of::<GspMem>() >> GSP_PAGE_SHIFT;
> >> 
> >> And you can remove `GspCmdq::nr_ptes` altogether.
> >> 
> >> With this, `GspArgumentsCached::new` can take a reference to the
> >> `GspCmdq` to initialize from, grab its DMA handle, and initialize
> >> everything else using the constants we defined above. We remove a bunch
> >> of inconsistently-named imports from `gsp.rs`, and replace
> >> firmware-dependent incantations to initialize our GSP arguments with a
> >> single constructor call that tells exactly what it does in a single
> >> line.
> >
> > So this would also live in `fw.rs`? What I'm really concerned about is that if
> > we're not allowed access the C bindings outside of `fw.rs` then everything ends
> > up in `fw.rs`, and worse still `fw.rs` basically ends up importing everything as
> > well, tightly coupling everything into one big blob.
> 
> You can (and probably should) extend the module structure, i.e. add a
> sub-directory ./gsp/fw/ and structure things accordingly.

This is what I thought the gsp directory was for, providing abstractions for the
Gsp. IMHO another layer of abstraction below this seems unnecessary, although
this is discussed in a bit more detail at the end of my comments on patch 5.