[PATCH v5 02/14] gpu: nova-core: Create initial Gsp

Alistair Popple posted 14 patches 2 months, 1 week ago
There is a newer version of this series
[PATCH v5 02/14] gpu: nova-core: Create initial Gsp
Posted by Alistair Popple 2 months, 1 week ago
The GSP requires several areas of memory to operate. Each of these has
its own simple embedded page table. Set these up and map them for DMA
to/from GSP using CoherentAllocations. Return the DMA handle describing
where each of these regions is for future use when booting GSP.

Signed-off-by: Alistair Popple <apopple@nvidia.com>

---

Changes for v5:
 - Move GSP_HEAP_ALIGNMENT to gsp/fw.rs and add a comment.
 - Create a LogBuffer type.
 - Use checked_add to ensure PTE values don't overflow.
 - Added some type documentation (shamelessly stolen from Nouveau)

Changes for v3:
 - Clean up the PTE array creation, with much thanks to Alex for doing
   most of it (please let me know if I should put you as co-developer!)

Changes for v2:
 - Renamed GspMemObjects to Gsp as that is what they are
 - Rebased on Alex's latest series
---
 drivers/gpu/nova-core/gpu.rs                  |   2 +-
 drivers/gpu/nova-core/gsp.rs                  | 106 ++++++++++++++++--
 drivers/gpu/nova-core/gsp/fw.rs               |  64 ++++++++++-
 .../gpu/nova-core/gsp/fw/r570_144/bindings.rs |  19 ++++
 4 files changed, 179 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index ea124d1912e7..c1396775e9b6 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -197,7 +197,7 @@ pub(crate) fn new<'a>(
 
             sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?,
 
-            gsp <- Gsp::new(),
+            gsp <- Gsp::new(pdev)?,
 
             _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
 
diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
index 221281da1a45..f1727173bd42 100644
--- a/drivers/gpu/nova-core/gsp.rs
+++ b/drivers/gpu/nova-core/gsp.rs
@@ -2,25 +2,117 @@
 
 mod boot;
 
+use kernel::device;
+use kernel::dma::CoherentAllocation;
+use kernel::dma::DmaAddress;
+use kernel::dma_write;
+use kernel::pci;
 use kernel::prelude::*;
-use kernel::ptr::Alignment;
+use kernel::transmute::AsBytes;
 
 pub(crate) use fw::{GspFwWprMeta, LibosParams};
 
 mod fw;
 
+use fw::LibosMemoryRegionInitArgument;
+
 pub(crate) const GSP_PAGE_SHIFT: usize = 12;
 pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT;
-pub(crate) const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>();
+
+/// Number of GSP pages to use in a RM log buffer.
+const RM_LOG_BUFFER_NUM_PAGES: usize = 0x10;
 
 /// GSP runtime data.
-///
-/// This is an empty pinned placeholder for now.
 #[pin_data]
-pub(crate) struct Gsp {}
+pub(crate) struct Gsp {
+    pub(crate) libos: CoherentAllocation<LibosMemoryRegionInitArgument>,
+    loginit: LogBuffer,
+    logintr: LogBuffer,
+    logrm: LogBuffer,
+}
+
+#[repr(C)]
+struct PteArray<const NUM_ENTRIES: usize>([u64; NUM_ENTRIES]);
+
+/// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around it.
+unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
+
+impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
+    fn new(handle: DmaAddress) -> Result<Self> {
+        let mut ptes = [0u64; NUM_PAGES];
+        for (i, pte) in ptes.iter_mut().enumerate() {
+            *pte = handle
+                .checked_add((i as u64) << GSP_PAGE_SHIFT)
+                .ok_or(EOVERFLOW)?;
+        }
+
+        Ok(Self(ptes))
+    }
+}
+
+/// The logging buffers are byte queues that contain encoded printf-like
+/// messages from GSP-RM.  They need to be decoded by a special application
+/// that can parse the buffers.
+///
+/// The 'loginit' buffer contains logs from early GSP-RM init and
+/// exception dumps.  The 'logrm' buffer contains the subsequent logs. Both are
+/// written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE.
+///
+/// The physical address map for the log buffer is stored in the buffer
+/// itself, starting with offset 1. Offset 0 contains the "put" pointer (pp).
+/// Initially, pp is equal to 0. If the buffer has valid logging data in it,
+/// then pp points to index into the buffer where the next logging entry will
+/// be written. Therefore, the logging data is valid if:
+///   1 <= pp < sizeof(buffer)/sizeof(u64)
+struct LogBuffer(CoherentAllocation<u8>);
+
+impl LogBuffer {
+    fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
+        const NUM_PAGES: usize = RM_LOG_BUFFER_NUM_PAGES;
+
+        let mut obj = Self(CoherentAllocation::<u8>::alloc_coherent(
+            dev,
+            NUM_PAGES * GSP_PAGE_SIZE,
+            GFP_KERNEL | __GFP_ZERO,
+        )?);
+        let ptes = PteArray::<NUM_PAGES>::new(obj.0.dma_handle())?;
+
+        // SAFETY: `obj` has just been created and we are its sole user.
+        unsafe {
+            // Copy the self-mapping PTE at the expected location.
+            obj.0
+                .as_slice_mut(size_of::<u64>(), size_of_val(&ptes))?
+                .copy_from_slice(ptes.as_bytes())
+        };
+
+        Ok(obj)
+    }
+}
 
 impl Gsp {
-    pub(crate) fn new() -> impl PinInit<Self> {
-        pin_init!(Self {})
+    pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self, Error>> {
+        let dev = pdev.as_ref();
+        let libos = CoherentAllocation::<LibosMemoryRegionInitArgument>::alloc_coherent(
+            dev,
+            GSP_PAGE_SIZE / size_of::<LibosMemoryRegionInitArgument>(),
+            GFP_KERNEL | __GFP_ZERO,
+        )?;
+
+        // Initialise the logging structures. The OpenRM equivalents are in:
+        // _kgspInitLibosLoggingStructures (allocates memory for buffers)
+        // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array)
+        let loginit = LogBuffer::new(dev)?;
+        dma_write!(libos[0] = LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0)?)?;
+        let logintr = LogBuffer::new(dev)?;
+        dma_write!(libos[1] = LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0)?)?;
+        let logrm = LogBuffer::new(dev)?;
+        dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0)?)?;
+
+        Ok(try_pin_init!(Self {
+            libos,
+            loginit,
+            logintr,
+            logrm,
+        }))
     }
 }
diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
index 181baa401770..c3bececc29cd 100644
--- a/drivers/gpu/nova-core/gsp/fw.rs
+++ b/drivers/gpu/nova-core/gsp/fw.rs
@@ -7,15 +7,20 @@
 
 use core::ops::Range;
 
-use kernel::ptr::Alignable;
+use kernel::dma::CoherentAllocation;
+use kernel::prelude::*;
+use kernel::ptr::{Alignable, Alignment};
 use kernel::sizes::SZ_1M;
+use kernel::transmute::{AsBytes, FromBytes};
 
 use crate::gpu::Chipset;
-use crate::gsp;
 
 /// Dummy type to group methods related to heap parameters for running the GSP firmware.
 pub(crate) struct GspFwHeapParams(());
 
+/// Minimum required alignment for the GSP heap.
+const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>();
+
 impl GspFwHeapParams {
     /// Returns the amount of GSP-RM heap memory used during GSP-RM boot and initialization (up to
     /// and including the first client subdevice allocation).
@@ -29,7 +34,7 @@ fn base_rm_size(_chipset: Chipset) -> u64 {
     /// Returns the amount of heap memory required to support a single channel allocation.
     fn client_alloc_size() -> u64 {
         u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE)
-            .align_up(gsp::GSP_HEAP_ALIGNMENT)
+            .align_up(GSP_HEAP_ALIGNMENT)
             .unwrap_or(u64::MAX)
     }
 
@@ -40,7 +45,7 @@ fn management_overhead(fb_size: u64) -> u64 {
 
         u64::from(bindings::GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB)
             .saturating_mul(fb_size_gb)
-            .align_up(gsp::GSP_HEAP_ALIGNMENT)
+            .align_up(GSP_HEAP_ALIGNMENT)
             .unwrap_or(u64::MAX)
     }
 }
@@ -99,3 +104,54 @@ pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> u64 {
 /// addresses of the GSP bootloader and firmware.
 #[repr(transparent)]
 pub(crate) struct GspFwWprMeta(bindings::GspFwWprMeta);
+
+/// Struct containing the arguments required to pass a memory buffer to the GSP
+/// for use during initialisation.
+///
+/// The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is
+/// configured for a larger page size (e.g. 64K pages), we need to give
+/// the GSP an array of 4K pages. Since we only create physically contiguous
+/// buffers the math to calculate the addresses is simple.
+///
+/// The buffers must be a multiple of GSP_PAGE_SIZE.  GSP-RM also currently
+/// ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the
+/// buffers to be physically contiguous anyway.
+///
+/// The memory allocated for the arguments must remain until the GSP sends the
+/// init_done RPC.
+#[repr(transparent)]
+pub(crate) struct LibosMemoryRegionInitArgument(bindings::LibosMemoryRegionInitArgument);
+
+// SAFETY: Padding is explicit and will not contain uninitialized data.
+unsafe impl AsBytes for LibosMemoryRegionInitArgument {}
+
+// SAFETY: This struct only contains integer types for which all bit patterns
+// are valid.
+unsafe impl FromBytes for LibosMemoryRegionInitArgument {}
+
+impl LibosMemoryRegionInitArgument {
+    pub(crate) fn new<A: AsBytes + FromBytes>(
+        name: &'static str,
+        obj: &CoherentAllocation<A>,
+    ) -> Result<Self> {
+        /// Generates the `ID8` identifier required for some GSP objects.
+        fn id8(name: &str) -> u64 {
+            let mut bytes = [0u8; core::mem::size_of::<u64>()];
+
+            for (c, b) in name.bytes().rev().zip(&mut bytes) {
+                *b = c;
+            }
+
+            u64::from_ne_bytes(bytes)
+        }
+
+        Ok(Self(bindings::LibosMemoryRegionInitArgument {
+            id8: id8(name),
+            pa: obj.dma_handle(),
+            size: obj.size() as u64,
+            kind: bindings::LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS.try_into()?,
+            loc: bindings::LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM.try_into()?,
+            ..Default::default()
+        }))
+    }
+}
diff --git a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
index 0407000cca22..6a14cc324391 100644
--- a/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
+++ b/drivers/gpu/nova-core/gsp/fw/r570_144/bindings.rs
@@ -124,3 +124,22 @@ fn default() -> Self {
         }
     }
 }
+pub type LibosAddress = u64_;
+pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_NONE: LibosMemoryRegionKind = 0;
+pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS: LibosMemoryRegionKind = 1;
+pub const LibosMemoryRegionKind_LIBOS_MEMORY_REGION_RADIX3: LibosMemoryRegionKind = 2;
+pub type LibosMemoryRegionKind = ffi::c_uint;
+pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_NONE: LibosMemoryRegionLoc = 0;
+pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM: LibosMemoryRegionLoc = 1;
+pub const LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_FB: LibosMemoryRegionLoc = 2;
+pub type LibosMemoryRegionLoc = ffi::c_uint;
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct LibosMemoryRegionInitArgument {
+    pub id8: LibosAddress,
+    pub pa: LibosAddress,
+    pub size: LibosAddress,
+    pub kind: u8_,
+    pub loc: u8_,
+    pub __bindgen_padding_0: [u8; 6usize],
+}
-- 
2.50.1
Re: [PATCH v5 02/14] gpu: nova-core: Create initial Gsp
Posted by Alexandre Courbot 2 months ago
On Mon Oct 13, 2025 at 3:20 PM JST, Alistair Popple wrote:
> The GSP requires several areas of memory to operate. Each of these have
> their own simple embedded page tables. Set these up and map them for DMA
> to/from GSP using CoherentAllocation's. Return the DMA handle describing
> where each of these regions are for future use when booting GSP.
>
> Signed-off-by: Alistair Popple <apopple@nvidia.com>
>
> ---
>
> Changes for v5:
>  - Move GSP_HEAP_ALIGNMENT to gsp/fw.rs and add a comment.
>  - Create a LogBuffer type.
>  - Use checked_add to ensure PTE values don't overflow.
>  - Added some type documentation (shamelessly stolen from Nouveau)
>
> Change for v3:
>  - Clean up the PTE array creation, with much thanks to Alex for doing
>    most it (please let me know if I should put you as co-developer!)
>
> Changes for v2:
>  - Renamed GspMemOjbects to Gsp as that is what they are
>  - Rebased on Alex's latest series
> ---
>  drivers/gpu/nova-core/gpu.rs                  |   2 +-
>  drivers/gpu/nova-core/gsp.rs                  | 106 ++++++++++++++++--
>  drivers/gpu/nova-core/gsp/fw.rs               |  64 ++++++++++-
>  .../gpu/nova-core/gsp/fw/r570_144/bindings.rs |  19 ++++
>  4 files changed, 179 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index ea124d1912e7..c1396775e9b6 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -197,7 +197,7 @@ pub(crate) fn new<'a>(
>  
>              sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?,
>  
> -            gsp <- Gsp::new(),
> +            gsp <- Gsp::new(pdev)?,
>  
>              _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
>  
> diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
> index 221281da1a45..f1727173bd42 100644
> --- a/drivers/gpu/nova-core/gsp.rs
> +++ b/drivers/gpu/nova-core/gsp.rs
> @@ -2,25 +2,117 @@
>  
>  mod boot;
>  
> +use kernel::device;
> +use kernel::dma::CoherentAllocation;
> +use kernel::dma::DmaAddress;
> +use kernel::dma_write;
> +use kernel::pci;
>  use kernel::prelude::*;
> -use kernel::ptr::Alignment;
> +use kernel::transmute::AsBytes;
>  
>  pub(crate) use fw::{GspFwWprMeta, LibosParams};
>  
>  mod fw;
>  
> +use fw::LibosMemoryRegionInitArgument;
> +
>  pub(crate) const GSP_PAGE_SHIFT: usize = 12;
>  pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT;
> -pub(crate) const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>();
> +
> +/// Number of GSP pages to use in a RM log buffer.
> +const RM_LOG_BUFFER_NUM_PAGES: usize = 0x10;
>  
>  /// GSP runtime data.
> -///
> -/// This is an empty pinned placeholder for now.
>  #[pin_data]
> -pub(crate) struct Gsp {}
> +pub(crate) struct Gsp {
> +    pub(crate) libos: CoherentAllocation<LibosMemoryRegionInitArgument>,
> +    loginit: LogBuffer,
> +    logintr: LogBuffer,
> +    logrm: LogBuffer,
> +}
> +
> +#[repr(C)]
> +struct PteArray<const NUM_ENTRIES: usize>([u64; NUM_ENTRIES]);

I'd just document this structure a bit as it is not obvious what it does
from the name alone.

> +
> +/// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around it.
> +unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
> +
> +impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
> +    fn new(handle: DmaAddress) -> Result<Self> {
> +        let mut ptes = [0u64; NUM_PAGES];
> +        for (i, pte) in ptes.iter_mut().enumerate() {
> +            *pte = handle
> +                .checked_add((i as u64) << GSP_PAGE_SHIFT)
> +                .ok_or(EOVERFLOW)?;
> +        }
> +
> +        Ok(Self(ptes))
> +    }
> +}
> +
> +/// The logging buffers are byte queues that contain encoded printf-like
> +/// messages from GSP-RM.  They need to be decoded by a special application
> +/// that can parse the buffers.
> +///
> +/// The 'loginit' buffer contains logs from early GSP-RM init and
> +/// exception dumps.  The 'logrm' buffer contains the subsequent logs. Both are
> +/// written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE.
> +///
> +/// The physical address map for the log buffer is stored in the buffer
> +/// itself, starting with offset 1. Offset 0 contains the "put" pointer (pp).
> +/// Initially, pp is equal to 0. If the buffer has valid logging data in it,
> +/// then pp points to index into the buffer where the next logging entry will
> +/// be written. Therefore, the logging data is valid if:
> +///   1 <= pp < sizeof(buffer)/sizeof(u64)

Maybe we should mention what happens to the address map, namely that it
gets overwritten by the buffer data and is only used for the initial
setup.

> +struct LogBuffer(CoherentAllocation<u8>);
> +
> +impl LogBuffer {
> +    fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
> +        const NUM_PAGES: usize = RM_LOG_BUFFER_NUM_PAGES;
> +
> +        let mut obj = Self(CoherentAllocation::<u8>::alloc_coherent(
> +            dev,
> +            NUM_PAGES * GSP_PAGE_SIZE,
> +            GFP_KERNEL | __GFP_ZERO,
> +        )?);
> +        let ptes = PteArray::<NUM_PAGES>::new(obj.0.dma_handle())?;
> +
> +        // SAFETY: `obj` has just been created and we are its sole user.
> +        unsafe {
> +            // Copy the self-mapping PTE at the expected location.
> +            obj.0
> +                .as_slice_mut(size_of::<u64>(), size_of_val(&ptes))?
> +                .copy_from_slice(ptes.as_bytes())
> +        };
> +
> +        Ok(obj)
> +    }
> +}
>  
>  impl Gsp {
> -    pub(crate) fn new() -> impl PinInit<Self> {
> -        pin_init!(Self {})
> +    pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self, Error>> {
> +        let dev = pdev.as_ref();
> +        let libos = CoherentAllocation::<LibosMemoryRegionInitArgument>::alloc_coherent(
> +            dev,
> +            GSP_PAGE_SIZE / size_of::<LibosMemoryRegionInitArgument>(),
> +            GFP_KERNEL | __GFP_ZERO,
> +        )?;
> +
> +        // Initialise the logging structures. The OpenRM equivalents are in:
> +        // _kgspInitLibosLoggingStructures (allocates memory for buffers)
> +        // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array)
> +        let loginit = LogBuffer::new(dev)?;
> +        dma_write!(libos[0] = LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0)?)?;
> +        let logintr = LogBuffer::new(dev)?;
> +        dma_write!(libos[1] = LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0)?)?;
> +        let logrm = LogBuffer::new(dev)?;
> +        dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0)?)?;

Let's maybe add a blank line before each "let" statement.

> +
> +        Ok(try_pin_init!(Self {
> +            libos,
> +            loginit,
> +            logintr,
> +            logrm,
> +        }))
>      }
>  }
> diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
> index 181baa401770..c3bececc29cd 100644
> --- a/drivers/gpu/nova-core/gsp/fw.rs
> +++ b/drivers/gpu/nova-core/gsp/fw.rs
> @@ -7,15 +7,20 @@
>  
>  use core::ops::Range;
>  
> -use kernel::ptr::Alignable;
> +use kernel::dma::CoherentAllocation;
> +use kernel::prelude::*;
> +use kernel::ptr::{Alignable, Alignment};
>  use kernel::sizes::SZ_1M;
> +use kernel::transmute::{AsBytes, FromBytes};
>  
>  use crate::gpu::Chipset;
> -use crate::gsp;
>  
>  /// Dummy type to group methods related to heap parameters for running the GSP firmware.
>  pub(crate) struct GspFwHeapParams(());
>  
> +/// Minimum required alignment for the GSP heap.
> +const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>();
> +
>  impl GspFwHeapParams {
>      /// Returns the amount of GSP-RM heap memory used during GSP-RM boot and initialization (up to
>      /// and including the first client subdevice allocation).
> @@ -29,7 +34,7 @@ fn base_rm_size(_chipset: Chipset) -> u64 {
>      /// Returns the amount of heap memory required to support a single channel allocation.
>      fn client_alloc_size() -> u64 {
>          u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE)
> -            .align_up(gsp::GSP_HEAP_ALIGNMENT)
> +            .align_up(GSP_HEAP_ALIGNMENT)
>              .unwrap_or(u64::MAX)
>      }
>  
> @@ -40,7 +45,7 @@ fn management_overhead(fb_size: u64) -> u64 {
>  
>          u64::from(bindings::GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB)
>              .saturating_mul(fb_size_gb)
> -            .align_up(gsp::GSP_HEAP_ALIGNMENT)
> +            .align_up(GSP_HEAP_ALIGNMENT)
>              .unwrap_or(u64::MAX)
>      }
>  }
> @@ -99,3 +104,54 @@ pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> u64 {
>  /// addresses of the GSP bootloader and firmware.
>  #[repr(transparent)]
>  pub(crate) struct GspFwWprMeta(bindings::GspFwWprMeta);
> +
> +/// Struct containing the arguments required to pass a memory buffer to the GSP
> +/// for use during initialisation.
> +///
> +/// The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is
> +/// configured for a larger page size (e.g. 64K pages), we need to give
> +/// the GSP an array of 4K pages. Since we only create physically contiguous
> +/// buffers the math to calculate the addresses is simple.
> +///
> +/// The buffers must be a multiple of GSP_PAGE_SIZE.  GSP-RM also currently
> +/// ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the
> +/// buffers to be physically contiguous anyway.
> +///
> +/// The memory allocated for the arguments must remain until the GSP sends the
> +/// init_done RPC.
> +#[repr(transparent)]
> +pub(crate) struct LibosMemoryRegionInitArgument(bindings::LibosMemoryRegionInitArgument);
> +
> +// SAFETY: Padding is explicit and will not contain uninitialized data.
> +unsafe impl AsBytes for LibosMemoryRegionInitArgument {}
> +
> +// SAFETY: This struct only contains integer types for which all bit patterns
> +// are valid.
> +unsafe impl FromBytes for LibosMemoryRegionInitArgument {}
> +
> +impl LibosMemoryRegionInitArgument {
> +    pub(crate) fn new<A: AsBytes + FromBytes>(
> +        name: &'static str,
> +        obj: &CoherentAllocation<A>,
> +    ) -> Result<Self> {
> +        /// Generates the `ID8` identifier required for some GSP objects.
> +        fn id8(name: &str) -> u64 {
> +            let mut bytes = [0u8; core::mem::size_of::<u64>()];
> +
> +            for (c, b) in name.bytes().rev().zip(&mut bytes) {
> +                *b = c;
> +            }
> +
> +            u64::from_ne_bytes(bytes)
> +        }
> +
> +        Ok(Self(bindings::LibosMemoryRegionInitArgument {
> +            id8: id8(name),
> +            pa: obj.dma_handle(),
> +            size: obj.size() as u64,
> +            kind: bindings::LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS.try_into()?,
> +            loc: bindings::LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM.try_into()?,

The unneeded runtime check is a bit unfortunate, and its removal would
allow us to make this method infallible, but I cannot find a good
alternative that doesn't also clutter the code. Can't wait for const
trait methods! :)
Re: [PATCH v5 02/14] gpu: nova-core: Create initial Gsp
Posted by Alistair Popple 2 months ago
On 2025-10-16 at 17:22 +1100, Alexandre Courbot <acourbot@nvidia.com> wrote...
> On Mon Oct 13, 2025 at 3:20 PM JST, Alistair Popple wrote:
> > The GSP requires several areas of memory to operate. Each of these have
> > their own simple embedded page tables. Set these up and map them for DMA
> > to/from GSP using CoherentAllocation's. Return the DMA handle describing
> > where each of these regions are for future use when booting GSP.
> >
> > Signed-off-by: Alistair Popple <apopple@nvidia.com>
> >
> > ---
> >
> > Changes for v5:
> >  - Move GSP_HEAP_ALIGNMENT to gsp/fw.rs and add a comment.
> >  - Create a LogBuffer type.
> >  - Use checked_add to ensure PTE values don't overflow.
> >  - Added some type documentation (shamelessly stolen from Nouveau)
> >
> > Change for v3:
> >  - Clean up the PTE array creation, with much thanks to Alex for doing
> >    most it (please let me know if I should put you as co-developer!)
> >
> > Changes for v2:
> >  - Renamed GspMemOjbects to Gsp as that is what they are
> >  - Rebased on Alex's latest series
> > ---
> >  drivers/gpu/nova-core/gpu.rs                  |   2 +-
> >  drivers/gpu/nova-core/gsp.rs                  | 106 ++++++++++++++++--
> >  drivers/gpu/nova-core/gsp/fw.rs               |  64 ++++++++++-
> >  .../gpu/nova-core/gsp/fw/r570_144/bindings.rs |  19 ++++
> >  4 files changed, 179 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> > index ea124d1912e7..c1396775e9b6 100644
> > --- a/drivers/gpu/nova-core/gpu.rs
> > +++ b/drivers/gpu/nova-core/gpu.rs
> > @@ -197,7 +197,7 @@ pub(crate) fn new<'a>(
> >  
> >              sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset, bar, true)?,
> >  
> > -            gsp <- Gsp::new(),
> > +            gsp <- Gsp::new(pdev)?,
> >  
> >              _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
> >  
> > diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
> > index 221281da1a45..f1727173bd42 100644
> > --- a/drivers/gpu/nova-core/gsp.rs
> > +++ b/drivers/gpu/nova-core/gsp.rs
> > @@ -2,25 +2,117 @@
> >  
> >  mod boot;
> >  
> > +use kernel::device;
> > +use kernel::dma::CoherentAllocation;
> > +use kernel::dma::DmaAddress;
> > +use kernel::dma_write;
> > +use kernel::pci;
> >  use kernel::prelude::*;
> > -use kernel::ptr::Alignment;
> > +use kernel::transmute::AsBytes;
> >  
> >  pub(crate) use fw::{GspFwWprMeta, LibosParams};
> >  
> >  mod fw;
> >  
> > +use fw::LibosMemoryRegionInitArgument;
> > +
> >  pub(crate) const GSP_PAGE_SHIFT: usize = 12;
> >  pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT;
> > -pub(crate) const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>();
> > +
> > +/// Number of GSP pages to use in a RM log buffer.
> > +const RM_LOG_BUFFER_NUM_PAGES: usize = 0x10;
> >  
> >  /// GSP runtime data.
> > -///
> > -/// This is an empty pinned placeholder for now.
> >  #[pin_data]
> > -pub(crate) struct Gsp {}
> > +pub(crate) struct Gsp {
> > +    pub(crate) libos: CoherentAllocation<LibosMemoryRegionInitArgument>,
> > +    loginit: LogBuffer,
> > +    logintr: LogBuffer,
> > +    logrm: LogBuffer,
> > +}
> > +
> > +#[repr(C)]
> > +struct PteArray<const NUM_ENTRIES: usize>([u64; NUM_ENTRIES]);
> 
> I'd just document this structure a bit as it is not obvious what it does
> from the name alone.

Ok. As discussed I'm going to put your Co-developed-by on v6 and leave this one
to you given I think you're more familiar with it :-)

> > +
> > +/// SAFETY: arrays of `u64` implement `AsBytes` and we are but a wrapper around it.
> > +unsafe impl<const NUM_ENTRIES: usize> AsBytes for PteArray<NUM_ENTRIES> {}
> > +
> > +impl<const NUM_PAGES: usize> PteArray<NUM_PAGES> {
> > +    fn new(handle: DmaAddress) -> Result<Self> {
> > +        let mut ptes = [0u64; NUM_PAGES];
> > +        for (i, pte) in ptes.iter_mut().enumerate() {
> > +            *pte = handle
> > +                .checked_add((i as u64) << GSP_PAGE_SHIFT)
> > +                .ok_or(EOVERFLOW)?;
> > +        }
> > +
> > +        Ok(Self(ptes))
> > +    }
> > +}
> > +
> > +/// The logging buffers are byte queues that contain encoded printf-like
> > +/// messages from GSP-RM.  They need to be decoded by a special application
> > +/// that can parse the buffers.
> > +///
> > +/// The 'loginit' buffer contains logs from early GSP-RM init and
> > +/// exception dumps.  The 'logrm' buffer contains the subsequent logs. Both are
> > +/// written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE.
> > +///
> > +/// The physical address map for the log buffer is stored in the buffer
> > +/// itself, starting with offset 1. Offset 0 contains the "put" pointer (pp).
> > +/// Initially, pp is equal to 0. If the buffer has valid logging data in it,
> > +/// then pp points to index into the buffer where the next logging entry will
> > +/// be written. Therefore, the logging data is valid if:
> > +///   1 <= pp < sizeof(buffer)/sizeof(u64)
> 
> Maybe we should mention what happens to the address map, namely that it
> gets overwritten by the buffer data and is only used for the initial
> setup.

Ditto.

> > +struct LogBuffer(CoherentAllocation<u8>);
> > +
> > +impl LogBuffer {
> > +    fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
> > +        const NUM_PAGES: usize = RM_LOG_BUFFER_NUM_PAGES;
> > +
> > +        let mut obj = Self(CoherentAllocation::<u8>::alloc_coherent(
> > +            dev,
> > +            NUM_PAGES * GSP_PAGE_SIZE,
> > +            GFP_KERNEL | __GFP_ZERO,
> > +        )?);
> > +        let ptes = PteArray::<NUM_PAGES>::new(obj.0.dma_handle())?;
> > +
> > +        // SAFETY: `obj` has just been created and we are its sole user.
> > +        unsafe {
> > +            // Copy the self-mapping PTE at the expected location.
> > +            obj.0
> > +                .as_slice_mut(size_of::<u64>(), size_of_val(&ptes))?
> > +                .copy_from_slice(ptes.as_bytes())
> > +        };
> > +
> > +        Ok(obj)
> > +    }
> > +}
> >  
> >  impl Gsp {
> > -    pub(crate) fn new() -> impl PinInit<Self> {
> > -        pin_init!(Self {})
> > +    pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self, Error>> {
> > +        let dev = pdev.as_ref();
> > +        let libos = CoherentAllocation::<LibosMemoryRegionInitArgument>::alloc_coherent(
> > +            dev,
> > +            GSP_PAGE_SIZE / size_of::<LibosMemoryRegionInitArgument>(),
> > +            GFP_KERNEL | __GFP_ZERO,
> > +        )?;
> > +
> > +        // Initialise the logging structures. The OpenRM equivalents are in:
> > +        // _kgspInitLibosLoggingStructures (allocates memory for buffers)
> > +        // kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array)
> > +        let loginit = LogBuffer::new(dev)?;
> > +        dma_write!(libos[0] = LibosMemoryRegionInitArgument::new("LOGINIT", &loginit.0)?)?;
> > +        let logintr = LogBuffer::new(dev)?;
> > +        dma_write!(libos[1] = LibosMemoryRegionInitArgument::new("LOGINTR", &logintr.0)?)?;
> > +        let logrm = LogBuffer::new(dev)?;
> > +        dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0)?)?;
> 
> Let's maybe add a space before each "let" statement.

Ok.

> > +
> > +        Ok(try_pin_init!(Self {
> > +            libos,
> > +            loginit,
> > +            logintr,
> > +            logrm,
> > +        }))
> >      }
> >  }
> > diff --git a/drivers/gpu/nova-core/gsp/fw.rs b/drivers/gpu/nova-core/gsp/fw.rs
> > index 181baa401770..c3bececc29cd 100644
> > --- a/drivers/gpu/nova-core/gsp/fw.rs
> > +++ b/drivers/gpu/nova-core/gsp/fw.rs
> > @@ -7,15 +7,20 @@
> >  
> >  use core::ops::Range;
> >  
> > -use kernel::ptr::Alignable;
> > +use kernel::dma::CoherentAllocation;
> > +use kernel::prelude::*;
> > +use kernel::ptr::{Alignable, Alignment};
> >  use kernel::sizes::SZ_1M;
> > +use kernel::transmute::{AsBytes, FromBytes};
> >  
> >  use crate::gpu::Chipset;
> > -use crate::gsp;
> >  
> >  /// Dummy type to group methods related to heap parameters for running the GSP firmware.
> >  pub(crate) struct GspFwHeapParams(());
> >  
> > +/// Minimum required alignment for the GSP heap.
> > +const GSP_HEAP_ALIGNMENT: Alignment = Alignment::new::<{ 1 << 20 }>();
> > +
> >  impl GspFwHeapParams {
> >      /// Returns the amount of GSP-RM heap memory used during GSP-RM boot and initialization (up to
> >      /// and including the first client subdevice allocation).
> > @@ -29,7 +34,7 @@ fn base_rm_size(_chipset: Chipset) -> u64 {
> >      /// Returns the amount of heap memory required to support a single channel allocation.
> >      fn client_alloc_size() -> u64 {
> >          u64::from(bindings::GSP_FW_HEAP_PARAM_CLIENT_ALLOC_SIZE)
> > -            .align_up(gsp::GSP_HEAP_ALIGNMENT)
> > +            .align_up(GSP_HEAP_ALIGNMENT)
> >              .unwrap_or(u64::MAX)
> >      }
> >  
> > @@ -40,7 +45,7 @@ fn management_overhead(fb_size: u64) -> u64 {
> >  
> >          u64::from(bindings::GSP_FW_HEAP_PARAM_SIZE_PER_GB_FB)
> >              .saturating_mul(fb_size_gb)
> > -            .align_up(gsp::GSP_HEAP_ALIGNMENT)
> > +            .align_up(GSP_HEAP_ALIGNMENT)
> >              .unwrap_or(u64::MAX)
> >      }
> >  }
> > @@ -99,3 +104,54 @@ pub(crate) fn wpr_heap_size(&self, chipset: Chipset, fb_size: u64) -> u64 {
> >  /// addresses of the GSP bootloader and firmware.
> >  #[repr(transparent)]
> >  pub(crate) struct GspFwWprMeta(bindings::GspFwWprMeta);
> > +
> > +/// Struct containing the arguments required to pass a memory buffer to the GSP
> > +/// for use during initialisation.
> > +///
> > +/// The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is
> > +/// configured for a larger page size (e.g. 64K pages), we need to give
> > +/// the GSP an array of 4K pages. Since we only create physically contiguous
> > +/// buffers the math to calculate the addresses is simple.
> > +///
> > +/// The buffers must be a multiple of GSP_PAGE_SIZE.  GSP-RM also currently
> > +/// ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the
> > +/// buffers to be physically contiguous anyway.
> > +///
> > +/// The memory allocated for the arguments must remain until the GSP sends the
> > +/// init_done RPC.
> > +#[repr(transparent)]
> > +pub(crate) struct LibosMemoryRegionInitArgument(bindings::LibosMemoryRegionInitArgument);
> > +
> > +// SAFETY: Padding is explicit and will not contain uninitialized data.
> > +unsafe impl AsBytes for LibosMemoryRegionInitArgument {}
> > +
> > +// SAFETY: This struct only contains integer types for which all bit patterns
> > +// are valid.
> > +unsafe impl FromBytes for LibosMemoryRegionInitArgument {}
> > +
> > +impl LibosMemoryRegionInitArgument {
> > +    pub(crate) fn new<A: AsBytes + FromBytes>(
> > +        name: &'static str,
> > +        obj: &CoherentAllocation<A>,
> > +    ) -> Result<Self> {
> > +        /// Generates the `ID8` identifier required for some GSP objects.
> > +        fn id8(name: &str) -> u64 {
> > +            let mut bytes = [0u8; core::mem::size_of::<u64>()];
> > +
> > +            for (c, b) in name.bytes().rev().zip(&mut bytes) {
> > +                *b = c;
> > +            }
> > +
> > +            u64::from_ne_bytes(bytes)
> > +        }
> > +
> > +        Ok(Self(bindings::LibosMemoryRegionInitArgument {
> > +            id8: id8(name),
> > +            pa: obj.dma_handle(),
> > +            size: obj.size() as u64,
> > +            kind: bindings::LibosMemoryRegionKind_LIBOS_MEMORY_REGION_CONTIGUOUS.try_into()?,
> > +            loc: bindings::LibosMemoryRegionLoc_LIBOS_MEMORY_REGION_LOC_SYSMEM.try_into()?,
> 
> The unneeded runtime check is a bit unfortunate, and its removal would
> allow us to make this method non-fallible, but I cannot find a good
> alternative that also doesn't clutter the code. Can't wait for const
> traits methods! :)