[v6] gpu: nova-core: firmware: Hopper/Blackwell support

[PATCH v6 07/34] gpu: nova-core: move GPU init and DMA mask setup into Gpu::new()

Posted by John Hubbard 1 month ago

Move Spec creation, the dev_info log, and DMA mask setup from the
driver's probe() into Gpu::new(), so that all GPU-specific
initialization lives in the Gpu constructor.

This restructures Gpu::new() to use pin_init_scope wrapping
try_pin_init!, which allows running fallible setup code (Spec::new,
dma_set_mask_and_coherent) before the pin-initializer. The parameter
type changes from pci::Device<device::Bound> to pci::Device<device::Core>
because the DMA call requires the Core device state.

Also makes Chipset::arch() const, adds Spec::chipset() accessor, and
makes Spec::new() pub(crate) for use by later patches.

No functional change: the same 47-bit DMA mask is applied.

Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Gary Guo <gary@garyguo.net>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/gpu/nova-core/driver.rs | 15 --------
 drivers/gpu/nova-core/gpu.rs    | 64 ++++++++++++++++++++++-----------
 2 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index 84b0e1703150..e07f7122b35c 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -4,8 +4,6 @@
     auxiliary,
     device::Core,
     devres::Devres,
-    dma::Device,
-    dma::DmaMask,
     pci,
     pci::{
         Class,
@@ -38,14 +36,6 @@ pub(crate) struct NovaCore {
 
 const BAR0_SIZE: usize = SZ_16M;
 
-// For now we only support Ampere which can use up to 47-bit DMA addresses.
-//
-// TODO: Add an abstraction for this to support newer GPUs which may support
-// larger DMA addresses. Limiting these GPUs to smaller address widths won't
-// have any adverse affects, unless installed on systems which require larger
-// DMA addresses. These systems should be quite rare.
-const GPU_DMA_BITS: u32 = 47;
-
 pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
 
 kernel::pci_device_table!(
@@ -84,11 +74,6 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, E
             pdev.enable_device_mem()?;
             pdev.set_master();
 
-            // SAFETY: No concurrent DMA allocations or mappings can be made because
-            // the device is still being probed and therefore isn't being used by
-            // other threads of execution.
-            unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
-
             let bar = Arc::pin_init(
                 pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0"),
                 GFP_KERNEL,
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 3b4ccc3d18b9..a7f1957880ff 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -3,6 +3,10 @@
 use kernel::{
     device,
     devres::Devres,
+    dma::{
+        Device,
+        DmaMask, //
+    },
     fmt,
     pci,
     prelude::*,
@@ -162,6 +166,10 @@ pub(crate) enum Architecture {
     Blackwell = 0x1b,
 }
 
+// TODO: Set the DMA mask per-architecture. Hopper and Blackwell support 52-bit
+// DMA addresses. For now, use 47-bit which is correct for Turing, Ampere, and Ada.
+const GPU_DMA_BITS: u32 = 47;
+
 impl TryFrom<u8> for Architecture {
     type Error = Error;
 
@@ -211,7 +219,7 @@ pub(crate) struct Spec {
 }
 
 impl Spec {
-    fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
+    pub(crate) fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
         // Some brief notes about boot0 and boot42, in chronological order:
         //
         // NV04 through NV50:
@@ -241,6 +249,10 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
             dev_err!(dev, "Unsupported chipset: {}\n", boot42);
         })
     }
+
+    pub(crate) fn chipset(&self) -> Chipset {
+        self.chipset
+    }
 }
 
 impl TryFrom<regs::NV_PMC_BOOT_42> for Spec {
@@ -285,36 +297,46 @@ pub(crate) struct Gpu {
 
 impl Gpu {
     pub(crate) fn new<'a>(
-        pdev: &'a pci::Device<device::Bound>,
+        pdev: &'a pci::Device<device::Core>,
         devres_bar: Arc<Devres<Bar0>>,
         bar: &'a Bar0,
     ) -> impl PinInit<Self, Error> + 'a {
-        try_pin_init!(Self {
-            spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| {
-                dev_info!(pdev,"NVIDIA ({})\n", spec);
-            })?,
+        pin_init::pin_init_scope(move || {
+            let spec = Spec::new(pdev.as_ref(), bar)?;
+            dev_info!(pdev, "NVIDIA ({})\n", spec);
+
+            // SAFETY: No concurrent DMA allocations or mappings can be made because
+            // the device is still being probed and therefore isn't being used by
+            // other threads of execution.
+            unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
+
+            let chipset = spec.chipset();
 
-            // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
-            _: {
-                gfw::wait_gfw_boot_completion(bar)
-                    .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
-            },
+            Ok(try_pin_init!(Self {
+                // We must wait for GFW_BOOT completion before doing any significant setup
+                // on the GPU.
+                _: {
+                    gfw::wait_gfw_boot_completion(bar)
+                        .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
+                },
 
-            sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?,
+                sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, chipset)?,
 
-            gsp_falcon: Falcon::new(
-                pdev.as_ref(),
-                spec.chipset,
-            )
-            .inspect(|falcon| falcon.clear_swgen0_intr(bar))?,
+                gsp_falcon: Falcon::new(
+                    pdev.as_ref(),
+                    chipset,
+                )
+                .inspect(|falcon| falcon.clear_swgen0_intr(bar))?,
 
-            sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?,
+                sec2_falcon: Falcon::new(pdev.as_ref(), chipset)?,
 
-            gsp <- Gsp::new(pdev),
+                gsp <- Gsp::new(pdev),
 
-            _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
+                _: { gsp.boot(pdev, bar, chipset, gsp_falcon, sec2_falcon)? },
 
-            bar: devres_bar,
+                bar: devres_bar,
+                spec,
+            }))
         })
     }
 
-- 
2.53.0

Re: [PATCH v6 07/34] gpu: nova-core: move GPU init and DMA mask setup into Gpu::new()

Posted by Alexandre Courbot 1 month ago

On Tue Mar 10, 2026 at 11:10 AM JST, John Hubbard wrote:
> Move Spec creation, the dev_info log, and DMA mask setup from the
> driver's probe() into Gpu::new(), so that all GPU-specific
> initialization lives in the Gpu constructor.
>
> This restructures Gpu::new() to use pin_init_scope wrapping
> try_pin_init!, which allows running fallible setup code (Spec::new,
> dma_set_mask_and_coherent) before the pin-initializer. The parameter
> type changes from pci::Device<device::Bound> to pci::Device<device::Core>
> because the DMA call requires the Core device state.
>
> Also makes Chipset::arch() const, adds Spec::chipset() accessor, and
> makes Spec::new() pub(crate) for use by later patches.
>
> No functional change: the same 47-bit DMA mask is applied.
>
> Cc: Danilo Krummrich <dakr@kernel.org>
> Cc: Gary Guo <gary@garyguo.net>
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
> ---
>  drivers/gpu/nova-core/driver.rs | 15 --------
>  drivers/gpu/nova-core/gpu.rs    | 64 ++++++++++++++++++++++-----------
>  2 files changed, 43 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
> index 84b0e1703150..e07f7122b35c 100644
> --- a/drivers/gpu/nova-core/driver.rs
> +++ b/drivers/gpu/nova-core/driver.rs
> @@ -4,8 +4,6 @@
>      auxiliary,
>      device::Core,
>      devres::Devres,
> -    dma::Device,
> -    dma::DmaMask,
>      pci,
>      pci::{
>          Class,
> @@ -38,14 +36,6 @@ pub(crate) struct NovaCore {
>  
>  const BAR0_SIZE: usize = SZ_16M;
>  
> -// For now we only support Ampere which can use up to 47-bit DMA addresses.
> -//
> -// TODO: Add an abstraction for this to support newer GPUs which may support
> -// larger DMA addresses. Limiting these GPUs to smaller address widths won't
> -// have any adverse affects, unless installed on systems which require larger
> -// DMA addresses. These systems should be quite rare.
> -const GPU_DMA_BITS: u32 = 47;
> -
>  pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
>  
>  kernel::pci_device_table!(
> @@ -84,11 +74,6 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, E
>              pdev.enable_device_mem()?;
>              pdev.set_master();
>  
> -            // SAFETY: No concurrent DMA allocations or mappings can be made because
> -            // the device is still being probed and therefore isn't being used by
> -            // other threads of execution.
> -            unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };

I think the reason why this was in driver.rs is that the method we are
in (`probe`) is enough to justify the safety argument.

Whereas `Gpu::new` can technically be called at a different time, and
the safety requirement becomes more difficult to justify - we would
actually need to make `Gpu::new` unsafe and set its own `SAFETY` section
to cover the safety requirement of `dma_set_mask_and_coherent`.

So I think it is preferable to keep this part as-is.

> -
>              let bar = Arc::pin_init(
>                  pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0"),
>                  GFP_KERNEL,
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index 3b4ccc3d18b9..a7f1957880ff 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -3,6 +3,10 @@
>  use kernel::{
>      device,
>      devres::Devres,
> +    dma::{
> +        Device,
> +        DmaMask, //
> +    },
>      fmt,
>      pci,
>      prelude::*,
> @@ -162,6 +166,10 @@ pub(crate) enum Architecture {
>      Blackwell = 0x1b,
>  }
>  
> +// TODO: Set the DMA mask per-architecture. Hopper and Blackwell support 52-bit
> +// DMA addresses. For now, use 47-bit which is correct for Turing, Ampere, and Ada.
> +const GPU_DMA_BITS: u32 = 47;
> +
>  impl TryFrom<u8> for Architecture {
>      type Error = Error;
>  
> @@ -211,7 +219,7 @@ pub(crate) struct Spec {
>  }
>  
>  impl Spec {
> -    fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
> +    pub(crate) fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {

The visibility of this methods doesn't need to be changed with the
current code - although it will per the next patch if we keep the DMA
mask setup in `driver.rs`. If we opt for that direction, let's merge
that part into the next patch.