[v7] gpu: nova-core: firmware: Hopper/Blackwell support

[PATCH v7 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

Posted by John Hubbard 2 weeks, 5 days ago

Hopper and Blackwell GPUs use FSP-based secure boot and do not require
waiting for GFW_BOOT completion. Skip this step for these architectures.

Move the GFW_BOOT policy into a dedicated GPU HAL in gpu/hal.rs. This
keeps the decision out of gpu.rs while avoiding unrelated subsystems
such as fb. Pre-Hopper families still wait for GFW_BOOT completion,
while Hopper and later use the FSP Chain of Trust boot path instead.

Cc: Danilo Krummrich <dakr@kernel.org>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/gpu/nova-core/gpu.rs     | 14 ++++++---
 drivers/gpu/nova-core/gpu/hal.rs | 54 ++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/nova-core/gpu/hal.rs

diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 9e140463603b..93f861ba20f3 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -23,6 +23,8 @@
     regs,
 };
 
+mod hal;
+
 macro_rules! define_chipset {
     ({ $($variant:ident = $value:expr),* $(,)* }) =>
     {
@@ -309,13 +311,17 @@ pub(crate) fn new<'a>(
         spec: Spec,
     ) -> impl PinInit<Self, Error> + 'a {
         let chipset = spec.chipset();
+        let hal = hal::gpu_hal(chipset);
 
         try_pin_init!(Self {
-            // We must wait for GFW_BOOT completion before doing any significant setup
-            // on the GPU.
             _: {
-                gfw::wait_gfw_boot_completion(bar)
-                    .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
+                // GFW_BOOT is the "GPU firmware boot complete" signal for the
+                // legacy devinit/FWSEC path. Pre-Hopper GPUs must wait for it
+                // before most GPU initialization. Hopper and later boot via FSP.
+                if hal.needs_gfw_boot() {
+                    gfw::wait_gfw_boot_completion(bar)
+                        .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
+                }
             },
 
             sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, chipset)?,
diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
new file mode 100644
index 000000000000..859c5e5fa21f
--- /dev/null
+++ b/drivers/gpu/nova-core/gpu/hal.rs
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use crate::gpu::{
+    Architecture,
+    Chipset, //
+};
+
+pub(crate) trait GpuHal {
+    /// Returns whether this hardware family still requires waiting for GFW_BOOT.
+    fn needs_gfw_boot(&self) -> bool;
+}
+
+struct Tu102;
+struct Ga100;
+struct Ga102;
+struct Fsp;
+
+impl GpuHal for Tu102 {
+    fn needs_gfw_boot(&self) -> bool {
+        true
+    }
+}
+
+impl GpuHal for Ga100 {
+    fn needs_gfw_boot(&self) -> bool {
+        true
+    }
+}
+
+impl GpuHal for Ga102 {
+    fn needs_gfw_boot(&self) -> bool {
+        true
+    }
+}
+
+impl GpuHal for Fsp {
+    fn needs_gfw_boot(&self) -> bool {
+        false
+    }
+}
+
+const TU102: Tu102 = Tu102;
+const GA100: Ga100 = Ga100;
+const GA102: Ga102 = Ga102;
+const FSP: Fsp = Fsp;
+
+pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
+    match chipset.arch() {
+        Architecture::Turing => &TU102,
+        Architecture::Ampere if chipset == Chipset::GA100 => &GA100,
+        Architecture::Ampere | Architecture::Ada => &GA102,
+        Architecture::Hopper | Architecture::Blackwell => &FSP,
+    }
+}
-- 
2.53.0

Re: [PATCH v7 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

Posted by Alexandre Courbot 2 weeks ago

On Wed Mar 18, 2026 at 7:53 AM JST, John Hubbard wrote:
> Hopper and Blackwell GPUs use FSP-based secure boot and do not require
> waiting for GFW_BOOT completion. Skip this step for these architectures.
>
> Move the GFW_BOOT policy into a dedicated GPU HAL in gpu/hal.rs. This
> keeps the decision out of gpu.rs while avoiding unrelated subsystems
> such as fb. Pre-Hopper families still wait for GFW_BOOT completion,
> while Hopper and later use the FSP Chain of Trust boot path instead.
>
> Cc: Danilo Krummrich <dakr@kernel.org>
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
> ---
>  drivers/gpu/nova-core/gpu.rs     | 14 ++++++---
>  drivers/gpu/nova-core/gpu/hal.rs | 54 ++++++++++++++++++++++++++++++++
>  2 files changed, 64 insertions(+), 4 deletions(-)
>  create mode 100644 drivers/gpu/nova-core/gpu/hal.rs
>
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index 9e140463603b..93f861ba20f3 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -23,6 +23,8 @@
>      regs,
>  };
>  
> +mod hal;
> +
>  macro_rules! define_chipset {
>      ({ $($variant:ident = $value:expr),* $(,)* }) =>
>      {
> @@ -309,13 +311,17 @@ pub(crate) fn new<'a>(
>          spec: Spec,
>      ) -> impl PinInit<Self, Error> + 'a {
>          let chipset = spec.chipset();
> +        let hal = hal::gpu_hal(chipset);
>  
>          try_pin_init!(Self {
> -            // We must wait for GFW_BOOT completion before doing any significant setup
> -            // on the GPU.
>              _: {
> -                gfw::wait_gfw_boot_completion(bar)
> -                    .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
> +                // GFW_BOOT is the "GPU firmware boot complete" signal for the
> +                // legacy devinit/FWSEC path. Pre-Hopper GPUs must wait for it
> +                // before most GPU initialization. Hopper and later boot via FSP.
> +                if hal.needs_gfw_boot() {
> +                    gfw::wait_gfw_boot_completion(bar)
> +                        .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
> +                }

Good that this is moved to a HAL method, but we can go one step further
and perform the actual wait in the HAL. I.e., this should become
`hal.wait_gfw_boot_completion` and the wait would happen in the HAL
itself (where such low-level stuff belongs), not here.

We could even move the contents of the `gfw` module into the correct HAL
since it won't be used anywhere, and simplify our top-level directory.

>              },
>  
>              sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, chipset)?,
> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
> new file mode 100644
> index 000000000000..859c5e5fa21f
> --- /dev/null
> +++ b/drivers/gpu/nova-core/gpu/hal.rs
> @@ -0,0 +1,54 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +use crate::gpu::{
> +    Architecture,
> +    Chipset, //
> +};
> +
> +pub(crate) trait GpuHal {
> +    /// Returns whether this hardware family still requires waiting for GFW_BOOT.
> +    fn needs_gfw_boot(&self) -> bool;
> +}
> +
> +struct Tu102;
> +struct Ga100;
> +struct Ga102;
> +struct Fsp;
> +
> +impl GpuHal for Tu102 {
> +    fn needs_gfw_boot(&self) -> bool {
> +        true
> +    }
> +}
> +
> +impl GpuHal for Ga100 {
> +    fn needs_gfw_boot(&self) -> bool {
> +        true
> +    }
> +}
> +
> +impl GpuHal for Ga102 {
> +    fn needs_gfw_boot(&self) -> bool {
> +        true
> +    }
> +}
> +
> +impl GpuHal for Fsp {
> +    fn needs_gfw_boot(&self) -> bool {
> +        false
> +    }
> +}

3 of the HALs do exactly the same thing. You only need two: `Tu102` and
`Gh100`. `Fsp` is also not a valid name for a HAL, so far they have been
named after the first chip that makes use of them.

> +
> +const TU102: Tu102 = Tu102;
> +const GA100: Ga100 = Ga100;
> +const GA102: Ga102 = Ga102;
> +const FSP: Fsp = Fsp;
> +
> +pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
> +    match chipset.arch() {
> +        Architecture::Turing => &TU102,
> +        Architecture::Ampere if chipset == Chipset::GA100 => &GA100,
> +        Architecture::Ampere | Architecture::Ada => &GA102,

This must be a copy/paste from somewhere because GA100 does not warrant
any exception here.

Re: [PATCH v7 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

Posted by John Hubbard 1 week, 5 days ago

On 3/23/26 6:13 AM, Alexandre Courbot wrote:
> On Wed Mar 18, 2026 at 7:53 AM JST, John Hubbard wrote:
>> Hopper and Blackwell GPUs use FSP-based secure boot and do not require
>> waiting for GFW_BOOT completion. Skip this step for these architectures.
>>
>> Move the GFW_BOOT policy into a dedicated GPU HAL in gpu/hal.rs. This
>> keeps the decision out of gpu.rs while avoiding unrelated subsystems
>> such as fb. Pre-Hopper families still wait for GFW_BOOT completion,
>> while Hopper and later use the FSP Chain of Trust boot path instead.
>>
>> Cc: Danilo Krummrich <dakr@kernel.org>
>> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
>> ---
>>  drivers/gpu/nova-core/gpu.rs     | 14 ++++++---
>>  drivers/gpu/nova-core/gpu/hal.rs | 54 ++++++++++++++++++++++++++++++++
>>  2 files changed, 64 insertions(+), 4 deletions(-)
>>  create mode 100644 drivers/gpu/nova-core/gpu/hal.rs
>>
>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>> index 9e140463603b..93f861ba20f3 100644
>> --- a/drivers/gpu/nova-core/gpu.rs
>> +++ b/drivers/gpu/nova-core/gpu.rs
>> @@ -23,6 +23,8 @@
>>      regs,
>>  };
>>  
>> +mod hal;
>> +
>>  macro_rules! define_chipset {
>>      ({ $($variant:ident = $value:expr),* $(,)* }) =>
>>      {
>> @@ -309,13 +311,17 @@ pub(crate) fn new<'a>(
>>          spec: Spec,
>>      ) -> impl PinInit<Self, Error> + 'a {
>>          let chipset = spec.chipset();
>> +        let hal = hal::gpu_hal(chipset);
>>  
>>          try_pin_init!(Self {
>> -            // We must wait for GFW_BOOT completion before doing any significant setup
>> -            // on the GPU.
>>              _: {
>> -                gfw::wait_gfw_boot_completion(bar)
>> -                    .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
>> +                // GFW_BOOT is the "GPU firmware boot complete" signal for the
>> +                // legacy devinit/FWSEC path. Pre-Hopper GPUs must wait for it
>> +                // before most GPU initialization. Hopper and later boot via FSP.
>> +                if hal.needs_gfw_boot() {
>> +                    gfw::wait_gfw_boot_completion(bar)
>> +                        .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
>> +                }
> 
> Good that this is moved to a HAL method, but we can go one step further
> and perform the actual wait in the HAL. I.e., this should become
> `hal.wait_gfw_boot_completion` and the wait would happen in the HAL
> itself (where such low-level stuff belongs), not here.
> 
> We could even move the contents of the `gfw` module into the correct HAL
> since it won't be used anywhere, and simplify our top-level directory.

Yes. Done.

> 
>>              },
>>  
>>              sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, chipset)?,
>> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
>> new file mode 100644
>> index 000000000000..859c5e5fa21f
>> --- /dev/null
>> +++ b/drivers/gpu/nova-core/gpu/hal.rs
>> @@ -0,0 +1,54 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +use crate::gpu::{
>> +    Architecture,
>> +    Chipset, //
>> +};
>> +
>> +pub(crate) trait GpuHal {
>> +    /// Returns whether this hardware family still requires waiting for GFW_BOOT.
>> +    fn needs_gfw_boot(&self) -> bool;
>> +}
>> +
>> +struct Tu102;
>> +struct Ga100;
>> +struct Ga102;
>> +struct Fsp;
>> +
>> +impl GpuHal for Tu102 {
>> +    fn needs_gfw_boot(&self) -> bool {
>> +        true
>> +    }
>> +}
>> +
>> +impl GpuHal for Ga100 {
>> +    fn needs_gfw_boot(&self) -> bool {
>> +        true
>> +    }
>> +}
>> +
>> +impl GpuHal for Ga102 {
>> +    fn needs_gfw_boot(&self) -> bool {
>> +        true
>> +    }
>> +}
>> +
>> +impl GpuHal for Fsp {
>> +    fn needs_gfw_boot(&self) -> bool {
>> +        false
>> +    }
>> +}
> 
> 3 of the HALs do exactly the same thing. You only need two: `Tu102` and
> `Gh100`. `Fsp` is also not a valid name for a HAL, so far they have been
> named after the first chip that makes use of them.

Right.

> 
>> +
>> +const TU102: Tu102 = Tu102;
>> +const GA100: Ga100 = Ga100;
>> +const GA102: Ga102 = Ga102;
>> +const FSP: Fsp = Fsp;
>> +
>> +pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
>> +    match chipset.arch() {
>> +        Architecture::Turing => &TU102,
>> +        Architecture::Ampere if chipset == Chipset::GA100 => &GA100,
>> +        Architecture::Ampere | Architecture::Ada => &GA102,
> 
> This must be a copy/paste from somewhere because GA100 does not warrant
> any exception here.

Fixed.

thanks,
-- 
John Hubbard