[v9] gpu: nova-core: firmware: Hopper/Blackwell support

[PATCH v9 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

Posted by John Hubbard 1 week ago

Hopper and Blackwell GPUs use FSP-based secure boot and do not
require waiting for GFW_BOOT completion. Move the GFW_BOOT wait
into a GPU HAL so the decision and the wait both live in the HAL.

Pre-Hopper families (Tu102 HAL) wait for GFW_BOOT completion.
Hopper and later (Gh100 HAL) skip it and boot via FSP instead.

Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/gpu/nova-core/gpu.rs     |  6 +++--
 drivers/gpu/nova-core/gpu/hal.rs | 41 ++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nova-core/gpu/hal.rs

diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index f70bfbda1614..8332ad67c0af 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -21,11 +21,12 @@
         Falcon, //
     },
     fb::SysmemFlush,
-    gfw,
     gsp::Gsp,
     regs,
 };
 
+mod hal;
+
 macro_rules! define_chipset {
     ({ $($variant:ident = $value:expr),* $(,)* }) =>
     {
@@ -311,6 +312,7 @@ pub(crate) fn new<'a>(
         spec: Spec,
     ) -> impl PinInit<Self, Error> + 'a {
         let dma_mask = spec.chipset().arch().dma_mask();
+        let hal = hal::gpu_hal(spec.chipset());
 
         try_pin_init!(Self {
             // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
@@ -319,7 +321,7 @@ pub(crate) fn new<'a>(
                 // still constructing it, so no concurrent DMA allocations can exist.
                 unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
 
-                gfw::wait_gfw_boot_completion(bar)
+                hal.wait_gfw_boot_completion(bar)
                     .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
             },
 
diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
new file mode 100644
index 000000000000..164410992659
--- /dev/null
+++ b/drivers/gpu/nova-core/gpu/hal.rs
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::prelude::*;
+
+use crate::{
+    driver::Bar0,
+    gfw,
+    gpu::{
+        Architecture,
+        Chipset, //
+    },
+};
+
+pub(crate) trait GpuHal {
+    /// Waits for GFW_BOOT completion if required by this hardware family.
+    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
+}
+
+struct Tu102;
+struct Gh100;
+
+impl GpuHal for Tu102 {
+    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
+        gfw::wait_gfw_boot_completion(bar)
+    }
+}
+
+impl GpuHal for Gh100 {
+    fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
+        Ok(())
+    }
+}
+
+pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
+    match chipset.arch() {
+        Architecture::Turing | Architecture::Ampere | Architecture::Ada => &Tu102,
+        Architecture::Hopper | Architecture::BlackwellGB10x | Architecture::BlackwellGB20x => {
+            &Gh100
+        }
+    }
+}
-- 
2.53.0

Re: [PATCH v9 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

Posted by Alexandre Courbot 2 days, 10 hours ago

On Thu Mar 26, 2026 at 10:38 AM JST, John Hubbard wrote:
> Hopper and Blackwell GPUs use FSP-based secure boot and do not
> require waiting for GFW_BOOT completion. Move the GFW_BOOT wait
> into a GPU HAL so the decision and the wait both live in the HAL.
>
> Pre-Hopper families (Tu102 HAL) wait for GFW_BOOT completion.
> Hopper and later (Gh100 HAL) skip it and boot via FSP instead.
>
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
> ---
>  drivers/gpu/nova-core/gpu.rs     |  6 +++--
>  drivers/gpu/nova-core/gpu/hal.rs | 41 ++++++++++++++++++++++++++++++++
>  2 files changed, 45 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/nova-core/gpu/hal.rs
>
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index f70bfbda1614..8332ad67c0af 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -21,11 +21,12 @@
>          Falcon, //
>      },
>      fb::SysmemFlush,
> -    gfw,
>      gsp::Gsp,
>      regs,
>  };
>  
> +mod hal;
> +
>  macro_rules! define_chipset {
>      ({ $($variant:ident = $value:expr),* $(,)* }) =>
>      {
> @@ -311,6 +312,7 @@ pub(crate) fn new<'a>(
>          spec: Spec,
>      ) -> impl PinInit<Self, Error> + 'a {
>          let dma_mask = spec.chipset().arch().dma_mask();
> +        let hal = hal::gpu_hal(spec.chipset());
>  
>          try_pin_init!(Self {
>              // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
> @@ -319,7 +321,7 @@ pub(crate) fn new<'a>(
>                  // still constructing it, so no concurrent DMA allocations can exist.
>                  unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
>  
> -                gfw::wait_gfw_boot_completion(bar)
> +                hal.wait_gfw_boot_completion(bar)
>                      .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
>              },
>  
> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
> new file mode 100644
> index 000000000000..164410992659
> --- /dev/null
> +++ b/drivers/gpu/nova-core/gpu/hal.rs
> @@ -0,0 +1,41 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +use kernel::prelude::*;
> +
> +use crate::{
> +    driver::Bar0,
> +    gfw,
> +    gpu::{
> +        Architecture,
> +        Chipset, //
> +    },
> +};
> +
> +pub(crate) trait GpuHal {
> +    /// Waits for GFW_BOOT completion if required by this hardware family.
> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
> +}
> +
> +struct Tu102;
> +struct Gh100;
> +
> +impl GpuHal for Tu102 {
> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
> +        gfw::wait_gfw_boot_completion(bar)
> +    }
> +}
> +
> +impl GpuHal for Gh100 {
> +    fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
> +        Ok(())
> +    }
> +}

Please take a look at how other HALs are implemented: each HAL instance
is in its own module. That's not just a cosmetic choice; it allows us to
keep the chipset's specific HAL struct and its helpers completely
private and forces us to make code-sharing explicit. Furthermore, this
particular HAL is bound to grow, so let's split it properly from the
start.

If you do that it also makes more sense to use constants (contrary to
Gary's feedback on v8), if only to align with the rest of the driver.

Once this is done, making `gpu::hal::tu102` absorb the `gfw` module is
trivial, so let's do that while we are at it - having `gfw` as being
driver-wide makes little sense since it has a very limited role for a
specific subset of the chips we support.

Re: [PATCH v9 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

Posted by Joel Fernandes 2 days, 7 hours ago


On 3/30/2026 10:52 AM, Alexandre Courbot wrote:
> On Thu Mar 26, 2026 at 10:38 AM JST, John Hubbard wrote:
>> Hopper and Blackwell GPUs use FSP-based secure boot and do not
>> require waiting for GFW_BOOT completion. Move the GFW_BOOT wait
>> into a GPU HAL so the decision and the wait both live in the HAL.
>>
>> Pre-Hopper families (Tu102 HAL) wait for GFW_BOOT completion.
>> Hopper and later (Gh100 HAL) skip it and boot via FSP instead.
>>
>> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
>> ---
>>  drivers/gpu/nova-core/gpu.rs     |  6 +++--
>>  drivers/gpu/nova-core/gpu/hal.rs | 41 ++++++++++++++++++++++++++++++++
>>  2 files changed, 45 insertions(+), 2 deletions(-)
>>  create mode 100644 drivers/gpu/nova-core/gpu/hal.rs
>>
>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>> index f70bfbda1614..8332ad67c0af 100644
>> --- a/drivers/gpu/nova-core/gpu.rs
>> +++ b/drivers/gpu/nova-core/gpu.rs
>> @@ -21,11 +21,12 @@
>>          Falcon, //
>>      },
>>      fb::SysmemFlush,
>> -    gfw,
>>      gsp::Gsp,
>>      regs,
>>  };
>>  
>> +mod hal;
>> +
>>  macro_rules! define_chipset {
>>      ({ $($variant:ident = $value:expr),* $(,)* }) =>
>>      {
>> @@ -311,6 +312,7 @@ pub(crate) fn new<'a>(
>>          spec: Spec,
>>      ) -> impl PinInit<Self, Error> + 'a {
>>          let dma_mask = spec.chipset().arch().dma_mask();
>> +        let hal = hal::gpu_hal(spec.chipset());
>>  
>>          try_pin_init!(Self {
>>              // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
>> @@ -319,7 +321,7 @@ pub(crate) fn new<'a>(
>>                  // still constructing it, so no concurrent DMA allocations can exist.
>>                  unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
>>  
>> -                gfw::wait_gfw_boot_completion(bar)
>> +                hal.wait_gfw_boot_completion(bar)
>>                      .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
>>              },
>>  
>> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
>> new file mode 100644
>> index 000000000000..164410992659
>> --- /dev/null
>> +++ b/drivers/gpu/nova-core/gpu/hal.rs
>> @@ -0,0 +1,41 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +use kernel::prelude::*;
>> +
>> +use crate::{
>> +    driver::Bar0,
>> +    gfw,
>> +    gpu::{
>> +        Architecture,
>> +        Chipset, //
>> +    },
>> +};
>> +
>> +pub(crate) trait GpuHal {
>> +    /// Waits for GFW_BOOT completion if required by this hardware family.
>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
>> +}
>> +
>> +struct Tu102;
>> +struct Gh100;
>> +
>> +impl GpuHal for Tu102 {
>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
>> +        gfw::wait_gfw_boot_completion(bar)
>> +    }
>> +}
>> +
>> +impl GpuHal for Gh100 {
>> +    fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
>> +        Ok(())
>> +    }
>> +}
> 
> Please take a look at how other HALs are implemented: each HAL instance
> is in its own module. That's not just a cosmetic choice; it allows us to
> keep the chipset's specific HAL struct and its helpers completely
> private and forces us to make code-sharing explicit. Furthermore, this
> particular HAL is bound to grow, so let's split it properly from the
> start.
> 
> If you do that it also makes more sense to use constants (contrary to
> Gary's feedback on v8), if only to align with the rest of the driver.
> 
> Once this is done, making `gpu::hal::tu102` absorb the `gfw` module is
> trivial, so let's do that while we are at it - having `gfw` as being
> driver-wide makes little sense since it has a very limited role for a
> specific subset of the chips we support.

I feel a HAL might be overkill for this. Looking at the series, this is also the
only method. I am doubtful future architectures will have to once again wait for
GFW boot (is that expected?).

If not, we can just match or conditional on .arch(). We do that already in other
places.

Something like:
  if spec.chipset().arch() < Architecture::Hopper {
      gfw::wait_gfw_boot_completion(bar)?;
  }


Thoughts?

thanks,
--
Joel Fernandes

Re: [PATCH v9 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

Posted by Alexandre Courbot 2 days, 1 hour ago

On Tue Mar 31, 2026 at 3:33 AM JST, Joel Fernandes wrote:
>
>
> On 3/30/2026 10:52 AM, Alexandre Courbot wrote:
>> On Thu Mar 26, 2026 at 10:38 AM JST, John Hubbard wrote:
>>> Hopper and Blackwell GPUs use FSP-based secure boot and do not
>>> require waiting for GFW_BOOT completion. Move the GFW_BOOT wait
>>> into a GPU HAL so the decision and the wait both live in the HAL.
>>>
>>> Pre-Hopper families (Tu102 HAL) wait for GFW_BOOT completion.
>>> Hopper and later (Gh100 HAL) skip it and boot via FSP instead.
>>>
>>> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
>>> ---
>>>  drivers/gpu/nova-core/gpu.rs     |  6 +++--
>>>  drivers/gpu/nova-core/gpu/hal.rs | 41 ++++++++++++++++++++++++++++++++
>>>  2 files changed, 45 insertions(+), 2 deletions(-)
>>>  create mode 100644 drivers/gpu/nova-core/gpu/hal.rs
>>>
>>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>>> index f70bfbda1614..8332ad67c0af 100644
>>> --- a/drivers/gpu/nova-core/gpu.rs
>>> +++ b/drivers/gpu/nova-core/gpu.rs
>>> @@ -21,11 +21,12 @@
>>>          Falcon, //
>>>      },
>>>      fb::SysmemFlush,
>>> -    gfw,
>>>      gsp::Gsp,
>>>      regs,
>>>  };
>>>  
>>> +mod hal;
>>> +
>>>  macro_rules! define_chipset {
>>>      ({ $($variant:ident = $value:expr),* $(,)* }) =>
>>>      {
>>> @@ -311,6 +312,7 @@ pub(crate) fn new<'a>(
>>>          spec: Spec,
>>>      ) -> impl PinInit<Self, Error> + 'a {
>>>          let dma_mask = spec.chipset().arch().dma_mask();
>>> +        let hal = hal::gpu_hal(spec.chipset());
>>>  
>>>          try_pin_init!(Self {
>>>              // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
>>> @@ -319,7 +321,7 @@ pub(crate) fn new<'a>(
>>>                  // still constructing it, so no concurrent DMA allocations can exist.
>>>                  unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
>>>  
>>> -                gfw::wait_gfw_boot_completion(bar)
>>> +                hal.wait_gfw_boot_completion(bar)
>>>                      .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
>>>              },
>>>  
>>> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
>>> new file mode 100644
>>> index 000000000000..164410992659
>>> --- /dev/null
>>> +++ b/drivers/gpu/nova-core/gpu/hal.rs
>>> @@ -0,0 +1,41 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +
>>> +use kernel::prelude::*;
>>> +
>>> +use crate::{
>>> +    driver::Bar0,
>>> +    gfw,
>>> +    gpu::{
>>> +        Architecture,
>>> +        Chipset, //
>>> +    },
>>> +};
>>> +
>>> +pub(crate) trait GpuHal {
>>> +    /// Waits for GFW_BOOT completion if required by this hardware family.
>>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
>>> +}
>>> +
>>> +struct Tu102;
>>> +struct Gh100;
>>> +
>>> +impl GpuHal for Tu102 {
>>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
>>> +        gfw::wait_gfw_boot_completion(bar)
>>> +    }
>>> +}
>>> +
>>> +impl GpuHal for Gh100 {
>>> +    fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
>>> +        Ok(())
>>> +    }
>>> +}
>> 
>> Please take a look at how other HALs are implemented: each HAL instance
>> is in its own module. That's not just a cosmetic choice; it allows us to
>> keep the chipset's specific HAL struct and its helpers completely
>> private and forces us to make code-sharing explicit. Furthermore, this
>> particular HAL is bound to grow, so let's split it properly from the
>> start.
>> 
>> If you do that it also makes more sense to use constants (contrary to
>> Gary's feedback on v8), if only to align with the rest of the driver.
>> 
>> Once this is done, making `gpu::hal::tu102` absorb the `gfw` module is
>> trivial, so let's do that while we are at it - having `gfw` as being
>> driver-wide makes little sense since it has a very limited role for a
>> specific subset of the chips we support.
>
> I feel a HAL might be overkill for this. Looking at the series, this is also the
> only method. I am doubtful future architectures will have to once again wait for
> GFW boot (is that expected?).
>
> If not, we can just match or conditional on .arch(). We do that already in other
> places.
>
> Something like:
>   if spec.chipset().arch() < Architecture::Hopper {
>       gfw::wait_gfw_boot_completion(bar)?;
>   }

It's not about the size, it's whether the code paths diverge enough. In
this case they clearly do, and using a HAL lets us get rid of a
minor module that was at the root of the project (`gfw.rs`) entirely.

I expect this HAL to further grow with the different boot paths as well.
Not sure if that will happen in this series, but it will eventually.

Re: [PATCH v9 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

Posted by Joel Fernandes 1 day, 9 hours ago

On Mar 30, 2026, at 8:19 PM, Alexandre Courbot <acourbot@nvidia.com> wrote:
> On Tue Mar 31, 2026 at 3:33 AM JST, Joel Fernandes wrote:
>>
>> On 3/30/2026 10:52 AM, Alexandre Courbot wrote:
>>> Please take a look at how other HALs are implemented: each HAL instance
>>> is in its own module. That's not just a cosmetic choice; it allows us to
>>> keep the chipset's specific HAL struct and its helpers completely
>>> private and forces us to make code-sharing explicit. Furthermore, this
>>> particular HAL is bound to grow, so let's split it properly from the
>>> start.
>>> If you do that it also makes more sense to use constants (contrary to
>>> Gary's feedback on v8), if only to align with the rest of the driver.
>>> Once this is done, making `gpu::hal::tu102` absorb the `gfw` module is
>>> trivial, so let's do that while we are at it - having `gfw` as being
>>> driver-wide makes little sense since it has a very limited role for a
>>> specific subset of the chips we support.
>>
>> I feel a HAL might be overkill for this. Looking at the series, this is
>> also the only method. I am doubtful future architectures will have to
>> once again wait for GFW boot (is that expected?).
>>
>> If not, we can just match or conditional on .arch(). We do that already
>> in other places.
>>
>> Something like:
>>  if spec.chipset().arch() < Architecture::Hopper {
>>      gfw::wait_gfw_boot_completion(bar)?;
>>  }
>
> It's not about the size

Simple things can trivially use match/if on chipset/arch, I just don't see
the benefit of a HAL for simple things, and only drawbacks. Even code-wise
doing the simple snippet above is simpler and cleaner, no need separate new
.rs file.

Even if we look at the HAL approach:

Some one reading the code always thinks that a wait is required but the
wait "mechanism" is arch/chipset-specific, not that NO wait is actually
required sometimes. ONLY if they look at the HAL implementation will they
know that on Hopper+ it is not required.
     hal.wait_gfw_boot_completion(bar)
         .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;

Instead in the snippet above, it is absolutely clear to the reader that
Hopper+ does not need to do GFW wait.

> , it's whether the code paths diverge enough. In
> this case they clearly do, and using a HAL lets us get rid of a
> minor module that was at the root of the project (`gfw.rs`) entirely.
>
> I expect this HAL to further grow with the different boot paths as well.
> Not sure if that will happen in this series, but it will eventually.

But just inlining the module in gpu.rs will also get rid of the module?
That's not a strong argument for a HAL. gfw just has one function.

Also, I checked and nouveau does not use HALs for GFW boot.

Other than that, I am concerned about HAL fragmentation (we already have
other more specific HALs like the fb HAL and falcon HAL). So should those
functionalities also go into gpu HAL? Its not clear.

What about other code doing match on chipset -- should all that also go
into this kitchen-sink gpu HAL then? It
needs to be consistent, not "let's put gfw in a HAL because we feel like it
even though we know we're never going to ever have to do per-chipset/arch
gfw wait." There is not much reason for that as far as I can see.

The argument that the HAL will grow in the future also doesn't hold,
since a HAL can be trivially added if and when needed. My understanding is
we don't want to add code until we need it and you haven't provided any
"future" things that will be added to this new common gpu HAL.

Please keep it simple :). The snippet I shared above is just 3 lines, not
new 41 line boiler plate HAL file.

Now if some variant of Blackwell starts doing some custom GFW boot scheme,
I am all for a HAL...

thanks,

-- 
Joel Fernandes

Re: [PATCH v9 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

Posted by John Hubbard 2 days, 6 hours ago

On 3/30/26 11:33 AM, Joel Fernandes wrote:
> On 3/30/2026 10:52 AM, Alexandre Courbot wrote:
>> On Thu Mar 26, 2026 at 10:38 AM JST, John Hubbard wrote:
...
>>> +pub(crate) trait GpuHal {
>>> +    /// Waits for GFW_BOOT completion if required by this hardware family.
>>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
>>> +}
>>> +
>>> +struct Tu102;
>>> +struct Gh100;
>>> +
>>> +impl GpuHal for Tu102 {
>>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
>>> +        gfw::wait_gfw_boot_completion(bar)
>>> +    }
>>> +}
>>> +
>>> +impl GpuHal for Gh100 {
>>> +    fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
>>> +        Ok(())
>>> +    }
>>> +}
>>
>> Please take a look at how other HALs are implemented: each HAL instance
>> is in its own module. That's not just a cosmetic choice; it allows us to
>> keep the chipset's specific HAL struct and its helpers completely
>> private and forces us to make code-sharing explicit. Furthermore, this
>> particular HAL is bound to grow, so let's split it properly from the
>> start.
>>
>> If you do that it also makes more sense to use constants (contrary to
>> Gary's feedback on v8), if only to align with the rest of the driver.
>>
>> Once this is done, making `gpu::hal::tu102` absorb the `gfw` module is
>> trivial, so let's do that while we are at it - having `gfw` as being
>> driver-wide makes little sense since it has a very limited role for a
>> specific subset of the chips we support.
> 
> I feel a HAL might be overkill for this. Looking at the series, this is also the
> only method. I am doubtful future architectures will have to once again wait for
> GFW boot (is that expected?).

Definitely not! We are moving to futurue firmware that does all of its
own initialization, without all of these intrusive and fussy
steps and interventions from the kernel driver that we are doing
today.

> 
> If not, we can just match or conditional on .arch(). We do that already in other
> places.
> 
> Something like:
>    if spec.chipset().arch() < Architecture::Hopper {
>        gfw::wait_gfw_boot_completion(bar)?;
>    }
> 
> 
> Thoughts?
> 

I have a feeling this matches an earlier version of the patchset,
actually. :).

I also think a HAL for this tiny aspect is overkill, but I also don't
really mind either putting it into a HAL, or not. Because either
way the code is readable.

thanks,
John Hubbard

Re: [PATCH v9 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

Posted by Gary Guo 2 days, 10 hours ago

On Mon Mar 30, 2026 at 3:52 PM BST, Alexandre Courbot wrote:
> On Thu Mar 26, 2026 at 10:38 AM JST, John Hubbard wrote:
>> Hopper and Blackwell GPUs use FSP-based secure boot and do not
>> require waiting for GFW_BOOT completion. Move the GFW_BOOT wait
>> into a GPU HAL so the decision and the wait both live in the HAL.
>>
>> Pre-Hopper families (Tu102 HAL) wait for GFW_BOOT completion.
>> Hopper and later (Gh100 HAL) skip it and boot via FSP instead.
>>
>> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
>> ---
>>  drivers/gpu/nova-core/gpu.rs     |  6 +++--
>>  drivers/gpu/nova-core/gpu/hal.rs | 41 ++++++++++++++++++++++++++++++++
>>  2 files changed, 45 insertions(+), 2 deletions(-)
>>  create mode 100644 drivers/gpu/nova-core/gpu/hal.rs
>>
>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>> index f70bfbda1614..8332ad67c0af 100644
>> --- a/drivers/gpu/nova-core/gpu.rs
>> +++ b/drivers/gpu/nova-core/gpu.rs
>> @@ -21,11 +21,12 @@
>>          Falcon, //
>>      },
>>      fb::SysmemFlush,
>> -    gfw,
>>      gsp::Gsp,
>>      regs,
>>  };
>>  
>> +mod hal;
>> +
>>  macro_rules! define_chipset {
>>      ({ $($variant:ident = $value:expr),* $(,)* }) =>
>>      {
>> @@ -311,6 +312,7 @@ pub(crate) fn new<'a>(
>>          spec: Spec,
>>      ) -> impl PinInit<Self, Error> + 'a {
>>          let dma_mask = spec.chipset().arch().dma_mask();
>> +        let hal = hal::gpu_hal(spec.chipset());
>>  
>>          try_pin_init!(Self {
>>              // We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
>> @@ -319,7 +321,7 @@ pub(crate) fn new<'a>(
>>                  // still constructing it, so no concurrent DMA allocations can exist.
>>                  unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
>>  
>> -                gfw::wait_gfw_boot_completion(bar)
>> +                hal.wait_gfw_boot_completion(bar)
>>                      .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
>>              },
>>  
>> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
>> new file mode 100644
>> index 000000000000..164410992659
>> --- /dev/null
>> +++ b/drivers/gpu/nova-core/gpu/hal.rs
>> @@ -0,0 +1,41 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +use kernel::prelude::*;
>> +
>> +use crate::{
>> +    driver::Bar0,
>> +    gfw,
>> +    gpu::{
>> +        Architecture,
>> +        Chipset, //
>> +    },
>> +};
>> +
>> +pub(crate) trait GpuHal {
>> +    /// Waits for GFW_BOOT completion if required by this hardware family.
>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
>> +}
>> +
>> +struct Tu102;
>> +struct Gh100;
>> +
>> +impl GpuHal for Tu102 {
>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
>> +        gfw::wait_gfw_boot_completion(bar)
>> +    }
>> +}
>> +
>> +impl GpuHal for Gh100 {
>> +    fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
>> +        Ok(())
>> +    }
>> +}
>
> Please take a look at how other HALs are implemented: each HAL instance
> is in its own module. That's not just a cosmetic choice; it allows us to
> keep the chipset's specific HAL struct and its helpers completely
> private and forces us to make code-sharing explicit. Furthermore, this
> particular HAL is bound to grow, so let's split it properly from the
> start.
>
> If you do that it also makes more sense to use constants (contrary to
> Gary's feedback on v8), if only to align with the rest of the driver.

The exsiting cases in fb/hal declare the constants without even exposing it (the
only use case is to pass it to XXX_HAL.

These looks redundant and should be changed to

    pub(super) const GA100_HAL: &dyn FbHal = &Ga100;

etc. instead.

Best,
Gary

>
> Once this is done, making `gpu::hal::tu102` absorb the `gfw` module is
> trivial, so let's do that while we are at it - having `gfw` as being
> driver-wide makes little sense since it has a very limited role for a
> specific subset of the chips we support.