[PATCH v8 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting

John Hubbard posted 31 patches 1 week, 1 day ago
There is a newer version of this series
[PATCH v8 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting
Posted by John Hubbard 1 week, 1 day ago
Hopper and Blackwell GPUs use FSP-based secure boot and do not
require waiting for GFW_BOOT completion. Move the GFW_BOOT wait
into a GPU HAL so the decision and the wait both live in the HAL.

Pre-Hopper families (Tu102 HAL) wait for GFW_BOOT completion.
Hopper and later (Gh100 HAL) skip it and boot via FSP instead.

Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/gpu/nova-core/gpu.rs     |  6 +++--
 drivers/gpu/nova-core/gpu/hal.rs | 42 ++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nova-core/gpu/hal.rs

diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index e7c3860cfb28..5cef5b29cd3f 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -18,11 +18,12 @@
         Falcon, //
     },
     fb::SysmemFlush,
-    gfw,
     gsp::Gsp,
     regs,
 };
 
+mod hal;
+
 macro_rules! define_chipset {
     ({ $($variant:ident = $value:expr),* $(,)* }) =>
     {
@@ -309,10 +310,11 @@ pub(crate) fn new<'a>(
         spec: Spec,
     ) -> impl PinInit<Self, Error> + 'a {
         let chipset = spec.chipset();
+        let hal = hal::gpu_hal(chipset);
 
         try_pin_init!(Self {
             _: {
-                gfw::wait_gfw_boot_completion(bar)
+                hal.wait_gfw_boot_completion(bar)
                     .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
             },
 
diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
new file mode 100644
index 000000000000..2f9e18e67a35
--- /dev/null
+++ b/drivers/gpu/nova-core/gpu/hal.rs
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::prelude::*;
+
+use crate::{
+    driver::Bar0,
+    gfw,
+    gpu::{
+        Architecture,
+        Chipset, //
+    },
+};
+
+pub(crate) trait GpuHal {
+    /// Waits for GFW_BOOT completion if required by this hardware family.
+    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
+}
+
+struct Tu102;
+struct Gh100;
+
+impl GpuHal for Tu102 {
+    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
+        gfw::wait_gfw_boot_completion(bar)
+    }
+}
+
+impl GpuHal for Gh100 {
+    fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
+        Ok(())
+    }
+}
+
+const TU102: Tu102 = Tu102;
+const GH100: Gh100 = Gh100;
+
+pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
+    match chipset.arch() {
+        Architecture::Turing | Architecture::Ampere | Architecture::Ada => &TU102,
+        Architecture::Hopper | Architecture::Blackwell => &GH100,
+    }
+}
-- 
2.53.0
Re: [PATCH v8 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting
Posted by Gary Guo 1 week, 1 day ago
On Wed Mar 25, 2026 at 3:52 AM GMT, John Hubbard wrote:
> Hopper and Blackwell GPUs use FSP-based secure boot and do not
> require waiting for GFW_BOOT completion. Move the GFW_BOOT wait
> into a GPU HAL so the decision and the wait both live in the HAL.
>
> Pre-Hopper families (Tu102 HAL) wait for GFW_BOOT completion.
> Hopper and later (Gh100 HAL) skip it and boot via FSP instead.
>
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
> ---
>  drivers/gpu/nova-core/gpu.rs     |  6 +++--
>  drivers/gpu/nova-core/gpu/hal.rs | 42 ++++++++++++++++++++++++++++++++
>  2 files changed, 46 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/nova-core/gpu/hal.rs
>
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index e7c3860cfb28..5cef5b29cd3f 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -18,11 +18,12 @@
>          Falcon, //
>      },
>      fb::SysmemFlush,
> -    gfw,
>      gsp::Gsp,
>      regs,
>  };
>  
> +mod hal;
> +
>  macro_rules! define_chipset {
>      ({ $($variant:ident = $value:expr),* $(,)* }) =>
>      {
> @@ -309,10 +310,11 @@ pub(crate) fn new<'a>(
>          spec: Spec,
>      ) -> impl PinInit<Self, Error> + 'a {
>          let chipset = spec.chipset();
> +        let hal = hal::gpu_hal(chipset);
>  
>          try_pin_init!(Self {
>              _: {
> -                gfw::wait_gfw_boot_completion(bar)
> +                hal.wait_gfw_boot_completion(bar)
>                      .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
>              },
>  
> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
> new file mode 100644
> index 000000000000..2f9e18e67a35
> --- /dev/null
> +++ b/drivers/gpu/nova-core/gpu/hal.rs
> @@ -0,0 +1,42 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +use kernel::prelude::*;
> +
> +use crate::{
> +    driver::Bar0,
> +    gfw,
> +    gpu::{
> +        Architecture,
> +        Chipset, //
> +    },
> +};
> +
> +pub(crate) trait GpuHal {
> +    /// Waits for GFW_BOOT completion if required by this hardware family.
> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
> +}
> +
> +struct Tu102;
> +struct Gh100;
> +
> +impl GpuHal for Tu102 {
> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
> +        gfw::wait_gfw_boot_completion(bar)
> +    }
> +}
> +
> +impl GpuHal for Gh100 {
> +    fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
> +        Ok(())
> +    }
> +}
> +
> +const TU102: Tu102 = Tu102;
> +const GH100: Gh100 = Gh100;
> +

Why this instead of using `Tu102` and `Gh100` directly? They already exist in
the value namespace because both types are unit structs.

Best,
Gary

> +pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
> +    match chipset.arch() {
> +        Architecture::Turing | Architecture::Ampere | Architecture::Ada => &TU102,
> +        Architecture::Hopper | Architecture::Blackwell => &GH100,
> +    }
> +}
Re: [PATCH v8 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting
Posted by John Hubbard 1 week, 1 day ago
On 3/25/26 8:45 AM, Gary Guo wrote:
> On Wed Mar 25, 2026 at 3:52 AM GMT, John Hubbard wrote:
>> Hopper and Blackwell GPUs use FSP-based secure boot and do not
>> require waiting for GFW_BOOT completion. Move the GFW_BOOT wait
>> into a GPU HAL so the decision and the wait both live in the HAL.
>>
>> Pre-Hopper families (Tu102 HAL) wait for GFW_BOOT completion.
>> Hopper and later (Gh100 HAL) skip it and boot via FSP instead.
>>
>> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
>> ---
>>  drivers/gpu/nova-core/gpu.rs     |  6 +++--
>>  drivers/gpu/nova-core/gpu/hal.rs | 42 ++++++++++++++++++++++++++++++++
>>  2 files changed, 46 insertions(+), 2 deletions(-)
>>  create mode 100644 drivers/gpu/nova-core/gpu/hal.rs
>>
>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>> index e7c3860cfb28..5cef5b29cd3f 100644
>> --- a/drivers/gpu/nova-core/gpu.rs
>> +++ b/drivers/gpu/nova-core/gpu.rs
>> @@ -18,11 +18,12 @@
>>          Falcon, //
>>      },
>>      fb::SysmemFlush,
>> -    gfw,
>>      gsp::Gsp,
>>      regs,
>>  };
>>  
>> +mod hal;
>> +
>>  macro_rules! define_chipset {
>>      ({ $($variant:ident = $value:expr),* $(,)* }) =>
>>      {
>> @@ -309,10 +310,11 @@ pub(crate) fn new<'a>(
>>          spec: Spec,
>>      ) -> impl PinInit<Self, Error> + 'a {
>>          let chipset = spec.chipset();
>> +        let hal = hal::gpu_hal(chipset);
>>  
>>          try_pin_init!(Self {
>>              _: {
>> -                gfw::wait_gfw_boot_completion(bar)
>> +                hal.wait_gfw_boot_completion(bar)
>>                      .inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
>>              },
>>  
>> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
>> new file mode 100644
>> index 000000000000..2f9e18e67a35
>> --- /dev/null
>> +++ b/drivers/gpu/nova-core/gpu/hal.rs
>> @@ -0,0 +1,42 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +use kernel::prelude::*;
>> +
>> +use crate::{
>> +    driver::Bar0,
>> +    gfw,
>> +    gpu::{
>> +        Architecture,
>> +        Chipset, //
>> +    },
>> +};
>> +
>> +pub(crate) trait GpuHal {
>> +    /// Waits for GFW_BOOT completion if required by this hardware family.
>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result;
>> +}
>> +
>> +struct Tu102;
>> +struct Gh100;
>> +
>> +impl GpuHal for Tu102 {
>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
>> +        gfw::wait_gfw_boot_completion(bar)
>> +    }
>> +}
>> +
>> +impl GpuHal for Gh100 {
>> +    fn wait_gfw_boot_completion(&self, _bar: &Bar0) -> Result {
>> +        Ok(())
>> +    }
>> +}
>> +
>> +const TU102: Tu102 = Tu102;
>> +const GH100: Gh100 = Gh100;
>> +
> 
> Why this instead of using `Tu102` and `Gh100` directly? They already exist in
> the value namespace because both types are unit structs.
> 

Fixed, thanks.


thanks,
-- 
John Hubbard

> Best,
> Gary
> 
>> +pub(super) fn gpu_hal(chipset: Chipset) -> &'static dyn GpuHal {
>> +    match chipset.arch() {
>> +        Architecture::Turing | Architecture::Ampere | Architecture::Ada => &TU102,
>> +        Architecture::Hopper | Architecture::Blackwell => &GH100,
>> +    }
>> +}
>
Re: [PATCH v8 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting
Posted by Alexandre Courbot 1 week, 1 day ago
On Tue, 24 Mar 2026 20:52:17 -0700, John Hubbard <jhubbard@nvidia.com> wrote:
> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
> new file mode 100644
> index 000000000000..2f9e18e67a35
> --- /dev/null
> +++ b/drivers/gpu/nova-core/gpu/hal.rs
> @@ -0,0 +1,42 @@
> [ ... skip 18 lines ... ]
> +struct Tu102;
> +struct Gh100;
> +
> +impl GpuHal for Tu102 {
> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
> +        gfw::wait_gfw_boot_completion(bar)

My suggestion was to move the body of `gfw::wait_gfw_boot_completion`
here and remove the `gfw` module entirely. But let's do that as a
follow-up.

-- 
Alexandre Courbot <acourbot@nvidia.com>
Re: [PATCH v8 06/31] gpu: nova-core: Hopper/Blackwell: skip GFW boot waiting
Posted by John Hubbard 1 week, 1 day ago
On 3/25/26 3:53 AM, Alexandre Courbot wrote:
> On Tue, 24 Mar 2026 20:52:17 -0700, John Hubbard <jhubbard@nvidia.com> wrote:
>> diff --git a/drivers/gpu/nova-core/gpu/hal.rs b/drivers/gpu/nova-core/gpu/hal.rs
>> new file mode 100644
>> index 000000000000..2f9e18e67a35
>> --- /dev/null
>> +++ b/drivers/gpu/nova-core/gpu/hal.rs
>> @@ -0,0 +1,42 @@
>> [ ... skip 18 lines ... ]
>> +struct Tu102;
>> +struct Gh100;
>> +
>> +impl GpuHal for Tu102 {
>> +    fn wait_gfw_boot_completion(&self, bar: &Bar0) -> Result {
>> +        gfw::wait_gfw_boot_completion(bar)
> 
> My suggestion was to move the body of `gfw::wait_gfw_boot_completion`
> here and remove the `gfw` module entirely. But let's do that as a
> follow-up.
> 

Ack.

thanks,
-- 
John Hubbard