[PATCH v6 28/34] gpu: nova-core: Hopper/Blackwell: add FSP Chain of Trust boot

John Hubbard posted 34 patches 1 month ago
There is a newer version of this series
[PATCH v6 28/34] gpu: nova-core: Hopper/Blackwell: add FSP Chain of Trust boot
Posted by John Hubbard 1 month ago
Add boot_fmc() which builds and sends the Chain of Trust message to FSP,
and FmcBootArgs which bundles the DMA-coherent boot parameters that FSP
reads at boot time. The FspFirmware struct fields become pub(crate) and
fmc_full changes from DmaObject to KVec<u8> for CPU-side signature
extraction.

Co-developed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/gpu/nova-core/firmware/fsp.rs |  14 ++-
 drivers/gpu/nova-core/fsp.rs          | 134 +++++++++++++++++++++++++-
 drivers/gpu/nova-core/gpu.rs          |   1 -
 drivers/gpu/nova-core/mctp.rs         |   2 -
 4 files changed, 141 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/nova-core/firmware/fsp.rs b/drivers/gpu/nova-core/firmware/fsp.rs
index cea9532ba5ff..bb35f363b998 100644
--- a/drivers/gpu/nova-core/firmware/fsp.rs
+++ b/drivers/gpu/nova-core/firmware/fsp.rs
@@ -13,16 +13,16 @@
     gpu::Chipset, //
 };
 
-#[expect(unused)]
+#[expect(dead_code)]
 pub(crate) struct FspFirmware {
     /// FMC firmware image data (only the "image" ELF section).
-    fmc_image: DmaObject,
+    pub(crate) fmc_image: DmaObject,
     /// Full FMC ELF data (for signature extraction).
-    fmc_full: DmaObject,
+    pub(crate) fmc_full: KVec<u8>,
 }
 
 impl FspFirmware {
-    #[expect(unused)]
+    #[expect(dead_code)]
     pub(crate) fn new(
         dev: &device::Device<device::Bound>,
         chipset: Chipset,
@@ -36,9 +36,13 @@ pub(crate) fn new(
             EINVAL
         })?;
 
+        // Copy the full ELF into a kernel vector for CPU-side signature extraction
+        let mut fmc_full = KVec::with_capacity(fw.data().len(), GFP_KERNEL)?;
+        fmc_full.extend_from_slice(fw.data(), GFP_KERNEL)?;
+
         Ok(Self {
             fmc_image: DmaObject::from_data(dev, fmc_image_data)?,
-            fmc_full: DmaObject::from_data(dev, fw.data())?,
+            fmc_full,
         })
     }
 }
diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
index 8926dd814a83..c66ad0a102a6 100644
--- a/drivers/gpu/nova-core/fsp.rs
+++ b/drivers/gpu/nova-core/fsp.rs
@@ -8,8 +8,14 @@
 
 use kernel::{
     device,
+    dma::CoherentAllocation,
     io::poll::read_poll_timeout,
     prelude::*,
+    ptr::{
+        Alignable,
+        Alignment, //
+    },
+    sizes::{SZ_1M, SZ_2M},
     time::Delta,
     transmute::{
         AsBytes,
@@ -38,7 +44,6 @@ pub(crate) const fn new(version: u16) -> Self {
     }
 
     /// Return the raw protocol version number for the wire format.
-    #[expect(dead_code)]
     pub(crate) const fn raw(self) -> u16 {
         self.0
     }
@@ -221,6 +226,73 @@ impl MessageToFsp for FspMessage {
     const NVDM_TYPE: u32 = NvdmType::Cot as u32;
 }
 
+/// Bundled arguments for FMC boot via FSP Chain of Trust.
+pub(crate) struct FmcBootArgs<'a> {
+    chipset: crate::gpu::Chipset,
+    fmc_image_fw: &'a crate::dma::DmaObject,
+    fmc_boot_params: kernel::dma::CoherentAllocation<GspFmcBootParams>,
+    resume: bool,
+    signatures: &'a FmcSignatures,
+}
+
+impl<'a> FmcBootArgs<'a> {
+    /// Build FMC boot arguments, allocating the DMA-coherent boot parameter
+    /// structure that FSP will read.
+    #[expect(dead_code)]
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn new(
+        dev: &device::Device<device::Bound>,
+        chipset: crate::gpu::Chipset,
+        fmc_image_fw: &'a crate::dma::DmaObject,
+        wpr_meta_addr: u64,
+        wpr_meta_size: u32,
+        libos_addr: u64,
+        resume: bool,
+        signatures: &'a FmcSignatures,
+    ) -> Result<Self> {
+        const GSP_DMA_TARGET_COHERENT_SYSTEM: u32 = 1;
+        const GSP_DMA_TARGET_NONCOHERENT_SYSTEM: u32 = 2;
+
+        let fmc_boot_params = CoherentAllocation::<GspFmcBootParams>::alloc_coherent(
+            dev,
+            1,
+            GFP_KERNEL | __GFP_ZERO,
+        )?;
+
+        kernel::dma_write!(
+            fmc_boot_params[0].boot_gsp_rm_params.target = GSP_DMA_TARGET_COHERENT_SYSTEM
+        )?;
+        kernel::dma_write!(
+            fmc_boot_params[0].boot_gsp_rm_params.gsp_rm_desc_offset = wpr_meta_addr
+        )?;
+        kernel::dma_write!(fmc_boot_params[0].boot_gsp_rm_params.gsp_rm_desc_size = wpr_meta_size)?;
+
+        // Blackwell FSP expects wpr_carveout_offset and wpr_carveout_size to be zero;
+        // it obtains WPR info from other sources.
+        kernel::dma_write!(fmc_boot_params[0].boot_gsp_rm_params.b_is_gsp_rm_boot = 1)?;
+
+        kernel::dma_write!(
+            fmc_boot_params[0].gsp_rm_params.target = GSP_DMA_TARGET_NONCOHERENT_SYSTEM
+        )?;
+        kernel::dma_write!(fmc_boot_params[0].gsp_rm_params.boot_args_offset = libos_addr)?;
+
+        Ok(Self {
+            chipset,
+            fmc_image_fw,
+            fmc_boot_params,
+            resume,
+            signatures,
+        })
+    }
+
+    /// DMA address of the FMC boot parameters, needed after boot for lockdown
+    /// release polling.
+    #[expect(dead_code)]
+    pub(crate) fn boot_params_dma_handle(&self) -> u64 {
+        self.fmc_boot_params.dma_handle()
+    }
+}
+
 /// FSP interface for Hopper/Blackwell GPUs.
 pub(crate) struct Fsp;
 
@@ -315,8 +387,66 @@ pub(crate) fn extract_fmc_signatures(
         Ok(signatures)
     }
 
-    /// Send message to FSP and wait for response.
+    /// Boot GSP FMC via FSP Chain of Trust.
+    ///
+    /// Builds the COT message from the pre-configured [`FmcBootArgs`], sends it
+    /// to FSP, and waits for the response.
     #[expect(dead_code)]
+    pub(crate) fn boot_fmc(
+        dev: &device::Device<device::Bound>,
+        bar: &crate::driver::Bar0,
+        fsp_falcon: &crate::falcon::Falcon<crate::falcon::fsp::Fsp>,
+        args: &FmcBootArgs<'_>,
+    ) -> Result {
+        dev_dbg!(dev, "Starting FSP boot sequence for {}\n", args.chipset);
+
+        let fmc_addr = args.fmc_image_fw.dma_handle();
+        let fmc_boot_params_addr = args.fmc_boot_params.dma_handle();
+
+        // frts_offset is relative to FB end: FRTS_location = FB_END - frts_offset
+        let frts_offset = if !args.resume {
+            let mut frts_reserved_size = crate::fb::calc_non_wpr_heap_size(args.chipset);
+
+            frts_reserved_size += u64::from(crate::fb::PMU_RESERVED_SIZE);
+
+            frts_reserved_size
+                .align_up(Alignment::new::<SZ_2M>())
+                .ok_or(EINVAL)?
+        } else {
+            0
+        };
+        let frts_size: u32 = if !args.resume { SZ_1M as u32 } else { 0 };
+
+        let msg = KBox::new(
+            FspMessage {
+                mctp_header: MctpHeader::single_packet().raw(),
+                nvdm_header: NvdmHeader::new(NvdmType::Cot).raw(),
+
+                cot: NvdmPayloadCot {
+                    version: args.chipset.fsp_cot_version().ok_or(ENOTSUPP)?.raw(),
+                    size: u16::try_from(core::mem::size_of::<NvdmPayloadCot>())
+                        .map_err(|_| EINVAL)?,
+                    gsp_fmc_sysmem_offset: fmc_addr,
+                    frts_sysmem_offset: 0,
+                    frts_sysmem_size: 0,
+                    frts_vidmem_offset: frts_offset,
+                    frts_vidmem_size: frts_size,
+                    hash384: args.signatures.hash384,
+                    public_key: args.signatures.public_key,
+                    signature: args.signatures.signature,
+                    gsp_boot_args_sysmem_offset: fmc_boot_params_addr,
+                },
+            },
+            GFP_KERNEL,
+        )?;
+
+        Self::send_sync_fsp(dev, bar, fsp_falcon, &*msg)?;
+
+        dev_dbg!(dev, "FSP Chain of Trust completed successfully\n");
+        Ok(())
+    }
+
+    /// Send message to FSP and wait for response.
     fn send_sync_fsp<M>(
         dev: &device::Device<device::Bound>,
         bar: &crate::driver::Bar0,
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 02f69c5975d3..e9d07750fafe 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -140,7 +140,6 @@ pub(crate) const fn needs_fwsec_bootloader(self) -> bool {
     ///
     /// Hopper (GH100) uses version 1, Blackwell uses version 2.
     /// Returns `None` for architectures that do not use FSP.
-    #[expect(dead_code)]
     pub(crate) const fn fsp_cot_version(self) -> Option<FspCotVersion> {
         match self.arch() {
             Architecture::Hopper => Some(FspCotVersion::new(1)),
diff --git a/drivers/gpu/nova-core/mctp.rs b/drivers/gpu/nova-core/mctp.rs
index 0dafc31b230c..c4e36a46fd69 100644
--- a/drivers/gpu/nova-core/mctp.rs
+++ b/drivers/gpu/nova-core/mctp.rs
@@ -6,8 +6,6 @@
 //! Device Management) messages between the kernel driver and GPU firmware
 //! processors such as FSP and GSP.
 
-#![expect(dead_code)]
-
 /// NVDM message type identifiers carried over MCTP.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 #[repr(u32)]
-- 
2.53.0
Re: [PATCH v6 28/34] gpu: nova-core: Hopper/Blackwell: add FSP Chain of Trust boot
Posted by Alexandre Courbot 3 weeks, 2 days ago
On Tue Mar 10, 2026 at 11:11 AM JST, John Hubbard wrote:
> Add boot_fmc() which builds and sends the Chain of Trust message to FSP,
> and FmcBootArgs which bundles the DMA-coherent boot parameters that FSP
> reads at boot time. The FspFirmware struct fields become pub(crate) and
> fmc_full changes from DmaObject to KVec<u8> for CPU-side signature
> extraction.
>
> Co-developed-by: Alexandre Courbot <acourbot@nvidia.com>
> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
> ---
>  drivers/gpu/nova-core/firmware/fsp.rs |  14 ++-
>  drivers/gpu/nova-core/fsp.rs          | 134 +++++++++++++++++++++++++-
>  drivers/gpu/nova-core/gpu.rs          |   1 -
>  drivers/gpu/nova-core/mctp.rs         |   2 -
>  4 files changed, 141 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/firmware/fsp.rs b/drivers/gpu/nova-core/firmware/fsp.rs
> index cea9532ba5ff..bb35f363b998 100644
> --- a/drivers/gpu/nova-core/firmware/fsp.rs
> +++ b/drivers/gpu/nova-core/firmware/fsp.rs
> @@ -13,16 +13,16 @@
>      gpu::Chipset, //
>  };
>  
> -#[expect(unused)]
> +#[expect(dead_code)]
>  pub(crate) struct FspFirmware {
>      /// FMC firmware image data (only the "image" ELF section).
> -    fmc_image: DmaObject,
> +    pub(crate) fmc_image: DmaObject,
>      /// Full FMC ELF data (for signature extraction).
> -    fmc_full: DmaObject,
> +    pub(crate) fmc_full: KVec<u8>,

It looks like `fmc_full` should have been a `KVec` from the beginning -
unless I missed something, there was no point in keeping it as a
`DmaObject` for a while.

>  }
>  
>  impl FspFirmware {
> -    #[expect(unused)]
> +    #[expect(dead_code)]
>      pub(crate) fn new(
>          dev: &device::Device<device::Bound>,
>          chipset: Chipset,
> @@ -36,9 +36,13 @@ pub(crate) fn new(
>              EINVAL
>          })?;
>  
> +        // Copy the full ELF into a kernel vector for CPU-side signature extraction
> +        let mut fmc_full = KVec::with_capacity(fw.data().len(), GFP_KERNEL)?;
> +        fmc_full.extend_from_slice(fw.data(), GFP_KERNEL)?;
> +
>          Ok(Self {
>              fmc_image: DmaObject::from_data(dev, fmc_image_data)?,
> -            fmc_full: DmaObject::from_data(dev, fw.data())?,
> +            fmc_full,
>          })
>      }
>  }
> diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
> index 8926dd814a83..c66ad0a102a6 100644
> --- a/drivers/gpu/nova-core/fsp.rs
> +++ b/drivers/gpu/nova-core/fsp.rs
> @@ -8,8 +8,14 @@
>  
>  use kernel::{
>      device,
> +    dma::CoherentAllocation,
>      io::poll::read_poll_timeout,
>      prelude::*,
> +    ptr::{
> +        Alignable,
> +        Alignment, //
> +    },
> +    sizes::{SZ_1M, SZ_2M},

Nit: import format. (can't wait for the rustfmt support for that one).

>      time::Delta,
>      transmute::{
>          AsBytes,
> @@ -38,7 +44,6 @@ pub(crate) const fn new(version: u16) -> Self {
>      }
>  
>      /// Return the raw protocol version number for the wire format.
> -    #[expect(dead_code)]
>      pub(crate) const fn raw(self) -> u16 {
>          self.0
>      }
> @@ -221,6 +226,73 @@ impl MessageToFsp for FspMessage {
>      const NVDM_TYPE: u32 = NvdmType::Cot as u32;
>  }
>  
> +/// Bundled arguments for FMC boot via FSP Chain of Trust.
> +pub(crate) struct FmcBootArgs<'a> {
> +    chipset: crate::gpu::Chipset,
> +    fmc_image_fw: &'a crate::dma::DmaObject,
> +    fmc_boot_params: kernel::dma::CoherentAllocation<GspFmcBootParams>,
> +    resume: bool,
> +    signatures: &'a FmcSignatures,
> +}
> +
> +impl<'a> FmcBootArgs<'a> {
> +    /// Build FMC boot arguments, allocating the DMA-coherent boot parameter
> +    /// structure that FSP will read.
> +    #[expect(dead_code)]
> +    #[allow(clippy::too_many_arguments)]
> +    pub(crate) fn new(
> +        dev: &device::Device<device::Bound>,
> +        chipset: crate::gpu::Chipset,
> +        fmc_image_fw: &'a crate::dma::DmaObject,
> +        wpr_meta_addr: u64,
> +        wpr_meta_size: u32,
> +        libos_addr: u64,
> +        resume: bool,
> +        signatures: &'a FmcSignatures,
> +    ) -> Result<Self> {
> +        const GSP_DMA_TARGET_COHERENT_SYSTEM: u32 = 1;
> +        const GSP_DMA_TARGET_NONCOHERENT_SYSTEM: u32 = 2;

I see these in OpenRM's
src/nvidia/arch/nvalloc/common/inc/gsp/gspifpub.h - can we add them to
the bindings?

> +
> +        let fmc_boot_params = CoherentAllocation::<GspFmcBootParams>::alloc_coherent(
> +            dev,
> +            1,
> +            GFP_KERNEL | __GFP_ZERO,
> +        )?;
> +
> +        kernel::dma_write!(
> +            fmc_boot_params[0].boot_gsp_rm_params.target = GSP_DMA_TARGET_COHERENT_SYSTEM
> +        )?;

Note: on the latest drm-rust-next, this needs to be updated to:

        kernel::dma_write!(
            fmc_boot_params,
            [0]?.boot_gsp_rm_params.target,
            GSP_DMA_TARGET_COHERENT_SYSTEM
        );

(same for those below)

> +        kernel::dma_write!(
> +            fmc_boot_params[0].boot_gsp_rm_params.gsp_rm_desc_offset = wpr_meta_addr
> +        )?;
> +        kernel::dma_write!(fmc_boot_params[0].boot_gsp_rm_params.gsp_rm_desc_size = wpr_meta_size)?;
> +
> +        // Blackwell FSP expects wpr_carveout_offset and wpr_carveout_size to be zero;
> +        // it obtains WPR info from other sources.
> +        kernel::dma_write!(fmc_boot_params[0].boot_gsp_rm_params.b_is_gsp_rm_boot = 1)?;

... or better, if we factor these writes into a single one:

        kernel::dma_write!(
            fmc_boot_params,
            [0]?.boot_gsp_rm_params,
            GspAcrBootGspRmParams {
                target: GSP_DMA_TARGET_COHERENT_SYSTEM,
                gsp_rm_desc_offset: wpr_meta_addr,
                gsp_rm_desc_size: wpr_meta_size,
                b_is_gsp_rm_boot: 1,
                ..Default::default()
            }
        );

> +
> +        kernel::dma_write!(
> +            fmc_boot_params[0].gsp_rm_params.target = GSP_DMA_TARGET_NONCOHERENT_SYSTEM
> +        )?;
> +        kernel::dma_write!(fmc_boot_params[0].gsp_rm_params.boot_args_offset = libos_addr)?;

Here as well:

        kernel::dma_write!(
            fmc_boot_params,
            [0]?.gsp_rm_params,
            GspRmParams {
                target: GSP_DMA_TARGET_NONCOHERENT_SYSTEM,
                boot_args_offset: libos_addr,
            }
        );

> +
> +        Ok(Self {
> +            chipset,
> +            fmc_image_fw,
> +            fmc_boot_params,
> +            resume,
> +            signatures,
> +        })
> +    }
> +
> +    /// DMA address of the FMC boot parameters, needed after boot for lockdown
> +    /// release polling.
> +    #[expect(dead_code)]
> +    pub(crate) fn boot_params_dma_handle(&self) -> u64 {
> +        self.fmc_boot_params.dma_handle()
> +    }
> +}
> +
>  /// FSP interface for Hopper/Blackwell GPUs.
>  pub(crate) struct Fsp;
>  
> @@ -315,8 +387,66 @@ pub(crate) fn extract_fmc_signatures(
>          Ok(signatures)
>      }
>  
> -    /// Send message to FSP and wait for response.
> +    /// Boot GSP FMC via FSP Chain of Trust.
> +    ///
> +    /// Builds the COT message from the pre-configured [`FmcBootArgs`], sends it
> +    /// to FSP, and waits for the response.
>      #[expect(dead_code)]
> +    pub(crate) fn boot_fmc(
> +        dev: &device::Device<device::Bound>,
> +        bar: &crate::driver::Bar0,
> +        fsp_falcon: &crate::falcon::Falcon<crate::falcon::fsp::Fsp>,
> +        args: &FmcBootArgs<'_>,
> +    ) -> Result {
> +        dev_dbg!(dev, "Starting FSP boot sequence for {}\n", args.chipset);
> +
> +        let fmc_addr = args.fmc_image_fw.dma_handle();
> +        let fmc_boot_params_addr = args.fmc_boot_params.dma_handle();
> +
> +        // frts_offset is relative to FB end: FRTS_location = FB_END - frts_offset
> +        let frts_offset = if !args.resume {
> +            let mut frts_reserved_size = crate::fb::calc_non_wpr_heap_size(args.chipset);
> +
> +            frts_reserved_size += u64::from(crate::fb::PMU_RESERVED_SIZE);

Let's use a checked operation here.