[PATCH v10 17/28] gpu: nova-core: Hopper/Blackwell: add FSP secure boot completion waiting

John Hubbard posted 28 patches 1 day, 11 hours ago
[PATCH v10 17/28] gpu: nova-core: Hopper/Blackwell: add FSP secure boot completion waiting
Posted by John Hubbard 1 day, 11 hours ago
Add the FSP module with Fsp::wait_secure_boot(), which polls the I2CS
thermal scratch register until FSP signals boot success or the 5-second
timeout expires. Hopper and Blackwell use FSP instead of SEC2 for
secure boot.

Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/gpu/nova-core/fsp.rs       | 53 ++++++++++++++++++++++++++++++
 drivers/gpu/nova-core/gsp/boot.rs  |  5 ++-
 drivers/gpu/nova-core/nova_core.rs |  1 +
 drivers/gpu/nova-core/regs.rs      | 29 ++++++++++++++++
 4 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/nova-core/fsp.rs

diff --git a/drivers/gpu/nova-core/fsp.rs b/drivers/gpu/nova-core/fsp.rs
new file mode 100644
index 000000000000..55e543e80de8
--- /dev/null
+++ b/drivers/gpu/nova-core/fsp.rs
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! FSP (Firmware System Processor) interface for Hopper/Blackwell GPUs.
+//!
+//! Hopper/Blackwell use a simplified firmware boot sequence: FMC --> FSP --> GSP.
+//! Unlike Turing/Ampere/Ada, there is NO SEC2 (Security Engine 2) usage.
+//! FSP handles secure boot directly using FMC firmware + Chain of Trust.
+
+use kernel::{
+    device,
+    io::poll::read_poll_timeout,
+    prelude::*,
+    time::Delta, //
+};
+
+use crate::regs;
+
+/// Maximum time to wait for FSP to signal secure boot completion, in milliseconds.
+const FSP_SECURE_BOOT_TIMEOUT_MS: i64 = 5000;
+
+/// Stateless handle grouping the FSP operations used on Hopper/Blackwell GPUs.
+pub(crate) struct Fsp;
+
+impl Fsp {
+    /// Wait for FSP secure boot completion.
+    ///
+    /// Polls the thermal scratch register every 10 ms until FSP reports success. A poll timeout is
+    /// logged and returned as `ETIMEDOUT`; other errors (e.g. `ENOTSUPP`: no FSP) are propagated.
+    pub(crate) fn wait_secure_boot(
+        dev: &device::Device<device::Bound>,
+        bar: &crate::driver::Bar0,
+        arch: crate::gpu::Architecture,
+    ) -> Result {
+        debug_assert!(
+            regs::read_fsp_boot_complete_status(bar, arch).is_some(),
+            "wait_secure_boot called on non-FSP architecture"
+        );
+
+        read_poll_timeout(
+            || regs::read_fsp_boot_complete_status(bar, arch).ok_or(ENOTSUPP),
+            |&status| status == regs::FSP_BOOT_COMPLETE_SUCCESS,
+            Delta::from_millis(10),
+            Delta::from_millis(FSP_SECURE_BOOT_TIMEOUT_MS),
+        )
+        .map_err(|e| {
+            if e == ETIMEDOUT {
+                dev_err!(dev, "FSP secure boot completion timeout\n");
+            }
+            e
+        })
+        .map(|_| ())
+    }
+}
diff --git a/drivers/gpu/nova-core/gsp/boot.rs b/drivers/gpu/nova-core/gsp/boot.rs
index 1998bd230185..9609cef3ff51 100644
--- a/drivers/gpu/nova-core/gsp/boot.rs
+++ b/drivers/gpu/nova-core/gsp/boot.rs
@@ -33,6 +33,7 @@
         gsp::GspFirmware,
         FIRMWARE_VERSION, //
     },
+    fsp::Fsp,
     gpu::{
         Architecture,
         Chipset, //
@@ -199,7 +200,7 @@ fn boot_via_sec2(
     /// the GSP boot internally - no manual GSP reset/boot is needed.
     fn boot_via_fsp(
         dev: &device::Device<device::Bound>,
-        _bar: &Bar0,
+        bar: &Bar0,
         chipset: Chipset,
         _gsp_falcon: &Falcon<Gsp>,
         _wpr_meta: &Coherent<GspFwWprMeta>,
@@ -209,6 +210,8 @@ fn boot_via_fsp(
 
         let _fsp_fw = FspFirmware::new(dev, chipset, FIRMWARE_VERSION)?;
 
+        Fsp::wait_secure_boot(dev, bar, chipset.arch())?;
+
         Err(ENOTSUPP)
     }
 
diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs
index 3a609f6937e4..53558ac0f619 100644
--- a/drivers/gpu/nova-core/nova_core.rs
+++ b/drivers/gpu/nova-core/nova_core.rs
@@ -17,6 +17,7 @@
 mod falcon;
 mod fb;
 mod firmware;
+mod fsp;
 mod gpu;
 mod gsp;
 #[macro_use]
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index 6faeed73901d..e4de7bfffde1 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -511,6 +511,35 @@ pub(crate) fn mem_scrubbing_done(self) -> bool {
     }
 }
 
+// PTHERM registers
+
+/// Returns the BAR0 offset of NV_THERM_I2CS_SCRATCH, which FSP uses to signal boot completion.
+///
+/// The offset differs by architecture; any other architecture yields `ENOTSUPP`:
+/// - Hopper (GH100) and Blackwell GB10x: 0x000200bc
+/// - Blackwell GB20x: 0x00ad00bc
+pub(crate) fn fsp_thermal_scratch_reg_addr(arch: Architecture) -> Result<usize> {
+    match arch {
+        Architecture::Hopper | Architecture::BlackwellGB10x => Ok(0x000200bc),
+        Architecture::BlackwellGB20x => Ok(0x00ad00bc),
+        _ => Err(kernel::error::code::ENOTSUPP),
+    }
+}
+
+/// Status value that FSP writes to indicate successful boot completion.
+pub(crate) const FSP_BOOT_COMPLETE_SUCCESS: u32 = 0xff;
+
+/// Fetches the FSP boot completion status from the thermal scratch register used by `arch`.
+///
+/// Yields `None` when `arch` has no such register, i.e. the architecture does not have an FSP.
+pub(crate) fn read_fsp_boot_complete_status(
+    bar: &crate::driver::Bar0,
+    arch: Architecture,
+) -> Option<u32> {
+    // An unsupported architecture becomes `None` before any register access takes place.
+    fsp_thermal_scratch_reg_addr(arch).ok().map(|offset| bar.read32(offset))
+}
+
 // The modules below provide registers that are not identical on all supported chips. They should
 // only be used in HAL modules.
 
-- 
2.53.0