Blackwell GPUs moved the sysmem flush page registers away from the
legacy NV_PFB_NISO_FLUSH_SYSMEM_ADDR used by Ampere/Ada.
GB10x uses HSHUB0 registers, with both a primary and EG (egress) pair
that must be programmed to the same address. GB20x uses FBHUB0
registers.
Add separate GB100 and GB202 fb HALs, and split the Blackwell HAL
dispatch so that each uses its respective registers.
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
drivers/gpu/nova-core/fb/hal.rs | 8 ++-
drivers/gpu/nova-core/fb/hal/gb100.rs | 54 ++++++++++++++++---
drivers/gpu/nova-core/fb/hal/gb202.rs | 77 +++++++++++++++++++++++++++
drivers/gpu/nova-core/regs.rs | 36 +++++++++++++
4 files changed, 168 insertions(+), 7 deletions(-)
create mode 100644 drivers/gpu/nova-core/fb/hal/gb202.rs
diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs
index 478f80d640c1..65edf07c3222 100644
--- a/drivers/gpu/nova-core/fb/hal.rs
+++ b/drivers/gpu/nova-core/fb/hal.rs
@@ -13,9 +13,14 @@
mod ga100;
mod ga102;
mod gb100;
+mod gb202;
mod gh100;
mod tu102;
+/// Non-WPR heap size for Blackwell (2 MiB + 128 KiB).
+/// See Open RM: kgspCalculateFbLayout_GB100.
+const BLACKWELL_NON_WPR_HEAP_SIZE: u32 = 0x220000;
+
pub(crate) trait FbHal {
/// Returns the address of the currently-registered sysmem flush page.
fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64;
@@ -46,6 +51,7 @@ pub(crate) fn fb_hal(chipset: Chipset) -> &'static dyn FbHal {
Architecture::Ampere if chipset == Chipset::GA100 => ga100::GA100_HAL,
Architecture::Ampere | Architecture::Ada => ga102::GA102_HAL,
Architecture::Hopper => gh100::GH100_HAL,
- Architecture::BlackwellGB10x | Architecture::BlackwellGB20x => gb100::GB100_HAL,
+ Architecture::BlackwellGB10x => gb100::GB100_HAL,
+ Architecture::BlackwellGB20x => gb202::GB202_HAL,
}
}
diff --git a/drivers/gpu/nova-core/fb/hal/gb100.rs b/drivers/gpu/nova-core/fb/hal/gb100.rs
index bead99a6ca76..c6e2a505e6ae 100644
--- a/drivers/gpu/nova-core/fb/hal/gb100.rs
+++ b/drivers/gpu/nova-core/fb/hal/gb100.rs
@@ -1,21 +1,64 @@
// SPDX-License-Identifier: GPL-2.0
-use kernel::prelude::*;
+//! Blackwell GB10x framebuffer HAL.
+//!
+//! GB10x GPUs use HSHUB0 registers for the sysmem flush page. Both the primary and EG (egress)
+//! register pairs must be programmed to the same address, as required by hardware.
+
+use kernel::{
+ io::Io,
+ num::Bounded,
+ prelude::*, //
+};
use crate::{
driver::Bar0,
- fb::hal::FbHal, //
+ fb::hal::FbHal,
+ regs, //
};
struct Gb100;
+fn read_sysmem_flush_page_gb100(bar: &Bar0) -> u64 {
+ let lo = u64::from(
+ bar.read(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO)
+ .adr(),
+ );
+ let hi = u64::from(
+ bar.read(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI)
+ .adr(),
+ );
+
+ lo | (hi << 32)
+}
+
+fn write_sysmem_flush_page_gb100(bar: &Bar0, addr: Bounded<u64, 52>) {
+ // CAST: lower 32 bits. Hardware ignores bits 7:0.
+ let addr_lo = *addr as u32;
+ let addr_hi = addr.shr::<32, 20>().cast::<u32>();
+
+ // Write HI first. The hardware will trigger the flush on the LO write.
+
+ // Primary HSHUB pair.
+ bar.write_reg(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi));
+ bar.write_reg(regs::NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo));
+
+ // EG (egress) pair -- must match the primary pair.
+ bar.write_reg(regs::NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed().with_adr(addr_hi));
+ bar.write_reg(regs::NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed().with_adr(addr_lo));
+}
+
impl FbHal for Gb100 {
fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
- super::ga100::read_sysmem_flush_page_ga100(bar)
+ read_sysmem_flush_page_gb100(bar)
}
fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
- super::ga100::write_sysmem_flush_page_ga100(bar, addr);
+ let addr: Bounded<u64, 52> = Bounded::<u64, 64>::from(addr)
+ .try_shrink::<52>()
+ .ok_or(EINVAL)?;
+
+ write_sysmem_flush_page_gb100(bar, addr);
Ok(())
}
@@ -29,8 +72,7 @@ fn vidmem_size(&self, bar: &Bar0) -> u64 {
}
fn non_wpr_heap_size(&self) -> Option<u32> {
- // 2 MiB + 128 KiB non-WPR heap for Blackwell (see Open RM: kgspCalculateFbLayout_GB100).
- Some(0x220000)
+ Some(super::BLACKWELL_NON_WPR_HEAP_SIZE)
}
}
diff --git a/drivers/gpu/nova-core/fb/hal/gb202.rs b/drivers/gpu/nova-core/fb/hal/gb202.rs
new file mode 100644
index 000000000000..0eaabc9920c7
--- /dev/null
+++ b/drivers/gpu/nova-core/fb/hal/gb202.rs
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Blackwell GB20x framebuffer HAL.
+//!
+//! GB20x GPUs moved the sysmem flush registers from `NV_PFB_NISO_FLUSH_SYSMEM_ADDR` to
+//! `NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_{LO,HI}`.
+
+use kernel::{
+ io::Io,
+ num::Bounded,
+ prelude::*, //
+};
+
+use crate::{
+ driver::Bar0,
+ fb::hal::FbHal,
+ regs, //
+};
+
+struct Gb202;
+
+fn read_sysmem_flush_page_gb202(bar: &Bar0) -> u64 {
+ let lo = u64::from(
+ bar.read(regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO)
+ .adr(),
+ );
+ let hi = u64::from(
+ bar.read(regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI)
+ .adr(),
+ );
+
+ lo | (hi << 32)
+}
+
+fn write_sysmem_flush_page_gb202(bar: &Bar0, addr: Bounded<u64, 52>) {
+ // Write HI first. The hardware will trigger the flush on the LO write.
+ bar.write_reg(
+ regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI::zeroed()
+ .with_adr(addr.shr::<32, 20>().cast::<u32>()),
+ );
+ bar.write_reg(
+ regs::NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO::zeroed()
+ // CAST: lower 32 bits. Hardware ignores bits 7:0.
+ .with_adr(*addr as u32),
+ );
+}
+
+impl FbHal for Gb202 {
+ fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 {
+ read_sysmem_flush_page_gb202(bar)
+ }
+
+ fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result {
+ let addr: Bounded<u64, 52> = Bounded::<u64, 64>::from(addr)
+ .try_shrink::<52>()
+ .ok_or(EINVAL)?;
+
+ write_sysmem_flush_page_gb202(bar, addr);
+
+ Ok(())
+ }
+
+ fn supports_display(&self, bar: &Bar0) -> bool {
+ super::ga100::display_enabled_ga100(bar)
+ }
+
+ fn vidmem_size(&self, bar: &Bar0) -> u64 {
+ super::ga102::vidmem_size_ga102(bar)
+ }
+
+ fn non_wpr_heap_size(&self) -> Option<u32> {
+ Some(super::BLACKWELL_NON_WPR_HEAP_SIZE)
+ }
+}
+
+const GB202: Gb202 = Gb202;
+pub(super) const GB202_HAL: &dyn FbHal = &GB202;
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index c3ccae0c235f..77b11c7de3f8 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -145,6 +145,42 @@ fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result {
/// Bits 12..40 of the higher (exclusive) bound of the WPR2 region.
31:4 hi_val;
}
+
+ // Blackwell GB10x sysmem flush registers (HSHUB0).
+ //
+ // GB10x GPUs use two pairs of HSHUB registers for sysmembar: a primary pair and an EG
+ // (egress) pair. Both must be programmed to the same address. Hardware ignores bits 7:0
+ // of each LO register. HSHUB0 base is 0x00891000.
+
+ pub(crate) NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x00891e50 {
+ 31:0 adr => u32;
+ }
+
+ pub(crate) NV_PFB_HSHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x00891e54 {
+ 19:0 adr;
+ }
+
+ pub(crate) NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x008916c0 {
+ 31:0 adr => u32;
+ }
+
+ pub(crate) NV_PFB_HSHUB0_EG_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x008916c4 {
+ 19:0 adr;
+ }
+
+ // Blackwell GB20x sysmem flush registers (FBHUB0).
+ //
+ // Unlike the older NV_PFB_NISO_FLUSH_SYSMEM_ADDR registers which encode the address with an
+ // 8-bit right-shift, these registers take the raw address split into lower/upper 32-bit halves.
+ // The hardware ignores bits 7:0 of the LO register.
+
+ pub(crate) NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_LO(u32) @ 0x008a1d58 {
+ 31:0 adr => u32;
+ }
+
+ pub(crate) NV_PFB_FBHUB0_PCIE_FLUSH_SYSMEM_ADDR_HI(u32) @ 0x008a1d5c {
+ 19:0 adr;
+ }
}
impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE {
--
2.53.0