Replace the hardcoded 47-bit DMA mask with per-architecture values.
Add Architecture::dma_mask() with an exhaustive match, so that adding a
new architecture fails to build until its DMA address width is specified.
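Set the DMA mask in Gpu::new() rather than in probe(). Gpu owns all DMA
allocations for the device, so no concurrent allocations can exist while
the constructor is still running. For that, Gpu::new() now takes a
pci::Device<device::Core> rather than a pci::Device<device::Bound>.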
Move Spec creation into probe() so that the dev_info!() line identifying
the GPU is printed early, and pass the resulting Spec into Gpu::new().
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
drivers/gpu/nova-core/driver.rs | 24 ++++++--------------
drivers/gpu/nova-core/gpu.rs | 39 +++++++++++++++++++++++----------
2 files changed, 35 insertions(+), 28 deletions(-)
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index 84b0e1703150..bb82e63af044 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -4,8 +4,6 @@
auxiliary,
device::Core,
devres::Devres,
- dma::Device,
- dma::DmaMask,
pci,
pci::{
Class,
@@ -23,7 +21,10 @@
},
};
-use crate::gpu::Gpu;
+use crate::gpu::{
+ Gpu,
+ Spec, //
+};
/// Counter for generating unique auxiliary device IDs.
static AUXILIARY_ID_COUNTER: Atomic<u32> = Atomic::new(0);
@@ -38,14 +39,6 @@ pub(crate) struct NovaCore {
const BAR0_SIZE: usize = SZ_16M;
-// For now we only support Ampere which can use up to 47-bit DMA addresses.
-//
-// TODO: Add an abstraction for this to support newer GPUs which may support
-// larger DMA addresses. Limiting these GPUs to smaller address widths won't
-// have any adverse affects, unless installed on systems which require larger
-// DMA addresses. These systems should be quite rare.
-const GPU_DMA_BITS: u32 = 47;
-
pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
kernel::pci_device_table!(
@@ -84,18 +77,15 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> impl PinInit<Self, E
pdev.enable_device_mem()?;
pdev.set_master();
- // SAFETY: No concurrent DMA allocations or mappings can be made because
- // the device is still being probed and therefore isn't being used by
- // other threads of execution.
- unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
-
let bar = Arc::pin_init(
pdev.iomap_region_sized::<BAR0_SIZE>(0, c"nova-core/bar0"),
GFP_KERNEL,
)?;
+ let spec = Spec::new(pdev.as_ref(), bar.access(pdev.as_ref())?)?;
+ dev_info!(pdev, "NVIDIA ({})\n", spec);
Ok(try_pin_init!(Self {
- gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?),
+ gpu <- Gpu::new(pdev, bar.clone(), bar.access(pdev.as_ref())?, spec),
_reg <- auxiliary::Registration::new(
pdev.as_ref(),
c"nova-drm",
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 685ae4c81268..f70bfbda1614 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -3,6 +3,10 @@
use kernel::{
device,
devres::Devres,
+ dma::{
+ Device,
+ DmaMask, //
+ },
fmt,
pci,
prelude::*,
@@ -160,6 +164,16 @@ pub(crate) enum Architecture {
BlackwellGB20x = 0x1b,
}
+impl Architecture {
+ /// Returns this architecture's DMA mask: 52 bits on Hopper and Blackwell, 47 bits otherwise.
+ pub(crate) const fn dma_mask(&self) -> DmaMask {
+ match self {
+ Self::Turing | Self::Ampere | Self::Ada => DmaMask::new::<47>(),
+ Self::Hopper | Self::BlackwellGB10x | Self::BlackwellGB20x => DmaMask::new::<52>(),
+ }
+ }
+}
+
impl TryFrom<u8> for Architecture {
type Error = Error;
@@ -212,7 +226,7 @@ pub(crate) struct Spec {
}
impl Spec {
- fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
+ pub(crate) fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
// Some brief notes about boot0 and boot42, in chronological order:
//
// NV04 through NV50:
@@ -244,7 +258,6 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
}
/// Returns this GPU's chipset.
- #[expect(dead_code)]
pub(crate) fn chipset(self) -> Chipset {
self.chipset
}
@@ -292,36 +305,40 @@ pub(crate) struct Gpu {
impl Gpu {
pub(crate) fn new<'a>(
- pdev: &'a pci::Device<device::Bound>,
+ pdev: &'a pci::Device<device::Core>,
devres_bar: Arc<Devres<Bar0>>,
bar: &'a Bar0,
+ spec: Spec,
) -> impl PinInit<Self, Error> + 'a {
- try_pin_init!(Self {
- spec: Spec::new(pdev.as_ref(), bar).inspect(|spec| {
- dev_info!(pdev,"NVIDIA ({})\n", spec);
- })?,
+ let dma_mask = spec.chipset().arch().dma_mask();
+ try_pin_init!(Self {
// We must wait for GFW_BOOT completion before doing any significant setup on the GPU.
_: {
+ // SAFETY: `Gpu` owns all DMA allocations for this device, and we are
+ // still constructing it, so no concurrent DMA allocations can exist.
+ unsafe { pdev.dma_set_mask_and_coherent(dma_mask)? };
+
gfw::wait_gfw_boot_completion(bar)
.inspect_err(|_| dev_err!(pdev, "GFW boot did not complete\n"))?;
},
- sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?,
+ sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset())?,
gsp_falcon: Falcon::new(
pdev.as_ref(),
- spec.chipset,
+ spec.chipset(),
)
.inspect(|falcon| falcon.clear_swgen0_intr(bar))?,
- sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset)?,
+ sec2_falcon: Falcon::new(pdev.as_ref(), spec.chipset())?,
gsp <- Gsp::new(pdev),
- _: { gsp.boot(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)? },
+ _: { gsp.boot(pdev, bar, spec.chipset(), gsp_falcon, sec2_falcon)? },
bar: devres_bar,
+ spec,
})
}
--
2.53.0