[PATCH 1/8] gpu: nova-core: convert PMC registers to kernel register macro

Alexandre Courbot posted 8 patches 2 weeks, 5 days ago
There is a newer version of this series
[PATCH 1/8] gpu: nova-core: convert PMC registers to kernel register macro
Posted by Alexandre Courbot 2 weeks, 5 days ago
Convert all PMC registers to use the kernel's register macro and update
the code accordingly.

nova-core's registers have some constant properties (like a 32-bit size
and a crate visibility), so introduce the `nv_reg` macro to shorten
their declaration.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
---
 drivers/gpu/nova-core/falcon.rs |  7 ++--
 drivers/gpu/nova-core/gpu.rs    | 37 ++++++++++-----------
 drivers/gpu/nova-core/regs.rs   | 73 +++++++++++++++++++++++++++++++----------
 3 files changed, 78 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
index 7097a206ec3c..4721865f59d9 100644
--- a/drivers/gpu/nova-core/falcon.rs
+++ b/drivers/gpu/nova-core/falcon.rs
@@ -13,7 +13,10 @@
         DmaAddress,
         DmaMask, //
     },
-    io::poll::read_poll_timeout,
+    io::{
+        poll::read_poll_timeout, //
+        Io,
+    },
     prelude::*,
     sync::aref::ARef,
     time::Delta,
@@ -532,7 +535,7 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
         self.hal.reset_wait_mem_scrubbing(bar)?;
 
         regs::NV_PFALCON_FALCON_RM::default()
-            .set_value(regs::NV_PMC_BOOT_0::read(bar).into())
+            .set_value(bar.read(regs::NV_PMC_BOOT_0).into())
             .write(bar, &E::ID);
 
         Ok(())
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 8579d632e717..d81abc7de3d7 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -4,6 +4,8 @@
     device,
     devres::Devres,
     fmt,
+    io::Io,
+    num::Bounded,
     pci,
     prelude::*,
     sync::Arc, //
@@ -129,24 +131,18 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 }
 
 /// Enum representation of the GPU generation.
-///
-/// TODO: remove the `Default` trait implementation, and the `#[default]`
-/// attribute, once the register!() macro (which creates Architecture items) no
-/// longer requires it for read-only fields.
-#[derive(fmt::Debug, Default, Copy, Clone)]
-#[repr(u8)]
+#[derive(fmt::Debug, Copy, Clone)]
 pub(crate) enum Architecture {
-    #[default]
     Turing = 0x16,
     Ampere = 0x17,
     Ada = 0x19,
 }
 
-impl TryFrom<u8> for Architecture {
+impl TryFrom<Bounded<u32, 6>> for Architecture {
     type Error = Error;
 
-    fn try_from(value: u8) -> Result<Self> {
-        match value {
+    fn try_from(value: Bounded<u32, 6>) -> Result<Self> {
+        match u8::from(value) {
             0x16 => Ok(Self::Turing),
             0x17 => Ok(Self::Ampere),
             0x19 => Ok(Self::Ada),
@@ -155,23 +151,26 @@ fn try_from(value: u8) -> Result<Self> {
     }
 }
 
-impl From<Architecture> for u8 {
+impl From<Architecture> for Bounded<u32, 6> {
     fn from(value: Architecture) -> Self {
-        // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless.
-        value as u8
+        match value {
+            Architecture::Turing => Bounded::<u32, 6>::new::<0x16>(),
+            Architecture::Ampere => Bounded::<u32, 6>::new::<0x17>(),
+            Architecture::Ada => Bounded::<u32, 6>::new::<0x19>(),
+        }
     }
 }
 
 pub(crate) struct Revision {
-    major: u8,
-    minor: u8,
+    major: Bounded<u8, 4>,
+    minor: Bounded<u8, 4>,
 }
 
 impl From<regs::NV_PMC_BOOT_42> for Revision {
     fn from(boot0: regs::NV_PMC_BOOT_42) -> Self {
         Self {
-            major: boot0.major_revision(),
-            minor: boot0.minor_revision(),
+            major: boot0.major_revision().cast(),
+            minor: boot0.minor_revision().cast(),
         }
     }
 }
@@ -208,13 +207,13 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
         //     from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU.
         //     Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs.
 
-        let boot0 = regs::NV_PMC_BOOT_0::read(bar);
+        let boot0 = bar.read(regs::NV_PMC_BOOT_0);
 
         if boot0.is_older_than_fermi() {
             return Err(ENODEV);
         }
 
-        let boot42 = regs::NV_PMC_BOOT_42::read(bar);
+        let boot42 = bar.read(regs::NV_PMC_BOOT_42);
         Spec::try_from(boot42).inspect_err(|_| {
             dev_err!(dev, "Unsupported chipset: {}\n", boot42);
         })
diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
index 53f412f0ca32..62c2065e63ef 100644
--- a/drivers/gpu/nova-core/regs.rs
+++ b/drivers/gpu/nova-core/regs.rs
@@ -35,20 +35,64 @@
     num::FromSafeCast,
 };
 
+// All nova-core registers are 32-bit and `pub(crate)`. Wrap the `register!` macro to avoid
+// repeating this information for every register.
+macro_rules! nv_reg {
+    (
+        $(
+            $(#[$attr:meta])* $name:ident $([ $size:expr $(, stride = $stride:expr)? ])?
+                $(@ $offset:literal)?
+                $(@ $base:ident + $base_offset:literal)?
+                $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )?
+            $(, $comment:literal)? { $($fields:tt)* }
+        )*
+    )=> {
+        $(
+        ::kernel::io::register!(
+            @reg $(#[$attr])* pub(crate) $name(u32) $([$size $(, stride = $stride)?])?
+                $(@ $offset)?
+                $(@ $base + $base_offset)?
+                $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )?
+            $(, $comment)? { $($fields)* }
+        );
+        )*
+    };
+}
+
 // PMC
 
-register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" {
-    3:0     minor_revision as u8, "Minor revision of the chip";
-    7:4     major_revision as u8, "Major revision of the chip";
-    8:8     architecture_1 as u8, "MSB of the architecture";
-    23:20   implementation as u8, "Implementation version of the architecture";
-    28:24   architecture_0 as u8, "Lower bits of the architecture";
-});
+nv_reg! {
+    /// Basic revision information about the GPU.
+    NV_PMC_BOOT_0 @ 0x00000000 {
+        /// Minor revision of the chip.
+        3:0     minor_revision;
+        /// Major revision of the chip.
+        7:4     major_revision;
+        /// MSB of the architecture.
+        8:8     architecture_1;
+        /// Implementation version of the architecture.
+        23:20   implementation;
+        /// Lower bits of the architecture.
+        28:24   architecture_0;
+    }
+
+    /// Extended architecture information.
+    NV_PMC_BOOT_42 @ 0x00000a00 {
+        /// Minor revision of the chip.
+        15:12   minor_revision;
+        /// Major revision of the chip.
+        19:16   major_revision;
+        /// Implementation version of the architecture.
+        23:20   implementation;
+        /// Architecture value.
+        29:24   architecture ?=> Architecture;
+    }
+}
 
 impl NV_PMC_BOOT_0 {
     pub(crate) fn is_older_than_fermi(self) -> bool {
         // From https://github.com/NVIDIA/open-gpu-doc/tree/master/manuals :
-        const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u8 = 0xc;
+        const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u32 = 0xc;
 
         // Older chips left arch1 zeroed out. That, combined with an arch0 value that is less than
         // GF100, means "older than Fermi".
@@ -56,13 +100,6 @@ pub(crate) fn is_older_than_fermi(self) -> bool {
     }
 }
 
-register!(NV_PMC_BOOT_42 @ 0x00000a00, "Extended architecture information" {
-    15:12   minor_revision as u8, "Minor revision of the chip";
-    19:16   major_revision as u8, "Major revision of the chip";
-    23:20   implementation as u8, "Implementation version of the architecture";
-    29:24   architecture as u8 ?=> Architecture, "Architecture value";
-});
-
 impl NV_PMC_BOOT_42 {
     /// Combines `architecture` and `implementation` to obtain a code unique to the chipset.
     pub(crate) fn chipset(self) -> Result<Chipset> {
@@ -76,8 +113,8 @@ pub(crate) fn chipset(self) -> Result<Chipset> {
 
     /// Returns the raw architecture value from the register.
     fn architecture_raw(self) -> u8 {
-        ((self.0 >> Self::ARCHITECTURE_RANGE.start()) & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1))
-            as u8
+        ((self.inner >> Self::ARCHITECTURE_RANGE.start())
+            & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1)) as u8
     }
 }
 
@@ -86,7 +123,7 @@ fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result {
         write!(
             f,
             "boot42 = 0x{:08x} (architecture 0x{:x}, implementation 0x{:x})",
-            self.0,
+            self.inner,
             self.architecture_raw(),
             self.implementation()
         )

-- 
2.53.0
Re: [PATCH 1/8] gpu: nova-core: convert PMC registers to kernel register macro
Posted by Eliot Courtney 2 weeks, 4 days ago
On Wed Mar 18, 2026 at 5:05 PM JST, Alexandre Courbot wrote:
> Convert all PMC registers to use the kernel's register macro and update
> the code accordingly.
>
> nova-core's registers have some constant properties (like a 32-bit size
> and a crate visibility), so introduce the `nv_reg` macro to shorten
> their declaration.
>
> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
> ---
>  drivers/gpu/nova-core/falcon.rs |  7 ++--
>  drivers/gpu/nova-core/gpu.rs    | 37 ++++++++++-----------
>  drivers/gpu/nova-core/regs.rs   | 73 +++++++++++++++++++++++++++++++----------
>  3 files changed, 78 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
> index 7097a206ec3c..4721865f59d9 100644
> --- a/drivers/gpu/nova-core/falcon.rs
> +++ b/drivers/gpu/nova-core/falcon.rs
> @@ -13,7 +13,10 @@
>          DmaAddress,
>          DmaMask, //
>      },
> -    io::poll::read_poll_timeout,
> +    io::{
> +        poll::read_poll_timeout, //
> +        Io,
> +    },

nit: // should be on the last import?

>      prelude::*,
>      sync::aref::ARef,
>      time::Delta,
> @@ -532,7 +535,7 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
>          self.hal.reset_wait_mem_scrubbing(bar)?;
>  
>          regs::NV_PFALCON_FALCON_RM::default()
> -            .set_value(regs::NV_PMC_BOOT_0::read(bar).into())
> +            .set_value(bar.read(regs::NV_PMC_BOOT_0).into())
>              .write(bar, &E::ID);
>  
>          Ok(())
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index 8579d632e717..d81abc7de3d7 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -4,6 +4,8 @@
>      device,
>      devres::Devres,
>      fmt,
> +    io::Io,
> +    num::Bounded,
>      pci,
>      prelude::*,
>      sync::Arc, //
> @@ -129,24 +131,18 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
>  }
>  
>  /// Enum representation of the GPU generation.
> -///
> -/// TODO: remove the `Default` trait implementation, and the `#[default]`
> -/// attribute, once the register!() macro (which creates Architecture items) no
> -/// longer requires it for read-only fields.
> -#[derive(fmt::Debug, Default, Copy, Clone)]
> -#[repr(u8)]
> +#[derive(fmt::Debug, Copy, Clone)]
>  pub(crate) enum Architecture {
> -    #[default]
>      Turing = 0x16,
>      Ampere = 0x17,
>      Ada = 0x19,
>  }
>  
> -impl TryFrom<u8> for Architecture {
> +impl TryFrom<Bounded<u32, 6>> for Architecture {
>      type Error = Error;
>  
> -    fn try_from(value: u8) -> Result<Self> {
> -        match value {
> +    fn try_from(value: Bounded<u32, 6>) -> Result<Self> {
> +        match u8::from(value) {
>              0x16 => Ok(Self::Turing),
>              0x17 => Ok(Self::Ampere),
>              0x19 => Ok(Self::Ada),
> @@ -155,23 +151,26 @@ fn try_from(value: u8) -> Result<Self> {
>      }
>  }
>  
> -impl From<Architecture> for u8 {
> +impl From<Architecture> for Bounded<u32, 6> {
>      fn from(value: Architecture) -> Self {
> -        // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless.
> -        value as u8
> +        match value {
> +            Architecture::Turing => Bounded::<u32, 6>::new::<0x16>(),
> +            Architecture::Ampere => Bounded::<u32, 6>::new::<0x17>(),
> +            Architecture::Ada => Bounded::<u32, 6>::new::<0x19>(),
> +        }
>      }
>  }
>  
>  pub(crate) struct Revision {
> -    major: u8,
> -    minor: u8,
> +    major: Bounded<u8, 4>,
> +    minor: Bounded<u8, 4>,
>  }
>  
>  impl From<regs::NV_PMC_BOOT_42> for Revision {
>      fn from(boot0: regs::NV_PMC_BOOT_42) -> Self {
>          Self {
> -            major: boot0.major_revision(),
> -            minor: boot0.minor_revision(),
> +            major: boot0.major_revision().cast(),
> +            minor: boot0.minor_revision().cast(),
>          }
>      }
>  }
> @@ -208,13 +207,13 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
>          //     from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU.
>          //     Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs.
>  
> -        let boot0 = regs::NV_PMC_BOOT_0::read(bar);
> +        let boot0 = bar.read(regs::NV_PMC_BOOT_0);
>  
>          if boot0.is_older_than_fermi() {
>              return Err(ENODEV);
>          }
>  
> -        let boot42 = regs::NV_PMC_BOOT_42::read(bar);
> +        let boot42 = bar.read(regs::NV_PMC_BOOT_42);
>          Spec::try_from(boot42).inspect_err(|_| {
>              dev_err!(dev, "Unsupported chipset: {}\n", boot42);
>          })
> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
> index 53f412f0ca32..62c2065e63ef 100644
> --- a/drivers/gpu/nova-core/regs.rs
> +++ b/drivers/gpu/nova-core/regs.rs
> @@ -35,20 +35,64 @@
>      num::FromSafeCast,
>  };
>  
> +// All nova-core registers are 32-bit and `pub(crate)`. Wrap the `register!` macro to avoid
> +// repeating this information for every register.
> +macro_rules! nv_reg {
> +    (
> +        $(
> +            $(#[$attr:meta])* $name:ident $([ $size:expr $(, stride = $stride:expr)? ])?
> +                $(@ $offset:literal)?
> +                $(@ $base:ident + $base_offset:literal)?
> +                $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )?
> +            $(, $comment:literal)? { $($fields:tt)* }
> +        )*
> +    )=> {
> +        $(
> +        ::kernel::io::register!(
> +            @reg $(#[$attr])* pub(crate) $name(u32) $([$size $(, stride = $stride)?])?
> +                $(@ $offset)?
> +                $(@ $base + $base_offset)?
> +                $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )?
> +            $(, $comment)? { $($fields)* }
> +        );
> +        )*
> +    };
> +}
> +

Is it really worth introducing this macro to save pub(crate) and (u32)?
Are we definitely going to always be using pub(crate) and u32?
Re: [PATCH 1/8] gpu: nova-core: convert PMC registers to kernel register macro
Posted by Alexandre Courbot 2 weeks, 4 days ago
On Thu Mar 19, 2026 at 10:42 AM JST, Eliot Courtney wrote:
> On Wed Mar 18, 2026 at 5:05 PM JST, Alexandre Courbot wrote:
>> Convert all PMC registers to use the kernel's register macro and update
>> the code accordingly.
>>
>> nova-core's registers have some constant properties (like a 32-bit size
>> and a crate visibility), so introduce the `nv_reg` macro to shorten
>> their declaration.
>>
>> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
>> ---
>>  drivers/gpu/nova-core/falcon.rs |  7 ++--
>>  drivers/gpu/nova-core/gpu.rs    | 37 ++++++++++-----------
>>  drivers/gpu/nova-core/regs.rs   | 73 +++++++++++++++++++++++++++++++----------
>>  3 files changed, 78 insertions(+), 39 deletions(-)
>>
>> diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
>> index 7097a206ec3c..4721865f59d9 100644
>> --- a/drivers/gpu/nova-core/falcon.rs
>> +++ b/drivers/gpu/nova-core/falcon.rs
>> @@ -13,7 +13,10 @@
>>          DmaAddress,
>>          DmaMask, //
>>      },
>> -    io::poll::read_poll_timeout,
>> +    io::{
>> +        poll::read_poll_timeout, //
>> +        Io,
>> +    },
>
> nit: // should be on the last import?

It should, thanks.

>
>>      prelude::*,
>>      sync::aref::ARef,
>>      time::Delta,
>> @@ -532,7 +535,7 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
>>          self.hal.reset_wait_mem_scrubbing(bar)?;
>>  
>>          regs::NV_PFALCON_FALCON_RM::default()
>> -            .set_value(regs::NV_PMC_BOOT_0::read(bar).into())
>> +            .set_value(bar.read(regs::NV_PMC_BOOT_0).into())
>>              .write(bar, &E::ID);
>>  
>>          Ok(())
>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>> index 8579d632e717..d81abc7de3d7 100644
>> --- a/drivers/gpu/nova-core/gpu.rs
>> +++ b/drivers/gpu/nova-core/gpu.rs
>> @@ -4,6 +4,8 @@
>>      device,
>>      devres::Devres,
>>      fmt,
>> +    io::Io,
>> +    num::Bounded,
>>      pci,
>>      prelude::*,
>>      sync::Arc, //
>> @@ -129,24 +131,18 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
>>  }
>>  
>>  /// Enum representation of the GPU generation.
>> -///
>> -/// TODO: remove the `Default` trait implementation, and the `#[default]`
>> -/// attribute, once the register!() macro (which creates Architecture items) no
>> -/// longer requires it for read-only fields.
>> -#[derive(fmt::Debug, Default, Copy, Clone)]
>> -#[repr(u8)]
>> +#[derive(fmt::Debug, Copy, Clone)]
>>  pub(crate) enum Architecture {
>> -    #[default]
>>      Turing = 0x16,
>>      Ampere = 0x17,
>>      Ada = 0x19,
>>  }
>>  
>> -impl TryFrom<u8> for Architecture {
>> +impl TryFrom<Bounded<u32, 6>> for Architecture {
>>      type Error = Error;
>>  
>> -    fn try_from(value: u8) -> Result<Self> {
>> -        match value {
>> +    fn try_from(value: Bounded<u32, 6>) -> Result<Self> {
>> +        match u8::from(value) {
>>              0x16 => Ok(Self::Turing),
>>              0x17 => Ok(Self::Ampere),
>>              0x19 => Ok(Self::Ada),
>> @@ -155,23 +151,26 @@ fn try_from(value: u8) -> Result<Self> {
>>      }
>>  }
>>  
>> -impl From<Architecture> for u8 {
>> +impl From<Architecture> for Bounded<u32, 6> {
>>      fn from(value: Architecture) -> Self {
>> -        // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless.
>> -        value as u8
>> +        match value {
>> +            Architecture::Turing => Bounded::<u32, 6>::new::<0x16>(),
>> +            Architecture::Ampere => Bounded::<u32, 6>::new::<0x17>(),
>> +            Architecture::Ada => Bounded::<u32, 6>::new::<0x19>(),
>> +        }
>>      }
>>  }
>>  
>>  pub(crate) struct Revision {
>> -    major: u8,
>> -    minor: u8,
>> +    major: Bounded<u8, 4>,
>> +    minor: Bounded<u8, 4>,
>>  }
>>  
>>  impl From<regs::NV_PMC_BOOT_42> for Revision {
>>      fn from(boot0: regs::NV_PMC_BOOT_42) -> Self {
>>          Self {
>> -            major: boot0.major_revision(),
>> -            minor: boot0.minor_revision(),
>> +            major: boot0.major_revision().cast(),
>> +            minor: boot0.minor_revision().cast(),
>>          }
>>      }
>>  }
>> @@ -208,13 +207,13 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
>>          //     from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU.
>>          //     Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs.
>>  
>> -        let boot0 = regs::NV_PMC_BOOT_0::read(bar);
>> +        let boot0 = bar.read(regs::NV_PMC_BOOT_0);
>>  
>>          if boot0.is_older_than_fermi() {
>>              return Err(ENODEV);
>>          }
>>  
>> -        let boot42 = regs::NV_PMC_BOOT_42::read(bar);
>> +        let boot42 = bar.read(regs::NV_PMC_BOOT_42);
>>          Spec::try_from(boot42).inspect_err(|_| {
>>              dev_err!(dev, "Unsupported chipset: {}\n", boot42);
>>          })
>> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
>> index 53f412f0ca32..62c2065e63ef 100644
>> --- a/drivers/gpu/nova-core/regs.rs
>> +++ b/drivers/gpu/nova-core/regs.rs
>> @@ -35,20 +35,64 @@
>>      num::FromSafeCast,
>>  };
>>  
>> +// All nova-core registers are 32-bit and `pub(crate)`. Wrap the `register!` macro to avoid
>> +// repeating this information for every register.
>> +macro_rules! nv_reg {
>> +    (
>> +        $(
>> +            $(#[$attr:meta])* $name:ident $([ $size:expr $(, stride = $stride:expr)? ])?
>> +                $(@ $offset:literal)?
>> +                $(@ $base:ident + $base_offset:literal)?
>> +                $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )?
>> +            $(, $comment:literal)? { $($fields:tt)* }
>> +        )*
>> +    )=> {
>> +        $(
>> +        ::kernel::io::register!(
>> +            @reg $(#[$attr])* pub(crate) $name(u32) $([$size $(, stride = $stride)?])?
>> +                $(@ $offset)?
>> +                $(@ $base + $base_offset)?
>> +                $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )?
>> +            $(, $comment)? { $($fields)* }
>> +        );
>> +        )*
>> +    };
>> +}
>> +
>
> Is it really worth introducing this macro to save pub(crate) and (u32)?
> Are we definitely going to always be using pub(crate) and u32?

So far we are. I'm not particularly passionate about it, but I think
it's nice not having to repeat ourselves (and potentially introduce
typos).
Re: [PATCH 1/8] gpu: nova-core: convert PMC registers to kernel register macro
Posted by Eliot Courtney 2 weeks, 4 days ago
On Thu Mar 19, 2026 at 11:07 AM JST, Alexandre Courbot wrote:
> On Thu Mar 19, 2026 at 10:42 AM JST, Eliot Courtney wrote:
>> On Wed Mar 18, 2026 at 5:05 PM JST, Alexandre Courbot wrote:
>>> Convert all PMC registers to use the kernel's register macro and update
>>> the code accordingly.
>>>
>>> nova-core's registers have some constant properties (like a 32-bit size
>>> and a crate visibility), so introduce the `nv_reg` macro to shorten
>>> their declaration.
>>>
>>> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
>>> ---
>>>  drivers/gpu/nova-core/falcon.rs |  7 ++--
>>>  drivers/gpu/nova-core/gpu.rs    | 37 ++++++++++-----------
>>>  drivers/gpu/nova-core/regs.rs   | 73 +++++++++++++++++++++++++++++++----------
>>>  3 files changed, 78 insertions(+), 39 deletions(-)
>>>
>>> diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
>>> index 7097a206ec3c..4721865f59d9 100644
>>> --- a/drivers/gpu/nova-core/falcon.rs
>>> +++ b/drivers/gpu/nova-core/falcon.rs
>>> @@ -13,7 +13,10 @@
>>>          DmaAddress,
>>>          DmaMask, //
>>>      },
>>> -    io::poll::read_poll_timeout,
>>> +    io::{
>>> +        poll::read_poll_timeout, //
>>> +        Io,
>>> +    },
>>
>> nit: // should be on the last import?
>
> It should, thanks.
>
>>
>>>      prelude::*,
>>>      sync::aref::ARef,
>>>      time::Delta,
>>> @@ -532,7 +535,7 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
>>>          self.hal.reset_wait_mem_scrubbing(bar)?;
>>>  
>>>          regs::NV_PFALCON_FALCON_RM::default()
>>> -            .set_value(regs::NV_PMC_BOOT_0::read(bar).into())
>>> +            .set_value(bar.read(regs::NV_PMC_BOOT_0).into())
>>>              .write(bar, &E::ID);
>>>  
>>>          Ok(())
>>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>>> index 8579d632e717..d81abc7de3d7 100644
>>> --- a/drivers/gpu/nova-core/gpu.rs
>>> +++ b/drivers/gpu/nova-core/gpu.rs
>>> @@ -4,6 +4,8 @@
>>>      device,
>>>      devres::Devres,
>>>      fmt,
>>> +    io::Io,
>>> +    num::Bounded,
>>>      pci,
>>>      prelude::*,
>>>      sync::Arc, //
>>> @@ -129,24 +131,18 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
>>>  }
>>>  
>>>  /// Enum representation of the GPU generation.
>>> -///
>>> -/// TODO: remove the `Default` trait implementation, and the `#[default]`
>>> -/// attribute, once the register!() macro (which creates Architecture items) no
>>> -/// longer requires it for read-only fields.
>>> -#[derive(fmt::Debug, Default, Copy, Clone)]
>>> -#[repr(u8)]
>>> +#[derive(fmt::Debug, Copy, Clone)]
>>>  pub(crate) enum Architecture {
>>> -    #[default]
>>>      Turing = 0x16,
>>>      Ampere = 0x17,
>>>      Ada = 0x19,
>>>  }
>>>  
>>> -impl TryFrom<u8> for Architecture {
>>> +impl TryFrom<Bounded<u32, 6>> for Architecture {
>>>      type Error = Error;
>>>  
>>> -    fn try_from(value: u8) -> Result<Self> {
>>> -        match value {
>>> +    fn try_from(value: Bounded<u32, 6>) -> Result<Self> {
>>> +        match u8::from(value) {
>>>              0x16 => Ok(Self::Turing),
>>>              0x17 => Ok(Self::Ampere),
>>>              0x19 => Ok(Self::Ada),
>>> @@ -155,23 +151,26 @@ fn try_from(value: u8) -> Result<Self> {
>>>      }
>>>  }
>>>  
>>> -impl From<Architecture> for u8 {
>>> +impl From<Architecture> for Bounded<u32, 6> {
>>>      fn from(value: Architecture) -> Self {
>>> -        // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless.
>>> -        value as u8
>>> +        match value {
>>> +            Architecture::Turing => Bounded::<u32, 6>::new::<0x16>(),
>>> +            Architecture::Ampere => Bounded::<u32, 6>::new::<0x17>(),
>>> +            Architecture::Ada => Bounded::<u32, 6>::new::<0x19>(),
>>> +        }
>>>      }
>>>  }
>>>  
>>>  pub(crate) struct Revision {
>>> -    major: u8,
>>> -    minor: u8,
>>> +    major: Bounded<u8, 4>,
>>> +    minor: Bounded<u8, 4>,
>>>  }
>>>  
>>>  impl From<regs::NV_PMC_BOOT_42> for Revision {
>>>      fn from(boot0: regs::NV_PMC_BOOT_42) -> Self {
>>>          Self {
>>> -            major: boot0.major_revision(),
>>> -            minor: boot0.minor_revision(),
>>> +            major: boot0.major_revision().cast(),
>>> +            minor: boot0.minor_revision().cast(),
>>>          }
>>>      }
>>>  }
>>> @@ -208,13 +207,13 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
>>>          //     from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU.
>>>          //     Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs.
>>>  
>>> -        let boot0 = regs::NV_PMC_BOOT_0::read(bar);
>>> +        let boot0 = bar.read(regs::NV_PMC_BOOT_0);
>>>  
>>>          if boot0.is_older_than_fermi() {
>>>              return Err(ENODEV);
>>>          }
>>>  
>>> -        let boot42 = regs::NV_PMC_BOOT_42::read(bar);
>>> +        let boot42 = bar.read(regs::NV_PMC_BOOT_42);
>>>          Spec::try_from(boot42).inspect_err(|_| {
>>>              dev_err!(dev, "Unsupported chipset: {}\n", boot42);
>>>          })
>>> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
>>> index 53f412f0ca32..62c2065e63ef 100644
>>> --- a/drivers/gpu/nova-core/regs.rs
>>> +++ b/drivers/gpu/nova-core/regs.rs
>>> @@ -35,20 +35,64 @@
>>>      num::FromSafeCast,
>>>  };
>>>  
>>> +// All nova-core registers are 32-bit and `pub(crate)`. Wrap the `register!` macro to avoid
>>> +// repeating this information for every register.
>>> +macro_rules! nv_reg {
>>> +    (
>>> +        $(
>>> +            $(#[$attr:meta])* $name:ident $([ $size:expr $(, stride = $stride:expr)? ])?
>>> +                $(@ $offset:literal)?
>>> +                $(@ $base:ident + $base_offset:literal)?
>>> +                $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )?
>>> +            $(, $comment:literal)? { $($fields:tt)* }
>>> +        )*
>>> +    )=> {
>>> +        $(
>>> +        ::kernel::io::register!(
>>> +            @reg $(#[$attr])* pub(crate) $name(u32) $([$size $(, stride = $stride)?])?
>>> +                $(@ $offset)?
>>> +                $(@ $base + $base_offset)?
>>> +                $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )?
>>> +            $(, $comment)? { $($fields)* }
>>> +        );
>>> +        )*
>>> +    };
>>> +}
>>> +
>>
>> Is it really worth introducing this macro to save pub(crate) and (u32)?
>> Are we definitely going to always be using pub(crate) and u32?
>
> So far we are. I'm not particularly passionate about it, but I think
> it's nice not having to repeat ourselves (and potentially introduce
> typos).

One downside is that the nested macros make it harder to check the
implementation since you have to go to this definition, then to the
register macro definition if you want to check something. And I also
feel like I need to read this intermediate macro definition to see if
it's doing anything special if I run into some error.

Personally I would probably go with just using the register macro
directly, and then we can tighten the visibility later if it's useful
without having to change all of these. But not a super strong opinion,
so up to you. Just doesn't feel like it saves us that much in typing the
extra tens of characters.
Re: [PATCH 1/8] gpu: nova-core: convert PMC registers to kernel register macro
Posted by Alexandre Courbot 2 weeks, 4 days ago
On Thu Mar 19, 2026 at 11:16 AM JST, Eliot Courtney wrote:
> On Thu Mar 19, 2026 at 11:07 AM JST, Alexandre Courbot wrote:
>> On Thu Mar 19, 2026 at 10:42 AM JST, Eliot Courtney wrote:
>>> On Wed Mar 18, 2026 at 5:05 PM JST, Alexandre Courbot wrote:
>>>> Convert all PMC registers to use the kernel's register macro and update
>>>> the code accordingly.
>>>>
>>>> nova-core's registers have some constant properties (like a 32-bit size
>>>> and a crate visibility), so introduce the `nv_reg` macro to shorten
>>>> their declaration.
>>>>
>>>> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
>>>> ---
>>>>  drivers/gpu/nova-core/falcon.rs |  7 ++--
>>>>  drivers/gpu/nova-core/gpu.rs    | 37 ++++++++++-----------
>>>>  drivers/gpu/nova-core/regs.rs   | 73 +++++++++++++++++++++++++++++++----------
>>>>  3 files changed, 78 insertions(+), 39 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
>>>> index 7097a206ec3c..4721865f59d9 100644
>>>> --- a/drivers/gpu/nova-core/falcon.rs
>>>> +++ b/drivers/gpu/nova-core/falcon.rs
>>>> @@ -13,7 +13,10 @@
>>>>          DmaAddress,
>>>>          DmaMask, //
>>>>      },
>>>> -    io::poll::read_poll_timeout,
>>>> +    io::{
>>>> +        poll::read_poll_timeout, //
>>>> +        Io,
>>>> +    },
>>>
>>> nit: // should be on the last import?
>>
>> It should, thanks.
>>
>>>
>>>>      prelude::*,
>>>>      sync::aref::ARef,
>>>>      time::Delta,
>>>> @@ -532,7 +535,7 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
>>>>          self.hal.reset_wait_mem_scrubbing(bar)?;
>>>>  
>>>>          regs::NV_PFALCON_FALCON_RM::default()
>>>> -            .set_value(regs::NV_PMC_BOOT_0::read(bar).into())
>>>> +            .set_value(bar.read(regs::NV_PMC_BOOT_0).into())
>>>>              .write(bar, &E::ID);
>>>>  
>>>>          Ok(())
>>>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>>>> index 8579d632e717..d81abc7de3d7 100644
>>>> --- a/drivers/gpu/nova-core/gpu.rs
>>>> +++ b/drivers/gpu/nova-core/gpu.rs
>>>> @@ -4,6 +4,8 @@
>>>>      device,
>>>>      devres::Devres,
>>>>      fmt,
>>>> +    io::Io,
>>>> +    num::Bounded,
>>>>      pci,
>>>>      prelude::*,
>>>>      sync::Arc, //
>>>> @@ -129,24 +131,18 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
>>>>  }
>>>>  
>>>>  /// Enum representation of the GPU generation.
>>>> -///
>>>> -/// TODO: remove the `Default` trait implementation, and the `#[default]`
>>>> -/// attribute, once the register!() macro (which creates Architecture items) no
>>>> -/// longer requires it for read-only fields.
>>>> -#[derive(fmt::Debug, Default, Copy, Clone)]
>>>> -#[repr(u8)]
>>>> +#[derive(fmt::Debug, Copy, Clone)]
>>>>  pub(crate) enum Architecture {
>>>> -    #[default]
>>>>      Turing = 0x16,
>>>>      Ampere = 0x17,
>>>>      Ada = 0x19,
>>>>  }
>>>>  
>>>> -impl TryFrom<u8> for Architecture {
>>>> +impl TryFrom<Bounded<u32, 6>> for Architecture {
>>>>      type Error = Error;
>>>>  
>>>> -    fn try_from(value: u8) -> Result<Self> {
>>>> -        match value {
>>>> +    fn try_from(value: Bounded<u32, 6>) -> Result<Self> {
>>>> +        match u8::from(value) {
>>>>              0x16 => Ok(Self::Turing),
>>>>              0x17 => Ok(Self::Ampere),
>>>>              0x19 => Ok(Self::Ada),
>>>> @@ -155,23 +151,26 @@ fn try_from(value: u8) -> Result<Self> {
>>>>      }
>>>>  }
>>>>  
>>>> -impl From<Architecture> for u8 {
>>>> +impl From<Architecture> for Bounded<u32, 6> {
>>>>      fn from(value: Architecture) -> Self {
>>>> -        // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless.
>>>> -        value as u8
>>>> +        match value {
>>>> +            Architecture::Turing => Bounded::<u32, 6>::new::<0x16>(),
>>>> +            Architecture::Ampere => Bounded::<u32, 6>::new::<0x17>(),
>>>> +            Architecture::Ada => Bounded::<u32, 6>::new::<0x19>(),
>>>> +        }
>>>>      }
>>>>  }
>>>>  
>>>>  pub(crate) struct Revision {
>>>> -    major: u8,
>>>> -    minor: u8,
>>>> +    major: Bounded<u8, 4>,
>>>> +    minor: Bounded<u8, 4>,
>>>>  }
>>>>  
>>>>  impl From<regs::NV_PMC_BOOT_42> for Revision {
>>>>      fn from(boot0: regs::NV_PMC_BOOT_42) -> Self {
>>>>          Self {
>>>> -            major: boot0.major_revision(),
>>>> -            minor: boot0.minor_revision(),
>>>> +            major: boot0.major_revision().cast(),
>>>> +            minor: boot0.minor_revision().cast(),
>>>>          }
>>>>      }
>>>>  }
>>>> @@ -208,13 +207,13 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
>>>>          //     from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU.
>>>>          //     Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs.
>>>>  
>>>> -        let boot0 = regs::NV_PMC_BOOT_0::read(bar);
>>>> +        let boot0 = bar.read(regs::NV_PMC_BOOT_0);
>>>>  
>>>>          if boot0.is_older_than_fermi() {
>>>>              return Err(ENODEV);
>>>>          }
>>>>  
>>>> -        let boot42 = regs::NV_PMC_BOOT_42::read(bar);
>>>> +        let boot42 = bar.read(regs::NV_PMC_BOOT_42);
>>>>          Spec::try_from(boot42).inspect_err(|_| {
>>>>              dev_err!(dev, "Unsupported chipset: {}\n", boot42);
>>>>          })
>>>> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
>>>> index 53f412f0ca32..62c2065e63ef 100644
>>>> --- a/drivers/gpu/nova-core/regs.rs
>>>> +++ b/drivers/gpu/nova-core/regs.rs
>>>> @@ -35,20 +35,64 @@
>>>>      num::FromSafeCast,
>>>>  };
>>>>  
>>>> +// All nova-core registers are 32-bit and `pub(crate)`. Wrap the `register!` macro to avoid
>>>> +// repeating this information for every register.
>>>> +macro_rules! nv_reg {
>>>> +    (
>>>> +        $(
>>>> +            $(#[$attr:meta])* $name:ident $([ $size:expr $(, stride = $stride:expr)? ])?
>>>> +                $(@ $offset:literal)?
>>>> +                $(@ $base:ident + $base_offset:literal)?
>>>> +                $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )?
>>>> +            $(, $comment:literal)? { $($fields:tt)* }
>>>> +        )*
>>>> +    )=> {
>>>> +        $(
>>>> +        ::kernel::io::register!(
>>>> +            @reg $(#[$attr])* pub(crate) $name(u32) $([$size $(, stride = $stride)?])?
>>>> +                $(@ $offset)?
>>>> +                $(@ $base + $base_offset)?
>>>> +                $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )?
>>>> +            $(, $comment)? { $($fields)* }
>>>> +        );
>>>> +        )*
>>>> +    };
>>>> +}
>>>> +
>>>
>>> Is it really worth introducing this macro to save pub(crate) and (u32)?
>>> Are we definitely going to always be using pub(crate) and u32?
>>
>> So far we are. I'm not particularly passionate about it, but I think
>> it's nice not having to repeat ourselves (and potentially introduce
>> typos).
>
> One downside is that the nested macros make it harder to check the
> implementation since you have to go to this definition, then to the
> register macro definition if you want to check something. And I also
> feel like I need to read this intermediate macro definition to see if
> it's doing anything special if I run into some error.
>
> Personally I would probably go with just using the register macro
> directly, and then we can tighten the visibility later if it's useful
> without having to change all of these. But not a super strong opinion,
> so up to you. Just doesn't feel like it saves us that much in typing the
> extra tens of characters.

IIRC Danilo also had reservations about `nv_reg` when we chatted about
it - I'll remove it from the next revision to see what it looks like.
Re: [PATCH 1/8] gpu: nova-core: convert PMC registers to kernel register macro
Posted by Gary Guo 2 weeks, 5 days ago
On Wed Mar 18, 2026 at 8:05 AM GMT, Alexandre Courbot wrote:
> Convert all PMC registers to use the kernel's register macro and update
> the code accordingly.
>
> nova-core's registers have some constant properties (like a 32-bit size
> and a crate visibility), so introduce the `nv_reg` macro to shorten
> their declaration.
>
> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
> ---
>  drivers/gpu/nova-core/falcon.rs |  7 ++--
>  drivers/gpu/nova-core/gpu.rs    | 37 ++++++++++-----------
>  drivers/gpu/nova-core/regs.rs   | 73 +++++++++++++++++++++++++++++++----------
>  3 files changed, 78 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
> index 7097a206ec3c..4721865f59d9 100644
> --- a/drivers/gpu/nova-core/falcon.rs
> +++ b/drivers/gpu/nova-core/falcon.rs
> @@ -13,7 +13,10 @@
>          DmaAddress,
>          DmaMask, //
>      },
> -    io::poll::read_poll_timeout,
> +    io::{
> +        poll::read_poll_timeout, //
> +        Io,
> +    },
>      prelude::*,
>      sync::aref::ARef,
>      time::Delta,
> @@ -532,7 +535,7 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
>          self.hal.reset_wait_mem_scrubbing(bar)?;
>  
>          regs::NV_PFALCON_FALCON_RM::default()
> -            .set_value(regs::NV_PMC_BOOT_0::read(bar).into())
> +            .set_value(bar.read(regs::NV_PMC_BOOT_0).into())
>              .write(bar, &E::ID);
>  
>          Ok(())
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index 8579d632e717..d81abc7de3d7 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -4,6 +4,8 @@
>      device,
>      devres::Devres,
>      fmt,
> +    io::Io,
> +    num::Bounded,
>      pci,
>      prelude::*,
>      sync::Arc, //
> @@ -129,24 +131,18 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
>  }
>  
>  /// Enum representation of the GPU generation.
> -///
> -/// TODO: remove the `Default` trait implementation, and the `#[default]`
> -/// attribute, once the register!() macro (which creates Architecture items) no
> -/// longer requires it for read-only fields.
> -#[derive(fmt::Debug, Default, Copy, Clone)]
> -#[repr(u8)]
> +#[derive(fmt::Debug, Copy, Clone)]
>  pub(crate) enum Architecture {
> -    #[default]
>      Turing = 0x16,
>      Ampere = 0x17,
>      Ada = 0x19,
>  }
>  
> -impl TryFrom<u8> for Architecture {
> +impl TryFrom<Bounded<u32, 6>> for Architecture {
>      type Error = Error;
>  
> -    fn try_from(value: u8) -> Result<Self> {
> -        match value {
> +    fn try_from(value: Bounded<u32, 6>) -> Result<Self> {
> +        match u8::from(value) {
>              0x16 => Ok(Self::Turing),
>              0x17 => Ok(Self::Ampere),
>              0x19 => Ok(Self::Ada),
> @@ -155,23 +151,26 @@ fn try_from(value: u8) -> Result<Self> {
>      }
>  }
>  
> -impl From<Architecture> for u8 {
> +impl From<Architecture> for Bounded<u32, 6> {
>      fn from(value: Architecture) -> Self {
> -        // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless.
> -        value as u8
> +        match value {
> +            Architecture::Turing => Bounded::<u32, 6>::new::<0x16>(),
> +            Architecture::Ampere => Bounded::<u32, 6>::new::<0x17>(),
> +            Architecture::Ada => Bounded::<u32, 6>::new::<0x19>(),

Yikes.. this looks ugly.

> +        }
>      }
>  }
>  
>  pub(crate) struct Revision {
> -    major: u8,
> -    minor: u8,
> +    major: Bounded<u8, 4>,
> +    minor: Bounded<u8, 4>,
>  }
>  
>  impl From<regs::NV_PMC_BOOT_42> for Revision {
>      fn from(boot0: regs::NV_PMC_BOOT_42) -> Self {
>          Self {
> -            major: boot0.major_revision(),
> -            minor: boot0.minor_revision(),
> +            major: boot0.major_revision().cast(),
> +            minor: boot0.minor_revision().cast(),
>          }
>      }
>  }
> @@ -208,13 +207,13 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
>          //     from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU.
>          //     Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs.
>  
> -        let boot0 = regs::NV_PMC_BOOT_0::read(bar);
> +        let boot0 = bar.read(regs::NV_PMC_BOOT_0);
>  
>          if boot0.is_older_than_fermi() {
>              return Err(ENODEV);
>          }
>  
> -        let boot42 = regs::NV_PMC_BOOT_42::read(bar);
> +        let boot42 = bar.read(regs::NV_PMC_BOOT_42);
>          Spec::try_from(boot42).inspect_err(|_| {
>              dev_err!(dev, "Unsupported chipset: {}\n", boot42);
>          })
> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
> index 53f412f0ca32..62c2065e63ef 100644
> --- a/drivers/gpu/nova-core/regs.rs
> +++ b/drivers/gpu/nova-core/regs.rs
> @@ -35,20 +35,64 @@
>      num::FromSafeCast,
>  };
>  
> +// All nova-core registers are 32-bit and `pub(crate)`. Wrap the `register!` macro to avoid
> +// repeating this information for every register.
> +macro_rules! nv_reg {
> +    (
> +        $(
> +            $(#[$attr:meta])* $name:ident $([ $size:expr $(, stride = $stride:expr)? ])?
> +                $(@ $offset:literal)?
> +                $(@ $base:ident + $base_offset:literal)?
> +                $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )?
> +            $(, $comment:literal)? { $($fields:tt)* }
> +        )*
> +    )=> {
> +        $(
> +        ::kernel::io::register!(
> +            @reg $(#[$attr])* pub(crate) $name(u32) $([$size $(, stride = $stride)?])?
> +                $(@ $offset)?
> +                $(@ $base + $base_offset)?
> +                $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )?
> +            $(, $comment)? { $($fields)* }
> +        );
> +        )*
> +    };
> +}
> +
>  // PMC
>  
> -register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" {
> -    3:0     minor_revision as u8, "Minor revision of the chip";
> -    7:4     major_revision as u8, "Major revision of the chip";
> -    8:8     architecture_1 as u8, "MSB of the architecture";
> -    23:20   implementation as u8, "Implementation version of the architecture";
> -    28:24   architecture_0 as u8, "Lower bits of the architecture";
> -});
> +nv_reg! {
> +    /// Basic revision information about the GPU.
> +    NV_PMC_BOOT_0 @ 0x00000000 {
> +        /// Minor revision of the chip.
> +        3:0     minor_revision;
> +        /// Major revision of the chip.
> +        7:4     major_revision;
> +        /// MSB of the architecture.
> +        8:8     architecture_1;
> +        /// Implementation version of the architecture.
> +        23:20   implementation;
> +        /// Lower bits of the architecture.
> +        28:24   architecture_0;
> +    }
> +
> +    /// Extended architecture information.
> +    NV_PMC_BOOT_42 @ 0x00000a00 {
> +        /// Minor revision of the chip.
> +        15:12   minor_revision;
> +        /// Major revision of the chip.
> +        19:16   major_revision;
> +        /// Implementation version of the architecture.
> +        23:20   implementation;
> +        /// Architecture value.
> +        29:24   architecture ?=> Architecture;
> +    }
> +}
>  
>  impl NV_PMC_BOOT_0 {
>      pub(crate) fn is_older_than_fermi(self) -> bool {
>          // From https://github.com/NVIDIA/open-gpu-doc/tree/master/manuals :
> -        const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u8 = 0xc;
> +        const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u32 = 0xc;
>  
>          // Older chips left arch1 zeroed out. That, combined with an arch0 value that is less than
>          // GF100, means "older than Fermi".
> @@ -56,13 +100,6 @@ pub(crate) fn is_older_than_fermi(self) -> bool {
>      }
>  }
>  
> -register!(NV_PMC_BOOT_42 @ 0x00000a00, "Extended architecture information" {
> -    15:12   minor_revision as u8, "Minor revision of the chip";
> -    19:16   major_revision as u8, "Major revision of the chip";
> -    23:20   implementation as u8, "Implementation version of the architecture";
> -    29:24   architecture as u8 ?=> Architecture, "Architecture value";
> -});
> -
>  impl NV_PMC_BOOT_42 {
>      /// Combines `architecture` and `implementation` to obtain a code unique to the chipset.
>      pub(crate) fn chipset(self) -> Result<Chipset> {
> @@ -76,8 +113,8 @@ pub(crate) fn chipset(self) -> Result<Chipset> {
>  
>      /// Returns the raw architecture value from the register.
>      fn architecture_raw(self) -> u8 {
> -        ((self.0 >> Self::ARCHITECTURE_RANGE.start()) & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1))
> -            as u8
> +        ((self.inner >> Self::ARCHITECTURE_RANGE.start())

This should be using `self.into_raw()` rather than accessing the `inner` field
directly (which should be considered impl detail of the macro).

Best,
Gary

> +            & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1)) as u8
>      }
>  }
>  
> @@ -86,7 +123,7 @@ fn fmt(&self, f: &mut kernel::fmt::Formatter<'_>) -> kernel::fmt::Result {
>          write!(
>              f,
>              "boot42 = 0x{:08x} (architecture 0x{:x}, implementation 0x{:x})",
> -            self.0,
> +            self.inner,
>              self.architecture_raw(),
>              self.implementation()
>          )
Re: [PATCH 1/8] gpu: nova-core: convert PMC registers to kernel register macro
Posted by Alexandre Courbot 2 weeks, 4 days ago
On Wed Mar 18, 2026 at 10:28 PM JST, Gary Guo wrote:
> On Wed Mar 18, 2026 at 8:05 AM GMT, Alexandre Courbot wrote:
>> Convert all PMC registers to use the kernel's register macro and update
>> the code accordingly.
>>
>> nova-core's registers have some constant properties (like a 32-bit size
>> and a crate visibility), so introduce the `nv_reg` macro to shorten
>> their declaration.
>>
>> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
>> ---
>>  drivers/gpu/nova-core/falcon.rs |  7 ++--
>>  drivers/gpu/nova-core/gpu.rs    | 37 ++++++++++-----------
>>  drivers/gpu/nova-core/regs.rs   | 73 +++++++++++++++++++++++++++++++----------
>>  3 files changed, 78 insertions(+), 39 deletions(-)
>>
>> diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs
>> index 7097a206ec3c..4721865f59d9 100644
>> --- a/drivers/gpu/nova-core/falcon.rs
>> +++ b/drivers/gpu/nova-core/falcon.rs
>> @@ -13,7 +13,10 @@
>>          DmaAddress,
>>          DmaMask, //
>>      },
>> -    io::poll::read_poll_timeout,
>> +    io::{
>> +        poll::read_poll_timeout, //
>> +        Io,
>> +    },
>>      prelude::*,
>>      sync::aref::ARef,
>>      time::Delta,
>> @@ -532,7 +535,7 @@ pub(crate) fn reset(&self, bar: &Bar0) -> Result {
>>          self.hal.reset_wait_mem_scrubbing(bar)?;
>>  
>>          regs::NV_PFALCON_FALCON_RM::default()
>> -            .set_value(regs::NV_PMC_BOOT_0::read(bar).into())
>> +            .set_value(bar.read(regs::NV_PMC_BOOT_0).into())
>>              .write(bar, &E::ID);
>>  
>>          Ok(())
>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
>> index 8579d632e717..d81abc7de3d7 100644
>> --- a/drivers/gpu/nova-core/gpu.rs
>> +++ b/drivers/gpu/nova-core/gpu.rs
>> @@ -4,6 +4,8 @@
>>      device,
>>      devres::Devres,
>>      fmt,
>> +    io::Io,
>> +    num::Bounded,
>>      pci,
>>      prelude::*,
>>      sync::Arc, //
>> @@ -129,24 +131,18 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
>>  }
>>  
>>  /// Enum representation of the GPU generation.
>> -///
>> -/// TODO: remove the `Default` trait implementation, and the `#[default]`
>> -/// attribute, once the register!() macro (which creates Architecture items) no
>> -/// longer requires it for read-only fields.
>> -#[derive(fmt::Debug, Default, Copy, Clone)]
>> -#[repr(u8)]
>> +#[derive(fmt::Debug, Copy, Clone)]
>>  pub(crate) enum Architecture {
>> -    #[default]
>>      Turing = 0x16,
>>      Ampere = 0x17,
>>      Ada = 0x19,
>>  }
>>  
>> -impl TryFrom<u8> for Architecture {
>> +impl TryFrom<Bounded<u32, 6>> for Architecture {
>>      type Error = Error;
>>  
>> -    fn try_from(value: u8) -> Result<Self> {
>> -        match value {
>> +    fn try_from(value: Bounded<u32, 6>) -> Result<Self> {
>> +        match u8::from(value) {
>>              0x16 => Ok(Self::Turing),
>>              0x17 => Ok(Self::Ampere),
>>              0x19 => Ok(Self::Ada),
>> @@ -155,23 +151,26 @@ fn try_from(value: u8) -> Result<Self> {
>>      }
>>  }
>>  
>> -impl From<Architecture> for u8 {
>> +impl From<Architecture> for Bounded<u32, 6> {
>>      fn from(value: Architecture) -> Self {
>> -        // CAST: `Architecture` is `repr(u8)`, so this cast is always lossless.
>> -        value as u8
>> +        match value {
>> +            Architecture::Turing => Bounded::<u32, 6>::new::<0x16>(),
>> +            Architecture::Ampere => Bounded::<u32, 6>::new::<0x17>(),
>> +            Architecture::Ada => Bounded::<u32, 6>::new::<0x19>(),
>
> Yikes.. this looks ugly.

Very ugly. This should be replaced by the `TryFrom` and `Into` derive
macros soon enough though (adding Jesung for visibility).

Another temporary solution would be to use `Bounded::from_expr` - in
this case we can turn this into a single statement. But since it is not
strictly a case where we cannot do without it, I preferred to eschew it.

>
>> +        }
>>      }
>>  }
>>  
>>  pub(crate) struct Revision {
>> -    major: u8,
>> -    minor: u8,
>> +    major: Bounded<u8, 4>,
>> +    minor: Bounded<u8, 4>,
>>  }
>>  
>>  impl From<regs::NV_PMC_BOOT_42> for Revision {
>>      fn from(boot0: regs::NV_PMC_BOOT_42) -> Self {
>>          Self {
>> -            major: boot0.major_revision(),
>> -            minor: boot0.minor_revision(),
>> +            major: boot0.major_revision().cast(),
>> +            minor: boot0.minor_revision().cast(),
>>          }
>>      }
>>  }
>> @@ -208,13 +207,13 @@ fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
>>          //     from an earlier (pre-Fermi) era, and then using boot42 to precisely identify the GPU.
>>          //     Somewhere in the Rubin timeframe, boot0 will no longer have space to add new GPU IDs.
>>  
>> -        let boot0 = regs::NV_PMC_BOOT_0::read(bar);
>> +        let boot0 = bar.read(regs::NV_PMC_BOOT_0);
>>  
>>          if boot0.is_older_than_fermi() {
>>              return Err(ENODEV);
>>          }
>>  
>> -        let boot42 = regs::NV_PMC_BOOT_42::read(bar);
>> +        let boot42 = bar.read(regs::NV_PMC_BOOT_42);
>>          Spec::try_from(boot42).inspect_err(|_| {
>>              dev_err!(dev, "Unsupported chipset: {}\n", boot42);
>>          })
>> diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs
>> index 53f412f0ca32..62c2065e63ef 100644
>> --- a/drivers/gpu/nova-core/regs.rs
>> +++ b/drivers/gpu/nova-core/regs.rs
>> @@ -35,20 +35,64 @@
>>      num::FromSafeCast,
>>  };
>>  
>> +// All nova-core registers are 32-bit and `pub(crate)`. Wrap the `register!` macro to avoid
>> +// repeating this information for every register.
>> +macro_rules! nv_reg {
>> +    (
>> +        $(
>> +            $(#[$attr:meta])* $name:ident $([ $size:expr $(, stride = $stride:expr)? ])?
>> +                $(@ $offset:literal)?
>> +                $(@ $base:ident + $base_offset:literal)?
>> +                $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )?
>> +            $(, $comment:literal)? { $($fields:tt)* }
>> +        )*
>> +    )=> {
>> +        $(
>> +        ::kernel::io::register!(
>> +            @reg $(#[$attr])* pub(crate) $name(u32) $([$size $(, stride = $stride)?])?
>> +                $(@ $offset)?
>> +                $(@ $base + $base_offset)?
>> +                $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )?
>> +            $(, $comment)? { $($fields)* }
>> +        );
>> +        )*
>> +    };
>> +}
>> +
>>  // PMC
>>  
>> -register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" {
>> -    3:0     minor_revision as u8, "Minor revision of the chip";
>> -    7:4     major_revision as u8, "Major revision of the chip";
>> -    8:8     architecture_1 as u8, "MSB of the architecture";
>> -    23:20   implementation as u8, "Implementation version of the architecture";
>> -    28:24   architecture_0 as u8, "Lower bits of the architecture";
>> -});
>> +nv_reg! {
>> +    /// Basic revision information about the GPU.
>> +    NV_PMC_BOOT_0 @ 0x00000000 {
>> +        /// Minor revision of the chip.
>> +        3:0     minor_revision;
>> +        /// Major revision of the chip.
>> +        7:4     major_revision;
>> +        /// MSB of the architecture.
>> +        8:8     architecture_1;
>> +        /// Implementation version of the architecture.
>> +        23:20   implementation;
>> +        /// Lower bits of the architecture.
>> +        28:24   architecture_0;
>> +    }
>> +
>> +    /// Extended architecture information.
>> +    NV_PMC_BOOT_42 @ 0x00000a00 {
>> +        /// Minor revision of the chip.
>> +        15:12   minor_revision;
>> +        /// Major revision of the chip.
>> +        19:16   major_revision;
>> +        /// Implementation version of the architecture.
>> +        23:20   implementation;
>> +        /// Architecture value.
>> +        29:24   architecture ?=> Architecture;
>> +    }
>> +}
>>  
>>  impl NV_PMC_BOOT_0 {
>>      pub(crate) fn is_older_than_fermi(self) -> bool {
>>          // From https://github.com/NVIDIA/open-gpu-doc/tree/master/manuals :
>> -        const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u8 = 0xc;
>> +        const NV_PMC_BOOT_0_ARCHITECTURE_GF100: u32 = 0xc;
>>  
>>          // Older chips left arch1 zeroed out. That, combined with an arch0 value that is less than
>>          // GF100, means "older than Fermi".
>> @@ -56,13 +100,6 @@ pub(crate) fn is_older_than_fermi(self) -> bool {
>>      }
>>  }
>>  
>> -register!(NV_PMC_BOOT_42 @ 0x00000a00, "Extended architecture information" {
>> -    15:12   minor_revision as u8, "Minor revision of the chip";
>> -    19:16   major_revision as u8, "Major revision of the chip";
>> -    23:20   implementation as u8, "Implementation version of the architecture";
>> -    29:24   architecture as u8 ?=> Architecture, "Architecture value";
>> -});
>> -
>>  impl NV_PMC_BOOT_42 {
>>      /// Combines `architecture` and `implementation` to obtain a code unique to the chipset.
>>      pub(crate) fn chipset(self) -> Result<Chipset> {
>> @@ -76,8 +113,8 @@ pub(crate) fn chipset(self) -> Result<Chipset> {
>>  
>>      /// Returns the raw architecture value from the register.
>>      fn architecture_raw(self) -> u8 {
>> -        ((self.0 >> Self::ARCHITECTURE_RANGE.start()) & ((1 << Self::ARCHITECTURE_RANGE.len()) - 1))
>> -            as u8
>> +        ((self.inner >> Self::ARCHITECTURE_RANGE.start())
>
> This should be using `self.into_raw()` rather than accessing the `inner` field
> directly (which should be considered impl detail of the macro).

Indeed - done.