Right now the GSP boot code is very incomplete and limited to running
FRTS, so having it in `Gpu::new` is not a big constraint.
However, this will change as we add more steps of the GSP boot process,
some of which are rather cumbersome to perform on a
partially-constructed GPU instance.
Relatedly, booting the GSP typically happens only once in the GPU reset
cycle. Most of the data created to complete this step (notably firmware
loaded from user-space) is needed only temporary and can be discarded
once the GSP is booted; it then makes all the more sense to store these
as local variables of a dedicated method, instead of inside the `Gpu`
structure where they as kept as long as the GPU is bound, using dozens
of megabytes of host memory.
Thus, introduce a `start_gsp` method on the `Gpu` struct, which is
called by `probe` after the GPU is instantiated and will return the
running GSP instance, once the code completing its boot is integrated.
Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
---
drivers/gpu/nova-core/driver.rs | 10 +++++++++-
drivers/gpu/nova-core/gpu.rs | 41 ++++++++++++++++++++++++++++++-----------
2 files changed, 39 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index 274989ea1fb4a5e3e6678a08920ddc76d2809ab2..1062014c0a488e959379f009c2e8029ffaa1e2f8 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -6,6 +6,8 @@
#[pin_data]
pub(crate) struct NovaCore {
+ // Placeholder for the real `Gsp` object once it is built.
+ pub(crate) gsp: (),
#[pin]
pub(crate) gpu: Gpu,
_reg: auxiliary::Registration,
@@ -40,8 +42,14 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self
)?;
let this = KBox::pin_init(
- try_pin_init!(Self {
+ try_pin_init!(&this in Self {
gpu <- Gpu::new(pdev, bar)?,
+ gsp <- {
+ // SAFETY: `this.gpu` is initialized to a valid value.
+ let gpu = unsafe { &(*this.as_ptr()).gpu };
+
+ gpu.start_gsp(pdev)?
+ },
_reg: auxiliary::Registration::new(
pdev.as_ref(),
c_str!("nova-drm"),
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index 4d0f759610ec6eb8a716c039a2e67859410da17c..8343276e52e6a87a8ff1aff9fc484e00d4b57417 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -172,6 +172,8 @@ pub(crate) struct Gpu {
/// System memory page required for flushing all pending GPU-side memory writes done through
/// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation).
sysmem_flush: SysmemFlush,
+ gsp_falcon: Falcon<Gsp>,
+ sec2_falcon: Falcon<Sec2>,
}
#[pinned_drop]
@@ -190,8 +192,8 @@ impl Gpu {
/// TODO: this needs to be moved into a larger type responsible for booting the whole GSP
/// (`GspBooter`?).
fn run_fwsec_frts(
+ &self,
dev: &device::Device<device::Bound>,
- falcon: &Falcon<Gsp>,
bar: &Bar0,
bios: &Vbios,
fb_layout: &FbLayout,
@@ -208,7 +210,7 @@ fn run_fwsec_frts(
let fwsec_frts = FwsecFirmware::new(
dev,
- falcon,
+ &self.gsp_falcon,
bar,
bios,
FwsecCommand::Frts {
@@ -218,7 +220,7 @@ fn run_fwsec_frts(
)?;
// Run FWSEC-FRTS to create the WPR2 region.
- fwsec_frts.run(dev, falcon, bar)?;
+ fwsec_frts.run(dev, &self.gsp_falcon, bar)?;
// SCRATCH_E contains the error code for FWSEC-FRTS.
let frts_status = regs::NV_PBUS_SW_SCRATCH_0E_FRTS_ERR::read(bar).frts_err_code();
@@ -263,6 +265,28 @@ fn run_fwsec_frts(
}
}
+ /// Attempt to start the GSP.
+ ///
+ /// This is a GPU-dependent and complex procedure that involves loading firmware files from
+ /// user-space, patching them with signatures, and building firmware-specific intricate data
+ /// structures that the GSP will use at runtime.
+ ///
+ /// Upon return, the GSP is up and running, and its runtime object given as return value.
+ pub(crate) fn start_gsp(&self, pdev: &pci::Device<device::Bound>) -> Result<()> {
+ let dev = pdev.as_ref();
+
+ let bar = self.bar.access(dev)?;
+
+ let bios = Vbios::new(dev, bar)?;
+
+ let fb_layout = FbLayout::new(self.spec.chipset, bar)?;
+ dev_dbg!(dev, "{:#x?}\n", fb_layout);
+
+ self.run_fwsec_frts(dev, bar, &bios, &fb_layout)?;
+
+ Ok(())
+ }
+
pub(crate) fn new(
pdev: &pci::Device<device::Bound>,
devres_bar: Arc<Devres<Bar0>>,
@@ -293,20 +317,15 @@ pub(crate) fn new(
)?;
gsp_falcon.clear_swgen0_intr(bar);
- let _sec2_falcon = Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?;
-
- let fb_layout = FbLayout::new(spec.chipset, bar)?;
- dev_dbg!(pdev.as_ref(), "{:#x?}\n", fb_layout);
-
- let bios = Vbios::new(pdev.as_ref(), bar)?;
-
- Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?;
+ let sec2_falcon = Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?;
Ok(pin_init!(Self {
spec,
bar: devres_bar,
fw,
sysmem_flush,
+ gsp_falcon,
+ sec2_falcon,
}))
}
}
--
2.51.0
On 9/2/25 4:31 PM, Alexandre Courbot wrote: > pub(crate) fn new( > pdev: &pci::Device<device::Bound>, > devres_bar: Arc<Devres<Bar0>>, The diff is hiding it, but with this patch we should also make sure that this returns impl PinInit<Self, Error> rather than Result<impl PinInit<Self>. I think this should be possible now. > @@ -293,20 +317,15 @@ pub(crate) fn new( > )?; > gsp_falcon.clear_swgen0_intr(bar); > > - let _sec2_falcon = Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?; > - > - let fb_layout = FbLayout::new(spec.chipset, bar)?; > - dev_dbg!(pdev.as_ref(), "{:#x?}\n", fb_layout); > - > - let bios = Vbios::new(pdev.as_ref(), bar)?; > - > - Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?; > + let sec2_falcon = Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?; > > Ok(pin_init!(Self { > spec, > bar: devres_bar, > fw, > sysmem_flush, > + gsp_falcon, > + sec2_falcon, > })) > } > } >
On Wed Sep 3, 2025 at 8:12 AM JST, Danilo Krummrich wrote: > On 9/2/25 4:31 PM, Alexandre Courbot wrote: >> pub(crate) fn new( >> pdev: &pci::Device<device::Bound>, >> devres_bar: Arc<Devres<Bar0>>, > > The diff is hiding it, but with this patch we should also make sure that this > returns impl PinInit<Self, Error> rather than Result<impl PinInit<Self>. > > I think this should be possible now. There is still code that can return errors (falcon creation, etc) - do you mean that we should move it into the pin initializer and turn it into a `try_pin_init`?
On Wed Sep 3, 2025 at 9:10 AM CEST, Alexandre Courbot wrote: > On Wed Sep 3, 2025 at 8:12 AM JST, Danilo Krummrich wrote: >> On 9/2/25 4:31 PM, Alexandre Courbot wrote: >>> pub(crate) fn new( >>> pdev: &pci::Device<device::Bound>, >>> devres_bar: Arc<Devres<Bar0>>, >> >> The diff is hiding it, but with this patch we should also make sure that this >> returns impl PinInit<Self, Error> rather than Result<impl PinInit<Self>. >> >> I think this should be possible now. > > There is still code that can return errors (falcon creation, etc) - do > you mean that we should move it into the pin initializer and turn it > into a `try_pin_init`? Yeah, that would be better practice, if it doesn't work out for a good reason we can also fall back to Result<impl PinInit<Self, Error>, but we should at least try to avoid it.
On Wed Sep 3, 2025 at 5:27 PM JST, Danilo Krummrich wrote: > On Wed Sep 3, 2025 at 9:10 AM CEST, Alexandre Courbot wrote: >> On Wed Sep 3, 2025 at 8:12 AM JST, Danilo Krummrich wrote: >>> On 9/2/25 4:31 PM, Alexandre Courbot wrote: >>>> pub(crate) fn new( >>>> pdev: &pci::Device<device::Bound>, >>>> devres_bar: Arc<Devres<Bar0>>, >>> >>> The diff is hiding it, but with this patch we should also make sure that this >>> returns impl PinInit<Self, Error> rather than Result<impl PinInit<Self>. >>> >>> I think this should be possible now. >> >> There is still code that can return errors (falcon creation, etc) - do >> you mean that we should move it into the pin initializer and turn it >> into a `try_pin_init`? > > Yeah, that would be better practice, if it doesn't work out for a good reason > we can also fall back to Result<impl PinInit<Self, Error>, but we should at > least try to avoid it. I tried but could not do it in a way that is satisfying. The problem is that `Gpu::new` receives a `Arc<Devres<Bar0>>`, which we need to `access` in order to do anything useful with it. If we first store it into the `Gpu` structure, then every subsequent member needs to `access` it in its own code block in order to perform their own initialization. This is quite cumbersome. If there is a way to obtain the `Bar0` once after the `bar` member of `Gpu` is initialized, and then use that instance with each remaining member, then that problem would go away but I am not aware of such a thing.
On Tue Sep 9, 2025 at 4:11 PM CEST, Alexandre Courbot wrote: > On Wed Sep 3, 2025 at 5:27 PM JST, Danilo Krummrich wrote: >> On Wed Sep 3, 2025 at 9:10 AM CEST, Alexandre Courbot wrote: >>> On Wed Sep 3, 2025 at 8:12 AM JST, Danilo Krummrich wrote: >>>> On 9/2/25 4:31 PM, Alexandre Courbot wrote: >>>>> pub(crate) fn new( >>>>> pdev: &pci::Device<device::Bound>, >>>>> devres_bar: Arc<Devres<Bar0>>, >>>> >>>> The diff is hiding it, but with this patch we should also make sure that this >>>> returns impl PinInit<Self, Error> rather than Result<impl PinInit<Self>. >>>> >>>> I think this should be possible now. >>> >>> There is still code that can return errors (falcon creation, etc) - do >>> you mean that we should move it into the pin initializer and turn it >>> into a `try_pin_init`? >> >> Yeah, that would be better practice, if it doesn't work out for a good reason >> we can also fall back to Result<impl PinInit<Self, Error>, but we should at >> least try to avoid it. > > I tried but could not do it in a way that is satisfying. The problem is > that `Gpu::new` receives a `Arc<Devres<Bar0>>`, which we need to > `access` in order to do anything useful with it. If we first store it > into the `Gpu` structure, then every subsequent member needs to `access` > it in its own code block in order to perform their own initialization. > This is quite cumbersome. > > If there is a way to obtain the `Bar0` once after the `bar` member of > `Gpu` is initialized, and then use that instance with each remaining > member, then that problem would go away but I am not aware of such a > thing. What about this? impl Gpu { pub(crate) fn new<'a>( dev: &'a Device<Bound>, bar: &'a Bar0 devres_bar: Arc<Devres<Bar0>>, ) -> impl PinInit<Self, Error> + 'a { try_pin_init(Self { bar: devres_bar, spec: Spec::new(bar)?, gsp_falcon: Falcon::<Gsp>::new(dev, spec.chipset)?, sec2_falcon: Falcon::<Sec2>::new(dev, spec.chipset)?, sysmem_flush: SysmemFlush::register(dev, bar, spec.chipset)? gsp <- Gsp::new(gsp_falcon, sec2_falcon, sysmem_flush)?, }) } }
On Tue Sep 9, 2025 at 11:43 PM JST, Danilo Krummrich wrote: > On Tue Sep 9, 2025 at 4:11 PM CEST, Alexandre Courbot wrote: >> On Wed Sep 3, 2025 at 5:27 PM JST, Danilo Krummrich wrote: >>> On Wed Sep 3, 2025 at 9:10 AM CEST, Alexandre Courbot wrote: >>>> On Wed Sep 3, 2025 at 8:12 AM JST, Danilo Krummrich wrote: >>>>> On 9/2/25 4:31 PM, Alexandre Courbot wrote: >>>>>> pub(crate) fn new( >>>>>> pdev: &pci::Device<device::Bound>, >>>>>> devres_bar: Arc<Devres<Bar0>>, >>>>> >>>>> The diff is hiding it, but with this patch we should also make sure that this >>>>> returns impl PinInit<Self, Error> rather than Result<impl PinInit<Self>. >>>>> >>>>> I think this should be possible now. >>>> >>>> There is still code that can return errors (falcon creation, etc) - do >>>> you mean that we should move it into the pin initializer and turn it >>>> into a `try_pin_init`? >>> >>> Yeah, that would be better practice, if it doesn't work out for a good reason >>> we can also fall back to Result<impl PinInit<Self, Error>, but we should at >>> least try to avoid it. >> >> I tried but could not do it in a way that is satisfying. The problem is >> that `Gpu::new` receives a `Arc<Devres<Bar0>>`, which we need to >> `access` in order to do anything useful with it. If we first store it >> into the `Gpu` structure, then every subsequent member needs to `access` >> it in its own code block in order to perform their own initialization. >> This is quite cumbersome. >> >> If there is a way to obtain the `Bar0` once after the `bar` member of >> `Gpu` is initialized, and then use that instance with each remaining >> member, then that problem would go away but I am not aware of such a >> thing. > > What about this? > > impl Gpu { > pub(crate) fn new<'a>( > dev: &'a Device<Bound>, > bar: &'a Bar0 > devres_bar: Arc<Devres<Bar0>>, > ) -> impl PinInit<Self, Error> + 'a { > try_pin_init(Self { > bar: devres_bar, > spec: Spec::new(bar)?, > gsp_falcon: Falcon::<Gsp>::new(dev, spec.chipset)?, > sec2_falcon: Falcon::<Sec2>::new(dev, spec.chipset)?, > sysmem_flush: SysmemFlush::register(dev, bar, spec.chipset)? > gsp <- Gsp::new(gsp_falcon, sec2_falcon, sysmem_flush)?, > }) > } > } It does work. The bizareness of passing the `bar` twice aside, here is what it looks like when I got it to compile: pub(crate) fn new<'a>( pdev: &'a pci::Device<device::Bound>, devres_bar: Arc<Devres<Bar0>>, bar: &'a Bar0, ) -> impl PinInit<Self, Error> + 'a { try_pin_init!(Self { spec: Spec::new(bar).inspect(|spec| { dev_info!( pdev.as_ref(), "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", spec.chipset, spec.chipset.arch(), spec.revision ); })?, sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, gsp_falcon: Falcon::<Gsp>::new( pdev.as_ref(), spec.chipset, bar, spec.chipset > Chipset::GA100, ) .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, sec2_falcon: Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?, gsp: Self::start_gsp(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?, bar: devres_bar, }) } The wait for GFW initialization had to be moved to `probe`, but that's fine IMO. I do however find the code less readable in this form, less editable as well. And LSP seems lost, so I don't get any syntax highlighting in the `try_pin_init` block. Fundamentally, this changes the method from a fallible method returning a non-fallible initializer into a non-fallible method returning a fallible initializer. I'm ok with that, and maybe this new form will encourage us to keep this method short, which is what we want, but other than that what benefit are we getting from this change?
On Wed Sep 10, 2025 at 6:48 AM CEST, Alexandre Courbot wrote: > On Tue Sep 9, 2025 at 11:43 PM JST, Danilo Krummrich wrote: >> impl Gpu { >> pub(crate) fn new<'a>( >> dev: &'a Device<Bound>, >> bar: &'a Bar0 >> devres_bar: Arc<Devres<Bar0>>, >> ) -> impl PinInit<Self, Error> + 'a { >> try_pin_init(Self { >> bar: devres_bar, >> spec: Spec::new(bar)?, >> gsp_falcon: Falcon::<Gsp>::new(dev, spec.chipset)?, >> sec2_falcon: Falcon::<Sec2>::new(dev, spec.chipset)?, >> sysmem_flush: SysmemFlush::register(dev, bar, spec.chipset)? >> gsp <- Gsp::new(gsp_falcon, sec2_falcon, sysmem_flush)?, >> }) >> } >> } > > It does work. The bizareness of passing the `bar` twice aside, Yeah, but it really seems like a minor inconvinience. (Especially, since this will be the only occurance of this we'll ever have.) > here is what it looks like when I got it to compile: This looks great! > pub(crate) fn new<'a>( > pdev: &'a pci::Device<device::Bound>, > devres_bar: Arc<Devres<Bar0>>, > bar: &'a Bar0, > ) -> impl PinInit<Self, Error> + 'a { > try_pin_init!(Self { > spec: Spec::new(bar).inspect(|spec| { > dev_info!( > pdev.as_ref(), > "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", > spec.chipset, > spec.chipset.arch(), > spec.revision > ); > })?, + _: { + gfw::wait_gfw_boot_completion(bar) + .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; + }, > > sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, > > gsp_falcon: Falcon::<Gsp>::new( > pdev.as_ref(), > spec.chipset, > bar, > spec.chipset > Chipset::GA100, > ) > .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, > > sec2_falcon: Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?, > - gsp: Self::start_gsp(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?, + gsp <- Self::start_gsp(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon), > > bar: devres_bar, > }) > } > > The wait for GFW initialization had to be moved to `probe`, but that's > fine IMO. That's not necessary, you can keep it in Gpu::new() -- I don't see what's preventing us from that. I inserted it in the code above. > I do however find the code less readable in this form, less > editable as well. And LSP seems lost, so I don't get any syntax > highlighting in the `try_pin_init` block. Benno is working on a syntax update, so automatic formatting etc. will properly work. Otherwise, I can't see how this is a downgrade. It represents the initialization process in a much clearer way that the current implementation of Gsp::new(), which is rather messy. > Fundamentally, this changes the method from a fallible method returning > a non-fallible initializer into a non-fallible method returning a > fallible initializer. Yeah, that's the best case when working with pin-init. > I'm ok with that, and maybe this new form will > encourage us to keep this method short, which is what we want, but other > than that what benefit are we getting from this change? The immediate benefit is that we don't need an extra allocation for the Gsp structure. The general benefit is that once we need to add more fields to structures that require pinning (such as locks -- and we will need a lot of them) we're prepared for it. If we're not prepared for it, I'm pretty sure that everytime someone needs to add e.g. a new lock for something, it will just result in a new Pin<KBox<T>>, because the initial pin-init hierarchy just isn't there, and creating a new allocation is more convinient than fixing the existing code. This is exactly what pin-init was introduced for in the kernel, such that we're not doing worse than equivalent C code. struct Foo { struct Bar bar; void *data; struct mutex data_lock; } struct Bar { void *data; struct mutex data_lock; } In C you can just kmalloc(sizeof(Foo), GFP_KERNEL) and subsequently initialize all fields. In Rust I don't want to fall back having to allocate for Bar *and* for Foo separately. If there are things to improve with pin-init, let's improve them, but let's not do worse than C code in this aspect.
On Wed Sep 10, 2025 at 5:01 PM JST, Danilo Krummrich wrote: > On Wed Sep 10, 2025 at 6:48 AM CEST, Alexandre Courbot wrote: >> On Tue Sep 9, 2025 at 11:43 PM JST, Danilo Krummrich wrote: >>> impl Gpu { >>> pub(crate) fn new<'a>( >>> dev: &'a Device<Bound>, >>> bar: &'a Bar0 >>> devres_bar: Arc<Devres<Bar0>>, >>> ) -> impl PinInit<Self, Error> + 'a { >>> try_pin_init(Self { >>> bar: devres_bar, >>> spec: Spec::new(bar)?, >>> gsp_falcon: Falcon::<Gsp>::new(dev, spec.chipset)?, >>> sec2_falcon: Falcon::<Sec2>::new(dev, spec.chipset)?, >>> sysmem_flush: SysmemFlush::register(dev, bar, spec.chipset)? >>> gsp <- Gsp::new(gsp_falcon, sec2_falcon, sysmem_flush)?, >>> }) >>> } >>> } >> >> It does work. The bizareness of passing the `bar` twice aside, > > Yeah, but it really seems like a minor inconvinience. (Especially, since this > will be the only occurance of this we'll ever have.) It's definitely not a big deal. > >> here is what it looks like when I got it to compile: > > This looks great! > >> pub(crate) fn new<'a>( >> pdev: &'a pci::Device<device::Bound>, >> devres_bar: Arc<Devres<Bar0>>, >> bar: &'a Bar0, >> ) -> impl PinInit<Self, Error> + 'a { >> try_pin_init!(Self { >> spec: Spec::new(bar).inspect(|spec| { >> dev_info!( >> pdev.as_ref(), >> "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", >> spec.chipset, >> spec.chipset.arch(), >> spec.revision >> ); >> })?, > > + _: { > + gfw::wait_gfw_boot_completion(bar) > + .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; > + }, >> >> sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, >> >> gsp_falcon: Falcon::<Gsp>::new( >> pdev.as_ref(), >> spec.chipset, >> bar, >> spec.chipset > Chipset::GA100, >> ) >> .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, >> >> sec2_falcon: Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?, >> > - gsp: Self::start_gsp(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?, > + gsp <- Self::start_gsp(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon), >> >> bar: devres_bar, >> }) >> } >> >> The wait for GFW initialization had to be moved to `probe`, but that's >> fine IMO. > > That's not necessary, you can keep it in Gpu::new() -- I don't see what's > preventing us from that. I inserted it in the code above. This one didn't work for me, but maybe you have a newer version of the internal references patch: error: no rules expected reserved identifier `_` --> drivers/gpu/nova-core/gpu.rs:323:13 | 323 | _: { | ^ no rules expected this token in macro call | note: while trying to match `)` > >> I do however find the code less readable in this form, less >> editable as well. And LSP seems lost, so I don't get any syntax >> highlighting in the `try_pin_init` block. > > Benno is working on a syntax update, so automatic formatting etc. will properly > work. > > Otherwise, I can't see how this is a downgrade. It represents the initialization > process in a much clearer way that the current implementation of Gsp::new(), > which is rather messy. > >> Fundamentally, this changes the method from a fallible method returning >> a non-fallible initializer into a non-fallible method returning a >> fallible initializer. > > Yeah, that's the best case when working with pin-init. > >> I'm ok with that, and maybe this new form will >> encourage us to keep this method short, which is what we want, but other >> than that what benefit are we getting from this change? > > The immediate benefit is that we don't need an extra allocation for the Gsp > structure. > > The general benefit is that once we need to add more fields to > structures that require pinning (such as locks -- and we will need a lot of > them) we're prepared for it. > > If we're not prepared for it, I'm pretty sure that everytime someone needs to > add e.g. a new lock for something, it will just result in a new Pin<KBox<T>>, > because the initial pin-init hierarchy just isn't there, and creating a new > allocation is more convinient than fixing the existing code. > > This is exactly what pin-init was introduced for in the kernel, such that we're > not doing worse than equivalent C code. > > struct Foo { > struct Bar bar; > void *data; > struct mutex data_lock; > } > > struct Bar { > void *data; > struct mutex data_lock; > } > > In C you can just kmalloc(sizeof(Foo), GFP_KERNEL) and subsequently initialize > all fields. > > In Rust I don't want to fall back having to allocate for Bar *and* for Foo > separately. > > If there are things to improve with pin-init, let's improve them, but let's not > do worse than C code in this aspect. Maybe I need to get used to this way of initializing, but in any case I can definitely live with it. And it we split initialization into proper functions the code should remain readable. I'll try and slip this into the next revision of this patchset, or as a follow-up if there isn't one.
On Wed Sep 10, 2025 at 1:18 PM CEST, Alexandre Courbot wrote: > On Wed Sep 10, 2025 at 5:01 PM JST, Danilo Krummrich wrote: >> On Wed Sep 10, 2025 at 6:48 AM CEST, Alexandre Courbot wrote: >>> On Tue Sep 9, 2025 at 11:43 PM JST, Danilo Krummrich wrote: >>>> impl Gpu { >>>> pub(crate) fn new<'a>( >>>> dev: &'a Device<Bound>, >>>> bar: &'a Bar0 >>>> devres_bar: Arc<Devres<Bar0>>, >>>> ) -> impl PinInit<Self, Error> + 'a { >>>> try_pin_init(Self { >>>> bar: devres_bar, >>>> spec: Spec::new(bar)?, >>>> gsp_falcon: Falcon::<Gsp>::new(dev, spec.chipset)?, >>>> sec2_falcon: Falcon::<Sec2>::new(dev, spec.chipset)?, >>>> sysmem_flush: SysmemFlush::register(dev, bar, spec.chipset)? >>>> gsp <- Gsp::new(gsp_falcon, sec2_falcon, sysmem_flush)?, >>>> }) >>>> } >>>> } >>> >>> It does work. The bizareness of passing the `bar` twice aside, >> >> Yeah, but it really seems like a minor inconvinience. (Especially, since this >> will be the only occurance of this we'll ever have.) > > It's definitely not a big deal. > >> >>> here is what it looks like when I got it to compile: >> >> This looks great! >> >>> pub(crate) fn new<'a>( >>> pdev: &'a pci::Device<device::Bound>, >>> devres_bar: Arc<Devres<Bar0>>, >>> bar: &'a Bar0, >>> ) -> impl PinInit<Self, Error> + 'a { >>> try_pin_init!(Self { >>> spec: Spec::new(bar).inspect(|spec| { >>> dev_info!( >>> pdev.as_ref(), >>> "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", >>> spec.chipset, >>> spec.chipset.arch(), >>> spec.revision >>> ); >>> })?, >> >> + _: { >> + gfw::wait_gfw_boot_completion(bar) >> + .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; >> + }, >>> >>> sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, >>> >>> gsp_falcon: Falcon::<Gsp>::new( >>> pdev.as_ref(), >>> spec.chipset, >>> bar, >>> spec.chipset > Chipset::GA100, >>> ) >>> .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, >>> >>> sec2_falcon: Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?, >>> >> - gsp: Self::start_gsp(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?, >> + gsp <- Self::start_gsp(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon), >>> >>> bar: devres_bar, >>> }) >>> } >>> >>> The wait for GFW initialization had to be moved to `probe`, but that's >>> fine IMO. >> >> That's not necessary, you can keep it in Gpu::new() -- I don't see what's >> preventing us from that. I inserted it in the code above. > > This one didn't work for me, but maybe you have a newer version of the > internal references patch: > > error: no rules expected reserved identifier `_` > --> drivers/gpu/nova-core/gpu.rs:323:13 > | > 323 | _: { > | ^ no rules expected this token in macro call > | > note: while trying to match `)` Yeah, you also need this patch [1]. [1] https://lore.kernel.org/all/20250905140534.3328297-1-lossin@kernel.org/
On Wed Sep 10, 2025 at 8:18 PM JST, Alexandre Courbot wrote: >>> here is what it looks like when I got it to compile: >> >> This looks great! >> >>> pub(crate) fn new<'a>( >>> pdev: &'a pci::Device<device::Bound>, >>> devres_bar: Arc<Devres<Bar0>>, >>> bar: &'a Bar0, >>> ) -> impl PinInit<Self, Error> + 'a { >>> try_pin_init!(Self { >>> spec: Spec::new(bar).inspect(|spec| { >>> dev_info!( >>> pdev.as_ref(), >>> "NVIDIA (Chipset: {}, Architecture: {:?}, Revision: {})\n", >>> spec.chipset, >>> spec.chipset.arch(), >>> spec.revision >>> ); >>> })?, >> >> + _: { >> + gfw::wait_gfw_boot_completion(bar) >> + .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; >> + }, >>> >>> sysmem_flush: SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?, >>> >>> gsp_falcon: Falcon::<Gsp>::new( >>> pdev.as_ref(), >>> spec.chipset, >>> bar, >>> spec.chipset > Chipset::GA100, >>> ) >>> .inspect(|falcon| falcon.clear_swgen0_intr(bar))?, >>> >>> sec2_falcon: Falcon::<Sec2>::new(pdev.as_ref(), spec.chipset, bar, true)?, >>> >> - gsp: Self::start_gsp(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon)?, >> + gsp <- Self::start_gsp(pdev, bar, spec.chipset, gsp_falcon, sec2_falcon), >>> >>> bar: devres_bar, >>> }) >>> } >>> >>> The wait for GFW initialization had to be moved to `probe`, but that's >>> fine IMO. >> >> That's not necessary, you can keep it in Gpu::new() -- I don't see what's >> preventing us from that. I inserted it in the code above. > > This one didn't work for me, but maybe you have a newer version of the > internal references patch: > > error: no rules expected reserved identifier `_` > --> drivers/gpu/nova-core/gpu.rs:323:13 > | > 323 | _: { > | ^ no rules expected this token in macro call > | > note: while trying to match `)` Ah, just found out about this, neat! https://lore.kernel.org/rust-for-linux/20250905140534.3328297-1-lossin@kernel.org/
On Tue Sep 2, 2025 at 4:31 PM CEST, Alexandre Courbot wrote: > diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs > index 274989ea1fb4a5e3e6678a08920ddc76d2809ab2..1062014c0a488e959379f009c2e8029ffaa1e2f8 100644 > --- a/drivers/gpu/nova-core/driver.rs > +++ b/drivers/gpu/nova-core/driver.rs > @@ -6,6 +6,8 @@ > > #[pin_data] > pub(crate) struct NovaCore { > + // Placeholder for the real `Gsp` object once it is built. > + pub(crate) gsp: (), > #[pin] > pub(crate) gpu: Gpu, > _reg: auxiliary::Registration, > @@ -40,8 +42,14 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self > )?; > > let this = KBox::pin_init( > - try_pin_init!(Self { > + try_pin_init!(&this in Self { > gpu <- Gpu::new(pdev, bar)?, > + gsp <- { > + // SAFETY: `this.gpu` is initialized to a valid value. > + let gpu = unsafe { &(*this.as_ptr()).gpu }; > + > + gpu.start_gsp(pdev)? > + }, Please use pin_chain() [1] for this. More in general, unsafe code should be the absolute last resort. If we add new unsafe code I'd love to see a comment justifying why there's no other way than using unsafe code for this, as we agreed in [2]. I did a quick grep on this series and I see 21 occurrences of "unsafe", if I substract the ones for annotations and for FromBytes impls, it's still 9 new ones. :( Do we really need all of them? Otherwise, I really like this, it's a great improvement over initializing everything into the Gpu struct -- thanks for the refactoring! [1] https://rust.docs.kernel.org/kernel/prelude/trait.PinInit.html#method.pin_chain [2] https://docs.kernel.org/gpu/nova/guidelines.html#language
On Wed Sep 3, 2025 at 4:53 AM JST, Danilo Krummrich wrote: > On Tue Sep 2, 2025 at 4:31 PM CEST, Alexandre Courbot wrote: >> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs >> index 274989ea1fb4a5e3e6678a08920ddc76d2809ab2..1062014c0a488e959379f009c2e8029ffaa1e2f8 100644 >> --- a/drivers/gpu/nova-core/driver.rs >> +++ b/drivers/gpu/nova-core/driver.rs >> @@ -6,6 +6,8 @@ >> >> #[pin_data] >> pub(crate) struct NovaCore { >> + // Placeholder for the real `Gsp` object once it is built. >> + pub(crate) gsp: (), >> #[pin] >> pub(crate) gpu: Gpu, >> _reg: auxiliary::Registration, >> @@ -40,8 +42,14 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self >> )?; >> >> let this = KBox::pin_init( >> - try_pin_init!(Self { >> + try_pin_init!(&this in Self { >> gpu <- Gpu::new(pdev, bar)?, >> + gsp <- { >> + // SAFETY: `this.gpu` is initialized to a valid value. >> + let gpu = unsafe { &(*this.as_ptr()).gpu }; >> + >> + gpu.start_gsp(pdev)? >> + }, > > Please use pin_chain() [1] for this. Sorry, but I couldn't figure out how I can use pin_chain here (and couldn't find any relevant example in the kernel code either). Can you elaborate a bit? > > More in general, unsafe code should be the absolute last resort. If we add new > unsafe code I'd love to see a comment justifying why there's no other way than > using unsafe code for this, as we agreed in [2]. > > I did a quick grep on this series and I see 21 occurrences of "unsafe", if I > substract the ones for annotations and for FromBytes impls, it's still 9 new > ones. :( > > Do we really need all of them? I've counted 16 uses of `unsafe`. :) - 3 in the bindgen-generated code (these can't be avoided), - 7 to implement `FromBytes`, - 1 to work around the fact that `FromBytes` doesn't work on slices yet (maybe that one can be removed) - 5 as a result of intra-dependencies in PinInit initializers (which we might be able to remove if I figure out how to use `pin_chain`). So best-case scenario would be that we will be down to 10 that are truly unavoidable.
On Wed Sep 3, 2025 at 9:08 AM CEST, Alexandre Courbot wrote: > On Wed Sep 3, 2025 at 4:53 AM JST, Danilo Krummrich wrote: >> On Tue Sep 2, 2025 at 4:31 PM CEST, Alexandre Courbot wrote: >>> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs >>> index 274989ea1fb4a5e3e6678a08920ddc76d2809ab2..1062014c0a488e959379f009c2e8029ffaa1e2f8 100644 >>> --- a/drivers/gpu/nova-core/driver.rs >>> +++ b/drivers/gpu/nova-core/driver.rs >>> @@ -6,6 +6,8 @@ >>> >>> #[pin_data] >>> pub(crate) struct NovaCore { >>> + // Placeholder for the real `Gsp` object once it is built. >>> + pub(crate) gsp: (), >>> #[pin] >>> pub(crate) gpu: Gpu, >>> _reg: auxiliary::Registration, >>> @@ -40,8 +42,14 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self >>> )?; >>> >>> let this = KBox::pin_init( >>> - try_pin_init!(Self { >>> + try_pin_init!(&this in Self { >>> gpu <- Gpu::new(pdev, bar)?, >>> + gsp <- { >>> + // SAFETY: `this.gpu` is initialized to a valid value. >>> + let gpu = unsafe { &(*this.as_ptr()).gpu }; >>> + >>> + gpu.start_gsp(pdev)? >>> + }, >> >> Please use pin_chain() [1] for this. > > Sorry, but I couldn't figure out how I can use pin_chain here (and > couldn't find any relevant example in the kernel code either). Can you > elaborate a bit? I thought of just doing the following, which I think should be equivalent (diff against current nova-next). diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 274989ea1fb4..6d62867f7503 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -41,7 +41,9 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self let this = KBox::pin_init( try_pin_init!(Self { - gpu <- Gpu::new(pdev, bar)?, + gpu <- Gpu::new(pdev, bar)?.pin_chain(|gpu| { + gpu.start_gsp(pdev) + }), _reg: auxiliary::Registration::new( pdev.as_ref(), c_str!("nova-drm"), diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 8caecaf7dfb4..211bc1a5a5b3 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -266,7 +266,7 @@ fn run_fwsec_frts( pub(crate) fn new( pdev: &pci::Device<device::Bound>, devres_bar: Arc<Devres<Bar0>>, - ) -> Result<impl PinInit<Self>> { + ) -> Result<impl PinInit<Self, Error>> { let bar = devres_bar.access(pdev.as_ref())?; let spec = Spec::new(bar)?; let fw = Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?; @@ -302,11 +302,16 @@ pub(crate) fn new( Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?; - Ok(pin_init!(Self { + Ok(try_pin_init!(Self { spec, bar: devres_bar, fw, sysmem_flush, })) } + + pub(crate) fn start_gsp(&self, _pdev: &pci::Device<device::Core>) -> Result { + // noop + Ok(()) + } } But maybe it doesn't capture your intend? >> >> More in general, unsafe code should be the absolute last resort. If we add new >> unsafe code I'd love to see a comment justifying why there's no other way than >> using unsafe code for this, as we agreed in [2]. >> >> I did a quick grep on this series and I see 21 occurrences of "unsafe", if I >> substract the ones for annotations and for FromBytes impls, it's still 9 new >> ones. :( >> >> Do we really need all of them? > > I've counted 16 uses of `unsafe`. :) I did a grep | wc on the mbox file, so it includes the 5 additional occurrences from the annotations. :) Otherwise the 9 "real" ones I counted seem to match the 3 bindgen ones (fine of course) plus the 5 ones from the pin initializers (we should avoid them). > > - 3 in the bindgen-generated code (these can't be avoided), > - 7 to implement `FromBytes`, > - 1 to work around the fact that `FromBytes` doesn't work on slices yet > (maybe that one can be removed) > - 5 as a result of intra-dependencies in PinInit initializers (which we > might be able to remove if I figure out how to use `pin_chain`). > > So best-case scenario would be that we will be down to 10 that are truly > unavoidable.
On Wed Sep 3, 2025 at 5:26 PM JST, Danilo Krummrich wrote: > On Wed Sep 3, 2025 at 9:08 AM CEST, Alexandre Courbot wrote: >> On Wed Sep 3, 2025 at 4:53 AM JST, Danilo Krummrich wrote: >>> On Tue Sep 2, 2025 at 4:31 PM CEST, Alexandre Courbot wrote: >>>> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs >>>> index 274989ea1fb4a5e3e6678a08920ddc76d2809ab2..1062014c0a488e959379f009c2e8029ffaa1e2f8 100644 >>>> --- a/drivers/gpu/nova-core/driver.rs >>>> +++ b/drivers/gpu/nova-core/driver.rs >>>> @@ -6,6 +6,8 @@ >>>> >>>> #[pin_data] >>>> pub(crate) struct NovaCore { >>>> + // Placeholder for the real `Gsp` object once it is built. >>>> + pub(crate) gsp: (), >>>> #[pin] >>>> pub(crate) gpu: Gpu, >>>> _reg: auxiliary::Registration, >>>> @@ -40,8 +42,14 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self >>>> )?; >>>> >>>> let this = KBox::pin_init( >>>> - try_pin_init!(Self { >>>> + try_pin_init!(&this in Self { >>>> gpu <- Gpu::new(pdev, bar)?, >>>> + gsp <- { >>>> + // SAFETY: `this.gpu` is initialized to a valid value. >>>> + let gpu = unsafe { &(*this.as_ptr()).gpu }; >>>> + >>>> + gpu.start_gsp(pdev)? >>>> + }, >>> >>> Please use pin_chain() [1] for this. >> >> Sorry, but I couldn't figure out how I can use pin_chain here (and >> couldn't find any relevant example in the kernel code either). Can you >> elaborate a bit? > > I thought of just doing the following, which I think should be equivalent (diff > against current nova-next). > > diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs > index 274989ea1fb4..6d62867f7503 100644 > --- a/drivers/gpu/nova-core/driver.rs > +++ b/drivers/gpu/nova-core/driver.rs > @@ -41,7 +41,9 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self > > let this = KBox::pin_init( > try_pin_init!(Self { > - gpu <- Gpu::new(pdev, bar)?, > + gpu <- Gpu::new(pdev, bar)?.pin_chain(|gpu| { > + gpu.start_gsp(pdev) > + }), > _reg: auxiliary::Registration::new( > pdev.as_ref(), > c_str!("nova-drm"), > diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs > index 8caecaf7dfb4..211bc1a5a5b3 100644 > --- a/drivers/gpu/nova-core/gpu.rs > +++ b/drivers/gpu/nova-core/gpu.rs > @@ -266,7 +266,7 @@ fn run_fwsec_frts( > pub(crate) fn new( > pdev: &pci::Device<device::Bound>, > devres_bar: Arc<Devres<Bar0>>, > - ) -> Result<impl PinInit<Self>> { > + ) -> Result<impl PinInit<Self, Error>> { > let bar = devres_bar.access(pdev.as_ref())?; > let spec = Spec::new(bar)?; > let fw = Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?; > @@ -302,11 +302,16 @@ pub(crate) fn new( > > Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?; > > - Ok(pin_init!(Self { > + Ok(try_pin_init!(Self { > spec, > bar: devres_bar, > fw, > sysmem_flush, > })) > } > + > + pub(crate) fn start_gsp(&self, _pdev: &pci::Device<device::Core>) -> Result { > + // noop > + Ok(()) > + } > } > > But maybe it doesn't capture your intend? The issue is that `start_gsp` returns a value (currently a placeholder `()`, but it will change into a real type) that needs to be stored into the newly-introduced `gsp` member of `NovaCore`. I could not figure how how `pin_chain` could help with this (and this is the same problem for the other `unsafe` statements in `firmware/gsp.rs`). It is a common pattern when initializing a pinned object, so I agree it would be nice support this without unsafe code.
On Wed Sep 3, 2025 at 12:44 PM CEST, Alexandre Courbot wrote: > On Wed Sep 3, 2025 at 5:26 PM JST, Danilo Krummrich wrote: >> On Wed Sep 3, 2025 at 9:08 AM CEST, Alexandre Courbot wrote: >>> On Wed Sep 3, 2025 at 4:53 AM JST, Danilo Krummrich wrote: >>>> On Tue Sep 2, 2025 at 4:31 PM CEST, Alexandre Courbot wrote: >>>>> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs >>>>> index 274989ea1fb4a5e3e6678a08920ddc76d2809ab2..1062014c0a488e959379f009c2e8029ffaa1e2f8 100644 >>>>> --- a/drivers/gpu/nova-core/driver.rs >>>>> +++ b/drivers/gpu/nova-core/driver.rs >>>>> @@ -6,6 +6,8 @@ >>>>> >>>>> #[pin_data] >>>>> pub(crate) struct NovaCore { >>>>> + // Placeholder for the real `Gsp` object once it is built. >>>>> + pub(crate) gsp: (), >>>>> #[pin] >>>>> pub(crate) gpu: Gpu, >>>>> _reg: auxiliary::Registration, >>>>> @@ -40,8 +42,14 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self >>>>> )?; >>>>> >>>>> let this = KBox::pin_init( >>>>> - try_pin_init!(Self { >>>>> + try_pin_init!(&this in Self { >>>>> gpu <- Gpu::new(pdev, bar)?, >>>>> + gsp <- { >>>>> + // SAFETY: `this.gpu` is initialized to a valid value. >>>>> + let gpu = unsafe { &(*this.as_ptr()).gpu }; >>>>> + >>>>> + gpu.start_gsp(pdev)? >>>>> + }, >>>> >>>> Please use pin_chain() [1] for this. >>> >>> Sorry, but I couldn't figure out how I can use pin_chain here (and >>> couldn't find any relevant example in the kernel code either). Can you >>> elaborate a bit? >> >> I thought of just doing the following, which I think should be equivalent (diff >> against current nova-next). >> >> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs >> index 274989ea1fb4..6d62867f7503 100644 >> --- a/drivers/gpu/nova-core/driver.rs >> +++ b/drivers/gpu/nova-core/driver.rs >> @@ -41,7 +41,9 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self >> >> let this = KBox::pin_init( >> try_pin_init!(Self { >> - gpu <- Gpu::new(pdev, bar)?, >> + gpu <- Gpu::new(pdev, bar)?.pin_chain(|gpu| { >> + gpu.start_gsp(pdev) >> + }), >> _reg: auxiliary::Registration::new( >> pdev.as_ref(), >> c_str!("nova-drm"), >> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs >> index 8caecaf7dfb4..211bc1a5a5b3 100644 >> --- a/drivers/gpu/nova-core/gpu.rs >> +++ b/drivers/gpu/nova-core/gpu.rs >> @@ -266,7 +266,7 @@ fn run_fwsec_frts( >> pub(crate) fn new( >> pdev: &pci::Device<device::Bound>, >> devres_bar: Arc<Devres<Bar0>>, >> - ) -> Result<impl PinInit<Self>> { >> + ) -> Result<impl PinInit<Self, Error>> { >> let bar = devres_bar.access(pdev.as_ref())?; >> let spec = Spec::new(bar)?; >> let fw = Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?; >> @@ -302,11 +302,16 @@ pub(crate) fn new( >> >> Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?; >> >> - Ok(pin_init!(Self { >> + Ok(try_pin_init!(Self { >> spec, >> bar: devres_bar, >> fw, >> sysmem_flush, >> })) >> } >> + >> + pub(crate) fn start_gsp(&self, _pdev: &pci::Device<device::Core>) -> Result { >> + // noop >> + Ok(()) >> + } >> } >> >> But maybe it doesn't capture your intend? > > The issue is that `start_gsp` returns a value (currently a placeholder > `()`, but it will change into a real type) that needs to be stored into > the newly-introduced `gsp` member of `NovaCore`. I could not figure how > how `pin_chain` could help with this (and this is the same problem for > the other `unsafe` statements in `firmware/gsp.rs`). Ok, I see, I think Benno is already working on a solution to access previously initialized fields from subsequent initializers. @Benno: What's the status of this? I haven't seen an issue for that in the pin-init GitHub repo, should we create one? However, in this case I'm a bit confused why we want Gsp next to Gpu? Why not just make Gsp a member of Gpu then? I thought the intent was to keep temporary values local to start_gsp() and not store them next to Gpu in the same allocation? > It is a common pattern when initializing a pinned object, so I agree it > would be nice support this without unsafe code.
On Wed Sep 3, 2025 at 8:05 PM JST, Danilo Krummrich wrote: > On Wed Sep 3, 2025 at 12:44 PM CEST, Alexandre Courbot wrote: >> On Wed Sep 3, 2025 at 5:26 PM JST, Danilo Krummrich wrote: >>> On Wed Sep 3, 2025 at 9:08 AM CEST, Alexandre Courbot wrote: >>>> On Wed Sep 3, 2025 at 4:53 AM JST, Danilo Krummrich wrote: >>>>> On Tue Sep 2, 2025 at 4:31 PM CEST, Alexandre Courbot wrote: >>>>>> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs >>>>>> index 274989ea1fb4a5e3e6678a08920ddc76d2809ab2..1062014c0a488e959379f009c2e8029ffaa1e2f8 100644 >>>>>> --- a/drivers/gpu/nova-core/driver.rs >>>>>> +++ b/drivers/gpu/nova-core/driver.rs >>>>>> @@ -6,6 +6,8 @@ >>>>>> >>>>>> #[pin_data] >>>>>> pub(crate) struct NovaCore { >>>>>> + // Placeholder for the real `Gsp` object once it is built. >>>>>> + pub(crate) gsp: (), >>>>>> #[pin] >>>>>> pub(crate) gpu: Gpu, >>>>>> _reg: auxiliary::Registration, >>>>>> @@ -40,8 +42,14 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self >>>>>> )?; >>>>>> >>>>>> let this = KBox::pin_init( >>>>>> - try_pin_init!(Self { >>>>>> + try_pin_init!(&this in Self { >>>>>> gpu <- Gpu::new(pdev, bar)?, >>>>>> + gsp <- { >>>>>> + // SAFETY: `this.gpu` is initialized to a valid value. >>>>>> + let gpu = unsafe { &(*this.as_ptr()).gpu }; >>>>>> + >>>>>> + gpu.start_gsp(pdev)? >>>>>> + }, >>>>> >>>>> Please use pin_chain() [1] for this. >>>> >>>> Sorry, but I couldn't figure out how I can use pin_chain here (and >>>> couldn't find any relevant example in the kernel code either). Can you >>>> elaborate a bit? >>> >>> I thought of just doing the following, which I think should be equivalent (diff >>> against current nova-next). >>> >>> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs >>> index 274989ea1fb4..6d62867f7503 100644 >>> --- a/drivers/gpu/nova-core/driver.rs >>> +++ b/drivers/gpu/nova-core/driver.rs >>> @@ -41,7 +41,9 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self >>> >>> let this = KBox::pin_init( >>> try_pin_init!(Self { >>> - gpu <- Gpu::new(pdev, bar)?, >>> + gpu <- Gpu::new(pdev, bar)?.pin_chain(|gpu| { >>> + gpu.start_gsp(pdev) >>> + }), >>> _reg: auxiliary::Registration::new( >>> pdev.as_ref(), >>> c_str!("nova-drm"), >>> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs >>> index 8caecaf7dfb4..211bc1a5a5b3 100644 >>> --- a/drivers/gpu/nova-core/gpu.rs >>> +++ b/drivers/gpu/nova-core/gpu.rs >>> @@ -266,7 +266,7 @@ fn run_fwsec_frts( >>> pub(crate) fn new( >>> pdev: &pci::Device<device::Bound>, >>> devres_bar: Arc<Devres<Bar0>>, >>> - ) -> Result<impl PinInit<Self>> { >>> + ) -> Result<impl PinInit<Self, Error>> { >>> let bar = devres_bar.access(pdev.as_ref())?; >>> let spec = Spec::new(bar)?; >>> let fw = Firmware::new(pdev.as_ref(), spec.chipset, FIRMWARE_VERSION)?; >>> @@ -302,11 +302,16 @@ pub(crate) fn new( >>> >>> Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?; >>> >>> - Ok(pin_init!(Self { >>> + Ok(try_pin_init!(Self { >>> spec, >>> bar: devres_bar, >>> fw, >>> sysmem_flush, >>> })) >>> } >>> + >>> + pub(crate) fn start_gsp(&self, _pdev: &pci::Device<device::Core>) -> Result { >>> + // noop >>> + Ok(()) >>> + } >>> } >>> >>> But maybe it doesn't capture your intend? >> >> The issue is that `start_gsp` returns a value (currently a placeholder >> `()`, but it will change into a real type) that needs to be stored into >> the newly-introduced `gsp` member of `NovaCore`. I could not figure how >> how `pin_chain` could help with this (and this is the same problem for >> the other `unsafe` statements in `firmware/gsp.rs`). > > Ok, I see, I think Benno is already working on a solution to access previously > initialized fields from subsequent initializers. > > @Benno: What's the status of this? I haven't seen an issue for that in the > pin-init GitHub repo, should we create one? > > However, in this case I'm a bit confused why we want Gsp next to Gpu? Why not > just make Gsp a member of Gpu then? To be honest I am not completely sure about the best layout yet and will need more visibility to understand whether this is optimal. But considering that we want to run the GSP boot process over a built `Gpu` instance, we cannot store the result of said process inside `Gpu` unless we put it inside e.g. an `Option`. But then the variant will always be `Some` after `probe` returns, and yet we will have to perform a match every time we want to access it. The current separation sounds reasonable to me for the time being, with `Gpu` containing purely hardware resources obtained without help from user-space, while `Gsp` is the result of running a bunch of firmwares. An alternative design would be to store `Gpu` inside `Gsp`, but `Gsp` inside `Gpu` is trickier due to the build order. No matter what we do, switching the layout later should be trivial if we don't choose the best one now. There is also an easy workaround to the sibling initialization issue, which is to store `Gpu` and `Gsp` behind `Pin<KBox>` - that way we can initialize both outside `try_pin_init!`, at the cost of two more heap allocations over the whole lifetime of the device. If we don't have a proper solution to the problem now, this might be better than using `unsafe` as a temporary solution. The same workaround could also be used for to `GspFirmware` and its page tables - since `GspFirmware` is temporary and can apparently be discarded after the GSP is booted, this shouldn't be a big issue. This will allow the driver to probe, and we can add TODO items to fix that later if a solution is in sight. > > I thought the intent was to keep temporary values local to start_gsp() and not > store them next to Gpu in the same allocation? It is not visible in the current patchset, but `start_gsp` will eventually return the runtime data of the GSP - notably its log buffers and command queue, which are needed to operate it. All the rest (notably the loaded firmwares) will be local to `start_gsp` and discarded upon its return.
On Wed Sep 3, 2025 at 2:29 PM CEST, Alexandre Courbot wrote: > To be honest I am not completely sure about the best layout yet and will > need more visibility to understand whether this is optimal. But > considering that we want to run the GSP boot process over a built `Gpu` > instance, we cannot store the result of said process inside `Gpu` unless > we put it inside e.g. an `Option`. But then the variant will always be > `Some` after `probe` returns, and yet we will have to perform a match > every time we want to access it. > > The current separation sounds reasonable to me for the time being, with > `Gpu` containing purely hardware resources obtained without help from > user-space, while `Gsp` is the result of running a bunch of firmwares. > An alternative design would be to store `Gpu` inside `Gsp`, but `Gsp` > inside `Gpu` is trickier due to the build order. No matter what we do, > switching the layout later should be trivial if we don't choose the > best one now. Gsp should be part of the Gpu object. The Gpu object represents the entire instance of the Gpu, including hardware ressources, firmware runtime state, etc. The initialization of the Gsp structure doesn't really need a Gpu structure to be constructed, it needs certain members of the Gpu structure, i.e. order of initialization of the members does matter. If it makes things more obvious we can always create new types and increase the hierarchy within the Gpu struct itself. The technical limitation you're facing is always the same, no matter the layout we choose: we need pin-init to provide us references to already initialized members. I will check with Benno in today's Rust call what's the best way to address this. > There is also an easy workaround to the sibling initialization issue, > which is to store `Gpu` and `Gsp` behind `Pin<KBox>` - that way we can > initialize both outside `try_pin_init!`, at the cost of two more heap > allocations over the whole lifetime of the device. If we don't have a > proper solution to the problem now, this might be better than using > `unsafe` as a temporary solution. Yeah, this workaround is much easier to implement when they're siblings (less allocations temporarily), but let's not design things this way because of that. As mentioned above, I will check with Benno today. > The same workaround could also be used for to `GspFirmware` and its page > tables - since `GspFirmware` is temporary and can apparently be > discarded after the GSP is booted, this shouldn't be a big issue. This > will allow the driver to probe, and we can add TODO items to fix that > later if a solution is in sight. > >> >> I thought the intent was to keep temporary values local to start_gsp() and not >> store them next to Gpu in the same allocation? > > It is not visible in the current patchset, but `start_gsp` will > eventually return the runtime data of the GSP - notably its log buffers > and command queue, which are needed to operate it. All the rest (notably > the loaded firmwares) will be local to `start_gsp` and discarded upon > its return. Ok, that makes sense, but it should really be part of the Gpu structure.
Hi Danilo, On Wed, Sep 03, 2025 at 04:53:57PM +0200, Danilo Krummrich wrote: > On Wed Sep 3, 2025 at 2:29 PM CEST, Alexandre Courbot wrote: > > To be honest I am not completely sure about the best layout yet and will > > need more visibility to understand whether this is optimal. But > > considering that we want to run the GSP boot process over a built `Gpu` > > instance, we cannot store the result of said process inside `Gpu` unless > > we put it inside e.g. an `Option`. But then the variant will always be > > `Some` after `probe` returns, and yet we will have to perform a match > > every time we want to access it. > > > > The current separation sounds reasonable to me for the time being, with > > `Gpu` containing purely hardware resources obtained without help from > > user-space, while `Gsp` is the result of running a bunch of firmwares. > > An alternative design would be to store `Gpu` inside `Gsp`, but `Gsp` > > inside `Gpu` is trickier due to the build order. No matter what we do, > > switching the layout later should be trivial if we don't choose the > > best one now. > > Gsp should be part of the Gpu object. Just checking, if Gsp is a part of Gpu as you mentioned, and start_gsp() is called from within Gpu::new(), does that not avoid the problem of referring to fields of pin-initialized structs entirely, at least for this usecase for nova? Or is there any nova-related usecase where this is problematic? thanks, - Joel [...] > The Gpu object represents the entire > instance of the Gpu, including hardware ressources, firmware runtime state, etc. > > The initialization of the Gsp structure doesn't really need a Gpu structure to > be constructed, it needs certain members of the Gpu structure, i.e. order of > initialization of the members does matter. > > If it makes things more obvious we can always create new types and increase the > hierarchy within the Gpu struct itself. > > The technical limitation you're facing is always the same, no matter the layout > we choose: we need pin-init to provide us references to already initialized > members. > > I will check with Benno in today's Rust call what's the best way to address > this. > > > There is also an easy workaround to the sibling initialization issue, > > which is to store `Gpu` and `Gsp` behind `Pin<KBox>` - that way we can > > initialize both outside `try_pin_init!`, at the cost of two more heap > > allocations over the whole lifetime of the device. If we don't have a > > proper solution to the problem now, this might be better than using > > `unsafe` as a temporary solution. > > Yeah, this workaround is much easier to implement when they're siblings (less > allocations temporarily), but let's not design things this way because of that. > > As mentioned above, I will check with Benno today. > > > The same workaround could also be used for to `GspFirmware` and its page > > tables - since `GspFirmware` is temporary and can apparently be > > discarded after the GSP is booted, this shouldn't be a big issue. This > > will allow the driver to probe, and we can add TODO items to fix that > > later if a solution is in sight. > > > >> > >> I thought the intent was to keep temporary values local to start_gsp() and not > >> store them next to Gpu in the same allocation? > > > > It is not visible in the current patchset, but `start_gsp` will > > eventually return the runtime data of the GSP - notably its log buffers > > and command queue, which are needed to operate it. All the rest (notably > > the loaded firmwares) will be local to `start_gsp` and discarded upon > > its return. > > Ok, that makes sense, but it should really be part of the Gpu structure.
On Wed Sep 3, 2025 at 4:08 PM JST, Alexandre Courbot wrote: > On Wed Sep 3, 2025 at 4:53 AM JST, Danilo Krummrich wrote: >> On Tue Sep 2, 2025 at 4:31 PM CEST, Alexandre Courbot wrote: >>> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs >>> index 274989ea1fb4a5e3e6678a08920ddc76d2809ab2..1062014c0a488e959379f009c2e8029ffaa1e2f8 100644 >>> --- a/drivers/gpu/nova-core/driver.rs >>> +++ b/drivers/gpu/nova-core/driver.rs >>> @@ -6,6 +6,8 @@ >>> >>> #[pin_data] >>> pub(crate) struct NovaCore { >>> + // Placeholder for the real `Gsp` object once it is built. >>> + pub(crate) gsp: (), >>> #[pin] >>> pub(crate) gpu: Gpu, >>> _reg: auxiliary::Registration, >>> @@ -40,8 +42,14 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self >>> )?; >>> >>> let this = KBox::pin_init( >>> - try_pin_init!(Self { >>> + try_pin_init!(&this in Self { >>> gpu <- Gpu::new(pdev, bar)?, >>> + gsp <- { >>> + // SAFETY: `this.gpu` is initialized to a valid value. >>> + let gpu = unsafe { &(*this.as_ptr()).gpu }; >>> + >>> + gpu.start_gsp(pdev)? >>> + }, >> >> Please use pin_chain() [1] for this. > > Sorry, but I couldn't figure out how I can use pin_chain here (and > couldn't find any relevant example in the kernel code either). Can you > elaborate a bit? To be more specific on what I don't get: I see how pin_chain could be used to initialize a structure which dependent member can take a temporary value (like a pointer set to `null`), but in this case `gsp` must be initialized with the result of `start_gsp`, and there is no "default" valid value for it meanwhile (I use `()` as its type, but it is a temporary placeholder). But maybe I am just misunderstanding something about how `pin_chain` can be used.
© 2016 - 2025 Red Hat, Inc.