While in KVM mode, the driver must be loaded after the hypervisor
initializes.
Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 25 ++++++++++++++++-----
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 10ca07c6dbe9..a04730b5fe41 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -4576,12 +4576,6 @@ static const struct of_device_id arm_smmu_of_match[] = {
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
-static void arm_smmu_driver_unregister(struct platform_driver *drv)
-{
- arm_smmu_sva_notifier_synchronize();
- platform_driver_unregister(drv);
-}
-
static struct platform_driver arm_smmu_driver = {
.driver = {
.name = "arm-smmu-v3",
@@ -4592,8 +4586,27 @@ static struct platform_driver arm_smmu_driver = {
.remove = arm_smmu_device_remove,
.shutdown = arm_smmu_device_shutdown,
};
+
+#ifndef CONFIG_ARM_SMMU_V3_PKVM
+static void arm_smmu_driver_unregister(struct platform_driver *drv)
+{
+ arm_smmu_sva_notifier_synchronize();
+ platform_driver_unregister(drv);
+}
+
module_driver(arm_smmu_driver, platform_driver_register,
arm_smmu_driver_unregister);
+#else
+/*
+ * Must be done after the hypervisor initializes at module_init()
+ * No need for unregister as this is a built in driver.
+ */
+static int arm_smmu_driver_register(void)
+{
+ return platform_driver_register(&arm_smmu_driver);
+}
+device_initcall_sync(arm_smmu_driver_register);
+#endif /* !CONFIG_ARM_SMMU_V3_PKVM */
MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
--
2.51.0.rc1.167.g924127e9c0-goog
On Tue, Aug 19, 2025 at 09:51:43PM +0000, Mostafa Saleh wrote:
> While in KVM mode, the driver must be loaded after the hypervisor
> initializes.
>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 25 ++++++++++++++++-----
> 1 file changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 10ca07c6dbe9..a04730b5fe41 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -4576,12 +4576,6 @@ static const struct of_device_id arm_smmu_of_match[] = {
> };
> MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
>
> -static void arm_smmu_driver_unregister(struct platform_driver *drv)
> -{
> - arm_smmu_sva_notifier_synchronize();
> - platform_driver_unregister(drv);
> -}
> -
> static struct platform_driver arm_smmu_driver = {
> .driver = {
> .name = "arm-smmu-v3",
> @@ -4592,8 +4586,27 @@ static struct platform_driver arm_smmu_driver = {
> .remove = arm_smmu_device_remove,
> .shutdown = arm_smmu_device_shutdown,
> };
> +
> +#ifndef CONFIG_ARM_SMMU_V3_PKVM
> +static void arm_smmu_driver_unregister(struct platform_driver *drv)
> +{
> + arm_smmu_sva_notifier_synchronize();
> + platform_driver_unregister(drv);
> +}
> +
> module_driver(arm_smmu_driver, platform_driver_register,
> arm_smmu_driver_unregister);
> +#else
> +/*
> + * Must be done after the hypervisor initializes at module_init()
> + * No need for unregister as this is a built in driver.
> + */
> +static int arm_smmu_driver_register(void)
> +{
> + return platform_driver_register(&arm_smmu_driver);
> +}
> +device_initcall_sync(arm_smmu_driver_register);
> +#endif /* !CONFIG_ARM_SMMU_V3_PKVM */
I think this is a bit grotty as we now have to reason about different
initialisation ordering based on CONFIG_ARM_SMMU_V3_PKVM. Could we
instead return -EPROBE_DEFER if the driver tries to probe before the
hypervisor is up?
Will
On Fri, Sep 12, 2025 at 02:54:11PM +0100, Will Deacon wrote:
> On Tue, Aug 19, 2025 at 09:51:43PM +0000, Mostafa Saleh wrote:
> > While in KVM mode, the driver must be loaded after the hypervisor
> > initializes.
> >
> > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > ---
> > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 25 ++++++++++++++++-----
> > 1 file changed, 19 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index 10ca07c6dbe9..a04730b5fe41 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -4576,12 +4576,6 @@ static const struct of_device_id arm_smmu_of_match[] = {
> > };
> > MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
> >
> > -static void arm_smmu_driver_unregister(struct platform_driver *drv)
> > -{
> > - arm_smmu_sva_notifier_synchronize();
> > - platform_driver_unregister(drv);
> > -}
> > -
> > static struct platform_driver arm_smmu_driver = {
> > .driver = {
> > .name = "arm-smmu-v3",
> > @@ -4592,8 +4586,27 @@ static struct platform_driver arm_smmu_driver = {
> > .remove = arm_smmu_device_remove,
> > .shutdown = arm_smmu_device_shutdown,
> > };
> > +
> > +#ifndef CONFIG_ARM_SMMU_V3_PKVM
> > +static void arm_smmu_driver_unregister(struct platform_driver *drv)
> > +{
> > + arm_smmu_sva_notifier_synchronize();
> > + platform_driver_unregister(drv);
> > +}
> > +
> > module_driver(arm_smmu_driver, platform_driver_register,
> > arm_smmu_driver_unregister);
> > +#else
> > +/*
> > + * Must be done after the hypervisor initializes at module_init()
> > + * No need for unregister as this is a built in driver.
> > + */
> > +static int arm_smmu_driver_register(void)
> > +{
> > + return platform_driver_register(&arm_smmu_driver);
> > +}
> > +device_initcall_sync(arm_smmu_driver_register);
> > +#endif /* !CONFIG_ARM_SMMU_V3_PKVM */
>
> I think this is a bit grotty as we now have to reason about different
> initialisation ordering based on CONFIG_ARM_SMMU_V3_PKVM. Could we
> instead return -EPROBE_DEFER if the driver tries to probe before the
> hypervisor is up?
I looked a bit into this and I think the current approach would be
better because:
1- In case KVM fails to initialise or was disabled from command line,
waiting for the hypervisor means SMMUs may never probe.
One of the things I was cautious to get right is the error path,
so if KVM or the nested driver fails at any point during initialization,
the SMMUs should still be probed and the system should still be running
even without KVM.
2- That's not as bad, but it leaks some KVM internals as we need to check
either is_kvm_arm_initialised() or kvm_protected_mode_initialized from
driver code, as opposed to registering the driver late based on a kernel
config for the nested SMMUv3.
If we really want to avoid the current approach, we can keep deferring probe
until a new flag is set from “finalize_pkvm”, which is called
unconditionally, regardless of KVM state.
Thanks,
Mostafa
>
> Will
On Tue, Sep 23, 2025 at 02:35:48PM +0000, Mostafa Saleh wrote: > If we really want to avoid the current approach, we can keep deferring probe, > until a check for a new flag set from “finalize_pkvm” which is called > unconditionally of KVM state. I still think the pkvm drivers should be bound to some special pkvm device_driver and the driver core should handle all this special dancing: - Wait for pkvm to decide if it will start or not - Claim a device for pkvm and make it visible in some generic way,eg in sysfs - Fall back to using the normal driver once we conclude pkvm won't run. It sounds like a pain to open code all this logic in every pkvm driver? How many do you have? Jason
On Tue, Sep 23, 2025 at 02:38:06PM -0300, Jason Gunthorpe wrote: > On Tue, Sep 23, 2025 at 02:35:48PM +0000, Mostafa Saleh wrote: > > If we really want to avoid the current approach, we can keep deferring probe, > > until a check for a new flag set from “finalize_pkvm” which is called > > unconditionally of KVM state. > > I still think the pkvm drivers should be bound to some special pkvm > device_driver and the driver core should handle all this special > dancing: > - Wait for pkvm to decide if it will start or not > - Claim a device for pkvm and make it visible in some generic way,eg > in sysfs > - Fall back to using the normal driver once we conclude pkvm won't > run. > > It sounds like a pain to open code all this logic in every pkvm > driver? How many do you have? I though more about this, I think involving the driver core will be useful in the future for init, as it will ensure power domains are probed before the SMMUs when RPM is supported. One simple way to do that, is the make the KVM SMMUv3 driver bind to the SMMUs first until KVM finish init, then it unbinds them so the main driver can be bind to them, that will not require any changes or assumptions from the main driver, but in runtime the KVM driver can't interact with the driver model. Another possible solution, to keep a device bound to the KVM driver, is to probe the SMMUs from the KVM driver, then to create child devices; possibly use something as device_set_of_node_from_dev to bind those to the main SMMUv3 or find another way to probe the main SMMUv3 without changes. Then we have a clear parent/child representation in the kernel, we can also use sysfs/debugfs. But this might be more challenging, I will look more into both and will update the logic in v5. Thanks, Mostafa > > Jason
On Mon, Sep 29, 2025 at 11:10:11AM +0000, Mostafa Saleh wrote: > Another possible solution, to keep a device bound to the KVM driver, > is to probe the SMMUs from the KVM driver, then to create child devices; > possibly use something as device_set_of_node_from_dev to bind those to > the main SMMUv3 or find another way to probe the main SMMUv3 without > changes. I do prefer something more like this one, I think it is nice that the kvm specific driver will remain bound and visible so there is some breadcrumbs about what happened to the system for debugging/etc. Not sure how to do it, but I think it should be achievable.. Maybe even a simple faux/aux device and just pick up the of_node from the parent.. Jason
On Thu, Oct 02, 2025 at 12:13:08PM -0300, Jason Gunthorpe wrote:
> On Mon, Sep 29, 2025 at 11:10:11AM +0000, Mostafa Saleh wrote:
> > Another possible solution, to keep a device bound to the KVM driver,
> > is to probe the SMMUs from the KVM driver, then to create child devices;
> > possibly use something as device_set_of_node_from_dev to bind those to
> > the main SMMUv3 or find another way to probe the main SMMUv3 without
> > changes.
>
> I do prefer something more like this one, I think it is nice that the
> kvm specific driver will remain bound and visible so there is some
> breadcrumbs about what happened to the system for debugging/etc.
>
> Not sure how to do it, but I think it should be achievable..
>
> Maybe even a simple faux/aux device and just pick up the of_node from
> the parent..
I spent some time looking into this
With the approach of creating new devices as:
pdev = platform_device_alloc(dev_name(dev), PLATFORM_DEVID_AUTO);
pdev->dev.parent = dev;
device_set_node(&pdev->dev, dev->fwnode);
platform_device_add_resources(pdev, cur_pdev->resource,
cur_pdev->num_resources);
platform_device_add(pdev);
That is done from an init call after KVM init, where the KVM driver
probes the SMMUs, which then does
bus_rescan_devices(&platform_bus_type);
In the KVM driver probe, it had:
if (pdev->dev.parent->driver == &smmuv3_nesting_driver.driver)
return -ENODEV;
Which causes the main SMMU driver to probe the new devices.
However, that didn’t work because, from Linux's perspective, the
nested driver was bound to all the SMMUs, which means that any
device that is connected to an SMMUv3 has its dependencies met, which
caused those drivers to start probing without IOMMU ops.
Also, the approach with bind/unbind seemed to not work reliably
because of the same reason.
Looking into the probe path, it roughly does.
1) Device / Driver matching driver_match_device
2) Check suppliers before probe (device_links_check_suppliers)
3) Actual probe
I can’t see a way of adding dependencies in #1
For #2, there are 2 problems:
i) It’s not clear how to create links, something as fwnode_link_add()
won’t work as one of the devices won’t have fwnode and device_link_add()
will need the device to be already created (and not sure how
to guarantee it won’t probe)
ii) Assuming we were able to create the link, it will be set to
DL_STATE_AVAILABLE once the nested driver probes, which won’t prevent
the main driver from probing till KVM initialises.
It seems device links are not the right tool to use.
So far, the requirements we need to satisfy are:
1- No driver should bind to the SMMUs before KVM initialises.
2- Back the nested driver with devices and possibly link them
The only possible solutions I see:
1- Keep patch as is
2- Check if KVM is initialised from the SMMUv3 driver,
if not -EPROBE_DEFER (as Will suggested), that will be guarded by the
KVM driver macro and cmdline to enable protected mode.
Then if needed, we can create devices from the nested driver and link
them to the main ones in same initcall after the devices are created.
I can look into more suggestions; otherwise, I will try #2
with the -EPROBE_DEFER.
Thanks,
Mostafa
>
> Jason
On Wed, Nov 05, 2025 at 04:40:26PM +0000, Mostafa Saleh wrote: > However, that didn’t work because, as from Linux perspective the > nested driver was bound to all the SMMUs which means that any > device that is connected to an SMMUv3 has its dependencies met, which > caused those drivers to start probing without IOMMU ops. ?? What code is doing this? If a struct device gets a fwspec attached to it then it should not permit any driver to probe until iommu_init_device() has succeeded. This broadly needs to work to support iommu drivers as modules that are loaded by the initrd. So the general principal of causing devices to not progress should already be there and work, if it doesn't then maybe it needs some fixing. I expect iommu_init_device() to fail on devices up until the actual iommu driver is loaded. iommu_fwspec_ops() should fail because iommu_from_fwnode() will not find fwnode in the iommu_device_list until the iommu subsystem driver is bound, the kvm driver cannot supply this. So where do things go wrong for you? > It seems device links are not the write tool to use. Yes > So far, the requirements we need to satisfy are: > 1- No driver should bind to the SMMUs before KVM initialises. Using the above I'd expect a sequence where the KVM SMMU driver loads first, it does it's bit, then once KVM is happy it creates the actual SMMU driver which registers in iommu_device_list and triggers driver binding. This is basically an identical sequence to loading an iommu driver from the initrd - just the trigger for the delayed load is the kvm creating the device, not udev runnign. > 2- Check if KVM is initialised from the SMMUv3 driver, > if not -EPROBE_DEFER (as Will suggested), that will guarded by the > KVM driver macro and cmdline to enable protected mode. SMMUv3 driver shouldn't even be bound until KVM is ready and it is an actual working driver? Do this by not creating the struct device until it is ready. 
Also Greg will not like if you use platform devices here, use an aux device.. Jason
On Wed, Nov 05, 2025 at 01:12:08PM -0400, Jason Gunthorpe wrote: > On Wed, Nov 05, 2025 at 04:40:26PM +0000, Mostafa Saleh wrote: > > However, that didn’t work because, as from Linux perspective the > > nested driver was bound to all the SMMUs which means that any > > device that is connected to an SMMUv3 has its dependencies met, which > > caused those drivers to start probing without IOMMU ops. > > ?? > > What code is doing this? > > If a struct device gets a fwspec attached to it then it should not > permit any driver to probe until iommu_init_device() has > succeeded. This broadly needs to work to support iommu drivers as > modules that are loaded by the initrd. > > So the general principal of causing devices to not progress should > already be there and work, if it doesn't then maybe it needs some > fixing. > > I expect iommu_init_device() to fail on devices up until the actual > iommu driver is loaded. iommu_fwspec_ops() should fail because > iommu_from_fwnode() will not find fwnode in the iommu_device_list > until the iommu subsystem driver is bound, the kvm driver cannot > supply this. > > So where do things go wrong for you? Thanks for the explanation, I had a closer look, and indeed I was confused, iommu_init_device() was failing because of .probe_device(). Because of device_set_node(), now both devices have the same fwnode, so bus_find_device_by_fwnode() from arm_smmu_get_by_fwnode() was returning the wrong device. driver_find_device_by_fwnode() seems to work, but that makes me question the reliability of this approach. > > > It seems device links are not the write tool to use. > > Yes > > > So far, the requirements we need to satisfy are: > > 1- No driver should bind to the SMMUs before KVM initialises. > > Using the above I'd expect a sequence where the KVM SMMU driver loads > first, it does it's bit, then once KVM is happy it creates the actual > SMMU driver which registers in iommu_device_list and triggers driver > binding. 
> > This is basically an identical sequence to loading an iommu driver > from the initrd - just the trigger for the delayed load is the kvm > creating the device, not udev runnign. SMMUv3 driver as a module won't be a problem as modules are loaded later after KVM initialises. The problem is mainly with the SMMUv3 driver built-in, I don't think there is a way to delay loading of the driver, besides this patch, which registers the driver later in case of KVM. > > > 2- Check if KVM is initialised from the SMMUv3 driver, > > if not -EPROBE_DEFER (as Will suggested), that will guarded by the > > KVM driver macro and cmdline to enable protected mode. > > SMMUv3 driver shouldn't even be bound until KVM is ready and it is an > actual working driver? Do this by not creating the struct device until > it is ready. > > Also Greg will not like if you use platform devices here, use an aux > device.. > But I am not sure if it is possible with built-in drivers to delay the binding. Also, I had to use platform devices for this, as the KVM driver binds to the actual SMMUv3 nodes, and then duplicates them so the SMMUv3 driver can bind to the duplicate nodes, where the KVM devices are the parent, but this approach seems complicated, besides the problems mentioned above. The other approach would be to keep defering in case of KVM: @@ -4454,6 +4454,10 @@ static int arm_smmu_device_probe(struct platform_device *pdev) struct arm_smmu_device *smmu; struct device *dev = &pdev->dev; + if (IS_ENABLED(CONFIG_ARM_SMMU_V3_PKVM) && is_protected_kvm_enabled() && + !static_branch_unlikely(&kvm_protected_mode_initialized)) + return -EPROBE_DEFER; That works for me. And if we want to back the KVM driver with device I was thinking we can rely on impl_ops, that has 2 benefits: 1- The SMMUv3 devices can be the parent instead of KVM. 2- The KVM devices can be faux/aux as they are not coming from FW and don't need to be on the platform bus. And this is simpler. 
Besides this approach and the one in this patch, I don't see a simple way of achieving this without adding extra support in the driver model/platform bus to express such dependency. Thanks, Mostafa > Jason
On Thu, Nov 06, 2025 at 11:06:11AM +0000, Mostafa Saleh wrote:
> Thanks for the explanation, I had a closer look, and indeed I was
> confused, iommu_init_device() was failing because of .probe_device().
> Because of device_set_node(), now both devices have the same fwnode,
> so bus_find_device_by_fwnode() from arm_smmu_get_by_fwnode() was returning
> the wrong device.
>
> driver_find_device_by_fwnode() seems to work, but that makes me question
> the reliability of this approach.
Yeah, this stuff is nasty. See the discussion here.
https://lore.kernel.org/linux-iommu/0d5d4d02-eb78-43dc-8784-83c0760099f7@arm.com/
riscv doesn't search, so maybe ARM should follow its technique:
static struct iommu_device *riscv_iommu_probe_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct riscv_iommu_device *iommu;
struct riscv_iommu_info *info;
struct riscv_iommu_dc *dc;
u64 tc;
int i;
if (!fwspec || !fwspec->iommu_fwnode->dev || !fwspec->num_ids)
return ERR_PTR(-ENODEV);
iommu = dev_get_drvdata(fwspec->iommu_fwnode->dev);
if (!iommu)
return ERR_PTR(-ENODEV);
It would make it reliable..
> > > 2- Check if KVM is initialised from the SMMUv3 driver,
> > > if not -EPROBE_DEFER (as Will suggested), that will guarded by the
> > > KVM driver macro and cmdline to enable protected mode.
> >
> > SMMUv3 driver shouldn't even be bound until KVM is ready and it is an
> > actual working driver? Do this by not creating the struct device until
> > it is ready.
> >
> > Also Greg will not like if you use platform devices here, use an aux
> > device..
>
> But I am not sure if it is possible with built-in drivers to delay
> the binding.
You should never be delaying binding, you should be delaying creating
the device that will be bound.
pkvm claims the platform device.
pkvm completes its initialization and then creates an aux device
smmu driver binds the aux device and grabs the real platform_device
smmu driver grabs the resources it needs from the parent, including
the of node. No duplication.
Seems straightforward to me.
> Also, I had to use platform devices for this, as the KVM driver binds
> to the actual SMMUv3 nodes, and then duplicates them so the SMMUv3
> driver can bind to the duplicate nodes, where the KVM devices are the
> parent, but this approach seems complicated, besides the problems
> mentioned above.
I don't think you need to do this, you can use aux device and the
fwspec things all search the iommu_devices_list to find the
iommu_driver. You don't need to duplicate anything.
Create the aux driver when the emulated smmu is ready to go.
> That works for me. And if we want to back the KVM driver with device I was
> thinking we can rely on impl_ops, that has 2 benefits:
> 1- The SMMUv3 devices can be the parent instead of KVM.
> 2- The KVM devices can be faux/aux as they are not coming from FW and
> don't need to be on the platform bus.
IMHO this is backwards. The kvm driver should be probing first, the
smmu driver should come later once kvm is ready to go.
> Besides this approach and the one in this patch, I don't see a simple way
> of achieving this without adding extra support in the driver model/platform
> bus to express such dependency.
You shouldn't need anything like this.
Jason
On Thu, Nov 06, 2025 at 09:23:31AM -0400, Jason Gunthorpe wrote:
> On Thu, Nov 06, 2025 at 11:06:11AM +0000, Mostafa Saleh wrote:
> > Thanks for the explanation, I had a closer look, and indeed I was
> > confused, iommu_init_device() was failing because of .probe_device().
> > Because of device_set_node(), now both devices have the same fwnode,
> > so bus_find_device_by_fwnode() from arm_smmu_get_by_fwnode() was returning
> > the wrong device.
> >
> > driver_find_device_by_fwnode() seems to work, but that makes me question
> > the reliability of this approach.
>
> Yeah, this stuff is nasty. See the discussion here.
>
> https://lore.kernel.org/linux-iommu/0d5d4d02-eb78-43dc-8784-83c0760099f7@arm.com/
>
> riscv doesn't search, so maybe ARM should follow it's technique:
>
> static struct iommu_device *riscv_iommu_probe_device(struct device *dev)
> {
> struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> struct riscv_iommu_device *iommu;
> struct riscv_iommu_info *info;
> struct riscv_iommu_dc *dc;
> u64 tc;
> int i;
>
> if (!fwspec || !fwspec->iommu_fwnode->dev || !fwspec->num_ids)
> return ERR_PTR(-ENODEV);
>
> iommu = dev_get_drvdata(fwspec->iommu_fwnode->dev);
> if (!iommu)
> return ERR_PTR(-ENODEV);
>
> It would make it reliable..
That makes sense, and it will address the problem Robin was solving also:
https://lore.kernel.org/r/6d7ce1dc31873abdb75c895fb8bd2097cce098b4.1733406914.git.robin.murphy@arm.com
>
> > > > 2- Check if KVM is initialised from the SMMUv3 driver,
> > > > if not -EPROBE_DEFER (as Will suggested), that will guarded by the
> > > > KVM driver macro and cmdline to enable protected mode.
> > >
> > > SMMUv3 driver shouldn't even be bound until KVM is ready and it is an
> > > actual working driver? Do this by not creating the struct device until
> > > it is ready.
> > >
> > > Also Greg will not like if you use platform devices here, use an aux
> > > device..
> >
> > But I am not sure if it is possible with built-in drivers to delay
> > the binding.
>
> You should never be delaying binding, you should be delaying creating
> the device that will be bound.
>
> pkvm claims the platform device.
>
> pkvm completes its initialization and then creates an aux device
>
> smmu driver binds the aux device and grabs the real platform_device
>
> smmu driver grabs the resources it needs from the parent, including
> the of node. No duplication.
>
> Seems straightforward to me.
Maybe I am misunderstanding this, but that looks really intrusive to me,
at the moment arm-smmu-v3.c is a platform driver, and relies on the
platform bus to understand the device (platform_get_resource...)
You are suggesting to change that so it can also bind to AUX devices, then
change the “arm_smmu_device_probe” function to understand that and possibly
parse info from the parent device?
One of the main benefits from choosing trap and emulate was that it
looks transparent from the kernel's point of view, so doing such radical
changes to adapt to KVM doesn't look right to me, I think the driver
should remain as is (a platform driver that thinks it's directly
talking to the HW).
The only thing we need to do is to make the SMMUs available after
KVM is up (at the device_initcall_sync level).
>
> > Also, I had to use platform devices for this, as the KVM driver binds
> > to the actual SMMUv3 nodes, and then duplicates them so the SMMUv3
> > driver can bind to the duplicate nodes, where the KVM devices are the
> > parent, but this approach seems complicated, besides the problems
> > mentioned above.
>
> I don't think you need to do this this, you can use aux device and the
> fwspec things all search the iommu_devices_list to find the
> iommu_driver. You don't need to duplicate anything.
>
> Create the aux driver when the emulated smmu is ready to go.
See my point above.
>
> > That works for me. And if we want to back the KVM driver with device I was
> > thinking we can rely on impl_ops, that has 2 benefits:
>
> > 1- The SMMUv3 devices can be the parent instead of KVM.
> > 2- The KVM devices can be faux/aux as they are not coming from FW and
> > don't need to be on the platform bus.
>
> IMHO this is backwards. The kvm driver should be probing first, the
> smmu driver should come later once kvm is ready to go.
Agree.
>
> > Besides this approach and the one in this patch, I don't see a simple way
> > of achieving this without adding extra support in the driver model/platform
> > bus to express such dependency.
>
> You shouldn't need anything like this.
Agree.
Thanks,
Mostafa
>
> Jason
On Thu, Nov 06, 2025 at 04:54:38PM +0000, Mostafa Saleh wrote:
> Maybe I am misunderstanding this, but that looks really intrusive to me,
> at the moment arm-smmuv-3.c is a platform driver, and rely on the
> platform bus to understand the device (platform_get_resource...)
>
> You are suggesting to change that so it can also bind to AUX devices, then
> change the “arm_smmu_device_probe” function to understand that and possibly
> parse info from the parent device?
Yes, it is probably only a couple lines I think. You still have a
platform device, it just comes from a different spot.
I didn't audit it closely, but basically it starts like this:
-static int arm_smmu_device_probe(struct platform_device *pdev)
+/*
+ * dev is the device that the driver is bound to
+ * pdev is the device that has the physical resources describing the smmu
+ */
+static int arm_smmu_device_probe_impl(struct device *dev,
+ struct platform_device *pdev)
{
int irq, ret;
struct resource *res;
resource_size_t ioaddr;
struct arm_smmu_device *smmu;
- struct device *dev = &pdev->dev;
smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
if (!smmu)
Probably needs some adjustments to switch places between pdev/dev, but
the ones I looked at were all OK already..
In the aux case dev is the aux dev, otherwise dev and pdev are the
same thing. devm related stuff has to use dev.
> One of the main benefits from choosing trap and emulate was that it
> looks transparent from the kernel of point view, so doing such radical
> changes to adapt to KVM doesn't look right to me, I think the driver
> should remain as is (a platform driver that thinks it's directly
> talking to the HW).
I'm not so fixed on this idea, this kvm stuff makes enough meaningful
changes I don't think we need to sweep it all under the rug completely
fully transparently. If you need a couple of edits to the probe
function that's fine in my book.
Jason
© 2016 - 2026 Red Hat, Inc.