Require that Live Update preserved devices are in singleton iommu_groups
during preservation (outgoing kernel) and retrieval (incoming kernel).
PCI devices preserved across Live Update will be allowed to perform
memory transactions throughout the Live Update. Thus IOMMU groups for
preserved devices must remain fixed. Since all current use cases for
Live Update are for PCI devices in singleton iommu_groups, require that
as a starting point. This avoids the complexity of needing to enforce
arbitrary iommu_group topologies while still allowing all current use
cases.
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: David Matlack <dmatlack@google.com>
---
drivers/pci/liveupdate.c | 34 +++++++++++++++++++++++++++++++++-
1 file changed, 33 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/liveupdate.c b/drivers/pci/liveupdate.c
index bec7b3500057..a3dbe06650ff 100644
--- a/drivers/pci/liveupdate.c
+++ b/drivers/pci/liveupdate.c
@@ -75,6 +75,8 @@
*
* * The device must not be a Physical Function (PF).
*
+ * * The device must be the only device in its IOMMU group.
+ *
* Preservation Behavior
* =====================
*
@@ -105,6 +107,7 @@
#include <linux/bsearch.h>
#include <linux/io.h>
+#include <linux/iommu.h>
#include <linux/kexec_handover.h>
#include <linux/kho/abi/pci.h>
#include <linux/liveupdate.h>
@@ -222,6 +225,31 @@ static void pci_ser_delete(struct pci_ser *ser, struct pci_dev *dev)
ser->nr_devices--;
}
+static int count_devices(struct device *dev, void *__nr_devices)
+{
+ (*(int *)__nr_devices)++;
+ return 0;
+}
+
+static int pci_liveupdate_validate_iommu_group(struct pci_dev *dev)
+{
+ struct iommu_group *group;
+ int nr_devices = 0;
+
+ group = iommu_group_get(&dev->dev);
+ if (group) {
+ iommu_group_for_each_dev(group, &nr_devices, count_devices);
+ iommu_group_put(group);
+ }
+
+ if (nr_devices != 1) {
+ pci_warn(dev, "Live Update preserved devices must be in singleton iommu groups!");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int pci_liveupdate_preserve(struct pci_dev *dev)
{
struct pci_dev_ser new = INIT_PCI_DEV_SER(dev);
@@ -232,6 +260,10 @@ int pci_liveupdate_preserve(struct pci_dev *dev)
if (dev->is_virtfn || dev->is_physfn)
return -EINVAL;
+ ret = pci_liveupdate_validate_iommu_group(dev);
+ if (ret)
+ return ret;
+
guard(mutex)(&pci_flb_outgoing_lock);
if (dev->liveupdate_outgoing)
@@ -357,7 +389,7 @@ int pci_liveupdate_retrieve(struct pci_dev *dev)
if (!dev->liveupdate_incoming)
return -EINVAL;
- return 0;
+ return pci_liveupdate_validate_iommu_group(dev);
}
EXPORT_SYMBOL_GPL(pci_liveupdate_retrieve);
--
2.53.0.983.g0bb29b3bc5-goog
On Mon, Mar 23, 2026 at 11:57:55PM +0000, David Matlack wrote:
> Require that Live Update preserved devices are in singleton iommu_groups
> during preservation (outgoing kernel) and retrieval (incoming kernel).
>
> PCI devices preserved across Live Update will be allowed to perform
> memory transactions throughout the Live Update. Thus IOMMU groups for
> preserved devices must remain fixed. Since all current use cases for
> Live Update are for PCI devices in singleton iommu_groups, require that
> as a starting point. This avoids the complexity of needing to enforce
> arbitrary iommu_group topologies while still allowing all current use
> cases.
>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: David Matlack <dmatlack@google.com>
> ---
> drivers/pci/liveupdate.c | 34 +++++++++++++++++++++++++++++++++-
> 1 file changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/pci/liveupdate.c b/drivers/pci/liveupdate.c
> index bec7b3500057..a3dbe06650ff 100644
> --- a/drivers/pci/liveupdate.c
> +++ b/drivers/pci/liveupdate.c
> @@ -75,6 +75,8 @@
> *
> * * The device must not be a Physical Function (PF).
> *
> + * * The device must be the only device in its IOMMU group.
> + *
> * Preservation Behavior
> * =====================
> *
> @@ -105,6 +107,7 @@
>
> #include <linux/bsearch.h>
> #include <linux/io.h>
> +#include <linux/iommu.h>
> #include <linux/kexec_handover.h>
> #include <linux/kho/abi/pci.h>
> #include <linux/liveupdate.h>
> @@ -222,6 +225,31 @@ static void pci_ser_delete(struct pci_ser *ser, struct pci_dev *dev)
> ser->nr_devices--;
> }
>
> +static int count_devices(struct device *dev, void *__nr_devices)
> +{
> + (*(int *)__nr_devices)++;
> + return 0;
> +}
> +
> +static int pci_liveupdate_validate_iommu_group(struct pci_dev *dev)
> +{
> + struct iommu_group *group;
> + int nr_devices = 0;
> +
> + group = iommu_group_get(&dev->dev);
> + if (group) {
> + iommu_group_for_each_dev(group, &nr_devices, count_devices);
> + iommu_group_put(group);
> + }
> +
> + if (nr_devices != 1) {
> + pci_warn(dev, "Live Update preserved devices must be in singleton iommu groups!");
> + return -EINVAL;
> + }
> +
> + return 0;
I assume the requirement is that there *is* an iommu_group and also
that dev is the only member. If so, I think the intent would be a
little clearer as:
group = iommu_group_get(&dev->dev);
if (!group)
goto no_group;
iommu_group_for_each_dev(group, &nr_devices, count_devices);
iommu_group_put(group);
if (nr_devices == 1)
return 0;
no_group:
pci_warn(...);
return -EINVAL;
> +}
> +
> int pci_liveupdate_preserve(struct pci_dev *dev)
> {
> struct pci_dev_ser new = INIT_PCI_DEV_SER(dev);
> @@ -232,6 +260,10 @@ int pci_liveupdate_preserve(struct pci_dev *dev)
> if (dev->is_virtfn || dev->is_physfn)
> return -EINVAL;
>
> + ret = pci_liveupdate_validate_iommu_group(dev);
> + if (ret)
> + return ret;
> +
> guard(mutex)(&pci_flb_outgoing_lock);
>
> if (dev->liveupdate_outgoing)
> @@ -357,7 +389,7 @@ int pci_liveupdate_retrieve(struct pci_dev *dev)
> if (!dev->liveupdate_incoming)
> return -EINVAL;
>
> - return 0;
> + return pci_liveupdate_validate_iommu_group(dev);
> }
> EXPORT_SYMBOL_GPL(pci_liveupdate_retrieve);
>
> --
> 2.53.0.983.g0bb29b3bc5-goog
>
On 3/24/26 07:57, David Matlack wrote:
> Require that Live Update preserved devices are in singleton iommu_groups
> during preservation (outgoing kernel) and retrieval (incoming kernel).
>
> PCI devices preserved across Live Update will be allowed to perform
> memory transactions throughout the Live Update. Thus IOMMU groups for
> preserved devices must remain fixed. Since all current use cases for
> Live Update are for PCI devices in singleton iommu_groups, require that
> as a starting point. This avoids the complexity of needing to enforce
> arbitrary iommu_group topologies while still allowing all current use
> cases.
>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: David Matlack <dmatlack@google.com>
> ---
> drivers/pci/liveupdate.c | 34 +++++++++++++++++++++++++++++++++-
> 1 file changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/pci/liveupdate.c b/drivers/pci/liveupdate.c
> index bec7b3500057..a3dbe06650ff 100644
> --- a/drivers/pci/liveupdate.c
> +++ b/drivers/pci/liveupdate.c
> @@ -75,6 +75,8 @@
> *
> * * The device must not be a Physical Function (PF).
> *
> + * * The device must be the only device in its IOMMU group.
> + *
> * Preservation Behavior
> * =====================
> *
> @@ -105,6 +107,7 @@
>
> #include <linux/bsearch.h>
> #include <linux/io.h>
> +#include <linux/iommu.h>
> #include <linux/kexec_handover.h>
> #include <linux/kho/abi/pci.h>
> #include <linux/liveupdate.h>
> @@ -222,6 +225,31 @@ static void pci_ser_delete(struct pci_ser *ser, struct pci_dev *dev)
> ser->nr_devices--;
> }
>
> +static int count_devices(struct device *dev, void *__nr_devices)
> +{
> + (*(int *)__nr_devices)++;
> + return 0;
> +}
> +
There was a related discussion on the singleton group check. Have you
considered device_group_immutable_singleton() from the link below?
https://lore.kernel.org/linux-iommu/20220421052121.3464100-4-baolu.lu@linux.intel.com/
Regards,
Yi Liu
On 2026-03-24 09:07 PM, Yi Liu wrote:
> On 3/24/26 07:57, David Matlack wrote:
> > Require that Live Update preserved devices are in singleton iommu_groups
> > during preservation (outgoing kernel) and retrieval (incoming kernel).
> >
> > PCI devices preserved across Live Update will be allowed to perform
> > memory transactions throughout the Live Update. Thus IOMMU groups for
> > preserved devices must remain fixed. Since all current use cases for
> > Live Update are for PCI devices in singleton iommu_groups, require that
> > as a starting point. This avoids the complexity of needing to enforce
> > arbitrary iommu_group topologies while still allowing all current use
> > cases.
> >
> > Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> > Signed-off-by: David Matlack <dmatlack@google.com>
> > ---
> > drivers/pci/liveupdate.c | 34 +++++++++++++++++++++++++++++++++-
> > 1 file changed, 33 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/pci/liveupdate.c b/drivers/pci/liveupdate.c
> > index bec7b3500057..a3dbe06650ff 100644
> > --- a/drivers/pci/liveupdate.c
> > +++ b/drivers/pci/liveupdate.c
> > @@ -75,6 +75,8 @@
> > *
> > * * The device must not be a Physical Function (PF).
> > *
> > + * * The device must be the only device in its IOMMU group.
> > + *
> > * Preservation Behavior
> > * =====================
> > *
> > @@ -105,6 +107,7 @@
> > #include <linux/bsearch.h>
> > #include <linux/io.h>
> > +#include <linux/iommu.h>
> > #include <linux/kexec_handover.h>
> > #include <linux/kho/abi/pci.h>
> > #include <linux/liveupdate.h>
> > @@ -222,6 +225,31 @@ static void pci_ser_delete(struct pci_ser *ser, struct pci_dev *dev)
> > ser->nr_devices--;
> > }
> > +static int count_devices(struct device *dev, void *__nr_devices)
> > +{
> > + (*(int *)__nr_devices)++;
> > + return 0;
> > +}
> > +
>
> there was a related discussion on the singleton group check. have you
> considered the device_group_immutable_singleton() in below link?
>
> https://lore.kernel.org/linux-iommu/20220421052121.3464100-4-baolu.lu@linux.intel.com/
Thanks for the link.
Based on the discussion in the follow-up threads, I think the only check
in that function that is needed on top of what is in this patch to
ensure group immutability is this one:
/*
* The device could be considered to be fully isolated if
* all devices on the path from the device to the host-PCI
* bridge are protected from peer-to-peer DMA by ACS.
*/
if (!pci_acs_path_enabled(pdev, NULL, REQ_ACS_FLAGS))
return false;
However, this would restrict Live Update support to only device
topologies that have these flags enabled. I am not yet sure if this
would be overly restrictive for the scenarios we care about supporting.
An alternative way to ensure immutability would be to block adding
devices at probe time. i.e. Fail pci_device_group() if the device being
added has liveupdate_incoming=True, or if the group already contains a
device with liveupdate_{incoming,outgoing}=True. We would still need the
check in pci_liveupdate_preserve() to protect against setting
liveupdate_outgoing=True on a device in a multi-device group.
On 3/25/26 02:00, David Matlack wrote:
> On 2026-03-24 09:07 PM, Yi Liu wrote:
>> On 3/24/26 07:57, David Matlack wrote:
>>> Require that Live Update preserved devices are in singleton iommu_groups
>>> during preservation (outgoing kernel) and retrieval (incoming kernel).
>>>
>>> PCI devices preserved across Live Update will be allowed to perform
>>> memory transactions throughout the Live Update. Thus IOMMU groups for
>>> preserved devices must remain fixed. Since all current use cases for
>>> Live Update are for PCI devices in singleton iommu_groups, require that
>>> as a starting point. This avoids the complexity of needing to enforce
>>> arbitrary iommu_group topologies while still allowing all current use
>>> cases.
>>>
>>> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
>>> Signed-off-by: David Matlack <dmatlack@google.com>
>>> ---
>>> drivers/pci/liveupdate.c | 34 +++++++++++++++++++++++++++++++++-
>>> 1 file changed, 33 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/pci/liveupdate.c b/drivers/pci/liveupdate.c
>>> index bec7b3500057..a3dbe06650ff 100644
>>> --- a/drivers/pci/liveupdate.c
>>> +++ b/drivers/pci/liveupdate.c
>>> @@ -75,6 +75,8 @@
>>> *
>>> * * The device must not be a Physical Function (PF).
>>> *
>>> + * * The device must be the only device in its IOMMU group.
>>> + *
>>> * Preservation Behavior
>>> * =====================
>>> *
>>> @@ -105,6 +107,7 @@
>>> #include <linux/bsearch.h>
>>> #include <linux/io.h>
>>> +#include <linux/iommu.h>
>>> #include <linux/kexec_handover.h>
>>> #include <linux/kho/abi/pci.h>
>>> #include <linux/liveupdate.h>
>>> @@ -222,6 +225,31 @@ static void pci_ser_delete(struct pci_ser *ser, struct pci_dev *dev)
>>> ser->nr_devices--;
>>> }
>>> +static int count_devices(struct device *dev, void *__nr_devices)
>>> +{
>>> + (*(int *)__nr_devices)++;
>>> + return 0;
>>> +}
>>> +
>>
>> there was a related discussion on the singleton group check. have you
>> considered the device_group_immutable_singleton() in below link?
>>
>> https://lore.kernel.org/linux-iommu/20220421052121.3464100-4-baolu.lu@linux.intel.com/
>
> Thanks for the link.
>
> Based on the discussion in the follow-up threads, I think the only check
> in that function that is needed on top of what is in this patch to
> ensure group immutability is this one:
>
> /*
> * The device could be considered to be fully isolated if
> * all devices on the path from the device to the host-PCI
> * bridge are protected from peer-to-peer DMA by ACS.
> */
> if (!pci_acs_path_enabled(pdev, NULL, REQ_ACS_FLAGS))
> return false;
>
> However, this would restrict Live Update support to only device
> topologies that have these flags enabled. I am not yet sure if this
> would be overly restrictive for the scenarios we care about supporting.
Yes. It's a bit different from that thread, which required the group not
only to be a singleton but also to be immutable.
> An alternative way to ensure immutability would be to block adding
> devices at probe time. i.e. Fail pci_device_group() if the device being
> added has liveupdate_incoming=True, or if the group already contains a
> device with liveupdate_{incoming,outgoing}=True. We would still need the
> check in pci_liveupdate_preserve() to pretect against setting
> liveupdate_outgoing=True on a device in a multi-device group.
This looks good to me, but it would disallow hot-plugging devices during
Live Update. I'm not sure whether any decision has been made w.r.t.
hotplug. Is that acceptable?
BTW, a question not specific to this patch: if a failure happens after
executing kexec, is there any chance to fall back to the prior kernel?
Regards,
Yi Liu
On 2026-03-25 07:12 PM, Yi Liu wrote:
>
>
> On 3/25/26 02:00, David Matlack wrote:
> > On 2026-03-24 09:07 PM, Yi Liu wrote:
> > > On 3/24/26 07:57, David Matlack wrote:
> > > > Require that Live Update preserved devices are in singleton iommu_groups
> > > > during preservation (outgoing kernel) and retrieval (incoming kernel).
> > > >
> > > > PCI devices preserved across Live Update will be allowed to perform
> > > > memory transactions throughout the Live Update. Thus IOMMU groups for
> > > > preserved devices must remain fixed. Since all current use cases for
> > > > Live Update are for PCI devices in singleton iommu_groups, require that
> > > > as a starting point. This avoids the complexity of needing to enforce
> > > > arbitrary iommu_group topologies while still allowing all current use
> > > > cases.
> > > >
> > > > Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> > > > Signed-off-by: David Matlack <dmatlack@google.com>
> > > > ---
> > > > drivers/pci/liveupdate.c | 34 +++++++++++++++++++++++++++++++++-
> > > > 1 file changed, 33 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/pci/liveupdate.c b/drivers/pci/liveupdate.c
> > > > index bec7b3500057..a3dbe06650ff 100644
> > > > --- a/drivers/pci/liveupdate.c
> > > > +++ b/drivers/pci/liveupdate.c
> > > > @@ -75,6 +75,8 @@
> > > > *
> > > > * * The device must not be a Physical Function (PF).
> > > > *
> > > > + * * The device must be the only device in its IOMMU group.
> > > > + *
> > > > * Preservation Behavior
> > > > * =====================
> > > > *
> > > > @@ -105,6 +107,7 @@
> > > > #include <linux/bsearch.h>
> > > > #include <linux/io.h>
> > > > +#include <linux/iommu.h>
> > > > #include <linux/kexec_handover.h>
> > > > #include <linux/kho/abi/pci.h>
> > > > #include <linux/liveupdate.h>
> > > > @@ -222,6 +225,31 @@ static void pci_ser_delete(struct pci_ser *ser, struct pci_dev *dev)
> > > > ser->nr_devices--;
> > > > }
> > > > +static int count_devices(struct device *dev, void *__nr_devices)
> > > > +{
> > > > + (*(int *)__nr_devices)++;
> > > > + return 0;
> > > > +}
> > > > +
> > >
> > > there was a related discussion on the singleton group check. have you
> > > considered the device_group_immutable_singleton() in below link?
> > >
> > > https://lore.kernel.org/linux-iommu/20220421052121.3464100-4-baolu.lu@linux.intel.com/
> >
> > Thanks for the link.
> >
> > Based on the discussion in the follow-up threads, I think the only check
> > in that function that is needed on top of what is in this patch to
> > ensure group immutability is this one:
> >
> > /*
> > * The device could be considered to be fully isolated if
> > * all devices on the path from the device to the host-PCI
> > * bridge are protected from peer-to-peer DMA by ACS.
> > */
> > if (!pci_acs_path_enabled(pdev, NULL, REQ_ACS_FLAGS))
> > return false;
> >
> > However, this would restrict Live Update support to only device
> > topologies that have these flags enabled. I am not yet sure if this
> > would be overly restrictive for the scenarios we care about supporting.
>
> yes. It's a bit different from that thread in which not only require
> singleton group but also need to be immutable.
>
> > An alternative way to ensure immutability would be to block adding
> > devices at probe time. i.e. Fail pci_device_group() if the device being
> > added has liveupdate_incoming=True, or if the group already contains a
> > device with liveupdate_{incoming,outgoing}=True. We would still need the
> > check in pci_liveupdate_preserve() to pretect against setting
> > liveupdate_outgoing=True on a device in a multi-device group.
>
> this looks good to me. But you'll disallow hotplug-in during liveupdate.
> not sure about if any decision w.r.t. hotplug. is it acceptable?
Anyone doing hotplug during the middle of a Live Update is asking for
trouble IMO. And it would only prevent a hot-plugged device from coming
up if it were to be added to the same iommu_group as an existing
preserved device. I think that is reasonable.
> BTW. A question not specific to this patch. If failure happens after
> executing kexec, is there any chance to fallback to the prior kernel?
There are many failure paths during the reboot() syscall that can return
back to userspace, and then userspace can figure out how to bring the
system (e.g. VMs) back online on the current kernel.
But otherwise, kexec is currently a one-way door. Once you kexec into
the new kernel, you would have to do another Live Update to get back
into the previous kernel.
© 2016 - 2026 Red Hat, Inc.