Save the PCI driver name into "struct pci_dev_ser" during the PCI
prepare callback.
After kexec, use driver_set_override() to ensure the device is
bound only to the saved driver.
Clear the override after the finish callback.
Signed-off-by: Chris Li <chrisl@kernel.org>
---
drivers/pci/liveupdate.c | 36 ++++++++++++++++++++++++++++++++++--
drivers/pci/pci.h | 2 ++
drivers/pci/probe.c | 2 ++
3 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/drivers/pci/liveupdate.c b/drivers/pci/liveupdate.c
index 41606df346f751c78f6c69caa275b4a76be72510..ae8f4dc5cf92577a4da83743c3b80bc72974a43e 100644
--- a/drivers/pci/liveupdate.c
+++ b/drivers/pci/liveupdate.c
@@ -21,6 +21,7 @@ static LIST_HEAD(probe_devices);
struct pci_dev_ser {
u32 path; /* domain + bus + slot + fn */
u32 flags;
+ char driver_name[63];
u64 driver_data; /* driver data */
};
@@ -87,6 +88,10 @@ static int build_liveupdate_devices(struct list_head *head)
static void dev_cleanup_liveupdate(struct device *dev)
{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (liveupdate_state_updated())
+ WARN_ON(driver_set_override(dev, &pdev->driver_override, "", 0));
dev->lu.flags &= ~LU_DEPENDED;
dev->lu.dev_state = NULL;
list_del_init(&dev->lu.lu_next);
@@ -135,7 +140,13 @@ static int pci_get_device_path(struct pci_dev *pdev)
static int pci_save_device_state(struct device *dev, struct pci_dev_ser *s)
{
struct pci_dev *pdev = to_pci_dev(dev);
+ const char *name = dev->driver->name;
+ if (!name)
+ return -ENXIO;
+ if (strlen(name) > sizeof(s->driver_name) - 1)
+ return -ENOSPC;
+ strscpy(s->driver_name, name, sizeof(s->driver_name));
s->path = pci_get_device_path(pdev);
s->flags = dev->lu.flags;
return 0;
@@ -363,8 +374,8 @@ static void pci_dev_do_restore(struct pci_dev *dev, struct pci_dev_ser *s)
{
dev->dev.lu.dev_state = s;
dev->dev.lu.flags = s->flags;
- pci_info(dev, "liveupdate restore flags %x data: [%llx]\n",
- s->flags, s->driver_data);
+ pci_info(dev, "liveupdate restore flags %x driver: %s data: [%llx]\n",
+ s->flags, s->driver_name, s->driver_data);
list_move_tail(&dev->dev.lu.lu_next, &probe_devices);
}
@@ -384,6 +395,27 @@ void pci_liveupdate_restore(struct pci_dev *dev)
return pci_dev_do_restore(dev, s);
}
+void pci_liveupdate_override_driver(struct pci_dev *dev)
+{
+ struct pci_dev_ser *s = dev->dev.lu.dev_state;
+ int ret;
+ int len;
+
+ if (!s)
+ return;
+
+ len = strlen(s->driver_name);
+ if (!len)
+ return;
+
+ ret = driver_set_override(&dev->dev,
+ &dev->driver_override,
+ s->driver_name, len);
+ if (ret)
+ panic("PCI Liveupdate override driver failed: %s", s->driver_name);
+}
+
+
static int __init pci_liveupdate_init(void)
{
int ret;
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index c9a7383753949994e031dc362920286a475fe2ab..b79a18c5e948980fe2ef3f0a10e0d795b1eee6d7 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1161,8 +1161,10 @@ static inline int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int inde
#ifdef CONFIG_LIVEUPDATE
void pci_liveupdate_restore(struct pci_dev *dev);
+void pci_liveupdate_override_driver(struct pci_dev *dev);
#else
static inline void pci_liveupdate_restore(struct pci_dev *dev) {}
+static inline void pci_liveupdate_override_driver(struct pci_dev *dev) {}
#endif
#endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index a0605af1a699cd07b09897172803dcba1d2da9f9..e41a1bef2083aa9184fd1c894d5de964f19d5c01 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2714,6 +2714,8 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
/* Set up MSI IRQ domain */
pci_set_msi_domain(dev);
+ pci_liveupdate_override_driver(dev);
+
/* Notifier could use PCI capabilities */
ret = device_add(&dev->dev);
WARN_ON(ret < 0);
--
2.51.0.384.g4c02a37b29-goog
On Tue, Sep 16, 2025 at 12:45:14AM -0700, Chris Li wrote: > Save the PCI driver name into "struct pci_dev_ser" during the PCI > prepare callback. > > After kexec, use driver_set_override() to ensure the device is > bound only to the saved driver. This doesn't seem like a great idea, driver name should not be made ABI. I would drop this patch and punt to the initrd. We need a more flexible way to manage driver auto binding for CC under initrd control anyhow, the same should be reused for hypervisors to shift driver binding policy to userspace. Jason
On Mon, Sep 29, 2025 at 10:57 AM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > On Tue, Sep 16, 2025 at 12:45:14AM -0700, Chris Li wrote: > > Save the PCI driver name into "struct pci_dev_ser" during the PCI > > prepare callback. > > > > After kexec, use driver_set_override() to ensure the device is > > bound only to the saved driver. > > This doesn't seem like a great idea, driver name should not be made > ABI. Let's break it down with baby steps. 1) Do you agree the liveupdated PCI device needs to bind to the exact same driver after kexec? To me that is a firm yes. If the device binds to another driver, we can't expect the other driver will understand the original driver's saved state. 2) Assume the 1) is yes from you. Are you just not happy that the kernel saves the driver name? You want user space to save it, is that it? How does it reference the driver after kexec otherwise? If the driver has a UUID, I am happy to use that driver UUID. But it doesn't. Using the driver name can match to the kernel PCI driver_override framework. If we are not using driver_override API, we need some other API to prevent it from binding to other drivers. Do you just want the kernel not to save it and the user space(initrd) to save the driver name? Someone needs to bind that driver_override when the PCI device is enumerated. Specifying it in the initrd before the PCI enumeration would be too early. It hasn't found the PCI saved device state. After the PCI enumeration would be too late. > I would drop this patch and punt to the initrd. We need a more > flexible way to manage driver auto binding for CC under initrd control > anyhow, the same should be reused for hypervisors to shift driver > binding policy to userspace. What does CC stand for? Once in the liveupdate, the liveupdated device and the binding driver are fixed. It seems (to me) more complicated to let the initrd fetch the liveupdate saved state and then do stuff with it. 
The initrd is not part of the kernel, more like user space programming. It is not able to use the LUO API to get the list of preserved PCI devices etc. We can add an API route for user space to access the preserved data in the kernel. But that seems to be extra complexity. Once it is in the liveupdate, there is no flexible driver binding policy for a device currently under liveupdate; the device needs to bind to its original driver. I feel that I am missing something, please help me understand. Chris
On Mon, Sep 29, 2025 at 10:10 PM Chris Li <chrisl@kernel.org> wrote: > > On Mon, Sep 29, 2025 at 10:57 AM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > > > On Tue, Sep 16, 2025 at 12:45:14AM -0700, Chris Li wrote: > > > Save the PCI driver name into "struct pci_dev_ser" during the PCI > > > prepare callback. > > > > > > After kexec, use driver_set_override() to ensure the device is > > > bound only to the saved driver. > > > > This doesn't seem like a great idea, driver name should not be made > > ABI. > > Let's break it down with baby steps. > > 1) Do you agree the liveupdated PCI device needs to bind to the exact > same driver after kexec? > To me that is a firm yes. If the driver binds to another driver, we > can't expect the other driver will understand the original driver's > saved state. Hi Chris, Driver name does not have to be an ABI. Drivers that support live updates should provide a live update-specific ABI to detect compatibility with the preserved data. We can use a preservation schema GUID for this. > 2) Assume the 1) is yes from you. Are you just not happy that the > kernel saves the driver name? You want user space to save it, is that > it? > How does it reference the driver after kexec otherwise? If we use GUID, drivers would advertise the GUIDs they support and we would modify the core device-driver matching process to use this information. Each driver that supports this mechanism would need to declare an array of GUIDs it is compatible with. This would be a new field in its struct pci_driver. static const guid_t my_driver_guids[] = { GUID_INIT(0x123e4567, ...), // Schema V1 GUID_INIT(0x987a6543, ...), // Schema V2 {}, }; static struct pci_driver my_pci_driver = { .name = "my_driver", .id_table = my_pci_ids, .probe = my_probe, .live_update_guids = my_driver_guids, }; The kernel's PCI core would perform an extra check before falling back to the standard PCI ID matching. 1. 
When a PCI device is discovered, the core first asks the Live Update framework: "Is there a preserved GUID for this device?" 2. If a GUID is found, the core will only attempt to bind drivers that both match the device's PCI ID and have that specific GUID in their live_update_guids list. 3. If no GUID is preserved for the device, the core proceeds with the normal matching logic 4. If no driver matches the GUID, the device is left unbound. The state gets removed during finish(), and the device is reset. Pasha
On Tue, Sep 30, 2025 at 09:02:44AM -0400, Pasha Tatashin wrote: > The kernel's PCI core would perform an extra check before falling back > to the standard PCI ID matching. This still seems very complex just to solve the VFIO case. As I said, I would punt all of this to the initrd and let the initrd explicitly bind drivers. The only behavior we need from the kernel is to not autobind some drivers so userspace can control it, and in a LUO type environment userspace should well know what drivers go where - or can get it from a preceding kernel from a memfd. This is broadly the same thing we need for Confidential Compute anyhow. Jason
On Tue, Sep 30, 2025 at 09:02:44AM -0400, Pasha Tatashin wrote: > On Mon, Sep 29, 2025 at 10:10 PM Chris Li <chrisl@kernel.org> wrote: > > > > On Mon, Sep 29, 2025 at 10:57 AM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > > > > > On Tue, Sep 16, 2025 at 12:45:14AM -0700, Chris Li wrote: > > > > Save the PCI driver name into "struct pci_dev_ser" during the PCI > > > > prepare callback. > > > > > > > > After kexec, use driver_set_override() to ensure the device is > > > > bound only to the saved driver. > > > > > > This doesn't seem like a great idea, driver name should not be made > > > ABI. > > > > Let's break it down with baby steps. > > > > 1) Do you agree the liveupdated PCI device needs to bind to the exact > > same driver after kexec? > > To me that is a firm yes. If the driver binds to another driver, we > > can't expect the other driver will understand the original driver's > > saved state. > > Hi Chris, > > Driver name does not have to be an ABI. A driver name can NEVER be an abi, please don't do that. > Drivers that support live > updates should provide a live update-specific ABI to detect > compatibility with the preserved data. We can use a preservation > schema GUID for this. > > > 2) Assume the 1) is yes from you. Are you just not happy that the > > kernel saves the driver name? You want user space to save it, is that > > it? > > How does it reference the driver after kexec otherwise? > > If we use GUID, drivers would advertise the GUIDs they support and we > would modify the core device-driver matching process to use this > information. > > Each driver that supports this mechanism would need to declare an > array of GUIDs it is compatible with. This would be a new field in its > struct pci_driver. > > static const guid_t my_driver_guids[] = { > GUID_INIT(0x123e4567, ...), // Schema V1 > GUID_INIT(0x987a6543, ...), // Schema V2 > {}, > }; That's crazy, who is going to be adding all of that to all drivers? 
And knowing to bump this if the internal data representation changes? And it will change underneath it without the driver even knowing? This feels really really wrong, unless I'm missing something. > static struct pci_driver my_pci_driver = { > .name = "my_driver", > .id_table = my_pci_ids, > .probe = my_probe, > .live_update_guids = my_driver_guids, > }; > > The kernel's PCI core would perform an extra check before falling back > to the standard PCI ID matching. > 1. When a PCI device is discovered, the core first asks the Live > Update framework: "Is there a preserved GUID for this device?" > 2. If a GUID is found, the core will only attempt to bind drivers that > both match the device's PCI ID and have that specific GUID in their > live_update_guids list. What "core" is doing this? And how exactly? And why is PCI somehow special here? > 3. If no GUID is preserved for the device, the core proceeds with the > normal matching logic > 4. If no driver matches the GUID, the device is left unbound. The > state gets removed during finish(), and the device is reset. How do you reset a device you are not bound to? That feels ripe for causing problems (think multi-function devices...) And what about PCI drivers that are really just an aux-bus "root" point? How is the sharing of all of the child devices going to work? This feels really rough and might possibly work if you squint hard enough and test it in a very limited way with almost no real hardware :) good luck! greg k-h
On Tue, Sep 30, 2025 at 6:41 AM Greg Kroah-Hartman <gregkh@linuxfoundation.org> wrote: > > On Tue, Sep 30, 2025 at 09:02:44AM -0400, Pasha Tatashin wrote: > > On Mon, Sep 29, 2025 at 10:10 PM Chris Li <chrisl@kernel.org> wrote: > > > > > > On Mon, Sep 29, 2025 at 10:57 AM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > > > > > > > On Tue, Sep 16, 2025 at 12:45:14AM -0700, Chris Li wrote: > > > > > Save the PCI driver name into "struct pci_dev_ser" during the PCI > > > > > prepare callback. > > > > > > > > > > After kexec, use driver_set_override() to ensure the device is > > > > > bound only to the saved driver. > > > > > > > > This doesn't seem like a great idea, driver name should not be made > > > > ABI. > > > > > > Let's break it down with baby steps. > > > > > > 1) Do you agree the liveupdated PCI device needs to bind to the exact > > > same driver after kexec? > > > To me that is a firm yes. If the driver binds to another driver, we > > > can't expect the other driver will understand the original driver's > > > saved state. > > > > Hi Chris, > > > > Driver name does not have to be an ABI. > > A driver name can NEVER be an abi, please don't do that. Can you please clarify that. for example, the pci has this sysfs control api: "/sys/bus/pci/devices/0000:04:00.0/driver_override" which takes the *driver name* as data to override what driver is allowed to bind to this device. Does this driver_override consider it as using the driver name as part of the abi? If not, why? What live update wants is to make that driver_override persistent over kexec. It does not introduce the "driver_override" API. That is pre-existing conditions. The PCI liveupdate just wants to use it. I want to get some basic understanding before adventure into the more complex solutions. > > Drivers that support live > > updates should provide a live update-specific ABI to detect > > compatibility with the preserved data. We can use a preservation > > schema GUID for this. 
> > > > > 2) Assume the 1) is yes from you. Are you just not happy that the > > > kernel saves the driver name? You want user space to save it, is that > > > it? > > > How does it reference the driver after kexec otherwise? > > > > If we use GUID, drivers would advertise the GUIDs they support and we > > would modify the core device-driver matching process to use this > > information. > > > > Each driver that supports this mechanism would need to declare an > > array of GUIDs it is compatible with. This would be a new field in its > > struct pci_driver. > > > > static const guid_t my_driver_guids[] = { > > GUID_INIT(0x123e4567, ...), // Schema V1 > > GUID_INIT(0x987a6543, ...), // Schema V2 > > {}, > > }; > > That's crazy, who is going to be adding all of that to all drivers? And > knowing to bump this if the internal data representaion changes? And it > will change underneath it without the driver even knowing? This feels > really really wrong, unless I'm missing something. The GUID is more complex than a driver name. I am fine with not using GUID if you are so strongly opposed to it. You are saying don't do A(driver name) and B(GUID). I am waiting for the part where you say "please do C instead". Do you have any other suggestion how to prevent the live update PCI device bind to a different driver after kexec? I am happy to work on the direction you point out and turn that into a patch for the discussion purpose. Thanks Chris > > static struct pci_driver my_pci_driver = { > > .name = "my_driver", > > .id_table = my_pci_ids, > > .probe = my_probe, > > .live_update_guids = my_driver_guids, > > }; > > > > The kernel's PCI core would perform an extra check before falling back > > to the standard PCI ID matching. > > 1. When a PCI device is discovered, the core first asks the Live > > Update framework: "Is there a preserved GUID for this device?" > > 2. 
If a GUID is found, the core will only attempt to bind drivers that > > both match the device's PCI ID and have that specific GUID in their > > live_update_guids list. > > What "core" is doing this? And how exactly? > > And why is PCI somehow special here? > > > 3. If no GUID is preserved for the device, the core proceeds with the > > normal matching logic > > 4. If no driver matches the GUID, the device is left unbound. The > > state gets removed during finish(), and the device is reset. > > How do you reset a device you are not bound to? That feels ripe for > causing problems (think multi-function devices...) > > And what about PCI drivers that are really just a aux-bus "root" point? > How is the sharing of all of the child devices going to work? > > This feels really rough and might possibly work if you squint hard > enough and test it in a very limited way with almost no real hardware :) > > good luck! > > greg k-h
On Tue, Sep 30, 2025 at 08:41:29AM -0700, Chris Li wrote: > On Tue, Sep 30, 2025 at 6:41 AM Greg Kroah-Hartman > <gregkh@linuxfoundation.org> wrote: > > > > On Tue, Sep 30, 2025 at 09:02:44AM -0400, Pasha Tatashin wrote: > > > On Mon, Sep 29, 2025 at 10:10 PM Chris Li <chrisl@kernel.org> wrote: > > > > > > > > On Mon, Sep 29, 2025 at 10:57 AM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > > > > > > > > > On Tue, Sep 16, 2025 at 12:45:14AM -0700, Chris Li wrote: > > > > > > Save the PCI driver name into "struct pci_dev_ser" during the PCI > > > > > > prepare callback. > > > > > > > > > > > > After kexec, use driver_set_override() to ensure the device is > > > > > > bound only to the saved driver. > > > > > > > > > > This doesn't seem like a great idea, driver name should not be made > > > > > ABI. > > > > > > > > Let's break it down with baby steps. > > > > > > > > 1) Do you agree the liveupdated PCI device needs to bind to the exact > > > > same driver after kexec? > > > > To me that is a firm yes. If the driver binds to another driver, we > > > > can't expect the other driver will understand the original driver's > > > > saved state. > > > > > > Hi Chris, > > > > > > Driver name does not have to be an ABI. > > > > A driver name can NEVER be an abi, please don't do that. > > Can you please clarify that. > > for example, the pci has this sysfs control api: > > "/sys/bus/pci/devices/0000:04:00.0/driver_override" which takes the > *driver name* as data to override what driver is allowed to bind to > this device. > Does this driver_override consider it as using the driver name as part > of the abi? If not, why? Because the bind/unbind/override was created as a debug facility for doing kernel development and then people have turned it into a "let's operate our massive cloud systems with this fragile feature". We have never said that driver names will remain the same across releases, and they have changed over time. 
Device ids have also moved from one driver to another as well, making the "control" of the device seem to have changed names. > What live update wants is to make that driver_override persistent over > kexec. It does not introduce the "driver_override" API. That is > pre-existing conditions. The PCI liveupdate just wants to use it. That does not mean that this is the correct api to use at all. Again, this was a debugging aid, to help with users who wanted to add a device id to a driver without having to rebuild it. Don't make it something that it was never intended to be. Why not just make a new api as you are doing something new here? That way you get to define it to work exactly the way you need? > I want to get some basic understanding before adventure into the more > complex solutions. You mean "real" solutions :) > > > Drivers that support live > > > updates should provide a live update-specific ABI to detect > > > compatibility with the preserved data. We can use a preservation > > > schema GUID for this. > > > > > > > 2) Assume the 1) is yes from you. Are you just not happy that the > > > > kernel saves the driver name? You want user space to save it, is that > > > > it? > > > > How does it reference the driver after kexec otherwise? > > > > > > If we use GUID, drivers would advertise the GUIDs they support and we > > > would modify the core device-driver matching process to use this > > > information. > > > > > > Each driver that supports this mechanism would need to declare an > > > array of GUIDs it is compatible with. This would be a new field in its > > > struct pci_driver. > > > > > > static const guid_t my_driver_guids[] = { > > > GUID_INIT(0x123e4567, ...), // Schema V1 > > > GUID_INIT(0x987a6543, ...), // Schema V2 > > > {}, > > > }; > > > > That's crazy, who is going to be adding all of that to all drivers? And > > knowing to bump this if the internal data representaion changes? And it > > will change underneath it without the driver even knowing? 
This feels > > really really wrong, unless I'm missing something. > > The GUID is more complex than a driver name. I am fine with not using > GUID if you are so strongly opposed to it. > > You are saying don't do A(driver name) and B(GUID). I am waiting for > the part where you say "please do C instead". It's not my requirement to say "here is C", but rather I am saying "B is not going to scale over time as GUIDs are a pain to manage". > Do you have any other suggestion how to prevent the live update PCI > device bind to a different driver after kexec? I am happy to work on > the direction you point out and turn that into a patch for the > discussion purpose. Why prevent it? Why not just have a special api just for drivers that want to use this new feature? thanks, greg k-h
On Tue, Sep 30, 2025 at 9:41 AM Greg Kroah-Hartman <gregkh@linuxfoundation.org> wrote: > > On Tue, Sep 30, 2025 at 09:02:44AM -0400, Pasha Tatashin wrote: > > On Mon, Sep 29, 2025 at 10:10 PM Chris Li <chrisl@kernel.org> wrote: > > > > > > On Mon, Sep 29, 2025 at 10:57 AM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > > > > > > > On Tue, Sep 16, 2025 at 12:45:14AM -0700, Chris Li wrote: > > > > > Save the PCI driver name into "struct pci_dev_ser" during the PCI > > > > > prepare callback. > > > > > > > > > > After kexec, use driver_set_override() to ensure the device is > > > > > bound only to the saved driver. > > > > > > > > This doesn't seem like a great idea, driver name should not be made > > > > ABI. > > > > > > Let's break it down with baby steps. > > > > > > 1) Do you agree the liveupdated PCI device needs to bind to the exact > > > same driver after kexec? > > > To me that is a firm yes. If the driver binds to another driver, we > > > can't expect the other driver will understand the original driver's > > > saved state. > > > > Hi Chris, > > > > Driver name does not have to be an ABI. > > A driver name can NEVER be an abi, please don't do that. > > > Drivers that support live > > updates should provide a live update-specific ABI to detect > > compatibility with the preserved data. We can use a preservation > > schema GUID for this. > > > > > 2) Assume the 1) is yes from you. Are you just not happy that the > > > kernel saves the driver name? You want user space to save it, is that > > > it? > > > How does it reference the driver after kexec otherwise? > > > > If we use GUID, drivers would advertise the GUIDs they support and we > > would modify the core device-driver matching process to use this > > information. > > > > Each driver that supports this mechanism would need to declare an > > array of GUIDs it is compatible with. This would be a new field in its > > struct pci_driver. 
> > > > static const guid_t my_driver_guids[] = { > > GUID_INIT(0x123e4567, ...), // Schema V1 > > GUID_INIT(0x987a6543, ...), // Schema V2 > > {}, > > }; > > That's crazy, who is going to be adding all of that to all drivers? And Only to the drivers that support live updates, that would be just a few drivers. > knowing to bump this if the internal data representaion changes? And it > will change underneath it without the driver even knowing? This feels > really really wrong, unless I'm missing something. A driver that preserves state across a reboot already has an implicit contract with its future self about that data's format. The GUID simply makes that contract explicit and machine-checkable. It does not have to be GUID, but nevertheless there has to be a specific contract. Pasha
On Tue, Sep 30, 2025 at 10:53:50AM -0400, Pasha Tatashin wrote: > On Tue, Sep 30, 2025 at 9:41 AM Greg Kroah-Hartman > <gregkh@linuxfoundation.org> wrote: > > > > On Tue, Sep 30, 2025 at 09:02:44AM -0400, Pasha Tatashin wrote: > > > On Mon, Sep 29, 2025 at 10:10 PM Chris Li <chrisl@kernel.org> wrote: > > > > > > > > On Mon, Sep 29, 2025 at 10:57 AM Jason Gunthorpe <jgg@ziepe.ca> wrote: > > > > > > > > > > On Tue, Sep 16, 2025 at 12:45:14AM -0700, Chris Li wrote: > > > > > > Save the PCI driver name into "struct pci_dev_ser" during the PCI > > > > > > prepare callback. > > > > > > > > > > > > After kexec, use driver_set_override() to ensure the device is > > > > > > bound only to the saved driver. > > > > > > > > > > This doesn't seem like a great idea, driver name should not be made > > > > > ABI. > > > > > > > > Let's break it down with baby steps. > > > > > > > > 1) Do you agree the liveupdated PCI device needs to bind to the exact > > > > same driver after kexec? > > > > To me that is a firm yes. If the driver binds to another driver, we > > > > can't expect the other driver will understand the original driver's > > > > saved state. > > > > > > Hi Chris, > > > > > > Driver name does not have to be an ABI. > > > > A driver name can NEVER be an abi, please don't do that. > > > > > Drivers that support live > > > updates should provide a live update-specific ABI to detect > > > compatibility with the preserved data. We can use a preservation > > > schema GUID for this. > > > > > > > 2) Assume the 1) is yes from you. Are you just not happy that the > > > > kernel saves the driver name? You want user space to save it, is that > > > > it? > > > > How does it reference the driver after kexec otherwise? > > > > > > If we use GUID, drivers would advertise the GUIDs they support and we > > > would modify the core device-driver matching process to use this > > > information. 
> > > > > > Each driver that supports this mechanism would need to declare an > > > array of GUIDs it is compatible with. This would be a new field in its > > > struct pci_driver. > > > > > > static const guid_t my_driver_guids[] = { > > > GUID_INIT(0x123e4567, ...), // Schema V1 > > > GUID_INIT(0x987a6543, ...), // Schema V2 > > > {}, > > > }; > > > > That's crazy, who is going to be adding all of that to all drivers? And > > Only to the drivers that support live updates, that would be just a few drivers. > > > knowing to bump this if the internal data representaion changes? And it > > will change underneath it without the driver even knowing? This feels > > really really wrong, unless I'm missing something. > > A driver that preserves state across a reboot already has an implicit > contract with its future self about that data's format. The GUID > simply makes that contract explicit and machine-checkable. It does not > have to be GUID, but nevertheless there has to be a specific contract. So how are you going to "version" these GUID? I see you use "schema Vx" above, but how is that really going to work in the end? Lots of data structures change underneath the base driver that it knows nothing about, not to mention basic things like compiler flags and the like (think about how we have changed things for spectre issues over the years...) And when can you delete an old "schema"? This feels like you are forcing future developers to maintain things "for forever"... thanks, greg k-h
> > A driver that preserves state across a reboot already has an implicit > > contract with its future self about that data's format. The GUID > > simply makes that contract explicit and machine-checkable. It does not > > have to be GUID, but nevertheless there has to be a specific contract. > > So how are you going to "version" these GUID? I see you use "schema Vx" Driver developer who changes a driver to support live-update. > above, but how is that really going to work in the end? Lots of data > structures change underneath the base driver that it knows nothing > about, not to mention basic things like compiler flags and the like > (think about how we have changed things for spectre issues over the > years...) We are working on versioning protocol, the GUID I am suggesting is not to protect "struct" coherency, but just to identify which driver to bind to which device compatability. > > And when can you delete an old "schema"? This feels like you are > forcing future developers to maintain things "for forever"... This won't be an issue because of how live update support is planned. The support model will be phased and limited: Initially, and for a while there will be no stability guarantees between different kernel versions. Eventually, we will support specific, narrow upgrade paths (e.g., minor-to-minor, or stable-A to stable-A+1). Downgrades and arbitrary version jumps ("any-to-any") will not be supported upstream. Since we only ever need to handle a well-defined forward path, the code for old, irrelevant schemas can always be removed. There is no "forever". Pasha
On Tue, Sep 30, 2025 at 11:56:58AM -0400, Pasha Tatashin wrote: > > > A driver that preserves state across a reboot already has an implicit > > > contract with its future self about that data's format. The GUID > > > simply makes that contract explicit and machine-checkable. It does not > > > have to be GUID, but nevertheless there has to be a specific contract. > > > > So how are you going to "version" these GUID? I see you use "schema Vx" > > Driver developer who changes a driver to support live-update. I do not understand this response, sorry. > > above, but how is that really going to work in the end? Lots of data > > structures change underneath the base driver that it knows nothing > > about, not to mention basic things like compiler flags and the like > > (think about how we have changed things for spectre issues over the > > years...) > > We are working on versioning protocol, the GUID I am suggesting is not > to protect "struct" coherency, but just to identify which driver to > bind to which device compatability. So you have a new way of matching drivers to devices? That's odd. > > And when can you delete an old "schema"? This feels like you are > > forcing future developers to maintain things "for forever"... > > This won't be an issue because of how live update support is planned. > The support model will be phased and limited: > > Initially, and for a while there will be no stability guarantees > between different kernel versions. > Eventually, we will support specific, narrow upgrade paths (e.g., > minor-to-minor, or stable-A to stable-A+1). > Downgrades and arbitrary version jumps ("any-to-any") will not be > supported upstream. Since we only ever need to handle a well-defined > forward path, the code for old, irrelevant schemas can always be > removed. There is no "forever". This is kernel code, it is always "forever", sorry. 
If you want "minor to minor" update, how is that going to work given that you do not add changes only to "minor" releases (that being the 6.12.y the "y" number). Remember, Linux does not use "semantic versioning" as its release numbering is older than that scheme. It just does "this version is newer than that version" and that's it. You can't really take anything else from the number. And if this isn't for "upstream" at all, then why have it? We can't add new features and support it if we can't actually use it and it's only for out-of-tree vendor kernels. And how will you document properly a "well defined forward path"? That should be done first, before you have any code here that we are reviewing. Please do that, get people to agree on the idea and how it will work before asking us to review code. thanks, greg k-h
Hi Greg, On Wed, Oct 1, 2025 at 1:06 AM Greg Kroah-Hartman <gregkh@linuxfoundation.org> wrote: > > On Tue, Sep 30, 2025 at 11:56:58AM -0400, Pasha Tatashin wrote: > > > > A driver that preserves state across a reboot already has an implicit > > > > contract with its future self about that data's format. The GUID > > > > simply makes that contract explicit and machine-checkable. It does not > > > > have to be GUID, but nevertheless there has to be a specific contract. > > > > > > So how are you going to "version" these GUID? I see you use "schema Vx" > > > > Driver developer who changes a driver to support live-update. > > I do not understand this response, sorry. Sorry for the confusion, I misunderstood your question. I thought you were asking who would add a new field to a driver. My answer was that it would be the developer who is adding support for the Live Update feature to that specific driver. I now realize you were asking about how the GUID would be versioned. Using a GUID was just one of several ideas. My main point is that we need some form of versioned compatibility identifier, whether it's a string or a number. This would allow the system to verify that the new driver can understand the preserved data for this device from the previous kernel before it binds to the device. > > > above, but how is that really going to work in the end? Lots of data > > > structures change underneath the base driver that it knows nothing > > > about, not to mention basic things like compiler flags and the like > > > (think about how we have changed things for spectre issues over the > > > years...) > > > > We are working on versioning protocol, the GUID I am suggesting is not > > to protect "struct" coherency, but just to identify which driver to > > bind to which device compatability. > > So you have a new way of matching drivers to devices? That's odd. Correct. 
For a device that persists across a live update, the driver matching logic in the new kernel would need to be altered, unless the device can stay unbound into initramfs, as Jason suggested earlier in the thread. Even then, probing would still need to be altered to keep the device unbound. > > > And when can you delete an old "schema"? This feels like you are > > > forcing future developers to maintain things "for forever"... > > > > This won't be an issue because of how live update support is planned. > > The support model will be phased and limited: > > > > Initially, and for a while there will be no stability guarantees > > between different kernel versions. > > Eventually, we will support specific, narrow upgrade paths (e.g., > > minor-to-minor, or stable-A to stable-A+1). > > Downgrades and arbitrary version jumps ("any-to-any") will not be > > supported upstream. Since we only ever need to handle a well-defined > > forward path, the code for old, irrelevant schemas can always be > > removed. There is no "forever". > > This is kernel code, it is always "forever", sorry. I'm sorry, but I don't quite understand what you mean. There is no stable internal kernel API; the upstream tree is constantly evolving with features being added, improved, and removed. > If you want "minor to minor" update, how is that going to work given > that you do not add changes only to "minor" releases (that being the > 6.12.y the "y" number). You are correct. Initially, our plan is to allow live updates to break between any kernel versions. However, it is my hope that we will eventually stabilize this process and only allow breakages between, for example, versions 6.n and 6.n+2, and eventually from one stable release to stable+2. This would create a well-defined window for safely removing deprecated data formats and the code that handles them from the kernel. > Remember, Linux does not use "semantic versioning" as its release > numbering is older than that scheme. 
It just does "this version is > newer than that version" and that's it. You can't really take anything > else from the number. Understood. If that's the case, we could use stable releases as the basis for defining when a live update can break. It would take longer to achieve, but it is a possibility. These are the kinds of questions that will be discussed at the LPC Liveupdate MC. If you are attending LPC, I encourage you to join the discussion, as your thoughts on how we can frame long-term live update support would be very valuable. > And if this isn't for "upstream" at all, then why have it? We can't add > new features and support it if we can't actually use it and it's only > for out-of-tree vendor kernels. Our goal is to have full support in the upstream kernel. Downstream users will then need to adapt live updates to their specific needs. For example, if a live update from version A to version C is broken, a downstream user would either have to update incrementally from A to B and then to C, or they would have to internally fix whatever is causing the breakage before performing the live update. > And how will you document properly a "well defined forward path"? That > should be done first, before you have any code here that we are > reviewing. Currently, and for the near future, live updates will only be supported within the same kernel version. > Please do that, get people to agree on the idea and how it will work > before asking us to review code. This is an industry-wide effort. We have engineers from Amazon, Google, Microsoft, Nvidia, and other companies meeting bi-weekly to discuss Live Update support, and sending and landing patches upstream. We are also organizing an LPC Live Update Micro Conference where the versioning strategy will be a topic. For now, we have agreed that the live update can break between any kernel versions or with any commit while the feature is under active development. 
This approach allows us the flexibility to build the core functionality while we collaboratively define the long-term versioning and stability model. Thank you, Pasha
On Wed, Oct 01, 2025 at 05:03:19PM -0400, Pasha Tatashin wrote: > On Wed, Oct 1, 2025 at 1:06 AM Greg Kroah-Hartman > > On Tue, Sep 30, 2025 at 11:56:58AM -0400, Pasha Tatashin wrote: > > > > > A driver that preserves state across a reboot already has an implicit > > > > > contract with its future self about that data's format. The GUID > > > > > simply makes that contract explicit and machine-checkable. It does not > > > > > have to be GUID, but nevertheless there has to be a specific contract. > > > > > > > > So how are you going to "version" these GUID? I see you use "schema Vx" > > > > > > Driver developer who changes a driver to support live-update. > > > > I do not understand this response, sorry. > > Sorry for the confusion, I misunderstood your question. I thought you > were asking who would add a new field to a driver. My answer was that > it would be the developer who is adding support for the Live Update > feature to that specific driver. > I now realize you were asking about how the GUID would be versioned. > Using a GUID was just one of several ideas. My main point is that we > need some form of versioned compatibility identifier, whether it's a > string or a number. This would allow the system to verify that the new > driver can understand the preserved data for this device from the > previous kernel before it binds to the device. Again, "versioned" identifiers will not work over time as you can never drop old versions, AND a driver author does not know if the underlying structures that are outside of the driver have changed or not, nor if the compiler settings have changed, or anything else that could affect it like that have changed. > > > > And when can you delete an old "schema"? This feels like you are > > > > forcing future developers to maintain things "for forever"... > > > > > > This won't be an issue because of how live update support is planned. 
> > > The support model will be phased and limited: > > > > > > Initially, and for a while there will be no stability guarantees > > > between different kernel versions. > > > Eventually, we will support specific, narrow upgrade paths (e.g., > > > minor-to-minor, or stable-A to stable-A+1). > > > Downgrades and arbitrary version jumps ("any-to-any") will not be > > > supported upstream. Since we only ever need to handle a well-defined > > > forward path, the code for old, irrelevant schemas can always be > > > removed. There is no "forever". > > > > This is kernel code, it is always "forever", sorry. > > I'm sorry, but I don't quite understand what you mean. There is no > stable internal kernel API; the upstream tree is constantly evolving > with features being added, improved, and removed. Yes, that is very true, but you can not remove user-visible functionality, which is what you are saying you are going to do here. > > If you want "minor to minor" update, how is that going to work given > > that you do not add changes only to "minor" releases (that being the > > 6.12.y the "y" number). > > You are correct. Initially, our plan is to allow live updates to break > between any kernel version. Then there is no such thing as live updates :) > However, it is my hope that we will > eventually stabilize this process and only allow breakages between, > for example, versions 6.n and 6.n+2, and eventually from one stable > release to stable+2. This would create a well-defined window for > safely removing deprecated data formats and the code that handles them > from the kernel. How are you going to define this? We can not break old users when they upgrade, and so you are going to have to support this "upgrade path" for forever. > > Remember, Linux does not use "semantic versioning" as its release > > numbering is older than that scheme. It just does "this version is > > newer than that version" and that's it. You can't really take anything > > else from the number. 
> > Understood. If that's the case, we could use stable releases as the > basis for defining when a live update can break. So every single release? > It would take longer > to achieve, but it is a possibility. These are the kinds of questions > that will be discussed at the LPC Liveupdate MC. If you are attending > LPC, I encourage you to join the discussion, as your thoughts on how > we can frame long-term live update support would be very valuable. I will be at LPC, but can't guarantee I can make it to that MC, it all depends on scheduling. > > And if this isn't for "upstream" at all, then why have it? We can't add > > new features and support it if we can't actually use it and it's only > > for out-of-tree vendor kernels. > > Our goal is to have full support in the upstream kernel. Downstream > users will then need to adapt live updates to their specific needs. > For example, if a live update from version A to version C is broken, a > downstream user would either have to update incrementally from A to B > and then to C, or they would have to internally fix whatever is > causing the breakage before performing the live update. What does "internally fix" mean exactly here? > > And how will you document properly a "well defined forward path"? That > > should be done first, before you have any code here that we are > > reviewing. > > Currently, and for the near future, live updates will only be > supported within the same kernel version. Ok, then no need for any GUID at all. Just update and pray! :) > > Please do that, get people to agree on the idea and how it will work > > before asking us to review code. > > This is an industry-wide effort. We have engineers from Amazon, > Google, Microsoft, Nvidia, and other companies meeting bi-weekly to > discuss Live Update support, and sending and landing patches upstream. > We are also organizing an LPC Live Update Micro Conference where the > versioning strategy will be a topic. 
> > For now, we have agreed that the live update can break between and > kernel versions or with any commit while the feature is under active > development. This approach allows us the flexibility to build the core > functionality while we collaboratively define the long-term versioning > and stability model. Just keeping a device "alive" while rebooting into the same exact kernel image seems odd to me given that this is almost never what people actually do. They update their kernel with the weekly stable release to get the new bugfixes (remember we fix 13 CVEs a day), and away you go. You are saying that this workload would not actually be supported, so why do you want live update at all? Who needs this? thanks, greg k-h
On Thu, Oct 02, 2025 at 08:09:11AM +0200, Greg Kroah-Hartman wrote: > > However, it is my hope that we will > > eventually stabilize this process and only allow breakages between, > > for example, versions 6.n and 6.n+2, and eventually from one stable > > release to stable+2. This would create a well-defined window for > > safely removing deprecated data formats and the code that handles them > > from the kernel. > > How are you going to define this? We can not break old users when they > upgrade, and so you are going to have to support this "upgrade path" for > forever. I think the realistic proposal for LUO/kexec version compatibility is more like eBPF. Expressly saying it is not ABI, not stable, but here are a bunch of tools and it is still useful. > Just keeping a device "alive" while rebooting into the same exact kernel > image seems odd to me given that this is almost never what people > actually do. This feature has a lot of development to go. Right now the baseline for upstream is no ABI promise. You can live update between any two kernel versions that don't change the LUO kexec ABI. In practice that will be a lot of version pairs. The downstreams are going to take this raw capability and choose specific downstream version pairs, patch in support for certain ABI versions that they need, and test. When things mature and the project is more complete then the kernel community may have a discussion about what upstream version pairs should be supported by the community. I don't think this would be as broad as every combination of linux versions ever, but ideas like sequential pairs of stable releases, sequential pairs of main releases and so on are worth exploring. Jason
© 2016 - 2025 Red Hat, Inc.