On s390 systems, which use a machine level hypervisor, PCI devices are
always accessed through a form of PCI pass-through which fundamentally
operates on a per PCI function granularity. This is also reflected in the
s390 PCI hotplug driver which creates hotplug slots for individual PCI
functions. Its reset_slot() function, which is a wrapper for
zpci_hot_reset_device(), thus also resets individual functions.
Currently, the kernel's PCI_SLOT() macro assigns the same pci_slot object
to multifunction devices. This approach worked fine on s390 systems that
only exposed virtual functions as individual PCI domains to the operating
system. Since commit 44510d6fa0c0 ("s390/pci: Handling multifunctions")
s390 supports exposing the topology of multifunction PCI devices by
grouping them in a shared PCI domain. When attempting to reset a function
through the hotplug driver, the shared slot assignment causes the wrong
function to be reset instead of the intended one. It also leaks memory as
we do create a pci_slot object for the function, but don't correctly free
it in pci_slot_release().
Add a flag for struct pci_slot to allow per function PCI slots for
functions managed through a hypervisor, which exposes individual PCI
functions while retaining the topology.
Fixes: 44510d6fa0c0 ("s390/pci: Handling multifunctions")
Cc: stable@vger.kernel.org
Suggested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
---
drivers/pci/hotplug/s390_pci_hpc.c | 10 ++++++++--
drivers/pci/pci.c | 5 +++--
drivers/pci/slot.c | 14 +++++++++++---
include/linux/pci.h | 1 +
4 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
index d9996516f49e..8b547de464bf 100644
--- a/drivers/pci/hotplug/s390_pci_hpc.c
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -126,14 +126,20 @@ static const struct hotplug_slot_ops s390_hotplug_slot_ops = {
int zpci_init_slot(struct zpci_dev *zdev)
{
+ int ret;
char name[SLOT_NAME_SIZE];
struct zpci_bus *zbus = zdev->zbus;
zdev->hotplug_slot.ops = &s390_hotplug_slot_ops;
snprintf(name, SLOT_NAME_SIZE, "%08x", zdev->fid);
- return pci_hp_register(&zdev->hotplug_slot, zbus->bus,
- zdev->devfn, name);
+ ret = pci_hp_register(&zdev->hotplug_slot, zbus->bus,
+ zdev->devfn, name);
+ if (ret)
+ return ret;
+
+ zdev->hotplug_slot.pci_slot->per_func_slot = 1;
+ return 0;
}
void zpci_exit_slot(struct zpci_dev *zdev)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b14dd064006c..36ee38e0d817 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4980,8 +4980,9 @@ static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, bool probe)
static int pci_dev_reset_slot_function(struct pci_dev *dev, bool probe)
{
- if (dev->multifunction || dev->subordinate || !dev->slot ||
- dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)
+ if (dev->subordinate || !dev->slot ||
+ dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
+ (dev->multifunction && !dev->slot->per_func_slot))
return -ENOTTY;
return pci_reset_hotplug_slot(dev->slot->hotplug, probe);
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 50fb3eb595fe..51ee59e14393 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -63,6 +63,14 @@ static ssize_t cur_speed_read_file(struct pci_slot *slot, char *buf)
return bus_speed_read(slot->bus->cur_bus_speed, buf);
}
+static bool pci_dev_matches_slot(struct pci_dev *dev, struct pci_slot *slot)
+{
+ if (slot->per_func_slot)
+ return dev->devfn == slot->number;
+
+ return PCI_SLOT(dev->devfn) == slot->number;
+}
+
static void pci_slot_release(struct kobject *kobj)
{
struct pci_dev *dev;
@@ -73,7 +81,7 @@ static void pci_slot_release(struct kobject *kobj)
down_read(&pci_bus_sem);
list_for_each_entry(dev, &slot->bus->devices, bus_list)
- if (PCI_SLOT(dev->devfn) == slot->number)
+ if (pci_dev_matches_slot(dev, slot))
dev->slot = NULL;
up_read(&pci_bus_sem);
@@ -166,7 +174,7 @@ void pci_dev_assign_slot(struct pci_dev *dev)
mutex_lock(&pci_slot_mutex);
list_for_each_entry(slot, &dev->bus->slots, list)
- if (PCI_SLOT(dev->devfn) == slot->number)
+ if (pci_dev_matches_slot(dev, slot))
dev->slot = slot;
mutex_unlock(&pci_slot_mutex);
}
@@ -285,7 +293,7 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
down_read(&pci_bus_sem);
list_for_each_entry(dev, &parent->devices, bus_list)
- if (PCI_SLOT(dev->devfn) == slot_nr)
+ if (pci_dev_matches_slot(dev, slot))
dev->slot = slot;
up_read(&pci_bus_sem);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d1fdf81fbe1e..6ad194597ab5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -78,6 +78,7 @@ struct pci_slot {
struct list_head list; /* Node in list of slots */
struct hotplug_slot *hotplug; /* Hotplug info (move here) */
unsigned char number; /* PCI_SLOT(pci_dev->devfn) */
+ unsigned int per_func_slot:1; /* Allow per function slot */
struct kobject kobj;
};
--
2.43.0
On Mon, 2025-10-20 at 12:01 -0700, Farhan Ali wrote:
> On s390 systems, which use a machine level hypervisor, PCI devices are
> always accessed through a form of PCI pass-through which fundamentally
> operates on a per PCI function granularity. This is also reflected in the
> s390 PCI hotplug driver which creates hotplug slots for individual PCI
> functions. Its reset_slot() function, which is a wrapper for
> zpci_hot_reset_device(), thus also resets individual functions.
>
> Currently, the kernel's PCI_SLOT() macro assigns the same pci_slot object
> to multifunction devices. This approach worked fine on s390 systems that
> only exposed virtual functions as individual PCI domains to the operating
> system. Since commit 44510d6fa0c0 ("s390/pci: Handling multifunctions")
> s390 supports exposing the topology of multifunction PCI devices by
> grouping them in a shared PCI domain. When attempting to reset a function
> through the hotplug driver, the shared slot assignment causes the wrong
> function to be reset instead of the intended one. It also leaks memory as
> we do create a pci_slot object for the function, but don't correctly free
> it in pci_slot_release().
>
> Add a flag for struct pci_slot to allow per function PCI slots for
> functions managed through a hypervisor, which exposes individual PCI
> functions while retaining the topology.
>
> Fixes: 44510d6fa0c0 ("s390/pci: Handling multifunctions")
> Cc: stable@vger.kernel.org
> Suggested-by: Niklas Schnelle <schnelle@linux.ibm.com>
> Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
> ---
> drivers/pci/hotplug/s390_pci_hpc.c | 10 ++++++++--
> drivers/pci/pci.c | 5 +++--
> drivers/pci/slot.c | 14 +++++++++++---
> include/linux/pci.h | 1 +
> 4 files changed, 23 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
> index d9996516f49e..8b547de464bf 100644
> --- a/drivers/pci/hotplug/s390_pci_hpc.c
> +++ b/drivers/pci/hotplug/s390_pci_hpc.c
> @@ -126,14 +126,20 @@ static const struct hotplug_slot_ops s390_hotplug_slot_ops = {
>
> int zpci_init_slot(struct zpci_dev *zdev)
> {
> + int ret;
> char name[SLOT_NAME_SIZE];
> struct zpci_bus *zbus = zdev->zbus;
>
> zdev->hotplug_slot.ops = &s390_hotplug_slot_ops;
>
> snprintf(name, SLOT_NAME_SIZE, "%08x", zdev->fid);
> - return pci_hp_register(&zdev->hotplug_slot, zbus->bus,
> - zdev->devfn, name);
> + ret = pci_hp_register(&zdev->hotplug_slot, zbus->bus,
> + zdev->devfn, name);
> + if (ret)
> + return ret;
> +
> + zdev->hotplug_slot.pci_slot->per_func_slot = 1;
I think the way this works is a bit odd. Because the flag is only set
after pci_hp_register() returns, pci_create_slot(), which is called from
pci_hp_register(), tries to match using the wrong per_func_slot == 0.
This doesn't really cause mismatches though, because slot->number won't
match PCI_SLOT(dev->devfn) except for slot->number 0, where it is fine.
One way to improve(?) on this is to have a per_func_slot flag also in
the struct hotplug_slot and then copy it over into the newly created
struct pci_slot. But then we have this flag twice. Or maybe this really
should be an argument to pci_create_slot()?
On 10/21/2025 5:49 AM, Niklas Schnelle wrote:
> On Mon, 2025-10-20 at 12:01 -0700, Farhan Ali wrote:
>> On s390 systems, which use a machine level hypervisor, PCI devices are
>> always accessed through a form of PCI pass-through which fundamentally
>> operates on a per PCI function granularity. This is also reflected in the
>> s390 PCI hotplug driver which creates hotplug slots for individual PCI
>> functions. Its reset_slot() function, which is a wrapper for
>> zpci_hot_reset_device(), thus also resets individual functions.
>>
>> Currently, the kernel's PCI_SLOT() macro assigns the same pci_slot object
>> to multifunction devices. This approach worked fine on s390 systems that
>> only exposed virtual functions as individual PCI domains to the operating
>> system. Since commit 44510d6fa0c0 ("s390/pci: Handling multifunctions")
>> s390 supports exposing the topology of multifunction PCI devices by
>> grouping them in a shared PCI domain. When attempting to reset a function
>> through the hotplug driver, the shared slot assignment causes the wrong
>> function to be reset instead of the intended one. It also leaks memory as
>> we do create a pci_slot object for the function, but don't correctly free
>> it in pci_slot_release().
>>
>> Add a flag for struct pci_slot to allow per function PCI slots for
>> functions managed through a hypervisor, which exposes individual PCI
>> functions while retaining the topology.
>>
>> Fixes: 44510d6fa0c0 ("s390/pci: Handling multifunctions")
>> Cc: stable@vger.kernel.org
>> Suggested-by: Niklas Schnelle <schnelle@linux.ibm.com>
>> Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
>> Signed-off-by: Farhan Ali <alifm@linux.ibm.com>
>> ---
>> drivers/pci/hotplug/s390_pci_hpc.c | 10 ++++++++--
>> drivers/pci/pci.c | 5 +++--
>> drivers/pci/slot.c | 14 +++++++++++---
>> include/linux/pci.h | 1 +
>> 4 files changed, 23 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
>> index d9996516f49e..8b547de464bf 100644
>> --- a/drivers/pci/hotplug/s390_pci_hpc.c
>> +++ b/drivers/pci/hotplug/s390_pci_hpc.c
>> @@ -126,14 +126,20 @@ static const struct hotplug_slot_ops s390_hotplug_slot_ops = {
>>
>> int zpci_init_slot(struct zpci_dev *zdev)
>> {
>> + int ret;
>> char name[SLOT_NAME_SIZE];
>> struct zpci_bus *zbus = zdev->zbus;
>>
>> zdev->hotplug_slot.ops = &s390_hotplug_slot_ops;
>>
>> snprintf(name, SLOT_NAME_SIZE, "%08x", zdev->fid);
>> - return pci_hp_register(&zdev->hotplug_slot, zbus->bus,
>> - zdev->devfn, name);
>> + ret = pci_hp_register(&zdev->hotplug_slot, zbus->bus,
>> + zdev->devfn, name);
>> + if (ret)
>> + return ret;
>> +
>> + zdev->hotplug_slot.pci_slot->per_func_slot = 1;
> I think the way this works is a bit odd. Due to the order of setting
> the flag pci_create_slot() in pci_hp_register() tries to match using
> the wrong per_func_slot == 0. This doesn't really cause mismatches
> though because the slot->number won't match the PCI_SLOT(dev->devfn)
> except for the slot->number 0 where it is fine.
>
> One way to improve(?) on this is to have a per_func_slot flag also in
> the struct hotplug_slot and then copy it over into the newly created
> struct pci_slot. But then we have this flag twice. Or maybe this really
> should be an argument to pci_create_slot()?
This would still work, as we associate the struct pci_dev with the struct
pci_slot in pci_dev_assign_slot(), by which time the flag is already set.
But I do see your point that there is room for improvement here. As
discussed offline, we could instead put the flag in struct pci_bus, since
that is where the slots list already lives. This would allow us to set the
flag for zpci devices when the pci_bus is created, and it could then be
used by both pci_create_slot() and pci_dev_assign_slot() to correctly set
the slot for the pci_dev. Will post a v2 with this.
Thanks
Farhan
© 2016 - 2026 Red Hat, Inc.