Add links between a VF's struct pci_dev and its associated PF struct
pci_dev. Move the calls to pci_get_pdev()/pci_add_device() down to avoid
dropping and re-acquiring the pcidevs_lock().
During PF removal, unlink VF from PF and mark the VF broken. As before,
VFs may exist without a corresponding PF, although now only with
pdev->broken = true. If the PF is removed and re-added, dom0 is expected
to also remove and re-add the VFs.
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
Candidate for backport to 4.19 (the next patch depends on this one)
v4->v5:
* new patch, split from ("x86/msi: fix locking for SR-IOV devices")
* move INIT_LIST_HEAD(&pdev->vf_list); earlier
* collapse struct list_head instances
* retain error code from pci_add_device()
* unlink (and mark broken) VFs instead of removing them
* const-ify VF->PF link
---
xen/drivers/passthrough/pci.c | 70 ++++++++++++++++++++++++++++-------
xen/include/xen/pci.h | 10 +++++
2 files changed, 66 insertions(+), 14 deletions(-)
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 74d3895e1ef6..95a8ed850efd 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -333,6 +333,8 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
*((u8*) &pdev->devfn) = devfn;
pdev->domain = NULL;
+ INIT_LIST_HEAD(&pdev->vf_list);
+
arch_pci_init_pdev(pdev);
rc = pdev_msi_init(pdev);
@@ -449,6 +451,10 @@ static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev)
list_del(&pdev->alldevs_list);
pdev_msi_deinit(pdev);
+
+ if ( pdev->info.is_virtfn && pdev->virtfn.pf_pdev )
+ list_del(&pdev->vf_list);
+
xfree(pdev);
}
@@ -656,24 +662,11 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
const char *type;
int ret;
- bool pf_is_extfn = false;
if ( !info )
type = "device";
else if ( info->is_virtfn )
- {
- pcidevs_lock();
- pdev = pci_get_pdev(NULL,
- PCI_SBDF(seg, info->physfn.bus,
- info->physfn.devfn));
- if ( pdev )
- pf_is_extfn = pdev->info.is_extfn;
- pcidevs_unlock();
- if ( !pdev )
- pci_add_device(seg, info->physfn.bus, info->physfn.devfn,
- NULL, node);
type = "virtual function";
- }
else if ( info->is_extfn )
type = "extended function";
else
@@ -703,7 +696,44 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
* extended function.
*/
if ( pdev->info.is_virtfn )
- pdev->info.is_extfn = pf_is_extfn;
+ {
+ struct pci_dev *pf_pdev;
+
+ pf_pdev = pci_get_pdev(NULL,
+ PCI_SBDF(seg, info->physfn.bus,
+ info->physfn.devfn));
+
+ if ( !pf_pdev )
+ {
+ ret = pci_add_device(seg, info->physfn.bus, info->physfn.devfn,
+ NULL, node);
+ if ( ret )
+ {
+ printk(XENLOG_WARNING "Failed to add SR-IOV device PF %pp for VF %pp\n",
+ &PCI_SBDF(seg, info->physfn.bus, info->physfn.devfn),
+ &pdev->sbdf);
+ free_pdev(pseg, pdev);
+ goto out;
+ }
+ pf_pdev = pci_get_pdev(NULL,
+ PCI_SBDF(seg, info->physfn.bus,
+ info->physfn.devfn));
+ if ( !pf_pdev )
+ {
+ ASSERT_UNREACHABLE();
+ printk(XENLOG_ERR "Failed to find SR-IOV device PF %pp for VF %pp\n",
+ &PCI_SBDF(seg, info->physfn.bus, info->physfn.devfn),
+ &pdev->sbdf);
+ free_pdev(pseg, pdev);
+ ret = -EILSEQ;
+ goto out;
+ }
+ }
+
+ pdev->info.is_extfn = pf_pdev->info.is_extfn;
+ pdev->virtfn.pf_pdev = pf_pdev;
+ list_add(&pdev->vf_list, &pf_pdev->vf_list);
+ }
}
if ( !pdev->info.is_virtfn && !pdev->vf_rlen[0] )
@@ -821,6 +851,18 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
if ( pdev->bus == bus && pdev->devfn == devfn )
{
+ if ( !pdev->info.is_virtfn )
+ {
+ struct pci_dev *vf_pdev, *tmp;
+
+ list_for_each_entry_safe(vf_pdev, tmp, &pdev->vf_list, vf_list)
+ {
+ list_del(&vf_pdev->vf_list);
+ vf_pdev->virtfn.pf_pdev = NULL;
+ vf_pdev->broken = true;
+ }
+ }
+
if ( pdev->domain )
{
write_lock(&pdev->domain->pci_lock);
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 63e49f0117e9..f9435b7f4eb9 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -150,7 +150,17 @@ struct pci_dev {
unsigned int count;
#define PT_FAULT_THRESHOLD 10
} fault;
+
+ /*
+ * List head if info.is_virtfn == false
+ * List entry if info.is_virtfn == true
+ */
+ struct list_head vf_list;
u64 vf_rlen[6];
+ struct {
+ /* Only populated for VFs (info.is_virtfn == true) */
+ const struct pci_dev *pf_pdev; /* Link from VF to PF */
+ } virtfn;
/* Data for vPCI. */
struct vpci *vpci;
--
2.47.0
On 11.10.2024 17:27, Stewart Hildebrand wrote: > Add links between a VF's struct pci_dev and its associated PF struct > pci_dev. Move the calls to pci_get_pdev()/pci_add_device() down to avoid > dropping and re-acquiring the pcidevs_lock(). > > During PF removal, unlink VF from PF and mark the VF broken. As before, > VFs may exist without a corresponding PF, although now only with > pdev->broken = true. If the PF is removed and re-added, dom0 is expected > to also remove and re-add the VFs. Right, or else the VF struct instance would remain orphaned the way you've implemented this. Question is whether it is a reasonable assumption that a Dom0 which failed to remove the VFs during PF removal might later "remember" that it still needs to report VFs removed. I for one doubt that. > @@ -703,7 +696,44 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn, > * extended function. > */ > if ( pdev->info.is_virtfn ) > - pdev->info.is_extfn = pf_is_extfn; > + { > + struct pci_dev *pf_pdev; > + > + pf_pdev = pci_get_pdev(NULL, > + PCI_SBDF(seg, info->physfn.bus, > + info->physfn.devfn)); > + > + if ( !pf_pdev ) > + { > + ret = pci_add_device(seg, info->physfn.bus, info->physfn.devfn, > + NULL, node); > + if ( ret ) > + { > + printk(XENLOG_WARNING "Failed to add SR-IOV device PF %pp for VF %pp\n", > + &PCI_SBDF(seg, info->physfn.bus, info->physfn.devfn), > + &pdev->sbdf); > + free_pdev(pseg, pdev); > + goto out; > + } > + pf_pdev = pci_get_pdev(NULL, > + PCI_SBDF(seg, info->physfn.bus, > + info->physfn.devfn)); > + if ( !pf_pdev ) > + { > + ASSERT_UNREACHABLE(); > + printk(XENLOG_ERR "Failed to find SR-IOV device PF %pp for VF %pp\n", > + &PCI_SBDF(seg, info->physfn.bus, info->physfn.devfn), > + &pdev->sbdf); > + free_pdev(pseg, pdev); > + ret = -EILSEQ; > + goto out; Might be helpful to have the printk() ahead of the ASSERT_UNREACHABLE(), in the unlikely event that the assertion would actually trigger. Positioning doesn't make a difference for release builds anyway. Jan
On 10/16/24 05:52, Jan Beulich wrote: > On 11.10.2024 17:27, Stewart Hildebrand wrote: >> Add links between a VF's struct pci_dev and its associated PF struct >> pci_dev. Move the calls to pci_get_pdev()/pci_add_device() down to avoid >> dropping and re-acquiring the pcidevs_lock(). >> >> During PF removal, unlink VF from PF and mark the VF broken. As before, >> VFs may exist without a corresponding PF, although now only with >> pdev->broken = true. If the PF is removed and re-added, dom0 is expected >> to also remove and re-add the VFs. > > Right, or else the VF struct instance would remain orphaned the way you've > implemented this. Question is whether it is a reasonable assumption that a > Dom0 which failed to remove the VFs during PF removal might later > "remember" that it still needs to report VFs removed. I for one doubt that. This matches my observations: you're right, it most likely won't. I had just written it in a misleading way in the commit message. Re-adding should not occur until after VFs and PF have been removed in non-buggy scenarios. A PF driver that fails to disable SR-IOV (i.e remove VFs) during PF removal is buggy, and would lead to issues inside dom0 as well due to the orphaned/stale VF left behind in Linux dom0 itself. As mentioned in patch #1, Linux warns about this: [ 100.000000] 0000:01:00.0: driver left SR-IOV enabled after remove With the PF<->VF links in place, we now also have the ability to detect and warn about this potentially buggy condition in Xen. I have so far only observed VF-then-PF removal order in non-buggy scenarios. My only hesitation with adding such a warning in Xen is that I don't know whether removing in PF-then-VF order is legitimate. In the previous rev I had Xen removing the VFs during PF removal. In this rev, I chose to continue to rely on dom0 to remove VFs because: 1. This is the expected behavior as far as I can tell. 2. It more accurately mirrors the state of the VFs and PFs in dom0 and Xen. 3. No need to consider special cases when xsm has different policies for PF and VF removal operations. Perhaps the last sentence would be better written as: "The hardware domain is expected to remove the associated VFs before removing the PF. Print a warning in case a PF is removed with associated VFs still present." Or, not implying any removal order: "If the PF is removed, the hardware domain is expected to also remove the associated VFs." I'm personally leaning toward not implying any removal order (i.e. same situation as before this patch). >> @@ -703,7 +696,44 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn, >> * extended function. >> */ >> if ( pdev->info.is_virtfn ) >> - pdev->info.is_extfn = pf_is_extfn; >> + { >> + struct pci_dev *pf_pdev; >> + >> + pf_pdev = pci_get_pdev(NULL, >> + PCI_SBDF(seg, info->physfn.bus, >> + info->physfn.devfn)); >> + >> + if ( !pf_pdev ) >> + { >> + ret = pci_add_device(seg, info->physfn.bus, info->physfn.devfn, >> + NULL, node); >> + if ( ret ) >> + { >> + printk(XENLOG_WARNING "Failed to add SR-IOV device PF %pp for VF %pp\n", >> + &PCI_SBDF(seg, info->physfn.bus, info->physfn.devfn), >> + &pdev->sbdf); >> + free_pdev(pseg, pdev); >> + goto out; >> + } >> + pf_pdev = pci_get_pdev(NULL, >> + PCI_SBDF(seg, info->physfn.bus, >> + info->physfn.devfn)); >> + if ( !pf_pdev ) >> + { >> + ASSERT_UNREACHABLE(); >> + printk(XENLOG_ERR "Failed to find SR-IOV device PF %pp for VF %pp\n", >> + &PCI_SBDF(seg, info->physfn.bus, info->physfn.devfn), >> + &pdev->sbdf); >> + free_pdev(pseg, pdev); >> + ret = -EILSEQ; >> + goto out; > > Might be helpful to have the printk() ahead of the ASSERT_UNREACHABLE(), in the > unlikely event that the assertion would actually trigger. Agreed. > Positioning doesn't > make a difference for release builds anyway. > > Jan
On 16.10.2024 18:50, Stewart Hildebrand wrote: > On 10/16/24 05:52, Jan Beulich wrote: >> On 11.10.2024 17:27, Stewart Hildebrand wrote: >>> Add links between a VF's struct pci_dev and its associated PF struct >>> pci_dev. Move the calls to pci_get_pdev()/pci_add_device() down to avoid >>> dropping and re-acquiring the pcidevs_lock(). >>> >>> During PF removal, unlink VF from PF and mark the VF broken. As before, >>> VFs may exist without a corresponding PF, although now only with >>> pdev->broken = true. If the PF is removed and re-added, dom0 is expected >>> to also remove and re-add the VFs. >> >> Right, or else the VF struct instance would remain orphaned the way you've >> implemented this. Question is whether it is a reasonable assumption that a >> Dom0 which failed to remove the VFs during PF removal might later >> "remember" that it still needs to report VFs removed. I for one doubt that. > > This matches my observations: you're right, it most likely won't. I had > just written it in a misleading way in the commit message. Re-adding > should not occur until after VFs and PF have been removed in non-buggy > scenarios. > > A PF driver that fails to disable SR-IOV (i.e remove VFs) during PF > removal is buggy, and would lead to issues inside dom0 as well due to > the orphaned/stale VF left behind in Linux dom0 itself. As mentioned in > patch #1, Linux warns about this: > > [ 100.000000] 0000:01:00.0: driver left SR-IOV enabled after remove > > With the PF<->VF links in place, we now also have the ability to detect > and warn about this potentially buggy condition in Xen. I have so far > only observed VF-then-PF removal order in non-buggy scenarios. My only > hesitation with adding such a warning in Xen is that I don't know > whether removing in PF-then-VF order is legitimate. My take here is that it's not legitimate. VFs with no PF is physically impossible, and hence also ought to be logically invalid. It's the PF that surfaces (materializes) the VFs, after all. Jan
© 2016 - 2024 Red Hat, Inc.