Add links between a VF's struct pci_dev and its associated PF struct
pci_dev.
The hardware domain is expected to add a PF first before adding
associated VFs. Similarly, the hardware domain is expected to remove the
associated VFs before removing the PF. If adding/removing happens out of
order, print a warning and return an error. This means that VFs can only
exist with an associated PF.
Additionally, if the hardware domain attempts to remove a PF with VFs
still present, mark the PF and VFs broken, because Linux Dom0 has been
observed to not respect the error returned.
Move the call to pci_get_pdev() down to avoid dropping and re-acquiring
the pcidevs_lock(). Drop the call to pci_add_device() as it is invalid
to add a VF without an existing PF.
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
Candidate for backport to 4.19 (the next patch depends on this one)
v6->v7:
* cope with multiple invocations of pci_add_device for VFs
* get rid of enclosing struct for single member
* during PF removal attempt with VFs still present:
* keep PF
* mark broken
* don't unlink
* return error
* during VF add:
* initialize pf_pdev in declaration
* remove logic catering to adding VFs without PF
v5->v6:
* move printk() before ASSERT_UNREACHABLE()
* warn about PF removal with VFs still present
* clarify commit message
v4->v5:
* new patch, split from ("x86/msi: fix locking for SR-IOV devices")
* move INIT_LIST_HEAD(&pdev->vf_list); earlier
* collapse struct list_head instances
* retain error code from pci_add_device()
* unlink (and mark broken) VFs instead of removing them
* const-ify VF->PF link
---
xen/drivers/passthrough/pci.c | 74 ++++++++++++++++++++++++++++-------
xen/include/xen/pci.h | 8 ++++
2 files changed, 68 insertions(+), 14 deletions(-)
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 74d3895e1ef6..d4167cea09c0 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -333,6 +333,8 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
*((u8*) &pdev->devfn) = devfn;
pdev->domain = NULL;
+ INIT_LIST_HEAD(&pdev->vf_list);
+
arch_pci_init_pdev(pdev);
rc = pdev_msi_init(pdev);
@@ -449,6 +451,10 @@ static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev)
list_del(&pdev->alldevs_list);
pdev_msi_deinit(pdev);
+
+ if ( pdev->info.is_virtfn )
+ list_del(&pdev->vf_list);
+
xfree(pdev);
}
@@ -656,24 +662,11 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
const char *type;
int ret;
- bool pf_is_extfn = false;
if ( !info )
type = "device";
else if ( info->is_virtfn )
- {
- pcidevs_lock();
- pdev = pci_get_pdev(NULL,
- PCI_SBDF(seg, info->physfn.bus,
- info->physfn.devfn));
- if ( pdev )
- pf_is_extfn = pdev->info.is_extfn;
- pcidevs_unlock();
- if ( !pdev )
- pci_add_device(seg, info->physfn.bus, info->physfn.devfn,
- NULL, node);
type = "virtual function";
- }
else if ( info->is_extfn )
type = "extended function";
else
@@ -703,7 +696,38 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
* extended function.
*/
if ( pdev->info.is_virtfn )
- pdev->info.is_extfn = pf_is_extfn;
+ {
+ struct pci_dev *pf_pdev = pci_get_pdev(NULL,
+ PCI_SBDF(seg,
+ info->physfn.bus,
+ info->physfn.devfn));
+ struct pci_dev *vf_pdev;
+ bool already_added = false;
+
+ if ( !pf_pdev )
+ {
+ printk(XENLOG_WARNING
+ "Attempted to add SR-IOV device VF %pp without PF %pp\n",
+ &pdev->sbdf,
+ &PCI_SBDF(seg, info->physfn.bus, info->physfn.devfn));
+ free_pdev(pseg, pdev);
+ ret = -ENODEV;
+ goto out;
+ }
+
+ pdev->info.is_extfn = pf_pdev->info.is_extfn;
+ pdev->pf_pdev = pf_pdev;
+ list_for_each_entry(vf_pdev, &pf_pdev->vf_list, vf_list)
+ {
+ if ( vf_pdev == pdev )
+ {
+ already_added = true;
+ break;
+ }
+ }
+ if ( !already_added )
+ list_add(&pdev->vf_list, &pf_pdev->vf_list);
+ }
}
if ( !pdev->info.is_virtfn && !pdev->vf_rlen[0] )
@@ -821,6 +845,28 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
if ( pdev->bus == bus && pdev->devfn == devfn )
{
+ if ( !pdev->info.is_virtfn && !list_empty(&pdev->vf_list) )
+ {
+ struct pci_dev *vf_pdev;
+
+ /*
+ * Linux Dom0 has been observed to not respect an error code
+ * returned from PHYSDEVOP_pci_device_remove. Mark VFs and PF
+ * broken.
+ */
+ list_for_each_entry(vf_pdev, &pdev->vf_list, vf_list)
+ vf_pdev->broken = true;
+
+ pdev->broken = true;
+
+ printk(XENLOG_WARNING
+ "Attempted to remove PCI SR-IOV PF %pp with VFs still present\n",
+ &pdev->sbdf);
+
+ ret = -EBUSY;
+ break;
+ }
+
if ( pdev->domain )
{
write_lock(&pdev->domain->pci_lock);
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 1e4fe68c60fb..977c0d08f78a 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -153,7 +153,15 @@ struct pci_dev {
unsigned int count;
#define PT_FAULT_THRESHOLD 10
} fault;
+
+ /*
+ * List head if PF.
+ * List entry if VF.
+ */
+ struct list_head vf_list;
u64 vf_rlen[6];
+ /* Link from VF to PF. Only populated for VFs. */
+ const struct pci_dev *pf_pdev;
/* Data for vPCI. */
struct vpci *vpci;
--
2.47.0
On 12.11.2024 21:53, Stewart Hildebrand wrote:
> Add links between a VF's struct pci_dev and its associated PF struct
> pci_dev.
>
> The hardware domain is expected to add a PF first before adding
> associated VFs. Similarly, the hardware domain is expected to remove the
> associated VFs before removing the PF. If adding/removing happens out of
> order, print a warning and return an error. This means that VFs can only
> exist with an associated PF.
>
> Additionally, if the hardware domain attempts to remove a PF with VFs
> still present, mark the PF and VFs broken, because Linux Dom0 has been
> observed to not respect the error returned.
>
> Move the call to pci_get_pdev() down to avoid dropping and re-acquiring
> the pcidevs_lock(). Drop the call to pci_add_device() as it is invalid
> to add a VF without an existing PF.
>
> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
I'm okay with this, so in principle
Reviewed-by: Jan Beulich <jbeulich@suse.com>
A few comments nevertheless, which may result in there wanting to be
another revision.
> ---
> Candidate for backport to 4.19 (the next patch depends on this one)
With this dependency (we definitely want to backport the other patch) ...
> v6->v7:
> * cope with multiple invocations of pci_add_device for VFs
> * get rid of enclosing struct for single member
> * during PF removal attempt with VFs still present:
> * keep PF
> * mark broken
> * don't unlink
> * return error
> * during VF add:
> * initialize pf_pdev in declaration
> * remove logic catering to adding VFs without PF
... I'd like to point out that this change has an at least theoretical
risk of causing regressions. I therefore wonder whether that wouldn't
better be a separate change, not to be backported.
> @@ -703,7 +696,38 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
> * extended function.
> */
> if ( pdev->info.is_virtfn )
> - pdev->info.is_extfn = pf_is_extfn;
> + {
> + struct pci_dev *pf_pdev = pci_get_pdev(NULL,
> + PCI_SBDF(seg,
> + info->physfn.bus,
> + info->physfn.devfn));
> + struct pci_dev *vf_pdev;
const?
> + bool already_added = false;
> +
> + if ( !pf_pdev )
> + {
> + printk(XENLOG_WARNING
> + "Attempted to add SR-IOV device VF %pp without PF %pp\n",
I'd omit "device" here.
(These changes alone I'd be happy to do while committing.)
> + &pdev->sbdf,
> + &PCI_SBDF(seg, info->physfn.bus, info->physfn.devfn));
> + free_pdev(pseg, pdev);
> + ret = -ENODEV;
> + goto out;
> + }
> +
> + pdev->info.is_extfn = pf_pdev->info.is_extfn;
There's a comment related to this, partly visible in patch context above.
That comment imo needs moving here. And correcting while moving (it's
inverted imo, or at least worded ambiguously).
> + pdev->pf_pdev = pf_pdev;
> + list_for_each_entry(vf_pdev, &pf_pdev->vf_list, vf_list)
> + {
> + if ( vf_pdev == pdev )
> + {
> + already_added = true;
> + break;
> + }
> + }
> + if ( !already_added )
> + list_add(&pdev->vf_list, &pf_pdev->vf_list);
> + }
> }
Personally, as I have a dislike for excess variables, I'd have gotten away
without "already_added". Instead of setting it to true, vf_pdev could be
set to NULL. Others may, however, consider this "obfuscation" or alike.
Jan
On 11/14/24 05:34, Jan Beulich wrote:
> On 12.11.2024 21:53, Stewart Hildebrand wrote:
>> Add links between a VF's struct pci_dev and its associated PF struct
>> pci_dev.
>>
>> The hardware domain is expected to add a PF first before adding
>> associated VFs. Similarly, the hardware domain is expected to remove the
>> associated VFs before removing the PF. If adding/removing happens out of
>> order, print a warning and return an error. This means that VFs can only
>> exist with an associated PF.
>>
>> Additionally, if the hardware domain attempts to remove a PF with VFs
>> still present, mark the PF and VFs broken, because Linux Dom0 has been
>> observed to not respect the error returned.
>>
>> Move the call to pci_get_pdev() down to avoid dropping and re-acquiring
>> the pcidevs_lock(). Drop the call to pci_add_device() as it is invalid
>> to add a VF without an existing PF.
>>
>> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
>
> I'm okay with this, so in principle
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
Thanks, I very much appreciate it! However, is it appropriate for me to
pick up this tag considering the requested/proposed changes?
> A few comments nevertheless, which may result in there wanting to be
> another revision.
I'll plan to send v8. There were some minor comments from Roger on the
removed snippet that I will also address, and I have another proposed
change.
>> ---
>> Candidate for backport to 4.19 (the next patch depends on this one)
>
> With this dependency (we definitely want to backport the other patch) ...
>
>> v6->v7:
>> * cope with multiple invocations of pci_add_device for VFs
>> * get rid of enclosing struct for single member
>> * during PF removal attempt with VFs still present:
>> * keep PF
>> * mark broken
>> * don't unlink
>> * return error
>> * during VF add:
>> * initialize pf_pdev in declaration
>> * remove logic catering to adding VFs without PF
>
> ... I'd like to point out that this change has an at least theoretical
> risk of causing regressions. I therefore wonder whether that wouldn't
> better be a separate change, not to be backported.
That makes sense. I'll split it into a separate patch for the next rev.
>> @@ -703,7 +696,38 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
>> * extended function.
>> */
>> if ( pdev->info.is_virtfn )
>> - pdev->info.is_extfn = pf_is_extfn;
>> + {
>> + struct pci_dev *pf_pdev = pci_get_pdev(NULL,
>> + PCI_SBDF(seg,
>> + info->physfn.bus,
>> + info->physfn.devfn));
BTW, since I'm spinning another rev anyway, are there any opinions on
the indentation here?
>> + struct pci_dev *vf_pdev;
>
> const?
Yes, if it's still needed
>> + bool already_added = false;
>> +
>> + if ( !pf_pdev )
>> + {
>> + printk(XENLOG_WARNING
>> + "Attempted to add SR-IOV device VF %pp without PF %pp\n",
>
> I'd omit "device" here.
OK
> (These changes alone I'd be happy to do while committing.)
I'll include the changes in v8.
>> + &pdev->sbdf,
>> + &PCI_SBDF(seg, info->physfn.bus, info->physfn.devfn));
>> + free_pdev(pseg, pdev);
>> + ret = -ENODEV;
>> + goto out;
>> + }
>> +
>> + pdev->info.is_extfn = pf_pdev->info.is_extfn;
>
> There's a comment related to this, partly visible in patch context above.
> That comment imo needs moving here. And correcting while moving (it's
> inverted imo, or at least worded ambiguously).
I'll move it. As far as wording goes, I suggest:
/*
* PF's 'is_extfn' field indicates whether the VF is an extended
* function.
*/
>> + pdev->pf_pdev = pf_pdev;
>> + list_for_each_entry(vf_pdev, &pf_pdev->vf_list, vf_list)
>> + {
>> + if ( vf_pdev == pdev )
>> + {
>> + already_added = true;
>> + break;
>> + }
>> + }
>> + if ( !already_added )
>> + list_add(&pdev->vf_list, &pf_pdev->vf_list);
>> + }
>> }
>
> Personally, as I have a dislike for excess variables, I'd have gotten away
> without "already_added". Instead of setting it to true, vf_pdev could be
> set to NULL. Others may, however, consider this "obfuscation" or alike.
This relies on vf_pdev being set to non-NULL when the list is empty and
after the last iteration if the list doesn't contain the element. I had
to look up the definitions of list_for_each_entry, INIT_LIST_HEAD, and
list_{add,del,entry} to verify that vf_pdev would be non-NULL in those
cases.
Perhaps a better approach would be to introduce list_add_unique() in
Xen's list library? Then we can also get rid of the vf_pdev variable.
static inline bool list_contains(struct list_head *entry,
struct list_head *head)
{
struct list_head *ptr;
list_for_each(ptr, head)
{
if ( ptr == entry )
return true;
}
return false;
}
static inline void list_add_unique(struct list_head *new,
struct list_head *head)
{
if ( !list_contains(new, head) )
list_add(new, head);
}
On 14.11.2024 19:50, Stewart Hildebrand wrote:
> On 11/14/24 05:34, Jan Beulich wrote:
>> On 12.11.2024 21:53, Stewart Hildebrand wrote:
>>> Add links between a VF's struct pci_dev and its associated PF struct
>>> pci_dev.
>>>
>>> The hardware domain is expected to add a PF first before adding
>>> associated VFs. Similarly, the hardware domain is expected to remove the
>>> associated VFs before removing the PF. If adding/removing happens out of
>>> order, print a warning and return an error. This means that VFs can only
>>> exist with an associated PF.
>>>
>>> Additionally, if the hardware domain attempts to remove a PF with VFs
>>> still present, mark the PF and VFs broken, because Linux Dom0 has been
>>> observed to not respect the error returned.
>>>
>>> Move the call to pci_get_pdev() down to avoid dropping and re-acquiring
>>> the pcidevs_lock(). Drop the call to pci_add_device() as it is invalid
>>> to add a VF without an existing PF.
>>>
>>> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
>>
>> I'm okay with this, so in principle
>> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>
> Thanks, I very much appreciate it! However, is it appropriate for me to
> pick up this tag considering the requested/proposed changes?
In general if in doubt, leave it out. Here, since you're meaning to
make further changes, it certainly wants leaving out.
>>> @@ -703,7 +696,38 @@ int pci_add_device(u16 seg, u8 bus, u8 devfn,
>>> * extended function.
>>> */
>>> if ( pdev->info.is_virtfn )
>>> - pdev->info.is_extfn = pf_is_extfn;
>>> + {
>>> + struct pci_dev *pf_pdev = pci_get_pdev(NULL,
>>> + PCI_SBDF(seg,
>>> + info->physfn.bus,
>>> + info->physfn.devfn));
>
> BTW, since I'm spinning another rev anyway, are there any opinions on
> the indentation here?
Well, yes. Andrew's preferred (or so I think) way of laying this out
would (imo) certainly be better here:
struct pci_dev *pf_pdev =
pci_get_pdev(NULL,
PCI_SBDF(seg, info->physfn.bus,
info->physfn.devfn));
(with less line wrapping if stuff fits within 80 chars, which I didn't
specifically check).
>>> + &pdev->sbdf,
>>> + &PCI_SBDF(seg, info->physfn.bus, info->physfn.devfn));
>>> + free_pdev(pseg, pdev);
>>> + ret = -ENODEV;
>>> + goto out;
>>> + }
>>> +
>>> + pdev->info.is_extfn = pf_pdev->info.is_extfn;
>>
>> There's a comment related to this, partly visible in patch context above.
>> That comment imo needs moving here. And correcting while moving (it's
>> inverted imo, or at least worded ambiguously).
>
> I'll move it. As far as wording goes, I suggest:
>
> /*
> * PF's 'is_extfn' field indicates whether the VF is an extended
> * function.
> */
Or maybe "VF inherits its 'is_extfn' from PF"?
>>> + pdev->pf_pdev = pf_pdev;
>>> + list_for_each_entry(vf_pdev, &pf_pdev->vf_list, vf_list)
>>> + {
>>> + if ( vf_pdev == pdev )
>>> + {
>>> + already_added = true;
>>> + break;
>>> + }
>>> + }
>>> + if ( !already_added )
>>> + list_add(&pdev->vf_list, &pf_pdev->vf_list);
>>> + }
>>> }
>>
>> Personally, as I have a dislike for excess variables, I'd have gotten away
>> without "already_added". Instead of setting it to true, vf_pdev could be
>> set to NULL. Others may, however, consider this "obfuscation" or alike.
>
> This relies on vf_pdev being set to non-NULL when the list is empty and
> after the last iteration if the list doesn't contain the element. I had
> to look up the definitions of list_for_each_entry, INIT_LIST_HEAD, and
> list_{add,del,entry} to verify that vf_pdev would be non-NULL in those
> cases.
>
> Perhaps a better approach would be to introduce list_add_unique() in
> Xen's list library? Then we can also get rid of the vf_pdev variable.
>
> static inline bool list_contains(struct list_head *entry,
> struct list_head *head)
> {
> struct list_head *ptr;
>
> list_for_each(ptr, head)
> {
> if ( ptr == entry )
> return true;
> }
>
> return false;
> }
>
> static inline void list_add_unique(struct list_head *new,
> struct list_head *head)
> {
> if ( !list_contains(new, head) )
> list_add(new, head);
> }
I'm uncertain of this kind of an addition. For long lists one would need to
be careful with whether to actually use list_contains(). It being a simple
library function would make this easy to overlook.
Jan
On 11/15/24 02:55, Jan Beulich wrote:
> On 14.11.2024 19:50, Stewart Hildebrand wrote:
>> On 11/14/24 05:34, Jan Beulich wrote:
>>> On 12.11.2024 21:53, Stewart Hildebrand wrote:
>>>> + pdev->pf_pdev = pf_pdev;
>>>> + list_for_each_entry(vf_pdev, &pf_pdev->vf_list, vf_list)
>>>> + {
>>>> + if ( vf_pdev == pdev )
>>>> + {
>>>> + already_added = true;
>>>> + break;
>>>> + }
>>>> + }
>>>> + if ( !already_added )
>>>> + list_add(&pdev->vf_list, &pf_pdev->vf_list);
>>>> + }
>>>> }
>>>
>>> Personally, as I have a dislike for excess variables, I'd have gotten away
>>> without "already_added". Instead of setting it to true, vf_pdev could be
>>> set to NULL. Others may, however, consider this "obfuscation" or alike.
>>
>> This relies on vf_pdev being set to non-NULL when the list is empty and
>> after the last iteration if the list doesn't contain the element. I had
>> to look up the definitions of list_for_each_entry, INIT_LIST_HEAD, and
>> list_{add,del,entry} to verify that vf_pdev would be non-NULL in those
>> cases.
>>
>> Perhaps a better approach would be to introduce list_add_unique() in
>> Xen's list library? Then we can also get rid of the vf_pdev variable.
>>
>> static inline bool list_contains(struct list_head *entry,
>> struct list_head *head)
>> {
>> struct list_head *ptr;
>>
>> list_for_each(ptr, head)
>> {
>> if ( ptr == entry )
>> return true;
>> }
>>
>> return false;
>> }
>>
>> static inline void list_add_unique(struct list_head *new,
>> struct list_head *head)
>> {
>> if ( !list_contains(new, head) )
>> list_add(new, head);
>> }
>
> I'm uncertain of this kind of an addition. For long lists one would need to
> be careful with whether to actually use list_contains(). It being a simple
> library function would make this easy to overlook.
It occurs to me I could simply check if pdev->pf_pdev has been initialized:
if ( !pdev->pf_pdev )
list_add(&pdev->vf_list, &pf_pdev->vf_list);
pdev->pf_pdev = pf_pdev;
© 2016 - 2025 Red Hat, Inc.