When MSI-X initialization fails vPCI will hide the capability, but
removal of handlers and data won't be performed until the device is
deassigned. Introduce an MSI-X cleanup hook that will be called when
initialization fails to clean up MSI-X related hooks and free its
associated data.
As all supported capabilities have been switched to use the cleanup
hooks, call those from vpci_deassign_device() instead of open-coding the
capability-specific cleanup in there.
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
cc: "Roger Pau Monné" <roger.pau@citrix.com>
---
v8->v9 changes:
* Modify commit message.
* Call cleanup_msix() in vpci_deassign_device() to replace the open-coded cleanup of MSI-X data.
* In cleanup_msix(), move "list_del(&vpci->msix->next);" above for loop of iounmap msix tables.
v7->v8 changes:
* Given the code in vpci_remove_registers(), an error in the removal of
registers would likely imply memory corruption, at which point it's
best to fully disable the device. So, roll back the last two modifications of v7.
v6->v7 changes:
* Change the pointer parameter of cleanup_msix() to be const.
* When vpci_remove_registers() in cleanup_msix() fails, do not return
directly; instead try to free msix and re-add the ctrl handler.
* Pass pdev->vpci into vpci_add_register() instead of pdev->vpci->msix in
init_msix(), since every handler needs to be able to notice that msix is NULL
when msix has been freed but the handlers are still in place.
v5->v6 changes:
* Change the logic to add dummy handler when !vpci->msix in cleanup_msix().
v4->v5 changes:
* Change definition "static void cleanup_msix" to "static int cf_check cleanup_msix"
since cleanup hook is changed to be int.
* Add a read-only register for MSIX Control Register in the end of cleanup_msix().
v3->v4 changes:
* Change function name from fini_msix() to cleanup_msix().
* Change to use XFREE to free vpci->msix.
* In cleanup function, change the sequence of check and remove action according to
init_msix().
v2->v3 changes:
* Remove unnecessary clean operations in fini_msix().
v1->v2 changes:
new patch.
Best regards,
Jiqian Chen.
---
xen/drivers/vpci/msix.c | 44 ++++++++++++++++++++++++++++++++++++++++-
xen/drivers/vpci/vpci.c | 16 +++++++--------
xen/include/xen/vpci.h | 2 ++
3 files changed, 53 insertions(+), 9 deletions(-)
diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
index 54a5070733aa..8ee315eb928c 100644
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -655,6 +655,48 @@ int vpci_make_msix_hole(const struct pci_dev *pdev)
return 0;
}
+int cleanup_msix(const struct pci_dev *pdev)
+{
+ int rc;
+ struct vpci *vpci = pdev->vpci;
+ const unsigned int msix_pos = pdev->msix_pos;
+
+ if ( !msix_pos )
+ return 0;
+
+ rc = vpci_remove_registers(vpci, msix_control_reg(msix_pos), 2);
+ if ( rc )
+ {
+ printk(XENLOG_ERR "%pd %pp: fail to remove MSIX handlers rc=%d\n",
+ pdev->domain, &pdev->sbdf, rc);
+ ASSERT_UNREACHABLE();
+ return rc;
+ }
+
+ if ( vpci->msix )
+ {
+ list_del(&vpci->msix->next);
+ for ( unsigned int i = 0; i < ARRAY_SIZE(vpci->msix->table); i++ )
+ if ( vpci->msix->table[i] )
+ iounmap(vpci->msix->table[i]);
+
+ XFREE(vpci->msix);
+ }
+
+ /*
+ * The driver may not traverse the capability list and think device
+ * supports MSIX by default. So here let the control register of MSIX
+ * be Read-Only is to ensure MSIX disabled.
+ */
+ rc = vpci_add_register(vpci, vpci_hw_read16, NULL,
+ msix_control_reg(msix_pos), 2, NULL);
+ if ( rc )
+ printk(XENLOG_ERR "%pd %pp: fail to add MSIX ctrl handler rc=%d\n",
+ pdev->domain, &pdev->sbdf, rc);
+
+ return rc;
+}
+
static int cf_check init_msix(struct pci_dev *pdev)
{
struct domain *d = pdev->domain;
@@ -710,7 +752,7 @@ static int cf_check init_msix(struct pci_dev *pdev)
*/
return vpci_make_msix_hole(pdev);
}
-REGISTER_VPCI_CAP(MSIX, init_msix, NULL);
+REGISTER_VPCI_CAP(MSIX, init_msix, cleanup_msix);
/*
* Local variables:
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 4b8e6b28bd07..258356019535 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -321,6 +321,14 @@ void vpci_deassign_device(struct pci_dev *pdev)
&pdev->domain->vpci_dev_assigned_map);
#endif
+ if ( pdev->vpci->msix )
+ {
+ int rc = cleanup_msix(pdev);
+ if ( rc )
+ printk(XENLOG_ERR "%pd %pp: fail to cleanup MSIX datas rc=%d\n",
+ pdev->domain, &pdev->sbdf, rc);
+ }
+
spin_lock(&pdev->vpci->lock);
while ( !list_empty(&pdev->vpci->handlers) )
{
@@ -332,18 +340,10 @@ void vpci_deassign_device(struct pci_dev *pdev)
xfree(r);
}
spin_unlock(&pdev->vpci->lock);
- if ( pdev->vpci->msix )
- {
- list_del(&pdev->vpci->msix->next);
- for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
- if ( pdev->vpci->msix->table[i] )
- iounmap(pdev->vpci->msix->table[i]);
- }
for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
rangeset_destroy(pdev->vpci->header.bars[i].mem);
- xfree(pdev->vpci->msix);
xfree(pdev->vpci->msi);
xfree(pdev->vpci);
pdev->vpci = NULL;
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 514a0ce39133..6f9c7b6fb38f 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -220,6 +220,8 @@ void vpci_dump_msi(void);
/* Make sure there's a hole in the p2m for the MSIX mmio areas. */
int vpci_make_msix_hole(const struct pci_dev *pdev);
+int __must_check cleanup_msix(const struct pci_dev *pdev);
+
/* Arch-specific vPCI MSI helpers. */
void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
unsigned int entry, bool mask);
--
2.34.1
On Mon, Jul 28, 2025 at 01:04:01PM +0800, Jiqian Chen wrote:
> When MSI-X initialization fails vPCI will hide the capability, but
> remove of handlers and data won't be performed until the device is
> deassigned. Introduce a MSI-X cleanup hook that will be called when
> initialization fails to cleanup MSI-X related hooks and free it's
> associated data.
>
> As all supported capabilities have been switched to use the cleanup
> hooks call those from vpci_deassign_device() instead of open-code the
> capability specific cleanup in there.
>
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
> cc: "Roger Pau Monné" <roger.pau@citrix.com>
> ---
> v8->v9 changes:
> * Modify commit message.
> * Call cleanup_msix() in vpci_deassign_device() to remove the open-code to cleanup msix datas.
> * In cleanup_msix(), move "list_del(&vpci->msix->next);" above for loop of iounmap msix tables.
>
> v7->v8 changes:
> * Given the code in vpci_remove_registers() an error in the removal of
> registers would likely imply memory corruption, at which point it's
> best to fully disable the device. So, Rollback the last two modifications of v7.
>
> v6->v7 changes:
> * Change the pointer parameter of cleanup_msix() to be const.
> * When vpci_remove_registers() in cleanup_msix() fails, not to return
> directly, instead try to free msix and re-add ctrl handler.
> * Pass pdev->vpci into vpci_add_register() instead of pdev->vpci->msix in
> init_msix() since we need that every handler realize that msix is NULL
> when msix is freed but handlers are still in there.
>
> v5->v6 changes:
> * Change the logic to add dummy handler when !vpci->msix in cleanup_msix().
>
> v4->v5 changes:
> * Change definition "static void cleanup_msix" to "static int cf_check cleanup_msix"
> since cleanup hook is changed to be int.
> * Add a read-only register for MSIX Control Register in the end of cleanup_msix().
>
> v3->v4 changes:
> * Change function name from fini_msix() to cleanup_msix().
> * Change to use XFREE to free vpci->msix.
> * In cleanup function, change the sequence of check and remove action according to
> init_msix().
>
> v2->v3 changes:
> * Remove unnecessary clean operations in fini_msix().
>
> v1->v2 changes:
> new patch.
>
> Best regards,
> Jiqian Chen.
> ---
> xen/drivers/vpci/msix.c | 44 ++++++++++++++++++++++++++++++++++++++++-
> xen/drivers/vpci/vpci.c | 16 +++++++--------
> xen/include/xen/vpci.h | 2 ++
> 3 files changed, 53 insertions(+), 9 deletions(-)
>
> diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
> index 54a5070733aa..8ee315eb928c 100644
> --- a/xen/drivers/vpci/msix.c
> +++ b/xen/drivers/vpci/msix.c
> @@ -655,6 +655,48 @@ int vpci_make_msix_hole(const struct pci_dev *pdev)
> return 0;
> }
>
> +int cleanup_msix(const struct pci_dev *pdev)
> +{
> + int rc;
> + struct vpci *vpci = pdev->vpci;
> + const unsigned int msix_pos = pdev->msix_pos;
> +
> + if ( !msix_pos )
> + return 0;
> +
> + rc = vpci_remove_registers(vpci, msix_control_reg(msix_pos), 2);
> + if ( rc )
> + {
> + printk(XENLOG_ERR "%pd %pp: fail to remove MSIX handlers rc=%d\n",
> + pdev->domain, &pdev->sbdf, rc);
> + ASSERT_UNREACHABLE();
> + return rc;
> + }
> +
> + if ( vpci->msix )
> + {
> + list_del(&vpci->msix->next);
> + for ( unsigned int i = 0; i < ARRAY_SIZE(vpci->msix->table); i++ )
> + if ( vpci->msix->table[i] )
> + iounmap(vpci->msix->table[i]);
> +
> + XFREE(vpci->msix);
> + }
> +
> + /*
> + * The driver may not traverse the capability list and think device
> + * supports MSIX by default. So here let the control register of MSIX
> + * be Read-Only is to ensure MSIX disabled.
> + */
> + rc = vpci_add_register(vpci, vpci_hw_read16, NULL,
> + msix_control_reg(msix_pos), 2, NULL);
> + if ( rc )
> + printk(XENLOG_ERR "%pd %pp: fail to add MSIX ctrl handler rc=%d\n",
> + pdev->domain, &pdev->sbdf, rc);
> +
> + return rc;
> +}
> +
> static int cf_check init_msix(struct pci_dev *pdev)
> {
> struct domain *d = pdev->domain;
> @@ -710,7 +752,7 @@ static int cf_check init_msix(struct pci_dev *pdev)
> */
> return vpci_make_msix_hole(pdev);
> }
> -REGISTER_VPCI_CAP(MSIX, init_msix, NULL);
> +REGISTER_VPCI_CAP(MSIX, init_msix, cleanup_msix);
>
> /*
> * Local variables:
> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> index 4b8e6b28bd07..258356019535 100644
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -321,6 +321,14 @@ void vpci_deassign_device(struct pci_dev *pdev)
> &pdev->domain->vpci_dev_assigned_map);
> #endif
>
> + if ( pdev->vpci->msix )
> + {
> + int rc = cleanup_msix(pdev);
> + if ( rc )
> + printk(XENLOG_ERR "%pd %pp: fail to cleanup MSIX datas rc=%d\n",
> + pdev->domain, &pdev->sbdf, rc);
> + }
> +
> spin_lock(&pdev->vpci->lock);
> while ( !list_empty(&pdev->vpci->handlers) )
> {
> @@ -332,18 +340,10 @@ void vpci_deassign_device(struct pci_dev *pdev)
> xfree(r);
> }
> spin_unlock(&pdev->vpci->lock);
> - if ( pdev->vpci->msix )
> - {
> - list_del(&pdev->vpci->msix->next);
> - for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
> - if ( pdev->vpci->msix->table[i] )
> - iounmap(pdev->vpci->msix->table[i]);
> - }
>
> for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
> rangeset_destroy(pdev->vpci->header.bars[i].mem);
>
> - xfree(pdev->vpci->msix);
Oh, I'm afraid this is not what I was expecting. You should call all
the cleanup hooks here, so that you can also remove the vpci->msi
xfree() (and any future ones). You want a loop over the array of
registered vpci_capability_t and call all the defined cleanup()
methods against the deassigned device IMO.
That avoids having to reference any specific capability here, and new
capabilities will only need to implement a cleanup handler without
having to modify vpci_deassign_device(). You won't need to export
cleanup_msix() either, which is ugly.
Thanks, Roger.
On 2025/7/30 00:36, Roger Pau Monné wrote:
> On Mon, Jul 28, 2025 at 01:04:01PM +0800, Jiqian Chen wrote:
>> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
>> index 4b8e6b28bd07..258356019535 100644
>> --- a/xen/drivers/vpci/vpci.c
>> +++ b/xen/drivers/vpci/vpci.c
>> @@ -321,6 +321,14 @@ void vpci_deassign_device(struct pci_dev *pdev)
>> &pdev->domain->vpci_dev_assigned_map);
>> #endif
>>
>> + if ( pdev->vpci->msix )
>> + {
>> + int rc = cleanup_msix(pdev);
>> + if ( rc )
>> + printk(XENLOG_ERR "%pd %pp: fail to cleanup MSIX datas rc=%d\n",
>> + pdev->domain, &pdev->sbdf, rc);
>> + }
>> +
>> spin_lock(&pdev->vpci->lock);
>> while ( !list_empty(&pdev->vpci->handlers) )
>> {
>> @@ -332,18 +340,10 @@ void vpci_deassign_device(struct pci_dev *pdev)
>> xfree(r);
>> }
>> spin_unlock(&pdev->vpci->lock);
>> - if ( pdev->vpci->msix )
>> - {
>> - list_del(&pdev->vpci->msix->next);
>> - for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
>> - if ( pdev->vpci->msix->table[i] )
>> - iounmap(pdev->vpci->msix->table[i]);
>> - }
>>
>> for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
>> rangeset_destroy(pdev->vpci->header.bars[i].mem);
>>
>> - xfree(pdev->vpci->msix);
>
> Oh, I'm afraid this is not what I was expecting. You should call all
> the cleanup hooks here, so that you can also remove the vpci->msi
> xfree() (and any future ones). You want a loop over the array of
> registered vpci_capability_t and call all the defined cleanup()
> methods against the deassigned device IMO.
Oh, sorry for the misunderstanding.
Will change.
>
> That avoids having to reference any specific capability here, and new
> capabilities will only need to implement a cleanup handler without
> having to modify vpci_deassign_device(). You won't need to export
> cleanup_msix() either, which is ugly.
>
> Thanks, Roger.
--
Best regards,
Jiqian Chen.
© 2016 - 2025 Red Hat, Inc.