[PATCH v9 8/8] vpci/msix: Free MSIX resources when init_msix() fails

Jiqian Chen posted 8 patches 3 months ago
There is a newer version of this series
[PATCH v9 8/8] vpci/msix: Free MSIX resources when init_msix() fails
Posted by Jiqian Chen 3 months ago
When MSI-X initialization fails vPCI will hide the capability, but
remove of handlers and data won't be performed until the device is
deassigned.  Introduce a MSI-X cleanup hook that will be called when
initialization fails to cleanup MSI-X related hooks and free it's
associated data.

As all supported capabilities have been switched to use the cleanup
hooks call those from vpci_deassign_device() instead of open-code the
capability specific cleanup in there.

Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
cc: "Roger Pau Monné" <roger.pau@citrix.com>
---
v8->v9 changes:
* Modify commit message.
* Call cleanup_msix() in vpci_deassign_device() to remove the open-code to cleanup msix datas.
* In cleanup_msix(), move "list_del(&vpci->msix->next);" above for loop of iounmap msix tables.

v7->v8 changes:
* Given the code in vpci_remove_registers() an error in the removal of
  registers would likely imply memory corruption, at which point it's
  best to fully disable the device. So, Rollback the last two modifications of v7.

v6->v7 changes:
* Change the pointer parameter of cleanup_msix() to be const.
* When vpci_remove_registers() in cleanup_msix() fails, not to return
  directly, instead try to free msix and re-add ctrl handler.
* Pass pdev->vpci into vpci_add_register() instead of pdev->vpci->msix in
  init_msix() since we need that every handler realize that msix is NULL
  when msix is freed but handlers are still in there.

v5->v6 changes:
* Change the logic to add dummy handler when !vpci->msix in cleanup_msix().

v4->v5 changes:
* Change definition "static void cleanup_msix" to "static int cf_check cleanup_msix"
  since cleanup hook is changed to be int.
* Add a read-only register for MSIX Control Register in the end of cleanup_msix().

v3->v4 changes:
* Change function name from fini_msix() to cleanup_msix().
* Change to use XFREE to free vpci->msix.
* In cleanup function, change the sequence of check and remove action according to
  init_msix().

v2->v3 changes:
* Remove unnecessary clean operations in fini_msix().

v1->v2 changes:
new patch.

Best regards,
Jiqian Chen.
---
 xen/drivers/vpci/msix.c | 44 ++++++++++++++++++++++++++++++++++++++++-
 xen/drivers/vpci/vpci.c | 16 +++++++--------
 xen/include/xen/vpci.h  |  2 ++
 3 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
index 54a5070733aa..8ee315eb928c 100644
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -655,6 +655,48 @@ int vpci_make_msix_hole(const struct pci_dev *pdev)
     return 0;
 }
 
+int cleanup_msix(const struct pci_dev *pdev)
+{
+    int rc;
+    struct vpci *vpci = pdev->vpci;
+    const unsigned int msix_pos = pdev->msix_pos;
+
+    if ( !msix_pos )
+        return 0;
+
+    rc = vpci_remove_registers(vpci, msix_control_reg(msix_pos), 2);
+    if ( rc )
+    {
+        printk(XENLOG_ERR "%pd %pp: fail to remove MSIX handlers rc=%d\n",
+               pdev->domain, &pdev->sbdf, rc);
+        ASSERT_UNREACHABLE();
+        return rc;
+    }
+
+    if ( vpci->msix )
+    {
+        list_del(&vpci->msix->next);
+        for ( unsigned int i = 0; i < ARRAY_SIZE(vpci->msix->table); i++ )
+            if ( vpci->msix->table[i] )
+                iounmap(vpci->msix->table[i]);
+
+        XFREE(vpci->msix);
+    }
+
+    /*
+     * The driver may not traverse the capability list and think device
+     * supports MSIX by default. So here let the control register of MSIX
+     * be Read-Only is to ensure MSIX disabled.
+     */
+    rc = vpci_add_register(vpci, vpci_hw_read16, NULL,
+                           msix_control_reg(msix_pos), 2, NULL);
+    if ( rc )
+        printk(XENLOG_ERR "%pd %pp: fail to add MSIX ctrl handler rc=%d\n",
+               pdev->domain, &pdev->sbdf, rc);
+
+    return rc;
+}
+
 static int cf_check init_msix(struct pci_dev *pdev)
 {
     struct domain *d = pdev->domain;
@@ -710,7 +752,7 @@ static int cf_check init_msix(struct pci_dev *pdev)
      */
     return vpci_make_msix_hole(pdev);
 }
-REGISTER_VPCI_CAP(MSIX, init_msix, NULL);
+REGISTER_VPCI_CAP(MSIX, init_msix, cleanup_msix);
 
 /*
  * Local variables:
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 4b8e6b28bd07..258356019535 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -321,6 +321,14 @@ void vpci_deassign_device(struct pci_dev *pdev)
                     &pdev->domain->vpci_dev_assigned_map);
 #endif
 
+    if ( pdev->vpci->msix )
+    {
+        int rc = cleanup_msix(pdev);
+        if ( rc )
+            printk(XENLOG_ERR "%pd %pp: fail to cleanup MSIX datas rc=%d\n",
+                   pdev->domain, &pdev->sbdf, rc);
+    }
+
     spin_lock(&pdev->vpci->lock);
     while ( !list_empty(&pdev->vpci->handlers) )
     {
@@ -332,18 +340,10 @@ void vpci_deassign_device(struct pci_dev *pdev)
         xfree(r);
     }
     spin_unlock(&pdev->vpci->lock);
-    if ( pdev->vpci->msix )
-    {
-        list_del(&pdev->vpci->msix->next);
-        for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
-            if ( pdev->vpci->msix->table[i] )
-                iounmap(pdev->vpci->msix->table[i]);
-    }
 
     for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
         rangeset_destroy(pdev->vpci->header.bars[i].mem);
 
-    xfree(pdev->vpci->msix);
     xfree(pdev->vpci->msi);
     xfree(pdev->vpci);
     pdev->vpci = NULL;
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 514a0ce39133..6f9c7b6fb38f 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -220,6 +220,8 @@ void vpci_dump_msi(void);
 /* Make sure there's a hole in the p2m for the MSIX mmio areas. */
 int vpci_make_msix_hole(const struct pci_dev *pdev);
 
+int __must_check cleanup_msix(const struct pci_dev *pdev);
+
 /* Arch-specific vPCI MSI helpers. */
 void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev,
                         unsigned int entry, bool mask);
-- 
2.34.1


Re: [PATCH v9 8/8] vpci/msix: Free MSIX resources when init_msix() fails
Posted by Roger Pau Monné 3 months ago
On Mon, Jul 28, 2025 at 01:04:01PM +0800, Jiqian Chen wrote:
> When MSI-X initialization fails vPCI will hide the capability, but
> remove of handlers and data won't be performed until the device is
> deassigned.  Introduce a MSI-X cleanup hook that will be called when
> initialization fails to cleanup MSI-X related hooks and free it's
> associated data.
> 
> As all supported capabilities have been switched to use the cleanup
> hooks call those from vpci_deassign_device() instead of open-code the
> capability specific cleanup in there.
> 
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
> cc: "Roger Pau Monné" <roger.pau@citrix.com>
> ---
> v8->v9 changes:
> * Modify commit message.
> * Call cleanup_msix() in vpci_deassign_device() to remove the open-code to cleanup msix datas.
> * In cleanup_msix(), move "list_del(&vpci->msix->next);" above for loop of iounmap msix tables.
> 
> v7->v8 changes:
> * Given the code in vpci_remove_registers() an error in the removal of
>   registers would likely imply memory corruption, at which point it's
>   best to fully disable the device. So, Rollback the last two modifications of v7.
> 
> v6->v7 changes:
> * Change the pointer parameter of cleanup_msix() to be const.
> * When vpci_remove_registers() in cleanup_msix() fails, not to return
>   directly, instead try to free msix and re-add ctrl handler.
> * Pass pdev->vpci into vpci_add_register() instead of pdev->vpci->msix in
>   init_msix() since we need that every handler realize that msix is NULL
>   when msix is freed but handlers are still in there.
> 
> v5->v6 changes:
> * Change the logic to add dummy handler when !vpci->msix in cleanup_msix().
> 
> v4->v5 changes:
> * Change definition "static void cleanup_msix" to "static int cf_check cleanup_msix"
>   since cleanup hook is changed to be int.
> * Add a read-only register for MSIX Control Register in the end of cleanup_msix().
> 
> v3->v4 changes:
> * Change function name from fini_msix() to cleanup_msix().
> * Change to use XFREE to free vpci->msix.
> * In cleanup function, change the sequence of check and remove action according to
>   init_msix().
> 
> v2->v3 changes:
> * Remove unnecessary clean operations in fini_msix().
> 
> v1->v2 changes:
> new patch.
> 
> Best regards,
> Jiqian Chen.
> ---
>  xen/drivers/vpci/msix.c | 44 ++++++++++++++++++++++++++++++++++++++++-
>  xen/drivers/vpci/vpci.c | 16 +++++++--------
>  xen/include/xen/vpci.h  |  2 ++
>  3 files changed, 53 insertions(+), 9 deletions(-)
> 
> diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
> index 54a5070733aa..8ee315eb928c 100644
> --- a/xen/drivers/vpci/msix.c
> +++ b/xen/drivers/vpci/msix.c
> @@ -655,6 +655,48 @@ int vpci_make_msix_hole(const struct pci_dev *pdev)
>      return 0;
>  }
>  
> +int cleanup_msix(const struct pci_dev *pdev)
> +{
> +    int rc;
> +    struct vpci *vpci = pdev->vpci;
> +    const unsigned int msix_pos = pdev->msix_pos;
> +
> +    if ( !msix_pos )
> +        return 0;
> +
> +    rc = vpci_remove_registers(vpci, msix_control_reg(msix_pos), 2);
> +    if ( rc )
> +    {
> +        printk(XENLOG_ERR "%pd %pp: fail to remove MSIX handlers rc=%d\n",
> +               pdev->domain, &pdev->sbdf, rc);
> +        ASSERT_UNREACHABLE();
> +        return rc;
> +    }
> +
> +    if ( vpci->msix )
> +    {
> +        list_del(&vpci->msix->next);
> +        for ( unsigned int i = 0; i < ARRAY_SIZE(vpci->msix->table); i++ )
> +            if ( vpci->msix->table[i] )
> +                iounmap(vpci->msix->table[i]);
> +
> +        XFREE(vpci->msix);
> +    }
> +
> +    /*
> +     * The driver may not traverse the capability list and think device
> +     * supports MSIX by default. So here let the control register of MSIX
> +     * be Read-Only is to ensure MSIX disabled.
> +     */
> +    rc = vpci_add_register(vpci, vpci_hw_read16, NULL,
> +                           msix_control_reg(msix_pos), 2, NULL);
> +    if ( rc )
> +        printk(XENLOG_ERR "%pd %pp: fail to add MSIX ctrl handler rc=%d\n",
> +               pdev->domain, &pdev->sbdf, rc);
> +
> +    return rc;
> +}
> +
>  static int cf_check init_msix(struct pci_dev *pdev)
>  {
>      struct domain *d = pdev->domain;
> @@ -710,7 +752,7 @@ static int cf_check init_msix(struct pci_dev *pdev)
>       */
>      return vpci_make_msix_hole(pdev);
>  }
> -REGISTER_VPCI_CAP(MSIX, init_msix, NULL);
> +REGISTER_VPCI_CAP(MSIX, init_msix, cleanup_msix);
>  
>  /*
>   * Local variables:
> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> index 4b8e6b28bd07..258356019535 100644
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -321,6 +321,14 @@ void vpci_deassign_device(struct pci_dev *pdev)
>                      &pdev->domain->vpci_dev_assigned_map);
>  #endif
>  
> +    if ( pdev->vpci->msix )
> +    {
> +        int rc = cleanup_msix(pdev);
> +        if ( rc )
> +            printk(XENLOG_ERR "%pd %pp: fail to cleanup MSIX datas rc=%d\n",
> +                   pdev->domain, &pdev->sbdf, rc);
> +    }
> +
>      spin_lock(&pdev->vpci->lock);
>      while ( !list_empty(&pdev->vpci->handlers) )
>      {
> @@ -332,18 +340,10 @@ void vpci_deassign_device(struct pci_dev *pdev)
>          xfree(r);
>      }
>      spin_unlock(&pdev->vpci->lock);
> -    if ( pdev->vpci->msix )
> -    {
> -        list_del(&pdev->vpci->msix->next);
> -        for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
> -            if ( pdev->vpci->msix->table[i] )
> -                iounmap(pdev->vpci->msix->table[i]);
> -    }
>  
>      for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
>          rangeset_destroy(pdev->vpci->header.bars[i].mem);
>  
> -    xfree(pdev->vpci->msix);

Oh, I'm afraid this is not what I was expecting.  You should call all
the cleanup hooks here, so that you can also remove the vpci->msi
xfree() (and any future ones).  You want a loop over the array of
registered vpci_capability_t and call all the defined cleanup()
methods against the deassigned device IMO.

That avoids having to reference any specific capability here, and new
capabilities will only need to implement a cleanup handler without
having to modify vpci_deassign_device().  You won't need to export
cleanup_msix() either, which is ugly.

Thanks, Roger.

Re: [PATCH v9 8/8] vpci/msix: Free MSIX resources when init_msix() fails
Posted by Chen, Jiqian 3 months ago
On 2025/7/30 00:36, Roger Pau Monné wrote:
> On Mon, Jul 28, 2025 at 01:04:01PM +0800, Jiqian Chen wrote:
>> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
>> index 4b8e6b28bd07..258356019535 100644
>> --- a/xen/drivers/vpci/vpci.c
>> +++ b/xen/drivers/vpci/vpci.c
>> @@ -321,6 +321,14 @@ void vpci_deassign_device(struct pci_dev *pdev)
>>                      &pdev->domain->vpci_dev_assigned_map);
>>  #endif
>>  
>> +    if ( pdev->vpci->msix )
>> +    {
>> +        int rc = cleanup_msix(pdev);
>> +        if ( rc )
>> +            printk(XENLOG_ERR "%pd %pp: fail to cleanup MSIX datas rc=%d\n",
>> +                   pdev->domain, &pdev->sbdf, rc);
>> +    }
>> +
>>      spin_lock(&pdev->vpci->lock);
>>      while ( !list_empty(&pdev->vpci->handlers) )
>>      {
>> @@ -332,18 +340,10 @@ void vpci_deassign_device(struct pci_dev *pdev)
>>          xfree(r);
>>      }
>>      spin_unlock(&pdev->vpci->lock);
>> -    if ( pdev->vpci->msix )
>> -    {
>> -        list_del(&pdev->vpci->msix->next);
>> -        for ( i = 0; i < ARRAY_SIZE(pdev->vpci->msix->table); i++ )
>> -            if ( pdev->vpci->msix->table[i] )
>> -                iounmap(pdev->vpci->msix->table[i]);
>> -    }
>>  
>>      for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
>>          rangeset_destroy(pdev->vpci->header.bars[i].mem);
>>  
>> -    xfree(pdev->vpci->msix);
> 
> Oh, I'm afraid this is not what I was expecting.  You should call all
> the cleanup hooks here, so that you can also remove the vpci->msi
> xfree() (and any future ones).  You want a loop over the array of
> registered vpci_capability_t and call all the defined cleanup()
> methods against the deassigned device IMO.
Oh, sorry to misunderstand.
Will change.

> 
> That avoids having to reference any specific capability here, and new
> capabilities will only need to implement a cleanup handler without
> having to modify vpci_deassign_device().  You won't need to export
> cleanup_msix() either, which is ugly.
> 
> Thanks, Roger.

-- 
Best regards,
Jiqian Chen.