[PATCH v7 7/8] vpci/msi: Free MSI resources when init_msi() fails

Jiqian Chen posted 8 patches 3 months, 4 weeks ago
There is a newer version of this series
[PATCH v7 7/8] vpci/msi: Free MSI resources when init_msi() fails
Posted by Jiqian Chen 3 months, 4 weeks ago
When init_msi() fails, current logic return fail and free MSI-related
resources in vpci_deassign_device(). But the previous new changes will
hide MSI capability and return success, it can't reach
vpci_deassign_device() to remove resources if hiding success, so those
resources must be removed in cleanup function of MSI.

To do that, implement cleanup function for MSI.

Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
cc: "Roger Pau Monné" <roger.pau@citrix.com>
---
v6->v7 changes:
* Change the pointer parameter of cleanup_msi() to be const.
* When vpci_remove_registers() in cleanup_msi() fails, not to return
  directly, instead try to free msi and re-add ctrl handler.
* Pass pdev->vpci into vpci_add_register() instead of pdev->vpci->msi in
  init_msi() since we need that every handler realize that msi is NULL
  when msi is free but handlers are still in there.

v5->v6 changes:
No.

v4->v5 changes:
* Change definition "static void cleanup_msi" to "static int cf_check cleanup_msi"
  since cleanup hook is changed to be int.
* Add a read-only register for MSI Control Register in the end of cleanup_msi.

v3->v4 changes:
* Change function name from fini_msi() to cleanup_msi().
* Remove unnecessary comment.
* Change to use XFREE to free vpci->msi.

v2->v3 changes:
* Remove all fail path, and use fini_msi() hook instead.
* Change the method to calculating the size of msi registers.

v1->v2 changes:
* Added a new function fini_msi to free all MSI resources instead of using an array
  to record registered registers.

Best regards,
Jiqian Chen.
---
 xen/drivers/vpci/msi.c | 111 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 94 insertions(+), 17 deletions(-)

diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
index c3eba4e14870..09b91a685df5 100644
--- a/xen/drivers/vpci/msi.c
+++ b/xen/drivers/vpci/msi.c
@@ -25,7 +25,11 @@
 static uint32_t cf_check control_read(
     const struct pci_dev *pdev, unsigned int reg, void *data)
 {
-    const struct vpci_msi *msi = data;
+    const struct vpci *vpci = data;
+    const struct vpci_msi *msi = vpci->msi;
+
+    if ( !msi )
+        return pci_conf_read16(pdev->sbdf, reg);
 
     return MASK_INSR(fls(pdev->msi_maxvec) - 1, PCI_MSI_FLAGS_QMASK) |
            MASK_INSR(fls(msi->vectors) - 1, PCI_MSI_FLAGS_QSIZE) |
@@ -37,12 +41,16 @@ static uint32_t cf_check control_read(
 static void cf_check control_write(
     const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
 {
-    struct vpci_msi *msi = data;
+    struct vpci *vpci = data;
+    struct vpci_msi *msi = vpci->msi;
     unsigned int vectors = min_t(uint8_t,
                                  1u << MASK_EXTR(val, PCI_MSI_FLAGS_QSIZE),
                                  pdev->msi_maxvec);
     bool new_enabled = val & PCI_MSI_FLAGS_ENABLE;
 
+    if ( !msi )
+        return;
+
     /*
      * No change if the enable field and the number of vectors is
      * the same or the device is not enabled, in which case the
@@ -101,7 +109,11 @@ static void update_msi(const struct pci_dev *pdev, struct vpci_msi *msi)
 static uint32_t cf_check address_read(
     const struct pci_dev *pdev, unsigned int reg, void *data)
 {
-    const struct vpci_msi *msi = data;
+    const struct vpci *vpci = data;
+    const struct vpci_msi *msi = vpci->msi;
+
+    if ( !msi )
+        return ~(uint32_t)0;
 
     return msi->address;
 }
@@ -109,7 +121,11 @@ static uint32_t cf_check address_read(
 static void cf_check address_write(
     const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
 {
-    struct vpci_msi *msi = data;
+    struct vpci *vpci = data;
+    struct vpci_msi *msi = vpci->msi;
+
+    if ( !msi )
+        return;
 
     /* Clear low part. */
     msi->address &= ~0xffffffffULL;
@@ -122,7 +138,11 @@ static void cf_check address_write(
 static uint32_t cf_check address_hi_read(
     const struct pci_dev *pdev, unsigned int reg, void *data)
 {
-    const struct vpci_msi *msi = data;
+    const struct vpci *vpci = data;
+    const struct vpci_msi *msi = vpci->msi;
+
+    if ( !msi )
+        return ~(uint32_t)0;
 
     return msi->address >> 32;
 }
@@ -130,7 +150,11 @@ static uint32_t cf_check address_hi_read(
 static void cf_check address_hi_write(
     const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
 {
-    struct vpci_msi *msi = data;
+    struct vpci *vpci = data;
+    struct vpci_msi *msi = vpci->msi;
+
+    if ( !msi )
+        return;
 
     /* Clear and update high part. */
     msi->address  = (uint32_t)msi->address;
@@ -143,7 +167,11 @@ static void cf_check address_hi_write(
 static uint32_t cf_check data_read(
     const struct pci_dev *pdev, unsigned int reg, void *data)
 {
-    const struct vpci_msi *msi = data;
+    const struct vpci *vpci = data;
+    const struct vpci_msi *msi = vpci->msi;
+
+    if ( !msi )
+        return ~(uint32_t)0;
 
     return msi->data;
 }
@@ -151,7 +179,11 @@ static uint32_t cf_check data_read(
 static void cf_check data_write(
     const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
 {
-    struct vpci_msi *msi = data;
+    struct vpci *vpci = data;
+    struct vpci_msi *msi = vpci->msi;
+
+    if ( !msi )
+        return;
 
     msi->data = val;
 
@@ -162,7 +194,11 @@ static void cf_check data_write(
 static uint32_t cf_check mask_read(
     const struct pci_dev *pdev, unsigned int reg, void *data)
 {
-    const struct vpci_msi *msi = data;
+    const struct vpci *vpci = data;
+    const struct vpci_msi *msi = vpci->msi;
+
+    if ( !msi )
+        return ~(uint32_t)0;
 
     return msi->mask;
 }
@@ -170,9 +206,14 @@ static uint32_t cf_check mask_read(
 static void cf_check mask_write(
     const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
 {
-    struct vpci_msi *msi = data;
-    uint32_t dmask = msi->mask ^ val;
+    struct vpci *vpci = data;
+    struct vpci_msi *msi = vpci->msi;
+    uint32_t dmask;
+
+    if ( !msi )
+        return;
 
+    dmask = msi->mask ^ val;
     if ( !dmask )
         return;
 
@@ -193,6 +234,42 @@ static void cf_check mask_write(
     msi->mask = val;
 }
 
+static int cf_check cleanup_msi(const struct pci_dev *pdev)
+{
+    int rc;
+    unsigned int end;
+    struct vpci *vpci = pdev->vpci;
+    const unsigned int msi_pos = pdev->msi_pos;
+    const unsigned int ctrl = msi_control_reg(msi_pos);
+
+    if ( !msi_pos || !vpci->msi )
+        return 0;
+
+    if ( vpci->msi->masking )
+        end = msi_pending_bits_reg(msi_pos, vpci->msi->address64);
+    else
+        end = msi_mask_bits_reg(msi_pos, vpci->msi->address64) - 2;
+
+    rc = vpci_remove_registers(vpci, ctrl, end - ctrl);
+    if ( rc )
+        printk(XENLOG_WARNING "%pd %pp: fail to remove MSI handlers rc=%d\n",
+               pdev->domain, &pdev->sbdf, rc);
+
+    XFREE(vpci->msi);
+
+    /*
+     * The driver may not traverse the capability list and think device
+     * supports MSI by default. So here let the control register of MSI
+     * be Read-Only is to ensure MSI disabled.
+     */
+    rc = vpci_add_register(vpci, vpci_hw_read16, NULL, ctrl, 2, NULL);
+    if ( rc )
+        printk(XENLOG_ERR "%pd %pp: fail to add MSI ctrl handler rc=%d\n",
+               pdev->domain, &pdev->sbdf, rc);
+
+    return rc;
+}
+
 static int cf_check init_msi(struct pci_dev *pdev)
 {
     unsigned int pos = pdev->msi_pos;
@@ -207,7 +284,7 @@ static int cf_check init_msi(struct pci_dev *pdev)
         return -ENOMEM;
 
     ret = vpci_add_register(pdev->vpci, control_read, control_write,
-                            msi_control_reg(pos), 2, pdev->vpci->msi);
+                            msi_control_reg(pos), 2, pdev->vpci);
     if ( ret )
         /*
          * NB: there's no need to free the msi struct or remove the register
@@ -235,20 +312,20 @@ static int cf_check init_msi(struct pci_dev *pdev)
     pdev->vpci->msi->masking = is_mask_bit_support(control);
 
     ret = vpci_add_register(pdev->vpci, address_read, address_write,
-                            msi_lower_address_reg(pos), 4, pdev->vpci->msi);
+                            msi_lower_address_reg(pos), 4, pdev->vpci);
     if ( ret )
         return ret;
 
     ret = vpci_add_register(pdev->vpci, data_read, data_write,
                             msi_data_reg(pos, pdev->vpci->msi->address64), 2,
-                            pdev->vpci->msi);
+                            pdev->vpci);
     if ( ret )
         return ret;
 
     if ( pdev->vpci->msi->address64 )
     {
         ret = vpci_add_register(pdev->vpci, address_hi_read, address_hi_write,
-                                msi_upper_address_reg(pos), 4, pdev->vpci->msi);
+                                msi_upper_address_reg(pos), 4, pdev->vpci);
         if ( ret )
             return ret;
     }
@@ -258,7 +335,7 @@ static int cf_check init_msi(struct pci_dev *pdev)
         ret = vpci_add_register(pdev->vpci, mask_read, mask_write,
                                 msi_mask_bits_reg(pos,
                                                   pdev->vpci->msi->address64),
-                                4, pdev->vpci->msi);
+                                4, pdev->vpci);
         if ( ret )
             return ret;
         /*
@@ -270,7 +347,7 @@ static int cf_check init_msi(struct pci_dev *pdev)
 
     return 0;
 }
-REGISTER_VPCI_CAP(MSI, init_msi, NULL);
+REGISTER_VPCI_CAP(MSI, init_msi, cleanup_msi);
 
 void vpci_dump_msi(void)
 {
-- 
2.34.1


Re: [PATCH v7 7/8] vpci/msi: Free MSI resources when init_msi() fails
Posted by Roger Pau Monné 3 months, 1 week ago
On Fri, Jul 04, 2025 at 03:08:02PM +0800, Jiqian Chen wrote:
> When init_msi() fails, current logic return fail and free MSI-related
> resources in vpci_deassign_device(). But the previous new changes will
> hide MSI capability and return success, it can't reach
> vpci_deassign_device() to remove resources if hiding success, so those
> resources must be removed in cleanup function of MSI.
> 
> To do that, implement cleanup function for MSI.
> 
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
> cc: "Roger Pau Monné" <roger.pau@citrix.com>
> ---
> v6->v7 changes:
> * Change the pointer parameter of cleanup_msi() to be const.
> * When vpci_remove_registers() in cleanup_msi() fails, not to return
>   directly, instead try to free msi and re-add ctrl handler.
> * Pass pdev->vpci into vpci_add_register() instead of pdev->vpci->msi in
>   init_msi() since we need that every handler realize that msi is NULL
>   when msi is free but handlers are still in there.
> 
> v5->v6 changes:
> No.
> 
> v4->v5 changes:
> * Change definition "static void cleanup_msi" to "static int cf_check cleanup_msi"
>   since cleanup hook is changed to be int.
> * Add a read-only register for MSI Control Register in the end of cleanup_msi.
> 
> v3->v4 changes:
> * Change function name from fini_msi() to cleanup_msi().
> * Remove unnecessary comment.
> * Change to use XFREE to free vpci->msi.
> 
> v2->v3 changes:
> * Remove all fail path, and use fini_msi() hook instead.
> * Change the method to calculating the size of msi registers.
> 
> v1->v2 changes:
> * Added a new function fini_msi to free all MSI resources instead of using an array
>   to record registered registers.
> 
> Best regards,
> Jiqian Chen.
> ---
>  xen/drivers/vpci/msi.c | 111 ++++++++++++++++++++++++++++++++++-------
>  1 file changed, 94 insertions(+), 17 deletions(-)
> 
> diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
> index c3eba4e14870..09b91a685df5 100644
> --- a/xen/drivers/vpci/msi.c
> +++ b/xen/drivers/vpci/msi.c
> @@ -25,7 +25,11 @@
>  static uint32_t cf_check control_read(
>      const struct pci_dev *pdev, unsigned int reg, void *data)
>  {
> -    const struct vpci_msi *msi = data;
> +    const struct vpci *vpci = data;
> +    const struct vpci_msi *msi = vpci->msi;
> +
> +    if ( !msi )
> +        return pci_conf_read16(pdev->sbdf, reg);
>  
>      return MASK_INSR(fls(pdev->msi_maxvec) - 1, PCI_MSI_FLAGS_QMASK) |
>             MASK_INSR(fls(msi->vectors) - 1, PCI_MSI_FLAGS_QSIZE) |
> @@ -37,12 +41,16 @@ static uint32_t cf_check control_read(
>  static void cf_check control_write(
>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>  {
> -    struct vpci_msi *msi = data;
> +    struct vpci *vpci = data;
> +    struct vpci_msi *msi = vpci->msi;
>      unsigned int vectors = min_t(uint8_t,
>                                   1u << MASK_EXTR(val, PCI_MSI_FLAGS_QSIZE),
>                                   pdev->msi_maxvec);
>      bool new_enabled = val & PCI_MSI_FLAGS_ENABLE;
>  
> +    if ( !msi )
> +        return;
> +
>      /*
>       * No change if the enable field and the number of vectors is
>       * the same or the device is not enabled, in which case the
> @@ -101,7 +109,11 @@ static void update_msi(const struct pci_dev *pdev, struct vpci_msi *msi)
>  static uint32_t cf_check address_read(
>      const struct pci_dev *pdev, unsigned int reg, void *data)
>  {
> -    const struct vpci_msi *msi = data;
> +    const struct vpci *vpci = data;
> +    const struct vpci_msi *msi = vpci->msi;
> +
> +    if ( !msi )
> +        return ~(uint32_t)0;
>  
>      return msi->address;
>  }
> @@ -109,7 +121,11 @@ static uint32_t cf_check address_read(
>  static void cf_check address_write(
>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>  {
> -    struct vpci_msi *msi = data;
> +    struct vpci *vpci = data;
> +    struct vpci_msi *msi = vpci->msi;
> +
> +    if ( !msi )
> +        return;
>  
>      /* Clear low part. */
>      msi->address &= ~0xffffffffULL;
> @@ -122,7 +138,11 @@ static void cf_check address_write(
>  static uint32_t cf_check address_hi_read(
>      const struct pci_dev *pdev, unsigned int reg, void *data)
>  {
> -    const struct vpci_msi *msi = data;
> +    const struct vpci *vpci = data;
> +    const struct vpci_msi *msi = vpci->msi;
> +
> +    if ( !msi )
> +        return ~(uint32_t)0;
>  
>      return msi->address >> 32;
>  }
> @@ -130,7 +150,11 @@ static uint32_t cf_check address_hi_read(
>  static void cf_check address_hi_write(
>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>  {
> -    struct vpci_msi *msi = data;
> +    struct vpci *vpci = data;
> +    struct vpci_msi *msi = vpci->msi;
> +
> +    if ( !msi )
> +        return;
>  
>      /* Clear and update high part. */
>      msi->address  = (uint32_t)msi->address;
> @@ -143,7 +167,11 @@ static void cf_check address_hi_write(
>  static uint32_t cf_check data_read(
>      const struct pci_dev *pdev, unsigned int reg, void *data)
>  {
> -    const struct vpci_msi *msi = data;
> +    const struct vpci *vpci = data;
> +    const struct vpci_msi *msi = vpci->msi;
> +
> +    if ( !msi )
> +        return ~(uint32_t)0;
>  
>      return msi->data;
>  }
> @@ -151,7 +179,11 @@ static uint32_t cf_check data_read(
>  static void cf_check data_write(
>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>  {
> -    struct vpci_msi *msi = data;
> +    struct vpci *vpci = data;
> +    struct vpci_msi *msi = vpci->msi;
> +
> +    if ( !msi )
> +        return;
>  
>      msi->data = val;
>  
> @@ -162,7 +194,11 @@ static void cf_check data_write(
>  static uint32_t cf_check mask_read(
>      const struct pci_dev *pdev, unsigned int reg, void *data)
>  {
> -    const struct vpci_msi *msi = data;
> +    const struct vpci *vpci = data;
> +    const struct vpci_msi *msi = vpci->msi;
> +
> +    if ( !msi )
> +        return ~(uint32_t)0;
>  
>      return msi->mask;
>  }
> @@ -170,9 +206,14 @@ static uint32_t cf_check mask_read(
>  static void cf_check mask_write(
>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>  {
> -    struct vpci_msi *msi = data;
> -    uint32_t dmask = msi->mask ^ val;
> +    struct vpci *vpci = data;
> +    struct vpci_msi *msi = vpci->msi;
> +    uint32_t dmask;
> +
> +    if ( !msi )
> +        return;
>  
> +    dmask = msi->mask ^ val;
>      if ( !dmask )
>          return;
>  
> @@ -193,6 +234,42 @@ static void cf_check mask_write(
>      msi->mask = val;
>  }
>  
> +static int cf_check cleanup_msi(const struct pci_dev *pdev)
> +{
> +    int rc;
> +    unsigned int end;
> +    struct vpci *vpci = pdev->vpci;
> +    const unsigned int msi_pos = pdev->msi_pos;
> +    const unsigned int ctrl = msi_control_reg(msi_pos);
> +
> +    if ( !msi_pos || !vpci->msi )
> +        return 0;
> +
> +    if ( vpci->msi->masking )
> +        end = msi_pending_bits_reg(msi_pos, vpci->msi->address64);
> +    else
> +        end = msi_mask_bits_reg(msi_pos, vpci->msi->address64) - 2;
> +
> +    rc = vpci_remove_registers(vpci, ctrl, end - ctrl);
> +    if ( rc )
> +        printk(XENLOG_WARNING "%pd %pp: fail to remove MSI handlers rc=%d\n",
> +               pdev->domain, &pdev->sbdf, rc);

I think you could possibly do this as:

if ( rc )
{
    printk(...);
    ASSERT_UNREACHABLE();
    return rc;
}

Given the code in vpci_remove_registers() an error in the removal of
registers would likely imply memory corruption, at which point it's
best to fully disable the device.  That would allow you having to
modify all the handlers to pass vpci instead of msi structs.

That will avoid a lot of the extra code churn of having to change the
handler parameters.

Thanks, Roger.

Re: [PATCH v7 7/8] vpci/msi: Free MSI resources when init_msi() fails
Posted by Chen, Jiqian 3 months, 1 week ago
On 2025/7/22 00:21, Roger Pau Monné wrote:
> On Fri, Jul 04, 2025 at 03:08:02PM +0800, Jiqian Chen wrote:
>> When init_msi() fails, current logic return fail and free MSI-related
>> resources in vpci_deassign_device(). But the previous new changes will
>> hide MSI capability and return success, it can't reach
>> vpci_deassign_device() to remove resources if hiding success, so those
>> resources must be removed in cleanup function of MSI.
>>
>> To do that, implement cleanup function for MSI.
>>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>> ---
>> cc: "Roger Pau Monné" <roger.pau@citrix.com>
>> ---
>> v6->v7 changes:
>> * Change the pointer parameter of cleanup_msi() to be const.
>> * When vpci_remove_registers() in cleanup_msi() fails, not to return
>>   directly, instead try to free msi and re-add ctrl handler.
>> * Pass pdev->vpci into vpci_add_register() instead of pdev->vpci->msi in
>>   init_msi() since we need that every handler realize that msi is NULL
>>   when msi is free but handlers are still in there.
>>
>> v5->v6 changes:
>> No.
>>
>> v4->v5 changes:
>> * Change definition "static void cleanup_msi" to "static int cf_check cleanup_msi"
>>   since cleanup hook is changed to be int.
>> * Add a read-only register for MSI Control Register in the end of cleanup_msi.
>>
>> v3->v4 changes:
>> * Change function name from fini_msi() to cleanup_msi().
>> * Remove unnecessary comment.
>> * Change to use XFREE to free vpci->msi.
>>
>> v2->v3 changes:
>> * Remove all fail path, and use fini_msi() hook instead.
>> * Change the method to calculating the size of msi registers.
>>
>> v1->v2 changes:
>> * Added a new function fini_msi to free all MSI resources instead of using an array
>>   to record registered registers.
>>
>> Best regards,
>> Jiqian Chen.
>> ---
>>  xen/drivers/vpci/msi.c | 111 ++++++++++++++++++++++++++++++++++-------
>>  1 file changed, 94 insertions(+), 17 deletions(-)
>>
>> diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
>> index c3eba4e14870..09b91a685df5 100644
>> --- a/xen/drivers/vpci/msi.c
>> +++ b/xen/drivers/vpci/msi.c
>> @@ -25,7 +25,11 @@
>>  static uint32_t cf_check control_read(
>>      const struct pci_dev *pdev, unsigned int reg, void *data)
>>  {
>> -    const struct vpci_msi *msi = data;
>> +    const struct vpci *vpci = data;
>> +    const struct vpci_msi *msi = vpci->msi;
>> +
>> +    if ( !msi )
>> +        return pci_conf_read16(pdev->sbdf, reg);
>>  
>>      return MASK_INSR(fls(pdev->msi_maxvec) - 1, PCI_MSI_FLAGS_QMASK) |
>>             MASK_INSR(fls(msi->vectors) - 1, PCI_MSI_FLAGS_QSIZE) |
>> @@ -37,12 +41,16 @@ static uint32_t cf_check control_read(
>>  static void cf_check control_write(
>>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>>  {
>> -    struct vpci_msi *msi = data;
>> +    struct vpci *vpci = data;
>> +    struct vpci_msi *msi = vpci->msi;
>>      unsigned int vectors = min_t(uint8_t,
>>                                   1u << MASK_EXTR(val, PCI_MSI_FLAGS_QSIZE),
>>                                   pdev->msi_maxvec);
>>      bool new_enabled = val & PCI_MSI_FLAGS_ENABLE;
>>  
>> +    if ( !msi )
>> +        return;
>> +
>>      /*
>>       * No change if the enable field and the number of vectors is
>>       * the same or the device is not enabled, in which case the
>> @@ -101,7 +109,11 @@ static void update_msi(const struct pci_dev *pdev, struct vpci_msi *msi)
>>  static uint32_t cf_check address_read(
>>      const struct pci_dev *pdev, unsigned int reg, void *data)
>>  {
>> -    const struct vpci_msi *msi = data;
>> +    const struct vpci *vpci = data;
>> +    const struct vpci_msi *msi = vpci->msi;
>> +
>> +    if ( !msi )
>> +        return ~(uint32_t)0;
>>  
>>      return msi->address;
>>  }
>> @@ -109,7 +121,11 @@ static uint32_t cf_check address_read(
>>  static void cf_check address_write(
>>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>>  {
>> -    struct vpci_msi *msi = data;
>> +    struct vpci *vpci = data;
>> +    struct vpci_msi *msi = vpci->msi;
>> +
>> +    if ( !msi )
>> +        return;
>>  
>>      /* Clear low part. */
>>      msi->address &= ~0xffffffffULL;
>> @@ -122,7 +138,11 @@ static void cf_check address_write(
>>  static uint32_t cf_check address_hi_read(
>>      const struct pci_dev *pdev, unsigned int reg, void *data)
>>  {
>> -    const struct vpci_msi *msi = data;
>> +    const struct vpci *vpci = data;
>> +    const struct vpci_msi *msi = vpci->msi;
>> +
>> +    if ( !msi )
>> +        return ~(uint32_t)0;
>>  
>>      return msi->address >> 32;
>>  }
>> @@ -130,7 +150,11 @@ static uint32_t cf_check address_hi_read(
>>  static void cf_check address_hi_write(
>>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>>  {
>> -    struct vpci_msi *msi = data;
>> +    struct vpci *vpci = data;
>> +    struct vpci_msi *msi = vpci->msi;
>> +
>> +    if ( !msi )
>> +        return;
>>  
>>      /* Clear and update high part. */
>>      msi->address  = (uint32_t)msi->address;
>> @@ -143,7 +167,11 @@ static void cf_check address_hi_write(
>>  static uint32_t cf_check data_read(
>>      const struct pci_dev *pdev, unsigned int reg, void *data)
>>  {
>> -    const struct vpci_msi *msi = data;
>> +    const struct vpci *vpci = data;
>> +    const struct vpci_msi *msi = vpci->msi;
>> +
>> +    if ( !msi )
>> +        return ~(uint32_t)0;
>>  
>>      return msi->data;
>>  }
>> @@ -151,7 +179,11 @@ static uint32_t cf_check data_read(
>>  static void cf_check data_write(
>>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>>  {
>> -    struct vpci_msi *msi = data;
>> +    struct vpci *vpci = data;
>> +    struct vpci_msi *msi = vpci->msi;
>> +
>> +    if ( !msi )
>> +        return;
>>  
>>      msi->data = val;
>>  
>> @@ -162,7 +194,11 @@ static void cf_check data_write(
>>  static uint32_t cf_check mask_read(
>>      const struct pci_dev *pdev, unsigned int reg, void *data)
>>  {
>> -    const struct vpci_msi *msi = data;
>> +    const struct vpci *vpci = data;
>> +    const struct vpci_msi *msi = vpci->msi;
>> +
>> +    if ( !msi )
>> +        return ~(uint32_t)0;
>>  
>>      return msi->mask;
>>  }
>> @@ -170,9 +206,14 @@ static uint32_t cf_check mask_read(
>>  static void cf_check mask_write(
>>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>>  {
>> -    struct vpci_msi *msi = data;
>> -    uint32_t dmask = msi->mask ^ val;
>> +    struct vpci *vpci = data;
>> +    struct vpci_msi *msi = vpci->msi;
>> +    uint32_t dmask;
>> +
>> +    if ( !msi )
>> +        return;
>>  
>> +    dmask = msi->mask ^ val;
>>      if ( !dmask )
>>          return;
>>  
>> @@ -193,6 +234,42 @@ static void cf_check mask_write(
>>      msi->mask = val;
>>  }
>>  
>> +static int cf_check cleanup_msi(const struct pci_dev *pdev)
>> +{
>> +    int rc;
>> +    unsigned int end;
>> +    struct vpci *vpci = pdev->vpci;
>> +    const unsigned int msi_pos = pdev->msi_pos;
>> +    const unsigned int ctrl = msi_control_reg(msi_pos);
>> +
>> +    if ( !msi_pos || !vpci->msi )
>> +        return 0;
>> +
>> +    if ( vpci->msi->masking )
>> +        end = msi_pending_bits_reg(msi_pos, vpci->msi->address64);
>> +    else
>> +        end = msi_mask_bits_reg(msi_pos, vpci->msi->address64) - 2;
>> +
>> +    rc = vpci_remove_registers(vpci, ctrl, end - ctrl);
>> +    if ( rc )
>> +        printk(XENLOG_WARNING "%pd %pp: fail to remove MSI handlers rc=%d\n",
>> +               pdev->domain, &pdev->sbdf, rc);
> 
> I think you could possibly do this as:
> 
> if ( rc )
> {
>     printk(...);
>     ASSERT_UNREACHABLE();
>     return rc;
> }
> 
> Given the code in vpci_remove_registers() an error in the removal of
> registers would likely imply memory corruption, at which point it's
> best to fully disable the device.  That would allow you having to
> modify all the handlers to pass vpci instead of msi structs.
> 
> That will avoid a lot of the extra code churn of having to change the
> handler parameters.
OK, got it.
Since Jan proposed this consideration in v6, I need to ask for his opinion.
Hi Jan, do you fine with this change?

> 
> Thanks, Roger.

-- 
Best regards,
Jiqian Chen.
Re: [PATCH v7 7/8] vpci/msi: Free MSI resources when init_msi() fails
Posted by Jan Beulich 3 months, 1 week ago
On 23.07.2025 09:54, Chen, Jiqian wrote:
> On 2025/7/22 00:21, Roger Pau Monné wrote:
>> On Fri, Jul 04, 2025 at 03:08:02PM +0800, Jiqian Chen wrote:
>>> When init_msi() fails, current logic return fail and free MSI-related
>>> resources in vpci_deassign_device(). But the previous new changes will
>>> hide MSI capability and return success, it can't reach
>>> vpci_deassign_device() to remove resources if hiding success, so those
>>> resources must be removed in cleanup function of MSI.
>>>
>>> To do that, implement cleanup function for MSI.
>>>
>>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>>> ---
>>> cc: "Roger Pau Monné" <roger.pau@citrix.com>
>>> ---
>>> v6->v7 changes:
>>> * Change the pointer parameter of cleanup_msi() to be const.
>>> * When vpci_remove_registers() in cleanup_msi() fails, not to return
>>>   directly, instead try to free msi and re-add ctrl handler.
>>> * Pass pdev->vpci into vpci_add_register() instead of pdev->vpci->msi in
>>>   init_msi() since we need that every handler realize that msi is NULL
>>>   when msi is free but handlers are still in there.
>>>
>>> v5->v6 changes:
>>> No.
>>>
>>> v4->v5 changes:
>>> * Change definition "static void cleanup_msi" to "static int cf_check cleanup_msi"
>>>   since cleanup hook is changed to be int.
>>> * Add a read-only register for MSI Control Register in the end of cleanup_msi.
>>>
>>> v3->v4 changes:
>>> * Change function name from fini_msi() to cleanup_msi().
>>> * Remove unnecessary comment.
>>> * Change to use XFREE to free vpci->msi.
>>>
>>> v2->v3 changes:
>>> * Remove all fail path, and use fini_msi() hook instead.
>>> * Change the method to calculating the size of msi registers.
>>>
>>> v1->v2 changes:
>>> * Added a new function fini_msi to free all MSI resources instead of using an array
>>>   to record registered registers.
>>>
>>> Best regards,
>>> Jiqian Chen.
>>> ---
>>>  xen/drivers/vpci/msi.c | 111 ++++++++++++++++++++++++++++++++++-------
>>>  1 file changed, 94 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
>>> index c3eba4e14870..09b91a685df5 100644
>>> --- a/xen/drivers/vpci/msi.c
>>> +++ b/xen/drivers/vpci/msi.c
>>> @@ -25,7 +25,11 @@
>>>  static uint32_t cf_check control_read(
>>>      const struct pci_dev *pdev, unsigned int reg, void *data)
>>>  {
>>> -    const struct vpci_msi *msi = data;
>>> +    const struct vpci *vpci = data;
>>> +    const struct vpci_msi *msi = vpci->msi;
>>> +
>>> +    if ( !msi )
>>> +        return pci_conf_read16(pdev->sbdf, reg);
>>>  
>>>      return MASK_INSR(fls(pdev->msi_maxvec) - 1, PCI_MSI_FLAGS_QMASK) |
>>>             MASK_INSR(fls(msi->vectors) - 1, PCI_MSI_FLAGS_QSIZE) |
>>> @@ -37,12 +41,16 @@ static uint32_t cf_check control_read(
>>>  static void cf_check control_write(
>>>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>>>  {
>>> -    struct vpci_msi *msi = data;
>>> +    struct vpci *vpci = data;
>>> +    struct vpci_msi *msi = vpci->msi;
>>>      unsigned int vectors = min_t(uint8_t,
>>>                                   1u << MASK_EXTR(val, PCI_MSI_FLAGS_QSIZE),
>>>                                   pdev->msi_maxvec);
>>>      bool new_enabled = val & PCI_MSI_FLAGS_ENABLE;
>>>  
>>> +    if ( !msi )
>>> +        return;
>>> +
>>>      /*
>>>       * No change if the enable field and the number of vectors is
>>>       * the same or the device is not enabled, in which case the
>>> @@ -101,7 +109,11 @@ static void update_msi(const struct pci_dev *pdev, struct vpci_msi *msi)
>>>  static uint32_t cf_check address_read(
>>>      const struct pci_dev *pdev, unsigned int reg, void *data)
>>>  {
>>> -    const struct vpci_msi *msi = data;
>>> +    const struct vpci *vpci = data;
>>> +    const struct vpci_msi *msi = vpci->msi;
>>> +
>>> +    if ( !msi )
>>> +        return ~(uint32_t)0;
>>>  
>>>      return msi->address;
>>>  }
>>> @@ -109,7 +121,11 @@ static uint32_t cf_check address_read(
>>>  static void cf_check address_write(
>>>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>>>  {
>>> -    struct vpci_msi *msi = data;
>>> +    struct vpci *vpci = data;
>>> +    struct vpci_msi *msi = vpci->msi;
>>> +
>>> +    if ( !msi )
>>> +        return;
>>>  
>>>      /* Clear low part. */
>>>      msi->address &= ~0xffffffffULL;
>>> @@ -122,7 +138,11 @@ static void cf_check address_write(
>>>  static uint32_t cf_check address_hi_read(
>>>      const struct pci_dev *pdev, unsigned int reg, void *data)
>>>  {
>>> -    const struct vpci_msi *msi = data;
>>> +    const struct vpci *vpci = data;
>>> +    const struct vpci_msi *msi = vpci->msi;
>>> +
>>> +    if ( !msi )
>>> +        return ~(uint32_t)0;
>>>  
>>>      return msi->address >> 32;
>>>  }
>>> @@ -130,7 +150,11 @@ static uint32_t cf_check address_hi_read(
>>>  static void cf_check address_hi_write(
>>>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>>>  {
>>> -    struct vpci_msi *msi = data;
>>> +    struct vpci *vpci = data;
>>> +    struct vpci_msi *msi = vpci->msi;
>>> +
>>> +    if ( !msi )
>>> +        return;
>>>  
>>>      /* Clear and update high part. */
>>>      msi->address  = (uint32_t)msi->address;
>>> @@ -143,7 +167,11 @@ static void cf_check address_hi_write(
>>>  static uint32_t cf_check data_read(
>>>      const struct pci_dev *pdev, unsigned int reg, void *data)
>>>  {
>>> -    const struct vpci_msi *msi = data;
>>> +    const struct vpci *vpci = data;
>>> +    const struct vpci_msi *msi = vpci->msi;
>>> +
>>> +    if ( !msi )
>>> +        return ~(uint32_t)0;
>>>  
>>>      return msi->data;
>>>  }
>>> @@ -151,7 +179,11 @@ static uint32_t cf_check data_read(
>>>  static void cf_check data_write(
>>>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>>>  {
>>> -    struct vpci_msi *msi = data;
>>> +    struct vpci *vpci = data;
>>> +    struct vpci_msi *msi = vpci->msi;
>>> +
>>> +    if ( !msi )
>>> +        return;
>>>  
>>>      msi->data = val;
>>>  
>>> @@ -162,7 +194,11 @@ static void cf_check data_write(
>>>  static uint32_t cf_check mask_read(
>>>      const struct pci_dev *pdev, unsigned int reg, void *data)
>>>  {
>>> -    const struct vpci_msi *msi = data;
>>> +    const struct vpci *vpci = data;
>>> +    const struct vpci_msi *msi = vpci->msi;
>>> +
>>> +    if ( !msi )
>>> +        return ~(uint32_t)0;
>>>  
>>>      return msi->mask;
>>>  }
>>> @@ -170,9 +206,14 @@ static uint32_t cf_check mask_read(
>>>  static void cf_check mask_write(
>>>      const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
>>>  {
>>> -    struct vpci_msi *msi = data;
>>> -    uint32_t dmask = msi->mask ^ val;
>>> +    struct vpci *vpci = data;
>>> +    struct vpci_msi *msi = vpci->msi;
>>> +    uint32_t dmask;
>>> +
>>> +    if ( !msi )
>>> +        return;
>>>  
>>> +    dmask = msi->mask ^ val;
>>>      if ( !dmask )
>>>          return;
>>>  
>>> @@ -193,6 +234,42 @@ static void cf_check mask_write(
>>>      msi->mask = val;
>>>  }
>>>  
>>> +static int cf_check cleanup_msi(const struct pci_dev *pdev)
>>> +{
>>> +    int rc;
>>> +    unsigned int end;
>>> +    struct vpci *vpci = pdev->vpci;
>>> +    const unsigned int msi_pos = pdev->msi_pos;
>>> +    const unsigned int ctrl = msi_control_reg(msi_pos);
>>> +
>>> +    if ( !msi_pos || !vpci->msi )
>>> +        return 0;
>>> +
>>> +    if ( vpci->msi->masking )
>>> +        end = msi_pending_bits_reg(msi_pos, vpci->msi->address64);
>>> +    else
>>> +        end = msi_mask_bits_reg(msi_pos, vpci->msi->address64) - 2;
>>> +
>>> +    rc = vpci_remove_registers(vpci, ctrl, end - ctrl);
>>> +    if ( rc )
>>> +        printk(XENLOG_WARNING "%pd %pp: fail to remove MSI handlers rc=%d\n",
>>> +               pdev->domain, &pdev->sbdf, rc);
>>
>> I think you could possibly do this as:
>>
>> if ( rc )
>> {
>>     printk(...);
>>     ASSERT_UNREACHABLE();
>>     return rc;
>> }
>>
>> Given the code in vpci_remove_registers() an error in the removal of
>> registers would likely imply memory corruption, at which point it's
>> best to fully disable the device.  That would allow you having to
>> modify all the handlers to pass vpci instead of msi structs.
>>
>> That will avoid a lot of the extra code churn of having to change the
>> handler parameters.
> OK, got it.
> Since Jan proposed this consideration in v6, I need to ask for his opinion.
> Hi Jan, do you fine with this change?

If that's what Roger prefers, so be it. (Imo if we suspected memory
corruption, we'd better halt the system. I agree though that in
practice vpci_remove_registers() shouldn't fail here.)

Jan

Re: [PATCH v7 7/8] vpci/msi: Free MSI resources when init_msi() fails
Posted by Jan Beulich 3 months, 3 weeks ago
On 04.07.2025 09:08, Jiqian Chen wrote:
> ---
> v6->v7 changes:
> * Change the pointer parameter of cleanup_msi() to be const.
> * When vpci_remove_registers() in cleanup_msi() fails, not to return
>   directly, instead try to free msi and re-add ctrl handler.
> * Pass pdev->vpci into vpci_add_register() instead of pdev->vpci->msi in
>   init_msi() since we need that every handler realize that msi is NULL
>   when msi is free but handlers are still in there.

Imo this latter change would better have been a separate patch. I'm not
going to insist though (also I really can't, for not being a maintainer
of this file).

> @@ -193,6 +234,42 @@ static void cf_check mask_write(
>      msi->mask = val;
>  }
>  
> +static int cf_check cleanup_msi(const struct pci_dev *pdev)
> +{
> +    int rc;
> +    unsigned int end;
> +    struct vpci *vpci = pdev->vpci;
> +    const unsigned int msi_pos = pdev->msi_pos;
> +    const unsigned int ctrl = msi_control_reg(msi_pos);
> +
> +    if ( !msi_pos || !vpci->msi )
> +        return 0;
> +
> +    if ( vpci->msi->masking )
> +        end = msi_pending_bits_reg(msi_pos, vpci->msi->address64);
> +    else
> +        end = msi_mask_bits_reg(msi_pos, vpci->msi->address64) - 2;

What's this "- 2" for? If there's no masking support, you want to cut off
_at_ the mask register, not 2 bytes ahead of it? Just like you cut off at
the pending bits register when there is masking support.

> +    rc = vpci_remove_registers(vpci, ctrl, end - ctrl);
> +    if ( rc )
> +        printk(XENLOG_WARNING "%pd %pp: fail to remove MSI handlers rc=%d\n",
> +               pdev->domain, &pdev->sbdf, rc);
> +
> +    XFREE(vpci->msi);
> +
> +    /*
> +     * The driver may not traverse the capability list and think device
> +     * supports MSI by default. So here let the control register of MSI
> +     * be Read-Only is to ensure MSI disabled.
> +     */
> +    rc = vpci_add_register(vpci, vpci_hw_read16, NULL, ctrl, 2, NULL);
> +    if ( rc )
> +        printk(XENLOG_ERR "%pd %pp: fail to add MSI ctrl handler rc=%d\n",
> +               pdev->domain, &pdev->sbdf, rc);

Imo the uses of XENLOG_ERR and XENLOG_WARNING want to change places. The latter
is extremely likely to be a follow-on failure from the first one failing. Plus
the latter failing is covered by what you add to control_read(). Which leaves
as the only case where this is really an error (and XENLOG_ERR might then be
warranted in both places) if the former succeeds and only the latter fails.

Hmm, then again vpci_init_capabilities() would too issue an error message in
that case. Perhaps keep as is then.

Jan
Re: [PATCH v7 7/8] vpci/msi: Free MSI resources when init_msi() fails
Posted by Chen, Jiqian 3 months, 3 weeks ago
On 2025/7/8 23:13, Jan Beulich wrote:
> On 04.07.2025 09:08, Jiqian Chen wrote:
>> ---
>> v6->v7 changes:
>> * Change the pointer parameter of cleanup_msi() to be const.
>> * When vpci_remove_registers() in cleanup_msi() fails, not to return
>>   directly, instead try to free msi and re-add ctrl handler.
>> * Pass pdev->vpci into vpci_add_register() instead of pdev->vpci->msi in
>>   init_msi() since we need that every handler realize that msi is NULL
>>   when msi is free but handlers are still in there.
> 
> Imo this latter change would better have been a separate patch. I'm not
> going to insist though (also I really can't, for not being a maintainer
> of this file).
Thanks. I Will do if Roger has the same opinion.

> 
>> @@ -193,6 +234,42 @@ static void cf_check mask_write(
>>      msi->mask = val;
>>  }
>>  
>> +static int cf_check cleanup_msi(const struct pci_dev *pdev)
>> +{
>> +    int rc;
>> +    unsigned int end;
>> +    struct vpci *vpci = pdev->vpci;
>> +    const unsigned int msi_pos = pdev->msi_pos;
>> +    const unsigned int ctrl = msi_control_reg(msi_pos);
>> +
>> +    if ( !msi_pos || !vpci->msi )
>> +        return 0;
>> +
>> +    if ( vpci->msi->masking )
>> +        end = msi_pending_bits_reg(msi_pos, vpci->msi->address64);
>> +    else
>> +        end = msi_mask_bits_reg(msi_pos, vpci->msi->address64) - 2;
> 
> What's this "- 2" for? If there's no masking support, you want to cut off
> _at_ the mask register, not 2 bytes ahead of it? Just like you cut off at
> the pending bits register when there is masking support.
"-2" here is to cut the reserved 2 bytes of Message Data if there is no masking support.

> 
>> +    rc = vpci_remove_registers(vpci, ctrl, end - ctrl);
>> +    if ( rc )
>> +        printk(XENLOG_WARNING "%pd %pp: fail to remove MSI handlers rc=%d\n",
>> +               pdev->domain, &pdev->sbdf, rc);
>> +
>> +    XFREE(vpci->msi);
>> +
>> +    /*
>> +     * The driver may not traverse the capability list and think device
>> +     * supports MSI by default. So here let the control register of MSI
>> +     * be Read-Only is to ensure MSI disabled.
>> +     */
>> +    rc = vpci_add_register(vpci, vpci_hw_read16, NULL, ctrl, 2, NULL);
>> +    if ( rc )
>> +        printk(XENLOG_ERR "%pd %pp: fail to add MSI ctrl handler rc=%d\n",
>> +               pdev->domain, &pdev->sbdf, rc);
> 
> Imo the uses of XENLOG_ERR and XENLOG_WARNING want to change places. The latter
> is extremely likely to be a follow-on failure from the first one failing. Plus
> the latter failing is covered by what you add to control_read(). Which leaves
> as the only case where this is really an error (and XENLOG_ERR might then be
> warranted in both places) if the former succeeds and only the latter fails.
> 
> Hmm, then again vpci_init_capabilities() would too issue an error message in
> that case. Perhaps keep as is then.
I am thinking maybe I need to add a check that "if ( rc == -EEXIST ) return 0;" here.
Since in that case, the failure is because vpci_remove_register fails to remove control handler and it can do the same thing we need here, so should return success.

> 
> Jan

-- 
Best regards,
Jiqian Chen.
Re: [PATCH v7 7/8] vpci/msi: Free MSI resources when init_msi() fails
Posted by Jan Beulich 3 months, 3 weeks ago
On 09.07.2025 08:10, Chen, Jiqian wrote:
> On 2025/7/8 23:13, Jan Beulich wrote:
>> On 04.07.2025 09:08, Jiqian Chen wrote:
>>> @@ -193,6 +234,42 @@ static void cf_check mask_write(
>>>      msi->mask = val;
>>>  }
>>>  
>>> +static int cf_check cleanup_msi(const struct pci_dev *pdev)
>>> +{
>>> +    int rc;
>>> +    unsigned int end;
>>> +    struct vpci *vpci = pdev->vpci;
>>> +    const unsigned int msi_pos = pdev->msi_pos;
>>> +    const unsigned int ctrl = msi_control_reg(msi_pos);
>>> +
>>> +    if ( !msi_pos || !vpci->msi )
>>> +        return 0;
>>> +
>>> +    if ( vpci->msi->masking )
>>> +        end = msi_pending_bits_reg(msi_pos, vpci->msi->address64);
>>> +    else
>>> +        end = msi_mask_bits_reg(msi_pos, vpci->msi->address64) - 2;
>>
>> What's this "- 2" for? If there's no masking support, you want to cut off
>> _at_ the mask register, not 2 bytes ahead of it? Just like you cut off at
>> the pending bits register when there is masking support.
> "-2" here is to cut the reserved 2 bytes of Message Data if there is no masking support.

Hmm, init_msi() doesn't intercept Extended Message Data when present. I
think that's wrong there, leading to the oddity here. (Imo such use of
hard coded numbers would almost always want to be accompanied by a
comment.)

Jan