[v5] Support hiding capability when its initialization fails

[PATCH v5 05/10] vpci: Hide legacy capability when it fails to initialize

Posted by Jiqian Chen 8 months, 2 weeks ago

When vpci fails to initialize a legacy capability of device, it just
returns an error and vPCI gets disabled for the whole device.  That
most likely renders the device unusable, plus possibly causing issues
to Xen itself if guest attempts to program the native MSI or MSI-X
capabilities if present.

So, add new function to hide legacy capability when initialization
fails. And remove the failed legacy capability from the vpci emulated
legacy capability list.

Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
cc: "Roger Pau Monné" <roger.pau@citrix.com>
---
v4->v5 changes:
* Modify vpci_get_register() to delete some unnecessary check, so that I don't need to move function vpci_register_cmp().
* Rename vpci_capability_mask() to vpci_capability_hide().

v3->v4 changes:
* Modify the commit message.
* In function vpci_get_previous_cap_register(), add an ASSERT_UNREACHABLE() if offset below 0x40.
* Modify vpci_capability_mask() to return error instead of using ASSERT.
* Use vpci_remove_register to remove PCI_CAP_LIST_ID register instead of open code.
* Add check "if ( !offset )" in vpci_capability_mask().

v2->v3 changes:
* Separated from the last version patch "vpci: Hide capability when it fails to initialize"
* Whole implementation changed because last version is wrong.
  This version adds a new helper function vpci_get_register() and uses it to get
  target handler and previous handler from vpci->handlers, then remove the target.

v1->v2 changes:
* Removed the "priorities" of initializing capabilities since it isn't used anymore.
* Added new function vpci_capability_mask() and vpci_ext_capability_mask() to
  remove failed capability from list.
* Called vpci_make_msix_hole() in the end of init_msix().

Best regards,
Jiqian Chen.
---
 xen/drivers/vpci/vpci.c | 117 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 113 insertions(+), 4 deletions(-)

diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 2861557e06d2..60e7654ec377 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -83,6 +83,99 @@ static int assign_virtual_sbdf(struct pci_dev *pdev)
 
 #endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */
 
+static struct vpci_register *vpci_get_register(struct vpci *vpci,
+                                               unsigned int offset,
+                                               unsigned int size)
+{
+    struct vpci_register *rm;
+
+    ASSERT(spin_is_locked(&vpci->lock));
+
+    list_for_each_entry ( rm, &vpci->handlers, node )
+    {
+        if ( rm->offset == offset && rm->size == size )
+            return rm;
+
+        if ( offset <= rm->offset )
+            break;
+    }
+
+    return NULL;
+}
+
+static struct vpci_register *vpci_get_previous_cap_register(
+    struct vpci *vpci, unsigned int offset)
+{
+    uint32_t next;
+    struct vpci_register *r;
+
+    if ( offset < 0x40 )
+    {
+        ASSERT_UNREACHABLE();
+        return NULL;
+    }
+
+    r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1);
+    if ( !r )
+        return NULL;
+
+    next = (uint32_t)(uintptr_t)r->private;
+    while ( next >= 0x40 && next != offset )
+    {
+        r = vpci_get_register(vpci, next + PCI_CAP_LIST_NEXT, 1);
+        if ( !r )
+            return NULL;
+        next = (uint32_t)(uintptr_t)r->private;
+    }
+
+    if ( next < 0x40 )
+        return NULL;
+
+    return r;
+}
+
+static int vpci_capability_hide(struct pci_dev *pdev, unsigned int cap)
+{
+    const unsigned int offset = pci_find_cap_offset(pdev->sbdf, cap);
+    struct vpci_register *prev_next_r, *next_r;
+    struct vpci *vpci = pdev->vpci;
+
+    if ( !offset )
+    {
+        ASSERT_UNREACHABLE();
+        return 0;
+    }
+
+    spin_lock(&vpci->lock);
+    next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1);
+    if ( !next_r )
+    {
+        spin_unlock(&vpci->lock);
+        return -ENODEV;
+    }
+
+    prev_next_r = vpci_get_previous_cap_register(vpci, offset);
+    if ( !prev_next_r )
+    {
+        spin_unlock(&vpci->lock);
+        return -ENODEV;
+    }
+
+    prev_next_r->private = next_r->private;
+    /*
+     * Not calling vpci_remove_register() here is to avoid redoing
+     * the register search
+     */
+    list_del(&next_r->node);
+    spin_unlock(&vpci->lock);
+    xfree(next_r);
+
+    if ( !is_hardware_domain(pdev->domain) )
+        return vpci_remove_register(vpci, offset + PCI_CAP_LIST_ID, 1);
+
+    return 0;
+}
+
 static int vpci_init_capabilities(struct pci_dev *pdev)
 {
     for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ )
@@ -91,7 +184,6 @@ static int vpci_init_capabilities(struct pci_dev *pdev)
         const unsigned int cap = capability->id;
         const bool is_ext = capability->is_ext;
         unsigned int pos;
-        int rc;
 
         if ( !is_ext )
             pos = pci_find_cap_offset(pdev->sbdf, cap);
@@ -103,9 +195,26 @@ static int vpci_init_capabilities(struct pci_dev *pdev)
         if ( !pos )
             continue;
 
-        rc = capability->init(pdev);
-        if ( rc )
-            return rc;
+        if ( capability->init(pdev) )
+        {
+            int rc;
+
+            if ( capability->cleanup ) {
+                rc = capability->cleanup(pdev);
+                if ( rc )
+                    return rc;
+            }
+
+            printk(XENLOG_WARNING "%pd %pp: %s cap %u init fail, mask it\n",
+                   pdev->domain, &pdev->sbdf,
+                   is_ext ? "extended" : "legacy", cap);
+            if ( !is_ext )
+            {
+                rc = vpci_capability_hide(pdev, cap);
+                if ( rc )
+                    return rc;
+            }
+        }
     }
 
     return 0;
-- 
2.34.1

Re: [PATCH v5 05/10] vpci: Hide legacy capability when it fails to initialize

Posted by Roger Pau Monné 8 months, 1 week ago

On Mon, May 26, 2025 at 05:45:54PM +0800, Jiqian Chen wrote:
> When vpci fails to initialize a legacy capability of device, it just
> returns an error and vPCI gets disabled for the whole device.  That
> most likely renders the device unusable, plus possibly causing issues
> to Xen itself if guest attempts to program the native MSI or MSI-X
> capabilities if present.
> 
> So, add new function to hide legacy capability when initialization
> fails. And remove the failed legacy capability from the vpci emulated
> legacy capability list.
> 
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> ---
> cc: "Roger Pau Monné" <roger.pau@citrix.com>
> ---
> v4->v5 changes:
> * Modify vpci_get_register() to delete some unnecessary check, so that I don't need to move function vpci_register_cmp().
> * Rename vpci_capability_mask() to vpci_capability_hide().
> 
> v3->v4 changes:
> * Modify the commit message.
> * In function vpci_get_previous_cap_register(), add an ASSERT_UNREACHABLE() if offset below 0x40.
> * Modify vpci_capability_mask() to return error instead of using ASSERT.
> * Use vpci_remove_register to remove PCI_CAP_LIST_ID register instead of open code.
> * Add check "if ( !offset )" in vpci_capability_mask().
> 
> v2->v3 changes:
> * Separated from the last version patch "vpci: Hide capability when it fails to initialize"
> * Whole implementation changed because last version is wrong.
>   This version adds a new helper function vpci_get_register() and uses it to get
>   target handler and previous handler from vpci->handlers, then remove the target.
> 
> v1->v2 changes:
> * Removed the "priorities" of initializing capabilities since it isn't used anymore.
> * Added new function vpci_capability_mask() and vpci_ext_capability_mask() to
>   remove failed capability from list.
> * Called vpci_make_msix_hole() in the end of init_msix().
> 
> Best regards,
> Jiqian Chen.
> ---
>  xen/drivers/vpci/vpci.c | 117 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 113 insertions(+), 4 deletions(-)
> 
> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> index 2861557e06d2..60e7654ec377 100644
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -83,6 +83,99 @@ static int assign_virtual_sbdf(struct pci_dev *pdev)
>  
>  #endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */
>  
> +static struct vpci_register *vpci_get_register(struct vpci *vpci,
> +                                               unsigned int offset,
> +                                               unsigned int size)
> +{
> +    struct vpci_register *rm;

Nit: I think you re-used part of the code from vpci_remove_register()
that names the local variable rm (because it's for removal).  Here it
would better to just name it 'r' (for register).

> +
> +    ASSERT(spin_is_locked(&vpci->lock));
> +
> +    list_for_each_entry ( rm, &vpci->handlers, node )
> +    {
> +        if ( rm->offset == offset && rm->size == size )
> +            return rm;
> +
> +        if ( offset <= rm->offset )
> +            break;
> +    }
> +
> +    return NULL;
> +}
> +
> +static struct vpci_register *vpci_get_previous_cap_register(
> +    struct vpci *vpci, unsigned int offset)
> +{
> +    uint32_t next;
> +    struct vpci_register *r;
> +
> +    if ( offset < 0x40 )
> +    {
> +        ASSERT_UNREACHABLE();
> +        return NULL;
> +    }
> +
> +    r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1);
> +    if ( !r )
> +        return NULL;
> +
> +    next = (uint32_t)(uintptr_t)r->private;
> +    while ( next >= 0x40 && next != offset )
> +    {
> +        r = vpci_get_register(vpci, next + PCI_CAP_LIST_NEXT, 1);
> +        if ( !r )
> +            return NULL;
> +        next = (uint32_t)(uintptr_t)r->private;
> +    }
> +
> +    if ( next < 0x40 )
> +        return NULL;

The code below I think it's a bit simpler (and compact) by having a
single return instead of multiple ones:

for ( r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1); r;
      r = next >= 0x40 ? vpci_get_register(vpci,
                                           next + PCI_CAP_LIST_NEXT, 1)
                       : NULL )
{
    next = (uint32_t)(uintptr_t)r->private;
    ASSERT(next == (uintptr_t)r->private);
    if ( next == offset )
        break;
}

return r;

I haven't tested it however.

> +
> +    return r;
> +}
> +
> +static int vpci_capability_hide(struct pci_dev *pdev, unsigned int cap)
> +{
> +    const unsigned int offset = pci_find_cap_offset(pdev->sbdf, cap);
> +    struct vpci_register *prev_next_r, *next_r;

I think it might be clear to just name this prev_r, next_r,
prev_next_r is IMO a bit confusing.  I understand it refers to the next
capability register in the previous capability, and I think prev_r
might be enough.

> +    struct vpci *vpci = pdev->vpci;
> +
> +    if ( !offset )
> +    {
> +        ASSERT_UNREACHABLE();
> +        return 0;
> +    }
> +
> +    spin_lock(&vpci->lock);
> +    next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1);
> +    if ( !next_r )
> +    {
> +        spin_unlock(&vpci->lock);
> +        return -ENODEV;
> +    }
> +
> +    prev_next_r = vpci_get_previous_cap_register(vpci, offset);
> +    if ( !prev_next_r )
> +    {
> +        spin_unlock(&vpci->lock);
> +        return -ENODEV;
> +    }

You can join both the above into a single check IMO:

next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1);
prev_r = vpci_get_previous_cap_register(vpci, offset);
if ( !next_r || !prev_r )
{
    spin_unlock(&vpci->lock);
    return -ENODEV;
}

There's no benefit in using two equal error condition checks (just
makes the code longer)

> +
> +    prev_next_r->private = next_r->private;
> +    /*
> +     * Not calling vpci_remove_register() here is to avoid redoing
> +     * the register search
> +     */
> +    list_del(&next_r->node);
> +    spin_unlock(&vpci->lock);
> +    xfree(next_r);
> +
> +    if ( !is_hardware_domain(pdev->domain) )
> +        return vpci_remove_register(vpci, offset + PCI_CAP_LIST_ID, 1);
> +
> +    return 0;
> +}
> +
>  static int vpci_init_capabilities(struct pci_dev *pdev)
>  {
>      for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ )
> @@ -91,7 +184,6 @@ static int vpci_init_capabilities(struct pci_dev *pdev)
>          const unsigned int cap = capability->id;
>          const bool is_ext = capability->is_ext;
>          unsigned int pos;
> -        int rc;
>  
>          if ( !is_ext )
>              pos = pci_find_cap_offset(pdev->sbdf, cap);
> @@ -103,9 +195,26 @@ static int vpci_init_capabilities(struct pci_dev *pdev)
>          if ( !pos )
>              continue;
>  
> -        rc = capability->init(pdev);
> -        if ( rc )
> -            return rc;
> +        if ( capability->init(pdev) )

I think you want to store rc here to print it in the warning message
below.

> +        {
> +            int rc;
> +
> +            if ( capability->cleanup ) {
> +                rc = capability->cleanup(pdev);
> +                if ( rc )
> +                    return rc;

Here where both init and cleanup failed, you simply don't print any
message.

> +            }
> +
> +            printk(XENLOG_WARNING "%pd %pp: %s cap %u init fail, mask it\n",
> +                   pdev->domain, &pdev->sbdf,
> +                   is_ext ? "extended" : "legacy", cap);

I think this needs to be done ahead of the cleanup(), and print the
returned error code.  Overall we need messages printed when any of
those fails, as that makes it easier to debug when things go wrong.

Thanks, Roger.

Re: [PATCH v5 05/10] vpci: Hide legacy capability when it fails to initialize

Posted by Chen, Jiqian 8 months, 1 week ago

On 2025/6/5 21:35, Roger Pau Monné wrote:
> On Mon, May 26, 2025 at 05:45:54PM +0800, Jiqian Chen wrote:
>> When vpci fails to initialize a legacy capability of device, it just
>> returns an error and vPCI gets disabled for the whole device.  That
>> most likely renders the device unusable, plus possibly causing issues
>> to Xen itself if guest attempts to program the native MSI or MSI-X
>> capabilities if present.
>>
>> So, add new function to hide legacy capability when initialization
>> fails. And remove the failed legacy capability from the vpci emulated
>> legacy capability list.
>>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>> ---
>> cc: "Roger Pau Monné" <roger.pau@citrix.com>
>> ---
>> v4->v5 changes:
>> * Modify vpci_get_register() to delete some unnecessary check, so that I don't need to move function vpci_register_cmp().
>> * Rename vpci_capability_mask() to vpci_capability_hide().
>>
>> v3->v4 changes:
>> * Modify the commit message.
>> * In function vpci_get_previous_cap_register(), add an ASSERT_UNREACHABLE() if offset below 0x40.
>> * Modify vpci_capability_mask() to return error instead of using ASSERT.
>> * Use vpci_remove_register to remove PCI_CAP_LIST_ID register instead of open code.
>> * Add check "if ( !offset )" in vpci_capability_mask().
>>
>> v2->v3 changes:
>> * Separated from the last version patch "vpci: Hide capability when it fails to initialize"
>> * Whole implementation changed because last version is wrong.
>>   This version adds a new helper function vpci_get_register() and uses it to get
>>   target handler and previous handler from vpci->handlers, then remove the target.
>>
>> v1->v2 changes:
>> * Removed the "priorities" of initializing capabilities since it isn't used anymore.
>> * Added new function vpci_capability_mask() and vpci_ext_capability_mask() to
>>   remove failed capability from list.
>> * Called vpci_make_msix_hole() in the end of init_msix().
>>
>> Best regards,
>> Jiqian Chen.
>> ---
>>  xen/drivers/vpci/vpci.c | 117 ++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 113 insertions(+), 4 deletions(-)
>>
>> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
>> index 2861557e06d2..60e7654ec377 100644
>> --- a/xen/drivers/vpci/vpci.c
>> +++ b/xen/drivers/vpci/vpci.c
>> @@ -83,6 +83,99 @@ static int assign_virtual_sbdf(struct pci_dev *pdev)
>>  
>>  #endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */
>>  
>> +static struct vpci_register *vpci_get_register(struct vpci *vpci,
>> +                                               unsigned int offset,
>> +                                               unsigned int size)
>> +{
>> +    struct vpci_register *rm;
> 
> Nit: I think you re-used part of the code from vpci_remove_register()
> that names the local variable rm (because it's for removal).  Here it
> would better to just name it 'r' (for register).
Will change.
> 
>> +
>> +    ASSERT(spin_is_locked(&vpci->lock));
>> +
>> +    list_for_each_entry ( rm, &vpci->handlers, node )
>> +    {
>> +        if ( rm->offset == offset && rm->size == size )
>> +            return rm;
>> +
>> +        if ( offset <= rm->offset )
>> +            break;
>> +    }
>> +
>> +    return NULL;
>> +}
>> +
>> +static struct vpci_register *vpci_get_previous_cap_register(
>> +    struct vpci *vpci, unsigned int offset)
>> +{
>> +    uint32_t next;
>> +    struct vpci_register *r;
>> +
>> +    if ( offset < 0x40 )
>> +    {
>> +        ASSERT_UNREACHABLE();
>> +        return NULL;
>> +    }
>> +
>> +    r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1);
>> +    if ( !r )
>> +        return NULL;
>> +
>> +    next = (uint32_t)(uintptr_t)r->private;
>> +    while ( next >= 0x40 && next != offset )
>> +    {
>> +        r = vpci_get_register(vpci, next + PCI_CAP_LIST_NEXT, 1);
>> +        if ( !r )
>> +            return NULL;
>> +        next = (uint32_t)(uintptr_t)r->private;
>> +    }
>> +
>> +    if ( next < 0x40 )
>> +        return NULL;
> 
> The code below I think it's a bit simpler (and compact) by having a
> single return instead of multiple ones:
> 
> for ( r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1); r;
>       r = next >= 0x40 ? vpci_get_register(vpci,
>                                            next + PCI_CAP_LIST_NEXT, 1)
>                        : NULL )
> {
>     next = (uint32_t)(uintptr_t)r->private;
>     ASSERT(next == (uintptr_t)r->private);
Why need this ASSERT here?

>     if ( next == offset )
>         break;
> }
> 
> return r;
> 
> I haven't tested it however.
Will change and test.

> 
>> +
>> +    return r;
>> +}
>> +
>> +static int vpci_capability_hide(struct pci_dev *pdev, unsigned int cap)
>> +{
>> +    const unsigned int offset = pci_find_cap_offset(pdev->sbdf, cap);
>> +    struct vpci_register *prev_next_r, *next_r;
> 
> I think it might be clear to just name this prev_r, next_r,
> prev_next_r is IMO a bit confusing.  I understand it refers to the next
> capability register in the previous capability, and I think prev_r
> might be enough.
Will change.
> 
>> +    struct vpci *vpci = pdev->vpci;
>> +
>> +    if ( !offset )
>> +    {
>> +        ASSERT_UNREACHABLE();
>> +        return 0;
>> +    }
>> +
>> +    spin_lock(&vpci->lock);
>> +    next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1);
>> +    if ( !next_r )
>> +    {
>> +        spin_unlock(&vpci->lock);
>> +        return -ENODEV;
>> +    }
>> +
>> +    prev_next_r = vpci_get_previous_cap_register(vpci, offset);
>> +    if ( !prev_next_r )
>> +    {
>> +        spin_unlock(&vpci->lock);
>> +        return -ENODEV;
>> +    }
> 
> You can join both the above into a single check IMO:
> 
> next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1);
> prev_r = vpci_get_previous_cap_register(vpci, offset);
> if ( !next_r || !prev_r )
> {
>     spin_unlock(&vpci->lock);
>     return -ENODEV;
> }
> 
> There's no benefit in using two equal error condition checks (just
> makes the code longer)
Will change.
> 
>> +
>> +    prev_next_r->private = next_r->private;
>> +    /*
>> +     * Not calling vpci_remove_register() here is to avoid redoing
>> +     * the register search
>> +     */
>> +    list_del(&next_r->node);
>> +    spin_unlock(&vpci->lock);
>> +    xfree(next_r);
>> +
>> +    if ( !is_hardware_domain(pdev->domain) )
>> +        return vpci_remove_register(vpci, offset + PCI_CAP_LIST_ID, 1);
>> +
>> +    return 0;
>> +}
>> +
>>  static int vpci_init_capabilities(struct pci_dev *pdev)
>>  {
>>      for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ )
>> @@ -91,7 +184,6 @@ static int vpci_init_capabilities(struct pci_dev *pdev)
>>          const unsigned int cap = capability->id;
>>          const bool is_ext = capability->is_ext;
>>          unsigned int pos;
>> -        int rc;
>>  
>>          if ( !is_ext )
>>              pos = pci_find_cap_offset(pdev->sbdf, cap);
>> @@ -103,9 +195,26 @@ static int vpci_init_capabilities(struct pci_dev *pdev)
>>          if ( !pos )
>>              continue;
>>  
>> -        rc = capability->init(pdev);
>> -        if ( rc )
>> -            return rc;
>> +        if ( capability->init(pdev) )
> 
> I think you want to store rc here to print it in the warning message
> below.
> 
>> +        {
>> +            int rc;
>> +
>> +            if ( capability->cleanup ) {
>> +                rc = capability->cleanup(pdev);
>> +                if ( rc )
>> +                    return rc;
> 
> Here where both init and cleanup failed, you simply don't print any
> message.
Got it , will print message when init, cleanup and hide fail.

> 
>> +            }
>> +
>> +            printk(XENLOG_WARNING "%pd %pp: %s cap %u init fail, mask it\n",
>> +                   pdev->domain, &pdev->sbdf,
>> +                   is_ext ? "extended" : "legacy", cap);
> 
> I think this needs to be done ahead of the cleanup(), and print the
> returned error code.  Overall we need messages printed when any of
> those fails, as that makes it easier to debug when things go wrong.
> 
> Thanks, Roger.

-- 
Best regards,
Jiqian Chen.

Re: [PATCH v5 05/10] vpci: Hide legacy capability when it fails to initialize

Posted by Roger Pau Monné 8 months, 1 week ago

On Fri, Jun 06, 2025 at 07:03:02AM +0000, Chen, Jiqian wrote:
> On 2025/6/5 21:35, Roger Pau Monné wrote:
> > On Mon, May 26, 2025 at 05:45:54PM +0800, Jiqian Chen wrote:
> >> When vpci fails to initialize a legacy capability of device, it just
> >> returns an error and vPCI gets disabled for the whole device.  That
> >> most likely renders the device unusable, plus possibly causing issues
> >> to Xen itself if guest attempts to program the native MSI or MSI-X
> >> capabilities if present.
> >>
> >> So, add new function to hide legacy capability when initialization
> >> fails. And remove the failed legacy capability from the vpci emulated
> >> legacy capability list.
> >>
> >> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> >> ---
> >> cc: "Roger Pau Monné" <roger.pau@citrix.com>
> >> ---
> >> v4->v5 changes:
> >> * Modify vpci_get_register() to delete some unnecessary check, so that I don't need to move function vpci_register_cmp().
> >> * Rename vpci_capability_mask() to vpci_capability_hide().
> >>
> >> v3->v4 changes:
> >> * Modify the commit message.
> >> * In function vpci_get_previous_cap_register(), add an ASSERT_UNREACHABLE() if offset below 0x40.
> >> * Modify vpci_capability_mask() to return error instead of using ASSERT.
> >> * Use vpci_remove_register to remove PCI_CAP_LIST_ID register instead of open code.
> >> * Add check "if ( !offset )" in vpci_capability_mask().
> >>
> >> v2->v3 changes:
> >> * Separated from the last version patch "vpci: Hide capability when it fails to initialize"
> >> * Whole implementation changed because last version is wrong.
> >>   This version adds a new helper function vpci_get_register() and uses it to get
> >>   target handler and previous handler from vpci->handlers, then remove the target.
> >>
> >> v1->v2 changes:
> >> * Removed the "priorities" of initializing capabilities since it isn't used anymore.
> >> * Added new function vpci_capability_mask() and vpci_ext_capability_mask() to
> >>   remove failed capability from list.
> >> * Called vpci_make_msix_hole() in the end of init_msix().
> >>
> >> Best regards,
> >> Jiqian Chen.
> >> ---
> >>  xen/drivers/vpci/vpci.c | 117 ++++++++++++++++++++++++++++++++++++++--
> >>  1 file changed, 113 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> >> index 2861557e06d2..60e7654ec377 100644
> >> --- a/xen/drivers/vpci/vpci.c
> >> +++ b/xen/drivers/vpci/vpci.c
> >> @@ -83,6 +83,99 @@ static int assign_virtual_sbdf(struct pci_dev *pdev)
> >>  
> >>  #endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */
> >>  
> >> +static struct vpci_register *vpci_get_register(struct vpci *vpci,
> >> +                                               unsigned int offset,
> >> +                                               unsigned int size)
> >> +{
> >> +    struct vpci_register *rm;
> > 
> > Nit: I think you re-used part of the code from vpci_remove_register()
> > that names the local variable rm (because it's for removal).  Here it
> > would better to just name it 'r' (for register).
> Will change.
> > 
> >> +
> >> +    ASSERT(spin_is_locked(&vpci->lock));
> >> +
> >> +    list_for_each_entry ( rm, &vpci->handlers, node )
> >> +    {
> >> +        if ( rm->offset == offset && rm->size == size )
> >> +            return rm;
> >> +
> >> +        if ( offset <= rm->offset )
> >> +            break;
> >> +    }
> >> +
> >> +    return NULL;
> >> +}
> >> +
> >> +static struct vpci_register *vpci_get_previous_cap_register(
> >> +    struct vpci *vpci, unsigned int offset)
> >> +{
> >> +    uint32_t next;
> >> +    struct vpci_register *r;
> >> +
> >> +    if ( offset < 0x40 )
> >> +    {
> >> +        ASSERT_UNREACHABLE();
> >> +        return NULL;
> >> +    }
> >> +
> >> +    r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1);
> >> +    if ( !r )
> >> +        return NULL;
> >> +
> >> +    next = (uint32_t)(uintptr_t)r->private;
> >> +    while ( next >= 0x40 && next != offset )
> >> +    {
> >> +        r = vpci_get_register(vpci, next + PCI_CAP_LIST_NEXT, 1);
> >> +        if ( !r )
> >> +            return NULL;
> >> +        next = (uint32_t)(uintptr_t)r->private;
> >> +    }
> >> +
> >> +    if ( next < 0x40 )
> >> +        return NULL;
> > 
> > The code below I think it's a bit simpler (and compact) by having a
> > single return instead of multiple ones:
> > 
> > for ( r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1); r;
> >       r = next >= 0x40 ? vpci_get_register(vpci,
> >                                            next + PCI_CAP_LIST_NEXT, 1)
> >                        : NULL )
> > {
> >     next = (uint32_t)(uintptr_t)r->private;
> >     ASSERT(next == (uintptr_t)r->private);
> Why need this ASSERT here?

Extra safety to ensure the value is not truncated (which will indicate
something is off).  I would not argue strongly for it to be added if
you don't think it's needed.

Thanks, Roger.