Xen vPCI only supports virtualizing the MSI and MSI-X capabilities, so all other
PCI capabilities should be hidden from a domU for now. We parse the physical PCI
capabilities linked list and add vPCI register handlers for the next elements,
inserting our own next value, thus presenting a modified linked list to the
domU.
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
If there are no capabilities to be exposed to the guest, a future status
register handler likely would want to mask the PCI_STATUS_CAP_LIST bit. I'm
thinking we could track this in struct vpci_header as follows:
vpci.h:
struct vpci_header {
...
+ bool mask_cap_list : 1;
} header;
header.c:init_bars():
rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL,
PCI_CAPABILITY_LIST, 1,
(void *)(uintptr_t)next);
if ( rc )
return rc;
+ if ( !next )
+ header->mask_cap_list = true;
+
for ( ttl = 48; ttl > 0; ttl-- )
{
uint8_t pos;
---
xen/drivers/vpci/header.c | 78 +++++++++++++++++++++++++++++++++++++++
1 file changed, 78 insertions(+)
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index 60f7049e3498..cb1304fa1eed 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -513,6 +513,36 @@ static void cf_check rom_write(
rom->addr = val & PCI_ROM_ADDRESS_MASK;
}
+static uint8_t vpci_find_next_cap(pci_sbdf_t sbdf, uint8_t pos)
+{
+ uint8_t id;
+ int ttl;
+
+ if ( pos < 0x40 )
+ pos = pci_conf_read8(sbdf, PCI_CAPABILITY_LIST);
+ else
+ pos = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_NEXT);
+
+ for ( ttl = 48; ttl > 0; ttl-- )
+ {
+ if ( pos < 0x40 )
+ break;
+
+ pos &= ~3;
+ id = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_ID);
+
+ if ( id == 0xff )
+ break;
+
+ if ( id == PCI_CAP_ID_MSI ||
+ id == PCI_CAP_ID_MSIX )
+ return pos;
+
+ pos = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_NEXT);
+ }
+ return 0;
+}
+
static int cf_check init_bars(struct pci_dev *pdev)
{
uint16_t cmd;
@@ -544,6 +574,54 @@ static int cf_check init_bars(struct pci_dev *pdev)
if ( rc )
return rc;
+ if ( !is_hardware_domain(pdev->domain) )
+ {
+ if ( (pci_conf_read16(pdev->sbdf, PCI_STATUS) & PCI_STATUS_CAP_LIST)
+ == 0 )
+ {
+ /* RAZ/WI */
+ rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL,
+ PCI_CAPABILITY_LIST, 1, NULL);
+ if ( rc )
+ return rc;
+ }
+ else
+ {
+ /* Only expose capabilities to the guest that vPCI can handle. */
+ uint8_t next, ttl;
+
+ next = vpci_find_next_cap(pdev->sbdf, PCI_CAPABILITY_LIST);
+
+ rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL,
+ PCI_CAPABILITY_LIST, 1,
+ (void *)(uintptr_t)next);
+ if ( rc )
+ return rc;
+
+ for ( ttl = 48; ttl > 0; ttl-- )
+ {
+ uint8_t pos;
+
+ if ( !next )
+ break;
+
+ pos = next;
+ next = vpci_find_next_cap(pdev->sbdf, pos);
+
+ rc = vpci_add_register(pdev->vpci, vpci_hw_read8, NULL,
+ pos + PCI_CAP_LIST_ID, 1, NULL);
+ if ( rc )
+ return rc;
+
+ rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL,
+ pos + PCI_CAP_LIST_NEXT, 1,
+ (void *)(uintptr_t)next);
+ if ( rc )
+ return rc;
+ }
+ }
+ }
+
if ( pdev->ignore_bars )
return 0;
--
2.41.0
On 10.08.2023 21:12, Stewart Hildebrand wrote:
> Xen vPCI only supports virtualizing the MSI and MSI-X capabilities, so all other
> PCI capabilities should be hidden from a domU for now.
I'm not sure about "should"; imo this would need evaluating for every cap
type separately. I'm okay though to take this as a starting point. What
needs considering (and mentioning here) is that there may be (iirc: are)
drivers which depend on being able to find/access certain capabilities.
Also - what about extended capabilities? Don't we want to hide them all
then as well (by returning 0 for the 32-bit value at 0x100)?
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -513,6 +513,36 @@ static void cf_check rom_write(
> rom->addr = val & PCI_ROM_ADDRESS_MASK;
> }
>
> +static uint8_t vpci_find_next_cap(pci_sbdf_t sbdf, uint8_t pos)
> +{
> + uint8_t id;
> + int ttl;
> +
> + if ( pos < 0x40 )
> + pos = pci_conf_read8(sbdf, PCI_CAPABILITY_LIST);
> + else
> + pos = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_NEXT);
How about avoiding the if/else by having the caller pass in a useful
value, rather than PCI_CAPABILITY_LIST? I.e.
#define PCI_CAP_LIST_FIRST (PCI_CAPABILITY_LIST - PCI_CAP_LIST_NEXT)
> + for ( ttl = 48; ttl > 0; ttl-- )
> + {
> + if ( pos < 0x40 )
> + break;
> +
> + pos &= ~3;
> + id = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_ID);
> +
> + if ( id == 0xff )
> + break;
> +
> + if ( id == PCI_CAP_ID_MSI ||
> + id == PCI_CAP_ID_MSIX )
> + return pos;
Can this please start out as switch() right away?
> + pos = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_NEXT);
> + }
> + return 0;
> +}
Nit: Blank line please ahead of main function return point.
I also notice that the function isn't really vPCI-specific in any way
(except for the specific PCI_CAP_ID_* compared against). Would it
perhaps make sense to have it be a general utility function, living in
xen/drivers/pci/pci.c next to its relatives?
> @@ -544,6 +574,54 @@ static int cf_check init_bars(struct pci_dev *pdev)
> if ( rc )
> return rc;
>
> + if ( !is_hardware_domain(pdev->domain) )
> + {
> + if ( (pci_conf_read16(pdev->sbdf, PCI_STATUS) & PCI_STATUS_CAP_LIST)
> + == 0 )
> + {
> + /* RAZ/WI */
> + rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL,
> + PCI_CAPABILITY_LIST, 1, NULL);
> + if ( rc )
> + return rc;
> + }
> + else
> + {
> + /* Only expose capabilities to the guest that vPCI can handle. */
> + uint8_t next, ttl;
> +
> + next = vpci_find_next_cap(pdev->sbdf, PCI_CAPABILITY_LIST);
> +
> + rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL,
> + PCI_CAPABILITY_LIST, 1,
> + (void *)(uintptr_t)next);
In vpci_find_next_cap() the low 2 bits were masked off. While reserved
at present, I wonder whether we wouldn't be better off passing them
"through".
> + if ( rc )
> + return rc;
> +
> + for ( ttl = 48; ttl > 0; ttl-- )
vpci_find_next_cap() already bounds its loops; you effectively allow for
48*48 iterations here. It would seem better if the ttl applied globally,
and would hence want to be an in/out for vpci_find_next_cap().
Also note that according to ./CODING_STYLE ttl shouldn't be uint<N>_t
(this likely extends to other uses of such types here), and plain int
also isn't really appropriate for a value which can't go negative.
Jan
On 8/14/23 10:58, Jan Beulich wrote:
> On 10.08.2023 21:12, Stewart Hildebrand wrote:
>> Xen vPCI only supports virtualizing the MSI and MSI-X capabilities, so all other
>> PCI capabilities should be hidden from a domU for now.
>
> I'm not sure about "should"; imo this would need evaluating for every cap
> type separately. I'm okay though to take this as a starting point. What
> needs considering (and mentioning here) is that there may be (iirc: are)
> drivers which depend on being able to find/access certain capabilities.
OK, I will re-word the commit message.
> Also - what about extended capabilities? Don't we want to hide them all
> then as well (by returning 0 for the 32-bit value at 0x100)?
Yes, it certainly couldn't hurt to add a RAZ/WI handler at 0x100, so I'll do that.
>> --- a/xen/drivers/vpci/header.c
>> +++ b/xen/drivers/vpci/header.c
>> @@ -513,6 +513,36 @@ static void cf_check rom_write(
>> rom->addr = val & PCI_ROM_ADDRESS_MASK;
>> }
>>
>> +static uint8_t vpci_find_next_cap(pci_sbdf_t sbdf, uint8_t pos)
>> +{
>> + uint8_t id;
>> + int ttl;
>> +
>> + if ( pos < 0x40 )
>> + pos = pci_conf_read8(sbdf, PCI_CAPABILITY_LIST);
>> + else
>> + pos = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_NEXT);
>
> How about avoiding the if/else by having the caller pass in a useful
> value, rather than PCI_CAPABILITY_LIST? I.e.
>
> #define PCI_CAP_LIST_FIRST (PCI_CAPABILITY_LIST - PCI_CAP_LIST_NEXT)
OK, yes, I will eliminate the if/else.
>
>> + for ( ttl = 48; ttl > 0; ttl-- )
>> + {
>> + if ( pos < 0x40 )
>> + break;
>> +
>> + pos &= ~3;
>> + id = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_ID);
>> +
>> + if ( id == 0xff )
>> + break;
>> +
>> + if ( id == PCI_CAP_ID_MSI ||
>> + id == PCI_CAP_ID_MSIX )
>> + return pos;
>
> Can this please start out as switch() right away?
Yes, certainly.
>> + pos = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_NEXT);
>> + }
>> + return 0;
>> +}
>
> Nit: Blank line please ahead of main function return point.
>
> I also notice that the function isn't really vPCI-specific in any way
> (except for the specific PCI_CAP_ID_* compared against). Would it
> perhaps make sense to have it be a general utility function, living in
> xen/drivers/pci/pci.c next to its relatives?
Yes. The the PCI_CAP_ID_* comparisons were the only reason I initially decided not to use the existing pci_find_next_cap() function, which performs an almost identical task. I just noticed that the existing pci_find_next_cap() doesn't appear to have any callers. Given this, I'd prefer to modify the existing pci_find_next_cap() to suit our needs.
>> @@ -544,6 +574,54 @@ static int cf_check init_bars(struct pci_dev *pdev)
>> if ( rc )
>> return rc;
>>
>> + if ( !is_hardware_domain(pdev->domain) )
>> + {
>> + if ( (pci_conf_read16(pdev->sbdf, PCI_STATUS) & PCI_STATUS_CAP_LIST)
>> + == 0 )
>> + {
>> + /* RAZ/WI */
>> + rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL,
>> + PCI_CAPABILITY_LIST, 1, NULL);
>> + if ( rc )
>> + return rc;
>> + }
>> + else
>> + {
>> + /* Only expose capabilities to the guest that vPCI can handle. */
>> + uint8_t next, ttl;
>> +
>> + next = vpci_find_next_cap(pdev->sbdf, PCI_CAPABILITY_LIST);
>> +
>> + rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL,
>> + PCI_CAPABILITY_LIST, 1,
>> + (void *)(uintptr_t)next);
>
> In vpci_find_next_cap() the low 2 bits were masked off. While reserved
> at present, I wonder whether we wouldn't be better off passing them
> "through".
Agreed
>> + if ( rc )
>> + return rc;
>> +
>> + for ( ttl = 48; ttl > 0; ttl-- )
>
> vpci_find_next_cap() already bounds its loops; you effectively allow for
> 48*48 iterations here. It would seem better if the ttl applied globally,
> and would hence want to be an in/out for vpci_find_next_cap().
OK
> Also note that according to ./CODING_STYLE ttl shouldn't be uint<N>_t
> (this likely extends to other uses of such types here), and plain int
> also isn't really appropriate for a value which can't go negative.
Good catch, this was an oversight on my behalf. I will fix it.
On 14.08.2023 23:11, Stewart Hildebrand wrote:
> On 8/14/23 10:58, Jan Beulich wrote:
>> On 10.08.2023 21:12, Stewart Hildebrand wrote:
>>> --- a/xen/drivers/vpci/header.c
>>> +++ b/xen/drivers/vpci/header.c
>>> @@ -513,6 +513,36 @@ static void cf_check rom_write(
>>> rom->addr = val & PCI_ROM_ADDRESS_MASK;
>>> }
>>>
>>> +static uint8_t vpci_find_next_cap(pci_sbdf_t sbdf, uint8_t pos)
>>> +{
>>> + uint8_t id;
>>> + int ttl;
>>> +
>>> + if ( pos < 0x40 )
>>> + pos = pci_conf_read8(sbdf, PCI_CAPABILITY_LIST);
>>> + else
>>> + pos = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_NEXT);
>>
>> How about avoiding the if/else by having the caller pass in a useful
>> value, rather than PCI_CAPABILITY_LIST? I.e.
>>
>> #define PCI_CAP_LIST_FIRST (PCI_CAPABILITY_LIST - PCI_CAP_LIST_NEXT)
>
> OK, yes, I will eliminate the if/else.
>
>>
>>> + for ( ttl = 48; ttl > 0; ttl-- )
>>> + {
>>> + if ( pos < 0x40 )
>>> + break;
>>> +
>>> + pos &= ~3;
>>> + id = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_ID);
>>> +
>>> + if ( id == 0xff )
>>> + break;
>>> +
>>> + if ( id == PCI_CAP_ID_MSI ||
>>> + id == PCI_CAP_ID_MSIX )
>>> + return pos;
>>
>> Can this please start out as switch() right away?
>
> Yes, certainly.
>
>>> + pos = pci_conf_read8(sbdf, pos + PCI_CAP_LIST_NEXT);
>>> + }
>>> + return 0;
>>> +}
>>
>> Nit: Blank line please ahead of main function return point.
>>
>> I also notice that the function isn't really vPCI-specific in any way
>> (except for the specific PCI_CAP_ID_* compared against). Would it
>> perhaps make sense to have it be a general utility function, living in
>> xen/drivers/pci/pci.c next to its relatives?
>
> Yes. The the PCI_CAP_ID_* comparisons were the only reason I initially decided not to use the existing pci_find_next_cap() function, which performs an almost identical task. I just noticed that the existing pci_find_next_cap() doesn't appear to have any callers. Given this, I'd prefer to modify the existing pci_find_next_cap() to suit our needs.
Please modify the function only if it then remains (easily) usable for
its original purpose. Even if right now without callers, it exists for
a reason. (How to deal with the Misra unreachable code aspect in such
cases remains to be decided.)
Jan
© 2016 - 2026 Red Hat, Inc.