Add a new function to emulate the extended capability list for dom0,
and call it in init_header(), so that it will be easy to hide an
extended capability whose initialization fails.
As for the extended capability list of domU, just move the logic
into the above function and keep hiding it for domU.
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
cc: "Roger Pau Monné" <roger.pau@citrix.com>
---
v6->v7 changes:
* Change word "guest" to "DomU" in vpci_init_ext_capability_list().
* Change parameter of vpci_init_ext_capability_list() to be const.
* Delete check "if ( !header )" in the while loop of vpci_init_ext_capability_list().
* Change the loop from while to do while in vpci_init_ext_capability_list().
v5->v6 changes:
* Delete unnecessary parameter "ttl" in vpci_init_ext_capability_list()
since vpci_add_register() can already detect the overlaps.
v4->v5 changes:
* Add check: if the capability list of the hardware has an overlap, print a warning and return 0.
v3->v4 changes:
* Add check "if ( !header ) return 0;" to avoid adding handler for
device that has no extended capabilities.
v2->v3 changes:
* In vpci_init_ext_capability_list(), when domain is domU, directly return after
adding a handler (hiding all extended capabilities for domU).
* In vpci_init_ext_capability_list(), change condition to be "while ( pos >= 0x100U && ttl-- )"
instead of "while ( pos && ttl-- )".
* Add new function vpci_hw_write32, and pass it to extended capability handler for dom0.
v1->v2 changes:
new patch
Best regards,
Jiqian Chen.
---
xen/drivers/vpci/header.c | 44 ++++++++++++++++++++++++++++++++-------
xen/drivers/vpci/vpci.c | 6 ++++++
xen/include/xen/vpci.h | 2 ++
3 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index d26cbba08ee1..8ee8052cd4a3 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -836,6 +836,39 @@ static int vpci_init_capability_list(struct pci_dev *pdev)
PCI_STATUS_RSVDZ_MASK);
}
+static int vpci_init_ext_capability_list(const struct pci_dev *pdev)
+{
+ unsigned int pos = PCI_CFG_SPACE_SIZE;
+
+ if ( !is_hardware_domain(pdev->domain) )
+ /* Extended capabilities read as zero, write ignore for DomU */
+ return vpci_add_register(pdev->vpci, vpci_read_val, NULL,
+ pos, 4, (void *)0);
+
+ do
+ {
+ uint32_t header = pci_conf_read32(pdev->sbdf, pos);
+ int rc;
+
+ rc = vpci_add_register(pdev->vpci, vpci_read_val, vpci_hw_write32,
+ pos, 4, (void *)(uintptr_t)header);
+ if ( rc == -EEXIST )
+ {
+ printk(XENLOG_WARNING
+ "%pd %pp: overlap in extended cap list, offset %#x\n",
+ pdev->domain, &pdev->sbdf, pos);
+ return 0;
+ }
+
+ if ( rc )
+ return rc;
+
+ pos = PCI_EXT_CAP_NEXT(header);
+ } while ( pos >= PCI_CFG_SPACE_SIZE );
+
+ return 0;
+}
+
static int cf_check init_header(struct pci_dev *pdev)
{
uint16_t cmd;
@@ -888,14 +921,9 @@ static int cf_check init_header(struct pci_dev *pdev)
if ( rc )
return rc;
- if ( !is_hwdom )
- {
- /* Extended capabilities read as zero, write ignore */
- rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL, 0x100, 4,
- (void *)0);
- if ( rc )
- return rc;
- }
+ rc = vpci_init_ext_capability_list(pdev);
+ if ( rc )
+ return rc;
if ( pdev->ignore_bars )
return 0;
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 09988f04c27c..8474c0e3b995 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -267,6 +267,12 @@ void cf_check vpci_hw_write16(
pci_conf_write16(pdev->sbdf, reg, val);
}
+void cf_check vpci_hw_write32(
+ const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data)
+{
+ pci_conf_write32(pdev->sbdf, reg, val);
+}
+
int vpci_add_register_mask(struct vpci *vpci, vpci_read_t *read_handler,
vpci_write_t *write_handler, unsigned int offset,
unsigned int size, void *data, uint32_t ro_mask,
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index fc8d5b470b0b..61d16cc8b897 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -80,6 +80,8 @@ void cf_check vpci_hw_write8(
const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data);
void cf_check vpci_hw_write16(
const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data);
+void cf_check vpci_hw_write32(
+ const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data);
/*
* Check for pending vPCI operations on this vcpu. Returns true if the vcpu
--
2.34.1
On 04.07.2025 09:07, Jiqian Chen wrote:
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -836,6 +836,39 @@ static int vpci_init_capability_list(struct pci_dev *pdev)
> PCI_STATUS_RSVDZ_MASK);
> }
>
> +static int vpci_init_ext_capability_list(const struct pci_dev *pdev)
> +{
> + unsigned int pos = PCI_CFG_SPACE_SIZE;
> +
> + if ( !is_hardware_domain(pdev->domain) )
> + /* Extended capabilities read as zero, write ignore for DomU */
> + return vpci_add_register(pdev->vpci, vpci_read_val, NULL,
> + pos, 4, (void *)0);
> +
> + do
> + {
> + uint32_t header = pci_conf_read32(pdev->sbdf, pos);
> + int rc;
> +
> + rc = vpci_add_register(pdev->vpci, vpci_read_val, vpci_hw_write32,
> + pos, 4, (void *)(uintptr_t)header);
If it wasn't for this use of vpci_hw_write32(), I'd be happy to provide my
R-b. But this continues to look bogus to me: What use is it to allow writes
when Dom0 then can't read back any possible effect of such a write (in the
unexpected event that some of the bits were indeed writable)?
Jan
On 2025/7/8 22:10, Jan Beulich wrote:
> On 04.07.2025 09:07, Jiqian Chen wrote:
>> --- a/xen/drivers/vpci/header.c
>> +++ b/xen/drivers/vpci/header.c
>> @@ -836,6 +836,39 @@ static int vpci_init_capability_list(struct pci_dev *pdev)
>> PCI_STATUS_RSVDZ_MASK);
>> }
>>
>> +static int vpci_init_ext_capability_list(const struct pci_dev *pdev)
>> +{
>> + unsigned int pos = PCI_CFG_SPACE_SIZE;
>> +
>> + if ( !is_hardware_domain(pdev->domain) )
>> + /* Extended capabilities read as zero, write ignore for DomU */
>> + return vpci_add_register(pdev->vpci, vpci_read_val, NULL,
>> + pos, 4, (void *)0);
>> +
>> + do
>> + {
>> + uint32_t header = pci_conf_read32(pdev->sbdf, pos);
>> + int rc;
>> +
>> + rc = vpci_add_register(pdev->vpci, vpci_read_val, vpci_hw_write32,
>> + pos, 4, (void *)(uintptr_t)header);
>
> If it wasn't for this use of vpci_hw_write32(), I'd be happy to provide my
> R-b. But this continues to look bogus to me: What use is it to allow writes
> when Dom0 then can't read back any possible effect of such a write (in the
> unexpected event that some of the bits were indeed writable)?
Oh, I got your concern.
What do you think about updating the header value after writing to hardware in the write function?
Or do you prefer to pass NULL here?
>
> Jan
--
Best regards,
Jiqian Chen.
On 09.07.2025 07:29, Chen, Jiqian wrote:
> On 2025/7/8 22:10, Jan Beulich wrote:
>> On 04.07.2025 09:07, Jiqian Chen wrote:
>>> --- a/xen/drivers/vpci/header.c
>>> +++ b/xen/drivers/vpci/header.c
>>> @@ -836,6 +836,39 @@ static int vpci_init_capability_list(struct pci_dev *pdev)
>>> PCI_STATUS_RSVDZ_MASK);
>>> }
>>>
>>> +static int vpci_init_ext_capability_list(const struct pci_dev *pdev)
>>> +{
>>> + unsigned int pos = PCI_CFG_SPACE_SIZE;
>>> +
>>> + if ( !is_hardware_domain(pdev->domain) )
>>> + /* Extended capabilities read as zero, write ignore for DomU */
>>> + return vpci_add_register(pdev->vpci, vpci_read_val, NULL,
>>> + pos, 4, (void *)0);
>>> +
>>> + do
>>> + {
>>> + uint32_t header = pci_conf_read32(pdev->sbdf, pos);
>>> + int rc;
>>> +
>>> + rc = vpci_add_register(pdev->vpci, vpci_read_val, vpci_hw_write32,
>>> + pos, 4, (void *)(uintptr_t)header);
>>
>> If it wasn't for this use of vpci_hw_write32(), I'd be happy to provide my
>> R-b. But this continues to look bogus to me: What use is it to allow writes
>> when Dom0 then can't read back any possible effect of such a write (in the
>> unexpected event that some of the bits were indeed writable)?
> Oh, I got your concern.
> What do you think about updating the header value after writing to hardware in write function?
That would imo be a layering violation. Once again that's something that you
primarily would need Roger's input on.
> Or you prefer to pass NULL here?
Yes, that's what I've been trying to argue for.
Jan
On 2025/7/9 13:32, Jan Beulich wrote:
> On 09.07.2025 07:29, Chen, Jiqian wrote:
>> On 2025/7/8 22:10, Jan Beulich wrote:
>>> On 04.07.2025 09:07, Jiqian Chen wrote:
>>>> --- a/xen/drivers/vpci/header.c
>>>> +++ b/xen/drivers/vpci/header.c
>>>> @@ -836,6 +836,39 @@ static int vpci_init_capability_list(struct pci_dev *pdev)
>>>> PCI_STATUS_RSVDZ_MASK);
>>>> }
>>>>
>>>> +static int vpci_init_ext_capability_list(const struct pci_dev *pdev)
>>>> +{
>>>> + unsigned int pos = PCI_CFG_SPACE_SIZE;
>>>> +
>>>> + if ( !is_hardware_domain(pdev->domain) )
>>>> + /* Extended capabilities read as zero, write ignore for DomU */
>>>> + return vpci_add_register(pdev->vpci, vpci_read_val, NULL,
>>>> + pos, 4, (void *)0);
>>>> +
>>>> + do
>>>> + {
>>>> + uint32_t header = pci_conf_read32(pdev->sbdf, pos);
>>>> + int rc;
>>>> +
>>>> + rc = vpci_add_register(pdev->vpci, vpci_read_val, vpci_hw_write32,
>>>> + pos, 4, (void *)(uintptr_t)header);
>>>
>>> If it wasn't for this use of vpci_hw_write32(), I'd be happy to provide my
>>> R-b. But this continues to look bogus to me: What use is it to allow writes
>>> when Dom0 then can't read back any possible effect of such a write (in the
>>> unexpected event that some of the bits were indeed writable)?
>> Oh, I got your concern.
>> What do you think about updating the header value after writing to hardware in write function?
>
> That would imo be a layering violation. Once again that's something that you
> primarily would need Roger's input on.
OK, wait for Roger's input.
>
>> Or you prefer to pass NULL here?
>
> Yes, that's what I've been trying to argue for.
>
> Jan
--
Best regards,
Jiqian Chen.
On Wed, Jul 09, 2025 at 05:34:28AM +0000, Chen, Jiqian wrote:
> On 2025/7/9 13:32, Jan Beulich wrote:
> > On 09.07.2025 07:29, Chen, Jiqian wrote:
> >> On 2025/7/8 22:10, Jan Beulich wrote:
> >>> On 04.07.2025 09:07, Jiqian Chen wrote:
> >>>> --- a/xen/drivers/vpci/header.c
> >>>> +++ b/xen/drivers/vpci/header.c
> >>>> @@ -836,6 +836,39 @@ static int vpci_init_capability_list(struct pci_dev *pdev)
> >>>> PCI_STATUS_RSVDZ_MASK);
> >>>> }
> >>>>
> >>>> +static int vpci_init_ext_capability_list(const struct pci_dev *pdev)
> >>>> +{
> >>>> + unsigned int pos = PCI_CFG_SPACE_SIZE;
> >>>> +
> >>>> + if ( !is_hardware_domain(pdev->domain) )
> >>>> + /* Extended capabilities read as zero, write ignore for DomU */
> >>>> + return vpci_add_register(pdev->vpci, vpci_read_val, NULL,
> >>>> + pos, 4, (void *)0);
> >>>> +
> >>>> + do
> >>>> + {
> >>>> + uint32_t header = pci_conf_read32(pdev->sbdf, pos);
> >>>> + int rc;
> >>>> +
> >>>> + rc = vpci_add_register(pdev->vpci, vpci_read_val, vpci_hw_write32,
> >>>> + pos, 4, (void *)(uintptr_t)header);
> >>>
> >>> If it wasn't for this use of vpci_hw_write32(), I'd be happy to provide my
> >>> R-b. But this continues to look bogus to me: What use is it to allow writes
> >>> when Dom0 then can't read back any possible effect of such a write (in the
> >>> unexpected event that some of the bits were indeed writable)?
> >> Oh, I got your concern.
> >> What do you think about updating the header value after writing to hardware in write function?
>
> > That would imo be a layering violation. Once again that's something that you
> > primarily would need Roger's input on.
> OK, wait for Roger's input.
Hm, I see the asymmetry of allowing writes but not direct reads, my
thought was to give the hw domain as little interference as possible,
hence my recommendation to use vpci_hw_write32().
In practice I think it's very unlikely that devices re-use reserved
bits in the capability register, so I'm fine with using NULL (thus
discarding the write). We can always add more complex handling here
if we ever come across a device that requires it.
Thanks, Roger.
On 2025/7/21 22:16, Roger Pau Monné wrote:
> On Wed, Jul 09, 2025 at 05:34:28AM +0000, Chen, Jiqian wrote:
>> On 2025/7/9 13:32, Jan Beulich wrote:
>>> On 09.07.2025 07:29, Chen, Jiqian wrote:
>>>> On 2025/7/8 22:10, Jan Beulich wrote:
>>>>> On 04.07.2025 09:07, Jiqian Chen wrote:
>>>>>> --- a/xen/drivers/vpci/header.c
>>>>>> +++ b/xen/drivers/vpci/header.c
>>>>>> @@ -836,6 +836,39 @@ static int vpci_init_capability_list(struct pci_dev *pdev)
>>>>>> PCI_STATUS_RSVDZ_MASK);
>>>>>> }
>>>>>>
>>>>>> +static int vpci_init_ext_capability_list(const struct pci_dev *pdev)
>>>>>> +{
>>>>>> + unsigned int pos = PCI_CFG_SPACE_SIZE;
>>>>>> +
>>>>>> + if ( !is_hardware_domain(pdev->domain) )
>>>>>> + /* Extended capabilities read as zero, write ignore for DomU */
>>>>>> + return vpci_add_register(pdev->vpci, vpci_read_val, NULL,
>>>>>> + pos, 4, (void *)0);
>>>>>> +
>>>>>> + do
>>>>>> + {
>>>>>> + uint32_t header = pci_conf_read32(pdev->sbdf, pos);
>>>>>> + int rc;
>>>>>> +
>>>>>> + rc = vpci_add_register(pdev->vpci, vpci_read_val, vpci_hw_write32,
>>>>>> + pos, 4, (void *)(uintptr_t)header);
>>>>>
>>>>> If it wasn't for this use of vpci_hw_write32(), I'd be happy to provide my
>>>>> R-b. But this continues to look bogus to me: What use is it to allow writes
>>>>> when Dom0 then can't read back any possible effect of such a write (in the
>>>>> unexpected event that some of the bits were indeed writable)?
>>>> Oh, I got your concern.
>>>> What do you think about updating the header value after writing to hardware in write function?
>>
>>> That would imo be a layering violation. Once again that's something that you
>>> primarily would need Roger's input on.
>> OK, wait for Roger's input.
>
> Hm, I see the asymmetry of allowing writes but not direct reads, my
> thought was to give the hw domain as little interference as possible,
> hence my recommendation to use vpci_hw_write32().
>
> In practice I think it's very unlikely that devices re-use reserved
> bits in the capability register, so I'm fine with using NULL (thus
> discarding the write). We can always add more complex handling here
> if we ever come across a device that requires it.
OK, I will delete vpci_hw_write32() in the next version.
Thanks.
>
> Thanks, Roger.
--
Best regards,
Jiqian Chen.
© 2016 - 2025 Red Hat, Inc.