:p
atchew
Login
This is a follow-on to 'vPCI: avoid bogus "overlap in extended cap list" warnings', addressing further issues noted there. 1: PCI: determine whether a device has extended config space 2: PCI: pass pdev to pci_ats_{device,enabled}() 3: x86/MSI: pass pdev to read_pci_mem_bar() 4: PCI: pass pdev to pci_find_{,next_}ext_capability() 5: PCI: don't look for ext-caps when there's no extended cfg space 6: vPCI/DomU: really no ext-caps without extended config space Jan
Legacy PCI devices don't have any extended config space. Reading any part thereof may return all ones or other arbitrary data, e.g. in some cases base config space contents repeatedly. Logic follows Linux 6.19-rc's pci_cfg_space_size(), albeit leveraging our determination of device type; in particular some comments are taken verbatim from there. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- Note that alloc_pdev()'s switch doesn't handle DEV_TYPE_PCI2PCIe_BRIDGE at all. Such bridges will therefore not have ->ext_cfg set (which is likely wrong). Shouldn't we handle them like DEV_TYPE_LEGACY_PCI_BRIDGE (or DEV_TYPE_PCI?) anyway (just like VT-d's set_msi_source_id() does)? Linux also has CONFIG_PCI_QUIRKS, allowing to compile out the slightly risky code (as reads may in principle have side effects). Should we gain such, too? --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -XXX,XX +XXX,XX @@ static void apply_quirks(struct pci_dev * from trying to size the BARs or add handlers to trap accesses. */ pdev->ignore_bars = true; + + if ( pdev->ext_cfg ) + { + unsigned int pos; + + /* + * PCI Express to PCI/PCI-X Bridge Specification, rev 1.0, 4.1.4 says + * that when forwarding a type1 configuration request the bridge must + * check that the extended register address field is zero. The bridge + * is not permitted to forward the transactions and must handle it as + * an Unsupported Request. Some bridges do not follow this rule and + * simply drop the extended register bits, resulting in the standard + * config space being aliased, every 256 bytes across the entire + * configuration space. Test for this condition by comparing the first + * dword of each potential alias to the vendor/device ID. + * Known offenders: + * ASM1083/1085 PCIe-to-PCI Reversible Bridge (1b21:1080, rev 01 & 03) + * AMD/ATI SBx00 PCI to PCI Bridge (1002:4384, rev 40) + */ + for ( pos = PCI_CFG_SPACE_SIZE; + pos < PCI_CFG_SPACE_EXP_SIZE; pos += PCI_CFG_SPACE_SIZE ) + { + if ( pci_conf_read16(pdev->sbdf, pos) != vendor || + pci_conf_read16(pdev->sbdf, pos + 2) != device ) + break; + } + + if ( pos >= PCI_CFG_SPACE_EXP_SIZE ) + { + printk(XENLOG_WARNING + "%pp: extended config space aliases base one\n", + &pdev->sbdf); + pdev->ext_cfg = false; + } + } } static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn) @@ -XXX,XX +XXX,XX @@ static struct pci_dev *alloc_pdev(struct unsigned long flags; case DEV_TYPE_PCIe2PCI_BRIDGE: + pdev->ext_cfg = true; + fallthrough; case DEV_TYPE_LEGACY_PCI_BRIDGE: sec_bus = pci_conf_read8(pdev->sbdf, PCI_SECONDARY_BUS); sub_bus = pci_conf_read8(pdev->sbdf, PCI_SUBORDINATE_BUS); @@ -XXX,XX +XXX,XX @@ static struct pci_dev *alloc_pdev(struct pseg->bus2bridge[sec_bus].devfn = devfn; } spin_unlock_irqrestore(&pseg->bus2bridge_lock, flags); + + fallthrough; + case DEV_TYPE_PCI: + pos = pci_find_cap_offset(pdev->sbdf, PCI_CAP_ID_PCIX); + if ( pos && + (pci_conf_read32(pdev->sbdf, pos + PCI_X_STATUS) & + (PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ)) ) + pdev->ext_cfg = true; break; case DEV_TYPE_PCIe_ENDPOINT: + pdev->ext_cfg = true; + pos = pci_find_cap_offset(pdev->sbdf, PCI_CAP_ID_EXP); BUG_ON(!pos); cap = pci_conf_read16(pdev->sbdf, pos + PCI_EXP_DEVCAP); @@ -XXX,XX +XXX,XX @@ static struct pci_dev *alloc_pdev(struct } break; - case DEV_TYPE_PCI: case DEV_TYPE_PCIe_BRIDGE: case DEV_TYPE_PCI_HOST_BRIDGE: + pdev->ext_cfg = true; break; default: @@ -XXX,XX +XXX,XX @@ static struct pci_dev *alloc_pdev(struct break; } + if ( pdev->ext_cfg && + /* + * Regular PCI devices have 256 bytes, but PCI-X 2 and PCI Express + * devices have 4096 bytes. Even if the device is capable, that + * doesn't mean we can access it. Maybe we don't have a way to + * generate extended config space accesses, or the device is behind a + * reverse Express bridge. So we try reading the dword at + * PCI_CFG_SPACE_SIZE which must either be 0 or a valid extended + * capability header. + */ + pci_conf_read32(pdev->sbdf, PCI_CFG_SPACE_SIZE) == 0xffffffffU ) + pdev->ext_cfg = false; + apply_quirks(pdev); check_pdev(pdev); @@ -XXX,XX +XXX,XX @@ int pci_add_device(u16 seg, u8 bus, u8 d list_add(&pdev->vf_list, &pf_pdev->vf_list); } + + if ( !pdev->ext_cfg ) + printk(XENLOG_WARNING + "%pp: VF without extended config space?\n", + &pdev->sbdf); } } --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -XXX,XX +XXX,XX @@ struct pci_dev { nodeid_t node; /* NUMA node */ + /* Whether the device has extended config space. */ + bool ext_cfg; + /* Device to be quarantined, don't automatically re-assign to dom0 */ bool quarantine;
This not only brings both in sync with {en,dis}able_ats_device() but also prepares for doing the same to pci_find_{,next_}ext_capability(). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/passthrough/amd/iommu_cmd.c +++ b/xen/drivers/passthrough/amd/iommu_cmd.c @@ -XXX,XX +XXX,XX @@ void amd_iommu_flush_iotlb(u8 devfn, con if ( !ats_enabled ) return; - if ( !pci_ats_enabled(pdev->seg, pdev->bus, pdev->devfn) ) + if ( !pci_ats_enabled(pdev) ) return; iommu = find_iommu_for_device(pdev->sbdf); --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -XXX,XX +XXX,XX @@ static bool use_ats( { return !ivrs_dev->block_ats && iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) && - pci_ats_device(iommu->sbdf.seg, pdev->bus, pdev->devfn); + pci_ats_device(pdev); } static int __must_check amd_iommu_setup_domain_device( @@ -XXX,XX +XXX,XX @@ static int __must_check amd_iommu_setup_ ASSERT(pcidevs_locked()); - if ( use_ats(pdev, iommu, ivrs_dev) && - !pci_ats_enabled(iommu->sbdf.seg, bus, pdev->devfn) ) + if ( use_ats(pdev, iommu, ivrs_dev) && !pci_ats_enabled(pdev) ) { if ( devfn == pdev->devfn ) enable_ats_device(pdev, &iommu->ats_devices); @@ -XXX,XX +XXX,XX @@ static void amd_iommu_disable_domain_dev ASSERT(pcidevs_locked()); - if ( pci_ats_device(iommu->sbdf.seg, bus, pdev->devfn) && - pci_ats_enabled(iommu->sbdf.seg, bus, pdev->devfn) ) + if ( pci_ats_device(pdev) && pci_ats_enabled(pdev) ) disable_ats_device(pdev); BUG_ON ( iommu->dev_table.buffer == NULL ); --- a/xen/drivers/passthrough/ats.h +++ b/xen/drivers/passthrough/ats.h @@ -XXX,XX +XXX,XX @@ extern bool ats_enabled; int enable_ats_device(struct pci_dev *pdev, struct list_head *ats_list); void disable_ats_device(struct pci_dev *pdev); -static inline int pci_ats_enabled(int seg, int bus, int devfn) +static inline int pci_ats_enabled(const struct pci_dev *pdev) { u32 value; int pos; - pos = pci_find_ext_capability(PCI_SBDF(seg, bus, devfn), - PCI_EXT_CAP_ID_ATS); + pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ATS); BUG_ON(!pos); - value = pci_conf_read16(PCI_SBDF(seg, bus, devfn), pos + ATS_REG_CTL); + value = pci_conf_read16(pdev->sbdf, pos + ATS_REG_CTL); return value & ATS_ENABLE; } -static inline int pci_ats_device(int seg, int bus, int devfn) +static inline int pci_ats_device(const struct pci_dev *pdev) { if ( !ats_enabled ) return 0; - return pci_find_ext_capability(PCI_SBDF(seg, bus, devfn), - PCI_EXT_CAP_ID_ATS); + return pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ATS); } #endif /* DRIVERS__PASSTHROUGH__ATS_H */
This not only reduces the number of parameters and local variables, but also prepares for doing the same to pci_find_{,next_}ext_capability(). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -XXX,XX +XXX,XX @@ static int msi_capability_init(struct pc return 0; } -static uint64_t read_pci_mem_bar(pci_sbdf_t sbdf, uint8_t bir, int vf, - const struct pf_info *pf_info) +static uint64_t read_pci_mem_bar(const struct pci_dev *pdev, uint8_t bir, + int vf) { - uint16_t seg = sbdf.seg; - uint8_t bus = sbdf.bus, slot = sbdf.dev, func = sbdf.fn; + uint16_t seg = pdev->sbdf.seg; + uint8_t bus = pdev->sbdf.bus, slot = pdev->sbdf.dev, func = pdev->sbdf.fn; u8 limit; u32 addr, base = PCI_BASE_ADDRESS_0; u64 disp = 0; @@ -XXX,XX +XXX,XX @@ static uint64_t read_pci_mem_bar(pci_sbd unsigned int pos; uint16_t ctrl, num_vf, offset, stride; - ASSERT(pf_info); - - pos = pci_find_ext_capability(sbdf, PCI_EXT_CAP_ID_SRIOV); - ctrl = pci_conf_read16(sbdf, pos + PCI_SRIOV_CTRL); - num_vf = pci_conf_read16(sbdf, pos + PCI_SRIOV_NUM_VF); - offset = pci_conf_read16(sbdf, pos + PCI_SRIOV_VF_OFFSET); - stride = pci_conf_read16(sbdf, pos + PCI_SRIOV_VF_STRIDE); + pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_SRIOV); + ctrl = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_CTRL); + num_vf = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_NUM_VF); + offset = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_VF_OFFSET); + stride = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_VF_STRIDE); if ( !pos || !(ctrl & PCI_SRIOV_CTRL_VFE) || !(ctrl & PCI_SRIOV_CTRL_MSE) || !num_vf || !offset || (num_vf > 1 && !stride) || bir >= PCI_SRIOV_NUM_BARS || - !pf_info->vf_rlen[bir] ) + !pdev->physfn.vf_rlen[bir] ) return 0; base = pos + PCI_SRIOV_BAR; vf -= PCI_BDF(bus, slot, func) + offset; @@ -XXX,XX +XXX,XX @@ static uint64_t read_pci_mem_bar(pci_sbd } if ( vf >= num_vf ) return 0; - BUILD_BUG_ON(ARRAY_SIZE(pf_info->vf_rlen) != PCI_SRIOV_NUM_BARS); - disp = vf * pf_info->vf_rlen[bir]; + BUILD_BUG_ON(ARRAY_SIZE(pdev->physfn.vf_rlen) != PCI_SRIOV_NUM_BARS); + disp = vf * pdev->physfn.vf_rlen[bir]; limit = PCI_SRIOV_NUM_BARS; } else switch ( pci_conf_read8(PCI_SBDF(seg, bus, slot, func), @@ -XXX,XX +XXX,XX @@ static int msix_capability_init(struct p u16 control; u64 table_paddr; u32 table_offset; - u16 seg = dev->seg; - u8 bus = dev->bus; - u8 slot = PCI_SLOT(dev->devfn); - u8 func = PCI_FUNC(dev->devfn); bool maskall = msix->host_maskall, zap_on_error = false; unsigned int pos = dev->msix_pos; @@ -XXX,XX +XXX,XX @@ static int msix_capability_init(struct p (is_hardware_domain(current->domain) && (dev->domain == current->domain || dev->domain == dom_io))) ) { - unsigned int bir = table_offset & PCI_MSIX_BIRMASK, pbus, pslot, pfunc; - int vf; + unsigned int bir = table_offset & PCI_MSIX_BIRMASK; + int vf = -1; + const struct pci_dev *pf_dev = dev; paddr_t pba_paddr; unsigned int pba_offset; - const struct pf_info *pf_info; - if ( !dev->info.is_virtfn ) - { - pbus = bus; - pslot = slot; - pfunc = func; - vf = -1; - pf_info = NULL; - } - else + if ( dev->info.is_virtfn ) { - pbus = dev->info.physfn.bus; - pslot = PCI_SLOT(dev->info.physfn.devfn); - pfunc = PCI_FUNC(dev->info.physfn.devfn); vf = dev->sbdf.bdf; ASSERT(dev->pf_pdev); - pf_info = &dev->pf_pdev->physfn; + pf_dev = dev->pf_pdev; } - table_paddr = read_pci_mem_bar(PCI_SBDF(seg, pbus, pslot, pfunc), bir, - vf, pf_info); + table_paddr = read_pci_mem_bar(pf_dev, bir, vf); WARN_ON(msi && msi->table_base != table_paddr); if ( !table_paddr ) { @@ -XXX,XX +XXX,XX @@ static int msix_capability_init(struct p pba_offset = pci_conf_read32(dev->sbdf, msix_pba_offset_reg(pos)); bir = (u8)(pba_offset & PCI_MSIX_BIRMASK); - pba_paddr = read_pci_mem_bar(PCI_SBDF(seg, pbus, pslot, pfunc), bir, vf, - pf_info); + pba_paddr = read_pci_mem_bar(pf_dev, bir, vf); WARN_ON(!pba_paddr); pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;
This is in preparation of using attributes recorded for devices. Additionally locating (extended) capabilities of non-devices (e.g. phantom functions) makes no sense. While there also eliminate open-coding of PCI_CFG_SPACE_SIZE in adjacent code. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -XXX,XX +XXX,XX @@ static uint64_t read_pci_mem_bar(const s unsigned int pos; uint16_t ctrl, num_vf, offset, stride; - pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_SRIOV); + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); ctrl = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_CTRL); num_vf = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_NUM_VF); offset = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_VF_OFFSET); --- a/xen/drivers/passthrough/ats.c +++ b/xen/drivers/passthrough/ats.c @@ -XXX,XX +XXX,XX @@ int enable_ats_device(struct pci_dev *pd u32 value; int pos; - pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ATS); + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS); BUG_ON(!pos); if ( iommu_verbose ) --- a/xen/drivers/passthrough/ats.h +++ b/xen/drivers/passthrough/ats.h @@ -XXX,XX +XXX,XX @@ static inline int pci_ats_enabled(const u32 value; int pos; - pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ATS); + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS); BUG_ON(!pos); value = pci_conf_read16(pdev->sbdf, pos + ATS_REG_CTL); @@ -XXX,XX +XXX,XX @@ static inline int pci_ats_device(const s if ( !ats_enabled ) return 0; - return pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ATS); + return pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS); } #endif /* DRIVERS__PASSTHROUGH__ATS_H */ --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -XXX,XX +XXX,XX @@ static void pci_enable_acs(struct pci_de if ( !is_iommu_enabled(pdev->domain) ) return; - pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ACS); + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS); if (!pos) return; @@ -XXX,XX +XXX,XX @@ int pci_add_device(u16 seg, u8 bus, u8 d if ( !pdev->info.is_virtfn && !pdev->physfn.vf_rlen[0] ) { - unsigned int pos = pci_find_ext_capability(pdev->sbdf, - PCI_EXT_CAP_ID_SRIOV); + unsigned int pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); uint16_t ctrl = pci_conf_read16(pdev->sbdf, pos + PCI_SRIOV_CTRL); if ( !pos ) --- a/xen/drivers/passthrough/vtd/x86/ats.c +++ b/xen/drivers/passthrough/vtd/x86/ats.c @@ -XXX,XX +XXX,XX @@ int ats_device(const struct pci_dev *pde return 0; ats_drhd = find_ats_dev_drhd(drhd->iommu); - pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ATS); + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS); if ( pos && (ats_drhd == NULL) ) { --- a/xen/drivers/passthrough/vtd/quirks.c +++ b/xen/drivers/passthrough/vtd/quirks.c @@ -XXX,XX +XXX,XX @@ void pci_vtd_quirk(const struct pci_dev /* Sandybridge-EP (Romley) */ case 0x3c00: /* host bridge */ case 0x3c01 ... 0x3c0b: /* root ports */ - pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_ERR); + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); if ( !pos ) { - pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_VNDR); + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_VNDR); while ( pos ) { val = pci_conf_read32(pdev->sbdf, pos + PCI_VNDR_HEADER); @@ -XXX,XX +XXX,XX @@ void pci_vtd_quirk(const struct pci_dev pos += PCI_VNDR_HEADER; break; } - pos = pci_find_next_ext_capability(pdev->sbdf, pos, + pos = pci_find_next_ext_capability(pdev, pos, PCI_EXT_CAP_ID_VNDR); } ff = 0; --- a/xen/drivers/pci/pci.c +++ b/xen/drivers/pci/pci.c @@ -XXX,XX +XXX,XX @@ unsigned int pci_find_next_cap(pci_sbdf_ * within the device's PCI configuration space or 0 if the device does * not support it. */ -unsigned int pci_find_ext_capability(pci_sbdf_t sbdf, unsigned int cap) +unsigned int pci_find_ext_capability(const struct pci_dev *pdev, + unsigned int cap) { - return pci_find_next_ext_capability(sbdf, 0, cap); + return pci_find_next_ext_capability(pdev, 0, cap); } /** @@ -XXX,XX +XXX,XX @@ unsigned int pci_find_ext_capability(pci * within the device's PCI configuration space or 0 if the device does * not support it. */ -unsigned int pci_find_next_ext_capability(pci_sbdf_t sbdf, unsigned int start, +unsigned int pci_find_next_ext_capability(const struct pci_dev *pdev, + unsigned int start, unsigned int cap) { u32 header; int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */ - unsigned int pos = max(start, 0x100U); + unsigned int pos = max(start, PCI_CFG_SPACE_SIZE + 0U); - header = pci_conf_read32(sbdf, pos); + header = pci_conf_read32(pdev->sbdf, pos); /* * If we have no capabilities, this is indicated by cap ID, @@ -XXX,XX +XXX,XX @@ unsigned int pci_find_next_ext_capabilit if ( PCI_EXT_CAP_ID(header) == cap && pos != start ) return pos; pos = PCI_EXT_CAP_NEXT(header); - if ( pos < 0x100 ) + if ( pos < PCI_CFG_SPACE_SIZE ) break; - header = pci_conf_read32(sbdf, pos); + header = pci_conf_read32(pdev->sbdf, pos); } return 0; } --- a/xen/drivers/vpci/rebar.c +++ b/xen/drivers/vpci/rebar.c @@ -XXX,XX +XXX,XX @@ static int cf_check init_rebar(struct pc { uint32_t ctrl; unsigned int nbars; - unsigned int rebar_offset = pci_find_ext_capability(pdev->sbdf, + unsigned int rebar_offset = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_REBAR); if ( !rebar_offset ) --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -XXX,XX +XXX,XX @@ static struct vpci_register *vpci_get_pr static int vpci_ext_capability_hide( const struct pci_dev *pdev, unsigned int cap) { - const unsigned int offset = pci_find_ext_capability(pdev->sbdf, cap); + const unsigned int offset = pci_find_ext_capability(pdev, cap); struct vpci_register *r, *prev_r; struct vpci *vpci = pdev->vpci; uint32_t header, pre_header; @@ -XXX,XX +XXX,XX @@ static int vpci_init_capabilities(struct if ( !is_ext ) pos = pci_find_cap_offset(pdev->sbdf, cap); else if ( is_hardware_domain(pdev->domain) ) - pos = pci_find_ext_capability(pdev->sbdf, cap); + pos = pci_find_ext_capability(pdev, cap); if ( !pos ) continue; @@ -XXX,XX +XXX,XX @@ void vpci_deassign_device(struct pci_dev if ( !capability->is_ext ) pos = pci_find_cap_offset(pdev->sbdf, cap); else if ( is_hardware_domain(pdev->domain) ) - pos = pci_find_ext_capability(pdev->sbdf, cap); + pos = pci_find_ext_capability(pdev, cap); if ( pos ) { int rc = capability->cleanup(pdev, false); --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -XXX,XX +XXX,XX @@ unsigned int pci_find_next_cap_ttl(pci_s unsigned int *ttl); unsigned int pci_find_next_cap(pci_sbdf_t sbdf, unsigned int pos, unsigned int cap); -unsigned int pci_find_ext_capability(pci_sbdf_t sbdf, unsigned int cap); -unsigned int pci_find_next_ext_capability(pci_sbdf_t sbdf, unsigned int start, +unsigned int pci_find_ext_capability(const struct pci_dev *pdev, + unsigned int cap); +unsigned int pci_find_next_ext_capability(const struct pci_dev *pdev, + unsigned int start, unsigned int cap); const char *parse_pci(const char *s, unsigned int *seg_p, unsigned int *bus_p, unsigned int *dev_p, unsigned int *func_p);
Avoid interpreting as extended capabilities what may be about anything. In doing so, vPCI then also won't mis-interpret data from beyond base config space anymore. Fixes: 3b35911d709e ("Enable pci mmcfg and ATS for x86_64") Fixes: a845b50c12f3 ("vpci/header: Emulate extended capability list for dom0") Signed-off-by: Jan Beulich <jbeulich@suse.com> --- Because of the multiple prereq changes, despite the Fixes: tags I'm not quite sure whether to backport this. (I'm leaning towards "no".) --- a/xen/drivers/pci/pci.c +++ b/xen/drivers/pci/pci.c @@ -XXX,XX +XXX,XX @@ unsigned int pci_find_next_ext_capabilit int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */ unsigned int pos = max(start, PCI_CFG_SPACE_SIZE + 0U); + if ( !pdev->ext_cfg ) + { + ASSERT(!start); + return 0; + } + header = pci_conf_read32(pdev->sbdf, pos); /*
Whether to emulate accesses to the first 32 bits of extended config space as read-as-zero or read-as-all-ones depends on whether a device actually has extended config space. If it doesn't, read-as-zero isn't correct; not getting this right may confuse functions like Linux 6.19-rc's pci_ext_cfg_is_aliased(). Fixes: a845b50c12f3 ("vpci/header: Emulate extended capability list for dom0") Signed-off-by: Jan Beulich <jbeulich@suse.com> --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -XXX,XX +XXX,XX @@ static int vpci_init_ext_capability_list unsigned int pos = PCI_CFG_SPACE_SIZE; if ( !is_hardware_domain(pdev->domain) ) + { + if ( !pdev->ext_cfg ) + return 0; + /* Extended capabilities read as zero, write ignore for DomU */ return vpci_add_register(pdev->vpci, vpci_read_val, NULL, pos, 4, (void *)0); + } do {
This is a follow-on to 'vPCI: avoid bogus "overlap in extended cap list" warnings', addressing further issues noted there. v4: Three new patches and some other re-work. See individual patches. 1: x86/PCI: avoid re-evaluation of extended config space accessibility 2: vPCI: introduce private header 3: vPCI: move vpci_init_capabilities() to a separate file 4: vPCI: move capability-list init 5: vPCI: re-init extended-capabilities when MMCFG availability changed Jan
When, during boot, we have already correctly determined availability of the MMCFG access method for a given bus range, there's then no need to invoke pci_check_extcfg() again for every of the devices. This in particular avoids ->ext_cfg to transiently indicate the wrong state. Switch to using Xen style on lines being touched and immediately adjacent ones. Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: Roger Pau Monné <roger.pau@citrix.com> --- v4: Don't bypass mcfg_ioremap(cfg, idx, 0) in pci_mmcfg_arch_disable(). v3: New. --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -XXX,XX +XXX,XX @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( !ret ) ret = pci_segment_iterate(info.segment, physdev_check_pci_extcfg, &info); + else if ( ret > 0 ) /* Indication of "no change". */ + ret = 0; if ( !ret && has_vpci(currd) && (info.flags & XEN_PCI_MMCFG_RESERVED) ) { --- a/xen/arch/x86/x86_64/mmconfig.h +++ b/xen/arch/x86/x86_64/mmconfig.h @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_reserved(uint64_t address, unsigned int flags); int pci_mmcfg_arch_init(void); int pci_mmcfg_arch_enable(unsigned int idx); -void pci_mmcfg_arch_disable(unsigned int idx); +int pci_mmcfg_arch_disable(unsigned int idx); #endif /* X86_64_MMCONFIG_H */ --- a/xen/arch/x86/x86_64/mmconfig-shared.c +++ b/xen/arch/x86/x86_64/mmconfig-shared.c @@ -XXX,XX +XXX,XX @@ static bool __init pci_mmcfg_reject_brok (unsigned int)cfg->start_bus_number, (unsigned int)cfg->end_bus_number); - if (!is_mmconf_reserved(addr, size, i, cfg) || - pci_mmcfg_arch_enable(i)) { + if ( !is_mmconf_reserved(addr, size, i, cfg) || + pci_mmcfg_arch_enable(i) < 0 ) + { pci_mmcfg_arch_disable(i); valid = 0; } @@ -XXX,XX +XXX,XX @@ void __init acpi_mmcfg_init(void) unsigned int i; pci_mmcfg_arch_init(); - for (i = 0; i < pci_mmcfg_config_num; ++i) - if (pci_mmcfg_arch_enable(i)) + for ( i = 0; i < pci_mmcfg_config_num; ++i ) + if ( pci_mmcfg_arch_enable(i) < 0 ) valid = 0; } else { acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_reserved(uint64_t address, segment, start_bus, end_bus, address, cfg->address); return -EIO; } - if (flags & XEN_PCI_MMCFG_RESERVED) + + if ( flags & XEN_PCI_MMCFG_RESERVED ) return pci_mmcfg_arch_enable(i); - pci_mmcfg_arch_disable(i); - return 0; + + return pci_mmcfg_arch_disable(i); } } --- a/xen/arch/x86/x86_64/mmconfig_64.c +++ b/xen/arch/x86/x86_64/mmconfig_64.c @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_arch_enable(unsigned int i const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg; unsigned long start_mfn, end_mfn; - if (pci_mmcfg_virt[idx].virt) - return 0; + if ( pci_mmcfg_virt[idx].virt ) + return 1; + pci_mmcfg_virt[idx].virt = mcfg_ioremap(cfg, idx, PAGE_HYPERVISOR_UC); if (!pci_mmcfg_virt[idx].virt) { printk(KERN_ERR "PCI: Cannot map MCFG aperture for segment %04x\n", @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_arch_enable(unsigned int i return 0; } -void pci_mmcfg_arch_disable(unsigned int idx) +int pci_mmcfg_arch_disable(unsigned int idx) { const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg; + int ret = !pci_mmcfg_virt[idx].virt; pci_mmcfg_virt[idx].virt = NULL; /* @@ -XXX,XX +XXX,XX @@ void pci_mmcfg_arch_disable(unsigned int mcfg_ioremap(cfg, idx, 0); printk(KERN_WARNING "PCI: Not using MCFG for segment %04x bus %02x-%02x\n", cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number); + + return ret; } bool pci_mmcfg_decode(unsigned long mfn, unsigned int *seg, unsigned int *bdf)
Before adding more private stuff to xen/vpci.h, split it up. In order to be able to include the private header first in a CU, the per-arch struct decls also need to move (to new asm/vpci.h files). While adjusting the test harness'es Makefile, also switch the pre-existing header symlink-ing rule to a pattern one. Apart from in the test harness code, things only move; no functional change intended. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- Subsequently, at least on x86 more stuff may want moving into asm/vpci.h. --- v4: New. --- a/tools/tests/vpci/Makefile +++ b/tools/tests/vpci/Makefile @@ -XXX,XX +XXX,XX @@ else $(warning HOSTCC != CC, will not run test) endif -$(TARGET): vpci.c vpci.h list.h main.c emul.h - $(CC) $(CFLAGS_xeninclude) -g -o $@ vpci.c main.c +$(TARGET): vpci.c vpci.h list.h private.h main.c emul.h + $(CC) $(CFLAGS_xeninclude) -include emul.h -g -o $@ vpci.c main.c .PHONY: clean clean: @@ -XXX,XX +XXX,XX @@ uninstall: $(RM) -- $(DESTDIR)$(LIBEXEC)/tests/$(TARGET) vpci.c: $(XEN_ROOT)/xen/drivers/vpci/vpci.c - # Remove includes and add the test harness header - sed -e '/#include/d' -e '1s/^/#include "emul.h"/' <$< >$@ + sed -e '/#include/d' <$< >$@ + +private.h: %.h: $(XEN_ROOT)/xen/drivers/vpci/%.h + sed -e '/#include/d' <$< >$@ -list.h: $(XEN_ROOT)/xen/include/xen/list.h -vpci.h: $(XEN_ROOT)/xen/include/xen/vpci.h -list.h vpci.h: +list.h vpci.h: %.h: $(XEN_ROOT)/xen/include/xen/%.h sed -e '/#include/d' <$< >$@ --- a/tools/tests/vpci/emul.h +++ b/tools/tests/vpci/emul.h @@ -XXX,XX +XXX,XX @@ typedef union { #define CONFIG_HAS_VPCI #include "vpci.h" +#include "private.h" #define __hwdom_init --- a/xen/arch/arm/include/asm/pci.h +++ b/xen/arch/arm/include/asm/pci.h @@ -XXX,XX +XXX,XX @@ struct arch_pci_dev { struct device dev; }; -/* Arch-specific MSI data for vPCI. */ -struct vpci_arch_msi { -}; - -/* Arch-specific MSI-X entry data for vPCI. */ -struct vpci_arch_msix_entry { -}; - /* * Because of the header cross-dependencies, e.g. we need both * struct pci_dev and struct arch_pci_dev at the same time, this cannot be --- /dev/null +++ b/xen/arch/arm/include/asm/vpci.h @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef ARM_VPCI_H +#define ARM_VPCI_H + +/* Arch-specific MSI data for vPCI. */ +struct vpci_arch_msi { +}; + +/* Arch-specific MSI-X entry data for vPCI. */ +struct vpci_arch_msix_entry { +}; + +#endif /* ARM_VPCI_H */ --- a/xen/arch/x86/include/asm/hvm/io.h +++ b/xen/arch/x86/include/asm/hvm/io.h @@ -XXX,XX +XXX,XX @@ void msixtbl_init(struct domain *d); static inline void msixtbl_init(struct domain *d) {} #endif -/* Arch-specific MSI data for vPCI. */ -struct vpci_arch_msi { - int pirq; - bool bound; -}; - -/* Arch-specific MSI-X entry data for vPCI. */ -struct vpci_arch_msix_entry { - int pirq; -}; - void stdvga_init(struct domain *d); extern void hvm_dpci_msi_eoi(struct domain *d, int vector); --- /dev/null +++ b/xen/arch/x86/include/asm/vpci.h @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef X86_VPCI_H +#define X86_VPCI_H + +#include <xen/stdbool.h> + +/* Arch-specific MSI data for vPCI. */ +struct vpci_arch_msi { + int pirq; + bool bound; +}; + +/* Arch-specific MSI-X entry data for vPCI. */ +struct vpci_arch_msix_entry { + int pirq; +}; + +#endif /* X86_VPCI_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -XXX,XX +XXX,XX @@ * License along with this program; If not, see <http://www.gnu.org/licenses/>. */ +#include "private.h" + #include <xen/iocap.h> #include <xen/lib.h> #include <xen/sched.h> #include <xen/softirq.h> -#include <xen/vpci.h> #include <xsm/xsm.h> --- a/xen/drivers/vpci/msi.c +++ b/xen/drivers/vpci/msi.c @@ -XXX,XX +XXX,XX @@ * License along with this program; If not, see <http://www.gnu.org/licenses/>. */ +#include "private.h" + #include <xen/sched.h> #include <xen/softirq.h> -#include <xen/vpci.h> #include <asm/msi.h> --- a/xen/drivers/vpci/msix.c +++ b/xen/drivers/vpci/msix.c @@ -XXX,XX +XXX,XX @@ * License along with this program; If not, see <http://www.gnu.org/licenses/>. */ +#include "private.h" + #include <xen/io.h> #include <xen/lib.h> #include <xen/sched.h> -#include <xen/vpci.h> #include <asm/msi.h> #include <asm/p2m.h> --- /dev/null +++ b/xen/drivers/vpci/private.h @@ -XXX,XX +XXX,XX @@ +#ifndef VPCI_PRIVATE_H +#define VPCI_PRIVATE_H + +#include <xen/vpci.h> + +typedef uint32_t vpci_read_t(const struct pci_dev *pdev, unsigned int reg, + void *data); + +typedef void vpci_write_t(const struct pci_dev *pdev, unsigned int reg, + uint32_t val, void *data); + +typedef struct { + unsigned int id; + bool is_ext; + int (* init)(struct pci_dev *pdev); + int (* cleanup)(const struct pci_dev *pdev, bool hide); +} vpci_capability_t; + +#define REGISTER_VPCI_CAPABILITY(cap, name, finit, fclean, ext) \ + static const vpci_capability_t name##_entry \ + __used_section(".data.rel.ro.vpci") = { \ + .id = (cap), \ + .init = (finit), \ + .cleanup = (fclean), \ + .is_ext = (ext), \ + } + +#define REGISTER_VPCI_CAP(name, finit, fclean) \ + REGISTER_VPCI_CAPABILITY(PCI_CAP_ID_##name, name, finit, fclean, false) +#define REGISTER_VPCI_EXTCAP(name, finit, fclean) \ + REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true) + +/* Add/remove a register handler. */ +int __must_check vpci_add_register_mask(struct vpci *vpci, + vpci_read_t *read_handler, + vpci_write_t *write_handler, + unsigned int offset, unsigned int size, + void *data, uint32_t ro_mask, + uint32_t rw1c_mask, uint32_t rsvdp_mask, + uint32_t rsvdz_mask); +int __must_check vpci_add_register(struct vpci *vpci, + vpci_read_t *read_handler, + vpci_write_t *write_handler, + unsigned int offset, unsigned int size, + void *data); + +int vpci_remove_registers(struct vpci *vpci, unsigned int start, + unsigned int size); + +/* Helper to return the value passed in data. */ +uint32_t cf_check vpci_read_val( + const struct pci_dev *pdev, unsigned int reg, void *data); + +/* Passthrough handlers. */ +uint32_t cf_check vpci_hw_read8( + const struct pci_dev *pdev, unsigned int reg, void *data); +uint32_t cf_check vpci_hw_read16( + const struct pci_dev *pdev, unsigned int reg, void *data); +uint32_t cf_check vpci_hw_read32( + const struct pci_dev *pdev, unsigned int reg, void *data); +void cf_check vpci_hw_write8( + const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data); +void cf_check vpci_hw_write16( + const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data); + +#ifdef __XEN__ +/* Make sure there's a hole in the p2m for the MSIX mmio areas. */ +int vpci_make_msix_hole(const struct pci_dev *pdev); + +/* + * Helper functions to fetch MSIX related data. They are used by both the + * emulated MSIX code and the BAR handlers. + */ +static inline paddr_t vmsix_table_host_base(const struct vpci *vpci, + unsigned int nr) +{ + return vpci->header.bars[vpci->msix->tables[nr] & PCI_MSIX_BIRMASK].addr; +} + +static inline paddr_t vmsix_table_host_addr(const struct vpci *vpci, + unsigned int nr) +{ + return vmsix_table_host_base(vpci, nr) + + (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK); +} + +static inline paddr_t vmsix_table_base(const struct vpci *vpci, unsigned int nr) +{ + return vpci->header.bars[vpci->msix->tables[nr] & + PCI_MSIX_BIRMASK].guest_addr; +} + +static inline paddr_t vmsix_table_addr(const struct vpci *vpci, unsigned int nr) +{ + return vmsix_table_base(vpci, nr) + + (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK); +} + +/* + * Note regarding the size calculation of the PBA: the spec mentions "The last + * QWORD will not necessarily be fully populated", so it implies that the PBA + * size is 64-bit aligned. + */ +static inline size_t vmsix_table_size(const struct vpci *vpci, unsigned int nr) +{ + return + (nr == VPCI_MSIX_TABLE) ? vpci->msix->max_entries * PCI_MSIX_ENTRY_SIZE + : ROUNDUP(DIV_ROUND_UP(vpci->msix->max_entries, + 8), 8); +} + +#endif /* __XEN__ */ + +#endif /* VPCI_PRIVATE_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- a/xen/drivers/vpci/rebar.c +++ b/xen/drivers/vpci/rebar.c @@ -XXX,XX +XXX,XX @@ * Author: Jiqian Chen <Jiqian.Chen@amd.com> */ +#include "private.h" + #include <xen/sched.h> -#include <xen/vpci.h> static void cf_check rebar_ctrl_write(const struct pci_dev *pdev, unsigned int reg, --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -XXX,XX +XXX,XX @@ * License along with this program; If not, see <http://www.gnu.org/licenses/>. */ +#include "private.h" + #include <xen/sched.h> -#include <xen/vpci.h> #include <xen/vmap.h> /* Internal struct to store the emulated PCI registers. */ --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -XXX,XX +XXX,XX @@ #include <xen/types.h> #include <xen/list.h> -typedef uint32_t vpci_read_t(const struct pci_dev *pdev, unsigned int reg, - void *data); - -typedef void vpci_write_t(const struct pci_dev *pdev, unsigned int reg, - uint32_t val, void *data); - -typedef struct { - unsigned int id; - bool is_ext; - int (* init)(struct pci_dev *pdev); - int (* cleanup)(const struct pci_dev *pdev, bool hide); -} vpci_capability_t; +#include <asm/vpci.h> #define VPCI_ECAM_BDF(addr) (((addr) & 0x0ffff000) >> 12) @@ -XXX,XX +XXX,XX @@ typedef struct { */ #define VPCI_MAX_VIRT_DEV (PCI_SLOT(~0) + 1) -#define REGISTER_VPCI_CAPABILITY(cap, name, finit, fclean, ext) \ - static const vpci_capability_t name##_entry \ - __used_section(".data.rel.ro.vpci") = { \ - .id = (cap), \ - .init = (finit), \ - .cleanup = (fclean), \ - .is_ext = (ext), \ - } - -#define REGISTER_VPCI_CAP(name, finit, fclean) \ - REGISTER_VPCI_CAPABILITY(PCI_CAP_ID_##name, name, finit, fclean, false) -#define REGISTER_VPCI_EXTCAP(name, finit, fclean) \ - REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true) - int __must_check vpci_init_header(struct pci_dev *pdev); /* Assign vPCI to device by adding handlers. */ @@ -XXX,XX +XXX,XX @@ int __must_check vpci_assign_device(stru /* Remove all handlers and free vpci related structures. */ void vpci_deassign_device(struct pci_dev *pdev); -/* Add/remove a register handler. */ -int __must_check vpci_add_register_mask(struct vpci *vpci, - vpci_read_t *read_handler, - vpci_write_t *write_handler, - unsigned int offset, unsigned int size, - void *data, uint32_t ro_mask, - uint32_t rw1c_mask, uint32_t rsvdp_mask, - uint32_t rsvdz_mask); -int __must_check vpci_add_register(struct vpci *vpci, - vpci_read_t *read_handler, - vpci_write_t *write_handler, - unsigned int offset, unsigned int size, - void *data); - -int vpci_remove_registers(struct vpci *vpci, unsigned int start, - unsigned int size); - /* Generic read/write handlers for the PCI config space. */ uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size); void vpci_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int size, uint32_t data); -/* Helper to return the value passed in data. */ -uint32_t cf_check vpci_read_val( - const struct pci_dev *pdev, unsigned int reg, void *data); - -/* Passthrough handlers. */ -uint32_t cf_check vpci_hw_read8( - const struct pci_dev *pdev, unsigned int reg, void *data); -uint32_t cf_check vpci_hw_read16( - const struct pci_dev *pdev, unsigned int reg, void *data); -uint32_t cf_check vpci_hw_read32( - const struct pci_dev *pdev, unsigned int reg, void *data); -void cf_check vpci_hw_write8( - const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data); -void cf_check vpci_hw_write16( - const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data); - /* * Check for pending vPCI operations on this vcpu. Returns true if the vcpu * should not run. @@ -XXX,XX +XXX,XX @@ struct vpci_vcpu { #ifdef __XEN__ void vpci_dump_msi(void); -/* Make sure there's a hole in the p2m for the MSIX mmio areas. */ -int vpci_make_msix_hole(const struct pci_dev *pdev); - /* Arch-specific vPCI MSI helpers. */ void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev, unsigned int entry, bool mask); @@ -XXX,XX +XXX,XX @@ int __must_check vpci_msix_arch_disable_ void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry); int vpci_msix_arch_print(const struct vpci_msix *msix); -/* - * Helper functions to fetch MSIX related data. They are used by both the - * emulated MSIX code and the BAR handlers. - */ -static inline paddr_t vmsix_table_host_base(const struct vpci *vpci, - unsigned int nr) -{ - return vpci->header.bars[vpci->msix->tables[nr] & PCI_MSIX_BIRMASK].addr; -} - -static inline paddr_t vmsix_table_host_addr(const struct vpci *vpci, - unsigned int nr) -{ - return vmsix_table_host_base(vpci, nr) + - (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK); -} - -static inline paddr_t vmsix_table_base(const struct vpci *vpci, unsigned int nr) -{ - return vpci->header.bars[vpci->msix->tables[nr] & - PCI_MSIX_BIRMASK].guest_addr; -} - -static inline paddr_t vmsix_table_addr(const struct vpci *vpci, unsigned int nr) -{ - return vmsix_table_base(vpci, nr) + - (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK); -} - -/* - * Note regarding the size calculation of the PBA: the spec mentions "The last - * QWORD will not necessarily be fully populated", so it implies that the PBA - * size is 64-bit aligned. - */ -static inline size_t vmsix_table_size(const struct vpci *vpci, unsigned int nr) -{ - return - (nr == VPCI_MSIX_TABLE) ? vpci->msix->max_entries * PCI_MSIX_ENTRY_SIZE - : ROUNDUP(DIV_ROUND_UP(vpci->msix->max_entries, - 8), 8); -} - static inline unsigned int vmsix_entry_nr(const struct vpci_msix *msix, const struct vpci_msix_entry *entry) {
Let's keep capability handling together. Start with moving vpci_init_capabilities() and its helpers, plus splitting out of its cleanup counterpart. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- vpci_get_register(), while now only used by cap.c, didn't look like it would want moving there. --- v4: New. --- a/xen/drivers/vpci/Makefile +++ b/xen/drivers/vpci/Makefile @@ -XXX,XX +XXX,XX @@ +obj-y += cap.o obj-y += vpci.o header.o rebar.o obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o --- /dev/null +++ b/xen/drivers/vpci/cap.c @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Capability handling for guest PCI configuration space. + */ + +#include "private.h" + +#include <xen/sched.h> + +extern const vpci_capability_t __start_vpci_array[]; +extern const vpci_capability_t __end_vpci_array[]; +#define NUM_VPCI_INIT (__end_vpci_array - __start_vpci_array) + +static struct vpci_register *vpci_get_previous_cap_register( + const struct vpci *vpci, unsigned int offset) +{ + unsigned int next; + struct vpci_register *r; + + if ( offset < 0x40 ) + { + ASSERT_UNREACHABLE(); + return NULL; + } + + for ( r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1); r; + r = next >= 0x40 ? vpci_get_register(vpci, + next + PCI_CAP_LIST_NEXT, 1) + : NULL ) + { + next = (unsigned int)(uintptr_t)r->private; + ASSERT(next == (uintptr_t)r->private); + if ( next == offset ) + break; + } + + return r; +} + +static int vpci_capability_hide(const struct pci_dev *pdev, unsigned int cap) +{ + const unsigned int offset = pci_find_cap_offset(pdev->sbdf, cap); + struct vpci_register *prev_r, *next_r; + struct vpci *vpci = pdev->vpci; + + if ( !offset ) + { + ASSERT_UNREACHABLE(); + return 0; + } + + spin_lock(&vpci->lock); + prev_r = vpci_get_previous_cap_register(vpci, offset); + next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1); + if ( !prev_r || !next_r ) + { + spin_unlock(&vpci->lock); + return -ENODEV; + } + + prev_r->private = next_r->private; + /* + * Not calling vpci_remove_registers() here is to avoid redoing + * the register search. + */ + list_del(&next_r->node); + spin_unlock(&vpci->lock); + xfree(next_r); + + if ( !is_hardware_domain(pdev->domain) ) + return vpci_remove_registers(vpci, offset + PCI_CAP_LIST_ID, 1); + + return 0; +} + +static struct vpci_register *vpci_get_previous_ext_cap_register( + const struct vpci *vpci, unsigned int offset) +{ + unsigned int pos = PCI_CFG_SPACE_SIZE; + struct vpci_register *r; + + if ( offset <= PCI_CFG_SPACE_SIZE ) + { + ASSERT_UNREACHABLE(); + return NULL; + } + + for ( r = vpci_get_register(vpci, pos, 4); r; + r = pos > PCI_CFG_SPACE_SIZE ? vpci_get_register(vpci, pos, 4) + : NULL ) + { + uint32_t header = (uint32_t)(uintptr_t)r->private; + + ASSERT(header == (uintptr_t)r->private); + + pos = PCI_EXT_CAP_NEXT(header); + if ( pos == offset ) + break; + } + + return r; +} + +static int vpci_ext_capability_hide( + const struct pci_dev *pdev, unsigned int cap) +{ + const unsigned int offset = pci_find_ext_capability(pdev, cap); + struct vpci_register *r, *prev_r; + struct vpci *vpci = pdev->vpci; + uint32_t header, pre_header; + + if ( offset < PCI_CFG_SPACE_SIZE ) + { + ASSERT_UNREACHABLE(); + return 0; + } + + spin_lock(&vpci->lock); + r = vpci_get_register(vpci, offset, 4); + if ( !r ) + { + spin_unlock(&vpci->lock); + return -ENODEV; + } + + header = (uint32_t)(uintptr_t)r->private; + if ( offset == PCI_CFG_SPACE_SIZE ) + { + if ( PCI_EXT_CAP_NEXT(header) <= PCI_CFG_SPACE_SIZE ) + r->private = (void *)0; + else + /* + * The first extended capability (0x100) can not be removed from + * the linked list, so instead mask its capability ID to return 0 + * hopefully forcing OSes to skip it. + */ + r->private = (void *)(uintptr_t)(header & ~PCI_EXT_CAP_ID(header)); + + spin_unlock(&vpci->lock); + return 0; + } + + prev_r = vpci_get_previous_ext_cap_register(vpci, offset); + if ( !prev_r ) + { + spin_unlock(&vpci->lock); + return -ENODEV; + } + + pre_header = (uint32_t)(uintptr_t)prev_r->private; + pre_header &= ~PCI_EXT_CAP_NEXT_MASK; + pre_header |= header & PCI_EXT_CAP_NEXT_MASK; + prev_r->private = (void *)(uintptr_t)pre_header; + + list_del(&r->node); + spin_unlock(&vpci->lock); + xfree(r); + + return 0; +} + +int vpci_init_capabilities(struct pci_dev *pdev) +{ + for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ ) + { + const vpci_capability_t *capability = &__start_vpci_array[i]; + const unsigned int cap = capability->id; + const bool is_ext = capability->is_ext; + unsigned int pos = 0; + int rc; + + if ( !is_ext ) + pos = pci_find_cap_offset(pdev->sbdf, cap); + else if ( is_hardware_domain(pdev->domain) ) + pos = pci_find_ext_capability(pdev, cap); + + if ( !pos ) + continue; + + rc = capability->init(pdev); + if ( rc ) + { + const char *type = is_ext ? "extended" : "legacy"; + + printk(XENLOG_WARNING + "%pd %pp: init %s cap %u fail rc=%d, mask it\n", + pdev->domain, &pdev->sbdf, type, cap, rc); + + if ( capability->cleanup ) + { + rc = capability->cleanup(pdev, true); + if ( rc ) + { + printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n", + pdev->domain, &pdev->sbdf, type, cap, rc); + if ( !is_hardware_domain(pdev->domain) ) + return rc; + } + } + + if ( !is_ext ) + rc = vpci_capability_hide(pdev, cap); + else + rc = vpci_ext_capability_hide(pdev, cap); + if ( rc ) + { + printk(XENLOG_ERR "%pd %pp: hide %s cap %u fail rc=%d\n", + pdev->domain, &pdev->sbdf, type, cap, rc); + return rc; + } + } + } + + return 0; +} + +void vpci_cleanup_capabilities(struct pci_dev *pdev) +{ + for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ ) + { + const vpci_capability_t *capability = &__start_vpci_array[i]; + const unsigned int cap = capability->id; + unsigned int pos = 0; + + if ( !capability->cleanup ) + continue; + + if ( !capability->is_ext ) + pos = pci_find_cap_offset(pdev->sbdf, cap); + else if ( is_hardware_domain(pdev->domain) ) + pos = pci_find_ext_capability(pdev, cap); + if ( pos ) + { + int rc = capability->cleanup(pdev, false); + + if ( rc ) + printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n", + pdev->domain, &pdev->sbdf, + capability->is_ext ? "extended" : "legacy", cap, rc); + } + } +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- a/xen/drivers/vpci/private.h +++ b/xen/drivers/vpci/private.h @@ -XXX,XX +XXX,XX @@ typedef uint32_t vpci_read_t(const struc typedef void vpci_write_t(const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data); +/* Internal struct to store the emulated PCI registers. */ +struct vpci_register { + vpci_read_t *read; + vpci_write_t *write; + unsigned int size; + unsigned int offset; + void *private; + struct list_head node; + uint32_t ro_mask; + uint32_t rw1c_mask; + uint32_t rsvdp_mask; + uint32_t rsvdz_mask; +}; + typedef struct { unsigned int id; bool is_ext; @@ -XXX,XX +XXX,XX @@ typedef struct { #define REGISTER_VPCI_EXTCAP(name, finit, fclean) \ REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true) +int vpci_init_capabilities(struct pci_dev *pdev); +void vpci_cleanup_capabilities(struct pci_dev *pdev); + /* Add/remove a register handler. */ int __must_check vpci_add_register_mask(struct vpci *vpci, vpci_read_t *read_handler, @@ -XXX,XX +XXX,XX @@ int __must_check vpci_add_register(struc int vpci_remove_registers(struct vpci *vpci, unsigned int start, unsigned int size); +struct vpci_register *vpci_get_register(const struct vpci *vpci, + unsigned int offset, + unsigned int size); + /* Helper to return the value passed in data. */ uint32_t cf_check vpci_read_val( const struct pci_dev *pdev, unsigned int reg, void *data); --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -XXX,XX +XXX,XX @@ #include <xen/sched.h> #include <xen/vmap.h> -/* Internal struct to store the emulated PCI registers. */ -struct vpci_register { - vpci_read_t *read; - vpci_write_t *write; - unsigned int size; - unsigned int offset; - void *private; - struct list_head node; - uint32_t ro_mask; - uint32_t rw1c_mask; - uint32_t rsvdp_mask; - uint32_t rsvdz_mask; -}; - #ifdef __XEN__ -extern const vpci_capability_t __start_vpci_array[]; -extern const vpci_capability_t __end_vpci_array[]; -#define NUM_VPCI_INIT (__end_vpci_array - __start_vpci_array) #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT static int assign_virtual_sbdf(struct pci_dev *pdev) @@ -XXX,XX +XXX,XX @@ static int assign_virtual_sbdf(struct pc #endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ -static struct vpci_register *vpci_get_register(const struct vpci *vpci, - unsigned int offset, - unsigned int size) +struct vpci_register *vpci_get_register(const struct vpci *vpci, + unsigned int offset, + unsigned int size) { struct vpci_register *r; @@ -XXX,XX +XXX,XX @@ static struct vpci_register *vpci_get_re return NULL; } -static struct vpci_register *vpci_get_previous_cap_register( - const struct vpci *vpci, unsigned int offset) -{ - unsigned int next; - struct vpci_register *r; - - if ( offset < 0x40 ) - { - ASSERT_UNREACHABLE(); - return NULL; - } - - for ( r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1); r; - r = next >= 0x40 ? vpci_get_register(vpci, - next + PCI_CAP_LIST_NEXT, 1) - : NULL ) - { - next = (unsigned int)(uintptr_t)r->private; - ASSERT(next == (uintptr_t)r->private); - if ( next == offset ) - break; - } - - return r; -} - -static int vpci_capability_hide(const struct pci_dev *pdev, unsigned int cap) -{ - const unsigned int offset = pci_find_cap_offset(pdev->sbdf, cap); - struct vpci_register *prev_r, *next_r; - struct vpci *vpci = pdev->vpci; - - if ( !offset ) - { - ASSERT_UNREACHABLE(); - return 0; - } - - spin_lock(&vpci->lock); - prev_r = vpci_get_previous_cap_register(vpci, offset); - next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1); - if ( !prev_r || !next_r ) - { - spin_unlock(&vpci->lock); - return -ENODEV; - } - - prev_r->private = next_r->private; - /* - * Not calling vpci_remove_registers() here is to avoid redoing - * the register search. - */ - list_del(&next_r->node); - spin_unlock(&vpci->lock); - xfree(next_r); - - if ( !is_hardware_domain(pdev->domain) ) - return vpci_remove_registers(vpci, offset + PCI_CAP_LIST_ID, 1); - - return 0; -} - -static struct vpci_register *vpci_get_previous_ext_cap_register( - const struct vpci *vpci, unsigned int offset) -{ - unsigned int pos = PCI_CFG_SPACE_SIZE; - struct vpci_register *r; - - if ( offset <= PCI_CFG_SPACE_SIZE ) - { - ASSERT_UNREACHABLE(); - return NULL; - } - - for ( r = vpci_get_register(vpci, pos, 4); r; - r = pos > PCI_CFG_SPACE_SIZE ? vpci_get_register(vpci, pos, 4) - : NULL ) - { - uint32_t header = (uint32_t)(uintptr_t)r->private; - - ASSERT(header == (uintptr_t)r->private); - - pos = PCI_EXT_CAP_NEXT(header); - if ( pos == offset ) - break; - } - - return r; -} - -static int vpci_ext_capability_hide( - const struct pci_dev *pdev, unsigned int cap) -{ - const unsigned int offset = pci_find_ext_capability(pdev, cap); - struct vpci_register *r, *prev_r; - struct vpci *vpci = pdev->vpci; - uint32_t header, pre_header; - - if ( offset < PCI_CFG_SPACE_SIZE ) - { - ASSERT_UNREACHABLE(); - return 0; - } - - spin_lock(&vpci->lock); - r = vpci_get_register(vpci, offset, 4); - if ( !r ) - { - spin_unlock(&vpci->lock); - return -ENODEV; - } - - header = (uint32_t)(uintptr_t)r->private; - if ( offset == PCI_CFG_SPACE_SIZE ) - { - if ( PCI_EXT_CAP_NEXT(header) <= PCI_CFG_SPACE_SIZE ) - r->private = (void *)0; - else - /* - * The first extended capability (0x100) can not be removed from - * the linked list, so instead mask its capability ID to return 0 - * hopefully forcing OSes to skip it. - */ - r->private = (void *)(uintptr_t)(header & ~PCI_EXT_CAP_ID(header)); - - spin_unlock(&vpci->lock); - return 0; - } - - prev_r = vpci_get_previous_ext_cap_register(vpci, offset); - if ( !prev_r ) - { - spin_unlock(&vpci->lock); - return -ENODEV; - } - - pre_header = (uint32_t)(uintptr_t)prev_r->private; - pre_header &= ~PCI_EXT_CAP_NEXT_MASK; - pre_header |= header & PCI_EXT_CAP_NEXT_MASK; - prev_r->private = (void *)(uintptr_t)pre_header; - - list_del(&r->node); - spin_unlock(&vpci->lock); - xfree(r); - - return 0; -} - -static int vpci_init_capabilities(struct pci_dev *pdev) -{ - for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ ) - { - const vpci_capability_t *capability = &__start_vpci_array[i]; - const unsigned int cap = capability->id; - const bool is_ext = capability->is_ext; - unsigned int pos = 0; - int rc; - - if ( !is_ext ) - pos = pci_find_cap_offset(pdev->sbdf, cap); - else if ( is_hardware_domain(pdev->domain) ) - pos = pci_find_ext_capability(pdev, cap); - - if ( !pos ) - continue; - - rc = capability->init(pdev); - if ( rc ) - { - const char *type = is_ext ? "extended" : "legacy"; - - printk(XENLOG_WARNING - "%pd %pp: init %s cap %u fail rc=%d, mask it\n", - pdev->domain, &pdev->sbdf, type, cap, rc); - - if ( capability->cleanup ) - { - rc = capability->cleanup(pdev, true); - if ( rc ) - { - printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n", - pdev->domain, &pdev->sbdf, type, cap, rc); - if ( !is_hardware_domain(pdev->domain) ) - return rc; - } - } - - if ( !is_ext ) - rc = vpci_capability_hide(pdev, cap); - else - rc = vpci_ext_capability_hide(pdev, cap); - if ( rc ) - { - printk(XENLOG_ERR "%pd %pp: hide %s cap %u fail rc=%d\n", - pdev->domain, &pdev->sbdf, type, cap, rc); - return rc; - } - } - } - - return 0; -} - void vpci_deassign_device(struct pci_dev *pdev) { unsigned int i; @@ -XXX,XX +XXX,XX @@ void vpci_deassign_device(struct pci_dev &pdev->domain->vpci_dev_assigned_map); #endif - for ( i = 0; i < NUM_VPCI_INIT; i++ ) - { - const vpci_capability_t *capability = &__start_vpci_array[i]; - const unsigned int cap = capability->id; - unsigned int pos = 0; - - if ( !capability->cleanup ) - continue; - - if ( !capability->is_ext ) - pos = pci_find_cap_offset(pdev->sbdf, cap); - else if ( is_hardware_domain(pdev->domain) ) - pos = pci_find_ext_capability(pdev, cap); - if ( pos ) - { - int rc = capability->cleanup(pdev, false); - - if ( rc ) - printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n", - pdev->domain, &pdev->sbdf, - capability->is_ext ? "extended" : "legacy", cap, rc); - } - } + vpci_cleanup_capabilities(pdev); spin_lock(&pdev->vpci->lock); while ( !list_empty(&pdev->vpci->handlers) )
... both for when the functions are invoked and where they live in source. Don't invoke them directly in vpci_init_header(), but instead first thing in vpci_init_capabilities(). Suggested-by: Roger Pau Monné <roger.pau@citrix.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v4: New. --- a/xen/drivers/vpci/cap.c +++ b/xen/drivers/vpci/cap.c @@ -XXX,XX +XXX,XX @@ static int vpci_ext_capability_hide( return 0; } +static int vpci_init_capability_list(struct pci_dev *pdev) +{ + int rc; + bool mask_cap_list = false; + bool is_hwdom = is_hardware_domain(pdev->domain); + + if ( pci_conf_read16(pdev->sbdf, PCI_STATUS) & PCI_STATUS_CAP_LIST ) + { + /* Only expose capabilities to the guest that vPCI can handle. */ + unsigned int next, ttl = 48; + static const unsigned int supported_caps[] = { + PCI_CAP_ID_MSI, + PCI_CAP_ID_MSIX, + }; + /* + * For dom0, we should expose all capabilities instead of a fixed + * capabilities array, so setting n to 0 here is to get the next + * capability position directly in pci_find_next_cap_ttl. + */ + const unsigned int n = is_hwdom ? 0 : ARRAY_SIZE(supported_caps); + + next = pci_find_next_cap_ttl(pdev->sbdf, PCI_CAPABILITY_LIST, + supported_caps, n, &ttl); + + rc = vpci_add_register(pdev->vpci, vpci_read_val, + is_hwdom ? vpci_hw_write8 : NULL, + PCI_CAPABILITY_LIST, 1, + (void *)(uintptr_t)next); + if ( rc ) + return rc; + + next &= ~3; + + if ( !next && !is_hwdom ) + /* + * If we don't have any supported capabilities to expose to the + * guest, mask the PCI_STATUS_CAP_LIST bit in the status + * register. + */ + mask_cap_list = true; + + while ( next && ttl ) + { + unsigned int pos = next; + + next = pci_find_next_cap_ttl(pdev->sbdf, + pos + PCI_CAP_LIST_NEXT, + supported_caps, n, &ttl); + + if ( !is_hwdom ) + { + rc = vpci_add_register(pdev->vpci, vpci_hw_read8, NULL, + pos + PCI_CAP_LIST_ID, 1, NULL); + if ( rc ) + return rc; + } + + rc = vpci_add_register(pdev->vpci, vpci_read_val, + is_hwdom ? vpci_hw_write8 : NULL, + pos + PCI_CAP_LIST_NEXT, 1, + (void *)(uintptr_t)next); + if ( rc ) + return rc; + + next &= ~3; + } + } + + /* Return early for the hw domain, no masking of PCI_STATUS. */ + if ( is_hwdom ) + return 0; + + /* Utilize rsvdp_mask to hide PCI_STATUS_CAP_LIST from the guest. */ + return vpci_add_register_mask(pdev->vpci, vpci_hw_read16, vpci_hw_write16, + PCI_STATUS, 2, NULL, + PCI_STATUS_RO_MASK & + ~(mask_cap_list ? PCI_STATUS_CAP_LIST : 0), + PCI_STATUS_RW1C_MASK, + mask_cap_list ? PCI_STATUS_CAP_LIST : 0, + PCI_STATUS_RSVDZ_MASK); +} + +static int vpci_init_ext_capability_list(const struct pci_dev *pdev) +{ + unsigned int pos = PCI_CFG_SPACE_SIZE; + + if ( !pdev->ext_cfg ) + return 0; + + if ( !is_hardware_domain(pdev->domain) ) + /* Extended capabilities read as zero, write ignore for DomU */ + return vpci_add_register(pdev->vpci, vpci_read_val, NULL, + pos, 4, (void *)0); + + do + { + uint32_t header = pci_conf_read32(pdev->sbdf, pos); + int rc; + + if ( header == 0xffffffffU ) + { + printk(XENLOG_WARNING + "%pd %pp: broken extended cap list, offset %#x\n", + pdev->domain, &pdev->sbdf, pos); + return 0; + } + + rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL, + pos, 4, (void *)(uintptr_t)header); + if ( rc == -EEXIST ) + { + printk(XENLOG_WARNING + "%pd %pp: overlap in extended cap list, offset %#x\n", + pdev->domain, &pdev->sbdf, pos); + return 0; + } + + if ( rc ) + return rc; + + pos = PCI_EXT_CAP_NEXT(header); + } while ( pos >= PCI_CFG_SPACE_SIZE ); + + return 0; +} + int vpci_init_capabilities(struct pci_dev *pdev) { + int rc; + + rc = vpci_init_capability_list(pdev); + if ( rc ) + return rc; + + rc = vpci_init_ext_capability_list(pdev); + if ( rc ) + return rc; + for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ ) { const vpci_capability_t *capability = &__start_vpci_array[i]; const unsigned int cap = capability->id; const bool is_ext = capability->is_ext; unsigned int pos = 0; - int rc; if ( !is_ext ) pos = pci_find_cap_offset(pdev->sbdf, cap); --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -XXX,XX +XXX,XX @@ static int bar_add_rangeset(const struct return !bar->mem ? -ENOMEM : 0; } -static int vpci_init_capability_list(struct pci_dev *pdev) -{ - int rc; - bool mask_cap_list = false; - bool is_hwdom = is_hardware_domain(pdev->domain); - - if ( pci_conf_read16(pdev->sbdf, PCI_STATUS) & PCI_STATUS_CAP_LIST ) - { - /* Only expose capabilities to the guest that vPCI can handle. */ - unsigned int next, ttl = 48; - static const unsigned int supported_caps[] = { - PCI_CAP_ID_MSI, - PCI_CAP_ID_MSIX, - }; - /* - * For dom0, we should expose all capabilities instead of a fixed - * capabilities array, so setting n to 0 here is to get the next - * capability position directly in pci_find_next_cap_ttl. - */ - const unsigned int n = is_hwdom ? 0 : ARRAY_SIZE(supported_caps); - - next = pci_find_next_cap_ttl(pdev->sbdf, PCI_CAPABILITY_LIST, - supported_caps, n, &ttl); - - rc = vpci_add_register(pdev->vpci, vpci_read_val, - is_hwdom ? vpci_hw_write8 : NULL, - PCI_CAPABILITY_LIST, 1, - (void *)(uintptr_t)next); - if ( rc ) - return rc; - - next &= ~3; - - if ( !next && !is_hwdom ) - /* - * If we don't have any supported capabilities to expose to the - * guest, mask the PCI_STATUS_CAP_LIST bit in the status - * register. - */ - mask_cap_list = true; - - while ( next && ttl ) - { - unsigned int pos = next; - - next = pci_find_next_cap_ttl(pdev->sbdf, - pos + PCI_CAP_LIST_NEXT, - supported_caps, n, &ttl); - - if ( !is_hwdom ) - { - rc = vpci_add_register(pdev->vpci, vpci_hw_read8, NULL, - pos + PCI_CAP_LIST_ID, 1, NULL); - if ( rc ) - return rc; - } - - rc = vpci_add_register(pdev->vpci, vpci_read_val, - is_hwdom ? vpci_hw_write8 : NULL, - pos + PCI_CAP_LIST_NEXT, 1, - (void *)(uintptr_t)next); - if ( rc ) - return rc; - - next &= ~3; - } - } - - /* Return early for the hw domain, no masking of PCI_STATUS. */ - if ( is_hwdom ) - return 0; - - /* Utilize rsvdp_mask to hide PCI_STATUS_CAP_LIST from the guest. */ - return vpci_add_register_mask(pdev->vpci, vpci_hw_read16, vpci_hw_write16, - PCI_STATUS, 2, NULL, - PCI_STATUS_RO_MASK & - ~(mask_cap_list ? PCI_STATUS_CAP_LIST : 0), - PCI_STATUS_RW1C_MASK, - mask_cap_list ? PCI_STATUS_CAP_LIST : 0, - PCI_STATUS_RSVDZ_MASK); -} - -static int vpci_init_ext_capability_list(const struct pci_dev *pdev) -{ - unsigned int pos = PCI_CFG_SPACE_SIZE; - - if ( !pdev->ext_cfg ) - return 0; - - if ( !is_hardware_domain(pdev->domain) ) - /* Extended capabilities read as zero, write ignore for DomU */ - return vpci_add_register(pdev->vpci, vpci_read_val, NULL, - pos, 4, (void *)0); - - do - { - uint32_t header = pci_conf_read32(pdev->sbdf, pos); - int rc; - - if ( header == 0xffffffffU ) - { - printk(XENLOG_WARNING - "%pd %pp: broken extended cap list, offset %#x\n", - pdev->domain, &pdev->sbdf, pos); - return 0; - } - - rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL, - pos, 4, (void *)(uintptr_t)header); - if ( rc == -EEXIST ) - { - printk(XENLOG_WARNING - "%pd %pp: overlap in extended cap list, offset %#x\n", - pdev->domain, &pdev->sbdf, pos); - return 0; - } - - if ( rc ) - return rc; - - pos = PCI_EXT_CAP_NEXT(header); - } while ( pos >= PCI_CFG_SPACE_SIZE ); - - return 0; -} - int vpci_init_header(struct pci_dev *pdev) { uint16_t cmd; @@ -XXX,XX +XXX,XX @@ int vpci_init_header(struct pci_dev *pde if ( rc ) return rc; - rc = vpci_init_capability_list(pdev); - if ( rc ) - return rc; - - rc = vpci_init_ext_capability_list(pdev); - if ( rc ) - return rc; - if ( pdev->ignore_bars ) return 0;
When Dom0 informs us about MMCFG usability, this may change whether extended capabilities are available (accessible) for devices. Zap what might be on record, and re-initialize things. No synchronization is added for the case where devices may already be in use. That'll need sorting when (a) DomU support was added and (b) DomU-s may run already while Dom0 / hwdom still boots (dom0less, Hyperlaunch). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- vpci_reinit_ext_capabilities()'es return value isn't checked, as it doesn't feel quite right to fail the hypercall because of this. At the same time it also doesn't feel quite right to have the function return "void". Thoughts? --- v4: Make sure ->cleanup() and ->init() are invoked. v3: New. --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -XXX,XX +XXX,XX @@ #include <xen/guest_access.h> #include <xen/iocap.h> #include <xen/serial.h> +#include <xen/vpci.h> + #include <asm/current.h> #include <asm/io_apic.h> #include <asm/msi.h> @@ -XXX,XX +XXX,XX @@ int cf_check physdev_check_pci_extcfg(st ASSERT(pdev->seg == info->segment); if ( pdev->bus >= info->start_bus && pdev->bus <= info->end_bus ) + { pci_check_extcfg(pdev); + vpci_reinit_ext_capabilities(pdev); + } return 0; } --- a/xen/drivers/vpci/cap.c +++ b/xen/drivers/vpci/cap.c @@ -XXX,XX +XXX,XX @@ static int vpci_init_ext_capability_list return 0; } -int vpci_init_capabilities(struct pci_dev *pdev) +int vpci_init_capabilities(struct pci_dev *pdev, bool ext_only) { int rc; - rc = vpci_init_capability_list(pdev); - if ( rc ) - return rc; + if ( !ext_only ) + { + rc = vpci_init_capability_list(pdev); + if ( rc ) + return rc; + } rc = vpci_init_ext_capability_list(pdev); if ( rc ) @@ -XXX,XX +XXX,XX @@ int vpci_init_capabilities(struct pci_de unsigned int pos = 0; if ( !is_ext ) - pos = pci_find_cap_offset(pdev->sbdf, cap); + pos = !ext_only ? pci_find_cap_offset(pdev->sbdf, cap) : 0; else if ( is_hardware_domain(pdev->domain) ) pos = pci_find_ext_capability(pdev, cap); @@ -XXX,XX +XXX,XX @@ int vpci_init_capabilities(struct pci_de return 0; } -void vpci_cleanup_capabilities(struct pci_dev *pdev) +void vpci_cleanup_capabilities(struct pci_dev *pdev, bool ext_only) { for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ ) { @@ -XXX,XX +XXX,XX @@ void vpci_cleanup_capabilities(struct pc continue; if ( !capability->is_ext ) - pos = pci_find_cap_offset(pdev->sbdf, cap); + pos = !ext_only ? pci_find_cap_offset(pdev->sbdf, cap) : 0; else if ( is_hardware_domain(pdev->domain) ) pos = pci_find_ext_capability(pdev, cap); if ( pos ) @@ -XXX,XX +XXX,XX @@ void vpci_cleanup_capabilities(struct pc } } +int vpci_reinit_ext_capabilities(struct pci_dev *pdev) +{ + if ( !pdev->vpci ) + return 0; + + vpci_cleanup_capabilities(pdev, true); + + if ( vpci_remove_registers(pdev->vpci, PCI_CFG_SPACE_SIZE, + PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) ) + ASSERT_UNREACHABLE(); + + return vpci_init_capabilities(pdev, true); +} + /* * Local variables: * mode: C --- a/xen/drivers/vpci/private.h +++ b/xen/drivers/vpci/private.h @@ -XXX,XX +XXX,XX @@ typedef struct { #define REGISTER_VPCI_EXTCAP(name, finit, fclean) \ REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true) -int vpci_init_capabilities(struct pci_dev *pdev); -void vpci_cleanup_capabilities(struct pci_dev *pdev); +int vpci_init_capabilities(struct pci_dev *pdev, bool ext_only); +void vpci_cleanup_capabilities(struct pci_dev *pdev, bool ext_only); /* Add/remove a register handler. */ int __must_check vpci_add_register_mask(struct vpci *vpci, --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -XXX,XX +XXX,XX @@ void vpci_deassign_device(struct pci_dev &pdev->domain->vpci_dev_assigned_map); #endif - vpci_cleanup_capabilities(pdev); + vpci_cleanup_capabilities(pdev, false); spin_lock(&pdev->vpci->lock); while ( !list_empty(&pdev->vpci->handlers) ) @@ -XXX,XX +XXX,XX @@ int vpci_assign_device(struct pci_dev *p if ( rc ) goto out; - rc = vpci_init_capabilities(pdev); + rc = vpci_init_capabilities(pdev, false); out: if ( rc ) --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -XXX,XX +XXX,XX @@ #define VPCI_MAX_VIRT_DEV (PCI_SLOT(~0) + 1) int __must_check vpci_init_header(struct pci_dev *pdev); +int vpci_reinit_ext_capabilities(struct pci_dev *pdev); /* Assign vPCI to device by adding handlers. */ int __must_check vpci_assign_device(struct pci_dev *pdev); @@ -XXX,XX +XXX,XX @@ bool vpci_ecam_read(pci_sbdf_t sbdf, uns #else /* !CONFIG_HAS_VPCI */ struct vpci_vcpu {}; +static inline int vpci_reinit_ext_capabilities(struct pci_dev *pdev) +{ + return 0; +} + static inline int vpci_assign_device(struct pci_dev *pdev) { return 0;