:p
atchew
Login
This is a follow-on to 'vPCI: avoid bogus "overlap in extended cap list" warnings', addressing further issues noted there. v3: Three new patches and some other re-work. See individual patches. 1: PCI: handle PCI->PCIe bridges as well in free_pdev() 2: PCI: determine whether a device has extended config space 3: PCI: don't look for ext-caps when there's no extended cfg space 4: vPCI/DomU: really no ext-caps without extended config space 5: x86/PCI: avoid re-evaluation of extended config space accessibility 6: vPCI: re-init extended-capability lists when MMCFG availability changed Jan
Don't know how I managed to overlook the freeing side when adding the case to alloc_pdev(). Fixes: cd2b9f0b1986 ("PCI: handle PCI->PCIe bridges as well in alloc_pdev()") Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v3: New. --- Noticed due to the original patch still applying cleanly, just with an offset of a few dozen lines. --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -XXX,XX +XXX,XX @@ static void free_pdev(struct pci_seg *ps unsigned long flags; case DEV_TYPE_PCIe2PCI_BRIDGE: + case DEV_TYPE_PCI2PCIe_BRIDGE: case DEV_TYPE_LEGACY_PCI_BRIDGE: sec_bus = pci_conf_read8(pdev->sbdf, PCI_SECONDARY_BUS); sub_bus = pci_conf_read8(pdev->sbdf, PCI_SUBORDINATE_BUS);
Legacy PCI devices don't have any extended config space. Reading any part thereof may return all ones or other arbitrary data, e.g. in some cases base config space contents repeatedly. Logic follows Linux 6.19-rc's pci_cfg_space_size(), albeit leveraging our determination of device type; in particular some comments are taken verbatim from there. Like with Linux'es CONFIG_PCI_QUIRKS, only the alias detection logic is covered by the new "pci=no-quirks". The singular access at PCI_CFG_SPACE_SIZE is left unconditional. Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: Roger Pau Monné <roger.pau@citrix.com> --- The warning near the bottom of pci_check_extcfg() may be issued multiple times for a single device now. Should we try to avoid that? Note that no vPCI adjustments are done here, but they're going to be needed: Whatever requires extended capabilities will need re- evaluating / newly establishing / tearing down in case an invocation of PHYSDEVOP_pci_mmcfg_reserved alters global state. --- v3: Add command line (sub-)option. v2: Major re-work to also check upon PHYSDEVOP_pci_mmcfg_reserved invocation. --- a/docs/misc/xen-command-line.pandoc +++ b/docs/misc/xen-command-line.pandoc @@ -XXX,XX +XXX,XX @@ Only effective if CONFIG_PARTIAL_EMULATI behavior.** ### pci - = List of [ serr=<bool>, perr=<bool> ] + = List of [ serr=<bool>, perr=<bool>, quirks=<bool> ] + +* `serr` and `perr` Default: Signaling left as set by firmware. -Override the firmware settings, and explicitly enable or disable the -signalling of PCI System and Parity errors. + Override the firmware settings, and explicitly enable or disable the + signalling of PCI System and Parity errors. + +* `quirks` + + Default: `on` + + In its negative form, allows to suppress certain quirk workarounds, in case + they cause issues. ### pci-phantom > `=[<seg>:]<bus>:<device>,<stride>` --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -XXX,XX +XXX,XX @@ int physdev_map_pirq(struct domain *d, i struct msi_info *msi); int physdev_unmap_pirq(struct domain *d, int pirq); +int cf_check physdev_check_pci_extcfg(struct pci_dev *pdev, void *arg); + #include "x86_64/mmconfig.h" #ifndef COMPAT @@ -XXX,XX +XXX,XX @@ int physdev_unmap_pirq(struct domain *d, return ret; } + +int cf_check physdev_check_pci_extcfg(struct pci_dev *pdev, void *arg) +{ + const struct physdev_pci_mmcfg_reserved *info = arg; + + ASSERT(pdev->seg == info->segment); + if ( pdev->bus >= info->start_bus && pdev->bus <= info->end_bus ) + pci_check_extcfg(pdev); + + return 0; +} #endif /* COMPAT */ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) @@ -XXX,XX +XXX,XX @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H ret = pci_mmcfg_reserved(info.address, info.segment, info.start_bus, info.end_bus, info.flags); + + if ( !ret ) + ret = pci_segment_iterate(info.segment, physdev_check_pci_extcfg, + &info); + if ( !ret && has_vpci(currd) && (info.flags & XEN_PCI_MMCFG_RESERVED) ) { /* --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -XXX,XX +XXX,XX @@ custom_param("pci-phantom", parse_phanto static u16 __read_mostly command_mask; static u16 __read_mostly bridge_ctl_mask; +static bool __ro_after_init opt_pci_quirks = true; static int __init cf_check parse_pci_param(const char *s) { @@ -XXX,XX +XXX,XX @@ static int __init cf_check parse_pci_par cmd_mask = PCI_COMMAND_PARITY; brctl_mask = PCI_BRIDGE_CTL_PARITY; } + else if ( (val = parse_boolean("quirks", s, ss)) >= 0 ) + opt_pci_quirks = val; else rc = -EINVAL; @@ -XXX,XX +XXX,XX @@ static struct pci_dev *alloc_pdev(struct } apply_quirks(pdev); + + pci_check_extcfg(pdev); + check_pdev(pdev); return pdev; @@ -XXX,XX +XXX,XX @@ int pci_add_device(u16 seg, u8 bus, u8 d list_add(&pdev->vf_list, &pf_pdev->vf_list); } + + if ( !pdev->ext_cfg ) + printk(XENLOG_WARNING + "%pp: VF without extended config space?\n", + &pdev->sbdf); } } @@ -XXX,XX +XXX,XX @@ enum pdev_type pdev_type(u16 seg, u8 bus return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI; } +void pci_check_extcfg(struct pci_dev *pdev) +{ + unsigned int pos; + + pdev->ext_cfg = false; + + switch ( pdev->type ) + { + case DEV_TYPE_PCIe_ENDPOINT: + case DEV_TYPE_PCIe_BRIDGE: + case DEV_TYPE_PCI_HOST_BRIDGE: + case DEV_TYPE_PCIe2PCI_BRIDGE: + case DEV_TYPE_PCI2PCIe_BRIDGE: + break; + + case DEV_TYPE_LEGACY_PCI_BRIDGE: + case DEV_TYPE_PCI: + pos = pci_find_cap_offset(pdev->sbdf, PCI_CAP_ID_PCIX); + if ( !pos || + !(pci_conf_read32(pdev->sbdf, pos + PCI_X_STATUS) & + (PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ)) ) + return; + break; + + default: + return; + } + + /* + * Regular PCI devices have 256 bytes, but PCI-X 2 and PCI Express devices + * have 4096 bytes. Even if the device is capable, that doesn't mean we + * can access it. Maybe we don't have a way to generate extended config + * space accesses, or the device is behind a reverse Express bridge. So + * we try reading the dword at PCI_CFG_SPACE_SIZE which must either be 0 + * or a valid extended capability header. + */ + if ( pci_conf_read32(pdev->sbdf, PCI_CFG_SPACE_SIZE) == 0xffffffffU ) + return; + + if ( opt_pci_quirks ) + { + /* + * PCI Express to PCI/PCI-X Bridge Specification, rev 1.0, 4.1.4 says + * that when forwarding a type1 configuration request the bridge must + * check that the extended register address field is zero. The bridge + * is not permitted to forward the transactions and must handle it as + * an Unsupported Request. Some bridges do not follow this rule and + * simply drop the extended register bits, resulting in the standard + * config space being aliased, every 256 bytes across the entire + * configuration space. Test for this condition by comparing the first + * dword of each potential alias to the vendor/device ID. + * Known offenders: + * ASM1083/1085 PCIe-to-PCI Reversible Bridge (1b21:1080, rev 01 & 03) + * AMD/ATI SBx00 PCI to PCI Bridge (1002:4384, rev 40) + */ + unsigned int sig = pci_conf_read32(pdev->sbdf, PCI_VENDOR_ID); + + for ( pos = PCI_CFG_SPACE_SIZE; + pos < PCI_CFG_SPACE_EXP_SIZE; pos += PCI_CFG_SPACE_SIZE ) + if ( pci_conf_read32(pdev->sbdf, pos) != sig ) + break; + + if ( pos >= PCI_CFG_SPACE_EXP_SIZE ) + { + printk(XENLOG_WARNING "%pp: extended config space aliases base one\n", + &pdev->sbdf); + return; + } + } + + pdev->ext_cfg = true; +} + /* * find the upstream PCIe-to-PCI/PCIX bridge or PCI legacy bridge * return 0: the device is integrated PCI device or PCIe @@ -XXX,XX +XXX,XX @@ int pci_iterate_devices(int (*handler)(s return pci_segments_iterate(iterate_all, &iter) ?: iter.rc; } +/* Iterate a single PCI segment, with locking but without preemption. */ +int pci_segment_iterate(unsigned int segment, + int (*handler)(struct pci_dev *pdev, void *arg), + void *arg) +{ + struct pci_seg *seg = get_pseg(segment); + struct segment_iter iter = { + .handler = handler, + .arg = arg, + }; + + if ( !seg ) + return -ENODEV; + + pcidevs_lock(); + + iter.rc = iterate_all(seg, &iter) ?: iter.rc; + + pcidevs_unlock(); + + return iter.rc; +} + /* * Local variables: * mode: C --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -XXX,XX +XXX,XX @@ struct pci_dev { nodeid_t node; /* NUMA node */ + /* Whether the device has (accessible) extended config space. */ + bool ext_cfg; + /* Device to be quarantined, don't automatically re-assign to dom0 */ bool quarantine; @@ -XXX,XX +XXX,XX @@ void pci_check_disable_device(u16 seg, u int pci_iterate_devices(int (*handler)(struct pci_dev *pdev, void *arg), void *arg); +/* Iterate a single PCI segment, with locking but without preemption. */ +int pci_segment_iterate(unsigned int segment, + int (*handler)(struct pci_dev *pdev, void *arg), + void *arg); + uint8_t pci_conf_read8(pci_sbdf_t sbdf, unsigned int reg); uint16_t pci_conf_read16(pci_sbdf_t sbdf, unsigned int reg); uint32_t pci_conf_read32(pci_sbdf_t sbdf, unsigned int reg); @@ -XXX,XX +XXX,XX @@ unsigned int pci_find_next_cap_ttl(pci_s unsigned int *ttl); unsigned int pci_find_next_cap(pci_sbdf_t sbdf, unsigned int pos, unsigned int cap); +void pci_check_extcfg(struct pci_dev *pdev); unsigned int pci_find_ext_capability(const struct pci_dev *pdev, unsigned int cap); unsigned int pci_find_next_ext_capability(const struct pci_dev *pdev,
Avoid interpreting as extended capabilities what may be about anything. In doing so, vPCI then also won't mis-interpret data from beyond base config space anymore. Fixes: 3b35911d709e ("Enable pci mmcfg and ATS for x86_64") Fixes: a845b50c12f3 ("vpci/header: Emulate extended capability list for dom0") Signed-off-by: Jan Beulich <jbeulich@suse.com> Reviewed-by: Stewart Hildebrand <stewart.hildebrand@amd.com> Acked-by: Roger Pau Monné <roger.pau@citrix.com> --- Because of the multiple prereq changes, despite the Fixes: tags I'm not quite sure whether to backport this. (I'm leaning towards "no".) --- a/xen/drivers/pci/pci.c +++ b/xen/drivers/pci/pci.c @@ -XXX,XX +XXX,XX @@ unsigned int pci_find_next_ext_capabilit int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */ unsigned int pos = max(start, PCI_CFG_SPACE_SIZE + 0U); + if ( !pdev->ext_cfg ) + { + ASSERT(!start); + return 0; + } + header = pci_conf_read32(pdev->sbdf, pos); /*
For DomU-s, whether to emulate accesses to the first 32 bits of extended config space as read-as-zero or read-as-all-ones depends on whether a device actually has extended config space. If it doesn't, read-as-zero isn't correct; not getting this right may confuse functions like Linux 6.19-rc's pci_ext_cfg_is_aliased(). For Dom0 this then simply allows dropping a later conditional. Fixes: a845b50c12f3 ("vpci/header: Emulate extended capability list for dom0") Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v3: Move code condition to top-level function scope. Eliminate a later conditional in exchange. --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -XXX,XX +XXX,XX @@ static int vpci_init_ext_capability_list { unsigned int pos = PCI_CFG_SPACE_SIZE; + if ( !pdev->ext_cfg ) + return 0; + if ( !is_hardware_domain(pdev->domain) ) /* Extended capabilities read as zero, write ignore for DomU */ return vpci_add_register(pdev->vpci, vpci_read_val, NULL, @@ -XXX,XX +XXX,XX @@ static int vpci_init_ext_capability_list if ( header == 0xffffffffU ) { - if ( pos != PCI_CFG_SPACE_SIZE ) - printk(XENLOG_WARNING - "%pd %pp: broken extended cap list, offset %#x\n", - pdev->domain, &pdev->sbdf, pos); + printk(XENLOG_WARNING + "%pd %pp: broken extended cap list, offset %#x\n", + pdev->domain, &pdev->sbdf, pos); return 0; }
When, during boot, we have already correctly determined availability of the MMCFG access method for a given bus range, there's then no need to invoke pci_check_extcfg() again for every of the devices. This in particular avoids ->ext_cfg to transiently indicate the wrong state. Switch to using Xen style on lines being touched and immediately adjacent ones. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v3: New. --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -XXX,XX +XXX,XX @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( !ret ) ret = pci_segment_iterate(info.segment, physdev_check_pci_extcfg, &info); + else if ( ret > 0 ) /* Indication of "no change". */ + ret = 0; if ( !ret && has_vpci(currd) && (info.flags & XEN_PCI_MMCFG_RESERVED) ) { --- a/xen/arch/x86/x86_64/mmconfig.h +++ b/xen/arch/x86/x86_64/mmconfig.h @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_reserved(uint64_t address, unsigned int flags); int pci_mmcfg_arch_init(void); int pci_mmcfg_arch_enable(unsigned int idx); -void pci_mmcfg_arch_disable(unsigned int idx); +int pci_mmcfg_arch_disable(unsigned int idx); #endif /* X86_64_MMCONFIG_H */ --- a/xen/arch/x86/x86_64/mmconfig-shared.c +++ b/xen/arch/x86/x86_64/mmconfig-shared.c @@ -XXX,XX +XXX,XX @@ static bool __init pci_mmcfg_reject_brok (unsigned int)cfg->start_bus_number, (unsigned int)cfg->end_bus_number); - if (!is_mmconf_reserved(addr, size, i, cfg) || - pci_mmcfg_arch_enable(i)) { + if ( !is_mmconf_reserved(addr, size, i, cfg) || + pci_mmcfg_arch_enable(i) < 0 ) + { pci_mmcfg_arch_disable(i); valid = 0; } @@ -XXX,XX +XXX,XX @@ void __init acpi_mmcfg_init(void) unsigned int i; pci_mmcfg_arch_init(); - for (i = 0; i < pci_mmcfg_config_num; ++i) - if (pci_mmcfg_arch_enable(i)) + for ( i = 0; i < pci_mmcfg_config_num; ++i ) + if ( pci_mmcfg_arch_enable(i) < 0 ) valid = 0; } else { acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_reserved(uint64_t address, segment, start_bus, end_bus, address, cfg->address); return -EIO; } - if (flags & XEN_PCI_MMCFG_RESERVED) + + if ( flags & XEN_PCI_MMCFG_RESERVED ) return pci_mmcfg_arch_enable(i); - pci_mmcfg_arch_disable(i); - return 0; + + return pci_mmcfg_arch_disable(i); } } --- a/xen/arch/x86/x86_64/mmconfig_64.c +++ b/xen/arch/x86/x86_64/mmconfig_64.c @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_arch_enable(unsigned int i const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg; unsigned long start_mfn, end_mfn; - if (pci_mmcfg_virt[idx].virt) - return 0; + if ( pci_mmcfg_virt[idx].virt ) + return 1; + pci_mmcfg_virt[idx].virt = mcfg_ioremap(cfg, idx, PAGE_HYPERVISOR_UC); if (!pci_mmcfg_virt[idx].virt) { printk(KERN_ERR "PCI: Cannot map MCFG aperture for segment %04x\n", @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_arch_enable(unsigned int i return 0; } -void pci_mmcfg_arch_disable(unsigned int idx) +int pci_mmcfg_arch_disable(unsigned int idx) { const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg; + if ( !pci_mmcfg_virt[idx].virt ) + return 1; + pci_mmcfg_virt[idx].virt = NULL; /* * Don't use destroy_xen_mappings() here, or make sure that at least @@ -XXX,XX +XXX,XX @@ void pci_mmcfg_arch_disable(unsigned int mcfg_ioremap(cfg, idx, 0); printk(KERN_WARNING "PCI: Not using MCFG for segment %04x bus %02x-%02x\n", cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number); + + return 0; } bool pci_mmcfg_decode(unsigned long mfn, unsigned int *seg, unsigned int *bdf)
When Dom0 informs up about MMCFG usability, this may change whether extended capabilities are available (accessible) for devices. Zap what might be on record, and re-initialize the list. No synchronization is added for the case where devices may already be in use. That'll need sorting when (a) DomU support was added and (b) DomU-s may run already while Dom0 / hwdom still boots (dom0less, Hyperlaunch). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- vpci_reinit_ext_capability_list()'s return value isn't checked, as it doesn't feel quite right to fail the hypercall because of this. At the same time it also doesn't feel quite right to have the function return "void". Thoughts? --- v3: New. --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -XXX,XX +XXX,XX @@ #include <xen/guest_access.h> #include <xen/iocap.h> #include <xen/serial.h> +#include <xen/vpci.h> + #include <asm/current.h> #include <asm/io_apic.h> #include <asm/msi.h> @@ -XXX,XX +XXX,XX @@ int cf_check physdev_check_pci_extcfg(st ASSERT(pdev->seg == info->segment); if ( pdev->bus >= info->start_bus && pdev->bus <= info->end_bus ) + { pci_check_extcfg(pdev); + vpci_reinit_ext_capability_list(pdev); + } return 0; } --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -XXX,XX +XXX,XX @@ static int vpci_init_ext_capability_list return 0; } +int vpci_reinit_ext_capability_list(const struct pci_dev *pdev) +{ + if ( !pdev->vpci ) + return 0; + + if ( vpci_remove_registers(pdev->vpci, PCI_CFG_SPACE_SIZE, + PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) ) + ASSERT_UNREACHABLE(); + + return vpci_init_ext_capability_list(pdev); +} + int vpci_init_header(struct pci_dev *pdev) { uint16_t cmd; --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -XXX,XX +XXX,XX @@ typedef struct { REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true) int __must_check vpci_init_header(struct pci_dev *pdev); +int vpci_reinit_ext_capability_list(const struct pci_dev *pdev); /* Assign vPCI to device by adding handlers. */ int __must_check vpci_assign_device(struct pci_dev *pdev); @@ -XXX,XX +XXX,XX @@ bool vpci_ecam_read(pci_sbdf_t sbdf, uns #else /* !CONFIG_HAS_VPCI */ struct vpci_vcpu {}; +static inline int vpci_reinit_ext_capability_list(const struct pci_dev *pdev) +{ + return 0; +} + static inline int vpci_assign_device(struct pci_dev *pdev) { return 0;
This is a follow-on to 'vPCI: avoid bogus "overlap in extended cap list" warnings', addressing further issues noted there. v4: Three new patches and some other re-work. See individual patches. 1: x86/PCI: avoid re-evaluation of extended config space accessibility 2: vPCI: introduce private header 3: vPCI: move vpci_init_capabilities() to a separate file 4: vPCI: move capability-list init 5: vPCI: re-init extended-capabilities when MMCFG availability changed Jan
When, during boot, we have already correctly determined availability of the MMCFG access method for a given bus range, there's then no need to invoke pci_check_extcfg() again for every of the devices. This in particular avoids ->ext_cfg to transiently indicate the wrong state. Switch to using Xen style on lines being touched and immediately adjacent ones. Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: Roger Pau Monné <roger.pau@citrix.com> --- v4: Don't bypass mcfg_ioremap(cfg, idx, 0) in pci_mmcfg_arch_disable(). v3: New. --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -XXX,XX +XXX,XX @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( !ret ) ret = pci_segment_iterate(info.segment, physdev_check_pci_extcfg, &info); + else if ( ret > 0 ) /* Indication of "no change". */ + ret = 0; if ( !ret && has_vpci(currd) && (info.flags & XEN_PCI_MMCFG_RESERVED) ) { --- a/xen/arch/x86/x86_64/mmconfig.h +++ b/xen/arch/x86/x86_64/mmconfig.h @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_reserved(uint64_t address, unsigned int flags); int pci_mmcfg_arch_init(void); int pci_mmcfg_arch_enable(unsigned int idx); -void pci_mmcfg_arch_disable(unsigned int idx); +int pci_mmcfg_arch_disable(unsigned int idx); #endif /* X86_64_MMCONFIG_H */ --- a/xen/arch/x86/x86_64/mmconfig-shared.c +++ b/xen/arch/x86/x86_64/mmconfig-shared.c @@ -XXX,XX +XXX,XX @@ static bool __init pci_mmcfg_reject_brok (unsigned int)cfg->start_bus_number, (unsigned int)cfg->end_bus_number); - if (!is_mmconf_reserved(addr, size, i, cfg) || - pci_mmcfg_arch_enable(i)) { + if ( !is_mmconf_reserved(addr, size, i, cfg) || + pci_mmcfg_arch_enable(i) < 0 ) + { pci_mmcfg_arch_disable(i); valid = 0; } @@ -XXX,XX +XXX,XX @@ void __init acpi_mmcfg_init(void) unsigned int i; pci_mmcfg_arch_init(); - for (i = 0; i < pci_mmcfg_config_num; ++i) - if (pci_mmcfg_arch_enable(i)) + for ( i = 0; i < pci_mmcfg_config_num; ++i ) + if ( pci_mmcfg_arch_enable(i) < 0 ) valid = 0; } else { acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_reserved(uint64_t address, segment, start_bus, end_bus, address, cfg->address); return -EIO; } - if (flags & XEN_PCI_MMCFG_RESERVED) + + if ( flags & XEN_PCI_MMCFG_RESERVED ) return pci_mmcfg_arch_enable(i); - pci_mmcfg_arch_disable(i); - return 0; + + return pci_mmcfg_arch_disable(i); } } --- a/xen/arch/x86/x86_64/mmconfig_64.c +++ b/xen/arch/x86/x86_64/mmconfig_64.c @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_arch_enable(unsigned int i const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg; unsigned long start_mfn, end_mfn; - if (pci_mmcfg_virt[idx].virt) - return 0; + if ( pci_mmcfg_virt[idx].virt ) + return 1; + pci_mmcfg_virt[idx].virt = mcfg_ioremap(cfg, idx, PAGE_HYPERVISOR_UC); if (!pci_mmcfg_virt[idx].virt) { printk(KERN_ERR "PCI: Cannot map MCFG aperture for segment %04x\n", @@ -XXX,XX +XXX,XX @@ int pci_mmcfg_arch_enable(unsigned int i return 0; } -void pci_mmcfg_arch_disable(unsigned int idx) +int pci_mmcfg_arch_disable(unsigned int idx) { const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg; + int ret = !pci_mmcfg_virt[idx].virt; pci_mmcfg_virt[idx].virt = NULL; /* @@ -XXX,XX +XXX,XX @@ void pci_mmcfg_arch_disable(unsigned int mcfg_ioremap(cfg, idx, 0); printk(KERN_WARNING "PCI: Not using MCFG for segment %04x bus %02x-%02x\n", cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number); + + return ret; } bool pci_mmcfg_decode(unsigned long mfn, unsigned int *seg, unsigned int *bdf)
Before adding more private stuff to xen/vpci.h, split it up. In order to be able to include the private header first in a CU, the per-arch struct decls also need to move (to new asm/vpci.h files). While adjusting the test harness'es Makefile, also switch the pre-existing header symlink-ing rule to a pattern one. Apart from in the test harness code, things only move; no functional change intended. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- Subsequently, at least on x86 more stuff may want moving into asm/vpci.h. --- v4: New. --- a/tools/tests/vpci/Makefile +++ b/tools/tests/vpci/Makefile @@ -XXX,XX +XXX,XX @@ else $(warning HOSTCC != CC, will not run test) endif -$(TARGET): vpci.c vpci.h list.h main.c emul.h - $(CC) $(CFLAGS_xeninclude) -g -o $@ vpci.c main.c +$(TARGET): vpci.c vpci.h list.h private.h main.c emul.h + $(CC) $(CFLAGS_xeninclude) -include emul.h -g -o $@ vpci.c main.c .PHONY: clean clean: @@ -XXX,XX +XXX,XX @@ uninstall: $(RM) -- $(DESTDIR)$(LIBEXEC)/tests/$(TARGET) vpci.c: $(XEN_ROOT)/xen/drivers/vpci/vpci.c - # Remove includes and add the test harness header - sed -e '/#include/d' -e '1s/^/#include "emul.h"/' <$< >$@ + sed -e '/#include/d' <$< >$@ + +private.h: %.h: $(XEN_ROOT)/xen/drivers/vpci/%.h + sed -e '/#include/d' <$< >$@ -list.h: $(XEN_ROOT)/xen/include/xen/list.h -vpci.h: $(XEN_ROOT)/xen/include/xen/vpci.h -list.h vpci.h: +list.h vpci.h: %.h: $(XEN_ROOT)/xen/include/xen/%.h sed -e '/#include/d' <$< >$@ --- a/tools/tests/vpci/emul.h +++ b/tools/tests/vpci/emul.h @@ -XXX,XX +XXX,XX @@ typedef union { #define CONFIG_HAS_VPCI #include "vpci.h" +#include "private.h" #define __hwdom_init --- a/xen/arch/arm/include/asm/pci.h +++ b/xen/arch/arm/include/asm/pci.h @@ -XXX,XX +XXX,XX @@ struct arch_pci_dev { struct device dev; }; -/* Arch-specific MSI data for vPCI. */ -struct vpci_arch_msi { -}; - -/* Arch-specific MSI-X entry data for vPCI. */ -struct vpci_arch_msix_entry { -}; - /* * Because of the header cross-dependencies, e.g. we need both * struct pci_dev and struct arch_pci_dev at the same time, this cannot be --- /dev/null +++ b/xen/arch/arm/include/asm/vpci.h @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef ARM_VPCI_H +#define ARM_VPCI_H + +/* Arch-specific MSI data for vPCI. */ +struct vpci_arch_msi { +}; + +/* Arch-specific MSI-X entry data for vPCI. */ +struct vpci_arch_msix_entry { +}; + +#endif /* ARM_VPCI_H */ --- a/xen/arch/x86/include/asm/hvm/io.h +++ b/xen/arch/x86/include/asm/hvm/io.h @@ -XXX,XX +XXX,XX @@ void msixtbl_init(struct domain *d); static inline void msixtbl_init(struct domain *d) {} #endif -/* Arch-specific MSI data for vPCI. */ -struct vpci_arch_msi { - int pirq; - bool bound; -}; - -/* Arch-specific MSI-X entry data for vPCI. */ -struct vpci_arch_msix_entry { - int pirq; -}; - void stdvga_init(struct domain *d); extern void hvm_dpci_msi_eoi(struct domain *d, int vector); --- /dev/null +++ b/xen/arch/x86/include/asm/vpci.h @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef X86_VPCI_H +#define X86_VPCI_H + +#include <xen/stdbool.h> + +/* Arch-specific MSI data for vPCI. */ +struct vpci_arch_msi { + int pirq; + bool bound; +}; + +/* Arch-specific MSI-X entry data for vPCI. */ +struct vpci_arch_msix_entry { + int pirq; +}; + +#endif /* X86_VPCI_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -XXX,XX +XXX,XX @@ * License along with this program; If not, see <http://www.gnu.org/licenses/>. */ +#include "private.h" + #include <xen/iocap.h> #include <xen/lib.h> #include <xen/sched.h> #include <xen/softirq.h> -#include <xen/vpci.h> #include <xsm/xsm.h> --- a/xen/drivers/vpci/msi.c +++ b/xen/drivers/vpci/msi.c @@ -XXX,XX +XXX,XX @@ * License along with this program; If not, see <http://www.gnu.org/licenses/>. */ +#include "private.h" + #include <xen/sched.h> #include <xen/softirq.h> -#include <xen/vpci.h> #include <asm/msi.h> --- a/xen/drivers/vpci/msix.c +++ b/xen/drivers/vpci/msix.c @@ -XXX,XX +XXX,XX @@ * License along with this program; If not, see <http://www.gnu.org/licenses/>. */ +#include "private.h" + #include <xen/io.h> #include <xen/lib.h> #include <xen/sched.h> -#include <xen/vpci.h> #include <asm/msi.h> #include <asm/p2m.h> --- /dev/null +++ b/xen/drivers/vpci/private.h @@ -XXX,XX +XXX,XX @@ +#ifndef VPCI_PRIVATE_H +#define VPCI_PRIVATE_H + +#include <xen/vpci.h> + +typedef uint32_t vpci_read_t(const struct pci_dev *pdev, unsigned int reg, + void *data); + +typedef void vpci_write_t(const struct pci_dev *pdev, unsigned int reg, + uint32_t val, void *data); + +typedef struct { + unsigned int id; + bool is_ext; + int (* init)(struct pci_dev *pdev); + int (* cleanup)(const struct pci_dev *pdev, bool hide); +} vpci_capability_t; + +#define REGISTER_VPCI_CAPABILITY(cap, name, finit, fclean, ext) \ + static const vpci_capability_t name##_entry \ + __used_section(".data.rel.ro.vpci") = { \ + .id = (cap), \ + .init = (finit), \ + .cleanup = (fclean), \ + .is_ext = (ext), \ + } + +#define REGISTER_VPCI_CAP(name, finit, fclean) \ + REGISTER_VPCI_CAPABILITY(PCI_CAP_ID_##name, name, finit, fclean, false) +#define REGISTER_VPCI_EXTCAP(name, finit, fclean) \ + REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true) + +/* Add/remove a register handler. */ +int __must_check vpci_add_register_mask(struct vpci *vpci, + vpci_read_t *read_handler, + vpci_write_t *write_handler, + unsigned int offset, unsigned int size, + void *data, uint32_t ro_mask, + uint32_t rw1c_mask, uint32_t rsvdp_mask, + uint32_t rsvdz_mask); +int __must_check vpci_add_register(struct vpci *vpci, + vpci_read_t *read_handler, + vpci_write_t *write_handler, + unsigned int offset, unsigned int size, + void *data); + +int vpci_remove_registers(struct vpci *vpci, unsigned int start, + unsigned int size); + +/* Helper to return the value passed in data. */ +uint32_t cf_check vpci_read_val( + const struct pci_dev *pdev, unsigned int reg, void *data); + +/* Passthrough handlers. */ +uint32_t cf_check vpci_hw_read8( + const struct pci_dev *pdev, unsigned int reg, void *data); +uint32_t cf_check vpci_hw_read16( + const struct pci_dev *pdev, unsigned int reg, void *data); +uint32_t cf_check vpci_hw_read32( + const struct pci_dev *pdev, unsigned int reg, void *data); +void cf_check vpci_hw_write8( + const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data); +void cf_check vpci_hw_write16( + const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data); + +#ifdef __XEN__ +/* Make sure there's a hole in the p2m for the MSIX mmio areas. */ +int vpci_make_msix_hole(const struct pci_dev *pdev); + +/* + * Helper functions to fetch MSIX related data. They are used by both the + * emulated MSIX code and the BAR handlers. + */ +static inline paddr_t vmsix_table_host_base(const struct vpci *vpci, + unsigned int nr) +{ + return vpci->header.bars[vpci->msix->tables[nr] & PCI_MSIX_BIRMASK].addr; +} + +static inline paddr_t vmsix_table_host_addr(const struct vpci *vpci, + unsigned int nr) +{ + return vmsix_table_host_base(vpci, nr) + + (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK); +} + +static inline paddr_t vmsix_table_base(const struct vpci *vpci, unsigned int nr) +{ + return vpci->header.bars[vpci->msix->tables[nr] & + PCI_MSIX_BIRMASK].guest_addr; +} + +static inline paddr_t vmsix_table_addr(const struct vpci *vpci, unsigned int nr) +{ + return vmsix_table_base(vpci, nr) + + (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK); +} + +/* + * Note regarding the size calculation of the PBA: the spec mentions "The last + * QWORD will not necessarily be fully populated", so it implies that the PBA + * size is 64-bit aligned. + */ +static inline size_t vmsix_table_size(const struct vpci *vpci, unsigned int nr) +{ + return + (nr == VPCI_MSIX_TABLE) ? vpci->msix->max_entries * PCI_MSIX_ENTRY_SIZE + : ROUNDUP(DIV_ROUND_UP(vpci->msix->max_entries, + 8), 8); +} + +#endif /* __XEN__ */ + +#endif /* VPCI_PRIVATE_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- a/xen/drivers/vpci/rebar.c +++ b/xen/drivers/vpci/rebar.c @@ -XXX,XX +XXX,XX @@ * Author: Jiqian Chen <Jiqian.Chen@amd.com> */ +#include "private.h" + #include <xen/sched.h> -#include <xen/vpci.h> static void cf_check rebar_ctrl_write(const struct pci_dev *pdev, unsigned int reg, --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -XXX,XX +XXX,XX @@ * License along with this program; If not, see <http://www.gnu.org/licenses/>. */ +#include "private.h" + #include <xen/sched.h> -#include <xen/vpci.h> #include <xen/vmap.h> /* Internal struct to store the emulated PCI registers. */ --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -XXX,XX +XXX,XX @@ #include <xen/types.h> #include <xen/list.h> -typedef uint32_t vpci_read_t(const struct pci_dev *pdev, unsigned int reg, - void *data); - -typedef void vpci_write_t(const struct pci_dev *pdev, unsigned int reg, - uint32_t val, void *data); - -typedef struct { - unsigned int id; - bool is_ext; - int (* init)(struct pci_dev *pdev); - int (* cleanup)(const struct pci_dev *pdev, bool hide); -} vpci_capability_t; +#include <asm/vpci.h> #define VPCI_ECAM_BDF(addr) (((addr) & 0x0ffff000) >> 12) @@ -XXX,XX +XXX,XX @@ typedef struct { */ #define VPCI_MAX_VIRT_DEV (PCI_SLOT(~0) + 1) -#define REGISTER_VPCI_CAPABILITY(cap, name, finit, fclean, ext) \ - static const vpci_capability_t name##_entry \ - __used_section(".data.rel.ro.vpci") = { \ - .id = (cap), \ - .init = (finit), \ - .cleanup = (fclean), \ - .is_ext = (ext), \ - } - -#define REGISTER_VPCI_CAP(name, finit, fclean) \ - REGISTER_VPCI_CAPABILITY(PCI_CAP_ID_##name, name, finit, fclean, false) -#define REGISTER_VPCI_EXTCAP(name, finit, fclean) \ - REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true) - int __must_check vpci_init_header(struct pci_dev *pdev); /* Assign vPCI to device by adding handlers. */ @@ -XXX,XX +XXX,XX @@ int __must_check vpci_assign_device(stru /* Remove all handlers and free vpci related structures. */ void vpci_deassign_device(struct pci_dev *pdev); -/* Add/remove a register handler. */ -int __must_check vpci_add_register_mask(struct vpci *vpci, - vpci_read_t *read_handler, - vpci_write_t *write_handler, - unsigned int offset, unsigned int size, - void *data, uint32_t ro_mask, - uint32_t rw1c_mask, uint32_t rsvdp_mask, - uint32_t rsvdz_mask); -int __must_check vpci_add_register(struct vpci *vpci, - vpci_read_t *read_handler, - vpci_write_t *write_handler, - unsigned int offset, unsigned int size, - void *data); - -int vpci_remove_registers(struct vpci *vpci, unsigned int start, - unsigned int size); - /* Generic read/write handlers for the PCI config space. */ uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size); void vpci_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int size, uint32_t data); -/* Helper to return the value passed in data. */ -uint32_t cf_check vpci_read_val( - const struct pci_dev *pdev, unsigned int reg, void *data); - -/* Passthrough handlers. */ -uint32_t cf_check vpci_hw_read8( - const struct pci_dev *pdev, unsigned int reg, void *data); -uint32_t cf_check vpci_hw_read16( - const struct pci_dev *pdev, unsigned int reg, void *data); -uint32_t cf_check vpci_hw_read32( - const struct pci_dev *pdev, unsigned int reg, void *data); -void cf_check vpci_hw_write8( - const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data); -void cf_check vpci_hw_write16( - const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data); - /* * Check for pending vPCI operations on this vcpu. Returns true if the vcpu * should not run. @@ -XXX,XX +XXX,XX @@ struct vpci_vcpu { #ifdef __XEN__ void vpci_dump_msi(void); -/* Make sure there's a hole in the p2m for the MSIX mmio areas. */ -int vpci_make_msix_hole(const struct pci_dev *pdev); - /* Arch-specific vPCI MSI helpers. */ void vpci_msi_arch_mask(struct vpci_msi *msi, const struct pci_dev *pdev, unsigned int entry, bool mask); @@ -XXX,XX +XXX,XX @@ int __must_check vpci_msix_arch_disable_ void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry); int vpci_msix_arch_print(const struct vpci_msix *msix); -/* - * Helper functions to fetch MSIX related data. They are used by both the - * emulated MSIX code and the BAR handlers. - */ -static inline paddr_t vmsix_table_host_base(const struct vpci *vpci, - unsigned int nr) -{ - return vpci->header.bars[vpci->msix->tables[nr] & PCI_MSIX_BIRMASK].addr; -} - -static inline paddr_t vmsix_table_host_addr(const struct vpci *vpci, - unsigned int nr) -{ - return vmsix_table_host_base(vpci, nr) + - (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK); -} - -static inline paddr_t vmsix_table_base(const struct vpci *vpci, unsigned int nr) -{ - return vpci->header.bars[vpci->msix->tables[nr] & - PCI_MSIX_BIRMASK].guest_addr; -} - -static inline paddr_t vmsix_table_addr(const struct vpci *vpci, unsigned int nr) -{ - return vmsix_table_base(vpci, nr) + - (vpci->msix->tables[nr] & ~PCI_MSIX_BIRMASK); -} - -/* - * Note regarding the size calculation of the PBA: the spec mentions "The last - * QWORD will not necessarily be fully populated", so it implies that the PBA - * size is 64-bit aligned. - */ -static inline size_t vmsix_table_size(const struct vpci *vpci, unsigned int nr) -{ - return - (nr == VPCI_MSIX_TABLE) ? vpci->msix->max_entries * PCI_MSIX_ENTRY_SIZE - : ROUNDUP(DIV_ROUND_UP(vpci->msix->max_entries, - 8), 8); -} - static inline unsigned int vmsix_entry_nr(const struct vpci_msix *msix, const struct vpci_msix_entry *entry) {
Let's keep capability handling together. Start with moving vpci_init_capabilities() and its helpers, plus splitting out of its cleanup counterpart. Signed-off-by: Jan Beulich <jbeulich@suse.com> --- vpci_get_register(), while now only used by cap.c, didn't look like it would want moving there. --- v4: New. --- a/xen/drivers/vpci/Makefile +++ b/xen/drivers/vpci/Makefile @@ -XXX,XX +XXX,XX @@ +obj-y += cap.o obj-y += vpci.o header.o rebar.o obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o --- /dev/null +++ b/xen/drivers/vpci/cap.c @@ -XXX,XX +XXX,XX @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Capability handling for guest PCI configuration space. + */ + +#include "private.h" + +#include <xen/sched.h> + +extern const vpci_capability_t __start_vpci_array[]; +extern const vpci_capability_t __end_vpci_array[]; +#define NUM_VPCI_INIT (__end_vpci_array - __start_vpci_array) + +static struct vpci_register *vpci_get_previous_cap_register( + const struct vpci *vpci, unsigned int offset) +{ + unsigned int next; + struct vpci_register *r; + + if ( offset < 0x40 ) + { + ASSERT_UNREACHABLE(); + return NULL; + } + + for ( r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1); r; + r = next >= 0x40 ? vpci_get_register(vpci, + next + PCI_CAP_LIST_NEXT, 1) + : NULL ) + { + next = (unsigned int)(uintptr_t)r->private; + ASSERT(next == (uintptr_t)r->private); + if ( next == offset ) + break; + } + + return r; +} + +static int vpci_capability_hide(const struct pci_dev *pdev, unsigned int cap) +{ + const unsigned int offset = pci_find_cap_offset(pdev->sbdf, cap); + struct vpci_register *prev_r, *next_r; + struct vpci *vpci = pdev->vpci; + + if ( !offset ) + { + ASSERT_UNREACHABLE(); + return 0; + } + + spin_lock(&vpci->lock); + prev_r = vpci_get_previous_cap_register(vpci, offset); + next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1); + if ( !prev_r || !next_r ) + { + spin_unlock(&vpci->lock); + return -ENODEV; + } + + prev_r->private = next_r->private; + /* + * Not calling vpci_remove_registers() here is to avoid redoing + * the register search. + */ + list_del(&next_r->node); + spin_unlock(&vpci->lock); + xfree(next_r); + + if ( !is_hardware_domain(pdev->domain) ) + return vpci_remove_registers(vpci, offset + PCI_CAP_LIST_ID, 1); + + return 0; +} + +static struct vpci_register *vpci_get_previous_ext_cap_register( + const struct vpci *vpci, unsigned int offset) +{ + unsigned int pos = PCI_CFG_SPACE_SIZE; + struct vpci_register *r; + + if ( offset <= PCI_CFG_SPACE_SIZE ) + { + ASSERT_UNREACHABLE(); + return NULL; + } + + for ( r = vpci_get_register(vpci, pos, 4); r; + r = pos > PCI_CFG_SPACE_SIZE ? vpci_get_register(vpci, pos, 4) + : NULL ) + { + uint32_t header = (uint32_t)(uintptr_t)r->private; + + ASSERT(header == (uintptr_t)r->private); + + pos = PCI_EXT_CAP_NEXT(header); + if ( pos == offset ) + break; + } + + return r; +} + +static int vpci_ext_capability_hide( + const struct pci_dev *pdev, unsigned int cap) +{ + const unsigned int offset = pci_find_ext_capability(pdev, cap); + struct vpci_register *r, *prev_r; + struct vpci *vpci = pdev->vpci; + uint32_t header, pre_header; + + if ( offset < PCI_CFG_SPACE_SIZE ) + { + ASSERT_UNREACHABLE(); + return 0; + } + + spin_lock(&vpci->lock); + r = vpci_get_register(vpci, offset, 4); + if ( !r ) + { + spin_unlock(&vpci->lock); + return -ENODEV; + } + + header = (uint32_t)(uintptr_t)r->private; + if ( offset == PCI_CFG_SPACE_SIZE ) + { + if ( PCI_EXT_CAP_NEXT(header) <= PCI_CFG_SPACE_SIZE ) + r->private = (void *)0; + else + /* + * The first extended capability (0x100) can not be removed from + * the linked list, so instead mask its capability ID to return 0 + * hopefully forcing OSes to skip it. + */ + r->private = (void *)(uintptr_t)(header & ~PCI_EXT_CAP_ID(header)); + + spin_unlock(&vpci->lock); + return 0; + } + + prev_r = vpci_get_previous_ext_cap_register(vpci, offset); + if ( !prev_r ) + { + spin_unlock(&vpci->lock); + return -ENODEV; + } + + pre_header = (uint32_t)(uintptr_t)prev_r->private; + pre_header &= ~PCI_EXT_CAP_NEXT_MASK; + pre_header |= header & PCI_EXT_CAP_NEXT_MASK; + prev_r->private = (void *)(uintptr_t)pre_header; + + list_del(&r->node); + spin_unlock(&vpci->lock); + xfree(r); + + return 0; +} + +int vpci_init_capabilities(struct pci_dev *pdev) +{ + for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ ) + { + const vpci_capability_t *capability = &__start_vpci_array[i]; + const unsigned int cap = capability->id; + const bool is_ext = capability->is_ext; + unsigned int pos = 0; + int rc; + + if ( !is_ext ) + pos = pci_find_cap_offset(pdev->sbdf, cap); + else if ( is_hardware_domain(pdev->domain) ) + pos = pci_find_ext_capability(pdev, cap); + + if ( !pos ) + continue; + + rc = capability->init(pdev); + if ( rc ) + { + const char *type = is_ext ? "extended" : "legacy"; + + printk(XENLOG_WARNING + "%pd %pp: init %s cap %u fail rc=%d, mask it\n", + pdev->domain, &pdev->sbdf, type, cap, rc); + + if ( capability->cleanup ) + { + rc = capability->cleanup(pdev, true); + if ( rc ) + { + printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n", + pdev->domain, &pdev->sbdf, type, cap, rc); + if ( !is_hardware_domain(pdev->domain) ) + return rc; + } + } + + if ( !is_ext ) + rc = vpci_capability_hide(pdev, cap); + else + rc = vpci_ext_capability_hide(pdev, cap); + if ( rc ) + { + printk(XENLOG_ERR "%pd %pp: hide %s cap %u fail rc=%d\n", + pdev->domain, &pdev->sbdf, type, cap, rc); + return rc; + } + } + } + + return 0; +} + +void vpci_cleanup_capabilities(struct pci_dev *pdev) +{ + for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ ) + { + const vpci_capability_t *capability = &__start_vpci_array[i]; + const unsigned int cap = capability->id; + unsigned int pos = 0; + + if ( !capability->cleanup ) + continue; + + if ( !capability->is_ext ) + pos = pci_find_cap_offset(pdev->sbdf, cap); + else if ( is_hardware_domain(pdev->domain) ) + pos = pci_find_ext_capability(pdev, cap); + if ( pos ) + { + int rc = capability->cleanup(pdev, false); + + if ( rc ) + printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n", + pdev->domain, &pdev->sbdf, + capability->is_ext ? "extended" : "legacy", cap, rc); + } + } +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ --- a/xen/drivers/vpci/private.h +++ b/xen/drivers/vpci/private.h @@ -XXX,XX +XXX,XX @@ typedef uint32_t vpci_read_t(const struc typedef void vpci_write_t(const struct pci_dev *pdev, unsigned int reg, uint32_t val, void *data); +/* Internal struct to store the emulated PCI registers. */ +struct vpci_register { + vpci_read_t *read; + vpci_write_t *write; + unsigned int size; + unsigned int offset; + void *private; + struct list_head node; + uint32_t ro_mask; + uint32_t rw1c_mask; + uint32_t rsvdp_mask; + uint32_t rsvdz_mask; +}; + typedef struct { unsigned int id; bool is_ext; @@ -XXX,XX +XXX,XX @@ typedef struct { #define REGISTER_VPCI_EXTCAP(name, finit, fclean) \ REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true) +int vpci_init_capabilities(struct pci_dev *pdev); +void vpci_cleanup_capabilities(struct pci_dev *pdev); + /* Add/remove a register handler. */ int __must_check vpci_add_register_mask(struct vpci *vpci, vpci_read_t *read_handler, @@ -XXX,XX +XXX,XX @@ int __must_check vpci_add_register(struc int vpci_remove_registers(struct vpci *vpci, unsigned int start, unsigned int size); +struct vpci_register *vpci_get_register(const struct vpci *vpci, + unsigned int offset, + unsigned int size); + /* Helper to return the value passed in data. */ uint32_t cf_check vpci_read_val( const struct pci_dev *pdev, unsigned int reg, void *data); --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -XXX,XX +XXX,XX @@ #include <xen/sched.h> #include <xen/vmap.h> -/* Internal struct to store the emulated PCI registers. */ -struct vpci_register { - vpci_read_t *read; - vpci_write_t *write; - unsigned int size; - unsigned int offset; - void *private; - struct list_head node; - uint32_t ro_mask; - uint32_t rw1c_mask; - uint32_t rsvdp_mask; - uint32_t rsvdz_mask; -}; - #ifdef __XEN__ -extern const vpci_capability_t __start_vpci_array[]; -extern const vpci_capability_t __end_vpci_array[]; -#define NUM_VPCI_INIT (__end_vpci_array - __start_vpci_array) #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT static int assign_virtual_sbdf(struct pci_dev *pdev) @@ -XXX,XX +XXX,XX @@ static int assign_virtual_sbdf(struct pc #endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ -static struct vpci_register *vpci_get_register(const struct vpci *vpci, - unsigned int offset, - unsigned int size) +struct vpci_register *vpci_get_register(const struct vpci *vpci, + unsigned int offset, + unsigned int size) { struct vpci_register *r; @@ -XXX,XX +XXX,XX @@ static struct vpci_register *vpci_get_re return NULL; } -static struct vpci_register *vpci_get_previous_cap_register( - const struct vpci *vpci, unsigned int offset) -{ - unsigned int next; - struct vpci_register *r; - - if ( offset < 0x40 ) - { - ASSERT_UNREACHABLE(); - return NULL; - } - - for ( r = vpci_get_register(vpci, PCI_CAPABILITY_LIST, 1); r; - r = next >= 0x40 ? vpci_get_register(vpci, - next + PCI_CAP_LIST_NEXT, 1) - : NULL ) - { - next = (unsigned int)(uintptr_t)r->private; - ASSERT(next == (uintptr_t)r->private); - if ( next == offset ) - break; - } - - return r; -} - -static int vpci_capability_hide(const struct pci_dev *pdev, unsigned int cap) -{ - const unsigned int offset = pci_find_cap_offset(pdev->sbdf, cap); - struct vpci_register *prev_r, *next_r; - struct vpci *vpci = pdev->vpci; - - if ( !offset ) - { - ASSERT_UNREACHABLE(); - return 0; - } - - spin_lock(&vpci->lock); - prev_r = vpci_get_previous_cap_register(vpci, offset); - next_r = vpci_get_register(vpci, offset + PCI_CAP_LIST_NEXT, 1); - if ( !prev_r || !next_r ) - { - spin_unlock(&vpci->lock); - return -ENODEV; - } - - prev_r->private = next_r->private; - /* - * Not calling vpci_remove_registers() here is to avoid redoing - * the register search. - */ - list_del(&next_r->node); - spin_unlock(&vpci->lock); - xfree(next_r); - - if ( !is_hardware_domain(pdev->domain) ) - return vpci_remove_registers(vpci, offset + PCI_CAP_LIST_ID, 1); - - return 0; -} - -static struct vpci_register *vpci_get_previous_ext_cap_register( - const struct vpci *vpci, unsigned int offset) -{ - unsigned int pos = PCI_CFG_SPACE_SIZE; - struct vpci_register *r; - - if ( offset <= PCI_CFG_SPACE_SIZE ) - { - ASSERT_UNREACHABLE(); - return NULL; - } - - for ( r = vpci_get_register(vpci, pos, 4); r; - r = pos > PCI_CFG_SPACE_SIZE ? vpci_get_register(vpci, pos, 4) - : NULL ) - { - uint32_t header = (uint32_t)(uintptr_t)r->private; - - ASSERT(header == (uintptr_t)r->private); - - pos = PCI_EXT_CAP_NEXT(header); - if ( pos == offset ) - break; - } - - return r; -} - -static int vpci_ext_capability_hide( - const struct pci_dev *pdev, unsigned int cap) -{ - const unsigned int offset = pci_find_ext_capability(pdev, cap); - struct vpci_register *r, *prev_r; - struct vpci *vpci = pdev->vpci; - uint32_t header, pre_header; - - if ( offset < PCI_CFG_SPACE_SIZE ) - { - ASSERT_UNREACHABLE(); - return 0; - } - - spin_lock(&vpci->lock); - r = vpci_get_register(vpci, offset, 4); - if ( !r ) - { - spin_unlock(&vpci->lock); - return -ENODEV; - } - - header = (uint32_t)(uintptr_t)r->private; - if ( offset == PCI_CFG_SPACE_SIZE ) - { - if ( PCI_EXT_CAP_NEXT(header) <= PCI_CFG_SPACE_SIZE ) - r->private = (void *)0; - else - /* - * The first extended capability (0x100) can not be removed from - * the linked list, so instead mask its capability ID to return 0 - * hopefully forcing OSes to skip it. - */ - r->private = (void *)(uintptr_t)(header & ~PCI_EXT_CAP_ID(header)); - - spin_unlock(&vpci->lock); - return 0; - } - - prev_r = vpci_get_previous_ext_cap_register(vpci, offset); - if ( !prev_r ) - { - spin_unlock(&vpci->lock); - return -ENODEV; - } - - pre_header = (uint32_t)(uintptr_t)prev_r->private; - pre_header &= ~PCI_EXT_CAP_NEXT_MASK; - pre_header |= header & PCI_EXT_CAP_NEXT_MASK; - prev_r->private = (void *)(uintptr_t)pre_header; - - list_del(&r->node); - spin_unlock(&vpci->lock); - xfree(r); - - return 0; -} - -static int vpci_init_capabilities(struct pci_dev *pdev) -{ - for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ ) - { - const vpci_capability_t *capability = &__start_vpci_array[i]; - const unsigned int cap = capability->id; - const bool is_ext = capability->is_ext; - unsigned int pos = 0; - int rc; - - if ( !is_ext ) - pos = pci_find_cap_offset(pdev->sbdf, cap); - else if ( is_hardware_domain(pdev->domain) ) - pos = pci_find_ext_capability(pdev, cap); - - if ( !pos ) - continue; - - rc = capability->init(pdev); - if ( rc ) - { - const char *type = is_ext ? "extended" : "legacy"; - - printk(XENLOG_WARNING - "%pd %pp: init %s cap %u fail rc=%d, mask it\n", - pdev->domain, &pdev->sbdf, type, cap, rc); - - if ( capability->cleanup ) - { - rc = capability->cleanup(pdev, true); - if ( rc ) - { - printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n", - pdev->domain, &pdev->sbdf, type, cap, rc); - if ( !is_hardware_domain(pdev->domain) ) - return rc; - } - } - - if ( !is_ext ) - rc = vpci_capability_hide(pdev, cap); - else - rc = vpci_ext_capability_hide(pdev, cap); - if ( rc ) - { - printk(XENLOG_ERR "%pd %pp: hide %s cap %u fail rc=%d\n", - pdev->domain, &pdev->sbdf, type, cap, rc); - return rc; - } - } - } - - return 0; -} - void vpci_deassign_device(struct pci_dev *pdev) { unsigned int i; @@ -XXX,XX +XXX,XX @@ void vpci_deassign_device(struct pci_dev &pdev->domain->vpci_dev_assigned_map); #endif - for ( i = 0; i < NUM_VPCI_INIT; i++ ) - { - const vpci_capability_t *capability = &__start_vpci_array[i]; - const unsigned int cap = capability->id; - unsigned int pos = 0; - - if ( !capability->cleanup ) - continue; - - if ( !capability->is_ext ) - pos = pci_find_cap_offset(pdev->sbdf, cap); - else if ( is_hardware_domain(pdev->domain) ) - pos = pci_find_ext_capability(pdev, cap); - if ( pos ) - { - int rc = capability->cleanup(pdev, false); - - if ( rc ) - printk(XENLOG_ERR "%pd %pp: clean %s cap %u fail rc=%d\n", - pdev->domain, &pdev->sbdf, - capability->is_ext ? "extended" : "legacy", cap, rc); - } - } + vpci_cleanup_capabilities(pdev); spin_lock(&pdev->vpci->lock); while ( !list_empty(&pdev->vpci->handlers) )
... both for when the functions are invoked and where they live in source. Don't invoke them directly in vpci_init_header(), but instead first thing in vpci_init_capabilities(). Suggested-by: Roger Pau Monné <roger.pau@citrix.com> Signed-off-by: Jan Beulich <jbeulich@suse.com> --- v4: New. --- a/xen/drivers/vpci/cap.c +++ b/xen/drivers/vpci/cap.c @@ -XXX,XX +XXX,XX @@ static int vpci_ext_capability_hide( return 0; } +static int vpci_init_capability_list(struct pci_dev *pdev) +{ + int rc; + bool mask_cap_list = false; + bool is_hwdom = is_hardware_domain(pdev->domain); + + if ( pci_conf_read16(pdev->sbdf, PCI_STATUS) & PCI_STATUS_CAP_LIST ) + { + /* Only expose capabilities to the guest that vPCI can handle. */ + unsigned int next, ttl = 48; + static const unsigned int supported_caps[] = { + PCI_CAP_ID_MSI, + PCI_CAP_ID_MSIX, + }; + /* + * For dom0, we should expose all capabilities instead of a fixed + * capabilities array, so setting n to 0 here is to get the next + * capability position directly in pci_find_next_cap_ttl. + */ + const unsigned int n = is_hwdom ? 0 : ARRAY_SIZE(supported_caps); + + next = pci_find_next_cap_ttl(pdev->sbdf, PCI_CAPABILITY_LIST, + supported_caps, n, &ttl); + + rc = vpci_add_register(pdev->vpci, vpci_read_val, + is_hwdom ? vpci_hw_write8 : NULL, + PCI_CAPABILITY_LIST, 1, + (void *)(uintptr_t)next); + if ( rc ) + return rc; + + next &= ~3; + + if ( !next && !is_hwdom ) + /* + * If we don't have any supported capabilities to expose to the + * guest, mask the PCI_STATUS_CAP_LIST bit in the status + * register. + */ + mask_cap_list = true; + + while ( next && ttl ) + { + unsigned int pos = next; + + next = pci_find_next_cap_ttl(pdev->sbdf, + pos + PCI_CAP_LIST_NEXT, + supported_caps, n, &ttl); + + if ( !is_hwdom ) + { + rc = vpci_add_register(pdev->vpci, vpci_hw_read8, NULL, + pos + PCI_CAP_LIST_ID, 1, NULL); + if ( rc ) + return rc; + } + + rc = vpci_add_register(pdev->vpci, vpci_read_val, + is_hwdom ? vpci_hw_write8 : NULL, + pos + PCI_CAP_LIST_NEXT, 1, + (void *)(uintptr_t)next); + if ( rc ) + return rc; + + next &= ~3; + } + } + + /* Return early for the hw domain, no masking of PCI_STATUS. */ + if ( is_hwdom ) + return 0; + + /* Utilize rsvdp_mask to hide PCI_STATUS_CAP_LIST from the guest. */ + return vpci_add_register_mask(pdev->vpci, vpci_hw_read16, vpci_hw_write16, + PCI_STATUS, 2, NULL, + PCI_STATUS_RO_MASK & + ~(mask_cap_list ? PCI_STATUS_CAP_LIST : 0), + PCI_STATUS_RW1C_MASK, + mask_cap_list ? PCI_STATUS_CAP_LIST : 0, + PCI_STATUS_RSVDZ_MASK); +} + +static int vpci_init_ext_capability_list(const struct pci_dev *pdev) +{ + unsigned int pos = PCI_CFG_SPACE_SIZE; + + if ( !pdev->ext_cfg ) + return 0; + + if ( !is_hardware_domain(pdev->domain) ) + /* Extended capabilities read as zero, write ignore for DomU */ + return vpci_add_register(pdev->vpci, vpci_read_val, NULL, + pos, 4, (void *)0); + + do + { + uint32_t header = pci_conf_read32(pdev->sbdf, pos); + int rc; + + if ( header == 0xffffffffU ) + { + printk(XENLOG_WARNING + "%pd %pp: broken extended cap list, offset %#x\n", + pdev->domain, &pdev->sbdf, pos); + return 0; + } + + rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL, + pos, 4, (void *)(uintptr_t)header); + if ( rc == -EEXIST ) + { + printk(XENLOG_WARNING + "%pd %pp: overlap in extended cap list, offset %#x\n", + pdev->domain, &pdev->sbdf, pos); + return 0; + } + + if ( rc ) + return rc; + + pos = PCI_EXT_CAP_NEXT(header); + } while ( pos >= PCI_CFG_SPACE_SIZE ); + + return 0; +} + int vpci_init_capabilities(struct pci_dev *pdev) { + int rc; + + rc = vpci_init_capability_list(pdev); + if ( rc ) + return rc; + + rc = vpci_init_ext_capability_list(pdev); + if ( rc ) + return rc; + for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ ) { const vpci_capability_t *capability = &__start_vpci_array[i]; const unsigned int cap = capability->id; const bool is_ext = capability->is_ext; unsigned int pos = 0; - int rc; if ( !is_ext ) pos = pci_find_cap_offset(pdev->sbdf, cap); --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -XXX,XX +XXX,XX @@ static int bar_add_rangeset(const struct return !bar->mem ? -ENOMEM : 0; } -static int vpci_init_capability_list(struct pci_dev *pdev) -{ - int rc; - bool mask_cap_list = false; - bool is_hwdom = is_hardware_domain(pdev->domain); - - if ( pci_conf_read16(pdev->sbdf, PCI_STATUS) & PCI_STATUS_CAP_LIST ) - { - /* Only expose capabilities to the guest that vPCI can handle. */ - unsigned int next, ttl = 48; - static const unsigned int supported_caps[] = { - PCI_CAP_ID_MSI, - PCI_CAP_ID_MSIX, - }; - /* - * For dom0, we should expose all capabilities instead of a fixed - * capabilities array, so setting n to 0 here is to get the next - * capability position directly in pci_find_next_cap_ttl. - */ - const unsigned int n = is_hwdom ? 0 : ARRAY_SIZE(supported_caps); - - next = pci_find_next_cap_ttl(pdev->sbdf, PCI_CAPABILITY_LIST, - supported_caps, n, &ttl); - - rc = vpci_add_register(pdev->vpci, vpci_read_val, - is_hwdom ? vpci_hw_write8 : NULL, - PCI_CAPABILITY_LIST, 1, - (void *)(uintptr_t)next); - if ( rc ) - return rc; - - next &= ~3; - - if ( !next && !is_hwdom ) - /* - * If we don't have any supported capabilities to expose to the - * guest, mask the PCI_STATUS_CAP_LIST bit in the status - * register. - */ - mask_cap_list = true; - - while ( next && ttl ) - { - unsigned int pos = next; - - next = pci_find_next_cap_ttl(pdev->sbdf, - pos + PCI_CAP_LIST_NEXT, - supported_caps, n, &ttl); - - if ( !is_hwdom ) - { - rc = vpci_add_register(pdev->vpci, vpci_hw_read8, NULL, - pos + PCI_CAP_LIST_ID, 1, NULL); - if ( rc ) - return rc; - } - - rc = vpci_add_register(pdev->vpci, vpci_read_val, - is_hwdom ? vpci_hw_write8 : NULL, - pos + PCI_CAP_LIST_NEXT, 1, - (void *)(uintptr_t)next); - if ( rc ) - return rc; - - next &= ~3; - } - } - - /* Return early for the hw domain, no masking of PCI_STATUS. */ - if ( is_hwdom ) - return 0; - - /* Utilize rsvdp_mask to hide PCI_STATUS_CAP_LIST from the guest. */ - return vpci_add_register_mask(pdev->vpci, vpci_hw_read16, vpci_hw_write16, - PCI_STATUS, 2, NULL, - PCI_STATUS_RO_MASK & - ~(mask_cap_list ? PCI_STATUS_CAP_LIST : 0), - PCI_STATUS_RW1C_MASK, - mask_cap_list ? PCI_STATUS_CAP_LIST : 0, - PCI_STATUS_RSVDZ_MASK); -} - -static int vpci_init_ext_capability_list(const struct pci_dev *pdev) -{ - unsigned int pos = PCI_CFG_SPACE_SIZE; - - if ( !pdev->ext_cfg ) - return 0; - - if ( !is_hardware_domain(pdev->domain) ) - /* Extended capabilities read as zero, write ignore for DomU */ - return vpci_add_register(pdev->vpci, vpci_read_val, NULL, - pos, 4, (void *)0); - - do - { - uint32_t header = pci_conf_read32(pdev->sbdf, pos); - int rc; - - if ( header == 0xffffffffU ) - { - printk(XENLOG_WARNING - "%pd %pp: broken extended cap list, offset %#x\n", - pdev->domain, &pdev->sbdf, pos); - return 0; - } - - rc = vpci_add_register(pdev->vpci, vpci_read_val, NULL, - pos, 4, (void *)(uintptr_t)header); - if ( rc == -EEXIST ) - { - printk(XENLOG_WARNING - "%pd %pp: overlap in extended cap list, offset %#x\n", - pdev->domain, &pdev->sbdf, pos); - return 0; - } - - if ( rc ) - return rc; - - pos = PCI_EXT_CAP_NEXT(header); - } while ( pos >= PCI_CFG_SPACE_SIZE ); - - return 0; -} - int vpci_init_header(struct pci_dev *pdev) { uint16_t cmd; @@ -XXX,XX +XXX,XX @@ int vpci_init_header(struct pci_dev *pde if ( rc ) return rc; - rc = vpci_init_capability_list(pdev); - if ( rc ) - return rc; - - rc = vpci_init_ext_capability_list(pdev); - if ( rc ) - return rc; - if ( pdev->ignore_bars ) return 0;
When Dom0 informs us about MMCFG usability, this may change whether extended capabilities are available (accessible) for devices. Zap what might be on record, and re-initialize things. No synchronization is added for the case where devices may already be in use. That'll need sorting when (a) DomU support was added and (b) DomU-s may run already while Dom0 / hwdom still boots (dom0less, Hyperlaunch). Signed-off-by: Jan Beulich <jbeulich@suse.com> --- vpci_reinit_ext_capabilities()'es return value isn't checked, as it doesn't feel quite right to fail the hypercall because of this. At the same time it also doesn't feel quite right to have the function return "void". Thoughts? --- v4: Make sure ->cleanup() and ->init() are invoked. v3: New. --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -XXX,XX +XXX,XX @@ #include <xen/guest_access.h> #include <xen/iocap.h> #include <xen/serial.h> +#include <xen/vpci.h> + #include <asm/current.h> #include <asm/io_apic.h> #include <asm/msi.h> @@ -XXX,XX +XXX,XX @@ int cf_check physdev_check_pci_extcfg(st ASSERT(pdev->seg == info->segment); if ( pdev->bus >= info->start_bus && pdev->bus <= info->end_bus ) + { pci_check_extcfg(pdev); + vpci_reinit_ext_capabilities(pdev); + } return 0; } --- a/xen/drivers/vpci/cap.c +++ b/xen/drivers/vpci/cap.c @@ -XXX,XX +XXX,XX @@ static int vpci_init_ext_capability_list return 0; } -int vpci_init_capabilities(struct pci_dev *pdev) +int vpci_init_capabilities(struct pci_dev *pdev, bool ext_only) { int rc; - rc = vpci_init_capability_list(pdev); - if ( rc ) - return rc; + if ( !ext_only ) + { + rc = vpci_init_capability_list(pdev); + if ( rc ) + return rc; + } rc = vpci_init_ext_capability_list(pdev); if ( rc ) @@ -XXX,XX +XXX,XX @@ int vpci_init_capabilities(struct pci_de unsigned int pos = 0; if ( !is_ext ) - pos = pci_find_cap_offset(pdev->sbdf, cap); + pos = !ext_only ? pci_find_cap_offset(pdev->sbdf, cap) : 0; else if ( is_hardware_domain(pdev->domain) ) pos = pci_find_ext_capability(pdev, cap); @@ -XXX,XX +XXX,XX @@ int vpci_init_capabilities(struct pci_de return 0; } -void vpci_cleanup_capabilities(struct pci_dev *pdev) +void vpci_cleanup_capabilities(struct pci_dev *pdev, bool ext_only) { for ( unsigned int i = 0; i < NUM_VPCI_INIT; i++ ) { @@ -XXX,XX +XXX,XX @@ void vpci_cleanup_capabilities(struct pc continue; if ( !capability->is_ext ) - pos = pci_find_cap_offset(pdev->sbdf, cap); + pos = !ext_only ? pci_find_cap_offset(pdev->sbdf, cap) : 0; else if ( is_hardware_domain(pdev->domain) ) pos = pci_find_ext_capability(pdev, cap); if ( pos ) @@ -XXX,XX +XXX,XX @@ void vpci_cleanup_capabilities(struct pc } } +int vpci_reinit_ext_capabilities(struct pci_dev *pdev) +{ + if ( !pdev->vpci ) + return 0; + + vpci_cleanup_capabilities(pdev, true); + + if ( vpci_remove_registers(pdev->vpci, PCI_CFG_SPACE_SIZE, + PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) ) + ASSERT_UNREACHABLE(); + + return vpci_init_capabilities(pdev, true); +} + /* * Local variables: * mode: C --- a/xen/drivers/vpci/private.h +++ b/xen/drivers/vpci/private.h @@ -XXX,XX +XXX,XX @@ typedef struct { #define REGISTER_VPCI_EXTCAP(name, finit, fclean) \ REGISTER_VPCI_CAPABILITY(PCI_EXT_CAP_ID_##name, name, finit, fclean, true) -int vpci_init_capabilities(struct pci_dev *pdev); -void vpci_cleanup_capabilities(struct pci_dev *pdev); +int vpci_init_capabilities(struct pci_dev *pdev, bool ext_only); +void vpci_cleanup_capabilities(struct pci_dev *pdev, bool ext_only); /* Add/remove a register handler. */ int __must_check vpci_add_register_mask(struct vpci *vpci, --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -XXX,XX +XXX,XX @@ void vpci_deassign_device(struct pci_dev &pdev->domain->vpci_dev_assigned_map); #endif - vpci_cleanup_capabilities(pdev); + vpci_cleanup_capabilities(pdev, false); spin_lock(&pdev->vpci->lock); while ( !list_empty(&pdev->vpci->handlers) ) @@ -XXX,XX +XXX,XX @@ int vpci_assign_device(struct pci_dev *p if ( rc ) goto out; - rc = vpci_init_capabilities(pdev); + rc = vpci_init_capabilities(pdev, false); out: if ( rc ) --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -XXX,XX +XXX,XX @@ #define VPCI_MAX_VIRT_DEV (PCI_SLOT(~0) + 1) int __must_check vpci_init_header(struct pci_dev *pdev); +int vpci_reinit_ext_capabilities(struct pci_dev *pdev); /* Assign vPCI to device by adding handlers. */ int __must_check vpci_assign_device(struct pci_dev *pdev); @@ -XXX,XX +XXX,XX @@ bool vpci_ecam_read(pci_sbdf_t sbdf, uns #else /* !CONFIG_HAS_VPCI */ struct vpci_vcpu {}; +static inline int vpci_reinit_ext_capabilities(struct pci_dev *pdev) +{ + return 0; +} + static inline int vpci_assign_device(struct pci_dev *pdev) { return 0;