This reverts commit 122173a5830f7757f8a94a3b1559582f312e140b.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/hw/pci/pci_device.h | 6 +-
include/hw/pci/pcie_sriov.h | 18 ---
hw/pci/pci.c | 62 +++-----
hw/pci/pcie_sriov.c | 290 ++++++++----------------------------
4 files changed, 83 insertions(+), 293 deletions(-)
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index e7e41cb939..1ff3ce94e2 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -38,8 +38,6 @@ struct PCIDeviceClass {
uint16_t subsystem_id; /* only for header type = 0 */
const char *romfile; /* rom bar */
-
- bool sriov_vf_user_creatable;
};
enum PCIReqIDType {
@@ -169,8 +167,6 @@ struct PCIDevice {
/* ID of standby device in net_failover pair */
char *failover_pair_id;
uint32_t acpi_index;
-
- char *sriov_pf;
};
static inline int pci_intx(PCIDevice *pci_dev)
@@ -203,7 +199,7 @@ static inline int pci_is_express_downstream_port(const PCIDevice *d)
static inline int pci_is_vf(const PCIDevice *d)
{
- return d->sriov_pf || d->exp.sriov_vf.pf != NULL;
+ return d->exp.sriov_vf.pf != NULL;
}
static inline uint32_t pci_config_size(const PCIDevice *d)
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index f75b8f22ee..c5d2d318d3 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -18,7 +18,6 @@
typedef struct PCIESriovPF {
uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */
PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
- bool vf_user_created; /* If VFs are created by user */
} PCIESriovPF;
typedef struct PCIESriovVF {
@@ -41,23 +40,6 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
MemoryRegion *memory);
-/**
- * pcie_sriov_pf_init_from_user_created_vfs() - Initialize PF with user-created
- * VFs.
- * @dev: A PCIe device being realized.
- * @offset: The offset of the SR-IOV capability.
- * @errp: pointer to Error*, to store an error if it happens.
- *
- * Return: The size of added capability. 0 if the user did not create VFs.
- * -1 if failed.
- */
-int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
- uint16_t offset,
- Error **errp);
-
-bool pcie_sriov_register_device(PCIDevice *dev, Error **errp);
-void pcie_sriov_unregister_device(PCIDevice *dev);
-
/*
* Default (minimal) page size support values
* as required by the SR/IOV standard:
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 8ad5d7e2d8..cf2794879d 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -85,7 +85,6 @@ static Property pci_props[] = {
QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
- DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
DEFINE_PROP_END_OF_LIST()
};
@@ -960,8 +959,13 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
}
- /* SR/IOV is not handled here. */
- if (pci_is_vf(dev)) {
+ /*
+ * With SR/IOV and ARI, a device at function 0 need not be a multifunction
+ * device, as it may just be a VF that ended up with function 0 in
+ * the legacy PCI interpretation. Avoid failing in such cases:
+ */
+ if (pci_is_vf(dev) &&
+ dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
return;
}
@@ -994,8 +998,7 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
}
/* function 0 indicates single function, so function > 0 must be NULL */
for (func = 1; func < PCI_FUNC_MAX; ++func) {
- PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)];
- if (device && !pci_is_vf(device)) {
+ if (bus->devices[PCI_DEVFN(slot, func)]) {
error_setg(errp, "PCI: %x.0 indicates single function, "
"but %x.%x is already populated.",
slot, slot, func);
@@ -1280,7 +1283,6 @@ static void pci_qdev_unrealize(DeviceState *dev)
pci_unregister_io_regions(pci_dev);
pci_del_option_rom(pci_dev);
- pcie_sriov_unregister_device(pci_dev);
if (pc->exit) {
pc->exit(pci_dev);
@@ -1312,6 +1314,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;
+ assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
@@ -1322,6 +1325,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
assert(hdr_type != PCI_HEADER_TYPE_BRIDGE || region_num < 2);
r = &pci_dev->io_regions[region_num];
+ r->addr = PCI_BAR_UNMAPPED;
r->size = size;
r->type = type;
r->memory = memory;
@@ -1329,35 +1333,22 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
? pci_get_bus(pci_dev)->address_space_io
: pci_get_bus(pci_dev)->address_space_mem;
- if (pci_is_vf(pci_dev)) {
- PCIDevice *pf = pci_dev->exp.sriov_vf.pf;
- assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]);
+ wmask = ~(size - 1);
+ if (region_num == PCI_ROM_SLOT) {
+ /* ROM enable bit is writable */
+ wmask |= PCI_ROM_ADDRESS_ENABLE;
+ }
- r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size);
- if (r->addr != PCI_BAR_UNMAPPED) {
- memory_region_add_subregion_overlap(r->address_space,
- r->addr, r->memory, 1);
- }
+ addr = pci_bar(pci_dev, region_num);
+ pci_set_long(pci_dev->config + addr, type);
+
+ if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(pci_dev->wmask + addr, wmask);
+ pci_set_quad(pci_dev->cmask + addr, ~0ULL);
} else {
- r->addr = PCI_BAR_UNMAPPED;
-
- wmask = ~(size - 1);
- if (region_num == PCI_ROM_SLOT) {
- /* ROM enable bit is writable */
- wmask |= PCI_ROM_ADDRESS_ENABLE;
- }
-
- addr = pci_bar(pci_dev, region_num);
- pci_set_long(pci_dev->config + addr, type);
-
- if (!(r->type & PCI_BASE_ADDRESS_SPACE_IO) &&
- r->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- pci_set_quad(pci_dev->wmask + addr, wmask);
- pci_set_quad(pci_dev->cmask + addr, ~0ULL);
- } else {
- pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
- pci_set_long(pci_dev->cmask + addr, 0xffffffff);
- }
+ pci_set_long(pci_dev->wmask + addr, wmask & 0xffffffff);
+ pci_set_long(pci_dev->cmask + addr, 0xffffffff);
}
}
@@ -2118,11 +2109,6 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
- if (!pcie_sriov_register_device(pci_dev, errp)) {
- pci_qdev_unrealize(DEVICE(pci_dev));
- return;
- }
-
/*
* A PCIe Downstream Port that do not have ARI Forwarding enabled must
* associate only Device 0 with the device attached to the bus
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 0fc9f810b9..15a4aac1f4 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -20,8 +20,6 @@
#include "qapi/error.h"
#include "trace.h"
-static GHashTable *pfs;
-
static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
{
for (uint16_t i = 0; i < total_vfs; i++) {
@@ -33,49 +31,14 @@ static void unparent_vfs(PCIDevice *dev, uint16_t total_vfs)
dev->exp.sriov_pf.vf = NULL;
}
-static void clear_ctrl_vfe(PCIDevice *dev)
-{
- uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
- pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
-}
-
-static void register_vfs(PCIDevice *dev)
-{
- uint16_t num_vfs;
- uint16_t i;
- uint16_t sriov_cap = dev->exp.sriov_cap;
-
- assert(sriov_cap > 0);
- num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
- if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
- clear_ctrl_vfe(dev);
- return;
- }
-
- trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), num_vfs);
- for (i = 0; i < num_vfs; i++) {
- pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
- }
-}
-
-static void unregister_vfs(PCIDevice *dev)
-{
- uint16_t i;
- uint8_t *cfg = dev->config + dev->exp.sriov_cap;
-
- trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn));
- for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
- pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
- }
-}
-
-static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
- uint16_t vf_dev_id, uint16_t init_vfs,
- uint16_t total_vfs, uint16_t vf_offset,
- uint16_t vf_stride, Error **errp)
+bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+ const char *vfname, uint16_t vf_dev_id,
+ uint16_t init_vfs, uint16_t total_vfs,
+ uint16_t vf_offset, uint16_t vf_stride,
+ Error **errp)
{
+ BusState *bus = qdev_get_parent_bus(&dev->qdev);
+ int32_t devfn = dev->devfn + vf_offset;
uint8_t *cfg = dev->config + offset;
uint8_t *wmask;
@@ -137,28 +100,6 @@ static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
qdev_prop_set_bit(&dev->qdev, "multifunction", true);
- return true;
-}
-
-bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
- const char *vfname, uint16_t vf_dev_id,
- uint16_t init_vfs, uint16_t total_vfs,
- uint16_t vf_offset, uint16_t vf_stride,
- Error **errp)
-{
- BusState *bus = qdev_get_parent_bus(&dev->qdev);
- int32_t devfn = dev->devfn + vf_offset;
-
- if (pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
- error_setg(errp, "attaching user-created SR-IOV VF unsupported");
- return false;
- }
-
- if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, init_vfs,
- total_vfs, vf_offset, vf_stride, errp)) {
- return false;
- }
-
dev->exp.sriov_pf.vf = g_new(PCIDevice *, total_vfs);
for (uint16_t i = 0; i < total_vfs; i++) {
@@ -188,24 +129,7 @@ void pcie_sriov_pf_exit(PCIDevice *dev)
{
uint8_t *cfg = dev->config + dev->exp.sriov_cap;
- if (dev->exp.sriov_pf.vf_user_created) {
- uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
- uint16_t total_vfs = pci_get_word(dev->config + PCI_SRIOV_TOTAL_VF);
- uint16_t vf_dev_id = pci_get_word(dev->config + PCI_SRIOV_VF_DID);
-
- unregister_vfs(dev);
-
- for (uint16_t i = 0; i < total_vfs; i++) {
- PCIDevice *vf = dev->exp.sriov_pf.vf[i];
-
- vf->exp.sriov_vf.pf = NULL;
-
- pci_config_set_vendor_id(vf->config, ven_id);
- pci_config_set_device_id(vf->config, vf_dev_id);
- }
- } else {
- unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
- }
+ unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
}
void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
@@ -238,172 +162,74 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
MemoryRegion *memory)
{
+ PCIIORegion *r;
+ PCIBus *bus = pci_get_bus(dev);
uint8_t type;
+ pcibus_t size = memory_region_size(memory);
- assert(dev->exp.sriov_vf.pf);
+ assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */
+ assert(region_num >= 0);
+ assert(region_num < PCI_NUM_REGIONS);
type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num];
- return pci_register_bar(dev, region_num, type, memory);
+ if (!is_power_of_2(size)) {
+ error_report("%s: PCI region size must be a power"
+ " of two - type=0x%x, size=0x%"FMT_PCIBUS,
+ __func__, type, size);
+ exit(1);
+ }
+
+ r = &dev->io_regions[region_num];
+ r->memory = memory;
+ r->address_space =
+ type & PCI_BASE_ADDRESS_SPACE_IO
+ ? bus->address_space_io
+ : bus->address_space_mem;
+ r->size = size;
+ r->type = type;
+
+ r->addr = pci_bar_address(dev, region_num, r->type, r->size);
+ if (r->addr != PCI_BAR_UNMAPPED) {
+ memory_region_add_subregion_overlap(r->address_space,
+ r->addr, r->memory, 1);
+ }
}
-static gint compare_vf_devfns(gconstpointer a, gconstpointer b)
+static void clear_ctrl_vfe(PCIDevice *dev)
{
- return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn;
+ uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL;
+ pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE);
}
-int16_t pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev,
- uint16_t offset,
- Error **errp)
+static void register_vfs(PCIDevice *dev)
{
- GPtrArray *pf;
- PCIDevice **vfs;
- BusState *bus = qdev_get_parent_bus(DEVICE(dev));
- uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
- uint16_t vf_dev_id;
- uint16_t vf_offset;
- uint16_t vf_stride;
+ uint16_t num_vfs;
uint16_t i;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
- if (!pfs || !dev->qdev.id) {
- return 0;
+ assert(sriov_cap > 0);
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+ if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
+ clear_ctrl_vfe(dev);
+ return;
}
- pf = g_hash_table_lookup(pfs, dev->qdev.id);
- if (!pf) {
- return 0;
+ trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), num_vfs);
+ for (i = 0; i < num_vfs; i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], true);
}
-
- if (pf->len > UINT16_MAX) {
- error_setg(errp, "too many VFs");
- return -1;
- }
-
- g_ptr_array_sort(pf, compare_vf_devfns);
- vfs = (void *)pf->pdata;
-
- if (vfs[0]->devfn <= dev->devfn) {
- error_setg(errp, "a VF function number is less than the PF function number");
- return -1;
- }
-
- vf_dev_id = pci_get_word(vfs[0]->config + PCI_DEVICE_ID);
- vf_offset = vfs[0]->devfn - dev->devfn;
- vf_stride = pf->len < 2 ? 0 : vfs[1]->devfn - vfs[0]->devfn;
-
- for (i = 0; i < pf->len; i++) {
- if (bus != qdev_get_parent_bus(&vfs[i]->qdev)) {
- error_setg(errp, "SR-IOV VF parent bus mismatches with PF");
- return -1;
- }
-
- if (ven_id != pci_get_word(vfs[i]->config + PCI_VENDOR_ID)) {
- error_setg(errp, "SR-IOV VF vendor ID mismatches with PF");
- return -1;
- }
-
- if (vf_dev_id != pci_get_word(vfs[i]->config + PCI_DEVICE_ID)) {
- error_setg(errp, "inconsistent SR-IOV VF device IDs");
- return -1;
- }
-
- for (size_t j = 0; j < PCI_NUM_REGIONS; j++) {
- if (vfs[i]->io_regions[j].size != vfs[0]->io_regions[j].size ||
- vfs[i]->io_regions[j].type != vfs[0]->io_regions[j].type) {
- error_setg(errp, "inconsistent SR-IOV BARs");
- return -1;
- }
- }
-
- if (vfs[i]->devfn - vfs[0]->devfn != vf_stride * i) {
- error_setg(errp, "inconsistent SR-IOV stride");
- return -1;
- }
- }
-
- if (!pcie_sriov_pf_init_common(dev, offset, vf_dev_id, pf->len,
- pf->len, vf_offset, vf_stride, errp)) {
- return -1;
- }
-
- for (i = 0; i < pf->len; i++) {
- vfs[i]->exp.sriov_vf.pf = dev;
- vfs[i]->exp.sriov_vf.vf_number = i;
-
- /* set vid/did according to sr/iov spec - they are not used */
- pci_config_set_vendor_id(vfs[i]->config, 0xffff);
- pci_config_set_device_id(vfs[i]->config, 0xffff);
- }
-
- dev->exp.sriov_pf.vf = vfs;
- dev->exp.sriov_pf.vf_user_created = true;
-
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
- PCIIORegion *region = &vfs[0]->io_regions[i];
-
- if (region->size) {
- pcie_sriov_pf_init_vf_bar(dev, i, region->type, region->size);
- }
- }
-
- return PCI_EXT_CAP_SRIOV_SIZEOF;
}
-bool pcie_sriov_register_device(PCIDevice *dev, Error **errp)
+static void unregister_vfs(PCIDevice *dev)
{
- if (!dev->exp.sriov_pf.vf && dev->qdev.id &&
- pfs && g_hash_table_contains(pfs, dev->qdev.id)) {
- error_setg(errp, "attaching user-created SR-IOV VF unsupported");
- return false;
- }
+ uint16_t i;
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
- if (dev->sriov_pf) {
- PCIDevice *pci_pf;
- GPtrArray *pf;
-
- if (!PCI_DEVICE_GET_CLASS(dev)->sriov_vf_user_creatable) {
- error_setg(errp, "user cannot create SR-IOV VF with this device type");
- return false;
- }
-
- if (!pci_is_express(dev)) {
- error_setg(errp, "PCI Express is required for SR-IOV VF");
- return false;
- }
-
- if (!pci_qdev_find_device(dev->sriov_pf, &pci_pf)) {
- error_setg(errp, "PCI device specified as SR-IOV PF already exists");
- return false;
- }
-
- if (!pfs) {
- pfs = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
- }
-
- pf = g_hash_table_lookup(pfs, dev->sriov_pf);
- if (!pf) {
- pf = g_ptr_array_new();
- g_hash_table_insert(pfs, g_strdup(dev->sriov_pf), pf);
- }
-
- g_ptr_array_add(pf, dev);
- }
-
- return true;
-}
-
-void pcie_sriov_unregister_device(PCIDevice *dev)
-{
- if (dev->sriov_pf && pfs) {
- GPtrArray *pf = g_hash_table_lookup(pfs, dev->sriov_pf);
-
- if (pf) {
- g_ptr_array_remove_fast(pf, dev);
-
- if (!pf->len) {
- g_hash_table_remove(pfs, dev->sriov_pf);
- g_ptr_array_free(pf, FALSE);
- }
- }
+ trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
+ for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) {
+ pci_set_enabled(dev->exp.sriov_pf.vf[i], false);
}
}
@@ -490,7 +316,7 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize)
uint16_t pcie_sriov_vf_number(PCIDevice *dev)
{
- assert(dev->exp.sriov_vf.pf);
+ assert(pci_is_vf(dev));
return dev->exp.sriov_vf.vf_number;
}
--
MST
© 2016 - 2024 Red Hat, Inc.