This enables SR-IOV emulation on virtio-pci devices. It introduces the
'sriov-pf' property, which marks a device as a VF and identifies the PF
it is paired with.
Currently this feature needs to be explicitly enabled by a subclass.
Co-developed-by: Yui Washizu <yui.washidu@gmail.com>
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
include/hw/virtio/virtio-pci.h | 2 +
hw/virtio/virtio-pci.c | 208 +++++++++++++++++++++++++++++++++++++++--
2 files changed, 201 insertions(+), 9 deletions(-)
diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
index 5a3f182f99..0cd781ea98 100644
--- a/include/hw/virtio/virtio-pci.h
+++ b/include/hw/virtio/virtio-pci.h
@@ -105,6 +105,7 @@ struct VirtioPCIClass {
PCIDeviceClass parent_class;
DeviceRealize parent_dc_realize;
void (*realize)(VirtIOPCIProxy *vpci_dev, Error **errp);
+ bool sriov_supported; /* must be true for a device to act as SR-IOV PF; checked in virtio_pci_device_plugged() */
};
typedef struct VirtIOPCIRegion {
@@ -159,6 +160,7 @@ struct VirtIOPCIProxy {
uint32_t gfselect;
uint32_t guest_features[2];
VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX];
+ GArray *sriov_vfs; /* VirtIOPCISriovVF entries looked up by this device's id in virtio_pci_pre_plugged() */
VirtIOIRQFD *vector_irqfd;
int nvqs_with_notifiers;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 205dbf24fb..3f1b3db9b7 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -26,6 +26,9 @@
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
+#include "monitor/qdev.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qobject.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
@@ -49,6 +52,18 @@
* configuration space */
#define VIRTIO_PCI_CONFIG_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev))
+typedef struct VirtIOPCISriovVF { /* One VF recorded for a PF; filled in by virtio_pci_hide(). */
+ ObjectClass *class; /* class of the probed VF; compared with the PF's class */
+ PCIESriovVFOpts opts; /* device_opts/from_json, later copied for pcie_sriov_pf_init() */
+ struct {
+ pcibus_t size; /* copied from the probed device's io_regions[].size */
+ uint8_t type; /* copied from the probed device's io_regions[].type */
+ } io_regions[PCI_NUM_REGIONS]; /* BAR snapshot, compared with the PF's BARs */
+ uint16_t devfn; /* devfn of the probed VF; sort key and input to the stride check */
+} VirtIOPCISriovVF;
+
+static GHashTable *sriov_vfs; /* PF id string -> GArray of VirtIOPCISriovVF; created in virtio_pci_class_init() */
+
static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
VirtIOPCIProxy *dev);
static void virtio_pci_reset(DeviceState *qdev);
@@ -1912,6 +1927,18 @@ static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+ if (d->id) {
+ if (pci_is_vf(&proxy->pci_dev)) {
+ if (g_hash_table_contains(sriov_vfs, d->id)) {
+ error_setg(errp, "a function cannot be SR-IOV PF and VF at the same time");
+ return;
+ }
+ } else {
+ proxy->sriov_vfs = g_hash_table_lookup(sriov_vfs, d->id);
+ virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
+ }
+ }
+
if (virtio_pci_modern(proxy)) {
virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
}
@@ -1919,10 +1946,26 @@ static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE);
}
+static gint virtio_pci_sriov_vfs_compare(gconstpointer a, gconstpointer b)
+{ /* Comparison callback passed to g_array_sort(): orders VirtIOPCISriovVF entries by ascending devfn. */
+ return ((VirtIOPCISriovVF *)a)->devfn - ((VirtIOPCISriovVF *)b)->devfn; /* negative, zero or positive, as a sort callback expects */
+}
+
+static void virtio_pci_register_bar(VirtIOPCIProxy *proxy, int region_num,
+ uint8_t type, MemoryRegion *memory)
+{ /* Register BAR @region_num backed by @memory, through the VF helper for VFs and pci_register_bar() otherwise. */
+ if (pci_is_vf(&proxy->pci_dev)) {
+ pcie_sriov_vf_register_bar(&proxy->pci_dev, region_num, memory); /* @type is not passed on this path */
+ } else {
+ pci_register_bar(&proxy->pci_dev, region_num, type, memory);
+ }
+}
+
/* This is called by virtio-bus just after the device is plugged. */
static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
{
VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+ VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(d);
VirtioBusState *bus = &proxy->bus;
bool legacy = virtio_pci_legacy(proxy);
bool modern;
@@ -2026,18 +2069,18 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
memory_region_init(&proxy->io_bar, OBJECT(proxy),
"virtio-pci-io", 0x4);
- pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx,
- PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);
+ virtio_pci_register_bar(proxy, proxy->modern_io_bar_idx,
+ PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);
virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio,
&notify_pio.cap);
}
- pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx,
- PCI_BASE_ADDRESS_SPACE_MEMORY |
- PCI_BASE_ADDRESS_MEM_PREFETCH |
- PCI_BASE_ADDRESS_MEM_TYPE_64,
- &proxy->modern_bar);
+ virtio_pci_register_bar(proxy, proxy->modern_mem_bar_idx,
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_PREFETCH |
+ PCI_BASE_ADDRESS_MEM_TYPE_64,
+ &proxy->modern_bar);
proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
@@ -2072,8 +2115,92 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
&virtio_pci_config_ops,
proxy, "virtio-pci", size);
- pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
- PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
+ virtio_pci_register_bar(proxy, proxy->legacy_io_bar_idx,
+ PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
+ }
+
+ if (proxy->sriov_vfs) {
+ uint16_t first_devfn;
+ uint16_t stride;
+ PCIESriovVFOpts *opts;
+
+ if (!k->sriov_supported) {
+ error_setg(errp, "SR-IOV is not supported by this device type");
+ return;
+ }
+
+ if (!pci_is_express(&proxy->pci_dev)) {
+ error_setg(errp, "PCI Express is required for SR-IOV");
+ return;
+ }
+
+ g_array_sort(proxy->sriov_vfs, virtio_pci_sriov_vfs_compare);
+
+ first_devfn = g_array_index(proxy->sriov_vfs, VirtIOPCISriovVF, 0).devfn;
+ if (first_devfn <= proxy->pci_dev.devfn) {
+ error_setg(errp, "a VF function number is less than the PF function number");
+ return;
+ }
+
+ stride = proxy->sriov_vfs->len < 2 ?
+ 0 :
+ (g_array_index(proxy->sriov_vfs, VirtIOPCISriovVF, 1).devfn -
+ first_devfn);
+
+        /*
+         * Validate each VF against the PF and stop at the first mismatch.
+         */
+        for (uint16_t i = 0; i < proxy->sriov_vfs->len; i++) {
+            VirtIOPCISriovVF *vf =
+                &g_array_index(proxy->sriov_vfs, VirtIOPCISriovVF, i);
+            if (vf->class != object_get_class(OBJECT(proxy))) {
+                error_setg(errp, "a VF and its paired PF have different types");
+                return;
+            }
+            for (size_t j = 0; j < PCI_NUM_REGIONS; j++) {
+                if (j == PCI_ROM_SLOT) {
+                    continue; /* the ROM slot is not compared */
+                }
+                if (vf->io_regions[j].size != proxy->pci_dev.io_regions[j].size ||
+                    vf->io_regions[j].type != proxy->pci_dev.io_regions[j].type) {
+                    error_setg(errp, "inconsistent SR-IOV BARs");
+                    return; /* do not carry on with errp already set */
+                }
+            }
+            if (vf->devfn - first_devfn != stride * i) {
+                error_setg(errp, "inconsistent SR-IOV stride");
+                return;
+            }
+        }
+
+ opts = g_new(PCIESriovVFOpts, proxy->sriov_vfs->len);
+
+ for (uint16_t i = 0; i < proxy->sriov_vfs->len; i++) {
+ opts[i] = g_array_index(proxy->sriov_vfs, VirtIOPCISriovVF, i).opts;
+ qobject_ref(opts[i].device_opts);
+ }
+
+ pcie_sriov_pf_init(&proxy->pci_dev, PCI_CONFIG_SPACE_SIZE,
+ proxy->pci_dev.name, opts,
+ PCI_DEVICE_ID_VIRTIO_10_BASE
+ + virtio_bus_get_vdev_id(bus),
+ proxy->sriov_vfs->len, proxy->sriov_vfs->len,
+ first_devfn - proxy->pci_dev.devfn,
+ stride);
+
+ for (int i = 0; i < PCI_NUM_REGIONS; i++) {
+ if (i == PCI_ROM_SLOT) {
+ continue;
+ }
+
+ VirtIOPCISriovVF *vf = &g_array_index(proxy->sriov_vfs,
+ VirtIOPCISriovVF,
+ 0);
+ uint8_t type = vf->io_regions[i].type;
+ size = vf->io_regions[i].size;
+
+ pcie_sriov_pf_init_vf_bar(&proxy->pci_dev, i, type, size);
+ }
}
}
@@ -2093,9 +2220,69 @@ static void virtio_pci_device_unplugged(DeviceState *d)
if (modern_pio) {
virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio);
}
+ if (proxy->sriov_vfs) {
+ pcie_sriov_pf_exit(&proxy->pci_dev);
+ }
}
}
+/*
+ * PCIDeviceClass::hide hook. A device given "sriov-pf" is a VF: it is created
+ * once without that option to learn its class, BARs and devfn, unplugged
+ * again, and recorded under the PF's id. Returns true when recorded; returns
+ * false otherwise, with @errp set if probing failed.
+ */
+static bool virtio_pci_hide(PCIDeviceClass *pc, const QDict *device_opts,
+                            bool from_json, Error **errp)
+{
+    ERRP_GUARD(); /* *errp is read below, so it must never be NULL */
+    const char *pf;
+    GArray *array;
+    QDict *cloned_device_opts;
+    VirtIOPCISriovVF vf;
+    DeviceState *dev;
+    PCIDevice *pci_dev;
+
+    pf = device_opts ? qdict_get_try_str(device_opts, "sriov-pf") : NULL;
+    if (!pf) {
+        return false;
+    }
+
+    /* Probe with a copy of the options that lacks "sriov-pf". */
+    cloned_device_opts = qdict_clone_shallow(device_opts);
+    qdict_del(cloned_device_opts, "sriov-pf");
+
+    dev = qdev_device_add_from_qdict(cloned_device_opts, from_json, errp);
+    if (!dev) {
+        qobject_unref(cloned_device_opts);
+        return false;
+    }
+
+    pci_dev = PCI_DEVICE(dev);
+    vf.class = object_get_class(OBJECT(dev));
+    vf.opts.device_opts = cloned_device_opts;
+    vf.opts.from_json = from_json;
+    vf.devfn = pci_dev->devfn;
+    for (size_t i = 0; i < PCI_NUM_REGIONS; i++) {
+        vf.io_regions[i].size = pci_dev->io_regions[i].size;
+        vf.io_regions[i].type = pci_dev->io_regions[i].type;
+    }
+
+    qdev_unplug(dev, errp);
+    if (*errp) {
+        qobject_unref(cloned_device_opts);
+        return false;
+    }
+
+    array = g_hash_table_lookup(sriov_vfs, pf);
+    if (!array) {
+        array = g_array_new(false, false, sizeof(VirtIOPCISriovVF));
+        g_hash_table_insert(sriov_vfs, g_strdup(pf), array);
+    }
+    g_array_append_val(array, vf);
+    return true;
+}
+
static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
{
VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
@@ -2325,7 +2512,10 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data)
VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
ResettableClass *rc = RESETTABLE_CLASS(klass);
+ sriov_vfs = g_hash_table_new(g_str_hash, g_str_equal);
+
device_class_set_props(dc, virtio_pci_properties);
+ k->hide = virtio_pci_hide;
k->realize = virtio_pci_realize;
k->exit = virtio_pci_exit;
k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
--
2.43.0