[PATCH 13/14] virtio-pci: add SR-IOV capability

Akihiko Odaki posted 14 patches 12 months ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, Eduardo Habkost <eduardo@habkost.net>, Akihiko Odaki <akihiko.odaki@daynix.com>, Sriram Yagnaraman <sriram.yagnaraman@est.tech>, Jason Wang <jasowang@redhat.com>, Keith Busch <kbusch@kernel.org>, Klaus Jensen <its@irrelevant.dk>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>
There is a newer version of this series
[PATCH 13/14] virtio-pci: add SR-IOV capability
Posted by Akihiko Odaki 12 months ago
This enables SR-IOV emulation on virtio-pci devices. It introduces a
property 'sriov-pf' that marks a device as a VF and names the PF it is
paired with.
Currently this feature needs to be explicitly enabled by a subclass.

Co-developed-by: Yui Washizu <yui.washidu@gmail.com>
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
 include/hw/virtio/virtio-pci.h |   2 +
 hw/virtio/virtio-pci.c         | 208 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 201 insertions(+), 9 deletions(-)

diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
index 5a3f182f99..0cd781ea98 100644
--- a/include/hw/virtio/virtio-pci.h
+++ b/include/hw/virtio/virtio-pci.h
@@ -105,6 +105,7 @@ struct VirtioPCIClass {
     PCIDeviceClass parent_class;
     DeviceRealize parent_dc_realize;
     void (*realize)(VirtIOPCIProxy *vpci_dev, Error **errp);
+    bool sriov_supported;
 };
 
 typedef struct VirtIOPCIRegion {
@@ -159,6 +160,7 @@ struct VirtIOPCIProxy {
     uint32_t gfselect;
     uint32_t guest_features[2];
     VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX];
+    GArray *sriov_vfs;
 
     VirtIOIRQFD *vector_irqfd;
     int nvqs_with_notifiers;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 205dbf24fb..3f1b3db9b7 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -26,6 +26,9 @@
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/qdev-properties.h"
+#include "monitor/qdev.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qobject.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/log.h"
@@ -49,6 +52,18 @@
  * configuration space */
 #define VIRTIO_PCI_CONFIG_SIZE(dev)     VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev))
 
+typedef struct VirtIOPCISriovVF { /* one VF recorded by virtio_pci_hide() */
+    ObjectClass *class; /* QOM class of the probed VF device */
+    PCIESriovVFOpts opts; /* device options, minus "sriov-pf", and from_json */
+    struct { /* per-BAR size/type copied from the probed device's io_regions */
+        pcibus_t size;
+        uint8_t type;
+    } io_regions[PCI_NUM_REGIONS];
+    uint16_t devfn; /* devfn of the probed device */
+} VirtIOPCISriovVF;
+
+static GHashTable *sriov_vfs; /* PF id -> GArray of VirtIOPCISriovVF */
+
 static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size,
                                VirtIOPCIProxy *dev);
 static void virtio_pci_reset(DeviceState *qdev);
@@ -1912,6 +1927,18 @@ static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
     VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
 
+    if (d->id) {
+        if (pci_is_vf(&proxy->pci_dev)) {
+            if (g_hash_table_contains(sriov_vfs, d->id)) {
+                error_setg(errp, "a function cannot be SR-IOV PF and VF at the same time");
+                return;
+            }
+        } else {
+            proxy->sriov_vfs = g_hash_table_lookup(sriov_vfs, d->id);
+            virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
+        }
+    }
+
     if (virtio_pci_modern(proxy)) {
         virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1);
     }
@@ -1919,10 +1946,26 @@ static void virtio_pci_pre_plugged(DeviceState *d, Error **errp)
     virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE);
 }
 
+static gint virtio_pci_sriov_vfs_compare(gconstpointer a, gconstpointer b)
+{ /* sort callback for g_array_sort(): orders VF entries by ascending devfn */
+    return ((VirtIOPCISriovVF *)a)->devfn - ((VirtIOPCISriovVF *)b)->devfn;
+}
+
+static void virtio_pci_register_bar(VirtIOPCIProxy *proxy, int region_num,
+                                    uint8_t type, MemoryRegion *memory)
+{
+    if (!pci_is_vf(&proxy->pci_dev)) { /* not a VF: register with @type */
+        pci_register_bar(&proxy->pci_dev, region_num, type, memory);
+    } else { /* VF: @type is not passed to the SR-IOV helper */
+        pcie_sriov_vf_register_bar(&proxy->pci_dev, region_num, memory);
+    }
+}
+
 /* This is called by virtio-bus just after the device is plugged. */
 static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
 {
     VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+    VirtioPCIClass *k = VIRTIO_PCI_GET_CLASS(d);
     VirtioBusState *bus = &proxy->bus;
     bool legacy = virtio_pci_legacy(proxy);
     bool modern;
@@ -2026,18 +2069,18 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
             memory_region_init(&proxy->io_bar, OBJECT(proxy),
                                "virtio-pci-io", 0x4);
 
-            pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx,
-                             PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);
+            virtio_pci_register_bar(proxy, proxy->modern_io_bar_idx,
+                                    PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar);
 
             virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio,
                                             &notify_pio.cap);
         }
 
-        pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx,
-                         PCI_BASE_ADDRESS_SPACE_MEMORY |
-                         PCI_BASE_ADDRESS_MEM_PREFETCH |
-                         PCI_BASE_ADDRESS_MEM_TYPE_64,
-                         &proxy->modern_bar);
+        virtio_pci_register_bar(proxy, proxy->modern_mem_bar_idx,
+                                PCI_BASE_ADDRESS_SPACE_MEMORY |
+                                PCI_BASE_ADDRESS_MEM_PREFETCH |
+                                PCI_BASE_ADDRESS_MEM_TYPE_64,
+                                &proxy->modern_bar);
 
         proxy->config_cap = virtio_pci_add_mem_cap(proxy, &cfg.cap);
         cfg_mask = (void *)(proxy->pci_dev.wmask + proxy->config_cap);
@@ -2072,8 +2115,92 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
                               &virtio_pci_config_ops,
                               proxy, "virtio-pci", size);
 
-        pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
-                         PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
+        virtio_pci_register_bar(proxy, proxy->legacy_io_bar_idx,
+                                PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
+    }
+
+    if (proxy->sriov_vfs) {
+        uint16_t first_devfn;
+        uint16_t stride;
+        PCIESriovVFOpts *opts;
+
+        if (!k->sriov_supported) {
+            error_setg(errp, "SR-IOV is not supported by this device type");
+            return;
+        }
+
+        if (!pci_is_express(&proxy->pci_dev)) {
+            error_setg(errp, "PCI Express is required for SR-IOV");
+            return;
+        }
+
+        g_array_sort(proxy->sriov_vfs, virtio_pci_sriov_vfs_compare);
+
+        first_devfn = g_array_index(proxy->sriov_vfs, VirtIOPCISriovVF, 0).devfn;
+        if (first_devfn <= proxy->pci_dev.devfn) {
+            error_setg(errp, "a VF function number is less than the PF function number");
+            return;
+        }
+
+        stride = proxy->sriov_vfs->len < 2 ?
+                 0 :
+                 (g_array_index(proxy->sriov_vfs, VirtIOPCISriovVF, 1).devfn -
+                  first_devfn);
+
+        for (uint16_t i = 0; i < proxy->sriov_vfs->len; i++) {
+            VirtIOPCISriovVF *vf = &g_array_index(proxy->sriov_vfs,
+                                                  VirtIOPCISriovVF,
+                                                  i);
+            if (vf->class != object_get_class(OBJECT(proxy))) {
+                error_setg(errp, "a VF and its paired PF have different types");
+                return;
+            }
+
+            for (size_t j = 0; j < PCI_NUM_REGIONS; j++) {
+                if (j == PCI_ROM_SLOT) {
+                    continue;
+                }
+
+                if (vf->io_regions[j].size != proxy->pci_dev.io_regions[j].size ||
+                    vf->io_regions[j].type != proxy->pci_dev.io_regions[j].type) {
+                    error_setg(errp, "inconsistent SR-IOV BARs");
+                }
+            }
+
+            if (vf->devfn - first_devfn != stride * i) {
+                error_setg(errp, "inconsistent SR-IOV stride");
+                return;
+            }
+        }
+
+        opts = g_new(PCIESriovVFOpts, proxy->sriov_vfs->len);
+
+        for (uint16_t i = 0; i < proxy->sriov_vfs->len; i++) {
+            opts[i] = g_array_index(proxy->sriov_vfs, VirtIOPCISriovVF, i).opts;
+            qobject_ref(opts[i].device_opts);
+        }
+
+        pcie_sriov_pf_init(&proxy->pci_dev, PCI_CONFIG_SPACE_SIZE,
+                           proxy->pci_dev.name, opts,
+                           PCI_DEVICE_ID_VIRTIO_10_BASE
+                           + virtio_bus_get_vdev_id(bus),
+                           proxy->sriov_vfs->len, proxy->sriov_vfs->len,
+                           first_devfn - proxy->pci_dev.devfn,
+                           stride);
+
+        for (int i = 0; i < PCI_NUM_REGIONS; i++) {
+            if (i == PCI_ROM_SLOT) {
+                continue;
+            }
+
+            VirtIOPCISriovVF *vf = &g_array_index(proxy->sriov_vfs,
+                                                  VirtIOPCISriovVF,
+                                                  0);
+            uint8_t type = vf->io_regions[i].type;
+            size = vf->io_regions[i].size;
+
+            pcie_sriov_pf_init_vf_bar(&proxy->pci_dev, i, type, size);
+        }
     }
 }
 
@@ -2093,9 +2220,69 @@ static void virtio_pci_device_unplugged(DeviceState *d)
         if (modern_pio) {
             virtio_pci_modern_io_region_unmap(proxy, &proxy->notify_pio);
         }
+        if (proxy->sriov_vfs) {
+            pcie_sriov_pf_exit(&proxy->pci_dev);
+        }
     }
 }
 
+/*
+ * Device "hide" hook (installed as k->hide).  When @device_opts carries
+ * "sriov-pf", the device is created once without that option to sample its
+ * class, BARs and devfn, unplugged again, and queued in sriov_vfs under the PF
+ * id; true is returned.  Returns false if the option is absent or on failure.
+ */
+static bool virtio_pci_hide(PCIDeviceClass *pc, const QDict *device_opts,
+                            bool from_json, Error **errp)
+{
+    ERRP_GUARD(); /* *errp is dereferenced below; a raw errp may be NULL */
+    const char *pf;
+    GArray *array;
+    QDict *cloned_device_opts;
+    VirtIOPCISriovVF vf;
+    DeviceState *dev;
+    PCIDevice *pci_dev;
+
+    pf = device_opts ? qdict_get_try_str(device_opts, "sriov-pf") : NULL;
+    if (!pf) {
+        return false;
+    }
+
+    cloned_device_opts = qdict_clone_shallow(device_opts);
+    qdict_del(cloned_device_opts, "sriov-pf");
+    dev = qdev_device_add_from_qdict(cloned_device_opts, from_json, errp);
+    if (!dev) {
+        qobject_unref(cloned_device_opts);
+        return false;
+    }
+
+    pci_dev = PCI_DEVICE(dev);
+    vf.class = object_get_class(OBJECT(dev));
+    vf.opts.device_opts = cloned_device_opts;
+    vf.opts.from_json = from_json;
+    vf.devfn = pci_dev->devfn;
+
+    for (size_t i = 0; i < PCI_NUM_REGIONS; i++) {
+        vf.io_regions[i].size = pci_dev->io_regions[i].size;
+        vf.io_regions[i].type = pci_dev->io_regions[i].type;
+    }
+
+    /* NOTE(review): dev's reference is not dropped here -- confirm ownership */
+    qdev_unplug(dev, errp);
+    if (*errp) {
+        qobject_unref(cloned_device_opts);
+        return false;
+    }
+
+    array = g_hash_table_lookup(sriov_vfs, pf);
+    if (!array) {
+        array = g_array_new(false, false, sizeof(VirtIOPCISriovVF));
+        g_hash_table_insert(sriov_vfs, g_strdup(pf), array);
+    }
+    g_array_append_val(array, vf);
+    return true;
+}
+
 static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
 {
     VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
@@ -2325,7 +2512,10 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data)
     VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
     ResettableClass *rc = RESETTABLE_CLASS(klass);
 
+    sriov_vfs = g_hash_table_new(g_str_hash, g_str_equal);
+
     device_class_set_props(dc, virtio_pci_properties);
+    k->hide = virtio_pci_hide;
     k->realize = virtio_pci_realize;
     k->exit = virtio_pci_exit;
     k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;

-- 
2.43.0