[PATCH v3 4/5] vhost-user-dev: Add cache BAR

Albert Esteve posted 5 patches 1 week ago
[PATCH v3 4/5] vhost-user-dev: Add cache BAR
Posted by Albert Esteve 1 week ago
Add a cache BAR in the vhost-user-device
into which files can be directly mapped.

The number, shmid, and size of the VIRTIO Shared
Memory subregions is retrieved through a get_shmem_config
message sent by the vhost-user-base module
on the realize step, after virtio_init().

By default, if VHOST_USER_PROTOCOL_F_SHMEM
feature is not supported by the backend,
there is no cache.

Signed-off-by: Albert Esteve <aesteve@redhat.com>
---
 hw/virtio/vhost-user-base.c       | 37 +++++++++++++++++++++++++++--
 hw/virtio/vhost-user-device-pci.c | 39 ++++++++++++++++++++++++++++---
 2 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
index 2bc3423326..f2597d021a 100644
--- a/hw/virtio/vhost-user-base.c
+++ b/hw/virtio/vhost-user-base.c
@@ -271,7 +271,9 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VHostUserBase *vub = VHOST_USER_BASE(dev);
-    int ret;
+    uint64_t memory_sizes[8];
+    void *cache_ptr;
+    int i, ret, nregions;
 
     if (!vub->chardev.chr) {
         error_setg(errp, "vhost-user-base: missing chardev");
@@ -314,7 +316,7 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
 
     /* Allocate queues */
     vub->vqs = g_ptr_array_sized_new(vub->num_vqs);
-    for (int i = 0; i < vub->num_vqs; i++) {
+    for (i = 0; i < vub->num_vqs; i++) {
         g_ptr_array_add(vub->vqs,
                         virtio_add_queue(vdev, vub->vq_size,
                                          vub_handle_output));
@@ -331,6 +333,37 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
         do_vhost_user_cleanup(vdev, vub);
     }
 
+    ret = vub->vhost_dev.vhost_ops->vhost_get_shmem_config(&vub->vhost_dev,
+                                                           &nregions,
+                                                           memory_sizes,
+                                                           errp);
+
+    if (ret < 0) {
+        do_vhost_user_cleanup(vdev, vub);
+    }
+
+    for (i = 0; i < nregions; i++) {
+        if (memory_sizes[i]) {
+            if (memory_sizes[i] % qemu_real_host_page_size() != 0) {
+                error_setg(errp, "Shared memory %d size must be a power of 2 "
+                                 "no smaller than the page size", i);
+                return;
+            }
+
+            cache_ptr = mmap(NULL, memory_sizes[i], PROT_NONE,
+                            MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+            if (cache_ptr == MAP_FAILED) {
+                error_setg_errno(errp, errno, "Unable to mmap blank cache");
+                return;
+            }
+
+            virtio_new_shmem_region(vdev);
+            memory_region_init_ram_ptr(vdev->shmem_list[i].mr,
+                                       OBJECT(vdev), "vub-shm-" + i,
+                                       memory_sizes[i], cache_ptr);
+        }
+    }
+
     qemu_chr_fe_set_handlers(&vub->chardev, NULL, NULL, vub_event, NULL,
                              dev, NULL, true);
 }
diff --git a/hw/virtio/vhost-user-device-pci.c b/hw/virtio/vhost-user-device-pci.c
index efaf55d3dd..abf4e90c21 100644
--- a/hw/virtio/vhost-user-device-pci.c
+++ b/hw/virtio/vhost-user-device-pci.c
@@ -8,14 +8,18 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "hw/qdev-properties.h"
 #include "hw/virtio/vhost-user-base.h"
 #include "hw/virtio/virtio-pci.h"
 
+#define VIRTIO_DEVICE_PCI_CACHE_BAR 2
+
 struct VHostUserDevicePCI {
     VirtIOPCIProxy parent_obj;
 
     VHostUserBase vub;
+    MemoryRegion cachebar;
 };
 
 #define TYPE_VHOST_USER_DEVICE_PCI "vhost-user-device-pci-base"
@@ -25,10 +29,39 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserDevicePCI, VHOST_USER_DEVICE_PCI)
 static void vhost_user_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
 {
     VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(vpci_dev);
-    DeviceState *vdev = DEVICE(&dev->vub);
-
+    DeviceState *dev_state = DEVICE(&dev->vub);
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev_state);
+    MemoryRegion *mr;
+    uint64_t offset = 0, cache_size = 0;
+    int i;
+    
     vpci_dev->nvectors = 1;
-    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
+    qdev_realize(dev_state, BUS(&vpci_dev->bus), errp);
+
+    for (i = 0; i < vdev->n_shmem_regions; i++) {
+        mr = vdev->shmem_list[i].mr;
+        if (mr->size > UINT64_MAX - cache_size) {
+            error_setg(errp, "Total shared memory required overflow");
+            return;
+        }
+        cache_size = cache_size + mr->size;
+    }
+    if (cache_size) {
+        memory_region_init(&dev->cachebar, OBJECT(vpci_dev),
+                           "vhost-device-pci-cachebar", cache_size);
+        for (i = 0; i < vdev->n_shmem_regions; i++) {
+            mr = vdev->shmem_list[i].mr;
+            memory_region_add_subregion(&dev->cachebar, offset, mr);
+            virtio_pci_add_shm_cap(vpci_dev, VIRTIO_DEVICE_PCI_CACHE_BAR,
+                                   offset, mr->size, i);
+            offset = offset + mr->size;
+        }
+        pci_register_bar(&vpci_dev->pci_dev, VIRTIO_DEVICE_PCI_CACHE_BAR,
+                        PCI_BASE_ADDRESS_SPACE_MEMORY |
+                        PCI_BASE_ADDRESS_MEM_PREFETCH |
+                        PCI_BASE_ADDRESS_MEM_TYPE_64,
+                        &dev->cachebar);
+    }
 }
 
 static void vhost_user_device_pci_class_init(ObjectClass *klass, void *data)
-- 
2.45.2
Re: [PATCH v3 4/5] vhost-user-dev: Add cache BAR
Posted by Stefan Hajnoczi 2 days, 7 hours ago
On Thu, Sep 12, 2024 at 04:53:34PM +0200, Albert Esteve wrote:
> @@ -331,6 +333,37 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
>          do_vhost_user_cleanup(vdev, vub);
>      }
>  
> +    ret = vub->vhost_dev.vhost_ops->vhost_get_shmem_config(&vub->vhost_dev,
> +                                                           &nregions,
> +                                                           memory_sizes,
> +                                                           errp);
> +
> +    if (ret < 0) {
> +        do_vhost_user_cleanup(vdev, vub);
> +    }
> +
> +    for (i = 0; i < nregions; i++) {
> +        if (memory_sizes[i]) {
> +            if (memory_sizes[i] % qemu_real_host_page_size() != 0) {
> +                error_setg(errp, "Shared memory %d size must be a power of 2 "
> +                                 "no smaller than the page size", i);
> +                return;
> +            }
> +
> +            cache_ptr = mmap(NULL, memory_sizes[i], PROT_NONE,
> +                            MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +            if (cache_ptr == MAP_FAILED) {
> +                error_setg_errno(errp, errno, "Unable to mmap blank cache");
> +                return;
> +            }
> +
> +            virtio_new_shmem_region(vdev);
> +            memory_region_init_ram_ptr(vdev->shmem_list[i].mr,

I don't think this works because virtio_new_shmem_region() leaves .mr =
NULL? Why allocates the MemoryRegion and assigns it to shmem_list[i].mr?
Re: [PATCH v3 4/5] vhost-user-dev: Add cache BAR
Posted by Stefan Hajnoczi 2 days, 7 hours ago
On Tue, 17 Sept 2024 at 10:33, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Thu, Sep 12, 2024 at 04:53:34PM +0200, Albert Esteve wrote:
> > @@ -331,6 +333,37 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
> >          do_vhost_user_cleanup(vdev, vub);
> >      }
> >
> > +    ret = vub->vhost_dev.vhost_ops->vhost_get_shmem_config(&vub->vhost_dev,
> > +                                                           &nregions,
> > +                                                           memory_sizes,
> > +                                                           errp);
> > +
> > +    if (ret < 0) {
> > +        do_vhost_user_cleanup(vdev, vub);
> > +    }
> > +
> > +    for (i = 0; i < nregions; i++) {
> > +        if (memory_sizes[i]) {
> > +            if (memory_sizes[i] % qemu_real_host_page_size() != 0) {
> > +                error_setg(errp, "Shared memory %d size must be a power of 2 "
> > +                                 "no smaller than the page size", i);
> > +                return;
> > +            }
> > +
> > +            cache_ptr = mmap(NULL, memory_sizes[i], PROT_NONE,
> > +                            MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> > +            if (cache_ptr == MAP_FAILED) {
> > +                error_setg_errno(errp, errno, "Unable to mmap blank cache");
> > +                return;
> > +            }
> > +
> > +            virtio_new_shmem_region(vdev);
> > +            memory_region_init_ram_ptr(vdev->shmem_list[i].mr,
>
> I don't think this works because virtio_new_shmem_region() leaves .mr =
> NULL? Why allocates the MemoryRegion and assigns it to shmem_list[i].mr?

"Why" -> "Who"
Re: [PATCH v3 4/5] vhost-user-dev: Add cache BAR
Posted by Stefan Hajnoczi 2 days, 8 hours ago
On Thu, Sep 12, 2024 at 04:53:34PM +0200, Albert Esteve wrote:
> Add a cache BAR in the vhost-user-device
> into which files can be directly mapped.
> 
> The number, shmid, and size of the VIRTIO Shared
> Memory subregions is retrieved through a get_shmem_config
> message sent by the vhost-user-base module
> on the realize step, after virtio_init().
> 
> By default, if VHOST_USER_PROTOCOL_F_SHMEM
> feature is not supported by the backend,
> there is no cache.
> 
> Signed-off-by: Albert Esteve <aesteve@redhat.com>
> ---

Not all devices derive from vhost-user-base.c so this does not offer
full coverage. I think that's okay since few devices currently use
VIRTIO Shared Memory Regions. A note about this in the commit
description would be useful though. Which vhost-user devices gain VIRTIO
Shared Memory Region support and what should you do if your device is
not included in this list?

>  hw/virtio/vhost-user-base.c       | 37 +++++++++++++++++++++++++++--
>  hw/virtio/vhost-user-device-pci.c | 39 ++++++++++++++++++++++++++++---
>  2 files changed, 71 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
> index 2bc3423326..f2597d021a 100644
> --- a/hw/virtio/vhost-user-base.c
> +++ b/hw/virtio/vhost-user-base.c
> @@ -271,7 +271,9 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VHostUserBase *vub = VHOST_USER_BASE(dev);
> -    int ret;
> +    uint64_t memory_sizes[8];
> +    void *cache_ptr;
> +    int i, ret, nregions;
>  
>      if (!vub->chardev.chr) {
>          error_setg(errp, "vhost-user-base: missing chardev");
> @@ -314,7 +316,7 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
>  
>      /* Allocate queues */
>      vub->vqs = g_ptr_array_sized_new(vub->num_vqs);
> -    for (int i = 0; i < vub->num_vqs; i++) {
> +    for (i = 0; i < vub->num_vqs; i++) {
>          g_ptr_array_add(vub->vqs,
>                          virtio_add_queue(vdev, vub->vq_size,
>                                           vub_handle_output));
> @@ -331,6 +333,37 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
>          do_vhost_user_cleanup(vdev, vub);

Missing return statement.

>      }
>  
> +    ret = vub->vhost_dev.vhost_ops->vhost_get_shmem_config(&vub->vhost_dev,
> +                                                           &nregions,
> +                                                           memory_sizes,

Buffer overflow. vhost_get_shmem_config() copies out up to 256
memory_sizes[] elements. Please introduce a constant in the VIRTIO
header and use it instead of hardcoding uint64_t memory_sizes[8] above.

> +                                                           errp);
> +
> +    if (ret < 0) {
> +        do_vhost_user_cleanup(vdev, vub);

Missing return statement.

> +    }
> +
> +    for (i = 0; i < nregions; i++) {
> +        if (memory_sizes[i]) {
> +            if (memory_sizes[i] % qemu_real_host_page_size() != 0) {
> +                error_setg(errp, "Shared memory %d size must be a power of 2 "
> +                                 "no smaller than the page size", i);
> +                return;

Missing do_vhost_user_cleanup().

> +            }
> +
> +            cache_ptr = mmap(NULL, memory_sizes[i], PROT_NONE,
> +                            MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);



> +            if (cache_ptr == MAP_FAILED) {
> +                error_setg_errno(errp, errno, "Unable to mmap blank cache");
> +                return;

Missing do_vhost_user_cleanup().

> +            }
> +
> +            virtio_new_shmem_region(vdev);
> +            memory_region_init_ram_ptr(vdev->shmem_list[i].mr,
> +                                       OBJECT(vdev), "vub-shm-" + i,
> +                                       memory_sizes[i], cache_ptr);

I think memory_region_init_ram_ptr() is included in live migration, so
the contents of VIRTIO Shared Memory Regions will be transferred to the
destination QEMU and written to the equivalent memory region there. I'm
not sure this works:
1. If there are PROT_NONE memory ranges, then live migration will
   probably crash the source QEMU while trying to send this memory to
   the destination QEMU.
2. If the destination vhost-user device has not yet loaded its state and
   sent MAP messages setting up the VIRTIO Shared Memory Region, then
   receiving migrated data and writing it into this memory will fail.

QEMU has a migration blocker API so that devices can refuse live
migration. For the time being a migration blocker is probably needed
here. See migrate_add_blocker()/migrate_del_blocker().

> +        }
> +    }
> +
>      qemu_chr_fe_set_handlers(&vub->chardev, NULL, NULL, vub_event, NULL,
>                               dev, NULL, true);
>  }
> diff --git a/hw/virtio/vhost-user-device-pci.c b/hw/virtio/vhost-user-device-pci.c
> index efaf55d3dd..abf4e90c21 100644
> --- a/hw/virtio/vhost-user-device-pci.c
> +++ b/hw/virtio/vhost-user-device-pci.c
> @@ -8,14 +8,18 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qapi/error.h"
>  #include "hw/qdev-properties.h"
>  #include "hw/virtio/vhost-user-base.h"
>  #include "hw/virtio/virtio-pci.h"
>  
> +#define VIRTIO_DEVICE_PCI_CACHE_BAR 2

"Cache" is ambigous. Call it shmem_bar here and everywhere else?

> +
>  struct VHostUserDevicePCI {
>      VirtIOPCIProxy parent_obj;
>  
>      VHostUserBase vub;
> +    MemoryRegion cachebar;
>  };
>  
>  #define TYPE_VHOST_USER_DEVICE_PCI "vhost-user-device-pci-base"
> @@ -25,10 +29,39 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserDevicePCI, VHOST_USER_DEVICE_PCI)
>  static void vhost_user_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
>  {
>      VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(vpci_dev);
> -    DeviceState *vdev = DEVICE(&dev->vub);
> -
> +    DeviceState *dev_state = DEVICE(&dev->vub);
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev_state);
> +    MemoryRegion *mr;
> +    uint64_t offset = 0, cache_size = 0;
> +    int i;
> +    
>      vpci_dev->nvectors = 1;
> -    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
> +    qdev_realize(dev_state, BUS(&vpci_dev->bus), errp);
> +
> +    for (i = 0; i < vdev->n_shmem_regions; i++) {
> +        mr = vdev->shmem_list[i].mr;
> +        if (mr->size > UINT64_MAX - cache_size) {
> +            error_setg(errp, "Total shared memory required overflow");
> +            return;
> +        }
> +        cache_size = cache_size + mr->size;
> +    }
> +    if (cache_size) {
> +        memory_region_init(&dev->cachebar, OBJECT(vpci_dev),
> +                           "vhost-device-pci-cachebar", cache_size);
> +        for (i = 0; i < vdev->n_shmem_regions; i++) {
> +            mr = vdev->shmem_list[i].mr;
> +            memory_region_add_subregion(&dev->cachebar, offset, mr);
> +            virtio_pci_add_shm_cap(vpci_dev, VIRTIO_DEVICE_PCI_CACHE_BAR,
> +                                   offset, mr->size, i);
> +            offset = offset + mr->size;
> +        }
> +        pci_register_bar(&vpci_dev->pci_dev, VIRTIO_DEVICE_PCI_CACHE_BAR,
> +                        PCI_BASE_ADDRESS_SPACE_MEMORY |
> +                        PCI_BASE_ADDRESS_MEM_PREFETCH |
> +                        PCI_BASE_ADDRESS_MEM_TYPE_64,
> +                        &dev->cachebar);
> +    }
>  }
>  
>  static void vhost_user_device_pci_class_init(ObjectClass *klass, void *data)
> -- 
> 2.45.2
>