[PATCH 6/7] vhost: add in_order feature to shadow virtqueue

Eugenio Pérez posted 7 patches 1 month, 1 week ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Stefano Garzarella <sgarzare@redhat.com>, "Eugenio Pérez" <eperezma@redhat.com>
[PATCH 6/7] vhost: add in_order feature to shadow virtqueue
Posted by Eugenio Pérez 1 month, 1 week ago
Some vdpa devices benefit from the in order feature.  Add support to SVQ
so QEMU can migrate these.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 137 +++++++++++++++++++++++++++--
 hw/virtio/vhost-shadow-virtqueue.h |  36 ++++++--
 2 files changed, 160 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 2d8fc82cc06f..60212fcd7bf3 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -12,11 +12,14 @@
 
 #include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "qemu/iov.h"
 #include "qemu/main-loop.h"
 #include "qemu/log.h"
 #include "qemu/memalign.h"
 #include "linux-headers/linux/vhost.h"
 
+#define VIRTIO_RING_NOT_IN_BATCH UINT16_MAX
+
 /**
  * Validate the transport device features that both guests can use with the SVQ
  * and SVQs can use with the device.
@@ -150,7 +153,33 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
 static uint16_t vhost_svq_next_desc(const VhostShadowVirtqueue *svq,
                                     uint16_t id)
 {
-    return svq->desc_state[id].next;
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+        return (id == svq->vring.num) ? 0 : ++id;
+    } else {
+        return svq->desc_state[id].next;
+    }
+}
+
+/**
+ * Updates the SVQ free_head member after adding descriptors to the SVQ
+ * The new free_head is the next descriptor that SVQ will make available by
+ * forwarding a new guest descriptor.
+ *
+ * @svq Shadow Virtqueue
+ * @num Number of descriptors added
+ * @id ID of the last descriptor added to the SVQ avail ring.
+ */
+static void vhost_svq_update_free_head(VhostShadowVirtqueue *svq,
+                                       size_t num, uint16_t id)
+{
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+        svq->free_head += num;
+        if (svq->free_head >= svq->vring.num) {
+            svq->free_head -= svq->vring.num;
+        }
+    } else {
+        svq->free_head = vhost_svq_next_desc(svq, id);
+    }
 }
 
 /**
@@ -202,7 +231,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
         i = next;
     }
 
-    svq->free_head = vhost_svq_next_desc(svq, last);
+    vhost_svq_update_free_head(svq, num, last);
     return true;
 }
 
@@ -306,6 +335,9 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
     svq->num_free -= ndescs;
     svq->desc_state[qemu_head].elem = elem;
     svq->desc_state[qemu_head].ndescs = ndescs;
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+        svq->desc_state[qemu_head].in_bytes = iov_size(in_sg, in_num);
+    }
     vhost_svq_kick(svq);
     return 0;
 }
@@ -401,6 +433,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n)
 static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
 {
     uint16_t *used_idx = &svq->vring.used->idx;
+
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER) &&
+        svq->batch_last.id != VIRTIO_RING_NOT_IN_BATCH) {
+        return true;
+    }
+
     if (svq->last_used_idx != svq->shadow_used_idx) {
         return true;
     }
@@ -463,6 +501,47 @@ static uint16_t vhost_svq_get_last_used_split(VhostShadowVirtqueue *svq,
     return le32_to_cpu(used->ring[last_used].id);
 }
 
+/*
+ * Gets the next buffer id and moves the used idx forward, so the next call
+ * to this function returns the following one.  IN_ORDER version.
+ *
+ * @svq: Shadow VirtQueue
+ * @len: Consumed length by the device.
+ *
+ * Return the id of the next descriptor consumed by the device, or -1 on error.
+ */
+static int32_t vhost_svq_get_last_used_split_in_order(
+        VhostShadowVirtqueue *svq,
+        uint32_t *len)
+{
+    unsigned num = svq->vring.num;
+    const vring_used_t *used = svq->vring.used;
+    uint16_t last_used = svq->last_used & (num - 1);
+    uint16_t last_used_idx = svq->last_used_idx & (num - 1);
+
+    if (svq->batch_last.id == VIRTIO_RING_NOT_IN_BATCH) {
+        svq->batch_last.id = le32_to_cpu(used->ring[last_used_idx].id);
+        svq->batch_last.len = le32_to_cpu(used->ring[last_used_idx].len);
+    }
+
+    if (unlikely(last_used >= num)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
+                      svq->vdev->name, last_used);
+        return -1;
+    }
+
+    if (svq->batch_last.id == last_used) {
+        svq->batch_last.id = VIRTIO_RING_NOT_IN_BATCH;
+        *len = svq->batch_last.len;
+    } else {
+        *len = svq->desc_state[last_used].in_bytes;
+    }
+
+    svq->last_used += svq->desc_state[last_used].ndescs;
+    svq->last_used_idx++;
+    return last_used;
+}
+
 static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
                                              uint16_t num, uint16_t i)
 {
@@ -474,8 +553,8 @@ static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
 }
 
 G_GNUC_WARN_UNUSED_RESULT
-static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq,
-                                              uint16_t id)
+static VirtQueueElement *vhost_svq_detach_buf_split(VhostShadowVirtqueue *svq,
+                                                    uint16_t id)
 {
     uint16_t num = svq->desc_state[id].ndescs;
     uint16_t last_used_chain = vhost_svq_last_desc_of_chain(svq, num, id);
@@ -486,6 +565,33 @@ static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq,
     return g_steal_pointer(&svq->desc_state[id].elem);
 }
 
+G_GNUC_WARN_UNUSED_RESULT
+static VirtQueueElement *vhost_svq_detach_buf_split_in_order(
+        VhostShadowVirtqueue *svq,
+        uint16_t id)
+{
+    return g_steal_pointer(&svq->desc_state[id].elem);
+}
+
+/*
+ * Return the descriptor id (and the chain of ids) to the free list
+ *
+ * @svq: Shadow Virtqueue
+ * @id: Id of the buffer to return.
+ *
+ * Return the element associated with the buffer, if any.
+ */
+G_GNUC_WARN_UNUSED_RESULT
+static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq,
+                                              uint16_t id)
+{
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+        return vhost_svq_detach_buf_split_in_order(svq, id);
+    } else {
+        return vhost_svq_detach_buf_split(svq, id);
+    }
+}
+
 G_GNUC_WARN_UNUSED_RESULT
 static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
                                            uint32_t *len)
@@ -498,7 +604,18 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 
     /* Only get used array entries after they have been exposed by dev */
     smp_rmb();
-    last_used = vhost_svq_get_last_used_split(svq, len);
+
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+        int32_t r;
+        r = vhost_svq_get_last_used_split_in_order(svq, len);
+        if (r < 0) {
+            return NULL;
+        }
+
+        last_used = r;
+    } else {
+        last_used = vhost_svq_get_last_used_split(svq, len);
+    }
 
     if (unlikely(last_used >= svq->vring.num)) {
         qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
@@ -726,6 +843,8 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
     svq->next_guest_avail_elem = NULL;
     svq->shadow_avail_idx = 0;
     svq->shadow_used_idx = 0;
+    memset(&svq->batch_last, 0, sizeof(svq->batch_last));
+    svq->last_used = 0;
     svq->last_used_idx = 0;
     svq->vdev = vdev;
     svq->vq = vq;
@@ -742,8 +861,12 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
                            PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
                            -1, 0);
     svq->desc_state = g_new0(SVQDescState, svq->vring.num);
-    for (unsigned i = 0; i < svq->vring.num - 1; i++) {
-        svq->desc_state[i].next = i + 1;
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+        svq->batch_last.id = VIRTIO_RING_NOT_IN_BATCH;
+    } else {
+        for (unsigned i = 0; i < svq->vring.num - 1; i++) {
+            svq->desc_state[i].next = i + 1;
+        }
     }
 }
 
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index f52c33e65046..ec16a1e83858 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -24,11 +24,19 @@ typedef struct SVQDescState {
      */
     unsigned int ndescs;
 
-    /*
-     * Backup next field for each descriptor so we can recover securely, not
-     * needing to trust the device access.
-     */
-    uint16_t next;
+    union {
+        /*
+         * Total length of the available buffer that is writable by the device.
+         * Only used in packed vq.
+         */
+        uint32_t in_bytes;
+
+        /*
+         * Backup next field for each descriptor so we can recover securely, not
+         * needing to trust the device access.  Only used in split vq.
+         */
+        uint16_t next;
+    };
 } SVQDescState;
 
 typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
@@ -99,9 +107,25 @@ typedef struct VhostShadowVirtqueue {
     /* Next head to expose to the device */
     uint16_t shadow_avail_idx;
 
-    /* Next free descriptor */
+    /*
+     * Next free descriptor.
+     *
+     * Without IN_ORDER free_head is used as a linked list head, and
+     * desc_state[id].next is the next element.
+     * With IN_ORDER free_head is the next available buffer index.
+     */
     uint16_t free_head;
 
+    /*
+     * Last used element of the processing batch of used descriptors if
+     * IN_ORDER.
+     * If SVQ is not processing a batch of descriptors, id is set to
+     * VIRTIO_RING_NOT_IN_BATCH (UINT16_MAX).
+     */
+    vring_used_elem_t batch_last;
+
+    /* Last used id if IN_ORDER and split vq */
+    uint16_t last_used;
+
     /* Last seen used idx */
     uint16_t shadow_used_idx;
 
-- 
2.53.0


Re: [PATCH 6/7] vhost: add in_order feature to shadow virtqueue
Posted by Jason Wang 1 month ago
On Thu, Mar 5, 2026 at 1:36 AM Eugenio Pérez <eperezma@redhat.com> wrote:
>
> Some vdpa devices benefit from the in order feature.  Add support to SVQ
> so QEMU can migrate these.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---

Acked-by: Jason Wang <jasowang@redhat.com>

Thanks