Some vdpa devices benefit from the in order feature. Add support to SVQ
so QEMU can migrate these.
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
hw/virtio/vhost-shadow-virtqueue.c | 137 +++++++++++++++++++++++++++--
hw/virtio/vhost-shadow-virtqueue.h | 36 ++++++--
2 files changed, 160 insertions(+), 13 deletions(-)
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 2d8fc82cc06f..60212fcd7bf3 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -12,11 +12,14 @@
#include "qemu/error-report.h"
#include "qapi/error.h"
+#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "linux-headers/linux/vhost.h"
+#define VIRTIO_RING_NOT_IN_BATCH UINT16_MAX
+
/**
* Validate the transport device features that both guests can use with the SVQ
* and SVQs can use with the device.
@@ -150,7 +153,33 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
static uint16_t vhost_svq_next_desc(const VhostShadowVirtqueue *svq,
uint16_t id)
{
- return svq->desc_state[id].next;
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ return (id == svq->vring.num) ? 0 : ++id;
+ } else {
+ return svq->desc_state[id].next;
+ }
+}
+
+/**
+ * Updates the SVQ free_head member after adding descriptors to the SVQ avail ring.
+ * The new free_head is the next descriptor that SVQ will make available by
+ * forwarding a new guest descriptor.
+ *
+ * @svq Shadow Virtqueue
+ * @num Number of descriptors added
+ * @id ID of the last descriptor added to the SVQ avail ring.
+ */
+static void vhost_svq_update_free_head(VhostShadowVirtqueue *svq,
+ size_t num, uint16_t id)
+{
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ svq->free_head += num;
+ if (svq->free_head >= svq->vring.num) {
+ svq->free_head -= svq->vring.num;
+ }
+ } else {
+ svq->free_head = vhost_svq_next_desc(svq, id);
+ }
}
/**
@@ -202,7 +231,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
i = next;
}
- svq->free_head = vhost_svq_next_desc(svq, last);
+ vhost_svq_update_free_head(svq, num, last);
return true;
}
@@ -306,6 +335,9 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
svq->num_free -= ndescs;
svq->desc_state[qemu_head].elem = elem;
svq->desc_state[qemu_head].ndescs = ndescs;
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ svq->desc_state[qemu_head].in_bytes = iov_size(in_sg, in_num);
+ }
vhost_svq_kick(svq);
return 0;
}
@@ -401,6 +433,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n)
static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
{
uint16_t *used_idx = &svq->vring.used->idx;
+
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER) &&
+ svq->batch_last.id != VIRTIO_RING_NOT_IN_BATCH) {
+ return true;
+ }
+
if (svq->last_used_idx != svq->shadow_used_idx) {
return true;
}
@@ -463,6 +501,47 @@ static uint16_t vhost_svq_get_last_used_split(VhostShadowVirtqueue *svq,
return le32_to_cpu(used->ring[last_used].id);
}
+/*
+ * Gets the next buffer id and moves forward the used idx, so the next time
+ * SVQ calls this function will get the next one. IN_ORDER version
+ *
+ * @svq: Shadow VirtQueue
+ * @len: Output parameter; set to the length consumed by the device.
+ *
+ * Return the id of the next descriptor consumed by the device, or -1 on error.
+ */
+static int32_t vhost_svq_get_last_used_split_in_order(
+ VhostShadowVirtqueue *svq,
+ uint32_t *len)
+{
+ unsigned num = svq->vring.num;
+ const vring_used_t *used = svq->vring.used;
+ uint16_t last_used = svq->last_used & (num - 1);
+ uint16_t last_used_idx = svq->last_used_idx & (num - 1);
+
+ if (svq->batch_last.id == VIRTIO_RING_NOT_IN_BATCH) {
+ svq->batch_last.id = le32_to_cpu(used->ring[last_used_idx].id);
+ svq->batch_last.len = le32_to_cpu(used->ring[last_used_idx].len);
+ }
+
+ if (unlikely(last_used >= num)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
+ svq->vdev->name, last_used);
+ return -1;
+ }
+
+ if (svq->batch_last.id == last_used) {
+ svq->batch_last.id = VIRTIO_RING_NOT_IN_BATCH;
+ *len = svq->batch_last.len;
+ } else {
+ *len = svq->desc_state[last_used].in_bytes;
+ }
+
+ svq->last_used += svq->desc_state[last_used].ndescs;
+ svq->last_used_idx++;
+ return last_used;
+}
+
static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
uint16_t num, uint16_t i)
{
@@ -474,8 +553,8 @@ static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
}
G_GNUC_WARN_UNUSED_RESULT
-static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq,
- uint16_t id)
+static VirtQueueElement *vhost_svq_detach_buf_split(VhostShadowVirtqueue *svq,
+ uint16_t id)
{
uint16_t num = svq->desc_state[id].ndescs;
uint16_t last_used_chain = vhost_svq_last_desc_of_chain(svq, num, id);
@@ -486,6 +565,33 @@ static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq,
return g_steal_pointer(&svq->desc_state[id].elem);
}
+G_GNUC_WARN_UNUSED_RESULT
+static VirtQueueElement *vhost_svq_detach_buf_split_in_order(
+ VhostShadowVirtqueue *svq,
+ uint16_t id)
+{
+ return g_steal_pointer(&svq->desc_state[id].elem);
+}
+
+/*
+ * Return the descriptor id (and the chain of ids) to the free list
+ *
+ * @svq: Shadow Virtqueue
+ * @id: Id of the buffer to return.
+ *
+ * Return the element associated to the buffer if any.
+ */
+G_GNUC_WARN_UNUSED_RESULT
+static VirtQueueElement *vhost_svq_detach_buf(VhostShadowVirtqueue *svq,
+ uint16_t id)
+{
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ return vhost_svq_detach_buf_split_in_order(svq, id);
+ } else {
+ return vhost_svq_detach_buf_split(svq, id);
+ }
+}
+
G_GNUC_WARN_UNUSED_RESULT
static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
uint32_t *len)
@@ -498,7 +604,18 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
/* Only get used array entries after they have been exposed by dev */
smp_rmb();
- last_used = vhost_svq_get_last_used_split(svq, len);
+
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ int32_t r;
+ r = vhost_svq_get_last_used_split_in_order(svq, len);
+ if (r < 0) {
+ return NULL;
+ }
+
+ last_used = r;
+ } else {
+ last_used = vhost_svq_get_last_used_split(svq, len);
+ }
if (unlikely(last_used >= svq->vring.num)) {
qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
@@ -726,6 +843,8 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
svq->next_guest_avail_elem = NULL;
svq->shadow_avail_idx = 0;
svq->shadow_used_idx = 0;
+ memset(&svq->batch_last, 0, sizeof(svq->batch_last));
+ svq->last_used = 0;
svq->last_used_idx = 0;
svq->vdev = vdev;
svq->vq = vq;
@@ -742,8 +861,12 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
-1, 0);
svq->desc_state = g_new0(SVQDescState, svq->vring.num);
- for (unsigned i = 0; i < svq->vring.num - 1; i++) {
- svq->desc_state[i].next = i + 1;
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_IN_ORDER)) {
+ svq->batch_last.id = VIRTIO_RING_NOT_IN_BATCH;
+ } else {
+ for (unsigned i = 0; i < svq->vring.num - 1; i++) {
+ svq->desc_state[i].next = i + 1;
+ }
}
}
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index f52c33e65046..ec16a1e83858 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -24,11 +24,19 @@ typedef struct SVQDescState {
*/
unsigned int ndescs;
- /*
- * Backup next field for each descriptor so we can recover securely, not
- * needing to trust the device access.
- */
- uint16_t next;
+ union {
+ /*
+ * Total length of the available buffer that is writable by the device.
+ * Only used with VIRTIO_F_IN_ORDER.
+ */
+ uint32_t in_bytes;
+
+ /*
+ * Backup next field for each descriptor so we can recover securely, not
+ * needing to trust the device access. Only used in split vq.
+ */
+ uint16_t next;
+ };
} SVQDescState;
typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
@@ -99,9 +107,25 @@ typedef struct VhostShadowVirtqueue {
/* Next head to expose to the device */
uint16_t shadow_avail_idx;
- /* Next free descriptor */
+ /*
+ * Next free descriptor.
+ *
+ * Without IN_ORDER free_head is used as a linked list head, and
+ * desc_state[id].next is the next element.
+ * With IN_ORDER free_head is the next available buffer index.
+ */
uint16_t free_head;
+ /*
+ * Last used element of the processing batch of used descriptors if
+ * IN_ORDER.
+ * If SVQ is not processing a batch of descriptors, id is set to UINT16_MAX
+ * (VIRTIO_RING_NOT_IN_BATCH).
+ */
+ vring_used_elem_t batch_last;
+
+ /* Last used id if IN_ORDER and split vq */
+ uint16_t last_used;
+
/* Last seen used idx */
uint16_t shadow_used_idx;
--
2.53.0
On Thu, Mar 5, 2026 at 1:36 AM Eugenio Pérez <eperezma@redhat.com> wrote: > > Some vdpa devices benefit from the in order feature. Add support to SVQ > so QEMU can migrate these. > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com> > --- Acked-by: Jason Wang <jasowang@redhat.com> Thanks
© 2016 - 2026 Red Hat, Inc.