This is the first patch in a series to add support for packed
virtqueues in vhost_shadow_virtqueue. This patch implements the
insertion of available buffers in the descriptor area. It takes
into account descriptor chains, but does not consider indirect
descriptors.
Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
---
Changes v2 -> v3:
* vhost-shadow-virtqueue.c
- Move parts common to "vhost_svq_add_split" and
"vhost_svq_add_packed" to "vhost_svq_add".
(vhost_svq_add_packed):
- Refactor to minimize duplicate code between
this and "vhost_svq_add_split"
- Fix code style issues.
(vhost_svq_add_split):
- Merge with "vhost_svq_vring_write_descs()"
- Refactor to minimize duplicate code between
this and "vhost_svq_add_packed"
(vhost_svq_add):
- Refactor to minimize duplicate code between
split and packed version of "vhost_svq_add"
hw/virtio/vhost-shadow-virtqueue.c | 174 +++++++++++++++++++----------
1 file changed, 115 insertions(+), 59 deletions(-)
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index fc5f408f77..4c308ee53d 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -124,97 +124,132 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
}
/**
- * Write descriptors to SVQ vring
+ * Write descriptors to SVQ split vring
*
* @svq: The shadow virtqueue
- * @sg: Cache for hwaddr
- * @iovec: The iovec from the guest
- * @num: iovec length
- * @more_descs: True if more descriptors come in the chain
- * @write: True if they are writeable descriptors
- *
- * Return true if success, false otherwise and print error.
+ * @out_sg: The iovec to the guest
+ * @out_num: Outgoing iovec length
+ * @in_sg: The iovec from the guest
+ * @in_num: Incoming iovec length
+ * @sgs: Cache for hwaddr
+ * @head: Saves current free_head
*/
-static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
- const struct iovec *iovec, size_t num,
- bool more_descs, bool write)
+static void vhost_svq_add_split(VhostShadowVirtqueue *svq,
+ const struct iovec *out_sg, size_t out_num,
+ const struct iovec *in_sg, size_t in_num,
+ hwaddr *sgs, unsigned *head)
{
+ unsigned avail_idx, n;
uint16_t i = svq->free_head, last = svq->free_head;
- unsigned n;
- uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
+ vring_avail_t *avail = svq->vring.avail;
vring_desc_t *descs = svq->vring.desc;
- bool ok;
-
- if (num == 0) {
- return true;
- }
+ size_t num = in_num + out_num;
- ok = vhost_svq_translate_addr(svq, sg, iovec, num);
- if (unlikely(!ok)) {
- return false;
- }
+ *head = svq->free_head;
for (n = 0; n < num; n++) {
- if (more_descs || (n + 1 < num)) {
- descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
+ descs[i].flags = cpu_to_le16(n < out_num ? 0 : VRING_DESC_F_WRITE);
+ if (n + 1 < num) {
+ descs[i].flags |= cpu_to_le16(VRING_DESC_F_NEXT);
descs[i].next = cpu_to_le16(svq->desc_next[i]);
+ }
+
+ descs[i].addr = cpu_to_le64(sgs[n]);
+ if (n < out_num) {
+ descs[i].len = cpu_to_le32(out_sg[n].iov_len);
} else {
- descs[i].flags = flags;
+ descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
}
- descs[i].addr = cpu_to_le64(sg[n]);
- descs[i].len = cpu_to_le32(iovec[n].iov_len);
last = i;
i = cpu_to_le16(svq->desc_next[i]);
}
svq->free_head = le16_to_cpu(svq->desc_next[last]);
- return true;
+
+ /*
+ * Put the entry in the available array (but don't update avail->idx until
+ * they do sync).
+ */
+ avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
+ avail->ring[avail_idx] = cpu_to_le16(*head);
+ svq->shadow_avail_idx++;
+
+ /* Update the avail index after write the descriptor */
+ smp_wmb();
+ avail->idx = cpu_to_le16(svq->shadow_avail_idx);
}
-static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
+/**
+ * Write descriptors to SVQ packed vring
+ *
+ * @svq: The shadow virtqueue
+ * @out_sg: The iovec to the guest
+ * @out_num: Outgoing iovec length
+ * @in_sg: The iovec from the guest
+ * @in_num: Incoming iovec length
+ * @sgs: Cache for hwaddr
+ * @head: Saves current free_head
+ */
+static void vhost_svq_add_packed(VhostShadowVirtqueue *svq,
const struct iovec *out_sg, size_t out_num,
const struct iovec *in_sg, size_t in_num,
- unsigned *head)
+ hwaddr *sgs, unsigned *head)
{
- unsigned avail_idx;
- vring_avail_t *avail = svq->vring.avail;
- bool ok;
- g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
+ uint16_t id, curr, i, head_flags = 0;
+ size_t num = out_num + in_num;
+ unsigned n;
- *head = svq->free_head;
+ struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
- /* We need some descriptors here */
- if (unlikely(!out_num && !in_num)) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "Guest provided element with no descriptors");
- return false;
- }
+ *head = svq->vring_packed.next_avail_idx;
+ i = *head;
+ id = svq->free_head;
+ curr = id;
- ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
- false);
- if (unlikely(!ok)) {
- return false;
+ /* Write descriptors to SVQ packed vring */
+ for (n = 0; n < num; n++) {
+ uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
+ (n < out_num ? 0 : VRING_DESC_F_WRITE) |
+ (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
+ if (i == *head) {
+ head_flags = flags;
+ } else {
+ descs[i].flags = flags;
+ }
+
+ descs[i].addr = cpu_to_le64(sgs[n]);
+ descs[i].id = id;
+ if (n < out_num) {
+ descs[i].len = cpu_to_le32(out_sg[n].iov_len);
+ } else {
+ descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
+ }
+
+ curr = cpu_to_le16(svq->desc_next[curr]);
+
+ if (++i >= svq->vring_packed.vring.num) {
+ i = 0;
+ svq->vring_packed.avail_used_flags ^=
+ 1 << VRING_PACKED_DESC_F_AVAIL |
+ 1 << VRING_PACKED_DESC_F_USED;
+ }
}
- ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
- if (unlikely(!ok)) {
- return false;
+ if (i <= *head) {
+ svq->vring_packed.avail_wrap_counter ^= 1;
}
+ svq->vring_packed.next_avail_idx = i;
+ svq->free_head = curr;
+
/*
- * Put the entry in the available array (but don't update avail->idx until
- * they do sync).
+ * A driver MUST NOT make the first descriptor in the list
+ * available before all subsequent descriptors comprising
+ * the list are made available.
*/
- avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
- avail->ring[avail_idx] = cpu_to_le16(*head);
- svq->shadow_avail_idx++;
-
- /* Update the avail index after write the descriptor */
smp_wmb();
- avail->idx = cpu_to_le16(svq->shadow_avail_idx);
-
- return true;
+ svq->vring_packed.vring.desc[*head].flags = head_flags;
}
static void vhost_svq_kick(VhostShadowVirtqueue *svq)
@@ -254,15 +289,36 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
unsigned ndescs = in_num + out_num;
bool ok;
+ /* We need some descriptors here */
+ if (unlikely(!ndescs)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Guest provided element with no descriptors");
+ return -EINVAL;
+ }
+
if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
return -ENOSPC;
}
- ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
+ g_autofree hwaddr *sgs = g_new(hwaddr, ndescs);
+ ok = vhost_svq_translate_addr(svq, sgs, out_sg, out_num);
if (unlikely(!ok)) {
return -EINVAL;
}
+ ok = vhost_svq_translate_addr(svq, sgs + out_num, in_sg, in_num);
+ if (unlikely(!ok)) {
+ return -EINVAL;
+ }
+
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) {
+ vhost_svq_add_packed(svq, out_sg, out_num, in_sg,
+ in_num, sgs, &qemu_head);
+ } else {
+ vhost_svq_add_split(svq, out_sg, out_num, in_sg,
+ in_num, sgs, &qemu_head);
+ }
+
svq->num_free -= ndescs;
svq->desc_state[qemu_head].elem = elem;
svq->desc_state[qemu_head].ndescs = ndescs;
--
2.45.2
On Fri, Aug 2, 2024 at 1:22 PM Sahil Siddiq <icegambit91@gmail.com> wrote:
>
> This is the first patch in a series to add support for packed
> virtqueues in vhost_shadow_virtqueue. This patch implements the
> insertion of available buffers in the descriptor area. It takes
> into account descriptor chains, but does not consider indirect
> descriptors.
>
> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
> ---
> Changes v2 -> v3:
> * vhost-shadow-virtqueue.c
> - Move parts common to "vhost_svq_add_split" and
> "vhost_svq_add_packed" to "vhost_svq_add".
> (vhost_svq_add_packed):
> - Refactor to minimize duplicate code between
> this and "vhost_svq_add_split"
> - Fix code style issues.
> (vhost_svq_add_split):
> - Merge with "vhost_svq_vring_write_descs()"
> - Refactor to minimize duplicate code between
> this and "vhost_svq_add_packed"
> (vhost_svq_add):
> - Refactor to minimize duplicate code between
> split and packed version of "vhost_svq_add"
>
> hw/virtio/vhost-shadow-virtqueue.c | 174 +++++++++++++++++++----------
> 1 file changed, 115 insertions(+), 59 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index fc5f408f77..4c308ee53d 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -124,97 +124,132 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
> }
>
> /**
> - * Write descriptors to SVQ vring
> + * Write descriptors to SVQ split vring
> *
> * @svq: The shadow virtqueue
> - * @sg: Cache for hwaddr
> - * @iovec: The iovec from the guest
> - * @num: iovec length
> - * @more_descs: True if more descriptors come in the chain
> - * @write: True if they are writeable descriptors
> - *
> - * Return true if success, false otherwise and print error.
> + * @out_sg: The iovec to the guest
> + * @out_num: Outgoing iovec length
> + * @in_sg: The iovec from the guest
> + * @in_num: Incoming iovec length
> + * @sgs: Cache for hwaddr
> + * @head: Saves current free_head
> */
> -static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> - const struct iovec *iovec, size_t num,
> - bool more_descs, bool write)
> +static void vhost_svq_add_split(VhostShadowVirtqueue *svq,
> + const struct iovec *out_sg, size_t out_num,
> + const struct iovec *in_sg, size_t in_num,
> + hwaddr *sgs, unsigned *head)
> {
> + unsigned avail_idx, n;
> uint16_t i = svq->free_head, last = svq->free_head;
> - unsigned n;
> - uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
> + vring_avail_t *avail = svq->vring.avail;
> vring_desc_t *descs = svq->vring.desc;
> - bool ok;
> -
> - if (num == 0) {
> - return true;
> - }
> + size_t num = in_num + out_num;
>
> - ok = vhost_svq_translate_addr(svq, sg, iovec, num);
> - if (unlikely(!ok)) {
> - return false;
> - }
> + *head = svq->free_head;
>
> for (n = 0; n < num; n++) {
> - if (more_descs || (n + 1 < num)) {
> - descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
> + descs[i].flags = cpu_to_le16(n < out_num ? 0 : VRING_DESC_F_WRITE);
> + if (n + 1 < num) {
> + descs[i].flags |= cpu_to_le16(VRING_DESC_F_NEXT);
> descs[i].next = cpu_to_le16(svq->desc_next[i]);
> + }
> +
> + descs[i].addr = cpu_to_le64(sgs[n]);
> + if (n < out_num) {
> + descs[i].len = cpu_to_le32(out_sg[n].iov_len);
> } else {
> - descs[i].flags = flags;
> + descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
> }
> - descs[i].addr = cpu_to_le64(sg[n]);
> - descs[i].len = cpu_to_le32(iovec[n].iov_len);
>
> last = i;
> i = cpu_to_le16(svq->desc_next[i]);
> }
>
> svq->free_head = le16_to_cpu(svq->desc_next[last]);
> - return true;
> +
> + /*
> + * Put the entry in the available array (but don't update avail->idx until
> + * they do sync).
> + */
> + avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
> + avail->ring[avail_idx] = cpu_to_le16(*head);
> + svq->shadow_avail_idx++;
> +
> + /* Update the avail index after write the descriptor */
> + smp_wmb();
> + avail->idx = cpu_to_le16(svq->shadow_avail_idx);
> }
>
I think this code is already in a very good shape. But actual testing
is needed before acks.
As a suggestion, we can split it into:
1) Refactor in vhost_svq_translate_addr to support out_num+in_num. No
functional change.
2) Refactor vhost_svq_add_split to extract common code into
vhost_svq_add. No functional change.
3) Adding packed code.
How to split or merge the patches is not a well-defined thing, so I'm
happy with this patch if you think the reactor is not worth it.
> -static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> +/**
> + * Write descriptors to SVQ packed vring
> + *
> + * @svq: The shadow virtqueue
> + * @out_sg: The iovec to the guest
> + * @out_num: Outgoing iovec length
> + * @in_sg: The iovec from the guest
> + * @in_num: Incoming iovec length
> + * @sgs: Cache for hwaddr
> + * @head: Saves current free_head
> + */
> +static void vhost_svq_add_packed(VhostShadowVirtqueue *svq,
> const struct iovec *out_sg, size_t out_num,
> const struct iovec *in_sg, size_t in_num,
> - unsigned *head)
> + hwaddr *sgs, unsigned *head)
> {
> - unsigned avail_idx;
> - vring_avail_t *avail = svq->vring.avail;
> - bool ok;
> - g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
> + uint16_t id, curr, i, head_flags = 0;
> + size_t num = out_num + in_num;
> + unsigned n;
>
> - *head = svq->free_head;
> + struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
>
> - /* We need some descriptors here */
> - if (unlikely(!out_num && !in_num)) {
> - qemu_log_mask(LOG_GUEST_ERROR,
> - "Guest provided element with no descriptors");
> - return false;
> - }
> + *head = svq->vring_packed.next_avail_idx;
> + i = *head;
> + id = svq->free_head;
> + curr = id;
>
> - ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
> - false);
> - if (unlikely(!ok)) {
> - return false;
> + /* Write descriptors to SVQ packed vring */
> + for (n = 0; n < num; n++) {
> + uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
> + (n < out_num ? 0 : VRING_DESC_F_WRITE) |
> + (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
> + if (i == *head) {
> + head_flags = flags;
> + } else {
> + descs[i].flags = flags;
> + }
> +
> + descs[i].addr = cpu_to_le64(sgs[n]);
> + descs[i].id = id;
> + if (n < out_num) {
> + descs[i].len = cpu_to_le32(out_sg[n].iov_len);
> + } else {
> + descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
> + }
> +
> + curr = cpu_to_le16(svq->desc_next[curr]);
> +
> + if (++i >= svq->vring_packed.vring.num) {
> + i = 0;
> + svq->vring_packed.avail_used_flags ^=
> + 1 << VRING_PACKED_DESC_F_AVAIL |
> + 1 << VRING_PACKED_DESC_F_USED;
> + }
> }
>
> - ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
> - if (unlikely(!ok)) {
> - return false;
> + if (i <= *head) {
> + svq->vring_packed.avail_wrap_counter ^= 1;
> }
>
> + svq->vring_packed.next_avail_idx = i;
> + svq->free_head = curr;
> +
> /*
> - * Put the entry in the available array (but don't update avail->idx until
> - * they do sync).
> + * A driver MUST NOT make the first descriptor in the list
> + * available before all subsequent descriptors comprising
> + * the list are made available.
> */
> - avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
> - avail->ring[avail_idx] = cpu_to_le16(*head);
> - svq->shadow_avail_idx++;
> -
> - /* Update the avail index after write the descriptor */
> smp_wmb();
> - avail->idx = cpu_to_le16(svq->shadow_avail_idx);
> -
> - return true;
> + svq->vring_packed.vring.desc[*head].flags = head_flags;
> }
>
> static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> @@ -254,15 +289,36 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
> unsigned ndescs = in_num + out_num;
> bool ok;
>
> + /* We need some descriptors here */
> + if (unlikely(!ndescs)) {
> + qemu_log_mask(LOG_GUEST_ERROR,
> + "Guest provided element with no descriptors");
> + return -EINVAL;
> + }
> +
> if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
> return -ENOSPC;
> }
>
> - ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
> + g_autofree hwaddr *sgs = g_new(hwaddr, ndescs);
> + ok = vhost_svq_translate_addr(svq, sgs, out_sg, out_num);
> if (unlikely(!ok)) {
> return -EINVAL;
> }
>
> + ok = vhost_svq_translate_addr(svq, sgs + out_num, in_sg, in_num);
> + if (unlikely(!ok)) {
> + return -EINVAL;
> + }
> +
> + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_F_RING_PACKED)) {
> + vhost_svq_add_packed(svq, out_sg, out_num, in_sg,
> + in_num, sgs, &qemu_head);
> + } else {
> + vhost_svq_add_split(svq, out_sg, out_num, in_sg,
> + in_num, sgs, &qemu_head);
> + }
> +
> svq->num_free -= ndescs;
> svq->desc_state[qemu_head].elem = elem;
> svq->desc_state[qemu_head].ndescs = ndescs;
> --
> 2.45.2
>
© 2016 - 2026 Red Hat, Inc.