[RFC v5 3/7] vhost: Forward descriptors to device via packed SVQ

Posted by Sahil Siddiq 1 week ago
Implement the insertion of available buffers in the descriptor area of
packed shadow virtqueues. It takes into account descriptor chains, but
does not consider indirect descriptors.

Enable the packed SVQ to forward the descriptors to the device.

Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
---
Changes from v4 -> v5:
- This was commit #2 in v4. This has been reordered to commit #3
  based on review comments.
- vhost-shadow-virtqueue.c:
  (vhost_svq_valid_features): Move addition of enums to commit #6
  based on review comments.
  (vhost_svq_add_packed): Set head_idx to buffer id instead of vring's
  index.
  (vhost_svq_kick): Split into vhost_svq_kick_split and
  vhost_svq_kick_packed.
  (vhost_svq_add): Use new vhost_svq_kick_* functions.

 hw/virtio/vhost-shadow-virtqueue.c | 117 +++++++++++++++++++++++++++--
 1 file changed, 112 insertions(+), 5 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 4f74ad402a..6e16cd4bdf 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -193,10 +193,83 @@ static void vhost_svq_add_split(VhostShadowVirtqueue *svq,
     /* Update the avail index after write the descriptor */
     smp_wmb();
     avail->idx = cpu_to_le16(svq->shadow_avail_idx);
+}
+
+/**
+ * Write descriptors to SVQ packed vring
+ *
+ * @svq: The shadow virtqueue
+ * @out_sg: The iovec to the guest
+ * @out_num: Outgoing iovec length
+ * @in_sg: The iovec from the guest
+ * @in_num: Incoming iovec length
+ * @sgs: Cache for hwaddr
+ * @head: Saves current free_head
+ */
+static void vhost_svq_add_packed(VhostShadowVirtqueue *svq,
+                                 const struct iovec *out_sg, size_t out_num,
+                                 const struct iovec *in_sg, size_t in_num,
+                                 hwaddr *sgs, unsigned *head)
+{
+    uint16_t id, curr, i, head_flags = 0, head_idx;
+    size_t num = out_num + in_num;
+    unsigned n;
+
+    struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
+
+    head_idx = svq->vring_packed.next_avail_idx;
+    i = head_idx;
+    id = svq->free_head;
+    curr = id;
+    *head = id;
+
+    /* Write descriptors to SVQ packed vring */
+    for (n = 0; n < num; n++) {
+        uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
+                                     (n < out_num ? 0 : VRING_DESC_F_WRITE) |
+                                     (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
+        if (i == head_idx) {
+            head_flags = flags;
+        } else {
+            descs[i].flags = flags;
+        }
+
+        descs[i].addr = cpu_to_le64(sgs[n]);
+        descs[i].id = id;
+        if (n < out_num) {
+            descs[i].len = cpu_to_le32(out_sg[n].iov_len);
+        } else {
+            descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
+        }
+
+        curr = cpu_to_le16(svq->desc_next[curr]);
+
+        if (++i >= svq->vring_packed.vring.num) {
+            i = 0;
+            svq->vring_packed.avail_used_flags ^=
+                1 << VRING_PACKED_DESC_F_AVAIL |
+                1 << VRING_PACKED_DESC_F_USED;
+        }
+    }
 
+    if (i <= head_idx) {
+        svq->vring_packed.avail_wrap_counter ^= 1;
+    }
+
+    svq->vring_packed.next_avail_idx = i;
+    svq->shadow_avail_idx = i;
+    svq->free_head = curr;
+
+    /*
+     * A driver MUST NOT make the first descriptor in the list
+     * available before all subsequent descriptors comprising
+     * the list are made available.
+     */
+    smp_wmb();
+    svq->vring_packed.vring.desc[head_idx].flags = head_flags;
 }
 
-static void vhost_svq_kick(VhostShadowVirtqueue *svq)
+static void vhost_svq_kick_split(VhostShadowVirtqueue *svq)
 {
     bool needs_kick;
 
@@ -209,7 +282,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
     if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
         uint16_t avail_event = le16_to_cpu(
                 *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
-        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
+        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
+                     svq->shadow_avail_idx - 1);
     } else {
         needs_kick =
                 !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
@@ -222,6 +296,30 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
     event_notifier_set(&svq->hdev_kick);
 }
 
+static void vhost_svq_kick_packed(VhostShadowVirtqueue *svq)
+{
+    bool needs_kick;
+
+    /*
+     * We need to expose the available array entries before checking
+     * notification suppressions.
+     */
+    smp_mb();
+
+    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        return;
+    } else {
+        needs_kick = (svq->vring_packed.vring.device->flags !=
+                      cpu_to_le16(VRING_PACKED_EVENT_FLAG_DISABLE));
+    }
+
+    if (!needs_kick) {
+        return;
+    }
+
+    event_notifier_set(&svq->hdev_kick);
+}
+
 /**
  * Add an element to a SVQ.
  *
@@ -258,13 +356,22 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
         return -EINVAL;
     }
 
-    vhost_svq_add_split(svq, out_sg, out_num, in_sg,
-                        in_num, sgs, &qemu_head);
+    if (svq->is_packed) {
+        vhost_svq_add_packed(svq, out_sg, out_num, in_sg,
+                             in_num, sgs, &qemu_head);
+    } else {
+        vhost_svq_add_split(svq, out_sg, out_num, in_sg,
+                            in_num, sgs, &qemu_head);
+    }
 
     svq->num_free -= ndescs;
     svq->desc_state[qemu_head].elem = elem;
     svq->desc_state[qemu_head].ndescs = ndescs;
-    vhost_svq_kick(svq);
+    if (svq->is_packed) {
+        vhost_svq_kick_packed(svq);
+    } else {
+        vhost_svq_kick_split(svq);
+    }
     return 0;
 }
 
-- 
2.48.1
Re: [RFC v5 3/7] vhost: Forward descriptors to device via packed SVQ
Posted by Eugenio Perez Martin 5 days, 20 hours ago
On Mon, Mar 24, 2025 at 3:00 PM Sahil Siddiq <icegambit91@gmail.com> wrote:
>
> Implement the insertion of available buffers in the descriptor area of
> packed shadow virtqueues. It takes into account descriptor chains, but
> does not consider indirect descriptors.
>
> Enable the packed SVQ to forward the descriptors to the device.
>
> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
> ---
> Changes from v4 -> v5:
> - This was commit #2 in v4. This has been reordered to commit #3
>   based on review comments.
> - vhost-shadow-virtqueue.c:
>   (vhost_svq_valid_features): Move addition of enums to commit #6
>   based on review comments.
>   (vhost_svq_add_packed): Set head_idx to buffer id instead of vring's
>   index.
>   (vhost_svq_kick): Split into vhost_svq_kick_split and
>   vhost_svq_kick_packed.
>   (vhost_svq_add): Use new vhost_svq_kick_* functions.
>
>  hw/virtio/vhost-shadow-virtqueue.c | 117 +++++++++++++++++++++++++++--
>  1 file changed, 112 insertions(+), 5 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index 4f74ad402a..6e16cd4bdf 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -193,10 +193,83 @@ static void vhost_svq_add_split(VhostShadowVirtqueue *svq,
>      /* Update the avail index after write the descriptor */
>      smp_wmb();
>      avail->idx = cpu_to_le16(svq->shadow_avail_idx);
> +}
> +
> +/**
> + * Write descriptors to SVQ packed vring
> + *
> + * @svq: The shadow virtqueue
> + * @out_sg: The iovec to the guest
> + * @out_num: Outgoing iovec length
> + * @in_sg: The iovec from the guest
> + * @in_num: Incoming iovec length
> + * @sgs: Cache for hwaddr
> + * @head: Saves current free_head
> + */
> +static void vhost_svq_add_packed(VhostShadowVirtqueue *svq,
> +                                 const struct iovec *out_sg, size_t out_num,
> +                                 const struct iovec *in_sg, size_t in_num,
> +                                 hwaddr *sgs, unsigned *head)
> +{
> +    uint16_t id, curr, i, head_flags = 0, head_idx;
> +    size_t num = out_num + in_num;
> +    unsigned n;
> +
> +    struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
> +
> +    head_idx = svq->vring_packed.next_avail_idx;
> +    i = head_idx;
> +    id = svq->free_head;
> +    curr = id;
> +    *head = id;
> +
> +    /* Write descriptors to SVQ packed vring */
> +    for (n = 0; n < num; n++) {
> +        uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
> +                                     (n < out_num ? 0 : VRING_DESC_F_WRITE) |
> +                                     (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
> +        if (i == head_idx) {
> +            head_flags = flags;
> +        } else {
> +            descs[i].flags = flags;
> +        }
> +
> +        descs[i].addr = cpu_to_le64(sgs[n]);
> +        descs[i].id = id;
> +        if (n < out_num) {
> +            descs[i].len = cpu_to_le32(out_sg[n].iov_len);
> +        } else {
> +            descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
> +        }
> +
> +        curr = cpu_to_le16(svq->desc_next[curr]);
> +
> +        if (++i >= svq->vring_packed.vring.num) {
> +            i = 0;
> +            svq->vring_packed.avail_used_flags ^=
> +                1 << VRING_PACKED_DESC_F_AVAIL |
> +                1 << VRING_PACKED_DESC_F_USED;
> +        }
> +    }
>
> +    if (i <= head_idx) {
> +        svq->vring_packed.avail_wrap_counter ^= 1;
> +    }
> +
> +    svq->vring_packed.next_avail_idx = i;
> +    svq->shadow_avail_idx = i;
> +    svq->free_head = curr;
> +
> +    /*
> +     * A driver MUST NOT make the first descriptor in the list
> +     * available before all subsequent descriptors comprising
> +     * the list are made available.
> +     */
> +    smp_wmb();
> +    svq->vring_packed.vring.desc[head_idx].flags = head_flags;
>  }
>
> -static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> +static void vhost_svq_kick_split(VhostShadowVirtqueue *svq)
>  {
>      bool needs_kick;
>
> @@ -209,7 +282,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>      if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>          uint16_t avail_event = le16_to_cpu(
>                  *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
> -        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
> +        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
> +                     svq->shadow_avail_idx - 1);
>      } else {
>          needs_kick =
>                  !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
> @@ -222,6 +296,30 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>      event_notifier_set(&svq->hdev_kick);
>  }
>
> +static void vhost_svq_kick_packed(VhostShadowVirtqueue *svq)
> +{
> +    bool needs_kick;
> +
> +    /*
> +     * We need to expose the available array entries before checking
> +     * notification suppressions.
> +     */
> +    smp_mb();
> +
> +    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> +        return;

It's weird SVQ does not need to kick if _F_EVENT_IDX. This should have
code checking the device ring flags etc.

> +    } else {
> +        needs_kick = (svq->vring_packed.vring.device->flags !=
> +                      cpu_to_le16(VRING_PACKED_EVENT_FLAG_DISABLE));
> +    }
> +
> +    if (!needs_kick) {
> +        return;
> +    }
> +
> +    event_notifier_set(&svq->hdev_kick);
> +}
> +
>  /**
>   * Add an element to a SVQ.
>   *
> @@ -258,13 +356,22 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
>          return -EINVAL;
>      }
>
> -    vhost_svq_add_split(svq, out_sg, out_num, in_sg,
> -                        in_num, sgs, &qemu_head);
> +    if (svq->is_packed) {
> +        vhost_svq_add_packed(svq, out_sg, out_num, in_sg,
> +                             in_num, sgs, &qemu_head);
> +    } else {
> +        vhost_svq_add_split(svq, out_sg, out_num, in_sg,
> +                            in_num, sgs, &qemu_head);
> +    }
>
>      svq->num_free -= ndescs;
>      svq->desc_state[qemu_head].elem = elem;
>      svq->desc_state[qemu_head].ndescs = ndescs;
> -    vhost_svq_kick(svq);
> +    if (svq->is_packed) {
> +        vhost_svq_kick_packed(svq);
> +    } else {
> +        vhost_svq_kick_split(svq);
> +    }
>      return 0;
>  }
>
> --
> 2.48.1
>
Re: [RFC v5 3/7] vhost: Forward descriptors to device via packed SVQ
Posted by Sahil Siddiq 4 days, 3 hours ago
Hi,

On 3/26/25 5:32 PM, Eugenio Perez Martin wrote:
> On Mon, Mar 24, 2025 at 3:00 PM Sahil Siddiq <icegambit91@gmail.com> wrote:
>>
>> Implement the insertion of available buffers in the descriptor area of
>> packed shadow virtqueues. It takes into account descriptor chains, but
>> does not consider indirect descriptors.
>>
>> Enable the packed SVQ to forward the descriptors to the device.
>>
>> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
>> ---
>> Changes from v4 -> v5:
>> - This was commit #2 in v4. This has been reordered to commit #3
>>    based on review comments.
>> - vhost-shadow-virtqueue.c:
>>    (vhost_svq_valid_features): Move addition of enums to commit #6
>>    based on review comments.
>>    (vhost_svq_add_packed): Set head_idx to buffer id instead of vring's
>>    index.
>>    (vhost_svq_kick): Split into vhost_svq_kick_split and
>>    vhost_svq_kick_packed.
>>    (vhost_svq_add): Use new vhost_svq_kick_* functions.
>>
>>   hw/virtio/vhost-shadow-virtqueue.c | 117 +++++++++++++++++++++++++++--
>>   1 file changed, 112 insertions(+), 5 deletions(-)
>>
>> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
>> index 4f74ad402a..6e16cd4bdf 100644
>> --- a/hw/virtio/vhost-shadow-virtqueue.c
>> +++ b/hw/virtio/vhost-shadow-virtqueue.c
>> @@ -193,10 +193,83 @@ static void vhost_svq_add_split(VhostShadowVirtqueue *svq,
>>       /* Update the avail index after write the descriptor */
>>       smp_wmb();
>>       avail->idx = cpu_to_le16(svq->shadow_avail_idx);
>> +}
>> +
>> +/**
>> + * Write descriptors to SVQ packed vring
>> + *
>> + * @svq: The shadow virtqueue
>> + * @out_sg: The iovec to the guest
>> + * @out_num: Outgoing iovec length
>> + * @in_sg: The iovec from the guest
>> + * @in_num: Incoming iovec length
>> + * @sgs: Cache for hwaddr
>> + * @head: Saves current free_head
>> + */
>> +static void vhost_svq_add_packed(VhostShadowVirtqueue *svq,
>> +                                 const struct iovec *out_sg, size_t out_num,
>> +                                 const struct iovec *in_sg, size_t in_num,
>> +                                 hwaddr *sgs, unsigned *head)
>> +{
>> +    uint16_t id, curr, i, head_flags = 0, head_idx;
>> +    size_t num = out_num + in_num;
>> +    unsigned n;
>> +
>> +    struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
>> +
>> +    head_idx = svq->vring_packed.next_avail_idx;
>> +    i = head_idx;
>> +    id = svq->free_head;
>> +    curr = id;
>> +    *head = id;
>> +
>> +    /* Write descriptors to SVQ packed vring */
>> +    for (n = 0; n < num; n++) {
>> +        uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
>> +                                     (n < out_num ? 0 : VRING_DESC_F_WRITE) |
>> +                                     (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
>> +        if (i == head_idx) {
>> +            head_flags = flags;
>> +        } else {
>> +            descs[i].flags = flags;
>> +        }
>> +
>> +        descs[i].addr = cpu_to_le64(sgs[n]);
>> +        descs[i].id = id;
>> +        if (n < out_num) {
>> +            descs[i].len = cpu_to_le32(out_sg[n].iov_len);
>> +        } else {
>> +            descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
>> +        }
>> +
>> +        curr = cpu_to_le16(svq->desc_next[curr]);
>> +
>> +        if (++i >= svq->vring_packed.vring.num) {
>> +            i = 0;
>> +            svq->vring_packed.avail_used_flags ^=
>> +                1 << VRING_PACKED_DESC_F_AVAIL |
>> +                1 << VRING_PACKED_DESC_F_USED;
>> +        }
>> +    }
>>
>> +    if (i <= head_idx) {
>> +        svq->vring_packed.avail_wrap_counter ^= 1;
>> +    }
>> +
>> +    svq->vring_packed.next_avail_idx = i;
>> +    svq->shadow_avail_idx = i;
>> +    svq->free_head = curr;
>> +
>> +    /*
>> +     * A driver MUST NOT make the first descriptor in the list
>> +     * available before all subsequent descriptors comprising
>> +     * the list are made available.
>> +     */
>> +    smp_wmb();
>> +    svq->vring_packed.vring.desc[head_idx].flags = head_flags;
>>   }
>>
>> -static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>> +static void vhost_svq_kick_split(VhostShadowVirtqueue *svq)
>>   {
>>       bool needs_kick;
>>
>> @@ -209,7 +282,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>>       if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>>           uint16_t avail_event = le16_to_cpu(
>>                   *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
>> -        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
>> +        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
>> +                     svq->shadow_avail_idx - 1);
>>       } else {
>>           needs_kick =
>>                   !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
>> @@ -222,6 +296,30 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>>       event_notifier_set(&svq->hdev_kick);
>>   }
>>
>> +static void vhost_svq_kick_packed(VhostShadowVirtqueue *svq)
>> +{
>> +    bool needs_kick;
>> +
>> +    /*
>> +     * We need to expose the available array entries before checking
>> +     * notification suppressions.
>> +     */
>> +    smp_mb();
>> +
>> +    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>> +        return;
> 
> It's weird SVQ does not need to kick if _F_EVENT_IDX. This should have
> code checking the device ring flags etc.
> 

Right, I haven't implemented this yet. Since the current implementation is
being tested with event_idx=off (points 3 and 4 of the roadmap [1]), I thought
I would leave this for later.

Maybe I can add a comment in the implementation explaining this.
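
For reference, a sketch of how that branch could eventually check the device
event suppression area, modelled on Linux's virtqueue_kick_prepare_packed()
(only a sketch, not part of this patch; "old" stands for the avail index
before the last add and would still have to be tracked somewhere):

if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
    uint16_t flags = le16_to_cpu(svq->vring_packed.vring.device->flags);

    if (flags == VRING_PACKED_EVENT_FLAG_DESC) {
        /* off_wrap packs the event index with the wrap counter in bit 15 */
        uint16_t off_wrap =
            le16_to_cpu(svq->vring_packed.vring.device->off_wrap);
        uint16_t event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);

        if ((off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR) !=
            svq->vring_packed.avail_wrap_counter) {
            event_idx -= svq->vring_packed.vring.num;
        }
        needs_kick = vring_need_event(event_idx,
                                      svq->vring_packed.next_avail_idx, old);
    } else {
        needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
    }
}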

Thanks,
Sahil

[1] https://wiki.qemu.org/Internships/ProjectIdeas/PackedShadowVirtqueue

Re: [RFC v5 3/7] vhost: Forward descriptors to device via packed SVQ
Posted by Eugenio Perez Martin 4 days, 1 hour ago
On Fri, Mar 28, 2025 at 6:10 AM Sahil Siddiq <icegambit91@gmail.com> wrote:
>
> Hi,
>
> On 3/26/25 5:32 PM, Eugenio Perez Martin wrote:
> > On Mon, Mar 24, 2025 at 3:00 PM Sahil Siddiq <icegambit91@gmail.com> wrote:
> >>
> >> Implement the insertion of available buffers in the descriptor area of
> >> packed shadow virtqueues. It takes into account descriptor chains, but
> >> does not consider indirect descriptors.
> >>
> >> Enable the packed SVQ to forward the descriptors to the device.
> >>
> >> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
> >> ---
> >> Changes from v4 -> v5:
> >> - This was commit #2 in v4. This has been reordered to commit #3
> >>    based on review comments.
> >> - vhost-shadow-virtqueue.c:
> >>    (vhost_svq_valid_features): Move addition of enums to commit #6
> >>    based on review comments.
> >>    (vhost_svq_add_packed): Set head_idx to buffer id instead of vring's
> >>    index.
> >>    (vhost_svq_kick): Split into vhost_svq_kick_split and
> >>    vhost_svq_kick_packed.
> >>    (vhost_svq_add): Use new vhost_svq_kick_* functions.
> >>
> >>   hw/virtio/vhost-shadow-virtqueue.c | 117 +++++++++++++++++++++++++++--
> >>   1 file changed, 112 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> >> index 4f74ad402a..6e16cd4bdf 100644
> >> --- a/hw/virtio/vhost-shadow-virtqueue.c
> >> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> >> @@ -193,10 +193,83 @@ static void vhost_svq_add_split(VhostShadowVirtqueue *svq,
> >>       /* Update the avail index after write the descriptor */
> >>       smp_wmb();
> >>       avail->idx = cpu_to_le16(svq->shadow_avail_idx);
> >> +}
> >> +
> >> +/**
> >> + * Write descriptors to SVQ packed vring
> >> + *
> >> + * @svq: The shadow virtqueue
> >> + * @out_sg: The iovec to the guest
> >> + * @out_num: Outgoing iovec length
> >> + * @in_sg: The iovec from the guest
> >> + * @in_num: Incoming iovec length
> >> + * @sgs: Cache for hwaddr
> >> + * @head: Saves current free_head
> >> + */
> >> +static void vhost_svq_add_packed(VhostShadowVirtqueue *svq,
> >> +                                 const struct iovec *out_sg, size_t out_num,
> >> +                                 const struct iovec *in_sg, size_t in_num,
> >> +                                 hwaddr *sgs, unsigned *head)
> >> +{
> >> +    uint16_t id, curr, i, head_flags = 0, head_idx;
> >> +    size_t num = out_num + in_num;
> >> +    unsigned n;
> >> +
> >> +    struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
> >> +
> >> +    head_idx = svq->vring_packed.next_avail_idx;
> >> +    i = head_idx;
> >> +    id = svq->free_head;
> >> +    curr = id;
> >> +    *head = id;
> >> +
> >> +    /* Write descriptors to SVQ packed vring */
> >> +    for (n = 0; n < num; n++) {
> >> +        uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
> >> +                                     (n < out_num ? 0 : VRING_DESC_F_WRITE) |
> >> +                                     (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
> >> +        if (i == head_idx) {
> >> +            head_flags = flags;
> >> +        } else {
> >> +            descs[i].flags = flags;
> >> +        }
> >> +
> >> +        descs[i].addr = cpu_to_le64(sgs[n]);
> >> +        descs[i].id = id;
> >> +        if (n < out_num) {
> >> +            descs[i].len = cpu_to_le32(out_sg[n].iov_len);
> >> +        } else {
> >> +            descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
> >> +        }
> >> +
> >> +        curr = cpu_to_le16(svq->desc_next[curr]);
> >> +
> >> +        if (++i >= svq->vring_packed.vring.num) {
> >> +            i = 0;
> >> +            svq->vring_packed.avail_used_flags ^=
> >> +                1 << VRING_PACKED_DESC_F_AVAIL |
> >> +                1 << VRING_PACKED_DESC_F_USED;
> >> +        }
> >> +    }
> >>
> >> +    if (i <= head_idx) {
> >> +        svq->vring_packed.avail_wrap_counter ^= 1;
> >> +    }
> >> +
> >> +    svq->vring_packed.next_avail_idx = i;
> >> +    svq->shadow_avail_idx = i;
> >> +    svq->free_head = curr;
> >> +
> >> +    /*
> >> +     * A driver MUST NOT make the first descriptor in the list
> >> +     * available before all subsequent descriptors comprising
> >> +     * the list are made available.
> >> +     */
> >> +    smp_wmb();
> >> +    svq->vring_packed.vring.desc[head_idx].flags = head_flags;
> >>   }
> >>
> >> -static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >> +static void vhost_svq_kick_split(VhostShadowVirtqueue *svq)
> >>   {
> >>       bool needs_kick;
> >>
> >> @@ -209,7 +282,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >>       if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >>           uint16_t avail_event = le16_to_cpu(
> >>                   *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
> >> -        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
> >> +        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
> >> +                     svq->shadow_avail_idx - 1);
> >>       } else {
> >>           needs_kick =
> >>                   !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
> >> @@ -222,6 +296,30 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >>       event_notifier_set(&svq->hdev_kick);
> >>   }
> >>
> >> +static void vhost_svq_kick_packed(VhostShadowVirtqueue *svq)
> >> +{
> >> +    bool needs_kick;
> >> +
> >> +    /*
> >> +     * We need to expose the available array entries before checking
> >> +     * notification suppressions.
> >> +     */
> >> +    smp_mb();
> >> +
> >> +    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >> +        return;
> >
> > It's weird SVQ does not need to kick if _F_EVENT_IDX. This should have
> > code checking the device ring flags etc.
> >
>
> Right, I haven't implemented this yet. Since the current implementation is
> being tested with event_idx=off (points 3 and 4 of the roadmap [1]), I thought
> I would leave this for later.
>
> Maybe I can add a comment in the implementation explaining this.
>

Sure that's fine, and probably even better than trying to address
everything in one shot :) Can you add a TODO in each place you
identify so we're sure we don't miss any?

> Thanks,
> Sahil
>
> [1] https://wiki.qemu.org/Internships/ProjectIdeas/PackedShadowVirtqueue
>
Re: [RFC v5 3/7] vhost: Forward descriptors to device via packed SVQ
Posted by Sahil Siddiq 1 week ago
Hi,

I had a few queries here.

On 3/24/25 7:29 PM, Sahil Siddiq wrote:
> Implement the insertion of available buffers in the descriptor area of
> packed shadow virtqueues. It takes into account descriptor chains, but
> does not consider indirect descriptors.
> 
> Enable the packed SVQ to forward the descriptors to the device.
> 
> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
> ---
> Changes from v4 -> v5:
> - This was commit #2 in v4. This has been reordered to commit #3
>    based on review comments.
> - vhost-shadow-virtqueue.c:
>    (vhost_svq_valid_features): Move addition of enums to commit #6
>    based on review comments.
>    (vhost_svq_add_packed): Set head_idx to buffer id instead of vring's
>    index.
>    (vhost_svq_kick): Split into vhost_svq_kick_split and
>    vhost_svq_kick_packed.
>    (vhost_svq_add): Use new vhost_svq_kick_* functions.
> 
>   hw/virtio/vhost-shadow-virtqueue.c | 117 +++++++++++++++++++++++++++--
>   1 file changed, 112 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index 4f74ad402a..6e16cd4bdf 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -193,10 +193,83 @@ static void vhost_svq_add_split(VhostShadowVirtqueue *svq,
>       /* Update the avail index after write the descriptor */
>       smp_wmb();
>       avail->idx = cpu_to_le16(svq->shadow_avail_idx);
> +}
> +
> +/**
> + * Write descriptors to SVQ packed vring
> + *
> + * @svq: The shadow virtqueue
> + * @out_sg: The iovec to the guest
> + * @out_num: Outgoing iovec length
> + * @in_sg: The iovec from the guest
> + * @in_num: Incoming iovec length
> + * @sgs: Cache for hwaddr
> + * @head: Saves current free_head
> + */
> +static void vhost_svq_add_packed(VhostShadowVirtqueue *svq,
> +                                 const struct iovec *out_sg, size_t out_num,
> +                                 const struct iovec *in_sg, size_t in_num,
> +                                 hwaddr *sgs, unsigned *head)
> +{
> +    uint16_t id, curr, i, head_flags = 0, head_idx;
> +    size_t num = out_num + in_num;
> +    unsigned n;
> +
> +    struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
> +
> +    head_idx = svq->vring_packed.next_avail_idx;

Since "svq->vring_packed.next_avail_idx" is part of QEMU internals and not
stored in guest memory, no endianness conversion is required here, right?

> +    i = head_idx;
> +    id = svq->free_head;
> +    curr = id;
> +    *head = id;

Should head be the buffer id or the idx of the descriptor ring where the
first descriptor of a descriptor chain is inserted?

> +    /* Write descriptors to SVQ packed vring */
> +    for (n = 0; n < num; n++) {
> +        uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
> +                                     (n < out_num ? 0 : VRING_DESC_F_WRITE) |
> +                                     (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
> +        if (i == head_idx) {
> +            head_flags = flags;
> +        } else {
> +            descs[i].flags = flags;
> +        }
> +
> +        descs[i].addr = cpu_to_le64(sgs[n]);
> +        descs[i].id = id;
> +        if (n < out_num) {
> +            descs[i].len = cpu_to_le32(out_sg[n].iov_len);
> +        } else {
> +            descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
> +        }
> +
> +        curr = cpu_to_le16(svq->desc_next[curr]);
> +
> +        if (++i >= svq->vring_packed.vring.num) {
> +            i = 0;
> +            svq->vring_packed.avail_used_flags ^=
> +                1 << VRING_PACKED_DESC_F_AVAIL |
> +                1 << VRING_PACKED_DESC_F_USED;
> +        }
> +    }
>   
> +    if (i <= head_idx) {
> +        svq->vring_packed.avail_wrap_counter ^= 1;
> +    }
> +
> +    svq->vring_packed.next_avail_idx = i;
> +    svq->shadow_avail_idx = i;
> +    svq->free_head = curr;
> +
> +    /*
> +     * A driver MUST NOT make the first descriptor in the list
> +     * available before all subsequent descriptors comprising
> +     * the list are made available.
> +     */
> +    smp_wmb();
> +    svq->vring_packed.vring.desc[head_idx].flags = head_flags;
>   }
>   
> -static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> +static void vhost_svq_kick_split(VhostShadowVirtqueue *svq)
>   {
>       bool needs_kick;
>   
> @@ -209,7 +282,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>       if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>           uint16_t avail_event = le16_to_cpu(
>                   *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
> -        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
> +        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
> +                     svq->shadow_avail_idx - 1);
>       } else {
>           needs_kick =
>                   !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
> @@ -222,6 +296,30 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>       event_notifier_set(&svq->hdev_kick);
>   }
>   
> +static void vhost_svq_kick_packed(VhostShadowVirtqueue *svq)
> +{
> +    bool needs_kick;
> +
> +    /*
> +     * We need to expose the available array entries before checking
> +     * notification suppressions.
> +     */
> +    smp_mb();
> +
> +    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> +        return;
> +    } else {
> +        needs_kick = (svq->vring_packed.vring.device->flags !=
> +                      cpu_to_le16(VRING_PACKED_EVENT_FLAG_DISABLE));
> +    }
> +
> +    if (!needs_kick) {
> +        return;
> +    }
> +
> +    event_notifier_set(&svq->hdev_kick);
> +}
> +
>   /**
>    * Add an element to a SVQ.
>    *
> @@ -258,13 +356,22 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
>           return -EINVAL;
>       }
>   
> -    vhost_svq_add_split(svq, out_sg, out_num, in_sg,
> -                        in_num, sgs, &qemu_head);
> +    if (svq->is_packed) {
> +        vhost_svq_add_packed(svq, out_sg, out_num, in_sg,
> +                             in_num, sgs, &qemu_head);
> +    } else {
> +        vhost_svq_add_split(svq, out_sg, out_num, in_sg,
> +                            in_num, sgs, &qemu_head);
> +    }
>   
>       svq->num_free -= ndescs;
>       svq->desc_state[qemu_head].elem = elem;
>       svq->desc_state[qemu_head].ndescs = ndescs;

*head in vhost_svq_add_packed() is stored in "qemu_head" here.

> -    vhost_svq_kick(svq);
> +    if (svq->is_packed) {
> +        vhost_svq_kick_packed(svq);
> +    } else {
> +        vhost_svq_kick_split(svq);
> +    }
>       return 0;
>   }
>   

Thanks,
Sahil
Re: [RFC v5 3/7] vhost: Forward descriptors to device via packed SVQ
Posted by Eugenio Perez Martin 6 days ago
On Mon, Mar 24, 2025 at 3:14 PM Sahil Siddiq <icegambit91@gmail.com> wrote:
>
> Hi,
>
> I had a few queries here.
>
> On 3/24/25 7:29 PM, Sahil Siddiq wrote:
> > Implement the insertion of available buffers in the descriptor area of
> > packed shadow virtqueues. It takes into account descriptor chains, but
> > does not consider indirect descriptors.
> >
> > Enable the packed SVQ to forward the descriptors to the device.
> >
> > Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
> > ---
> > Changes from v4 -> v5:
> > - This was commit #2 in v4. This has been reordered to commit #3
> >    based on review comments.
> > - vhost-shadow-virtqueue.c:
> >    (vhost_svq_valid_features): Move addition of enums to commit #6
> >    based on review comments.
> >    (vhost_svq_add_packed): Set head_idx to buffer id instead of vring's
> >    index.
> >    (vhost_svq_kick): Split into vhost_svq_kick_split and
> >    vhost_svq_kick_packed.
> >    (vhost_svq_add): Use new vhost_svq_kick_* functions.
> >
> >   hw/virtio/vhost-shadow-virtqueue.c | 117 +++++++++++++++++++++++++++--
> >   1 file changed, 112 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> > index 4f74ad402a..6e16cd4bdf 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -193,10 +193,83 @@ static void vhost_svq_add_split(VhostShadowVirtqueue *svq,
> >       /* Update the avail index after write the descriptor */
> >       smp_wmb();
> >       avail->idx = cpu_to_le16(svq->shadow_avail_idx);
> > +}
> > +
> > +/**
> > + * Write descriptors to SVQ packed vring
> > + *
> > + * @svq: The shadow virtqueue
> > + * @out_sg: The iovec to the guest
> > + * @out_num: Outgoing iovec length
> > + * @in_sg: The iovec from the guest
> > + * @in_num: Incoming iovec length
> > + * @sgs: Cache for hwaddr
> > + * @head: Saves current free_head
> > + */
> > +static void vhost_svq_add_packed(VhostShadowVirtqueue *svq,
> > +                                 const struct iovec *out_sg, size_t out_num,
> > +                                 const struct iovec *in_sg, size_t in_num,
> > +                                 hwaddr *sgs, unsigned *head)
> > +{
> > +    uint16_t id, curr, i, head_flags = 0, head_idx;
> > +    size_t num = out_num + in_num;
> > +    unsigned n;
> > +
> > +    struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
> > +
> > +    head_idx = svq->vring_packed.next_avail_idx;
>
> Since "svq->vring_packed.next_avail_idx" is part of QEMU internals and not
> stored in guest memory, no endianness conversion is required here, right?
>

Right!

> > +    i = head_idx;
> > +    id = svq->free_head;
> > +    curr = id;
> > +    *head = id;
>
> Should head be the buffer id or the idx of the descriptor ring where the
> first descriptor of a descriptor chain is inserted?
>

The buffer id of the *last* descriptor of a chain. See "2.8.6 Next
Flag: Descriptor Chaining" at [1].

> > +    /* Write descriptors to SVQ packed vring */
> > +    for (n = 0; n < num; n++) {
> > +        uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
> > +                                     (n < out_num ? 0 : VRING_DESC_F_WRITE) |
> > +                                     (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
> > +        if (i == head_idx) {
> > +            head_flags = flags;
> > +        } else {
> > +            descs[i].flags = flags;
> > +        }
> > +
> > +        descs[i].addr = cpu_to_le64(sgs[n]);
> > +        descs[i].id = id;
> > +        if (n < out_num) {
> > +            descs[i].len = cpu_to_le32(out_sg[n].iov_len);
> > +        } else {
> > +            descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
> > +        }
> > +
> > +        curr = cpu_to_le16(svq->desc_next[curr]);
> > +
> > +        if (++i >= svq->vring_packed.vring.num) {
> > +            i = 0;
> > +            svq->vring_packed.avail_used_flags ^=
> > +                1 << VRING_PACKED_DESC_F_AVAIL |
> > +                1 << VRING_PACKED_DESC_F_USED;
> > +        }
> > +    }
> >
> > +    if (i <= head_idx) {
> > +        svq->vring_packed.avail_wrap_counter ^= 1;
> > +    }
> > +
> > +    svq->vring_packed.next_avail_idx = i;
> > +    svq->shadow_avail_idx = i;
> > +    svq->free_head = curr;
> > +
> > +    /*
> > +     * A driver MUST NOT make the first descriptor in the list
> > +     * available before all subsequent descriptors comprising
> > +     * the list are made available.
> > +     */
> > +    smp_wmb();
> > +    svq->vring_packed.vring.desc[head_idx].flags = head_flags;
> >   }
> >
> > -static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> > +static void vhost_svq_kick_split(VhostShadowVirtqueue *svq)
> >   {
> >       bool needs_kick;
> >
> > @@ -209,7 +282,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >       if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >           uint16_t avail_event = le16_to_cpu(
> >                   *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
> > -        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
> > +        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
> > +                     svq->shadow_avail_idx - 1);
> >       } else {
> >           needs_kick =
> >                   !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
> > @@ -222,6 +296,30 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >       event_notifier_set(&svq->hdev_kick);
> >   }
> >
> > +static void vhost_svq_kick_packed(VhostShadowVirtqueue *svq)
> > +{
> > +    bool needs_kick;
> > +
> > +    /*
> > +     * We need to expose the available array entries before checking
> > +     * notification suppressions.
> > +     */
> > +    smp_mb();
> > +
> > +    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> > +        return;
> > +    } else {
> > +        needs_kick = (svq->vring_packed.vring.device->flags !=
> > +                      cpu_to_le16(VRING_PACKED_EVENT_FLAG_DISABLE));
> > +    }
> > +
> > +    if (!needs_kick) {
> > +        return;
> > +    }
> > +
> > +    event_notifier_set(&svq->hdev_kick);
> > +}
> > +
> >   /**
> >    * Add an element to a SVQ.
> >    *
> > @@ -258,13 +356,22 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
> >           return -EINVAL;
> >       }
> >
> > -    vhost_svq_add_split(svq, out_sg, out_num, in_sg,
> > -                        in_num, sgs, &qemu_head);
> > +    if (svq->is_packed) {
> > +        vhost_svq_add_packed(svq, out_sg, out_num, in_sg,
> > +                             in_num, sgs, &qemu_head);
> > +    } else {
> > +        vhost_svq_add_split(svq, out_sg, out_num, in_sg,
> > +                            in_num, sgs, &qemu_head);
> > +    }
> >
> >       svq->num_free -= ndescs;
> >       svq->desc_state[qemu_head].elem = elem;
> >       svq->desc_state[qemu_head].ndescs = ndescs;
>
> *head in vhost_svq_add_packed() is stored in "qemu_head" here.
>

Sorry I don't get this, can you expand?

[1] https://docs.oasis-open.org/virtio/virtio/v1.3/virtio-v1.3.html

> > -    vhost_svq_kick(svq);
> > +    if (svq->is_packed) {
> > +        vhost_svq_kick_packed(svq);
> > +    } else {
> > +        vhost_svq_kick_split(svq);
> > +    }
> >       return 0;
> >   }
> >
>
> Thanks,
> Sahil
>
Re: [RFC v5 3/7] vhost: Forward descriptors to device via packed SVQ
Posted by Sahil Siddiq 4 days, 13 hours ago
Hi,

On 3/26/25 1:33 PM, Eugenio Perez Martin wrote:
> On Mon, Mar 24, 2025 at 3:14 PM Sahil Siddiq <icegambit91@gmail.com> wrote:
>> On 3/24/25 7:29 PM, Sahil Siddiq wrote:
>>> Implement the insertion of available buffers in the descriptor area of
>>> packed shadow virtqueues. It takes into account descriptor chains, but
>>> does not consider indirect descriptors.
>>>
>>> Enable the packed SVQ to forward the descriptors to the device.
>>>
>>> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
>>> ---
>>> Changes from v4 -> v5:
>>> - This was commit #2 in v4. This has been reordered to commit #3
>>>     based on review comments.
>>> - vhost-shadow-virtqueue.c:
>>>     (vhost_svq_valid_features): Move addition of enums to commit #6
>>>     based on review comments.
>>>     (vhost_svq_add_packed): Set head_idx to buffer id instead of vring's
>>>     index.
>>>     (vhost_svq_kick): Split into vhost_svq_kick_split and
>>>     vhost_svq_kick_packed.
>>>     (vhost_svq_add): Use new vhost_svq_kick_* functions.
>>>
>>>    hw/virtio/vhost-shadow-virtqueue.c | 117 +++++++++++++++++++++++++++--
>>>    1 file changed, 112 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
>>> index 4f74ad402a..6e16cd4bdf 100644
>>> --- a/hw/virtio/vhost-shadow-virtqueue.c
>>> +++ b/hw/virtio/vhost-shadow-virtqueue.c
>>> @@ -193,10 +193,83 @@ static void vhost_svq_add_split(VhostShadowVirtqueue *svq,
>>>        /* Update the avail index after write the descriptor */
>>>        smp_wmb();
>>>        avail->idx = cpu_to_le16(svq->shadow_avail_idx);
>>> +}
>>> +
>>> +/**
>>> + * Write descriptors to SVQ packed vring
>>> + *
>>> + * @svq: The shadow virtqueue
>>> + * @out_sg: The iovec to the guest
>>> + * @out_num: Outgoing iovec length
>>> + * @in_sg: The iovec from the guest
>>> + * @in_num: Incoming iovec length
>>> + * @sgs: Cache for hwaddr
>>> + * @head: Saves current free_head
>>> + */
>>> +static void vhost_svq_add_packed(VhostShadowVirtqueue *svq,
>>> +                                 const struct iovec *out_sg, size_t out_num,
>>> +                                 const struct iovec *in_sg, size_t in_num,
>>> +                                 hwaddr *sgs, unsigned *head)
>>> +{
>>> +    uint16_t id, curr, i, head_flags = 0, head_idx;
>>> +    size_t num = out_num + in_num;
>>> +    unsigned n;
>>> +
>>> +    struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
>>> +
>>> +    head_idx = svq->vring_packed.next_avail_idx;
>>
>> Since "svq->vring_packed.next_avail_idx" is part of QEMU internals and not
>> stored in guest memory, no endianness conversion is required here, right?
>>
> 
> Right!

Understood.

>>> +    i = head_idx;
>>> +    id = svq->free_head;
>>> +    curr = id;
>>> +    *head = id;
>>
>> Should head be the buffer id or the idx of the descriptor ring where the
>> first descriptor of a descriptor chain is inserted?
>>
> 
> The buffer id of the *last* descriptor of a chain. See "2.8.6 Next
> Flag: Descriptor Chaining" at [1].

Ah, yes. The second half of my question is incorrect.

The tail descriptor of the chain includes the buffer id. In this implementation
we place the same tail buffer id in other locations of the descriptor ring since
they will be ignored anyway [1].

The explanation below frames my query better.

>>> +    /* Write descriptors to SVQ packed vring */
>>> +    for (n = 0; n < num; n++) {
>>> +        uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
>>> +                                     (n < out_num ? 0 : VRING_DESC_F_WRITE) |
>>> +                                     (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
>>> +        if (i == head_idx) {
>>> +            head_flags = flags;
>>> +        } else {
>>> +            descs[i].flags = flags;
>>> +        }
>>> +
>>> +        descs[i].addr = cpu_to_le64(sgs[n]);
>>> +        descs[i].id = id;
>>> +        if (n < out_num) {
>>> +            descs[i].len = cpu_to_le32(out_sg[n].iov_len);
>>> +        } else {
>>> +            descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
>>> +        }
>>> +
>>> +        curr = cpu_to_le16(svq->desc_next[curr]);
>>> +
>>> +        if (++i >= svq->vring_packed.vring.num) {
>>> +            i = 0;
>>> +            svq->vring_packed.avail_used_flags ^=
>>> +                1 << VRING_PACKED_DESC_F_AVAIL |
>>> +                1 << VRING_PACKED_DESC_F_USED;
>>> +        }
>>> +    }
>>>
>>> +    if (i <= head_idx) {
>>> +        svq->vring_packed.avail_wrap_counter ^= 1;
>>> +    }
>>> +
>>> +    svq->vring_packed.next_avail_idx = i;
>>> +    svq->shadow_avail_idx = i;
>>> +    svq->free_head = curr;
>>> +
>>> +    /*
>>> +     * A driver MUST NOT make the first descriptor in the list
>>> +     * available before all subsequent descriptors comprising
>>> +     * the list are made available.
>>> +     */
>>> +    smp_wmb();
>>> +    svq->vring_packed.vring.desc[head_idx].flags = head_flags;
>>>    }
>>>
>>> -static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>>> +static void vhost_svq_kick_split(VhostShadowVirtqueue *svq)
>>>    {
>>>        bool needs_kick;
>>>
>>> @@ -209,7 +282,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>>>        if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>>>            uint16_t avail_event = le16_to_cpu(
>>>                    *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
>>> -        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
>>> +        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
>>> +                     svq->shadow_avail_idx - 1);
>>>        } else {
>>>            needs_kick =
>>>                    !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
>>> @@ -222,6 +296,30 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
>>>        event_notifier_set(&svq->hdev_kick);
>>>    }
>>>
>>> +static void vhost_svq_kick_packed(VhostShadowVirtqueue *svq)
>>> +{
>>> +    bool needs_kick;
>>> +
>>> +    /*
>>> +     * We need to expose the available array entries before checking
>>> +     * notification suppressions.
>>> +     */
>>> +    smp_mb();
>>> +
>>> +    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
>>> +        return;
>>> +    } else {
>>> +        needs_kick = (svq->vring_packed.vring.device->flags !=
>>> +                      cpu_to_le16(VRING_PACKED_EVENT_FLAG_DISABLE));
>>> +    }
>>> +
>>> +    if (!needs_kick) {
>>> +        return;
>>> +    }
>>> +
>>> +    event_notifier_set(&svq->hdev_kick);
>>> +}
>>> +
>>>    /**
>>>     * Add an element to a SVQ.
>>>     *
>>> @@ -258,13 +356,22 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
>>>            return -EINVAL;
>>>        }
>>>
>>> -    vhost_svq_add_split(svq, out_sg, out_num, in_sg,
>>> -                        in_num, sgs, &qemu_head);
>>> +    if (svq->is_packed) {
>>> +        vhost_svq_add_packed(svq, out_sg, out_num, in_sg,
>>> +                             in_num, sgs, &qemu_head);
>>> +    } else {
>>> +        vhost_svq_add_split(svq, out_sg, out_num, in_sg,
>>> +                            in_num, sgs, &qemu_head);
>>> +    }
>>>
>>>        svq->num_free -= ndescs;
>>>        svq->desc_state[qemu_head].elem = elem;
>>>        svq->desc_state[qemu_head].ndescs = ndescs;
>>
>> *head in vhost_svq_add_packed() is stored in "qemu_head" here.
>>
> 
> Sorry I don't get this, can you expand?

Sure. In vhost_svq_add(), after the descriptors have been added
(either using vhost_svq_add_split or vhost_svq_add_packed),
VirtQueueElement elem and ndescs are both saved in the
svq->desc_state array. "elem" and "ndescs" are later used when
the guest consumes used descriptors from the device in
vhost_svq_get_buf_(split|packed).

For split vqs, the index of svq->desc_state where elem and ndescs are
saved matches the index of the descriptor ring where the head of
the descriptor chain is placed.

In vhost_svq_add_split:

*head = svq->free_head;
[...]
avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
avail->ring[avail_idx] = cpu_to_le16(*head);

"qemu_head" in vhost_svq_add gets its value from "*head" in
vhost_svq_add_split:

svq->desc_state[qemu_head].elem = elem;
svq->desc_state[qemu_head].ndescs = ndescs;

For packed vq, something similar has to be done. My approach was
to have the index of svq->desc_state match the buffer id in the
tail descriptor of the chain.

The entire chain is written to the descriptor ring in the loop
in vhost_svq_add_packed. I am not sure if the index of
svq->desc_state should be the buffer id or if it should be a
descriptor index ("head_idx" or the index corresponding to the
tail of the chain).

Thanks,
Sahil

[1] https://lists.nongnu.org/archive/html/qemu-devel/2024-06/msg03512.html

Re: [RFC v5 3/7] vhost: Forward descriptors to device via packed SVQ
Posted by Eugenio Perez Martin 4 days ago
On Thu, Mar 27, 2025 at 7:42 PM Sahil Siddiq <icegambit91@gmail.com> wrote:
>
> Hi,
>
> On 3/26/25 1:33 PM, Eugenio Perez Martin wrote:
> > On Mon, Mar 24, 2025 at 3:14 PM Sahil Siddiq <icegambit91@gmail.com> wrote:
> >> On 3/24/25 7:29 PM, Sahil Siddiq wrote:
> >>> Implement the insertion of available buffers in the descriptor area of
> >>> packed shadow virtqueues. It takes into account descriptor chains, but
> >>> does not consider indirect descriptors.
> >>>
> >>> Enable the packed SVQ to forward the descriptors to the device.
> >>>
> >>> Signed-off-by: Sahil Siddiq <sahilcdq@proton.me>
> >>> ---
> >>> Changes from v4 -> v5:
> >>> - This was commit #2 in v4. This has been reordered to commit #3
> >>>     based on review comments.
> >>> - vhost-shadow-virtqueue.c:
> >>>     (vhost_svq_valid_features): Move addition of enums to commit #6
> >>>     based on review comments.
> >>>     (vhost_svq_add_packed): Set head_idx to buffer id instead of vring's
> >>>     index.
> >>>     (vhost_svq_kick): Split into vhost_svq_kick_split and
> >>>     vhost_svq_kick_packed.
> >>>     (vhost_svq_add): Use new vhost_svq_kick_* functions.
> >>>
> >>>    hw/virtio/vhost-shadow-virtqueue.c | 117 +++++++++++++++++++++++++++--
> >>>    1 file changed, 112 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> >>> index 4f74ad402a..6e16cd4bdf 100644
> >>> --- a/hw/virtio/vhost-shadow-virtqueue.c
> >>> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> >>> @@ -193,10 +193,83 @@ static void vhost_svq_add_split(VhostShadowVirtqueue *svq,
> >>>        /* Update the avail index after write the descriptor */
> >>>        smp_wmb();
> >>>        avail->idx = cpu_to_le16(svq->shadow_avail_idx);
> >>> +}
> >>> +
> >>> +/**
> >>> + * Write descriptors to SVQ packed vring
> >>> + *
> >>> + * @svq: The shadow virtqueue
> >>> + * @out_sg: The iovec to the guest
> >>> + * @out_num: Outgoing iovec length
> >>> + * @in_sg: The iovec from the guest
> >>> + * @in_num: Incoming iovec length
> >>> + * @sgs: Cache for hwaddr
> >>> + * @head: Saves current free_head
> >>> + */
> >>> +static void vhost_svq_add_packed(VhostShadowVirtqueue *svq,
> >>> +                                 const struct iovec *out_sg, size_t out_num,
> >>> +                                 const struct iovec *in_sg, size_t in_num,
> >>> +                                 hwaddr *sgs, unsigned *head)
> >>> +{
> >>> +    uint16_t id, curr, i, head_flags = 0, head_idx;
> >>> +    size_t num = out_num + in_num;
> >>> +    unsigned n;
> >>> +
> >>> +    struct vring_packed_desc *descs = svq->vring_packed.vring.desc;
> >>> +
> >>> +    head_idx = svq->vring_packed.next_avail_idx;
> >>
> >> Since "svq->vring_packed.next_avail_idx" is part of QEMU internals and not
> >> stored in guest memory, no endianness conversion is required here, right?
> >>
> >
> > Right!
>
> Understood.
>
> >>> +    i = head_idx;
> >>> +    id = svq->free_head;
> >>> +    curr = id;
> >>> +    *head = id;
> >>
> >> Should head be the buffer id or the idx of the descriptor ring where the
> >> first descriptor of a descriptor chain is inserted?
> >>
> >
> > The buffer id of the *last* descriptor of a chain. See "2.8.6 Next
> > Flag: Descriptor Chaining" at [1].
>
> Ah, yes. The second half of my question is incorrect.
>
> The tail descriptor of the chain includes the buffer id. In this implementation
> we place the same tail buffer id in other locations of the descriptor ring since
> they will be ignored anyway [1].
>
> The explanation below frames my query better.
>
> >>> +    /* Write descriptors to SVQ packed vring */
> >>> +    for (n = 0; n < num; n++) {
> >>> +        uint16_t flags = cpu_to_le16(svq->vring_packed.avail_used_flags |
> >>> +                                     (n < out_num ? 0 : VRING_DESC_F_WRITE) |
> >>> +                                     (n + 1 == num ? 0 : VRING_DESC_F_NEXT));
> >>> +        if (i == head_idx) {
> >>> +            head_flags = flags;
> >>> +        } else {
> >>> +            descs[i].flags = flags;
> >>> +        }
> >>> +
> >>> +        descs[i].addr = cpu_to_le64(sgs[n]);
> >>> +        descs[i].id = id;
> >>> +        if (n < out_num) {
> >>> +            descs[i].len = cpu_to_le32(out_sg[n].iov_len);
> >>> +        } else {
> >>> +            descs[i].len = cpu_to_le32(in_sg[n - out_num].iov_len);
> >>> +        }
> >>> +
> >>> +        curr = cpu_to_le16(svq->desc_next[curr]);
> >>> +
> >>> +        if (++i >= svq->vring_packed.vring.num) {
> >>> +            i = 0;
> >>> +            svq->vring_packed.avail_used_flags ^=
> >>> +                1 << VRING_PACKED_DESC_F_AVAIL |
> >>> +                1 << VRING_PACKED_DESC_F_USED;
> >>> +        }
> >>> +    }
> >>>
> >>> +    if (i <= head_idx) {
> >>> +        svq->vring_packed.avail_wrap_counter ^= 1;
> >>> +    }
> >>> +
> >>> +    svq->vring_packed.next_avail_idx = i;
> >>> +    svq->shadow_avail_idx = i;
> >>> +    svq->free_head = curr;
> >>> +
> >>> +    /*
> >>> +     * A driver MUST NOT make the first descriptor in the list
> >>> +     * available before all subsequent descriptors comprising
> >>> +     * the list are made available.
> >>> +     */
> >>> +    smp_wmb();
> >>> +    svq->vring_packed.vring.desc[head_idx].flags = head_flags;
> >>>    }
> >>>
> >>> -static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >>> +static void vhost_svq_kick_split(VhostShadowVirtqueue *svq)
> >>>    {
> >>>        bool needs_kick;
> >>>
> >>> @@ -209,7 +282,8 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >>>        if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >>>            uint16_t avail_event = le16_to_cpu(
> >>>                    *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]));
> >>> -        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
> >>> +        needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx,
> >>> +                     svq->shadow_avail_idx - 1);
> >>>        } else {
> >>>            needs_kick =
> >>>                    !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY));
> >>> @@ -222,6 +296,30 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
> >>>        event_notifier_set(&svq->hdev_kick);
> >>>    }
> >>>
> >>> +static void vhost_svq_kick_packed(VhostShadowVirtqueue *svq)
> >>> +{
> >>> +    bool needs_kick;
> >>> +
> >>> +    /*
> >>> +     * We need to expose the available array entries before checking
> >>> +     * notification suppressions.
> >>> +     */
> >>> +    smp_mb();
> >>> +
> >>> +    if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
> >>> +        return;
> >>> +    } else {
> >>> +        needs_kick = (svq->vring_packed.vring.device->flags !=
> >>> +                      cpu_to_le16(VRING_PACKED_EVENT_FLAG_DISABLE));
> >>> +    }
> >>> +
> >>> +    if (!needs_kick) {
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    event_notifier_set(&svq->hdev_kick);
> >>> +}
> >>> +
> >>>    /**
> >>>     * Add an element to a SVQ.
> >>>     *
> >>> @@ -258,13 +356,22 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
> >>>            return -EINVAL;
> >>>        }
> >>>
> >>> -    vhost_svq_add_split(svq, out_sg, out_num, in_sg,
> >>> -                        in_num, sgs, &qemu_head);
> >>> +    if (svq->is_packed) {
> >>> +        vhost_svq_add_packed(svq, out_sg, out_num, in_sg,
> >>> +                             in_num, sgs, &qemu_head);
> >>> +    } else {
> >>> +        vhost_svq_add_split(svq, out_sg, out_num, in_sg,
> >>> +                            in_num, sgs, &qemu_head);
> >>> +    }
> >>>
> >>>        svq->num_free -= ndescs;
> >>>        svq->desc_state[qemu_head].elem = elem;
> >>>        svq->desc_state[qemu_head].ndescs = ndescs;
> >>
> >> *head in vhost_svq_add_packed() is stored in "qemu_head" here.
> >>
> >
> > Sorry I don't get this, can you expand?
>
> Sure. In vhost_svq_add(), after the descriptors have been added
> (either using vhost_svq_add_split or vhost_svq_add_packed),
> VirtQueueElement elem and ndescs are both saved in the
> svq->desc_state array. "elem" and "ndescs" are later used when
> the guest consumes used descriptors from the device in
> vhost_svq_get_buf_(split|packed).
>
> For split vqs, the index of svq->desc_state where elem and ndescs are
> saved matches the index of the descriptor ring where the head of
> the descriptor chain is placed.
>
> In vhost_svq_add_split:
>
> *head = svq->free_head;
> [...]
> avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
> avail->ring[avail_idx] = cpu_to_le16(*head);
>
> "qemu_head" in vhost_svq_add gets its value from "*head" in
> vhost_svq_add_split:
>
> svq->desc_state[qemu_head].elem = elem;
> svq->desc_state[qemu_head].ndescs = ndescs;
>
> For packed vq, something similar has to be done. My approach was
> to have the index of svq->desc_state match the buffer id in the
> tail descriptor of the chain.
>
> The entire chain is written to the descriptor ring in the loop
> in vhost_svq_add_packed. I am not sure if the index of
> svq->desc_state should be the buffer id or if it should be a
> descriptor index ("head_idx" or the index corresponding to the
> tail of the chain).
>

I think both approaches should be valid. My advice is to follow
Linux's code and let it be the tail descriptor id. This descriptor id
is pushed and popped from vq->free_head in a stack style.

In addition to that, Linux also sets the same id to all the chain
elements. I think this is useful when dealing with bad devices. In
particular, QEMU's packed vq implementation looked at the first
descriptor's id, which is incorrect behavior.
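
Just to illustrate that flow (not code from this series; the used-side entry
point and svq->last_used_idx are assumptions, and wrap/endianness details are
trimmed), a hypothetical vhost_svq_get_buf_packed() could recover everything
from that single id, reusing vhost_svq_last_desc_of_chain() from the split
path:

/* Assumes the caller already checked that desc[used_idx] is marked used */
uint16_t used_idx = svq->last_used_idx;
uint16_t id = le16_to_cpu(svq->vring_packed.vring.desc[used_idx].id);
uint16_t num = svq->desc_state[id].ndescs;
VirtQueueElement *elem;

/* Push the chain back onto the free stack, keyed by that same buffer id */
svq->desc_next[vhost_svq_last_desc_of_chain(svq, num, id)] = svq->free_head;
svq->free_head = id;
svq->num_free += num;

elem = g_steal_pointer(&svq->desc_state[id].elem);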