[RFC PATCH v7 01/25] vhost: Track descriptor chain in private at SVQ

Eugenio Pérez posted 25 patches 3 years, 10 months ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>, "Gonglei (Arei)" <arei.gonglei@huawei.com>, Peter Xu <peterx@redhat.com>, Cornelia Huck <cohuck@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
There is a newer version of this series
[RFC PATCH v7 01/25] vhost: Track descriptor chain in private at SVQ
Posted by Eugenio Pérez 3 years, 10 months ago
Only the first descriptor of each chain was properly enqueued back.

While we're at it, harden SVQ: The device could have access to modify
them, and it definitely has access when we implement packed vq. Harden
SVQ by maintaining a private copy of the descriptor chain. Other fields
like buffer addresses are already maintained separately.

Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding")

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.h |  6 ++++++
 hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index e5e24c536d..c132c994e9 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue {
     /* Next VirtQueue element that guest made available */
     VirtQueueElement *next_guest_avail_elem;
 
+    /*
+     * Backup next field for each descriptor so we can recover securely, not
+     * needing to trust the device access.
+     */
+    uint16_t *desc_next;
+
     /* Next head to expose to the device */
     uint16_t shadow_avail_idx;
 
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index b232803d1b..a2531d5874 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
     for (n = 0; n < num; n++) {
         if (more_descs || (n + 1 < num)) {
             descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
+            descs[i].next = cpu_to_le16(svq->desc_next[i]);
         } else {
             descs[i].flags = flags;
         }
@@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
         descs[i].len = cpu_to_le32(iovec[n].iov_len);
 
         last = i;
-        i = cpu_to_le16(descs[i].next);
+        i = cpu_to_le16(svq->desc_next[i]);
     }
 
-    svq->free_head = le16_to_cpu(descs[last].next);
+    svq->free_head = le16_to_cpu(svq->desc_next[last]);
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
@@ -333,13 +334,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
     svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
 }
 
+static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
+                                             uint16_t num, uint16_t i)
+{
+    for (uint16_t j = 0; j < num; ++j) {
+        i = le16_to_cpu(svq->desc_next[i]);
+    }
+
+    return i;
+}
+
 static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
                                            uint32_t *len)
 {
-    vring_desc_t *descs = svq->vring.desc;
     const vring_used_t *used = svq->vring.used;
     vring_used_elem_t used_elem;
-    uint16_t last_used;
+    uint16_t last_used, last_used_chain, num;
 
     if (!vhost_svq_more_used(svq)) {
         return NULL;
@@ -365,7 +375,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
         return NULL;
     }
 
-    descs[used_elem.id].next = svq->free_head;
+    num = svq->ring_id_maps[used_elem.id]->in_num +
+          svq->ring_id_maps[used_elem.id]->out_num;
+    last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
+    svq->desc_next[last_used_chain] = svq->free_head;
     svq->free_head = used_elem.id;
 
     *len = used_elem.len;
@@ -540,8 +553,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
     svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
     memset(svq->vring.used, 0, device_size);
     svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
+    svq->desc_next = g_new0(uint16_t, svq->vring.num);
     for (unsigned i = 0; i < svq->vring.num - 1; i++) {
-        svq->vring.desc[i].next = cpu_to_le16(i + 1);
+        svq->desc_next[i] = cpu_to_le16(i + 1);
     }
 }
 
@@ -574,6 +588,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
         virtqueue_detach_element(svq->vq, next_avail_elem, 0);
     }
     svq->vq = NULL;
+    g_free(svq->desc_next);
     g_free(svq->ring_id_maps);
     qemu_vfree(svq->vring.desc);
     qemu_vfree(svq->vring.used);
-- 
2.27.0


Re: [RFC PATCH v7 01/25] vhost: Track descriptor chain in private at SVQ
Posted by Jason Wang 3 years, 10 months ago
在 2022/4/14 00:31, Eugenio Pérez 写道:
> Only the first one of them were properly enqueued back.


I wonder if it's better to use two patches:

1) using private chain

2) fix the chain issue

Patch looks good itself.

Thanks


>
> While we're at it, harden SVQ: The device could have access to modify
> them, and it definitely have access when we implement packed vq. Harden
> SVQ maintaining a private copy of the descriptor chain. Other fields
> like buffer addresses are already maintained sepparatedly.
>
> Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding")
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>   hw/virtio/vhost-shadow-virtqueue.h |  6 ++++++
>   hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++------
>   2 files changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> index e5e24c536d..c132c994e9 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue {
>       /* Next VirtQueue element that guest made available */
>       VirtQueueElement *next_guest_avail_elem;
>   
> +    /*
> +     * Backup next field for each descriptor so we can recover securely, not
> +     * needing to trust the device access.
> +     */
> +    uint16_t *desc_next;
> +
>       /* Next head to expose to the device */
>       uint16_t shadow_avail_idx;
>   
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> index b232803d1b..a2531d5874 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
>       for (n = 0; n < num; n++) {
>           if (more_descs || (n + 1 < num)) {
>               descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
> +            descs[i].next = cpu_to_le16(svq->desc_next[i]);
>           } else {
>               descs[i].flags = flags;
>           }
> @@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
>           descs[i].len = cpu_to_le32(iovec[n].iov_len);
>   
>           last = i;
> -        i = cpu_to_le16(descs[i].next);
> +        i = cpu_to_le16(svq->desc_next[i]);
>       }
>   
> -    svq->free_head = le16_to_cpu(descs[last].next);
> +    svq->free_head = le16_to_cpu(svq->desc_next[last]);
>   }
>   
>   static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> @@ -333,13 +334,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
>       svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
>   }
>   
> +static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
> +                                             uint16_t num, uint16_t i)
> +{
> +    for (uint16_t j = 0; j < num; ++j) {
> +        i = le16_to_cpu(svq->desc_next[i]);
> +    }
> +
> +    return i;
> +}
> +
>   static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
>                                              uint32_t *len)
>   {
> -    vring_desc_t *descs = svq->vring.desc;
>       const vring_used_t *used = svq->vring.used;
>       vring_used_elem_t used_elem;
> -    uint16_t last_used;
> +    uint16_t last_used, last_used_chain, num;
>   
>       if (!vhost_svq_more_used(svq)) {
>           return NULL;
> @@ -365,7 +375,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
>           return NULL;
>       }
>   
> -    descs[used_elem.id].next = svq->free_head;
> +    num = svq->ring_id_maps[used_elem.id]->in_num +
> +          svq->ring_id_maps[used_elem.id]->out_num;
> +    last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
> +    svq->desc_next[last_used_chain] = svq->free_head;
>       svq->free_head = used_elem.id;
>   
>       *len = used_elem.len;
> @@ -540,8 +553,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
>       svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
>       memset(svq->vring.used, 0, device_size);
>       svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
> +    svq->desc_next = g_new0(uint16_t, svq->vring.num);
>       for (unsigned i = 0; i < svq->vring.num - 1; i++) {
> -        svq->vring.desc[i].next = cpu_to_le16(i + 1);
> +        svq->desc_next[i] = cpu_to_le16(i + 1);
>       }
>   }
>   
> @@ -574,6 +588,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
>           virtqueue_detach_element(svq->vq, next_avail_elem, 0);
>       }
>       svq->vq = NULL;
> +    g_free(svq->desc_next);
>       g_free(svq->ring_id_maps);
>       qemu_vfree(svq->vring.desc);
>       qemu_vfree(svq->vring.used);


Re: [RFC PATCH v7 01/25] vhost: Track descriptor chain in private at SVQ
Posted by Eugenio Perez Martin 3 years, 9 months ago
On Thu, Apr 14, 2022 at 5:48 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2022/4/14 00:31, Eugenio Pérez 写道:
> > Only the first one of them were properly enqueued back.
>
>
> I wonder if it's better to use two patches:
>
> 1) using private chain
>
> 2) fix the chain issue
>
> Patch looks good itself.
>
> Thanks
>

Sure, it can be done that way for the next version.

Thanks!

>
> >
> > While we're at it, harden SVQ: The device could have access to modify
> > them, and it definitely have access when we implement packed vq. Harden
> > SVQ maintaining a private copy of the descriptor chain. Other fields
> > like buffer addresses are already maintained sepparatedly.
> >
> > Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding")
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> >   hw/virtio/vhost-shadow-virtqueue.h |  6 ++++++
> >   hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++------
> >   2 files changed, 27 insertions(+), 6 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> > index e5e24c536d..c132c994e9 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue {
> >       /* Next VirtQueue element that guest made available */
> >       VirtQueueElement *next_guest_avail_elem;
> >
> > +    /*
> > +     * Backup next field for each descriptor so we can recover securely, not
> > +     * needing to trust the device access.
> > +     */
> > +    uint16_t *desc_next;
> > +
> >       /* Next head to expose to the device */
> >       uint16_t shadow_avail_idx;
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> > index b232803d1b..a2531d5874 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> >       for (n = 0; n < num; n++) {
> >           if (more_descs || (n + 1 < num)) {
> >               descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
> > +            descs[i].next = cpu_to_le16(svq->desc_next[i]);
> >           } else {
> >               descs[i].flags = flags;
> >           }
> > @@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
> >           descs[i].len = cpu_to_le32(iovec[n].iov_len);
> >
> >           last = i;
> > -        i = cpu_to_le16(descs[i].next);
> > +        i = cpu_to_le16(svq->desc_next[i]);
> >       }
> >
> > -    svq->free_head = le16_to_cpu(descs[last].next);
> > +    svq->free_head = le16_to_cpu(svq->desc_next[last]);
> >   }
> >
> >   static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > @@ -333,13 +334,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
> >       svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
> >   }
> >
> > +static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
> > +                                             uint16_t num, uint16_t i)
> > +{
> > +    for (uint16_t j = 0; j < num; ++j) {
> > +        i = le16_to_cpu(svq->desc_next[i]);
> > +    }
> > +
> > +    return i;
> > +}
> > +
> >   static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
> >                                              uint32_t *len)
> >   {
> > -    vring_desc_t *descs = svq->vring.desc;
> >       const vring_used_t *used = svq->vring.used;
> >       vring_used_elem_t used_elem;
> > -    uint16_t last_used;
> > +    uint16_t last_used, last_used_chain, num;
> >
> >       if (!vhost_svq_more_used(svq)) {
> >           return NULL;
> > @@ -365,7 +375,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
> >           return NULL;
> >       }
> >
> > -    descs[used_elem.id].next = svq->free_head;
> > +    num = svq->ring_id_maps[used_elem.id]->in_num +
> > +          svq->ring_id_maps[used_elem.id]->out_num;
> > +    last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
> > +    svq->desc_next[last_used_chain] = svq->free_head;
> >       svq->free_head = used_elem.id;
> >
> >       *len = used_elem.len;
> > @@ -540,8 +553,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
> >       svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
> >       memset(svq->vring.used, 0, device_size);
> >       svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
> > +    svq->desc_next = g_new0(uint16_t, svq->vring.num);
> >       for (unsigned i = 0; i < svq->vring.num - 1; i++) {
> > -        svq->vring.desc[i].next = cpu_to_le16(i + 1);
> > +        svq->desc_next[i] = cpu_to_le16(i + 1);
> >       }
> >   }
> >
> > @@ -574,6 +588,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
> >           virtqueue_detach_element(svq->vq, next_avail_elem, 0);
> >       }
> >       svq->vq = NULL;
> > +    g_free(svq->desc_next);
> >       g_free(svq->ring_id_maps);
> >       qemu_vfree(svq->vring.desc);
> >       qemu_vfree(svq->vring.used);
>