[PATCH v4 16/19] vdpa: Buffer CVQ support on shadow virtqueue

Eugenio Pérez posted 19 patches 3 years, 6 months ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
There is a newer version of this series
[PATCH v4 16/19] vdpa: Buffer CVQ support on shadow virtqueue
Posted by Eugenio Pérez 3 years, 6 months ago
Introduce the control virtqueue support for vDPA shadow virtqueue. This
is needed for advanced networking features like rx filtering.

Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
TOCTOU with the guest's or device's memory every time there is a device
model change.  Otherwise, the guest could change the memory content
between the time qemu reads it and the time the device reads it.

To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
implemented.  If the virtio-net driver changes the MAC address, the
virtio-net device model will be updated with the new one, and an rx
filtering change event will be raised.

More cvq commands could be added here straightforwardly but they have
not been tested.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 net/vhost-vdpa.c | 211 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 204 insertions(+), 7 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 2e3b6b10d8..3915b148c4 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
     NetClientState nc;
     struct vhost_vdpa vhost_vdpa;
     VHostNetState *vhost_net;
+
+    /* Control commands shadow buffers */
+    void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
     bool started;
 } VhostVDPAState;
 
@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
 
+    qemu_vfree(s->cvq_cmd_out_buffer);
+    qemu_vfree(s->cvq_cmd_in_buffer);
     if (s->vhost_net) {
         vhost_net_cleanup(s->vhost_net);
         g_free(s->vhost_net);
@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
         .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
+{
+    VhostIOVATree *tree = v->iova_tree;
+    DMAMap needle = {
+        /*
+         * No need to specify size or to look for more translations since
+         * this contiguous chunk was allocated by us.
+         */
+        .translated_addr = (hwaddr)(uintptr_t)addr,
+    };
+    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
+    int r;
+
+    if (unlikely(!map)) {
+        error_report("Cannot locate expected map");
+        return;
+    }
+
+    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
+    if (unlikely(r != 0)) {
+        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
+    }
+
+    vhost_iova_tree_remove(tree, map);
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+    /*
+     * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
+     * In buffer is always 1 byte, so it should fit here
+     */
+    return sizeof(struct virtio_net_ctrl_hdr) +
+           2 * sizeof(struct virtio_net_ctrl_mac) +
+           MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
+}
+
+/** Copy and map a guest buffer. */
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
+                                   const struct iovec *out_data,
+                                   size_t out_num, size_t data_len, void *buf,
+                                   size_t *written, bool write)
+{
+    DMAMap map = {};
+    int r;
+
+    if (unlikely(!data_len)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
+                      __func__, write ? "in" : "out");
+        return false;
+    }
+
+    *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
+    map.translated_addr = (hwaddr)(uintptr_t)buf;
+    map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
+    map.perm = write ? IOMMU_RW : IOMMU_RO,
+    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
+    if (unlikely(r != IOVA_OK)) {
+        error_report("Cannot map injected element");
+        return false;
+    }
+
+    r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
+                           !write);
+    if (unlikely(r < 0)) {
+        goto dma_map_err;
+    }
+
+    return true;
+
+dma_map_err:
+    vhost_iova_tree_remove(v->iova_tree, &map);
+    return false;
+}
+
 /**
- * Forward buffer for the moment.
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
+ *
+ * @iov: [0] is the out buffer, [1] is the in one
+ */
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
+                                        VirtQueueElement *elem,
+                                        struct iovec *iov)
+{
+    size_t in_copied;
+    bool ok;
+
+    iov[0].iov_base = s->cvq_cmd_out_buffer;
+    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
+                                vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
+                                &iov[0].iov_len, false);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    iov[1].iov_base = s->cvq_cmd_in_buffer;
+    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
+                                sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
+                                &in_copied, true);
+    if (unlikely(!ok)) {
+        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
+        return false;
+    }
+
+    iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
+    return true;
+}
+
+/**
+ * Do not forward commands not supported by SVQ. Otherwise, the device could
+ * accept it and qemu would not know how to update the device model.
+ */
+static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
+                                            size_t out_num)
+{
+    struct virtio_net_ctrl_hdr ctrl;
+    size_t n;
+
+    n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
+    if (unlikely(n < sizeof(ctrl))) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: invalid legnth of out buffer %zu\n", __func__, n);
+        return false;
+    }
+
+    switch (ctrl.class) {
+    case VIRTIO_NET_CTRL_MAC:
+        switch (ctrl.cmd) {
+        case VIRTIO_NET_CTRL_MAC_ADDR_SET:
+            return true;
+        default:
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
+                          __func__, ctrl.cmd);
+        };
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
+                      __func__, ctrl.class);
+    };
+
+    return false;
+}
+
+/**
+ * Validate and copy control virtqueue commands.
+ *
+ * Following QEMU guidelines, we offer a copy of the buffers to the device to
+ * prevent TOCTOU bugs.
  */
 static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
                                             VirtQueueElement *elem,
                                             void *opaque)
 {
-    unsigned int n = elem->out_num + elem->in_num;
-    g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
+    VhostVDPAState *s = opaque;
     size_t in_len, dev_written;
     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+    /* out and in buffers sent to the device */
+    struct iovec dev_buffers[2] = {
+        { .iov_base = s->cvq_cmd_out_buffer },
+        { .iov_base = s->cvq_cmd_in_buffer },
+    };
+    /* in buffer used for device model */
+    const struct iovec in = {
+        .iov_base = &status,
+        .iov_len = sizeof(status),
+    };
     int r;
+    bool ok;
 
-    memcpy(dev_buffers, elem->out_sg, elem->out_num);
-    memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
+    ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
+    if (unlikely(!ok)) {
+        goto out;
+    }
 
-    r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
-                      elem->in_num, elem);
+    ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
+    if (unlikely(!ok)) {
+        goto out;
+    }
+
+    r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
     if (unlikely(r != 0)) {
         if (unlikely(r == -ENOSPC)) {
             qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
@@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
     dev_written = vhost_svq_poll(svq);
     if (unlikely(dev_written < sizeof(status))) {
         error_report("Insufficient written data (%zu)", dev_written);
+        goto out;
+    }
+
+    memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
+    if (status != VIRTIO_NET_OK) {
+        goto out;
+    }
+
+    status = VIRTIO_NET_ERR;
+    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
+    if (status != VIRTIO_NET_OK) {
+        error_report("Bad CVQ processing in model");
     }
 
 out:
@@ -234,6 +418,12 @@ out:
     }
     vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
     g_free(elem);
+    if (dev_buffers[0].iov_base) {
+        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
+    }
+    if (dev_buffers[1].iov_base) {
+        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
+    }
     return r;
 }
 
@@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
     s->vhost_vdpa.device_fd = vdpa_device_fd;
     s->vhost_vdpa.index = queue_pair_index;
     if (!is_datapath) {
+        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
+                                            vhost_vdpa_net_cvq_cmd_page_len());
+        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
+        s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
+                                            vhost_vdpa_net_cvq_cmd_page_len());
+        memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
+
         s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
         s->vhost_vdpa.shadow_vq_ops_opaque = s;
     }
-- 
2.31.1


Re: [PATCH v4 16/19] vdpa: Buffer CVQ support on shadow virtqueue
Posted by Jason Wang 3 years, 6 months ago
在 2022/7/18 18:29, Eugenio Pérez 写道:
> Introduce the control virtqueue support for vDPA shadow virtqueue. This
> is needed for advanced networking features like rx filtering.
>
> Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
> TOCTOU with the guest's or device's memory every time there is a device
> model change.  Otherwise, the guest could change the memory content in
> the time between qemu and the device read it.
>
> To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
> implemented.  If the virtio-net driver changes MAC the virtio-net device
> model will be updated with the new one, and a rx filtering change event
> will be raised.
>
> More cvq commands could be added here straightforwardly but they have
> not been tested.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>   net/vhost-vdpa.c | 211 +++++++++++++++++++++++++++++++++++++++++++++--
>   1 file changed, 204 insertions(+), 7 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 2e3b6b10d8..3915b148c4 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
>       NetClientState nc;
>       struct vhost_vdpa vhost_vdpa;
>       VHostNetState *vhost_net;
> +
> +    /* Control commands shadow buffers */
> +    void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
>       bool started;
>   } VhostVDPAState;
>   
> @@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
>   {
>       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
>   
> +    qemu_vfree(s->cvq_cmd_out_buffer);
> +    qemu_vfree(s->cvq_cmd_in_buffer);
>       if (s->vhost_net) {
>           vhost_net_cleanup(s->vhost_net);
>           g_free(s->vhost_net);
> @@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
>           .check_peer_type = vhost_vdpa_check_peer_type,
>   };
>   
> +static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
> +{
> +    VhostIOVATree *tree = v->iova_tree;
> +    DMAMap needle = {
> +        /*
> +         * No need to specify size or to look for more translations since
> +         * this contiguous chunk was allocated by us.
> +         */
> +        .translated_addr = (hwaddr)(uintptr_t)addr,
> +    };
> +    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
> +    int r;
> +
> +    if (unlikely(!map)) {
> +        error_report("Cannot locate expected map");
> +        return;
> +    }
> +
> +    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
> +    if (unlikely(r != 0)) {
> +        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
> +    }
> +
> +    vhost_iova_tree_remove(tree, map);
> +}
> +
> +static size_t vhost_vdpa_net_cvq_cmd_len(void)
> +{
> +    /*
> +     * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
> +     * In buffer is always 1 byte, so it should fit here
> +     */
> +    return sizeof(struct virtio_net_ctrl_hdr) +
> +           2 * sizeof(struct virtio_net_ctrl_mac) +
> +           MAC_TABLE_ENTRIES * ETH_ALEN;
> +}
> +
> +static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
> +{
> +    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
> +}
> +
> +/** Copy and map a guest buffer. */
> +static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
> +                                   const struct iovec *out_data,
> +                                   size_t out_num, size_t data_len, void *buf,
> +                                   size_t *written, bool write)
> +{
> +    DMAMap map = {};
> +    int r;
> +
> +    if (unlikely(!data_len)) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
> +                      __func__, write ? "in" : "out");
> +        return false;
> +    }
> +
> +    *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
> +    map.translated_addr = (hwaddr)(uintptr_t)buf;
> +    map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
> +    map.perm = write ? IOMMU_RW : IOMMU_RO,
> +    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
> +    if (unlikely(r != IOVA_OK)) {
> +        error_report("Cannot map injected element");
> +        return false;
> +    }
> +
> +    r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
> +                           !write);
> +    if (unlikely(r < 0)) {
> +        goto dma_map_err;
> +    }
> +
> +    return true;
> +
> +dma_map_err:
> +    vhost_iova_tree_remove(v->iova_tree, &map);
> +    return false;
> +}
> +
>   /**
> - * Forward buffer for the moment.
> + * Copy the guest element into a dedicated buffer suitable to be sent to NIC
> + *
> + * @iov: [0] is the out buffer, [1] is the in one
> + */
> +static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
> +                                        VirtQueueElement *elem,
> +                                        struct iovec *iov)
> +{
> +    size_t in_copied;
> +    bool ok;
> +
> +    iov[0].iov_base = s->cvq_cmd_out_buffer;
> +    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
> +                                vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
> +                                &iov[0].iov_len, false);
> +    if (unlikely(!ok)) {
> +        return false;
> +    }
> +
> +    iov[1].iov_base = s->cvq_cmd_in_buffer;
> +    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
> +                                sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
> +                                &in_copied, true);
> +    if (unlikely(!ok)) {
> +        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
> +        return false;
> +    }
> +
> +    iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
> +    return true;
> +}
> +
> +/**
> + * Do not forward commands not supported by SVQ. Otherwise, the device could
> + * accept it and qemu would not know how to update the device model.
> + */
> +static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
> +                                            size_t out_num)
> +{
> +    struct virtio_net_ctrl_hdr ctrl;
> +    size_t n;
> +
> +    n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
> +    if (unlikely(n < sizeof(ctrl))) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: invalid legnth of out buffer %zu\n", __func__, n);
> +        return false;
> +    }
> +
> +    switch (ctrl.class) {
> +    case VIRTIO_NET_CTRL_MAC:
> +        switch (ctrl.cmd) {
> +        case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> +            return true;
> +        default:
> +            qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
> +                          __func__, ctrl.cmd);
> +        };
> +        break;
> +    default:
> +        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
> +                      __func__, ctrl.class);
> +    };
> +
> +    return false;
> +}
> +
> +/**
> + * Validate and copy control virtqueue commands.
> + *
> + * Following QEMU guidelines, we offer a copy of the buffers to the device to
> + * prevent TOCTOU bugs.
>    */
>   static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
>                                               VirtQueueElement *elem,
>                                               void *opaque)
>   {
> -    unsigned int n = elem->out_num + elem->in_num;
> -    g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
> +    VhostVDPAState *s = opaque;
>       size_t in_len, dev_written;
>       virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> +    /* out and in buffers sent to the device */
> +    struct iovec dev_buffers[2] = {
> +        { .iov_base = s->cvq_cmd_out_buffer },
> +        { .iov_base = s->cvq_cmd_in_buffer },
> +    };
> +    /* in buffer used for device model */
> +    const struct iovec in = {
> +        .iov_base = &status,
> +        .iov_len = sizeof(status),
> +    };
>       int r;


I got this:

FAILED: libcommon.fa.p/net_vhost-vdpa.c.o
cc -m64 -mcx16 -Ilibcommon.fa.p -I../dtc/libfdt -I../slirp 
-I../slirp/src -I/usr/include/capstone -I/usr/include/pixman-1 
-I/usr/include/libpng16 -I/usr/include/libmount -I/usr/include/blkid 
-I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include 
-I/usr/include/gio-unix-2.0 -fdiagnostics-color=auto -Wall -Winvalid-pch 
-Werror -std=gnu11 -O2 -g -isystem /home/devel/git/qemu/linux-headers 
-isystem linux-headers -iquote . -iquote /home/devel/git/qemu -iquote 
/home/devel/git/qemu/include -iquote /home/devel/git/qemu/tcg/i386 
-pthread -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE 
-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes 
-Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes 
-fno-strict-aliasing -fno-common -fwrapv -Wold-style-declaration 
-Wold-style-definition -Wtype-limits -Wformat-security -Wformat-y2k 
-Winit-self -Wignored-qualifiers -Wempty-body -Wnested-externs 
-Wendif-labels -Wexpansion-to-defined -Wimplicit-fallthrough=2 
-Wno-missing-include-dirs -Wno-shift-negative-value -Wno-psabi 
-fstack-protector-strong -fPIE -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=600 
-DNCURSES_WIDECHAR=1 -MD -MQ libcommon.fa.p/net_vhost-vdpa.c.o -MF 
libcommon.fa.p/net_vhost-vdpa.c.o.d -o libcommon.fa.p/net_vhost-vdpa.c.o 
-c ../net/vhost-vdpa.c
../net/vhost-vdpa.c: In function ‘vhost_vdpa_net_handle_ctrl_avail’:
../net/vhost-vdpa.c:427:12: error: ‘r’ may be used uninitialized in this 
function [-Werror=maybe-uninitialized]
   427 |     return r;
       |            ^
cc1: all warnings being treated as errors


> +    bool ok;
>   
> -    memcpy(dev_buffers, elem->out_sg, elem->out_num);
> -    memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
> +    ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);


I think it's a good idea to let this function return int instead of a boolean.

Thanks


> +    if (unlikely(!ok)) {
> +        goto out;
> +    }
>   
> -    r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
> -                      elem->in_num, elem);
> +    ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
> +    if (unlikely(!ok)) {
> +        goto out;
> +    }
> +
> +    r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
>       if (unlikely(r != 0)) {
>           if (unlikely(r == -ENOSPC)) {
>               qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
> @@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
>       dev_written = vhost_svq_poll(svq);
>       if (unlikely(dev_written < sizeof(status))) {
>           error_report("Insufficient written data (%zu)", dev_written);
> +        goto out;
> +    }
> +
> +    memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
> +    if (status != VIRTIO_NET_OK) {
> +        goto out;
> +    }
> +
> +    status = VIRTIO_NET_ERR;
> +    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
> +    if (status != VIRTIO_NET_OK) {
> +        error_report("Bad CVQ processing in model");
>       }
>   
>   out:
> @@ -234,6 +418,12 @@ out:
>       }
>       vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
>       g_free(elem);
> +    if (dev_buffers[0].iov_base) {
> +        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
> +    }
> +    if (dev_buffers[1].iov_base) {
> +        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
> +    }
>       return r;
>   }
>   
> @@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
>       s->vhost_vdpa.device_fd = vdpa_device_fd;
>       s->vhost_vdpa.index = queue_pair_index;
>       if (!is_datapath) {
> +        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
> +                                            vhost_vdpa_net_cvq_cmd_page_len());
> +        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
> +        s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
> +                                            vhost_vdpa_net_cvq_cmd_page_len());
> +        memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
> +
>           s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
>           s->vhost_vdpa.shadow_vq_ops_opaque = s;
>       }


Re: [PATCH v4 16/19] vdpa: Buffer CVQ support on shadow virtqueue
Posted by Eugenio Perez Martin 3 years, 6 months ago
On Tue, Jul 19, 2022 at 4:39 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2022/7/18 18:29, Eugenio Pérez 写道:
> > Introduce the control virtqueue support for vDPA shadow virtqueue. This
> > is needed for advanced networking features like rx filtering.
> >
> > Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
> > TOCTOU with the guest's or device's memory every time there is a device
> > model change.  Otherwise, the guest could change the memory content in
> > the time between qemu and the device read it.
> >
> > To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
> > implemented.  If the virtio-net driver changes MAC the virtio-net device
> > model will be updated with the new one, and a rx filtering change event
> > will be raised.
> >
> > More cvq commands could be added here straightforwardly but they have
> > not been tested.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> >   net/vhost-vdpa.c | 211 +++++++++++++++++++++++++++++++++++++++++++++--
> >   1 file changed, 204 insertions(+), 7 deletions(-)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 2e3b6b10d8..3915b148c4 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
> >       NetClientState nc;
> >       struct vhost_vdpa vhost_vdpa;
> >       VHostNetState *vhost_net;
> > +
> > +    /* Control commands shadow buffers */
> > +    void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
> >       bool started;
> >   } VhostVDPAState;
> >
> > @@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
> >   {
> >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> >
> > +    qemu_vfree(s->cvq_cmd_out_buffer);
> > +    qemu_vfree(s->cvq_cmd_in_buffer);
> >       if (s->vhost_net) {
> >           vhost_net_cleanup(s->vhost_net);
> >           g_free(s->vhost_net);
> > @@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
> >           .check_peer_type = vhost_vdpa_check_peer_type,
> >   };
> >
> > +static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
> > +{
> > +    VhostIOVATree *tree = v->iova_tree;
> > +    DMAMap needle = {
> > +        /*
> > +         * No need to specify size or to look for more translations since
> > +         * this contiguous chunk was allocated by us.
> > +         */
> > +        .translated_addr = (hwaddr)(uintptr_t)addr,
> > +    };
> > +    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
> > +    int r;
> > +
> > +    if (unlikely(!map)) {
> > +        error_report("Cannot locate expected map");
> > +        return;
> > +    }
> > +
> > +    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
> > +    if (unlikely(r != 0)) {
> > +        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
> > +    }
> > +
> > +    vhost_iova_tree_remove(tree, map);
> > +}
> > +
> > +static size_t vhost_vdpa_net_cvq_cmd_len(void)
> > +{
> > +    /*
> > +     * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
> > +     * In buffer is always 1 byte, so it should fit here
> > +     */
> > +    return sizeof(struct virtio_net_ctrl_hdr) +
> > +           2 * sizeof(struct virtio_net_ctrl_mac) +
> > +           MAC_TABLE_ENTRIES * ETH_ALEN;
> > +}
> > +
> > +static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
> > +{
> > +    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
> > +}
> > +
> > +/** Copy and map a guest buffer. */
> > +static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
> > +                                   const struct iovec *out_data,
> > +                                   size_t out_num, size_t data_len, void *buf,
> > +                                   size_t *written, bool write)
> > +{
> > +    DMAMap map = {};
> > +    int r;
> > +
> > +    if (unlikely(!data_len)) {
> > +        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
> > +                      __func__, write ? "in" : "out");
> > +        return false;
> > +    }
> > +
> > +    *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
> > +    map.translated_addr = (hwaddr)(uintptr_t)buf;
> > +    map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
> > +    map.perm = write ? IOMMU_RW : IOMMU_RO,
> > +    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
> > +    if (unlikely(r != IOVA_OK)) {
> > +        error_report("Cannot map injected element");
> > +        return false;
> > +    }
> > +
> > +    r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
> > +                           !write);
> > +    if (unlikely(r < 0)) {
> > +        goto dma_map_err;
> > +    }
> > +
> > +    return true;
> > +
> > +dma_map_err:
> > +    vhost_iova_tree_remove(v->iova_tree, &map);
> > +    return false;
> > +}
> > +
> >   /**
> > - * Forward buffer for the moment.
> > + * Copy the guest element into a dedicated buffer suitable to be sent to NIC
> > + *
> > + * @iov: [0] is the out buffer, [1] is the in one
> > + */
> > +static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
> > +                                        VirtQueueElement *elem,
> > +                                        struct iovec *iov)
> > +{
> > +    size_t in_copied;
> > +    bool ok;
> > +
> > +    iov[0].iov_base = s->cvq_cmd_out_buffer;
> > +    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
> > +                                vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
> > +                                &iov[0].iov_len, false);
> > +    if (unlikely(!ok)) {
> > +        return false;
> > +    }
> > +
> > +    iov[1].iov_base = s->cvq_cmd_in_buffer;
> > +    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
> > +                                sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
> > +                                &in_copied, true);
> > +    if (unlikely(!ok)) {
> > +        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
> > +        return false;
> > +    }
> > +
> > +    iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
> > +    return true;
> > +}
> > +
> > +/**
> > + * Do not forward commands not supported by SVQ. Otherwise, the device could
> > + * accept it and qemu would not know how to update the device model.
> > + */
> > +static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
> > +                                            size_t out_num)
> > +{
> > +    struct virtio_net_ctrl_hdr ctrl;
> > +    size_t n;
> > +
> > +    n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
> > +    if (unlikely(n < sizeof(ctrl))) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: invalid legnth of out buffer %zu\n", __func__, n);
> > +        return false;
> > +    }
> > +
> > +    switch (ctrl.class) {
> > +    case VIRTIO_NET_CTRL_MAC:
> > +        switch (ctrl.cmd) {
> > +        case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> > +            return true;
> > +        default:
> > +            qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
> > +                          __func__, ctrl.cmd);
> > +        };
> > +        break;
> > +    default:
> > +        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
> > +                      __func__, ctrl.class);
> > +    };
> > +
> > +    return false;
> > +}
> > +
> > +/**
> > + * Validate and copy control virtqueue commands.
> > + *
> > + * Following QEMU guidelines, we offer a copy of the buffers to the device to
> > + * prevent TOCTOU bugs.
> >    */
> >   static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> >                                               VirtQueueElement *elem,
> >                                               void *opaque)
> >   {
> > -    unsigned int n = elem->out_num + elem->in_num;
> > -    g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
> > +    VhostVDPAState *s = opaque;
> >       size_t in_len, dev_written;
> >       virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > +    /* out and in buffers sent to the device */
> > +    struct iovec dev_buffers[2] = {
> > +        { .iov_base = s->cvq_cmd_out_buffer },
> > +        { .iov_base = s->cvq_cmd_in_buffer },
> > +    };
> > +    /* in buffer used for device model */
> > +    const struct iovec in = {
> > +        .iov_base = &status,
> > +        .iov_len = sizeof(status),
> > +    };
> >       int r;
>
>
> I got this:
>
> FAILED: libcommon.fa.p/net_vhost-vdpa.c.o
> cc -m64 -mcx16 -Ilibcommon.fa.p -I../dtc/libfdt -I../slirp
> -I../slirp/src -I/usr/include/capstone -I/usr/include/pixman-1
> -I/usr/include/libpng16 -I/usr/include/libmount -I/usr/include/blkid
> -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include
> -I/usr/include/gio-unix-2.0 -fdiagnostics-color=auto -Wall -Winvalid-pch
> -Werror -std=gnu11 -O2 -g -isystem /home/devel/git/qemu/linux-headers
> -isystem linux-headers -iquote . -iquote /home/devel/git/qemu -iquote
> /home/devel/git/qemu/include -iquote /home/devel/git/qemu/tcg/i386
> -pthread -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE
> -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes
> -Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes
> -fno-strict-aliasing -fno-common -fwrapv -Wold-style-declaration
> -Wold-style-definition -Wtype-limits -Wformat-security -Wformat-y2k
> -Winit-self -Wignored-qualifiers -Wempty-body -Wnested-externs
> -Wendif-labels -Wexpansion-to-defined -Wimplicit-fallthrough=2
> -Wno-missing-include-dirs -Wno-shift-negative-value -Wno-psabi
> -fstack-protector-strong -fPIE -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=600
> -DNCURSES_WIDECHAR=1 -MD -MQ libcommon.fa.p/net_vhost-vdpa.c.o -MF
> libcommon.fa.p/net_vhost-vdpa.c.o.d -o libcommon.fa.p/net_vhost-vdpa.c.o
> -c ../net/vhost-vdpa.c
> ../net/vhost-vdpa.c: In function ‘vhost_vdpa_net_handle_ctrl_avail’:
> ../net/vhost-vdpa.c:427:12: error: ‘r’ may be used uninitialized in this
> function [-Werror=maybe-uninitialized]
>    427 |     return r;
>        |            ^
> cc1: all warnings being treated as errors
>

Sorry, I rebased with these applied (or half-applied) [1].

I'll send a new version based on the current master in a moment.

Thanks!

[1] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg01986.html

>
> > +    bool ok;
> >
> > -    memcpy(dev_buffers, elem->out_sg, elem->out_num);
> > -    memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
> > +    ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
>
>
> I think it's a good idea to let this function return int instead of a boolean.
>
> Thanks
>
>
> > +    if (unlikely(!ok)) {
> > +        goto out;
> > +    }
> >
> > -    r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
> > -                      elem->in_num, elem);
> > +    ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
> > +    if (unlikely(!ok)) {
> > +        goto out;
> > +    }
> > +
> > +    r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
> >       if (unlikely(r != 0)) {
> >           if (unlikely(r == -ENOSPC)) {
> >               qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
> > @@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> >       dev_written = vhost_svq_poll(svq);
> >       if (unlikely(dev_written < sizeof(status))) {
> >           error_report("Insufficient written data (%zu)", dev_written);
> > +        goto out;
> > +    }
> > +
> > +    memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
> > +    if (status != VIRTIO_NET_OK) {
> > +        goto out;
> > +    }
> > +
> > +    status = VIRTIO_NET_ERR;
> > +    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
> > +    if (status != VIRTIO_NET_OK) {
> > +        error_report("Bad CVQ processing in model");
> >       }
> >
> >   out:
> > @@ -234,6 +418,12 @@ out:
> >       }
> >       vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
> >       g_free(elem);
> > +    if (dev_buffers[0].iov_base) {
> > +        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
> > +    }
> > +    if (dev_buffers[1].iov_base) {
> > +        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
> > +    }
> >       return r;
> >   }
> >
> > @@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
> >       s->vhost_vdpa.device_fd = vdpa_device_fd;
> >       s->vhost_vdpa.index = queue_pair_index;
> >       if (!is_datapath) {
> > +        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
> > +                                            vhost_vdpa_net_cvq_cmd_page_len());
> > +        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
> > +        s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
> > +                                            vhost_vdpa_net_cvq_cmd_page_len());
> > +        memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
> > +
> >           s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
> >           s->vhost_vdpa.shadow_vq_ops_opaque = s;
> >       }
>
Re: [PATCH v4 16/19] vdpa: Buffer CVQ support on shadow virtqueue
Posted by Eugenio Perez Martin 3 years, 6 months ago
On Tue, Jul 19, 2022 at 7:42 AM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Tue, Jul 19, 2022 at 4:39 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> >
> > 在 2022/7/18 18:29, Eugenio Pérez 写道:
> > > Introduce the control virtqueue support for vDPA shadow virtqueue. This
> > > is needed for advanced networking features like rx filtering.
> > >
> > > Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
> > > TOCTOU with the guest's or device's memory every time there is a device
> > > model change.  Otherwise, the guest could change the memory content in
> > > the time between qemu and the device read it.
> > >
> > > To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
> > > implemented.  If the virtio-net driver changes MAC the virtio-net device
> > > model will be updated with the new one, and a rx filtering change event
> > > will be raised.
> > >
> > > More cvq commands could be added here straightforwardly but they have
> > > not been tested.
> > >
> > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > ---
> > >   net/vhost-vdpa.c | 211 +++++++++++++++++++++++++++++++++++++++++++++--
> > >   1 file changed, 204 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > index 2e3b6b10d8..3915b148c4 100644
> > > --- a/net/vhost-vdpa.c
> > > +++ b/net/vhost-vdpa.c
> > > @@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
> > >       NetClientState nc;
> > >       struct vhost_vdpa vhost_vdpa;
> > >       VHostNetState *vhost_net;
> > > +
> > > +    /* Control commands shadow buffers */
> > > +    void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
> > >       bool started;
> > >   } VhostVDPAState;
> > >
> > > @@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
> > >   {
> > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > >
> > > +    qemu_vfree(s->cvq_cmd_out_buffer);
> > > +    qemu_vfree(s->cvq_cmd_in_buffer);
> > >       if (s->vhost_net) {
> > >           vhost_net_cleanup(s->vhost_net);
> > >           g_free(s->vhost_net);
> > > @@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
> > >           .check_peer_type = vhost_vdpa_check_peer_type,
> > >   };
> > >
> > > +static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
> > > +{
> > > +    VhostIOVATree *tree = v->iova_tree;
> > > +    DMAMap needle = {
> > > +        /*
> > > +         * No need to specify size or to look for more translations since
> > > +         * this contiguous chunk was allocated by us.
> > > +         */
> > > +        .translated_addr = (hwaddr)(uintptr_t)addr,
> > > +    };
> > > +    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
> > > +    int r;
> > > +
> > > +    if (unlikely(!map)) {
> > > +        error_report("Cannot locate expected map");
> > > +        return;
> > > +    }
> > > +
> > > +    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
> > > +    if (unlikely(r != 0)) {
> > > +        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
> > > +    }
> > > +
> > > +    vhost_iova_tree_remove(tree, map);
> > > +}
> > > +
> > > +static size_t vhost_vdpa_net_cvq_cmd_len(void)
> > > +{
> > > +    /*
> > > +     * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
> > > +     * In buffer is always 1 byte, so it should fit here
> > > +     */
> > > +    return sizeof(struct virtio_net_ctrl_hdr) +
> > > +           2 * sizeof(struct virtio_net_ctrl_mac) +
> > > +           MAC_TABLE_ENTRIES * ETH_ALEN;
> > > +}
> > > +
> > > +static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
> > > +{
> > > +    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
> > > +}
> > > +
> > > +/** Copy and map a guest buffer. */
> > > +static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
> > > +                                   const struct iovec *out_data,
> > > +                                   size_t out_num, size_t data_len, void *buf,
> > > +                                   size_t *written, bool write)
> > > +{
> > > +    DMAMap map = {};
> > > +    int r;
> > > +
> > > +    if (unlikely(!data_len)) {
> > > +        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
> > > +                      __func__, write ? "in" : "out");
> > > +        return false;
> > > +    }
> > > +
> > > +    *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
> > > +    map.translated_addr = (hwaddr)(uintptr_t)buf;
> > > +    map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
> > > +    map.perm = write ? IOMMU_RW : IOMMU_RO,
> > > +    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
> > > +    if (unlikely(r != IOVA_OK)) {
> > > +        error_report("Cannot map injected element");
> > > +        return false;
> > > +    }
> > > +
> > > +    r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
> > > +                           !write);
> > > +    if (unlikely(r < 0)) {
> > > +        goto dma_map_err;
> > > +    }
> > > +
> > > +    return true;
> > > +
> > > +dma_map_err:
> > > +    vhost_iova_tree_remove(v->iova_tree, &map);
> > > +    return false;
> > > +}
> > > +
> > >   /**
> > > - * Forward buffer for the moment.
> > > + * Copy the guest element into a dedicated buffer suitable to be sent to NIC
> > > + *
> > > + * @iov: [0] is the out buffer, [1] is the in one
> > > + */
> > > +static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
> > > +                                        VirtQueueElement *elem,
> > > +                                        struct iovec *iov)
> > > +{
> > > +    size_t in_copied;
> > > +    bool ok;
> > > +
> > > +    iov[0].iov_base = s->cvq_cmd_out_buffer;
> > > +    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
> > > +                                vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
> > > +                                &iov[0].iov_len, false);
> > > +    if (unlikely(!ok)) {
> > > +        return false;
> > > +    }
> > > +
> > > +    iov[1].iov_base = s->cvq_cmd_in_buffer;
> > > +    ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
> > > +                                sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
> > > +                                &in_copied, true);
> > > +    if (unlikely(!ok)) {
> > > +        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
> > > +        return false;
> > > +    }
> > > +
> > > +    iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
> > > +    return true;
> > > +}
> > > +
> > > +/**
> > > + * Do not forward commands not supported by SVQ. Otherwise, the device could
> > > + * accept it and qemu would not know how to update the device model.
> > > + */
> > > +static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
> > > +                                            size_t out_num)
> > > +{
> > > +    struct virtio_net_ctrl_hdr ctrl;
> > > +    size_t n;
> > > +
> > > +    n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
> > > +    if (unlikely(n < sizeof(ctrl))) {
> > > +        qemu_log_mask(LOG_GUEST_ERROR,
> > > +                      "%s: invalid legnth of out buffer %zu\n", __func__, n);
> > > +        return false;
> > > +    }
> > > +
> > > +    switch (ctrl.class) {
> > > +    case VIRTIO_NET_CTRL_MAC:
> > > +        switch (ctrl.cmd) {
> > > +        case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> > > +            return true;
> > > +        default:
> > > +            qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
> > > +                          __func__, ctrl.cmd);
> > > +        };
> > > +        break;
> > > +    default:
> > > +        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
> > > +                      __func__, ctrl.class);
> > > +    };
> > > +
> > > +    return false;
> > > +}
> > > +
> > > +/**
> > > + * Validate and copy control virtqueue commands.
> > > + *
> > > + * Following QEMU guidelines, we offer a copy of the buffers to the device to
> > > + * prevent TOCTOU bugs.
> > >    */
> > >   static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> > >                                               VirtQueueElement *elem,
> > >                                               void *opaque)
> > >   {
> > > -    unsigned int n = elem->out_num + elem->in_num;
> > > -    g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
> > > +    VhostVDPAState *s = opaque;
> > >       size_t in_len, dev_written;
> > >       virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > > +    /* out and in buffers sent to the device */
> > > +    struct iovec dev_buffers[2] = {
> > > +        { .iov_base = s->cvq_cmd_out_buffer },
> > > +        { .iov_base = s->cvq_cmd_in_buffer },
> > > +    };
> > > +    /* in buffer used for device model */
> > > +    const struct iovec in = {
> > > +        .iov_base = &status,
> > > +        .iov_len = sizeof(status),
> > > +    };
> > >       int r;
> >
> >
> > I got this:
> >
> > FAILED: libcommon.fa.p/net_vhost-vdpa.c.o
> > cc -m64 -mcx16 -Ilibcommon.fa.p -I../dtc/libfdt -I../slirp
> > -I../slirp/src -I/usr/include/capstone -I/usr/include/pixman-1
> > -I/usr/include/libpng16 -I/usr/include/libmount -I/usr/include/blkid
> > -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include
> > -I/usr/include/gio-unix-2.0 -fdiagnostics-color=auto -Wall -Winvalid-pch
> > -Werror -std=gnu11 -O2 -g -isystem /home/devel/git/qemu/linux-headers
> > -isystem linux-headers -iquote . -iquote /home/devel/git/qemu -iquote
> > /home/devel/git/qemu/include -iquote /home/devel/git/qemu/tcg/i386
> > -pthread -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE
> > -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes
> > -Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes
> > -fno-strict-aliasing -fno-common -fwrapv -Wold-style-declaration
> > -Wold-style-definition -Wtype-limits -Wformat-security -Wformat-y2k
> > -Winit-self -Wignored-qualifiers -Wempty-body -Wnested-externs
> > -Wendif-labels -Wexpansion-to-defined -Wimplicit-fallthrough=2
> > -Wno-missing-include-dirs -Wno-shift-negative-value -Wno-psabi
> > -fstack-protector-strong -fPIE -D_DEFAULT_SOURCE -D_XOPEN_SOURCE=600
> > -DNCURSES_WIDECHAR=1 -MD -MQ libcommon.fa.p/net_vhost-vdpa.c.o -MF
> > libcommon.fa.p/net_vhost-vdpa.c.o.d -o libcommon.fa.p/net_vhost-vdpa.c.o
> > -c ../net/vhost-vdpa.c
> > ../net/vhost-vdpa.c: In function ‘vhost_vdpa_net_handle_ctrl_avail’:
> > ../net/vhost-vdpa.c:427:12: error: ‘r’ may be used uninitialized in this
> > function [-Werror=maybe-uninitialized]
> >    427 |     return r;
> >        |            ^
> > cc1: all warnings being treated as errors
> >
>
> Sorry, I rebased with these applied (or half-applied) [1].
>
> I'll send a new version based on the current master in a moment.
>

So that was not the issue. With --enable-debug, optimization is disabled,
which prevents that analysis from running in my compiler.

However, if I enable it there are other areas using uninitialized variables:
../tests/fp/berkeley-testfloat-3/source/slowfloat.c:1016:14: error:
'uiZ' may be used uninitialized in this function
[-Werror=maybe-uninitialized]
     return uZ.f;

I'll send a new version. I'll add a warning in the cover letter for
these unmerged fixes anyway.

Thanks!





> Thanks!
>
> [1] https://lists.nongnu.org/archive/html/qemu-devel/2022-07/msg01986.html
>
> >
> > > +    bool ok;
> > >
> > > -    memcpy(dev_buffers, elem->out_sg, elem->out_num);
> > > -    memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
> > > +    ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
> >
> >
> > I think it's a good idea to let this function return int instead of a boolean.
> >
> > Thanks
> >
> >
> > > +    if (unlikely(!ok)) {
> > > +        goto out;
> > > +    }
> > >
> > > -    r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
> > > -                      elem->in_num, elem);
> > > +    ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
> > > +    if (unlikely(!ok)) {
> > > +        goto out;
> > > +    }
> > > +
> > > +    r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
> > >       if (unlikely(r != 0)) {
> > >           if (unlikely(r == -ENOSPC)) {
> > >               qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
> > > @@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> > >       dev_written = vhost_svq_poll(svq);
> > >       if (unlikely(dev_written < sizeof(status))) {
> > >           error_report("Insufficient written data (%zu)", dev_written);
> > > +        goto out;
> > > +    }
> > > +
> > > +    memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
> > > +    if (status != VIRTIO_NET_OK) {
> > > +        goto out;
> > > +    }
> > > +
> > > +    status = VIRTIO_NET_ERR;
> > > +    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
> > > +    if (status != VIRTIO_NET_OK) {
> > > +        error_report("Bad CVQ processing in model");
> > >       }
> > >
> > >   out:
> > > @@ -234,6 +418,12 @@ out:
> > >       }
> > >       vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
> > >       g_free(elem);
> > > +    if (dev_buffers[0].iov_base) {
> > > +        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
> > > +    }
> > > +    if (dev_buffers[1].iov_base) {
> > > +        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
> > > +    }
> > >       return r;
> > >   }
> > >
> > > @@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
> > >       s->vhost_vdpa.device_fd = vdpa_device_fd;
> > >       s->vhost_vdpa.index = queue_pair_index;
> > >       if (!is_datapath) {
> > > +        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
> > > +                                            vhost_vdpa_net_cvq_cmd_page_len());
> > > +        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
> > > +        s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
> > > +                                            vhost_vdpa_net_cvq_cmd_page_len());
> > > +        memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
> > > +
> > >           s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
> > >           s->vhost_vdpa.shadow_vq_ops_opaque = s;
> > >       }
> >