[PULL 09/25] vfio-user: implement message receive infrastructure

Cédric Le Goater posted 25 patches 4 months, 3 weeks ago
Maintainers: John Levon <john.levon@nutanix.com>, Thanos Makatos <thanos.makatos@nutanix.com>, Paolo Bonzini <pbonzini@redhat.com>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Thomas Huth <thuth@redhat.com>, Tony Krowiak <akrowiak@linux.ibm.com>, Halil Pasic <pasic@linux.ibm.com>, Jason Herne <jjherne@linux.ibm.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>
[PULL 09/25] vfio-user: implement message receive infrastructure
Posted by Cédric Le Goater 4 months, 3 weeks ago
From: John Levon <john.levon@nutanix.com>

Add the basic implementation for receiving vfio-user messages from the
control socket.

Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250625193012.2316242-4-john.levon@nutanix.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
 meson.build               |   1 +
 hw/vfio-user/protocol.h   |  53 +++++
 hw/vfio-user/proxy.h      |  11 +
 hw/vfio-user/trace.h      |   4 +
 hw/vfio-user/pci.c        |  11 +
 hw/vfio-user/proxy.c      | 411 ++++++++++++++++++++++++++++++++++++++
 hw/vfio-user/trace-events |   8 +
 7 files changed, 499 insertions(+)
 create mode 100644 hw/vfio-user/protocol.h
 create mode 100644 hw/vfio-user/trace.h
 create mode 100644 hw/vfio-user/trace-events

diff --git a/meson.build b/meson.build
index 4676908dbb2006181355785c7262a85df52fc87c..dbc97bfdf7a0d888e270056d52b565c494878646 100644
--- a/meson.build
+++ b/meson.build
@@ -3683,6 +3683,7 @@ if have_system
     'hw/ufs',
     'hw/usb',
     'hw/vfio',
+    'hw/vfio-user',
     'hw/virtio',
     'hw/vmapple',
     'hw/watchdog',
diff --git a/hw/vfio-user/protocol.h b/hw/vfio-user/protocol.h
new file mode 100644
index 0000000000000000000000000000000000000000..4ddfb5f222e60c3bd212110bba6855b81f191d27
--- /dev/null
+++ b/hw/vfio-user/protocol.h
@@ -0,0 +1,53 @@
+#ifndef VFIO_USER_PROTOCOL_H
+#define VFIO_USER_PROTOCOL_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * Each message has a standard header that describes the command
+ * being sent, which is almost always a VFIO ioctl().
+ *
+ * The header may be followed by command-specific data, such as the
+ * region and offset info for read and write commands.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+typedef struct {
+    uint16_t id;
+    uint16_t command;
+    uint32_t size;
+    uint32_t flags;
+    uint32_t error_reply;
+} VFIOUserHdr;
+
+/* VFIOUserHdr commands */
+enum vfio_user_command {
+    VFIO_USER_VERSION                   = 1,
+    VFIO_USER_DMA_MAP                   = 2,
+    VFIO_USER_DMA_UNMAP                 = 3,
+    VFIO_USER_DEVICE_GET_INFO           = 4,
+    VFIO_USER_DEVICE_GET_REGION_INFO    = 5,
+    VFIO_USER_DEVICE_GET_REGION_IO_FDS  = 6,
+    VFIO_USER_DEVICE_GET_IRQ_INFO       = 7,
+    VFIO_USER_DEVICE_SET_IRQS           = 8,
+    VFIO_USER_REGION_READ               = 9,
+    VFIO_USER_REGION_WRITE              = 10,
+    VFIO_USER_DMA_READ                  = 11,
+    VFIO_USER_DMA_WRITE                 = 12,
+    VFIO_USER_DEVICE_RESET              = 13,
+    VFIO_USER_DIRTY_PAGES               = 14,
+    VFIO_USER_MAX,
+};
+
+/* VFIOUserHdr flags */
+#define VFIO_USER_REQUEST       0x0
+#define VFIO_USER_REPLY         0x1
+#define VFIO_USER_TYPE          0xF
+
+#define VFIO_USER_NO_REPLY      0x10
+#define VFIO_USER_ERROR         0x20
+
+#endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio-user/proxy.h b/hw/vfio-user/proxy.h
index a9bce82239d0e9dc40a892e1542488cfffc4f3da..ff553cad9d50ce2ba4867d910adf92836dc068b5 100644
--- a/hw/vfio-user/proxy.h
+++ b/hw/vfio-user/proxy.h
@@ -12,6 +12,9 @@
 #include "io/channel.h"
 #include "io/channel-socket.h"
 
+#include "qemu/sockets.h"
+#include "hw/vfio-user/protocol.h"
+
 typedef struct {
     int send_fds;
     int recv_fds;
@@ -28,6 +31,7 @@ enum msg_type {
 
 typedef struct VFIOUserMsg {
     QTAILQ_ENTRY(VFIOUserMsg) next;
+    VFIOUserHdr *hdr;
     VFIOUserFDs *fds;
     uint32_t rsize;
     uint32_t id;
@@ -67,13 +71,20 @@ typedef struct VFIOUserProxy {
     VFIOUserMsgQ incoming;
     VFIOUserMsgQ outgoing;
     VFIOUserMsg *last_nowait;
+    VFIOUserMsg *part_recv;
+    size_t recv_left;
     enum proxy_state state;
 } VFIOUserProxy;
 
 /* VFIOProxy flags */
 #define VFIO_PROXY_CLIENT        0x1
 
+typedef struct VFIODevice VFIODevice;
+
 VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp);
 void vfio_user_disconnect(VFIOUserProxy *proxy);
+void vfio_user_set_handler(VFIODevice *vbasedev,
+                           void (*handler)(void *opaque, VFIOUserMsg *msg),
+                           void *reqarg);
 
 #endif /* VFIO_USER_PROXY_H */
diff --git a/hw/vfio-user/trace.h b/hw/vfio-user/trace.h
new file mode 100644
index 0000000000000000000000000000000000000000..9cf02d9506dd3d7e5a1d234497761f7c545aca76
--- /dev/null
+++ b/hw/vfio-user/trace.h
@@ -0,0 +1,4 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "trace/trace-hw_vfio_user.h"
diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
index 642421e7916aad01a894ee390c43453dd5297356..bad2829f5cfc76ef5716a67ffb000c8d78ed360d 100644
--- a/hw/vfio-user/pci.c
+++ b/hw/vfio-user/pci.c
@@ -22,6 +22,16 @@ struct VFIOUserPCIDevice {
     SocketAddress *socket;
 };
 
+/*
+ * Incoming request message callback.
+ *
+ * Runs off main loop, so BQL held.
+ */
+static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg)
+{
+
+}
+
 /*
  * Emulated devices don't use host hot reset
  */
@@ -80,6 +90,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
         return;
     }
     vbasedev->proxy = proxy;
+    vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev);
 
     /*
      * vfio-user devices are effectively mdevs (don't use a host iommu).
diff --git a/hw/vfio-user/proxy.c b/hw/vfio-user/proxy.c
index bb436c9db91b6ebd90542b4121b62d56bbbdf4ab..349ea2b27c9877993125f4f0f3ca27cb4df9b1b3 100644
--- a/hw/vfio-user/proxy.c
+++ b/hw/vfio-user/proxy.c
@@ -11,15 +11,31 @@
 
 #include "hw/vfio/vfio-device.h"
 #include "hw/vfio-user/proxy.h"
+#include "hw/vfio-user/trace.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "qemu/lockable.h"
+#include "qemu/main-loop.h"
 #include "system/iothread.h"
 
 static IOThread *vfio_user_iothread;
 
 static void vfio_user_shutdown(VFIOUserProxy *proxy);
+static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+                                     VFIOUserFDs *fds);
+static VFIOUserFDs *vfio_user_getfds(int numfds);
+static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg);
 
+static void vfio_user_recv(void *opaque);
+static void vfio_user_cb(void *opaque);
+
+static void vfio_user_request(void *opaque);
+
+static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err)
+{
+    hdr->flags |= VFIO_USER_ERROR;
+    hdr->error_reply = err;
+}
 
 /*
  * Functions called by main, CPU, or iothread threads
@@ -32,10 +48,343 @@ static void vfio_user_shutdown(VFIOUserProxy *proxy)
                                    proxy->ctx, NULL, NULL);
 }
 
+static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+                                     VFIOUserFDs *fds)
+{
+    VFIOUserMsg *msg;
+
+    msg = QTAILQ_FIRST(&proxy->free);
+    if (msg != NULL) {
+        QTAILQ_REMOVE(&proxy->free, msg, next);
+    } else {
+        msg = g_malloc0(sizeof(*msg));
+        qemu_cond_init(&msg->cv);
+    }
+
+    msg->hdr = hdr;
+    msg->fds = fds;
+    return msg;
+}
+
+/*
+ * Recycle a message list entry to the free list.
+ */
+static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg)
+{
+    if (msg->type == VFIO_MSG_NONE) {
+        error_printf("vfio_user_recycle - freeing free msg\n");
+        return;
+    }
+
+    /* free msg buffer if no one is waiting to consume the reply */
+    if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) {
+        g_free(msg->hdr);
+        if (msg->fds != NULL) {
+            g_free(msg->fds);
+        }
+    }
+
+    msg->type = VFIO_MSG_NONE;
+    msg->hdr = NULL;
+    msg->fds = NULL;
+    msg->complete = false;
+    QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
+}
+
+static VFIOUserFDs *vfio_user_getfds(int numfds)
+{
+    VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int)));
+
+    fds->fds = (int *)((char *)fds + sizeof(*fds));
+
+    return fds;
+}
+
 /*
  * Functions only called by iothread
  */
 
+/*
+ * Process a received message.
+ */
+static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg,
+                              bool isreply)
+{
+
+    /*
+     * Replies signal a waiter, if none just check for errors
+     * and free the message buffer.
+     *
+     * Requests get queued for the BH.
+     */
+    if (isreply) {
+        msg->complete = true;
+        if (msg->type == VFIO_MSG_WAIT) {
+            qemu_cond_signal(&msg->cv);
+        } else {
+            if (msg->hdr->flags & VFIO_USER_ERROR) {
+                error_printf("vfio_user_process: error reply on async ");
+                error_printf("request command %x error %s\n",
+                             msg->hdr->command,
+                             strerror(msg->hdr->error_reply));
+            }
+            /* youngest nowait msg has been ack'd */
+            if (proxy->last_nowait == msg) {
+                proxy->last_nowait = NULL;
+            }
+            vfio_user_recycle(proxy, msg);
+        }
+    } else {
+        QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next);
+        qemu_bh_schedule(proxy->req_bh);
+    }
+}
+
+/*
+ * Complete a partial message read
+ */
+static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp)
+{
+    VFIOUserMsg *msg = proxy->part_recv;
+    size_t msgleft = proxy->recv_left;
+    bool isreply;
+    char *data;
+    int ret;
+
+    data = (char *)msg->hdr + (msg->hdr->size - msgleft);
+    while (msgleft > 0) {
+        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);
+
+        /* error or would block */
+        if (ret <= 0) {
+            /* try for rest on next iteration */
+            if (ret == QIO_CHANNEL_ERR_BLOCK) {
+                proxy->recv_left = msgleft;
+            }
+            return ret;
+        }
+        trace_vfio_user_recv_read(msg->hdr->id, ret);
+
+        msgleft -= ret;
+        data += ret;
+    }
+
+    /*
+     * Read complete message, process it.
+     */
+    proxy->part_recv = NULL;
+    proxy->recv_left = 0;
+    isreply = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY;
+    vfio_user_process(proxy, msg, isreply);
+
+    /* return positive value */
+    return 1;
+}
+
+/*
+ * Receive and process one incoming message.
+ *
+ * For replies, find matching outgoing request and wake any waiters.
+ * For requests, queue in incoming list and run request BH.
+ */
+static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
+{
+    VFIOUserMsg *msg = NULL;
+    g_autofree int *fdp = NULL;
+    VFIOUserFDs *reqfds;
+    VFIOUserHdr hdr;
+    struct iovec iov = {
+        .iov_base = &hdr,
+        .iov_len = sizeof(hdr),
+    };
+    bool isreply = false;
+    int i, ret;
+    size_t msgleft, numfds = 0;
+    char *data = NULL;
+    char *buf = NULL;
+
+    /*
+     * Complete any partial reads
+     */
+    if (proxy->part_recv != NULL) {
+        ret = vfio_user_complete(proxy, errp);
+
+        /* still not complete, try later */
+        if (ret == QIO_CHANNEL_ERR_BLOCK) {
+            return ret;
+        }
+
+        if (ret <= 0) {
+            goto fatal;
+        }
+        /* else fall into reading another msg */
+    }
+
+    /*
+     * Read header
+     */
+    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0,
+                                 errp);
+    if (ret == QIO_CHANNEL_ERR_BLOCK) {
+        return ret;
+    }
+
+    /* read error or other side closed connection */
+    if (ret <= 0) {
+        goto fatal;
+    }
+
+    if (ret < sizeof(hdr)) {
+        error_setg(errp, "short read of header");
+        goto fatal;
+    }
+
+    /*
+     * Validate header
+     */
+    if (hdr.size < sizeof(VFIOUserHdr)) {
+        error_setg(errp, "bad header size");
+        goto fatal;
+    }
+    switch (hdr.flags & VFIO_USER_TYPE) {
+    case VFIO_USER_REQUEST:
+        isreply = false;
+        break;
+    case VFIO_USER_REPLY:
+        isreply = true;
+        break;
+    default:
+        error_setg(errp, "unknown message type");
+        goto fatal;
+    }
+    trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size,
+                             hdr.flags);
+
+    /*
+     * For replies, find the matching pending request.
+     * For requests, reap incoming FDs.
+     */
+    if (isreply) {
+        QTAILQ_FOREACH(msg, &proxy->pending, next) {
+            if (hdr.id == msg->id) {
+                break;
+            }
+        }
+        if (msg == NULL) {
+            error_setg(errp, "unexpected reply");
+            goto err;
+        }
+        QTAILQ_REMOVE(&proxy->pending, msg, next);
+
+        /*
+         * Process any received FDs
+         */
+        if (numfds != 0) {
+            if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
+                error_setg(errp, "unexpected FDs");
+                goto err;
+            }
+            msg->fds->recv_fds = numfds;
+            memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
+        }
+    } else {
+        if (numfds != 0) {
+            reqfds = vfio_user_getfds(numfds);
+            memcpy(reqfds->fds, fdp, numfds * sizeof(int));
+        } else {
+            reqfds = NULL;
+        }
+    }
+
+    /*
+     * Put the whole message into a single buffer.
+     */
+    if (isreply) {
+        if (hdr.size > msg->rsize) {
+            error_setg(errp, "reply larger than recv buffer");
+            goto err;
+        }
+        *msg->hdr = hdr;
+        data = (char *)msg->hdr + sizeof(hdr);
+    } else {
+        buf = g_malloc0(hdr.size);
+        memcpy(buf, &hdr, sizeof(hdr));
+        data = buf + sizeof(hdr);
+        msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
+        msg->type = VFIO_MSG_REQ;
+    }
+
+    /*
+     * Read rest of message.
+     */
+    msgleft = hdr.size - sizeof(hdr);
+    while (msgleft > 0) {
+        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);
+
+        /* prepare to complete read on next iteration */
+        if (ret == QIO_CHANNEL_ERR_BLOCK) {
+            proxy->part_recv = msg;
+            proxy->recv_left = msgleft;
+            return ret;
+        }
+
+        if (ret <= 0) {
+            goto fatal;
+        }
+        trace_vfio_user_recv_read(hdr.id, ret);
+
+        msgleft -= ret;
+        data += ret;
+    }
+
+    vfio_user_process(proxy, msg, isreply);
+    return 0;
+
+    /*
+     * fatal means the other side closed or we don't trust the stream
+     * err means this message is corrupt
+     */
+fatal:
+    vfio_user_shutdown(proxy);
+    proxy->state = VFIO_PROXY_ERROR;
+
+    /* set error if server side closed */
+    if (ret == 0) {
+        error_setg(errp, "server closed socket");
+    }
+
+err:
+    for (i = 0; i < numfds; i++) {
+        close(fdp[i]);
+    }
+    if (isreply && msg != NULL) {
+        /* force an error to keep sending thread from hanging */
+        vfio_user_set_error(msg->hdr, EINVAL);
+        msg->complete = true;
+        qemu_cond_signal(&msg->cv);
+    }
+    return -1;
+}
+
+static void vfio_user_recv(void *opaque)
+{
+    VFIOUserProxy *proxy = opaque;
+
+    QEMU_LOCK_GUARD(&proxy->lock);
+
+    if (proxy->state == VFIO_PROXY_CONNECTED) {
+        Error *local_err = NULL;
+
+        while (vfio_user_recv_one(proxy, &local_err) == 0) {
+            ;
+        }
+
+        if (local_err != NULL) {
+            error_report_err(local_err);
+        }
+    }
+}
+
 static void vfio_user_cb(void *opaque)
 {
     VFIOUserProxy *proxy = opaque;
@@ -51,6 +400,53 @@ static void vfio_user_cb(void *opaque)
  * Functions called by main or CPU threads
  */
 
+/*
+ * Process incoming requests.
+ *
+ * The bus-specific callback has the form:
+ *    request(opaque, msg)
+ * where 'opaque' was specified in vfio_user_set_handler
+ * and 'msg' is the inbound message.
+ *
+ * The callback is responsible for disposing of the message buffer,
+ * usually by re-using it when calling vfio_send_reply or vfio_send_error,
+ * both of which free their message buffer when the reply is sent.
+ *
+ * If the callback uses a new buffer, it needs to free the old one.
+ */
+static void vfio_user_request(void *opaque)
+{
+    VFIOUserProxy *proxy = opaque;
+    VFIOUserMsgQ new, free;
+    VFIOUserMsg *msg, *m1;
+
+    /* reap all incoming */
+    QTAILQ_INIT(&new);
+    WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+        QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) {
+            QTAILQ_REMOVE(&proxy->incoming, msg, next);
+            QTAILQ_INSERT_TAIL(&new, msg, next);
+        }
+    }
+
+    /* process list */
+    QTAILQ_INIT(&free);
+    QTAILQ_FOREACH_SAFE(msg, &new, next, m1) {
+        QTAILQ_REMOVE(&new, msg, next);
+        trace_vfio_user_recv_request(msg->hdr->command);
+        proxy->request(proxy->req_arg, msg);
+        QTAILQ_INSERT_HEAD(&free, msg, next);
+    }
+
+    /* free list */
+    WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+        QTAILQ_FOREACH_SAFE(msg, &free, next, m1) {
+            vfio_user_recycle(proxy, msg);
+        }
+    }
+}
+
+
 static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets =
     QLIST_HEAD_INITIALIZER(vfio_user_sockets);
 
@@ -89,6 +485,7 @@ VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
     }
 
     proxy->ctx = iothread_get_aio_context(vfio_user_iothread);
+    proxy->req_bh = qemu_bh_new(vfio_user_request, proxy);
 
     QTAILQ_INIT(&proxy->outgoing);
     QTAILQ_INIT(&proxy->incoming);
@@ -99,6 +496,18 @@ VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
     return proxy;
 }
 
+void vfio_user_set_handler(VFIODevice *vbasedev,
+                           void (*handler)(void *opaque, VFIOUserMsg *msg),
+                           void *req_arg)
+{
+    VFIOUserProxy *proxy = vbasedev->proxy;
+
+    proxy->request = handler;
+    proxy->req_arg = req_arg;
+    qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
+                                   vfio_user_recv, NULL, NULL, proxy);
+}
+
 void vfio_user_disconnect(VFIOUserProxy *proxy)
 {
     VFIOUserMsg *r1, *r2;
@@ -114,6 +523,8 @@ void vfio_user_disconnect(VFIOUserProxy *proxy)
     }
     object_unref(OBJECT(proxy->ioc));
     proxy->ioc = NULL;
+    qemu_bh_delete(proxy->req_bh);
+    proxy->req_bh = NULL;
 
     proxy->state = VFIO_PROXY_CLOSING;
     QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) {
diff --git a/hw/vfio-user/trace-events b/hw/vfio-user/trace-events
new file mode 100644
index 0000000000000000000000000000000000000000..ddeb9f4b2f62557c6c7307f61e0888524d158b36
--- /dev/null
+++ b/hw/vfio-user/trace-events
@@ -0,0 +1,8 @@
+# See docs/devel/tracing.rst for syntax documentation.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
# proxy.c
+vfio_user_recv_hdr(const char *name, uint16_t id, uint16_t cmd, uint32_t size, uint32_t flags) " (%s) id 0x%x cmd 0x%x size 0x%x flags 0x%x"
+vfio_user_recv_read(uint16_t id, int read) " id 0x%x read 0x%x"
+vfio_user_recv_request(uint16_t cmd) " command 0x%x"
-- 
2.49.0
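
For reference, a minimal sketch of how a peer might fill in and classify the
header defined in protocol.h above. The helper names here are hypothetical
and not part of the series; only VFIOUserHdr and the flag definitions come
from the patch:

    static void fill_request_hdr(VFIOUserHdr *hdr, uint16_t id,
                                 uint16_t command, uint32_t size)
    {
        hdr->id = id;
        hdr->command = command;          /* e.g. VFIO_USER_DEVICE_GET_INFO */
        hdr->size = size;                /* header plus payload, in bytes */
        hdr->flags = VFIO_USER_REQUEST;  /* low nibble is the message type */
        hdr->error_reply = 0;
    }

    static bool hdr_is_reply(const VFIOUserHdr *hdr)
    {
        /* the same test vfio_user_recv_one() applies on the receive side */
        return (hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY;
    }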


Re: [PULL 09/25] vfio-user: implement message receive infrastructure
Posted by Peter Maydell 4 months, 1 week ago
On Thu, 26 Jun 2025 at 08:47, Cédric Le Goater <clg@redhat.com> wrote:
>
> From: John Levon <john.levon@nutanix.com>
>
> Add the basic implementation for receiving vfio-user messages from the
> control socket.
>

Hi; Coverity suggests there are some issues with this code
(CID 1611807, 1611808, 1611809):

> +/*
> + * Receive and process one incoming message.
> + *
> + * For replies, find matching outgoing request and wake any waiters.
> + * For requests, queue in incoming list and run request BH.
> + */
> +static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
> +{


> +    /*
> +     * For replies, find the matching pending request.
> +     * For requests, reap incoming FDs.
> +     */
> +    if (isreply) {
> +        QTAILQ_FOREACH(msg, &proxy->pending, next) {
> +            if (hdr.id == msg->id) {
> +                break;
> +            }
> +        }
> +        if (msg == NULL) {
> +            error_setg(errp, "unexpected reply");
> +            goto err;
> +        }
> +        QTAILQ_REMOVE(&proxy->pending, msg, next);
> +
> +        /*
> +         * Process any received FDs
> +         */
> +        if (numfds != 0) {
> +            if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
> +                error_setg(errp, "unexpected FDs");
> +                goto err;
> +            }
> +            msg->fds->recv_fds = numfds;
> +            memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
> +        }
> +    } else {
> +        if (numfds != 0) {
> +            reqfds = vfio_user_getfds(numfds);
> +            memcpy(reqfds->fds, fdp, numfds * sizeof(int));
> +        } else {
> +            reqfds = NULL;
> +        }

Here we allocate memory into reqfds...

> +    }
> +
> +    /*
> +     * Put the whole message into a single buffer.
> +     */
> +    if (isreply) {
> +        if (hdr.size > msg->rsize) {
> +            error_setg(errp, "reply larger than recv buffer");
> +            goto err;
> +        }
> +        *msg->hdr = hdr;
> +        data = (char *)msg->hdr + sizeof(hdr);
> +    } else {
> +        buf = g_malloc0(hdr.size);
> +        memcpy(buf, &hdr, sizeof(hdr));
> +        data = buf + sizeof(hdr);
> +        msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
> +        msg->type = VFIO_MSG_REQ;

...and here we allocate memory into msg...

> +    }
> +
> +    /*
> +     * Read rest of message.
> +     */
> +    msgleft = hdr.size - sizeof(hdr);
> +    while (msgleft > 0) {
> +        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);
> +
> +        /* prepare to complete read on next iteration */
> +        if (ret == QIO_CHANNEL_ERR_BLOCK) {
> +            proxy->part_recv = msg;
> +            proxy->recv_left = msgleft;
> +            return ret;
> +        }
> +
> +        if (ret <= 0) {
> +            goto fatal;
> +        }

...but here we may take an error-exit codepath to the 'fatal'
label...

> +        trace_vfio_user_recv_read(hdr.id, ret);
> +
> +        msgleft -= ret;
> +        data += ret;
> +    }
> +
> +    vfio_user_process(proxy, msg, isreply);
> +    return 0;
> +
> +    /*
> +     * fatal means the other side closed or we don't trust the stream
> +     * err means this message is corrupt
> +     */
> +fatal:
> +    vfio_user_shutdown(proxy);
> +    proxy->state = VFIO_PROXY_ERROR;
> +
> +    /* set error if server side closed */
> +    if (ret == 0) {
> +        error_setg(errp, "server closed socket");
> +    }
> +
> +err:
> +    for (i = 0; i < numfds; i++) {
> +        close(fdp[i]);
> +    }
> +    if (isreply && msg != NULL) {
> +        /* force an error to keep sending thread from hanging */
> +        vfio_user_set_error(msg->hdr, EINVAL);
> +        msg->complete = true;
> +        qemu_cond_signal(&msg->cv);
> +    }
> +    return -1;

...and in this error handling codepath we don't do anything
to free either msg or reqfds.

Coverity also wonders if you have missing locking because
the call to qemu_cond_signal() here is the only place
that touches msg->cv without holding a lock. But this one's
a heuristic that may be wrong.
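
A minimal sketch of one possible cleanup for these paths, assuming the
request-path allocations ('buf', 'reqfds', and the 'msg' list entry) are
still unowned when we bail out, and that 'reqfds' gains a NULL initializer
at its declaration; the eventual fix may well look different:

    err:
        for (i = 0; i < numfds; i++) {
            close(fdp[i]);
        }
        if (isreply) {
            if (msg != NULL) {
                /* force an error to keep sending thread from hanging */
                vfio_user_set_error(msg->hdr, EINVAL);
                msg->complete = true;
                qemu_cond_signal(&msg->cv);
            }
        } else if (msg != NULL) {
            /*
             * Request path: vfio_user_process() never ran, so nothing
             * else owns the buffer or FDs (msg->hdr is 'buf', msg->fds
             * is 'reqfds'); proxy->lock is already held by our caller.
             */
            g_free(msg->hdr);
            g_free(msg->fds);
            msg->hdr = NULL;
            msg->fds = NULL;
            msg->type = VFIO_MSG_NONE;
            QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
        } else {
            g_free(reqfds);    /* allocated but never attached to a msg */
        }
        return -1;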

thanks
-- PMM
Re: [PULL 09/25] vfio-user: implement message receive infrastructure
Posted by Peter Maydell 2 weeks, 4 days ago
On Thu, 10 Jul 2025 at 13:44, Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Thu, 26 Jun 2025 at 08:47, Cédric Le Goater <clg@redhat.com> wrote:
> >
> > From: John Levon <john.levon@nutanix.com>
> >
> > Add the basic implementation for receiving vfio-user messages from the
> > control socket.
> >
>
> Hi; Coverity suggests there are some issues with this code
> (CID 1611807, 1611808, 1611809):

Hi; it looks like 1611807 and 1611808 (the resource leaks)
are still present in this code in current git; would somebody
like to have a look at this?

> > +/*
> > + * Receive and process one incoming message.
> > + *
> > + * For replies, find matching outgoing request and wake any waiters.
> > + * For requests, queue in incoming list and run request BH.
> > + */
> > +static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
> > +{
>
>
> > +    /*
> > +     * For replies, find the matching pending request.
> > +     * For requests, reap incoming FDs.
> > +     */
> > +    if (isreply) {
> > +        QTAILQ_FOREACH(msg, &proxy->pending, next) {
> > +            if (hdr.id == msg->id) {
> > +                break;
> > +            }
> > +        }
> > +        if (msg == NULL) {
> > +            error_setg(errp, "unexpected reply");
> > +            goto err;
> > +        }
> > +        QTAILQ_REMOVE(&proxy->pending, msg, next);
> > +
> > +        /*
> > +         * Process any received FDs
> > +         */
> > +        if (numfds != 0) {
> > +            if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
> > +                error_setg(errp, "unexpected FDs");
> > +                goto err;
> > +            }
> > +            msg->fds->recv_fds = numfds;
> > +            memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
> > +        }
> > +    } else {
> > +        if (numfds != 0) {
> > +            reqfds = vfio_user_getfds(numfds);
> > +            memcpy(reqfds->fds, fdp, numfds * sizeof(int));
> > +        } else {
> > +            reqfds = NULL;
> > +        }
>
> Here we allocate memory into reqfds...
>
> > +    }
> > +
> > +    /*
> > +     * Put the whole message into a single buffer.
> > +     */
> > +    if (isreply) {
> > +        if (hdr.size > msg->rsize) {
> > +            error_setg(errp, "reply larger than recv buffer");
> > +            goto err;
> > +        }
> > +        *msg->hdr = hdr;
> > +        data = (char *)msg->hdr + sizeof(hdr);
> > +    } else {
> > +        buf = g_malloc0(hdr.size);
> > +        memcpy(buf, &hdr, sizeof(hdr));
> > +        data = buf + sizeof(hdr);
> > +        msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
> > +        msg->type = VFIO_MSG_REQ;
>
> ...and here we allocate memory into msg...
>
> > +    }
> > +
> > +    /*
> > +     * Read rest of message.
> > +     */
> > +    msgleft = hdr.size - sizeof(hdr);
> > +    while (msgleft > 0) {
> > +        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);
> > +
> > +        /* prepare to complete read on next iteration */
> > +        if (ret == QIO_CHANNEL_ERR_BLOCK) {
> > +            proxy->part_recv = msg;
> > +            proxy->recv_left = msgleft;
> > +            return ret;
> > +        }
> > +
> > +        if (ret <= 0) {
> > +            goto fatal;
> > +        }
>
> ...but here we may take an error-exit codepath to the 'fatal'
> label...
>
> > +        trace_vfio_user_recv_read(hdr.id, ret);
> > +
> > +        msgleft -= ret;
> > +        data += ret;
> > +    }
> > +
> > +    vfio_user_process(proxy, msg, isreply);
> > +    return 0;
> > +
> > +    /*
> > +     * fatal means the other side closed or we don't trust the stream
> > +     * err means this message is corrupt
> > +     */
> > +fatal:
> > +    vfio_user_shutdown(proxy);
> > +    proxy->state = VFIO_PROXY_ERROR;
> > +
> > +    /* set error if server side closed */
> > +    if (ret == 0) {
> > +        error_setg(errp, "server closed socket");
> > +    }
> > +
> > +err:
> > +    for (i = 0; i < numfds; i++) {
> > +        close(fdp[i]);
> > +    }
> > +    if (isreply && msg != NULL) {
> > +        /* force an error to keep sending thread from hanging */
> > +        vfio_user_set_error(msg->hdr, EINVAL);
> > +        msg->complete = true;
> > +        qemu_cond_signal(&msg->cv);
> > +    }
> > +    return -1;
>
> ...and in this error handling codepath we don't do anything
> to free either msg or reqfds.

thanks
-- PMM
Re: [PULL 09/25] vfio-user: implement message receive infrastructure
Posted by John Levon 2 weeks, 4 days ago
On Tue, Oct 28, 2025 at 01:35:05PM +0000, Peter Maydell wrote:

> On Thu, 10 Jul 2025 at 13:44, Peter Maydell <peter.maydell@linaro.org> wrote:
> >
> > On Thu, 26 Jun 2025 at 08:47, Cédric Le Goater <clg@redhat.com> wrote:
> > >
> > > From: John Levon <john.levon@nutanix.com>
> > >
> > > Add the basic implementation for receiving vfio-user messages from the
> > > control socket.
> > >
> >
> > Hi; Coverity suggests there are some issues with this code
> > (CID 1611807, 1611808, 1611809):
> 
> Hi; it looks like 1611807 and 1611808 (the resource leaks)
> are still present in this code in current git; would somebody
> like to have a look at this?

Please see https://lore.kernel.org/qemu-devel/aG-4hkfLDEpDsqo6@movementarian.org/

I believe them to be false positives.

regards
john
Re: [PULL 09/25] vfio-user: implement message receive infrastructure
Posted by Peter Maydell 1 week, 5 days ago
On Tue, 28 Oct 2025 at 13:52, John Levon <levon@movementarian.org> wrote:
>
> On Tue, Oct 28, 2025 at 01:35:05PM +0000, Peter Maydell wrote:
>
> > On Thu, 10 Jul 2025 at 13:44, Peter Maydell <peter.maydell@linaro.org> wrote:
> > >
> > > On Thu, 26 Jun 2025 at 08:47, Cédric Le Goater <clg@redhat.com> wrote:
> > > >
> > > > From: John Levon <john.levon@nutanix.com>
> > > >
> > > > Add the basic implementation for receiving vfio-user messages from the
> > > > control socket.
> > > >
> > >
> > > Hi; Coverity suggests there are some issues with this code
> > > (CID 1611807, 1611808, 1611809):
> >
> > Hi; it looks like 1611807 and 1611808 (the resource leaks)
> > are still present in this code in current git; would somebody
> > like to have a look at this?
>
> Please see https://lore.kernel.org/qemu-devel/aG-4hkfLDEpDsqo6@movementarian.org/
>
> I believe them to be false positives.

That email only seems to talk about the locking issue (1611806,
1611809) which we've marked as false-positives in Coverity.
But 1611807 and 1611808 are "failed to free resources" issues:
we allocate memory into reqfds and msg, but in the error
exit path (e.g. if we "goto fatal" because qio_channel_read()
failed) it doesn't look like we ever free that memory.

thanks
-- PMM
Re: [PULL 09/25] vfio-user: implement message receive infrastructure
Posted by John Levon 1 week, 5 days ago
On Mon, Nov 03, 2025 at 11:34:36AM +0000, Peter Maydell wrote:

> > > > Hi; Coverity suggests there are some issues with this code
> > > > (CID 1611807, 1611808, 1611809):
> > >
> > > Hi; it looks like 1611807 and 1611808 (the resource leaks)
> > > are still present in this code in current git; would somebody
> > > like to have a look at this?
> >
> > Please see https://lore.kernel.org/qemu-devel/aG-4hkfLDEpDsqo6@movementarian.org/
> >
> > I believe them to be false positives.
> 
> That email only seems to talk about the locking issue (1611806,
> 1611809) which we've marked as false-positives in Coverity.
> But 1611807 and 1611808 are "failed to free resources" issues:
> we allocate memory into reqfds and msg, but in the error
> exit path (e.g. if we "goto fatal" because qio_channel_read()
> failed) it doesn't look like we ever free that memory.

Sorry, I should have pasted: Here was my reply to Cédric below.

Looking at it with fresh eyes, though, I'm not so sure: at least in the case
where we allocate a message instead of taking one from the free list, we need
some form of recycle on the error path. The whole function needs a big
refactor for clarity...

> *** CID 1611808:         Resource leaks  (RESOURCE_LEAK)
> /builds/qemu-project/qemu/hw/vfio-user/proxy.c: 411             in vfio_user_recv_one()
> 405         if (isreply && msg != NULL) {
> 406             /* force an error to keep sending thread from hanging */
> 407             vfio_user_set_error(msg->hdr, EINVAL);
> 408             msg->complete = true;
> 409             qemu_cond_signal(&msg->cv);
> 410         }
> > > >     CID 1611808:         Resource leaks  (RESOURCE_LEAK)
> > > >     Variable "reqfds" going out of scope leaks the storage it points to.
> 411         return -1;

vfio_user_getmsg() I think takes ownership of reqfds so this looks OK.
It's hard to say without the full analysis.

> /builds/qemu-project/qemu/hw/vfio-user/proxy.c: 411             in vfio_user_recv_one()
> 405         if (isreply && msg != NULL) {
> 406             /* force an error to keep sending thread from hanging */
> 407             vfio_user_set_error(msg->hdr, EINVAL);
> 408             msg->complete = true;
> 409             qemu_cond_signal(&msg->cv);
> 410         }
> > > >     CID 1611807:         Resource leaks  (RESOURCE_LEAK)
> > > >     Variable "msg" going out of scope leaks the storage it points to.
> 411         return -1;
> 412     }

Same as above.

regards
john
Re: [PULL 09/25] vfio-user: implement message receive infrastructure
Posted by Peter Maydell 1 week, 5 days ago
On Mon, 3 Nov 2025 at 12:05, John Levon <levon@movementarian.org> wrote:
>
> On Mon, Nov 03, 2025 at 11:34:36AM +0000, Peter Maydell wrote:
>
> > > > > Hi; Coverity suggests there are some issues with this code
> > > > > (CID 1611807, 1611808, 1611809):
> > > >
> > > > Hi; it looks like 1611807 and 1611808 (the resource leaks)
> > > > are still present in this code in current git; would somebody
> > > > like to have a look at this?
> > >
> > > Please see https://lore.kernel.org/qemu-devel/aG-4hkfLDEpDsqo6@movementarian.org/
> > >
> > > I believe them to be false positives.
> >
> > That email only seems to talk about the locking issue (1611806,
> > 1611809) which we've marked as false-positives in Coverity.
> > But 1611807 and 1611808 are "failed to free resources" issues:
> > we allocate memory into reqfds and msg, but in the error
> > exit path (e.g. if we "goto fatal" because qio_channel_read()
> > failed) it doesn't look like we ever free that memory.
>
> Sorry, I should have pasted: Here was my reply to Cédric below.
>
> Looking at it with fresh eyes, though, I'm not so sure: at least in the case
> where we allocate a message instead of taking one from the free list, we need
> some form of recycle on the error path. The whole function needs a big
> refactor for clarity...
>
> > *** CID 1611808:         Resource leaks  (RESOURCE_LEAK)
> > /builds/qemu-project/qemu/hw/vfio-user/proxy.c: 411             in vfio_user_recv_one()
> > 405         if (isreply && msg != NULL) {
> > 406             /* force an error to keep sending thread from hanging */
> > 407             vfio_user_set_error(msg->hdr, EINVAL);
> > 408             msg->complete = true;
> > 409             qemu_cond_signal(&msg->cv);
> > 410         }
> > > > >     CID 1611808:         Resource leaks  (RESOURCE_LEAK)
> > > > >     Variable "reqfds" going out of scope leaks the storage it points to.
> > 411         return -1;
>
> vfio_user_getmsg() I think takes ownership of reqfds so this looks OK.

The error reported in CID 1611808 is that if we take the "goto err"
code path for "vfio_user_recv request larger than max" then we
never call vfio_user_getmsg() to take ownership of reqfds, but the
'err' codepath does not free reqfds.

> It's hard to say without the full analysis.

If you like you can ask for an account at
https://scan.coverity.com/projects/qemu which will let you look
at the issues in the web GUI, which is a bit more detailed than
the email summaries.

> > /builds/qemu-project/qemu/hw/vfio-user/proxy.c: 411             in vfio_user_recv_one()
> > 405         if (isreply && msg != NULL) {
> > 406             /* force an error to keep sending thread from hanging */
> > 407             vfio_user_set_error(msg->hdr, EINVAL);
> > 408             msg->complete = true;
> > 409             qemu_cond_signal(&msg->cv);
> > 410         }
> > > > >     CID 1611807:         Resource leaks  (RESOURCE_LEAK)
> > > > >     Variable "msg" going out of scope leaks the storage it points to.
> > 411         return -1;
> > 412     }
>
> Same as above.

In 1611807 the error path is slightly different: here we
do call vfio_user_getmsg(), which allocates 'msg' and returns
it to us. Then we get a failure return from qio_channel_read()
which causes us to take a "goto fatal", so we never call
vfio_user_process() to take ownership of 'msg'; but we don't free
it or otherwise dispose of it in the 'fatal' codepath.

I think it might also make the code more readable if
it was refactored to keep the is_reply and !is_reply
codepaths separate. At the moment the error handling
is complicated because the second half of the function
has two consecutive "if (is_reply) { ... } else { ... }"
long code blocks, and the error handling depends on
whether we're is_reply or not.
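
For instance, the common payload read loop could become a shared helper,
leaving each caller to free exactly what it allocated on failure. A sketch
only, with a hypothetical name:

    /*
     * Read the remainder of a message body, stashing partial-read state
     * for the next POLLIN. Returns 0 on completion, QIO_CHANNEL_ERR_BLOCK
     * if the caller should retry later, and -1 on close or hard error.
     */
    static int vfio_user_recv_payload(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                                      char *data, size_t msgleft, Error **errp)
    {
        while (msgleft > 0) {
            ssize_t ret = qio_channel_read(proxy->ioc, data, msgleft, errp);

            if (ret == QIO_CHANNEL_ERR_BLOCK) {
                proxy->part_recv = msg;
                proxy->recv_left = msgleft;
                return QIO_CHANNEL_ERR_BLOCK;
            }
            if (ret <= 0) {
                return -1;
            }
            trace_vfio_user_recv_read(msg->hdr->id, ret);
            msgleft -= ret;
            data += ret;
        }
        return 0;
    }

vfio_user_recv_one() would then reduce to header validation plus separate
reply and request branches, each with its own error exit.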

thanks
-- PMM
Re: [PULL 09/25] vfio-user: implement message receive infrastructure
Posted by John Levon 1 week, 4 days ago
On Mon, Nov 03, 2025 at 02:09:00PM +0000, Peter Maydell wrote:

> > It's hard to say without the full analysis.
> 
> If you like you can ask for an account at
> https://scan.coverity.com/projects/qemu which will let you look
> at the issues in the web GUI, which is a bit more detailed than
> the email summaries.

Yes please. I think it was missed when I asked for one before.

regards
john
Re: [PULL 09/25] vfio-user: implement message receive infrastructure
Posted by Peter Maydell 1 week, 4 days ago
On Mon, 3 Nov 2025 at 15:20, John Levon <levon@movementarian.org> wrote:
>
> On Mon, Nov 03, 2025 at 02:09:00PM +0000, Peter Maydell wrote:
>
> > > It's hard to say without the full analysis.
> >
> > If you like you can ask for an account at
> > https://scan.coverity.com/projects/qemu which will let you look
> > at the issues in the web GUI, which is a bit more detailed than
> > the email summaries.
>
> Yes please. I think it was missed when I asked for one before.

You need to request an account through their web UI. Then
the system will email me to ask me to approve it.

thanks
-- PMM