From nobody Thu May 2 11:05:48 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (208.118.235.17 [208.118.235.17]) by mx.zohomail.com with SMTPS id 1513925092524884.0605094881738; Thu, 21 Dec 2017 22:44:52 -0800 (PST) Received: from localhost ([::1]:35747 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1eSH4J-00071u-LI for importer@patchew.org; Fri, 22 Dec 2017 01:44:31 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:45984) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1eSH2a-00063P-Us for qemu-devel@nongnu.org; Fri, 22 Dec 2017 01:42:46 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1eSH2W-0007MN-NV for qemu-devel@nongnu.org; Fri, 22 Dec 2017 01:42:45 -0500 Received: from mga07.intel.com ([134.134.136.100]:46019) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1eSH2W-0007Ia-Ee for qemu-devel@nongnu.org; Fri, 22 Dec 2017 01:42:40 -0500 Received: from fmsmga008.fm.intel.com ([10.253.24.58]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 21 Dec 2017 22:42:39 -0800 Received: from deepin-15.sh.intel.com (HELO debian-xvivbkq.sh.intel.com) ([10.67.104.165]) by fmsmga008.fm.intel.com with ESMTP; 21 Dec 2017 22:42:37 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.45,439,1508828400"; d="scan'208";a="4514457" From: Tiwei Bie To: virtio-dev@lists.oasis-open.org, qemu-devel@nongnu.org, mst@redhat.com, alex.williamson@redhat.com, pbonzini@redhat.com, stefanha@redhat.com Date: Fri, 22 Dec 2017 14:41:49 +0800 Message-Id: <20171222064151.29266-2-tiwei.bie@intel.com> X-Mailer: git-send-email 2.13.3 In-Reply-To: <20171222064151.29266-1-tiwei.bie@intel.com> References: <20171222064151.29266-1-tiwei.bie@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.100 Subject: [Qemu-devel] [RFC 1/3] vhost-user: support receiving file descriptors in slave_read X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: jianfeng.tan@intel.com, tiwei.bie@intel.com, cunming.liang@intel.com, xiao.w.wang@intel.com, zhihong.wang@intel.com, dan.daly@intel.com Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Signed-off-by: Tiwei Bie --- hw/virtio/vhost-user.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 093675ed98..e7108138fd 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -614,14 +614,43 @@ static void slave_read(void *opaque) struct vhost_user *u =3D dev->opaque; VhostUserMsg msg =3D { 0, }; int size, ret =3D 0; + struct iovec iov; + struct msghdr msgh; + int fd =3D -1; + size_t fdsize =3D sizeof(fd); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + + memset(&msgh, 0, sizeof(msgh)); + msgh.msg_iov =3D &iov; + msgh.msg_iovlen =3D 1; + msgh.msg_control =3D control; + msgh.msg_controllen =3D sizeof(control); =20 /* Read header */ - size =3D read(u->slave_fd, &msg, VHOST_USER_HDR_SIZE); + iov.iov_base =3D &msg; + iov.iov_len =3D VHOST_USER_HDR_SIZE; + + size =3D recvmsg(u->slave_fd, &msgh, 0); if (size !=3D VHOST_USER_HDR_SIZE) { error_report("Failed to read from slave."); goto err; } =20 + if (msgh.msg_flags & MSG_CTRUNC) { + error_report("Truncated message."); + goto err; + } + + for (cmsg =3D CMSG_FIRSTHDR(&msgh); cmsg !=3D NULL; + cmsg =3D CMSG_NXTHDR(&msgh, cmsg)) { + if (cmsg->cmsg_level =3D=3D SOL_SOCKET && + cmsg->cmsg_type =3D=3D SCM_RIGHTS) { + memcpy(&fd, CMSG_DATA(cmsg), fdsize); + break; + } + } + if (msg.size > VHOST_USER_PAYLOAD_SIZE) { error_report("Failed to read msg header." " Size %d exceeds the maximum %zu.", msg.size, @@ -642,9 +671,15 @@ static void slave_read(void *opaque) break; default: error_report("Received unexpected msg type."); + if (fd !=3D -1) { + close(fd); + } ret =3D -EINVAL; } =20 + /* Message handlers need to make sure that fd will be consumed. */ + fd =3D -1; + /* * REPLY_ACK feature handling. Other reply types has to be managed * directly in their request handlers. @@ -669,6 +704,9 @@ err: qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); close(u->slave_fd); u->slave_fd =3D -1; + if (fd !=3D -1) { + close(fd); + } return; } =20 --=20 2.13.3 From nobody Thu May 2 11:05:48 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (208.118.235.17 [208.118.235.17]) by mx.zohomail.com with SMTPS id 1513925082814236.09794999549968; Thu, 21 Dec 2017 22:44:42 -0800 (PST) Received: from localhost ([::1]:35746 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1eSH4H-000710-Uh for importer@patchew.org; Fri, 22 Dec 2017 01:44:29 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:45992) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1eSH2b-00063S-Gp for qemu-devel@nongnu.org; Fri, 22 Dec 2017 01:42:47 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1eSH2Z-0007NW-MZ for qemu-devel@nongnu.org; Fri, 22 Dec 2017 01:42:45 -0500 Received: from mga07.intel.com ([134.134.136.100]:46019) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1eSH2Z-0007Ia-B5 for qemu-devel@nongnu.org; Fri, 22 Dec 2017 01:42:43 -0500 Received: from fmsmga008.fm.intel.com ([10.253.24.58]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 21 Dec 2017 22:42:42 -0800 Received: from deepin-15.sh.intel.com (HELO debian-xvivbkq.sh.intel.com) ([10.67.104.165]) by fmsmga008.fm.intel.com with ESMTP; 21 Dec 2017 22:42:40 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.45,439,1508828400"; d="scan'208";a="4514465" From: Tiwei Bie To: virtio-dev@lists.oasis-open.org, qemu-devel@nongnu.org, mst@redhat.com, alex.williamson@redhat.com, pbonzini@redhat.com, stefanha@redhat.com Date: Fri, 22 Dec 2017 14:41:50 +0800 Message-Id: <20171222064151.29266-3-tiwei.bie@intel.com> X-Mailer: git-send-email 2.13.3 In-Reply-To: <20171222064151.29266-1-tiwei.bie@intel.com> References: <20171222064151.29266-1-tiwei.bie@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.100 Subject: [Qemu-devel] [RFC 2/3] vhost-user: introduce shared vhost-user state X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: jianfeng.tan@intel.com, tiwei.bie@intel.com, cunming.liang@intel.com, xiao.w.wang@intel.com, zhihong.wang@intel.com, dan.daly@intel.com Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" When multi-queue is enabled for virtio-net, each virtio queue pair will have a vhost_dev, and the only thing they share currently is the chardev. This patch introduces a vhost-user state structure which will be shared by all virtio queue pairs of the same virtio device. Signed-off-by: Tiwei Bie --- hw/scsi/vhost-user-scsi.c | 6 +++--- hw/virtio/vhost-user.c | 9 +++++---- include/hw/virtio/vhost-user.h | 17 +++++++++++++++++ include/hw/virtio/virtio-scsi.h | 6 +++++- net/vhost-user.c | 30 ++++++++++++++++-------------- 5 files changed, 46 insertions(+), 22 deletions(-) create mode 100644 include/hw/virtio/vhost-user.h diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index f7561e23fa..2c46c74128 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -73,7 +73,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Err= or **errp) Error *err =3D NULL; int ret; =20 - if (!vs->conf.chardev.chr) { + if (!vs->conf.vhost_user.chr.chr) { error_setg(errp, "vhost-user-scsi: missing chardev"); return; } @@ -91,7 +91,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Err= or **errp) vsc->dev.vq_index =3D 0; vsc->dev.backend_features =3D 0; =20 - ret =3D vhost_dev_init(&vsc->dev, (void *)&vs->conf.chardev, + ret =3D vhost_dev_init(&vsc->dev, (void *)&vs->conf.vhost_user, VHOST_BACKEND_TYPE_USER, 0); if (ret < 0) { error_setg(errp, "vhost-user-scsi: vhost initialization failed: %s= ", @@ -132,7 +132,7 @@ static uint64_t vhost_user_scsi_get_features(VirtIODevi= ce *vdev, } =20 static Property vhost_user_scsi_properties[] =3D { - DEFINE_PROP_CHR("chardev", VirtIOSCSICommon, conf.chardev), + DEFINE_PROP_CHR("chardev", VirtIOSCSICommon, conf.vhost_user.chr), DEFINE_PROP_UINT32("boot_tpgt", VirtIOSCSICommon, conf.boot_tpgt, 0), DEFINE_PROP_UINT32("num_queues", VirtIOSCSICommon, conf.num_queues, 1), DEFINE_PROP_UINT32("virtqueue_size", VirtIOSCSICommon, conf.virtqueue_= size, diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index e7108138fd..3e308d0a62 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -12,6 +12,7 @@ #include "qapi/error.h" #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-backend.h" +#include "hw/virtio/vhost-user.h" #include "hw/virtio/virtio-net.h" #include "chardev/char-fe.h" #include "sysemu/kvm.h" @@ -123,7 +124,7 @@ static VhostUserMsg m __attribute__ ((unused)); #define VHOST_USER_VERSION (0x1) =20 struct vhost_user { - CharBackend *chr; + VhostUser *shared; int slave_fd; }; =20 @@ -135,7 +136,7 @@ static bool ioeventfd_enabled(void) static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) { struct vhost_user *u =3D dev->opaque; - CharBackend *chr =3D u->chr; + CharBackend *chr =3D &u->shared->chr; uint8_t *p =3D (uint8_t *) msg; int r, size =3D VHOST_USER_HDR_SIZE; =20 @@ -221,7 +222,7 @@ static int vhost_user_write(struct vhost_dev *dev, Vhos= tUserMsg *msg, int *fds, int fd_num) { struct vhost_user *u =3D dev->opaque; - CharBackend *chr =3D u->chr; + CharBackend *chr =3D &u->shared->chr; int ret, size =3D VHOST_USER_HDR_SIZE + msg->size; =20 /* @@ -767,7 +768,7 @@ static int vhost_user_init(struct vhost_dev *dev, void = *opaque) assert(dev->vhost_ops->backend_type =3D=3D VHOST_BACKEND_TYPE_USER); =20 u =3D g_new0(struct vhost_user, 1); - u->chr =3D opaque; + u->shared =3D opaque; u->slave_fd =3D -1; dev->opaque =3D u; =20 diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h new file mode 100644 index 0000000000..10d698abe2 --- /dev/null +++ b/include/hw/virtio/vhost-user.h @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2017 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_VIRTIO_VHOST_USER_H +#define HW_VIRTIO_VHOST_USER_H + +#include "chardev/char-fe.h" + +typedef struct VhostUser { + CharBackend chr; +} VhostUser; + +#endif diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scs= i.h index 4c0bcdb788..885c3e84b5 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -19,6 +19,7 @@ #define VIRTIO_SCSI_SENSE_SIZE 0 #include "standard-headers/linux/virtio_scsi.h" #include "hw/virtio/virtio.h" +#include "hw/virtio/vhost-user.h" #include "hw/pci/pci.h" #include "hw/scsi/scsi.h" #include "chardev/char-fe.h" @@ -54,7 +55,10 @@ struct VirtIOSCSIConf { char *vhostfd; char *wwpn; #endif - CharBackend chardev; + union { + VhostUser vhost_user; + CharBackend chardev; + }; uint32_t boot_tpgt; IOThread *iothread; }; diff --git a/net/vhost-user.c b/net/vhost-user.c index c23927c912..b398294074 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -12,6 +12,7 @@ #include "clients.h" #include "net/vhost_net.h" #include "net/vhost-user.h" +#include "hw/virtio/vhost-user.h" #include "chardev/char-fe.h" #include "qemu/config-file.h" #include "qemu/error-report.h" @@ -20,7 +21,7 @@ =20 typedef struct VhostUserState { NetClientState nc; - CharBackend chr; /* only queue index 0 */ + VhostUser vhost_user; /* only queue index 0 */ VHostNetState *vhost_net; guint watch; uint64_t acked_features; @@ -62,7 +63,7 @@ static void vhost_user_stop(int queues, NetClientState *n= cs[]) } } =20 -static int vhost_user_start(int queues, NetClientState *ncs[], CharBackend= *be) +static int vhost_user_start(int queues, NetClientState *ncs[], void *be) { VhostNetOptions options; struct vhost_net *net =3D NULL; @@ -155,7 +156,7 @@ static void vhost_user_cleanup(NetClientState *nc) g_source_remove(s->watch); s->watch =3D 0; } - qemu_chr_fe_deinit(&s->chr, true); + qemu_chr_fe_deinit(&s->vhost_user.chr, true); } =20 qemu_purge_queued_packets(nc); @@ -189,7 +190,7 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, = GIOCondition cond, { VhostUserState *s =3D opaque; =20 - qemu_chr_fe_disconnect(&s->chr); + qemu_chr_fe_disconnect(&s->vhost_user.chr); =20 return TRUE; } @@ -214,7 +215,8 @@ static void chr_closed_bh(void *opaque) qmp_set_link(name, false, &err); vhost_user_stop(queues, ncs); =20 - qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, + qemu_chr_fe_set_handlers(&s->vhost_user.chr, NULL, NULL, + net_vhost_user_event, NULL, opaque, NULL, true); =20 if (err) { @@ -237,15 +239,15 @@ static void net_vhost_user_event(void *opaque, int ev= ent) assert(queues < MAX_QUEUE_NUM); =20 s =3D DO_UPCAST(VhostUserState, nc, ncs[0]); - chr =3D qemu_chr_fe_get_driver(&s->chr); + chr =3D qemu_chr_fe_get_driver(&s->vhost_user.chr); trace_vhost_user_event(chr->label, event); switch (event) { case CHR_EVENT_OPENED: - if (vhost_user_start(queues, ncs, &s->chr) < 0) { - qemu_chr_fe_disconnect(&s->chr); + if (vhost_user_start(queues, ncs, &s->vhost_user) < 0) { + qemu_chr_fe_disconnect(&s->vhost_user.chr); return; } - s->watch =3D qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, + s->watch =3D qemu_chr_fe_add_watch(&s->vhost_user.chr, G_IO_HUP, net_vhost_user_watch, s); qmp_set_link(name, true, &err); s->started =3D true; @@ -261,8 +263,8 @@ static void net_vhost_user_event(void *opaque, int even= t) =20 g_source_remove(s->watch); s->watch =3D 0; - qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, NULL, - NULL, NULL, false); + qemu_chr_fe_set_handlers(&s->vhost_user.chr, NULL, NULL, NULL, + NULL, NULL, NULL, false); =20 aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque); } @@ -294,7 +296,7 @@ static int net_vhost_user_init(NetClientState *peer, co= nst char *device, if (!nc0) { nc0 =3D nc; s =3D DO_UPCAST(VhostUserState, nc, nc); - if (!qemu_chr_fe_init(&s->chr, chr, &err)) { + if (!qemu_chr_fe_init(&s->vhost_user.chr, chr, &err)) { error_report_err(err); return -1; } @@ -304,11 +306,11 @@ static int net_vhost_user_init(NetClientState *peer, = const char *device, =20 s =3D DO_UPCAST(VhostUserState, nc, nc0); do { - if (qemu_chr_fe_wait_connected(&s->chr, &err) < 0) { + if (qemu_chr_fe_wait_connected(&s->vhost_user.chr, &err) < 0) { error_report_err(err); return -1; } - qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, + qemu_chr_fe_set_handlers(&s->vhost_user.chr, NULL, NULL, net_vhost_user_event, NULL, nc0->name, NU= LL, true); } while (!s->started); --=20 2.13.3 From nobody Thu May 2 11:05:48 2024 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 15139251867490.7770145115943023; Thu, 21 Dec 2017 22:46:26 -0800 (PST) Received: from localhost ([::1]:35828 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1eSH63-00005z-KO for importer@patchew.org; Fri, 22 Dec 2017 01:46:19 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46048) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1eSH2g-000687-3c for qemu-devel@nongnu.org; Fri, 22 Dec 2017 01:42:54 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1eSH2d-0007PY-Jb for qemu-devel@nongnu.org; Fri, 22 Dec 2017 01:42:50 -0500 Received: from mga07.intel.com ([134.134.136.100]:46019) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1eSH2d-0007Ia-5x for qemu-devel@nongnu.org; Fri, 22 Dec 2017 01:42:47 -0500 Received: from fmsmga008.fm.intel.com ([10.253.24.58]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 21 Dec 2017 22:42:45 -0800 Received: from deepin-15.sh.intel.com (HELO debian-xvivbkq.sh.intel.com) ([10.67.104.165]) by fmsmga008.fm.intel.com with ESMTP; 21 Dec 2017 22:42:43 -0800 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.45,439,1508828400"; d="scan'208";a="4514470" From: Tiwei Bie To: virtio-dev@lists.oasis-open.org, qemu-devel@nongnu.org, mst@redhat.com, alex.williamson@redhat.com, pbonzini@redhat.com, stefanha@redhat.com Date: Fri, 22 Dec 2017 14:41:51 +0800 Message-Id: <20171222064151.29266-4-tiwei.bie@intel.com> X-Mailer: git-send-email 2.13.3 In-Reply-To: <20171222064151.29266-1-tiwei.bie@intel.com> References: <20171222064151.29266-1-tiwei.bie@intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.100 Subject: [Qemu-devel] [RFC 3/3] vhost-user: add VFIO based accelerators support X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: jianfeng.tan@intel.com, tiwei.bie@intel.com, cunming.liang@intel.com, xiao.w.wang@intel.com, zhihong.wang@intel.com, dan.daly@intel.com Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail: RSF_0 Z_629925259 SPT_0 Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Signed-off-by: Tiwei Bie --- docs/interop/vhost-user.txt | 57 ++++++ hw/vfio/common.c | 2 +- hw/virtio/vhost-user.c | 381 +++++++++++++++++++++++++++++++++++++= +++- hw/virtio/vhost.c | 3 +- hw/virtio/virtio-pci.c | 8 - hw/virtio/virtio-pci.h | 8 + include/hw/vfio/vfio.h | 2 + include/hw/virtio/vhost-user.h | 26 +++ 8 files changed, 476 insertions(+), 11 deletions(-) diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index 954771d0d8..dd029e4b9d 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -116,6 +116,15 @@ Depending on the request type, payload can be: - 3: IOTLB invalidate - 4: IOTLB access fail =20 + * Vring area description + ----------------------- + | u64 | size | offset | + ----------------------- + + u64: a 64-bit unsigned integer + Size: a 64-bit size + Offset: a 64-bit offset + In QEMU the vhost-user message is implemented with the following struct: =20 typedef struct VhostUserMsg { @@ -129,6 +138,7 @@ typedef struct VhostUserMsg { VhostUserMemory memory; VhostUserLog log; struct vhost_iotlb_msg iotlb; + VhostUserVringArea area; }; } QEMU_PACKED VhostUserMsg; =20 @@ -317,6 +327,17 @@ The fd is provided via VHOST_USER_SET_SLAVE_REQ_FD anc= illary data. A slave may then send VHOST_USER_SLAVE_* messages to the master using this fd communication channel. =20 +VFIO based accelerators +----------------------- + +The VFIO based accelerators feature is a protocol extension. It is support= ed +when the protocol feature VHOST_USER_PROTOCOL_F_VFIO (bit 7) is set. + +The vhost-user backend will set the accelerator context via slave channel, +and QEMU just needs to handle those messages passively. The accelerator +context will be set for each queue independently. So the page-per-vq prope= rty +should also be enabled. + Protocol features ----------------- =20 @@ -327,6 +348,7 @@ Protocol features #define VHOST_USER_PROTOCOL_F_MTU 4 #define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 #define VHOST_USER_PROTOCOL_F_CROSS_ENDIAN 6 +#define VHOST_USER_PROTOCOL_F_VFIO 7 =20 Master message types -------------------- @@ -614,6 +636,41 @@ Slave message types This request should be send only when VIRTIO_F_IOMMU_PLATFORM feature has been successfully negotiated. =20 + * VHOST_USER_SLAVE_VFIO_SET_VRING_GROUP_FD + + Id: 2 + Equivalent ioctl: N/A + Slave payload: u64 + Master payload: N/A + + Sets the VFIO group file descriptor which is passed as ancillary data + for a specified queue (queue index is carried in the u64 payload). + Slave sends this request to tell QEMU to add or delete a VFIO group. + QEMU will delete the current group if any for the specified queue wh= en the + message is sent without a file descriptor. A VFIO group will be actu= ally + deleted when its reference count reaches zero. + This request should be sent only when VHOST_USER_PROTOCOL_F_VFIO pro= tocol + feature has been successfully negotiated. + + * VHOST_USER_SLAVE_VFIO_SET_VRING_NOTIFY_AREA + + Id: 3 + Equivalent ioctl: N/A + Slave payload: vring area description + Master payload: N/A + + Sets the notify area for a specified queue (queue index is carried + in the u64 field of the vring area description). A file descriptor is + passed as ancillary data (typically it's a VFIO device fd). QEMU can + mmap the file descriptor based on the information carried in the vri= ng + area description. + Slave sends this request to tell QEMU to add or delete a MemoryRegion + for a specified queue's notify MMIO region. QEMU will delete the cur= rent + MemoryRegion if any for the specified queue when the message is sent + without a file descriptor. + This request should be sent only when VHOST_USER_PROTOCOL_F_VFIO pro= tocol + feature and VIRTIO_F_VERSION_1 feature have been successfully negoti= ated. + VHOST_USER_PROTOCOL_F_REPLY_ACK: ------------------------------- The original vhost-user specification only demands replies for certain diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 7b2924c0ef..53d8700581 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -49,7 +49,7 @@ struct vfio_as_head vfio_address_spaces =3D * initialized, this file descriptor is only released on QEMU exit and * we'll re-use it should another vfio device be attached before then. */ -static int vfio_kvm_device_fd =3D -1; +int vfio_kvm_device_fd =3D -1; #endif =20 /* diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 3e308d0a62..22d7dd5729 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -14,6 +14,8 @@ #include "hw/virtio/vhost-backend.h" #include "hw/virtio/vhost-user.h" #include "hw/virtio/virtio-net.h" +#include "hw/virtio/virtio-pci.h" +#include "hw/vfio/vfio.h" #include "chardev/char-fe.h" #include "sysemu/kvm.h" #include "qemu/error-report.h" @@ -35,6 +37,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_NET_MTU =3D 4, VHOST_USER_PROTOCOL_F_SLAVE_REQ =3D 5, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN =3D 6, + VHOST_USER_PROTOCOL_F_VFIO =3D 7, =20 VHOST_USER_PROTOCOL_F_MAX }; @@ -72,6 +75,8 @@ typedef enum VhostUserRequest { typedef enum VhostUserSlaveRequest { VHOST_USER_SLAVE_NONE =3D 0, VHOST_USER_SLAVE_IOTLB_MSG =3D 1, + VHOST_USER_SLAVE_VFIO_SET_VRING_GROUP_FD =3D 2, + VHOST_USER_SLAVE_VFIO_SET_VRING_NOTIFY_AREA =3D 3, VHOST_USER_SLAVE_MAX } VhostUserSlaveRequest; =20 @@ -93,6 +98,12 @@ typedef struct VhostUserLog { uint64_t mmap_offset; } VhostUserLog; =20 +typedef struct VhostUserVringArea { + uint64_t u64; + uint64_t size; + uint64_t offset; +} VhostUserVringArea; + typedef struct VhostUserMsg { VhostUserRequest request; =20 @@ -110,6 +121,7 @@ typedef struct VhostUserMsg { VhostUserMemory memory; VhostUserLog log; struct vhost_iotlb_msg iotlb; + VhostUserVringArea area; } payload; } QEMU_PACKED VhostUserMsg; =20 @@ -609,6 +621,342 @@ static int vhost_user_reset_device(struct vhost_dev *= dev) return 0; } =20 +#ifdef CONFIG_KVM +static int vfio_group_fd_to_id(int group_fd) +{ + char linkname[PATH_MAX]; + char pathname[PATH_MAX]; + char *filename; + int group_id, ret; + + snprintf(linkname, sizeof(linkname), "/proc/self/fd/%d", group_fd); + + ret =3D readlink(linkname, pathname, sizeof(pathname)); + if (ret < 0) { + return -1; + } + + filename =3D g_path_get_basename(pathname); + group_id =3D atoi(filename); + g_free(filename); + + return group_id; +} + +static int vhost_user_kvm_add_vfio_group(struct vhost_dev *dev, + int group_id, int group_fd) +{ + struct vhost_user *u =3D dev->opaque; + struct vhost_user_vfio_state *vfio =3D &u->shared->vfio; + struct kvm_device_attr attr =3D { + .group =3D KVM_DEV_VFIO_GROUP, + .attr =3D KVM_DEV_VFIO_GROUP_ADD, + }; + bool found =3D false; + int i, ret; + + for (i =3D 0; i < vfio->nr_group; i++) { + if (vfio->group[i].id =3D=3D group_id) { + found =3D true; + break; + } + } + + if (found) { + close(group_fd); + vfio->group[i].refcnt++; + return 0; + } + + if (vfio->nr_group >=3D VIRTIO_QUEUE_MAX) { + return -1; + } + + vfio->group[i].id =3D group_id; + vfio->group[i].fd =3D group_fd; + vfio->group[i].refcnt =3D 1; + + attr.addr =3D (uint64_t)(uintptr_t)&vfio->group[i].fd; + +again: + /* XXX: improve this */ + if (vfio_kvm_device_fd < 0) { + struct kvm_create_device cd =3D { + .type =3D KVM_DEV_TYPE_VFIO, + }; + + ret =3D kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd); + if (ret < 0) { + if (errno =3D=3D EBUSY) { + goto again; + } + error_report("Failed to create KVM VFIO device."); + return -1; + } + + vfio_kvm_device_fd =3D cd.fd; + } + + ret =3D ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr); + if (ret < 0) { + error_report("Failed to add group %d to KVM VFIO device.", + group_id); + return -1; + } + + vfio->nr_group++; + + return 0; +} + +static int vhost_user_kvm_del_vfio_group(struct vhost_dev *dev, int group_= id) +{ + struct vhost_user *u =3D dev->opaque; + struct vhost_user_vfio_state *vfio =3D &u->shared->vfio; + struct kvm_device_attr attr =3D { + .group =3D KVM_DEV_VFIO_GROUP, + .attr =3D KVM_DEV_VFIO_GROUP_DEL, + }; + bool found =3D false; + int i, ret; + + kvm_irqchip_commit_routes(kvm_state); + + for (i =3D 0; i < vfio->nr_group; i++) { + if (vfio->group[i].id =3D=3D group_id) { + found =3D true; + break; + } + } + + if (!found) { + return 0; + } + + vfio->group[i].refcnt--; + + if (vfio->group[i].refcnt =3D=3D 0) { + attr.addr =3D (uint64_t)(uintptr_t)&vfio->group[i].fd; + ret =3D ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr); + if (ret < 0) { + error_report("Failed to remove group %d from KVM VFIO device.", + group_id); + vfio->group[i].refcnt++; + return -1; + } + + close(vfio->group[i].fd); + + for (; i + 1 < vfio->nr_group; i++) { + vfio->group[i] =3D vfio->group[i + 1]; + } + vfio->nr_group--; + } + + return 0; +} + +static int vhost_user_handle_vfio_set_vring_group_fd(struct vhost_dev *dev, + uint64_t u64, + int group_fd) +{ + struct vhost_user *u =3D dev->opaque; + struct vhost_user_vfio_state *vfio =3D &u->shared->vfio; + int qid =3D u64 & VHOST_USER_VRING_IDX_MASK; + int group_id, nvqs, ret =3D 0; + + qemu_mutex_lock(&vfio->lock); + + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_VFIO)) { + ret =3D -1; + goto out; + } + + if (dev->vdev =3D=3D NULL) { + error_report("vhost_dev isn't available."); + ret =3D -1; + goto out; + } + + nvqs =3D virtio_get_num_queues(dev->vdev); + if (qid >=3D nvqs) { + error_report("invalid queue index."); + ret =3D -1; + goto out; + } + + if (u64 & VHOST_USER_VRING_NOFD_MASK) { + group_id =3D vfio->group_id[qid]; + if (group_id !=3D -1) { + if (vhost_user_kvm_del_vfio_group(dev, group_id) < 0) { + ret =3D -1; + goto out; + } + vfio->group_id[qid] =3D -1; + } + goto out; + } + + group_id =3D vfio_group_fd_to_id(group_fd); + if (group_id =3D=3D -1) { + ret =3D -1; + goto out; + } + + if (vfio->group_id[qid] =3D=3D group_id) { + close(group_fd); + goto out; + } + + if (vfio->group_id[qid] !=3D -1) { + if (vhost_user_kvm_del_vfio_group(dev, vfio->group_id[qid]) < 0) { + ret =3D -1; + goto out; + } + vfio->group_id[qid] =3D -1; + } + + if (vhost_user_kvm_add_vfio_group(dev, group_id, group_fd) < 0) { + ret =3D -1; + goto out; + } + vfio->group_id[qid] =3D group_id; + +out: + kvm_irqchip_commit_routes(kvm_state); + qemu_mutex_unlock(&vfio->lock); + + if (ret !=3D 0 && group_fd !=3D -1) { + close(group_fd); + } + + return ret; +} +#else +static int vhost_user_handle_vfio_set_vring_group_fd(struct vhost_dev *dev, + uint64_t u64, + int group_fd) +{ + if (group_fd !=3D -1) { + close(group_fd); + } + + return 0; +} +#endif + +static int vhost_user_add_mapping(struct vhost_dev *dev, int qid, int fd, + uint64_t size, uint64_t offset) +{ + struct vhost_user *u =3D dev->opaque; + struct vhost_user_vfio_state *vfio =3D &u->shared->vfio; + MemoryRegion *sysmem =3D get_system_memory(); + VirtIONetPCI *d; + VirtIOPCIProxy *proxy; /* XXX: handle non-PCI case */ + uint64_t paddr; + void *addr; + char *name; + + d =3D container_of(dev->vdev, VirtIONetPCI, vdev.parent_obj); + proxy =3D &d->parent_obj; + + if ((proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) =3D=3D 0 || + size !=3D virtio_pci_queue_mem_mult(proxy)) { + return -1; + } + + addr =3D mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offs= et); + if (addr =3D=3D MAP_FAILED) { + error_report("Can't map notify region."); + return -1; + } + + vfio->notify[qid].mmap.addr =3D addr; + vfio->notify[qid].mmap.size =3D size; + + /* The notify_offset of each queue is queue_select */ + paddr =3D proxy->modern_bar.addr + proxy->notify.offset + + virtio_pci_queue_mem_mult(proxy) * qid; + + name =3D g_strdup_printf("vhost-user/vfio@%p mmaps[%d]", vfio, qid); + memory_region_init_ram_device_ptr(&vfio->notify[qid].mr, + memory_region_owner(sysmem), + name, size, addr); + g_free(name); + memory_region_add_subregion(sysmem, paddr, &vfio->notify[qid].mr); + + return 0; +} + +static int vhost_user_del_mapping(struct vhost_dev *dev, int qid) +{ + struct vhost_user *u =3D dev->opaque; + struct vhost_user_vfio_state *vfio =3D &u->shared->vfio; + MemoryRegion *sysmem =3D get_system_memory(); + + if (vfio->notify[qid].mmap.addr =3D=3D NULL) { + return 0; + } + + memory_region_del_subregion(sysmem, &vfio->notify[qid].mr); + object_unparent(OBJECT(&vfio->notify[qid].mr)); + + munmap(vfio->notify[qid].mmap.addr, vfio->notify[qid].mmap.size); + vfio->notify[qid].mmap.addr =3D NULL; + vfio->notify[qid].mmap.size =3D 0; + + return 0; +} + +static int vhost_user_handle_vfio_set_vring_notify_area(struct vhost_dev *= dev, + VhostUserVringArea *notify_area, int fd) +{ + struct vhost_user *u =3D dev->opaque; + struct vhost_user_vfio_state *vfio =3D &u->shared->vfio; + int qid =3D notify_area->u64 & VHOST_USER_VRING_IDX_MASK; + int nvqs, ret =3D 0; + + qemu_mutex_lock(&vfio->lock); + + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_VFIO)) { + ret =3D -1; + goto out; + } + + if (dev->vdev =3D=3D NULL) { + error_report("vhost_dev isn't available."); + ret =3D -1; + goto out; + } + + nvqs =3D virtio_get_num_queues(dev->vdev); + if (qid >=3D nvqs) { + error_report("invalid queue index."); + ret =3D -1; + goto out; + } + + if (vfio->notify[qid].mmap.addr !=3D NULL) { + vhost_user_del_mapping(dev, qid); + } + + if (notify_area->u64 & VHOST_USER_VRING_NOFD_MASK) { + goto out; + } + + ret =3D vhost_user_add_mapping(dev, qid, fd, notify_area->size, + notify_area->offset); + +out: + if (fd !=3D -1) { + close(fd); + } + qemu_mutex_unlock(&vfio->lock); + return ret; +} + static void slave_read(void *opaque) { struct vhost_dev *dev =3D opaque; @@ -670,6 +1018,14 @@ static void slave_read(void *opaque) case VHOST_USER_SLAVE_IOTLB_MSG: ret =3D vhost_backend_handle_iotlb_msg(dev, &msg.payload.iotlb); break; + case VHOST_USER_SLAVE_VFIO_SET_VRING_GROUP_FD: + ret =3D vhost_user_handle_vfio_set_vring_group_fd(dev, + msg.payload.u64, fd); + break; + case VHOST_USER_SLAVE_VFIO_SET_VRING_NOTIFY_AREA: + ret =3D vhost_user_handle_vfio_set_vring_notify_area(dev, + &msg.payload.area, fd); + break; default: error_report("Received unexpected msg type."); if (fd !=3D -1) { @@ -763,7 +1119,7 @@ static int vhost_user_init(struct vhost_dev *dev, void= *opaque) { uint64_t features, protocol_features; struct vhost_user *u; - int err; + int i, err; =20 assert(dev->vhost_ops->backend_type =3D=3D VHOST_BACKEND_TYPE_USER); =20 @@ -772,6 +1128,13 @@ static int vhost_user_init(struct vhost_dev *dev, voi= d *opaque) u->slave_fd =3D -1; dev->opaque =3D u; =20 + if (dev->vq_index =3D=3D 0) { + for (i =3D 0; i < VIRTIO_QUEUE_MAX; i++) { + u->shared->vfio.group_id[i] =3D -1; + } + qemu_mutex_init(&u->shared->vfio.lock); + } + err =3D vhost_user_get_features(dev, &features); if (err < 0) { return err; @@ -832,6 +1195,7 @@ static int vhost_user_init(struct vhost_dev *dev, void= *opaque) static int vhost_user_cleanup(struct vhost_dev *dev) { struct vhost_user *u; + int i; =20 assert(dev->vhost_ops->backend_type =3D=3D VHOST_BACKEND_TYPE_USER); =20 @@ -841,6 +1205,21 @@ static int vhost_user_cleanup(struct vhost_dev *dev) close(u->slave_fd); u->slave_fd =3D -1; } + + if (dev->vq_index =3D=3D 0) { + for (i =3D 0; i < VIRTIO_QUEUE_MAX; i++) { + vhost_user_del_mapping(dev, i); + } + +#ifdef CONFIG_KVM + while (u->shared->vfio.nr_group > 0) { + int group_id; + group_id =3D u->shared->vfio.group[0].id; + vhost_user_kvm_del_vfio_group(dev, group_id); + } +#endif + } + g_free(u); dev->opaque =3D 0; =20 diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index e4290ce93d..a001a0936a 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -612,7 +612,8 @@ static void vhost_set_memory(MemoryListener *listener, static bool vhost_section(MemoryRegionSection *section) { return memory_region_is_ram(section->mr) && - !memory_region_is_rom(section->mr); + !memory_region_is_rom(section->mr) && + !memory_region_is_ram_device(section->mr); } =20 static void vhost_begin(MemoryListener *listener) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index e92837c42b..c28fed8676 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -219,14 +219,6 @@ static bool virtio_pci_ioeventfd_enabled(DeviceState *= d) return (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) !=3D 0; } =20 -#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000 - -static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy) -{ - return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ? - QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4; -} - static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *noti= fier, int n, bool assign) { diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 12d3a90686..f2a613569b 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -209,6 +209,14 @@ static inline void virtio_pci_disable_modern(VirtIOPCI= Proxy *proxy) proxy->disable_modern =3D true; } =20 +#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000 + +static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy) +{ + return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ? + QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4; +} + /* * virtio-scsi-pci: This extends VirtioPCIProxy. */ diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h index 86248f5436..7425fcd90c 100644 --- a/include/hw/vfio/vfio.h +++ b/include/hw/vfio/vfio.h @@ -1,6 +1,8 @@ #ifndef HW_VFIO_H #define HW_VFIO_H =20 +extern int vfio_kvm_device_fd; + bool vfio_eeh_as_ok(AddressSpace *as); int vfio_eeh_as_op(AddressSpace *as, uint32_t op); =20 diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index 10d698abe2..cc998f4f43 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -9,9 +9,35 @@ #define HW_VIRTIO_VHOST_USER_H =20 #include "chardev/char-fe.h" +#include "hw/virtio/virtio.h" + +struct vhost_user_vfio_state { + /* The group ID associated with each queue */ + int group_id[VIRTIO_QUEUE_MAX]; + + /* The notify context of each queue */ + struct { + struct { + uint64_t size; + void *addr; + } mmap; + MemoryRegion mr; + } notify[VIRTIO_QUEUE_MAX]; + + /* The vfio groups associated with this vhost user */ + struct { + int fd; + int id; + int refcnt; + } group[VIRTIO_QUEUE_MAX]; + int nr_group; + + QemuMutex lock; +}; =20 typedef struct VhostUser { CharBackend chr; + struct vhost_user_vfio_state vfio; } VhostUser; =20 #endif --=20 2.13.3