From nobody Fri Nov 7 05:45:09 2025 Delivered-To: importer@patchew.org Received-SPF: pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) client-ip=208.118.235.17; envelope-from=qemu-devel-bounces+importer=patchew.org@nongnu.org; helo=lists.gnu.org; Authentication-Results: mx.zohomail.com; dkim=fail; spf=pass (zoho.com: domain of gnu.org designates 208.118.235.17 as permitted sender) smtp.mailfrom=qemu-devel-bounces+importer=patchew.org@nongnu.org; dmarc=fail(p=none dis=none) header.from=gmail.com Return-Path: Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) by mx.zohomail.com with SMTPS id 1546510891861296.5944972451023; Thu, 3 Jan 2019 02:21:31 -0800 (PST) Received: from localhost ([127.0.0.1]:50417 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gf082-00067r-Dc for importer@patchew.org; Thu, 03 Jan 2019 05:21:30 -0500 Received: from eggs.gnu.org ([208.118.235.92]:44171) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1gf05x-0004IG-DC for qemu-devel@nongnu.org; Thu, 03 Jan 2019 05:19:23 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1gf05t-0002si-Iu for qemu-devel@nongnu.org; Thu, 03 Jan 2019 05:19:21 -0500 Received: from mail-pl1-x643.google.com ([2607:f8b0:4864:20::643]:34053) by eggs.gnu.org with esmtps (TLS1.0:RSA_AES_128_CBC_SHA1:16) (Exim 4.71) (envelope-from ) id 1gf05r-0002r0-JR for qemu-devel@nongnu.org; Thu, 03 Jan 2019 05:19:17 -0500 Received: by mail-pl1-x643.google.com with SMTP id w4so15749981plz.1 for ; Thu, 03 Jan 2019 02:19:14 -0800 (PST) Received: from localhost ([116.247.112.152]) by smtp.gmail.com with ESMTPSA id d13sm82021324pfd.58.2019.01.03.02.19.13 (version=TLS1_2 cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Thu, 03 Jan 2019 02:19:13 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; 
bh=ku389Ea7iJJNi12slF2hwtTC0PlyQNn6AYfSdp+9lEM=; b=rn1qehwJm/byG7FC3w/v5bt11paltRfs/qdiW5ypkkiEyDNiVGx2aHh2OCEJoiJYx6 3B8RKqHJmwakcO372IB36C8YMOCcrmHnUd+rIJ6/4Esv/8Kp0aECEQMH2z0VAl3qf9yF Jj3LhK+dMyKIGQ7qerkuAobgvPlbNuq0sDBV8WxY1z0duz1zOu9eJxGjBKJmee4CY8ZJ 7CxV/g/dM5b9pxJWg0l6MuA/eBvDI6UVh+fZ9BNhSWioVGgAxMYFHKgB/oXJpB9NCr3u Q7ITNQFonR8RKSwVvYFYckSOz8fjPfuCJBuTzC9+XeDKAhRrcW1nTBOpZ8iEQ89t8tpL RwQA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=ku389Ea7iJJNi12slF2hwtTC0PlyQNn6AYfSdp+9lEM=; b=QmzH9ds9ySa1FgzHxq4W8pz8vx+uiAqbThbhU0WCel1gi5A6rBlSHd/r5Jo6unvZzP CkVZ8DGImV0kyaRH7eOW/qaTIb8lMTR0HL2xMXoFuY1TYWvqZ/lNls5w9tlu6vwKOGPw ruQszN2zRUVWreHrKMMhxXwCuyt1vqUtnMHrnZdCVMypwGlxkegRPfhpktOlVmBGYQ/+ 5IhSiwNl7cK1h6t+9gzW0EgWc5HvXTPpOVHaKbwPCNkUBGBUOYMZ3NhoDRl9Ao1VJKAU RH7/LuxY4H4JaeMVzOqmiaNToWc+VjpCwET5kkC4cPTvLjq2tMui55In8YrsqREA16Z4 vQrg== X-Gm-Message-State: AJcUukdYspGcXsiFNKvu9hB0dPsmoSh/+g38S9jFavPEFufCgLPkoM5O 4wF5JTG3Pj632k9D2WbYiHVJsXa9sVI= X-Google-Smtp-Source: ALg8bN40NtzF6R07YkK5oPXQ/Hz/G7An6l9ss7nA1oCCGQlgOf9zf1AZbDcrpqbcVfSF5zAQ0uYPxw== X-Received: by 2002:a17:902:7848:: with SMTP id e8mr47415043pln.100.1546510753852; Thu, 03 Jan 2019 02:19:13 -0800 (PST) From: elohimes@gmail.com X-Google-Original-From: xieyongji@baidu.com To: mst@redhat.com, marcandre.lureau@redhat.com, berrange@redhat.com, jasowang@redhat.com, maxime.coquelin@redhat.com, yury-kotov@yandex-team.ru, wrfsh@yandex-team.ru Date: Thu, 3 Jan 2019 18:18:16 +0800 Message-Id: <20190103101819.7418-5-xieyongji@baidu.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190103101819.7418-1-xieyongji@baidu.com> References: <20190103101819.7418-1-xieyongji@baidu.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. 
X-Received-From: 2607:f8b0:4864:20::643 Subject: [Qemu-devel] [PATCH v3 for-4.0 4/7] libvhost-user: Support recording inflight I/O in shared memory X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: nixun@baidu.com, qemu-devel@nongnu.org, lilin24@baidu.com, zhangyu31@baidu.com, chaiwen@baidu.com, Xie Yongji Errors-To: qemu-devel-bounces+importer=patchew.org@nongnu.org Sender: "Qemu-devel" X-ZohoMail-DKIM: fail (Header signature does not verify) Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" From: Xie Yongji This patch adds support for VHOST_USER_GET_SHM_SIZE and VHOST_USER_SET_SHM_FD messages to get shared memory from qemu. Then we maintain a "bitmap" of all descriptors in the shared memory for each queue to record inflight I/O. Signed-off-by: Xie Yongji Signed-off-by: Zhang Yu --- contrib/libvhost-user/libvhost-user.c | 221 +++++++++++++++++++++++++- contrib/libvhost-user/libvhost-user.h | 33 ++++ 2 files changed, 248 insertions(+), 6 deletions(-) diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/= libvhost-user.c index 23bd52264c..f18f5e6e62 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -53,6 +53,18 @@ _min1 < _min2 ? 
_min1 : _min2; }) #endif =20 +/* Round number down to multiple */ +#define ALIGN_DOWN(n, m) ((n) / (m) * (m)) + +/* Round number up to multiple */ +#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m)) + +/* Align each region to cache line size in shared memory */ +#define SHM_ALIGNMENT 64 + +/* The version of shared memory */ +#define SHM_VERSION 1 + #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) =20 /* The version of the protocol we support */ @@ -100,6 +112,8 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_POSTCOPY_ADVISE), REQ(VHOST_USER_POSTCOPY_LISTEN), REQ(VHOST_USER_POSTCOPY_END), + REQ(VHOST_USER_GET_SHM_SIZE), + REQ(VHOST_USER_SET_SHM_FD), REQ(VHOST_USER_MAX), }; #undef REQ @@ -890,6 +904,41 @@ vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg) return true; } =20 +static int +vu_check_queue_inflights(VuDev *dev, VuVirtq *vq) +{ + int i =3D 0; + + if ((dev->protocol_features & + VHOST_USER_PROTOCOL_F_SLAVE_SHMFD) =3D=3D 0) { + return 0; + } + + if (unlikely(!vq->shm)) { + return -1; + } + + vq->used_idx =3D vq->vring.used->idx; + vq->inflight_num =3D 0; + for (i =3D 0; i < vq->vring.num; i++) { + if (vq->shm->inflight[i] =3D=3D 0) { + continue; + } + + vq->inflight_desc[vq->inflight_num++] =3D i; + vq->inuse++; + } + vq->shadow_avail_idx =3D vq->last_avail_idx =3D vq->inuse + vq->used_i= dx; + + /* in case of I/O hang after reconnecting */ + if (eventfd_write(vq->kick_fd, 1) || + eventfd_write(vq->call_fd, 1)) { + return -1; + } + + return 0; +} + static bool vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg) { @@ -925,6 +974,10 @@ vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg) dev->vq[index].kick_fd, index); } =20 + if (vu_check_queue_inflights(dev, &dev->vq[index])) { + vu_panic(dev, "Failed to check inflights for vq: %d\n", index); + } + return false; } =20 @@ -1215,6 +1268,115 @@ vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg) return true; } =20 +static int +vu_setup_shm(VuDev *dev) +{ + int i; + char 
*addr =3D (char *)dev->shm_info.addr; + uint64_t size =3D 0; + uint32_t vq_size =3D ALIGN_UP(dev->shm_info.vq_size, dev->shm_info.ali= gn); + + if (dev->shm_info.version !=3D SHM_VERSION) { + DPRINT("Invalid version for shm: %d", dev->shm_info.version); + return -1; + } + + if (dev->shm_info.dev_size !=3D 0) { + DPRINT("Invalid dev_size for shm: %d", dev->shm_info.dev_size); + return -1; + } + + if (dev->shm_info.vq_size !=3D sizeof(VuVirtqShm)) { + DPRINT("Invalid vq_size for shm: %d", dev->shm_info.vq_size); + return -1; + } + + for (i =3D 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) { + size +=3D vq_size; + if (size > dev->shm_info.mmap_size) { + break; + } + dev->vq[i].shm =3D (VuVirtqShm *)addr; + addr +=3D vq_size; + } + + return 0; +} + +static bool +vu_get_shm_size(VuDev *dev, VhostUserMsg *vmsg) +{ + if (vmsg->size !=3D sizeof(vmsg->payload.shm)) { + vu_panic(dev, "Invalid get_shm_size message:%d", vmsg->size); + vmsg->size =3D 0; + return true; + } + + vmsg->payload.shm.dev_size =3D 0; + vmsg->payload.shm.vq_size =3D sizeof(VuVirtqShm); + vmsg->payload.shm.align =3D SHM_ALIGNMENT; + vmsg->payload.shm.version =3D SHM_VERSION; + + DPRINT("send shm dev_size: %"PRId32"\n", vmsg->payload.shm.dev_size); + DPRINT("send shm vq_size: %"PRId32"\n", vmsg->payload.shm.vq_size); + DPRINT("send shm align: %"PRId32"\n", vmsg->payload.shm.align); + DPRINT("send shm version: %"PRId32"\n", vmsg->payload.shm.version); + + return true; +} + +static bool +vu_set_shm_fd(VuDev *dev, VhostUserMsg *vmsg) +{ + int fd; + uint64_t mmap_size, mmap_offset; + void *rc; + + if (vmsg->fd_num !=3D 1 || + vmsg->size !=3D sizeof(vmsg->payload.shm)) { + vu_panic(dev, "Invalid set_shm_fd message size:%d fds:%d", + vmsg->size, vmsg->fd_num); + return false; + } + + fd =3D vmsg->fds[0]; + mmap_size =3D vmsg->payload.shm.mmap_size; + mmap_offset =3D vmsg->payload.shm.mmap_offset; + DPRINT("set_shm_fd mmap_size: %"PRId64"\n", mmap_size); + DPRINT("set_shm_fd mmap_offset: %"PRId64"\n", mmap_offset); + 
DPRINT("set_shm_fd dev_size: %"PRId32"\n", vmsg->payload.shm.dev_size); + DPRINT("set_shm_fd vq_size: %"PRId32"\n", vmsg->payload.shm.vq_size); + DPRINT("set_shm_fd align: %"PRId32"\n", vmsg->payload.shm.align); + DPRINT("set_shm_fd version: %"PRId32"\n", vmsg->payload.shm.version); + + rc =3D mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, mmap_offset); + + close(fd); + + if (rc =3D=3D MAP_FAILED) { + vu_panic(dev, "set_shm_fd mmap error: %s", strerror(errno)); + return false; + } + + if (dev->shm_info.addr) { + munmap(dev->shm_info.addr, dev->shm_info.mmap_size); + } + dev->shm_info.addr =3D rc; + dev->shm_info.mmap_size =3D mmap_size; + dev->shm_info.dev_size =3D vmsg->payload.shm.dev_size; + dev->shm_info.vq_size =3D vmsg->payload.shm.vq_size; + dev->shm_info.align =3D vmsg->payload.shm.align; + dev->shm_info.version =3D vmsg->payload.shm.version; + + if (vu_setup_shm(dev)) { + vu_panic(dev, "setup shm failed"); + return false; + } + + return false; +} + static bool vu_process_message(VuDev *dev, VhostUserMsg *vmsg) { @@ -1292,6 +1454,10 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_set_postcopy_listen(dev, vmsg); case VHOST_USER_POSTCOPY_END: return vu_set_postcopy_end(dev, vmsg); + case VHOST_USER_GET_SHM_SIZE: + return vu_get_shm_size(dev, vmsg); + case VHOST_USER_SET_SHM_FD: + return vu_set_shm_fd(dev, vmsg); default: vmsg_close_fds(vmsg); vu_panic(dev, "Unhandled request: %d", vmsg->request); @@ -1359,8 +1525,13 @@ vu_deinit(VuDev *dev) close(vq->err_fd); vq->err_fd =3D -1; } + vq->shm =3D NULL; } =20 + if (dev->shm_info.addr) { + munmap(dev->shm_info.addr, dev->shm_info.mmap_size); + dev->shm_info.addr =3D NULL; + } =20 vu_close_log(dev); if (dev->slave_fd !=3D -1) { @@ -1829,12 +2000,6 @@ virtqueue_map_desc(VuDev *dev, *p_num_sg =3D num_sg; } =20 -/* Round number down to multiple */ -#define ALIGN_DOWN(n, m) ((n) / (m) * (m)) - -/* Round number up to multiple */ -#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m)) - 
static void * virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) @@ -1935,9 +2100,44 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned = int idx, size_t sz) return elem; } =20 +static int +vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx) +{ + if ((dev->protocol_features & + VHOST_USER_PROTOCOL_F_SLAVE_SHMFD) =3D=3D 0) { + return 0; + } + + if (unlikely(!vq->shm)) { + return -1; + } + + vq->shm->inflight[desc_idx] =3D 1; + + return 0; +} + +static int +vu_queue_inflight_put(VuDev *dev, VuVirtq *vq, int desc_idx) +{ + if ((dev->protocol_features & + VHOST_USER_PROTOCOL_F_SLAVE_SHMFD) =3D=3D 0) { + return 0; + } + + if (unlikely(!vq->shm)) { + return -1; + } + + vq->shm->inflight[desc_idx] =3D 0; + + return 0; +} + void * vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) { + int i; unsigned int head; VuVirtqElement *elem; =20 @@ -1946,6 +2146,12 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) return NULL; } =20 + if (unlikely(vq->inflight_num > 0)) { + i =3D (--vq->inflight_num); + elem =3D vu_queue_map_desc(dev, vq, vq->inflight_desc[i], sz); + return elem; + } + if (vu_queue_empty(dev, vq)) { return NULL; } @@ -1976,6 +2182,8 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) =20 vq->inuse++; =20 + vu_queue_inflight_get(dev, vq, head); + return elem; } =20 @@ -2121,4 +2329,5 @@ vu_queue_push(VuDev *dev, VuVirtq *vq, { vu_queue_fill(dev, vq, elem, len, 0); vu_queue_flush(dev, vq, 1); + vu_queue_inflight_put(dev, vq, elem->index); } diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/= libvhost-user.h index 4aa55b4d2d..fdfda688d2 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -53,6 +53,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_CONFIG =3D 9, VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD =3D 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER =3D 11, + VHOST_USER_PROTOCOL_F_SLAVE_SHMFD =3D 12, =20 VHOST_USER_PROTOCOL_F_MAX }; @@ -91,6 +92,8 @@ typedef enum 
VhostUserRequest { VHOST_USER_POSTCOPY_ADVISE =3D 28, VHOST_USER_POSTCOPY_LISTEN =3D 29, VHOST_USER_POSTCOPY_END =3D 30, + VHOST_USER_GET_SHM_SIZE =3D 31, + VHOST_USER_SET_SHM_FD =3D 32, VHOST_USER_MAX } VhostUserRequest; =20 @@ -138,6 +141,15 @@ typedef struct VhostUserVringArea { uint64_t offset; } VhostUserVringArea; =20 +typedef struct VhostUserShm { + uint64_t mmap_size; + uint64_t mmap_offset; + uint32_t dev_size; + uint32_t vq_size; + uint32_t align; + uint32_t version; +} VhostUserShm; + #if defined(_WIN32) # define VU_PACKED __attribute__((gcc_struct, packed)) #else @@ -163,6 +175,7 @@ typedef struct VhostUserMsg { VhostUserLog log; VhostUserConfig config; VhostUserVringArea area; + VhostUserShm shm; } payload; =20 int fds[VHOST_MEMORY_MAX_NREGIONS]; @@ -234,9 +247,19 @@ typedef struct VuRing { uint32_t flags; } VuRing; =20 +typedef struct VuVirtqShm { + char inflight[VIRTQUEUE_MAX_SIZE]; +} VuVirtqShm; + typedef struct VuVirtq { VuRing vring; =20 + VuVirtqShm *shm; + + uint16_t inflight_desc[VIRTQUEUE_MAX_SIZE]; + + uint16_t inflight_num; + /* Next head to pop */ uint16_t last_avail_idx; =20 @@ -279,11 +302,21 @@ typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, = int condition, vu_watch_cb cb, void *data); typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd); =20 +typedef struct VuDevShmInfo { + void *addr; + uint64_t mmap_size; + uint32_t dev_size; + uint32_t vq_size; + uint32_t align; + uint32_t version; +} VuDevShmInfo; + struct VuDev { int sock; uint32_t nregions; VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS]; VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE]; + VuDevShmInfo shm_info; int log_call_fd; int slave_fd; uint64_t log_size; --=20 2.17.1