From: Tim Smith
Date: Fri, 2 Nov 2018 09:29:50 +0000
Message-ID: <154115099006.664.2982181181564452215.stgit@dhcp-3-135.uk.xensource.com>
In-Reply-To: <154115098499.664.15585399091081300567.stgit@dhcp-3-135.uk.xensource.com>
Subject: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
Cc: Paul Durrant

When I/O consists of many small requests, performance is improved by
batching them together in a single io_submit() call. When there are
relatively few requests, the extra overhead is not worth it.

This introduces a check that starts batching I/O requests via
blk_io_plug()/blk_io_unplug() in an amount proportional to the number
that were already in flight when we started reading the ring.

Signed-off-by: Tim Smith
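[Editorial note] As a rough illustration of the policy described above, here is a minimal, standalone C sketch. Every name in it (blk_io_plug_stub(), blk_io_unplug_stub(), submit_request(), handle_ring()) is a hypothetical stand-in for the QEMU block-layer calls and the ring-processing loop, not part of this patch.

/* Standalone sketch of the batching policy added to blk_handle_requests().
 * The plug/unplug calls and the request source are hypothetical stubs. */
#include <stdio.h>

#define IO_PLUG_THRESHOLD 1

static void blk_io_plug_stub(void)   { printf("plug: start batching\n"); }
static void blk_io_unplug_stub(void) { printf("unplug: flush batch\n"); }
static void submit_request(int n)    { printf("  submit request %d\n", n); }

/* Submit nr_ring_requests requests, batching only if enough I/O was
 * already in flight when we started reading the ring. */
static void handle_ring(int inflight_atstart, int nr_ring_requests)
{
    int batched = 0;

    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        blk_io_plug_stub();
    }
    for (int i = 0; i < nr_ring_requests; i++) {
        /* Cap each batch at the number that were already in flight. */
        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {
            blk_io_unplug_stub();
        }
        submit_request(i);
        if (inflight_atstart > IO_PLUG_THRESHOLD) {
            if (batched >= inflight_atstart) {
                blk_io_plug_stub();   /* start a fresh batch */
                batched = 0;
            } else {
                batched++;
            }
        }
    }
    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        blk_io_unplug_stub();         /* flush whatever is left */
    }
}

int main(void)
{
    handle_ring(0, 3);   /* few in flight: submit immediately, no batching */
    handle_ring(4, 10);  /* busy: flush in batches of four */
    return 0;
}

Running it with inflight_atstart = 0 submits every request immediately; with inflight_atstart = 4 the requests are flushed in batches of four, mirroring the behaviour the patch adds below.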
---
 hw/block/xen_disk.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94f84..6cb40d66fa 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -101,6 +101,9 @@ struct XenBlkDev {
     AioContext *ctx;
 };
 
+/* Threshold of in-flight requests above which we will start using
+ * blk_io_plug()/blk_io_unplug() to batch requests */
+#define IO_PLUG_THRESHOLD 1
 /* ------------------------------------------------------------- */
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 {
     RING_IDX rc, rp;
     struct ioreq *ioreq;
+    int inflight_atstart = blkdev->requests_inflight;
+    int batched = 0;
 
     blkdev->more_work = 0;
 
@@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 
     blk_send_response_all(blkdev);
+    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
+     * when we got here, this is an indication that the bottleneck
+     * is below us, so it's worth beginning to batch up I/O requests
+     * rather than submitting them immediately. The maximum number
+     * of requests we're willing to batch is the number already in
+     * flight, so it can grow up to max_requests when the bottleneck
+     * is below us. */
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_plug(blkdev->blk);
+    }
     while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@@ -589,7 +604,21 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             continue;
         }
 
+        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {
+            blk_io_unplug(blkdev->blk);
+        }
         ioreq_runio_qemu_aio(ioreq);
+        if (inflight_atstart > IO_PLUG_THRESHOLD) {
+            if (batched >= inflight_atstart) {
+                blk_io_plug(blkdev->blk);
+                batched = 0;
+            } else {
+                batched++;
+            }
+        }
+    }
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_unplug(blkdev->blk);
     }
 
     if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {
From: Tim Smith
Date: Fri, 2 Nov 2018 09:29:55 +0000
Message-ID: <154115099514.664.11901452428735273192.stgit@dhcp-3-135.uk.xensource.com>
In-Reply-To: <154115098499.664.15585399091081300567.stgit@dhcp-3-135.uk.xensource.com>
Subject: [Qemu-devel] [PATCH 2/3] Improve xen_disk response latency
Cc: Paul Durrant

If the I/O ring is full, the guest cannot send any more requests until
some responses are sent. Only sending all available responses just
before checking for new work does not leave much time for the guest to
supply new work, so this will cause stalls if the ring gets full. Also,
not completing reads as soon as possible adds latency to the guest.

To alleviate that, complete I/O requests as soon as they come back.
blk_send_response() already returns a value indicating whether a notify
should be sent, which is all the batching we need.

Signed-off-by: Tim Smith
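[Editorial note] To illustrate the completion path this patch moves to, here is a small, self-contained C sketch. Every identifier in it (ioreq_stub, send_response(), notify_guest(), aio_complete()) is a hypothetical stand-in for the corresponding xen_disk code, not the real implementation.

/* Standalone sketch of per-request completion: each request is answered in
 * its own completion callback, and a notify is sent only when the response
 * path says the guest needs one (mirroring what blk_send_response() reports). */
#include <stdbool.h>
#include <stdio.h>

struct ioreq_stub { int id; };

/* Pretend to push a response onto the shared ring; return whether the
 * guest needs to be notified. */
static bool send_response(struct ioreq_stub *req)
{
    printf("response for request %d placed on ring\n", req->id);
    return true;  /* the real decision comes from the ring's notify check */
}

static void notify_guest(void)
{
    printf("event-channel notify sent\n");
}

/* Completion callback: respond immediately instead of queueing the request
 * on a 'finished' list to be drained later. */
static void aio_complete(struct ioreq_stub *req)
{
    if (send_response(req)) {
        notify_guest();
    }
    /* the real code releases req back to the free list here */
}

int main(void)
{
    struct ioreq_stub a = { 1 }, b = { 2 };
    aio_complete(&a);
    aio_complete(&b);
    return 0;
}

The design consequence is that the 'finished' list and its drain pass become unnecessary, which is what the diff below removes.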
---
 hw/block/xen_disk.c | 43 ++++++++++++-------------------------------
 1 file changed, 12 insertions(+), 31 deletions(-)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 6cb40d66fa..c11cd21d37 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -83,11 +83,9 @@ struct XenBlkDev {
 
     /* request lists */
     QLIST_HEAD(inflight_head, ioreq) inflight;
-    QLIST_HEAD(finished_head, ioreq) finished;
     QLIST_HEAD(freelist_head, ioreq) freelist;
     int requests_total;
     int requests_inflight;
-    int requests_finished;
     unsigned int max_requests;
 
     gboolean feature_discard;
@@ -104,6 +102,9 @@ struct XenBlkDev {
 /* Threshold of in-flight requests above which we will start using
  * blk_io_plug()/blk_io_unplug() to batch requests */
 #define IO_PLUG_THRESHOLD 1
+
+static int blk_send_response(struct ioreq *ioreq);
+
 /* ------------------------------------------------------------- */
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -155,12 +156,10 @@ static void ioreq_finish(struct ioreq *ioreq)
     struct XenBlkDev *blkdev = ioreq->blkdev;
 
     QLIST_REMOVE(ioreq, list);
-    QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
     blkdev->requests_inflight--;
-    blkdev->requests_finished++;
 }
 
-static void ioreq_release(struct ioreq *ioreq, bool finish)
+static void ioreq_release(struct ioreq *ioreq)
 {
     struct XenBlkDev *blkdev = ioreq->blkdev;
 
@@ -168,11 +167,7 @@ static void ioreq_release(struct ioreq *ioreq, bool finish)
     ioreq_reset(ioreq);
     ioreq->blkdev = blkdev;
     QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
-    if (finish) {
-        blkdev->requests_finished--;
-    } else {
-        blkdev->requests_inflight--;
-    }
+    blkdev->requests_inflight--;
 }
 
 /*
@@ -351,6 +346,10 @@ static void qemu_aio_complete(void *opaque, int ret)
     default:
         break;
     }
+    if (blk_send_response(ioreq)) {
+        xen_pv_send_notify(&blkdev->xendev);
+    }
+    ioreq_release(ioreq);
     qemu_bh_schedule(blkdev->bh);
 
 done:
@@ -455,7 +454,7 @@ err:
     return -1;
 }
 
-static int blk_send_response_one(struct ioreq *ioreq)
+static int blk_send_response(struct ioreq *ioreq)
 {
     struct XenBlkDev *blkdev = ioreq->blkdev;
     int send_notify = 0;
@@ -504,22 +503,6 @@ static int blk_send_response_one(struct ioreq *ioreq)
     return send_notify;
 }
 
-/* walk finished list, send outstanding responses, free requests */
-static void blk_send_response_all(struct XenBlkDev *blkdev)
-{
-    struct ioreq *ioreq;
-    int send_notify = 0;
-
-    while (!QLIST_EMPTY(&blkdev->finished)) {
-        ioreq = QLIST_FIRST(&blkdev->finished);
-        send_notify += blk_send_response_one(ioreq);
-        ioreq_release(ioreq, true);
-    }
-    if (send_notify) {
-        xen_pv_send_notify(&blkdev->xendev);
-    }
-}
-
 static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc)
 {
     switch (blkdev->protocol) {
@@ -554,7 +537,6 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     rp = blkdev->rings.common.sring->req_prod;
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 
-    blk_send_response_all(blkdev);
     /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
      * when we got here, this is an indication that the bottleneck
      * is below us, so it's worth beginning to batch up I/O requests
@@ -597,10 +579,10 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             break;
         };
 
-        if (blk_send_response_one(ioreq)) {
+        if (blk_send_response(ioreq)) {
             xen_pv_send_notify(&blkdev->xendev);
         }
-        ioreq_release(ioreq, false);
+        ioreq_release(ioreq);
         continue;
     }
 
@@ -645,7 +627,6 @@ static void blk_alloc(struct XenDevice *xendev)
     trace_xen_disk_alloc(xendev->name);
 
     QLIST_INIT(&blkdev->inflight);
-    QLIST_INIT(&blkdev->finished);
     QLIST_INIT(&blkdev->freelist);
 
     blkdev->iothread = iothread_create(xendev->name, &err);
From: Tim Smith
Date: Fri, 2 Nov 2018 09:30:00 +0000
Message-ID: <154115100023.664.18428772738615804977.stgit@dhcp-3-135.uk.xensource.com>
In-Reply-To: <154115098499.664.15585399091081300567.stgit@dhcp-3-135.uk.xensource.com>
Subject: [Qemu-devel] [PATCH 3/3] Avoid repeated memory allocation in xen_disk
Cc: Paul Durrant

xen_disk currently allocates memory to hold the data for each ioreq as
that ioreq is used, and frees it afterwards. Because it requires
page-aligned blocks, this interacts poorly with non-page-aligned
allocations and balloons the heap.

Instead, allocate the maximum possible requirement, which is
BLKIF_MAX_SEGMENTS_PER_REQUEST pages (currently 11 pages), when the
ioreq is created, and keep that allocation until it is destroyed. Since
the ioreqs themselves are re-used via a free list, this should actually
improve memory usage.

Signed-off-by: Tim Smith
---
 hw/block/xen_disk.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index c11cd21d37..67f894bba5 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -112,7 +112,6 @@ static void ioreq_reset(struct ioreq *ioreq)
     memset(&ioreq->req, 0, sizeof(ioreq->req));
     ioreq->status = 0;
     ioreq->start = 0;
-    ioreq->buf = NULL;
     ioreq->size = 0;
     ioreq->presync = 0;
 
@@ -137,6 +136,10 @@ static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
         /* allocate new struct */
         ioreq = g_malloc0(sizeof(*ioreq));
         ioreq->blkdev = blkdev;
+        /* An ioreq can never need more pages than this, and we re-use ioreqs,
+         * so allocate the memory once here; it is freed in blk_free() when
+         * the ioreq itself is freed. */
+        ioreq->buf = qemu_memalign(XC_PAGE_SIZE, BLKIF_MAX_SEGMENTS_PER_REQUEST * XC_PAGE_SIZE);
         blkdev->requests_total++;
         qemu_iovec_init(&ioreq->v, 1);
     } else {
@@ -313,14 +316,12 @@ static void qemu_aio_complete(void *opaque, int ret)
         if (ret == 0) {
             ioreq_grant_copy(ioreq);
         }
-        qemu_vfree(ioreq->buf);
         break;
     case BLKIF_OP_WRITE:
     case BLKIF_OP_FLUSH_DISKCACHE:
         if (!ioreq->req.nr_segments) {
             break;
         }
-        qemu_vfree(ioreq->buf);
         break;
     default:
         break;
@@ -392,12 +393,10 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
 {
     struct XenBlkDev *blkdev = ioreq->blkdev;
 
-    ioreq->buf = qemu_memalign(XC_PAGE_SIZE, ioreq->size);
     if (ioreq->req.nr_segments &&
         (ioreq->req.operation == BLKIF_OP_WRITE ||
         ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
         ioreq_grant_copy(ioreq)) {
-        qemu_vfree(ioreq->buf);
         goto err;
     }
 
@@ -989,6 +988,7 @@ static int blk_free(struct XenDevice *xendev)
         ioreq = QLIST_FIRST(&blkdev->freelist);
         QLIST_REMOVE(ioreq, list);
         qemu_iovec_destroy(&ioreq->v);
+        qemu_vfree(ioreq->buf);
         g_free(ioreq);
     }
 
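[Editorial note] For readers who want to see the allocation strategy of this patch in isolation, the following self-contained C sketch mirrors it with hypothetical stand-ins: posix_memalign() in place of qemu_memalign(), PAGE_SIZE and MAX_SEGS in place of XC_PAGE_SIZE and BLKIF_MAX_SEGMENTS_PER_REQUEST, and ioreq_stub in place of struct ioreq.

/* Standalone sketch: each ioreq gets one page-aligned buffer sized for the
 * worst case when it is first created, keeps it across re-use via the free
 * list, and only releases it when the ioreq itself is destroyed. */
#define _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <stdio.h>

#define PAGE_SIZE 4096
#define MAX_SEGS  11   /* stands in for BLKIF_MAX_SEGMENTS_PER_REQUEST */

struct ioreq_stub {
    void *buf;         /* lives as long as the ioreq itself */
    size_t size;       /* size actually used by the current request */
};

static struct ioreq_stub *ioreq_create(void)
{
    struct ioreq_stub *req = calloc(1, sizeof(*req));
    if (!req) {
        return NULL;
    }
    /* Worst-case buffer, allocated once instead of per request. */
    if (posix_memalign(&req->buf, PAGE_SIZE, MAX_SEGS * PAGE_SIZE) != 0) {
        free(req);
        return NULL;
    }
    return req;
}

static void ioreq_reuse(struct ioreq_stub *req, size_t size)
{
    req->size = size;  /* no allocation or free on the hot path */
    printf("request uses %zu bytes of the preallocated buffer\n", size);
}

static void ioreq_destroy(struct ioreq_stub *req)
{
    free(req->buf);    /* freed only when the ioreq itself goes away */
    free(req);
}

int main(void)
{
    struct ioreq_stub *req = ioreq_create();
    if (!req) {
        return 1;
    }
    ioreq_reuse(req, 8 * PAGE_SIZE);   /* first request */
    ioreq_reuse(req, 2 * PAGE_SIZE);   /* reused from the free list */
    ioreq_destroy(req);
    return 0;
}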