When I/O consists of many small requests, performance is improved by
batching them together in a single io_submit() call. When there are
relatively few requests, the extra overhead is not worth it. This
introduces a check to start batching I/O requests via blk_io_plug()/
blk_io_unplug() in an amount proportional to the number which were
already in flight at the time we started reading the ring.

Signed-off-by: Tim Smith <tim.smith@citrix.com>
---
hw/block/xen_disk.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94f84..cb2881b7e6 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -101,6 +101,9 @@ struct XenBlkDev {
     AioContext *ctx;
 };
 
+/* Threshold of in-flight requests above which we will start using
+ * blk_io_plug()/blk_io_unplug() to batch requests */
+#define IO_PLUG_THRESHOLD 1
 /* ------------------------------------------------------------- */
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 {
     RING_IDX rc, rp;
     struct ioreq *ioreq;
+    int inflight_atstart = blkdev->requests_inflight;
+    int batched = 0;
 
     blkdev->more_work = 0;
 
@@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 
     blk_send_response_all(blkdev);
+    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
+     * when we got here, this is an indication that the bottleneck
+     * is below us, so it's worth beginning to batch up I/O requests
+     * rather than submitting them immediately. The maximum number
+     * of requests we're willing to batch is the number already in
+     * flight, so it can grow up to max_requests when the bottleneck
+     * is below us */
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_plug(blkdev->blk);
+    }
     while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@@ -589,7 +604,22 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             continue;
         }
 
+        if (inflight_atstart > IO_PLUG_THRESHOLD &&
+            batched >= inflight_atstart) {
+            blk_io_unplug(blkdev->blk);
+        }
         ioreq_runio_qemu_aio(ioreq);
+        if (inflight_atstart > IO_PLUG_THRESHOLD) {
+            if (batched >= inflight_atstart) {
+                blk_io_plug(blkdev->blk);
+                batched = 0;
+            } else {
+                batched++;
+            }
+        }
+    }
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_unplug(blkdev->blk);
     }
 
     if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {
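
For readers without the surrounding xen_disk code to hand, the heuristic can be
summarised with a small standalone sketch. Everything below (the fake_ring type,
plug(), unplug(), submit_one()) is a hypothetical stand-in for the Xen ring and
the QEMU block-layer calls, not code from this patch; it only illustrates how
the IO_PLUG_THRESHOLD check and the "batched" counter interact:

/*
 * Standalone sketch (not part of the patch): hypothetical stand-ins
 * for the Xen ring and the QEMU block-layer calls, showing how the
 * IO_PLUG_THRESHOLD check and the "batched" counter interact.
 */
#include <stdio.h>

#define IO_PLUG_THRESHOLD 1

struct fake_ring {
    int pending;                    /* requests still waiting on the ring */
};

static void plug(void)        { printf("plug   (start holding requests back)\n"); }
static void unplug(void)      { printf("unplug (flush batch in one submission)\n"); }
static void submit_one(int n) { printf("  queue request %d\n", n); }

static void handle_requests(struct fake_ring *ring, int inflight_atstart)
{
    int batched = 0;
    int n = 0;

    /* Only start batching when the backend already looked busy on entry. */
    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        plug();
    }

    while (ring->pending-- > 0) {
        /* Never hold back more requests than were in flight at the start. */
        if (inflight_atstart > IO_PLUG_THRESHOLD &&
            batched >= inflight_atstart) {
            unplug();
        }
        submit_one(++n);
        if (inflight_atstart > IO_PLUG_THRESHOLD) {
            if (batched >= inflight_atstart) {
                plug();             /* begin the next batch */
                batched = 0;
            } else {
                batched++;
            }
        }
    }

    /* Flush anything still held back once the ring has been drained. */
    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        unplug();
    }
}

int main(void)
{
    struct fake_ring ring = { .pending = 10 };

    /* Four requests were already in flight, so batches of up to four form. */
    handle_requests(&ring, 4);
    return 0;
}

With ten requests on the ring and four already in flight, the sketch queues at
most four requests between each plug()/unplug() pair, which is the proportional
behaviour described in the commit message; the final unplug() flushes whatever
is still held back once the ring has been drained.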
> -----Original Message-----
> From: Tim Smith [mailto:tim.smith@citrix.com]
> Sent: 02 November 2018 10:01
> To: xen-devel@lists.xenproject.org; qemu-devel@nongnu.org; qemu-
> block@nongnu.org
> Cc: Anthony Perard <anthony.perard@citrix.com>; Kevin Wolf
> <kwolf@redhat.com>; Paul Durrant <Paul.Durrant@citrix.com>; Stefano
> Stabellini <sstabellini@kernel.org>; Max Reitz <mreitz@redhat.com>
> Subject: [PATCH 1/3] Improve xen_disk batching behaviour
>
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
>
> Signed-off-by: Tim Smith <tim.smith@citrix.com>
Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
On Fri, Nov 02, 2018 at 10:00:59AM +0000, Tim Smith wrote:
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
>
> Signed-off-by: Tim Smith <tim.smith@citrix.com>

Acked-by: Anthony PERARD <anthony.perard@citrix.com>

--
Anthony PERARD