Merge the copying code into one function, block_copy_do_copy, which only
calls bdrv_* I/O functions and doesn't do any synchronization (like dirty
bitmap set/reset).
Refactor the block_copy() function so that it makes the full decision about
the size of the chunk to be copied and does all the synchronization (checking
intersecting requests, setting/resetting dirty bitmaps).
This will help to:
- introduce parallel processing of block_copy iterations: we need to
  calculate the chunk size, start an async chunk copy and go to the next
  iteration
- simplify later synchronization improvements, like the memory limit in a
  further commit and a smaller critical section (now we lock the whole
  requested range, when actually we only need to lock the dirty region
  that we are handling at the moment)
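
To illustrate the split, here is a standalone toy model in plain C
(illustrative names only, not QEMU APIs): do_copy() does nothing but copy one
chunk, while copy_all() owns the dirty "bitmap" (a plain bool array here) and
decides the chunk boundaries, which is exactly the shape that would later let
the caller start do_copy() calls asynchronously.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define CLUSTERS      8
#define CLUSTER_SIZE  4

static char source[CLUSTERS * CLUSTER_SIZE];
static char target[CLUSTERS * CLUSTER_SIZE];
static bool dirty[CLUSTERS];

/* Pure copy of clusters [start, end): no bitmap handling, no locking. */
static int do_copy(int start, int end)
{
    memcpy(target + start * CLUSTER_SIZE, source + start * CLUSTER_SIZE,
           (size_t)(end - start) * CLUSTER_SIZE);
    return 0;
}

/* The caller owns all synchronization and takes the chunk-size decision. */
static void copy_all(void)
{
    int start = 0;

    while (start < CLUSTERS) {
        int chunk_end;

        if (!dirty[start]) {
            start++;                       /* already copied */
            continue;
        }
        /* decide the chunk size here, not inside do_copy() */
        for (chunk_end = start + 1;
             chunk_end < CLUSTERS && dirty[chunk_end]; chunk_end++) {
        }
        for (int i = start; i < chunk_end; i++) {
            dirty[i] = false;              /* reset before copying */
        }
        if (do_copy(start, chunk_end) < 0) {
            for (int i = start; i < chunk_end; i++) {
                dirty[i] = true;           /* re-dirty on error */
            }
            break;
        }
        start = chunk_end;
    }
}

int main(void)
{
    memset(source, 'x', sizeof(source));
    for (int i = 0; i < CLUSTERS; i++) {
        dirty[i] = true;
    }
    copy_all();
    printf("target: %.*s\n", (int)sizeof(target), target);
    return 0;
}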
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
block/block-copy.c | 113 ++++++++++++++++++++-------------------------
block/trace-events | 6 +--
2 files changed, 53 insertions(+), 66 deletions(-)
diff --git a/block/block-copy.c b/block/block-copy.c
index 75287ce24d..cc49d2345d 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -126,25 +126,43 @@ void block_copy_set_callbacks(
}
/*
- * Copy range to target with a bounce buffer and return the bytes copied. If
- * error occurred, return a negative error number
+ * block_copy_do_copy
+ *
+ * Do copy of a cluster-aligned chunk. @end is allowed to exceed s->len only to
+ * cover the last cluster when s->len is not aligned to clusters.
+ *
+ * No sync here: neither bitmap nor intersecting-request handling, only copy.
+ *
+ * Returns 0 on success.
*/
-static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
- int64_t start,
- int64_t end,
- bool *error_is_read)
+static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
+ int64_t start, int64_t end,
+ bool *error_is_read)
{
int ret;
- int nbytes;
- void *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);
+ int nbytes = MIN(end, s->len) - start;
+ void *bounce_buffer = NULL;
assert(QEMU_IS_ALIGNED(start, s->cluster_size));
- bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
- nbytes = MIN(s->cluster_size, s->len - start);
+ assert(QEMU_IS_ALIGNED(end, s->cluster_size));
+ assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));
+
+ if (s->use_copy_range) {
+ ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
+ 0, s->write_flags);
+ if (ret < 0) {
+ trace_block_copy_copy_range_fail(s, start, ret);
+ s->use_copy_range = false;
+ } else {
+ return ret;
+ }
+ }
+
+ bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
if (ret < 0) {
- trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
+ trace_block_copy_read_fail(s, start, ret);
if (error_is_read) {
*error_is_read = true;
}
@@ -154,7 +172,7 @@ static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer,
s->write_flags);
if (ret < 0) {
- trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
+ trace_block_copy_write_fail(s, start, ret);
if (error_is_read) {
*error_is_read = false;
}
@@ -163,42 +181,12 @@ static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
qemu_vfree(bounce_buffer);
- return nbytes;
+ return 0;
+
fail:
qemu_vfree(bounce_buffer);
- bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
- return ret;
-
-}
-
-/*
- * Copy range to target and return the bytes copied. If error occurred, return a
- * negative error number.
- */
-static int coroutine_fn block_copy_with_offload(BlockCopyState *s,
- int64_t start,
- int64_t end)
-{
- int ret;
- int nr_clusters;
- int nbytes;
- assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size));
- assert(QEMU_IS_ALIGNED(start, s->cluster_size));
- nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start);
- nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size);
- bdrv_reset_dirty_bitmap(s->copy_bitmap, start,
- s->cluster_size * nr_clusters);
- ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
- 0, s->write_flags);
- if (ret < 0) {
- trace_block_copy_with_offload_fail(s, start, ret);
- bdrv_set_dirty_bitmap(s->copy_bitmap, start,
- s->cluster_size * nr_clusters);
- return ret;
- }
-
- return nbytes;
+ return ret;
}
/*
@@ -294,7 +282,7 @@ int coroutine_fn block_copy(BlockCopyState *s,
block_copy_inflight_req_begin(s, &req, start, end);
while (start < end) {
- int64_t dirty_end;
+ int64_t next_zero, chunk_end;
if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
trace_block_copy_skip(s, start);
@@ -302,10 +290,15 @@ int coroutine_fn block_copy(BlockCopyState *s,
continue; /* already copied */
}
- dirty_end = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
- (end - start));
- if (dirty_end < 0) {
- dirty_end = end;
+ chunk_end = MIN(end, start + (s->use_copy_range ?
+ s->copy_range_size : s->cluster_size));
+
+ next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
+ chunk_end - start);
+ if (next_zero >= 0) {
+ assert(next_zero > start); /* start is dirty */
+ assert(next_zero < chunk_end); /* no need to do MIN() */
+ chunk_end = next_zero;
}
if (s->skip_unallocated) {
@@ -316,27 +309,21 @@ int coroutine_fn block_copy(BlockCopyState *s,
continue;
}
/* Clamp to known allocated region */
- dirty_end = MIN(dirty_end, start + status_bytes);
+ chunk_end = MIN(chunk_end, start + status_bytes);
}
trace_block_copy_process(s, start);
- if (s->use_copy_range) {
- ret = block_copy_with_offload(s, start, dirty_end);
- if (ret < 0) {
- s->use_copy_range = false;
- }
- }
- if (!s->use_copy_range) {
- ret = block_copy_with_bounce_buffer(s, start, dirty_end,
- error_is_read);
- }
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
+
+ ret = block_copy_do_copy(s, start, chunk_end, error_is_read);
if (ret < 0) {
+ bdrv_set_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
break;
}
- start += ret;
- s->progress_bytes_callback(ret, s->progress_opaque);
+ s->progress_bytes_callback(chunk_end - start, s->progress_opaque);
+ start = chunk_end;
ret = 0;
}
diff --git a/block/trace-events b/block/trace-events
index b8d70f5242..ccde15a14c 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -45,9 +45,9 @@ backup_do_cow_return(void *job, int64_t offset, uint64_t bytes, int ret) "job %p
block_copy_skip(void *bcs, int64_t start) "bcs %p start %"PRId64
block_copy_skip_range(void *bcs, int64_t start, uint64_t bytes) "bcs %p start %"PRId64" bytes %"PRId64
block_copy_process(void *bcs, int64_t start) "bcs %p start %"PRId64
-block_copy_with_bounce_buffer_read_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
-block_copy_with_bounce_buffer_write_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
-block_copy_with_offload_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
+block_copy_copy_range_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
+block_copy_read_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
+block_copy_write_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
# ../blockdev.c
qmp_block_job_cancel(void *job) "job %p"
--
2.21.0
On 03.10.19 19:15, Vladimir Sementsov-Ogievskiy wrote:
> Merge the copying code into one function, block_copy_do_copy, which only
> calls bdrv_* I/O functions and doesn't do any synchronization (like dirty
> bitmap set/reset).
>
> Refactor the block_copy() function so that it makes the full decision about
> the size of the chunk to be copied and does all the synchronization (checking
> intersecting requests, setting/resetting dirty bitmaps).
>
> This will help to:
> - introduce parallel processing of block_copy iterations: we need to
>   calculate the chunk size, start an async chunk copy and go to the next
>   iteration
> - simplify later synchronization improvements, like the memory limit in a
>   further commit and a smaller critical section (now we lock the whole
>   requested range, when actually we only need to lock the dirty region
>   that we are handling at the moment)
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
> block/block-copy.c | 113 ++++++++++++++++++++-------------------------
> block/trace-events | 6 +--
> 2 files changed, 53 insertions(+), 66 deletions(-)
Looks good to me, just some clean-up path nit picks below.
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 75287ce24d..cc49d2345d 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -126,25 +126,43 @@ void block_copy_set_callbacks(
> }
>
> /*
> - * Copy range to target with a bounce buffer and return the bytes copied. If
> - * error occurred, return a negative error number
> + * block_copy_do_copy
> + *
> + * Do copy of a cluster-aligned chunk. @end is allowed to exceed s->len only to
> + * cover the last cluster when s->len is not aligned to clusters.
> + *
> + * No sync here: neither bitmap nor intersecting-request handling, only copy.
> + *
> + * Returns 0 on success.
> */
> -static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
> - int64_t start,
> - int64_t end,
> - bool *error_is_read)
> +static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
> + int64_t start, int64_t end,
> + bool *error_is_read)
> {
> int ret;
> - int nbytes;
> - void *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);
> + int nbytes = MIN(end, s->len) - start;
> + void *bounce_buffer = NULL;
>
> assert(QEMU_IS_ALIGNED(start, s->cluster_size));
> - bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
> - nbytes = MIN(s->cluster_size, s->len - start);
> + assert(QEMU_IS_ALIGNED(end, s->cluster_size));
> + assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));
> +
> + if (s->use_copy_range) {
> + ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
> + 0, s->write_flags);
> + if (ret < 0) {
> + trace_block_copy_copy_range_fail(s, start, ret);
> + s->use_copy_range = false;
> + } else {
> + return ret;
Maybe the “fail” label should be called “out” and then we could go there
from here. Doesn’t make much of a difference here (1 LoC either way),
but maybe it’s a bit cleaner to always use the clean-up path in this
function (even when there’s nothing to clean up).
*shrug*
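
For instance, a minimal standalone illustration in plain C (made-up names,
with malloc/free standing in for the bounce-buffer helpers): even the path
that has nothing to free leaves through the same label, so any new early exit
automatically goes through the cleanup.

#include <stdio.h>
#include <stdlib.h>

static int copy_chunk(int use_fast_path)
{
    int ret;
    void *bounce_buffer = NULL;

    if (use_fast_path) {
        ret = 0;                /* pretend the offloaded copy succeeded */
        goto out;               /* nothing to free, but reuse the exit path */
    }

    bounce_buffer = malloc(64);
    if (!bounce_buffer) {
        ret = -1;
        goto out;
    }
    /* ... read into bounce_buffer, write it out ... */
    ret = 0;

out:
    free(bounce_buffer);        /* free(NULL) is a no-op */
    return ret;
}

int main(void)
{
    printf("fast: %d, slow: %d\n", copy_chunk(1), copy_chunk(0));
    return 0;
}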
> + }
> + }
> +
> + bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
>
> ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
> if (ret < 0) {
> - trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
> + trace_block_copy_read_fail(s, start, ret);
> if (error_is_read) {
> *error_is_read = true;
> }
[...]
> @@ -163,42 +181,12 @@ static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
>
> qemu_vfree(bounce_buffer);
>
> - return nbytes;
> + return 0;
> +
> fail:
> qemu_vfree(bounce_buffer);
> - bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
> - return ret;
> -
> -}
Wouldn’t it be simpler to drop the “qemu_vfree(bounce_buffer); return
0;” above the fail label and just fall through?
In any case:
Reviewed-by: Max Reitz <mreitz@redhat.com>
07.10.2019 17:17, Max Reitz wrote:
> On 03.10.19 19:15, Vladimir Sementsov-Ogievskiy wrote:
>> Merge the copying code into one function, block_copy_do_copy, which only
>> calls bdrv_* I/O functions and doesn't do any synchronization (like dirty
>> bitmap set/reset).
>>
>> Refactor the block_copy() function so that it makes the full decision about
>> the size of the chunk to be copied and does all the synchronization (checking
>> intersecting requests, setting/resetting dirty bitmaps).
>>
>> This will help to:
>> - introduce parallel processing of block_copy iterations: we need to
>>   calculate the chunk size, start an async chunk copy and go to the next
>>   iteration
>> - simplify later synchronization improvements, like the memory limit in a
>>   further commit and a smaller critical section (now we lock the whole
>>   requested range, when actually we only need to lock the dirty region
>>   that we are handling at the moment)
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>> block/block-copy.c | 113 ++++++++++++++++++++-------------------------
>> block/trace-events | 6 +--
>> 2 files changed, 53 insertions(+), 66 deletions(-)
>
> Looks good to me, just some clean-up path nit picks below.
>
>> diff --git a/block/block-copy.c b/block/block-copy.c
>> index 75287ce24d..cc49d2345d 100644
>> --- a/block/block-copy.c
>> +++ b/block/block-copy.c
>> @@ -126,25 +126,43 @@ void block_copy_set_callbacks(
>> }
>>
>> /*
>> - * Copy range to target with a bounce buffer and return the bytes copied. If
>> - * error occurred, return a negative error number
>> + * block_copy_do_copy
>> + *
>> + * Do copy of a cluster-aligned chunk. @end is allowed to exceed s->len only to
>> + * cover the last cluster when s->len is not aligned to clusters.
>> + *
>> + * No sync here: neither bitmap nor intersecting-request handling, only copy.
>> + *
>> + * Returns 0 on success.
>> */
>> -static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
>> - int64_t start,
>> - int64_t end,
>> - bool *error_is_read)
>> +static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
>> + int64_t start, int64_t end,
>> + bool *error_is_read)
>> {
>> int ret;
>> - int nbytes;
>> - void *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);
>> + int nbytes = MIN(end, s->len) - start;
>> + void *bounce_buffer = NULL;
>>
>> assert(QEMU_IS_ALIGNED(start, s->cluster_size));
>> - bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
>> - nbytes = MIN(s->cluster_size, s->len - start);
>> + assert(QEMU_IS_ALIGNED(end, s->cluster_size));
>> + assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));
>> +
>> + if (s->use_copy_range) {
>> + ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
>> + 0, s->write_flags);
>> + if (ret < 0) {
>> + trace_block_copy_copy_range_fail(s, start, ret);
>> + s->use_copy_range = false;
>> + } else {
>> + return ret;
>
> Maybe the “fail” label should be called “out” and then we could go there
> from here. Doesn’t make much of a difference here (1 LoC either way),
> but maybe it’s a bit cleaner to always use the clean-up path in this
> function (even when there’s nothing to clean up).
Hmm, I always do an immediate return when possible (when the things that need
cleanup haven't happened yet). A matter of taste, of course; you are the
maintainer, so if you prefer the other way, I'll change it to a goto.
>
> *shrug*
>
>> + }
>> + }
>> +
>> + bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
>>
>> ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
>> if (ret < 0) {
>> - trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
>> + trace_block_copy_read_fail(s, start, ret);
>> if (error_is_read) {
>> *error_is_read = true;
>> }
>
> [...]
>
>> @@ -163,42 +181,12 @@ static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
>>
>> qemu_vfree(bounce_buffer);
>>
>> - return nbytes;
>> + return 0;
>> +
>> fail:
>> qemu_vfree(bounce_buffer);
>> - bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
>> - return ret;
>> -
>> -}
>
> Wouldn’t it be simpler to drop the “qemu_vfree(bounce_buffer); return
> 0;” above the fail label and just fall through?
Hmm, yes, that's better and a more usual pattern. Will do.
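
Roughly like this, I think (a standalone plain-C sketch, with malloc/free
standing in for qemu_blockalign/qemu_vfree, not the actual function): the
success path no longer duplicates the free and return, it simply falls through
into the cleanup label.

#include <stdio.h>
#include <stdlib.h>

static int copy_with_bounce(int fail_read)
{
    int ret;
    void *bounce_buffer = malloc(64);

    if (!bounce_buffer) {
        return -1;
    }

    ret = fail_read ? -5 : 0;   /* stand-in for the bdrv_co_pread() call */
    if (ret < 0) {
        goto out;
    }

    ret = 0;                    /* stand-in for the bdrv_co_pwrite() call */
    /* success: fall through into the same cleanup as the error path */
out:
    free(bounce_buffer);
    return ret;
}

int main(void)
{
    printf("ok: %d, read error: %d\n", copy_with_bounce(0), copy_with_bounce(1));
    return 0;
}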
>
> In any case:
>
> Reviewed-by: Max Reitz <mreitz@redhat.com>
>
--
Best regards,
Vladimir