We are going to directly use one async block-copy operation for the backup
job, so we need a rate limiter.
We want to maintain current backup behavior: only background copying is
limited, and copy-before-write operations only participate in the limit
calculation. Therefore we need one rate limiter for the block-copy state
and a boolean flag in the block-copy call state for actual limitation.
Note that we can't just account each chunk in the limiter after
successful copying: that would not save us from starting a lot of async
sub-requests which would exceed the limit by too much. Instead, let's use the
following scheme on sub-request creation:
1. If at the moment limit is not exceeded, create the request and
account it immediately.
2. If at the moment the limit is already exceeded, don't create the
sub-request, and handle the limit instead (by sleeping).
With this approach we'll never exceed the limit more than by one
sub-request (which pretty much matches current backup behavior).
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
include/block/block-copy.h | 8 +++++++
block/block-copy.c | 44 ++++++++++++++++++++++++++++++++++++++
2 files changed, 52 insertions(+)
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 600984c733..d40e691123 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -59,6 +59,14 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
int64_t max_chunk,
BlockCopyAsyncCallbackFunc cb);
+/*
+ * Set speed limit for block-copy instance. All block-copy operations related to
+ * this BlockCopyState will participate in speed calculation, but only
+ * block_copy_async calls with @ratelimit=true will be actually limited.
+ */
+void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
+ uint64_t speed);
+
BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
diff --git a/block/block-copy.c b/block/block-copy.c
index 4114d1fd25..851d9c8aaf 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -26,6 +26,7 @@
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
#define BLOCK_COPY_MAX_MEM (128 * MiB)
#define BLOCK_COPY_MAX_WORKERS 64
+#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
static coroutine_fn int block_copy_task_entry(AioTask *task);
@@ -36,11 +37,13 @@ typedef struct BlockCopyCallState {
int64_t bytes;
int max_workers;
int64_t max_chunk;
+ bool ratelimit;
BlockCopyAsyncCallbackFunc cb;
/* State */
bool failed;
bool finished;
+ QemuCoSleepState *sleep_state;
/* OUT parameters */
bool error_is_read;
@@ -103,6 +106,9 @@ typedef struct BlockCopyState {
void *progress_opaque;
SharedResource *mem;
+
+ uint64_t speed;
+ RateLimit rate_limit;
} BlockCopyState;
static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
@@ -611,6 +617,21 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
}
task->zeroes = ret & BDRV_BLOCK_ZERO;
+ if (s->speed) {
+ if (call_state->ratelimit) {
+ uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
+ if (ns > 0) {
+ block_copy_task_end(task, -EAGAIN);
+ g_free(task);
+ qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns,
+ &call_state->sleep_state);
+ continue;
+ }
+ }
+
+ ratelimit_calculate_delay(&s->rate_limit, task->bytes);
+ }
+
trace_block_copy_process(s, task->offset);
co_get_from_shres(s->mem, task->bytes);
@@ -649,6 +670,13 @@ out:
return ret < 0 ? ret : found_dirty;
}
+static void block_copy_kick(BlockCopyCallState *call_state)
+{
+ if (call_state->sleep_state) {
+ qemu_co_sleep_wake(call_state->sleep_state);
+ }
+}
+
/*
* block_copy_common
*
@@ -729,6 +757,7 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
.s = s,
.offset = offset,
.bytes = bytes,
+ .ratelimit = ratelimit,
.cb = cb,
.max_workers = max_workers ?: BLOCK_COPY_MAX_WORKERS,
.max_chunk = max_chunk,
@@ -752,3 +781,18 @@ void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
{
s->skip_unallocated = skip;
}
+
+void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
+ uint64_t speed)
+{
+ uint64_t old_speed = s->speed;
+
+ s->speed = speed;
+ if (speed > 0) {
+ ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
+ }
+
+ if (call_state && old_speed && (speed > old_speed || speed == 0)) {
+ block_copy_kick(call_state);
+ }
+}
--
2.21.0
On 01.06.20 20:11, Vladimir Sementsov-Ogievskiy wrote:
> We are going to directly use one async block-copy operation for backup
> job, so we need rate limitator.
%s/limitator/limiter/g, I think.
> We want to maintain current backup behavior: only background copying is
> limited and copy-before-write operations only participate in limit
> calculation. Therefore we need one rate limitator for block-copy state
> and boolean flag for block-copy call state for actual limitation.
>
> Note, that we can't just calculate each chunk in limitator after
> successful copying: it will not save us from starting a lot of async
> sub-requests which will exceed limit too much. Instead let's use the
> following scheme on sub-request creation:
> 1. If at the moment limit is not exceeded, create the request and
> account it immediately.
> 2. If at the moment limit is already exceeded, drop create sub-request
> and handle limit instead (by sleep).
> With this approach we'll never exceed the limit more than by one
> sub-request (which pretty much matches current backup behavior).
Sounds reasonable.
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
> include/block/block-copy.h | 8 +++++++
> block/block-copy.c | 44 ++++++++++++++++++++++++++++++++++++++
> 2 files changed, 52 insertions(+)
>
> diff --git a/include/block/block-copy.h b/include/block/block-copy.h
> index 600984c733..d40e691123 100644
> --- a/include/block/block-copy.h
> +++ b/include/block/block-copy.h
> @@ -59,6 +59,14 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
> int64_t max_chunk,
> BlockCopyAsyncCallbackFunc cb);
>
> +/*
> + * Set speed limit for block-copy instance. All block-copy operations related to
> + * this BlockCopyState will participate in speed calculation, but only
> + * block_copy_async calls with @ratelimit=true will be actually limited.
> + */
> +void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
> + uint64_t speed);
> +
> BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
> void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
>
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 4114d1fd25..851d9c8aaf 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -26,6 +26,7 @@
> #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
> #define BLOCK_COPY_MAX_MEM (128 * MiB)
> #define BLOCK_COPY_MAX_WORKERS 64
> +#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
>
> static coroutine_fn int block_copy_task_entry(AioTask *task);
>
> @@ -36,11 +37,13 @@ typedef struct BlockCopyCallState {
> int64_t bytes;
> int max_workers;
> int64_t max_chunk;
> + bool ratelimit;
> BlockCopyAsyncCallbackFunc cb;
>
> /* State */
> bool failed;
> bool finished;
> + QemuCoSleepState *sleep_state;
>
> /* OUT parameters */
> bool error_is_read;
> @@ -103,6 +106,9 @@ typedef struct BlockCopyState {
> void *progress_opaque;
>
> SharedResource *mem;
> +
> + uint64_t speed;
> + RateLimit rate_limit;
> } BlockCopyState;
>
> static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
> @@ -611,6 +617,21 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
> }
> task->zeroes = ret & BDRV_BLOCK_ZERO;
>
> + if (s->speed) {
> + if (call_state->ratelimit) {
> + uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
> + if (ns > 0) {
> + block_copy_task_end(task, -EAGAIN);
> + g_free(task);
> + qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns,
> + &call_state->sleep_state);
> + continue;
> + }
> + }
> +
> + ratelimit_calculate_delay(&s->rate_limit, task->bytes);
> + }
> +
Looks good.
> trace_block_copy_process(s, task->offset);
>
> co_get_from_shres(s->mem, task->bytes);
> @@ -649,6 +670,13 @@ out:
> return ret < 0 ? ret : found_dirty;
> }
>
> +static void block_copy_kick(BlockCopyCallState *call_state)
> +{
> + if (call_state->sleep_state) {
> + qemu_co_sleep_wake(call_state->sleep_state);
> + }
> +}
> +
> /*
> * block_copy_common
> *
> @@ -729,6 +757,7 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
> .s = s,
> .offset = offset,
> .bytes = bytes,
> + .ratelimit = ratelimit,
Hm, same problem/question as in patch 6: Should the @ratelimit parameter
really be added in patch 5 if it’s used only now?
> .cb = cb,
> .max_workers = max_workers ?: BLOCK_COPY_MAX_WORKERS,
> .max_chunk = max_chunk,
> @@ -752,3 +781,18 @@ void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
> {
> s->skip_unallocated = skip;
> }
> +
> +void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
> + uint64_t speed)
> +{
> + uint64_t old_speed = s->speed;
> +
> + s->speed = speed;
> + if (speed > 0) {
> + ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
> + }
> +
> + if (call_state && old_speed && (speed > old_speed || speed == 0)) {
> + block_copy_kick(call_state);
> + }
> +}
Hm. I’m interested in seeing how this is going to be used, i.e. what
callers will pass for @call_state. I suppose it’s going to be the
background operation for the whole device, but I wonder whether it
actually makes sense to pass it. I mean, the caller could just call
block_copy_kick() itself (unconditionally, because it’ll never hurt, I
think).
22.07.2020 14:05, Max Reitz wrote:
> On 01.06.20 20:11, Vladimir Sementsov-Ogievskiy wrote:
>> We are going to directly use one async block-copy operation for backup
>> job, so we need rate limitator.
>
> %s/limitator/limiter/g, I think.
>
>> We want to maintain current backup behavior: only background copying is
>> limited and copy-before-write operations only participate in limit
>> calculation. Therefore we need one rate limitator for block-copy state
>> and boolean flag for block-copy call state for actual limitation.
>>
>> Note, that we can't just calculate each chunk in limitator after
>> successful copying: it will not save us from starting a lot of async
>> sub-requests which will exceed limit too much. Instead let's use the
>> following scheme on sub-request creation:
>> 1. If at the moment limit is not exceeded, create the request and
>> account it immediately.
>> 2. If at the moment limit is already exceeded, drop create sub-request
>> and handle limit instead (by sleep).
>> With this approach we'll never exceed the limit more than by one
>> sub-request (which pretty much matches current backup behavior).
>
> Sounds reasonable.
>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>> include/block/block-copy.h | 8 +++++++
>> block/block-copy.c | 44 ++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 52 insertions(+)
>>
>> diff --git a/include/block/block-copy.h b/include/block/block-copy.h
>> index 600984c733..d40e691123 100644
>> --- a/include/block/block-copy.h
>> +++ b/include/block/block-copy.h
>> @@ -59,6 +59,14 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
>> int64_t max_chunk,
>> BlockCopyAsyncCallbackFunc cb);
>>
>> +/*
>> + * Set speed limit for block-copy instance. All block-copy operations related to
>> + * this BlockCopyState will participate in speed calculation, but only
>> + * block_copy_async calls with @ratelimit=true will be actually limited.
>> + */
>> +void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
>> + uint64_t speed);
>> +
>> BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
>> void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
>>
>> diff --git a/block/block-copy.c b/block/block-copy.c
>> index 4114d1fd25..851d9c8aaf 100644
>> --- a/block/block-copy.c
>> +++ b/block/block-copy.c
>> @@ -26,6 +26,7 @@
>> #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
>> #define BLOCK_COPY_MAX_MEM (128 * MiB)
>> #define BLOCK_COPY_MAX_WORKERS 64
>> +#define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
>>
>> static coroutine_fn int block_copy_task_entry(AioTask *task);
>>
>> @@ -36,11 +37,13 @@ typedef struct BlockCopyCallState {
>> int64_t bytes;
>> int max_workers;
>> int64_t max_chunk;
>> + bool ratelimit;
>> BlockCopyAsyncCallbackFunc cb;
>>
>> /* State */
>> bool failed;
>> bool finished;
>> + QemuCoSleepState *sleep_state;
>>
>> /* OUT parameters */
>> bool error_is_read;
>> @@ -103,6 +106,9 @@ typedef struct BlockCopyState {
>> void *progress_opaque;
>>
>> SharedResource *mem;
>> +
>> + uint64_t speed;
>> + RateLimit rate_limit;
>> } BlockCopyState;
>>
>> static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
>> @@ -611,6 +617,21 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
>> }
>> task->zeroes = ret & BDRV_BLOCK_ZERO;
>>
>> + if (s->speed) {
>> + if (call_state->ratelimit) {
>> + uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
>> + if (ns > 0) {
>> + block_copy_task_end(task, -EAGAIN);
>> + g_free(task);
>> + qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns,
>> + &call_state->sleep_state);
>> + continue;
>> + }
>> + }
>> +
>> + ratelimit_calculate_delay(&s->rate_limit, task->bytes);
>> + }
>> +
>
> Looks good.
>
>> trace_block_copy_process(s, task->offset);
>>
>> co_get_from_shres(s->mem, task->bytes);
>> @@ -649,6 +670,13 @@ out:
>> return ret < 0 ? ret : found_dirty;
>> }
>>
>> +static void block_copy_kick(BlockCopyCallState *call_state)
>> +{
>> + if (call_state->sleep_state) {
>> + qemu_co_sleep_wake(call_state->sleep_state);
>> + }
>> +}
>> +
>> /*
>> * block_copy_common
>> *
>> @@ -729,6 +757,7 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
>> .s = s,
>> .offset = offset,
>> .bytes = bytes,
>> + .ratelimit = ratelimit,
>
> Hm, same problem/question as in patch 6: Should the @ratelimit parameter
> really be added in patch 5 if it’s used only now?
>
>> .cb = cb,
>> .max_workers = max_workers ?: BLOCK_COPY_MAX_WORKERS,
>> .max_chunk = max_chunk,
>> @@ -752,3 +781,18 @@ void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
>> {
>> s->skip_unallocated = skip;
>> }
>> +
>> +void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
>> + uint64_t speed)
>> +{
>> + uint64_t old_speed = s->speed;
>> +
>> + s->speed = speed;
>> + if (speed > 0) {
>> + ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
>> + }
>> +
>> + if (call_state && old_speed && (speed > old_speed || speed == 0)) {
>> + block_copy_kick(call_state);
>> + }
>> +}
>
> Hm. I’m interested in seeing how this is going to be used, i.e. what
> callers will pass for @call_state. I suppose it’s going to be the
> background operation for the whole device, but I wonder whether it
> actually makes sense to pass it. I mean, the caller could just call
> block_copy_kick() itself (unconditionally, because it’ll never hurt, I
> think).
>
Yes, that's weird. I think the correct approach is to have list of call-states in block-copy-state, and kick them all on speed update.
--
Best regards,
Vladimir
© 2016 - 2026 Red Hat, Inc.