Add a parameter that enables discard-after-copy. That is mostly useful
in "push backup with fleecing" scheme, when source is snapshot-access
format driver node, based on copy-before-write filter snapshot-access
API:
[guest] [snapshot-access] ~~ blockdev-backup ~~> [backup target]
| |
| root | file
v v
[copy-before-write]
| |
| file | target
v v
[active disk] [temp.img]
In this case discard-after-copy does two things:
- discard data in temp.img to save disk space
- avoid further copy-before-write operation in discarded area
Note that we have to declare WRITE permission on source in
copy-before-write filter, for discard to work. Alternative is to pass
an option to bdrv_cbw_append(), add some internal open-option for
copy-before-write filter to require WRITE permission only for backup
with discard-source=true. But I'm not sure it worth the complexity.
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@openvz.org>
---
block/backup.c | 5 +++--
block/block-copy.c | 10 ++++++++--
block/copy-before-write.c | 2 +-
block/replication.c | 4 ++--
blockdev.c | 2 +-
include/block/block-copy.h | 2 +-
include/block/block_int-global-state.h | 2 +-
qapi/block-core.json | 4 ++++
8 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/block/backup.c b/block/backup.c
index 5cfd0b999c..d0d512ec61 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -355,7 +355,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, int64_t speed,
MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
BitmapSyncMode bitmap_mode,
- bool compress,
+ bool compress, bool discard_source,
const char *filter_node_name,
BackupPerf *perf,
BlockdevOnError on_source_error,
@@ -486,7 +486,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
job->len = len;
job->perf = *perf;
- block_copy_set_copy_opts(bcs, perf->use_copy_range, compress);
+ block_copy_set_copy_opts(bcs, perf->use_copy_range, compress,
+ discard_source);
block_copy_set_progress_meter(bcs, &job->common.job.progress);
block_copy_set_speed(bcs, speed);
diff --git a/block/block-copy.c b/block/block-copy.c
index 9626043480..2d8373f63f 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -133,6 +133,7 @@ typedef struct BlockCopyState {
CoMutex lock;
int64_t in_flight_bytes;
BlockCopyMethod method;
+ bool discard_source;
BlockReqList reqs;
QLIST_HEAD(, BlockCopyCallState) calls;
/*
@@ -278,11 +279,12 @@ static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
}
void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
- bool compress)
+ bool compress, bool discard_source)
{
/* Keep BDRV_REQ_SERIALISING set (or not set) in block_copy_state_new() */
s->write_flags = (s->write_flags & BDRV_REQ_SERIALISING) |
(compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+ s->discard_source = discard_source;
if (s->max_transfer < s->cluster_size) {
/*
@@ -405,7 +407,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
cluster_size),
};
- block_copy_set_copy_opts(s, false, false);
+ block_copy_set_copy_opts(s, false, false, false);
ratelimit_init(&s->rate_limit);
qemu_co_mutex_init(&s->lock);
@@ -575,6 +577,10 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
co_put_to_shres(s->mem, t->req.bytes);
block_copy_task_end(t, ret);
+ if (s->discard_source && ret == 0) {
+ bdrv_co_pdiscard(s->source, t->req.offset, t->req.bytes);
+ }
+
return ret;
}
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 79cf12380e..3e77313a9a 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -319,7 +319,7 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
bdrv_default_perms(bs, c, role, reopen_queue,
perm, shared, nperm, nshared);
- *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+ *nperm = *nperm | BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
*nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
}
}
diff --git a/block/replication.c b/block/replication.c
index 2f17397764..f6a0b23563 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -587,8 +587,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
s->backup_job = backup_job_create(
NULL, s->secondary_disk->bs, s->hidden_disk->bs,
- 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
- &perf,
+ 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
+ NULL, &perf,
BLOCKDEV_ON_ERROR_REPORT,
BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
backup_job_completed, bs, NULL, &local_err);
diff --git a/blockdev.c b/blockdev.c
index 89167fbc08..946073a732 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2924,7 +2924,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
backup->sync, bmap, backup->bitmap_mode,
- backup->compress,
+ backup->compress, backup->discard_source,
backup->filter_node_name,
&perf,
backup->on_source_error,
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index b03eb5f016..e3cf0b200b 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -31,7 +31,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
/* Function should be called prior any actual copy request */
void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
- bool compress);
+ bool compress, bool discard_source);
void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm);
void block_copy_state_free(BlockCopyState *s);
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
index aed62a45d9..dc540868ec 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -183,7 +183,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
MirrorSyncMode sync_mode,
BdrvDirtyBitmap *sync_bitmap,
BitmapSyncMode bitmap_mode,
- bool compress,
+ bool compress, bool discard_source,
const char *filter_node_name,
BackupPerf *perf,
BlockdevOnError on_source_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6e944e4f52..ffc26d06ee 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1436,6 +1436,9 @@
# above node specified by @drive. If this option is not given,
# a node name is autogenerated. (Since: 4.2)
#
+# @discard-source: Discard blocks on source which are already copied to the
+# target. (Since: 7.1)
+#
# @x-perf: Performance options. (Since 6.0)
#
# Features:
@@ -1456,6 +1459,7 @@
'*on-target-error': 'BlockdevOnError',
'*auto-finalize': 'bool', '*auto-dismiss': 'bool',
'*filter-node-name': 'str',
+ '*discard-source': 'bool',
'*x-perf': { 'type': 'BackupPerf',
'features': [ 'unstable' ] } } }
--
2.35.1
Hi Vladimir,
hope I didn't miss a newer version of this series. I'm currently
evaluating fleecing backup for Proxmox downstream, so I pulled in this
series and wanted to let you know about two issues I encountered while
testing. We are still based on 8.1, but if I'm not mistaken, they are
still relevant:
Am 31.03.22 um 21:57 schrieb Vladimir Sementsov-Ogievskiy:
> @@ -575,6 +577,10 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
> co_put_to_shres(s->mem, t->req.bytes);
> block_copy_task_end(t, ret);
>
> + if (s->discard_source && ret == 0) {
> + bdrv_co_pdiscard(s->source, t->req.offset, t->req.bytes);
> + }
> +
> return ret;
> }
>
If the image size is not aligned to the cluster size, passing
t->req.bytes when calling bdrv_co_pdiscard() can lead to an assertion
failure at the end of the image:
> kvm: ../block/io.c:1982: bdrv_co_write_req_prepare: Assertion `offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE || child->perm & BLK_PERM_RESIZE' failed.
block_copy_do_copy() does have a line to clamp down:
> int64_t nbytes = MIN(offset + bytes, s->len) - offset;
If I do the same before calling bdrv_co_pdiscard(), the failure goes away.
For the second one, the following code saw some changes since the series
was sent:
> diff --git a/block/copy-before-write.c b/block/copy-before-write.c
> index 79cf12380e..3e77313a9a 100644
> --- a/block/copy-before-write.c
> +++ b/block/copy-before-write.c
> @@ -319,7 +319,7 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
> bdrv_default_perms(bs, c, role, reopen_queue,
> perm, shared, nperm, nshared);
>
> - *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
> + *nperm = *nperm | BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
> *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
> }
> }
It's now:
> bdrv_default_perms(bs, c, role, reopen_queue,
> perm, shared, nperm, nshared);
>
> if (!QLIST_EMPTY(&bs->parents)) {
> if (perm & BLK_PERM_WRITE) {
> *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
> }
> *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
> }
So I wasn't sure how to adapt the patch:
- If setting BLK_PERM_WRITE unconditionally, it seems to break usual
drive-backup (with no fleecing set up):
> permissions 'write' are both required by node '#block691' (uses node '#block151' as 'file' child) and unshared by block device 'drive-scsi0' (uses node '#block151' as 'root' child).
- If I only do it within the if block, it doesn't work when I try to set
up fleecing, because bs->parents is empty for me, i.e. when passing the
snapshot-access node to backup_job_create() while the usual cbw for
backup is appended. I should note I'm doing it manually in a custom QMP
command, not in a transaction (which requires the not-yet-merged
blockdev-replace AFAIU).
Not sure if I'm doing something wrong, but maybe what you wrote in the
commit message is necessary after all?
> Alternative is to pass
> an option to bdrv_cbw_append(), add some internal open-option for
> copy-before-write filter to require WRITE permission only for backup
> with discard-source=true. But I'm not sure it worth the complexity.
Best Regards,
Fiona
Hi Fiona!
That was the only version, because it was based on "[PATCH v5 00/45] Transactional block-graph modifying API", as written in commit message.
And "[PATCH v5 00/45] Transactional block-graph modifying API" was not merged, instead I decided to send it part-by-part, some were already merged. Now the current "in-flight" part is "[PATCH v8 0/7] blockdev-replace".
So, probably something from that old big series is still needed for "backup: discard-source parameter" to work. Or, may be everything is prepared now.
I'll look at it closely next week and try to make a v2. Thanks for testing and debugging!
On 11.01.24 18:19, Fiona Ebner wrote:
> Hi Vladimir,
>
> hope I didn't miss a newer version of this series. I'm currently
> evaluating fleecing backup for Proxmox downstream, so I pulled in this
> series and wanted to let you know about two issues I encountered while
> testing. We are still based on 8.1, but if I'm not mistaken, they are
> still relevant:
>
> Am 31.03.22 um 21:57 schrieb Vladimir Sementsov-Ogievskiy:
>> @@ -575,6 +577,10 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
>> co_put_to_shres(s->mem, t->req.bytes);
>> block_copy_task_end(t, ret);
>>
>> + if (s->discard_source && ret == 0) {
>> + bdrv_co_pdiscard(s->source, t->req.offset, t->req.bytes);
>> + }
>> +
>> return ret;
>> }
>>
>
> If the image size is not aligned to the cluster size, passing
> t->req.bytes when calling bdrv_co_pdiscard() can lead to an assertion
> failure at the end of the image:
>
>> kvm: ../block/io.c:1982: bdrv_co_write_req_prepare: Assertion `offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE || child->perm & BLK_PERM_RESIZE' failed.
>
> block_copy_do_copy() does have a line to clamp down:
>
>> int64_t nbytes = MIN(offset + bytes, s->len) - offset;
>
> If I do the same before calling bdrv_co_pdiscard(), the failure goes away.
>
>
> For the second one, the following code saw some changes since the series
> was sent:
>
>> diff --git a/block/copy-before-write.c b/block/copy-before-write.c
>> index 79cf12380e..3e77313a9a 100644
>> --- a/block/copy-before-write.c
>> +++ b/block/copy-before-write.c
>> @@ -319,7 +319,7 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
>> bdrv_default_perms(bs, c, role, reopen_queue,
>> perm, shared, nperm, nshared);
>>
>> - *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
>> + *nperm = *nperm | BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
>> *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
>> }
>> }
>
> It's now:
>
>> bdrv_default_perms(bs, c, role, reopen_queue,
>> perm, shared, nperm, nshared);
>>
>> if (!QLIST_EMPTY(&bs->parents)) {
>> if (perm & BLK_PERM_WRITE) {
>> *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
>> }
>> *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
>> }
>
> So I wasn't sure how to adapt the patch:
>
> - If setting BLK_PERM_WRITE unconditionally, it seems to break usual
> drive-backup (with no fleecing set up):
>
>> permissions 'write' are both required by node '#block691' (uses node '#block151' as 'file' child) and unshared by block device 'drive-scsi0' (uses node '#block151' as 'root' child).
>
> - If I only do it within the if block, it doesn't work when I try to set
> up fleecing, because bs->parents is empty for me, i.e. when passing the
> snapshot-access node to backup_job_create() while the usual cbw for
> backup is appended. I should note I'm doing it manually in a custom QMP
> command, not in a transaction (which requires the not-yet-merged
> blockdev-replace AFAIU).
>
> Not sure if I'm doing something wrong, but maybe what you wrote in the
> commit message is necessary after all?
>
>> Alternative is to pass
>> an option to bdrv_cbw_append(), add some internal open-option for
>> copy-before-write filter to require WRITE permission only for backup
>> with discard-source=true. But I'm not sure it worth the complexity.
>
> Best Regards,
> Fiona
>
--
Best regards,
Vladimir
© 2016 - 2026 Red Hat, Inc.