Currently this field only set by qed and qcow2. But in fact, all
backing-supporting formats (parallels, qcow, qcow2, qed, vmdk) share
this semantics: on unallocated blocks, if there is no backing file they
just memset the buffer with zeroes.
So, document this behavior for .supports_backing and drop
.unallocated_blocks_are_zero
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
include/block/block.h | 6 ------
include/block/block_int.h | 13 ++++++++++++-
block.c | 15 ---------------
block/io.c | 8 ++++----
block/qcow2.c | 1 -
block/qed.c | 1 -
6 files changed, 16 insertions(+), 28 deletions(-)
diff --git a/include/block/block.h b/include/block/block.h
index 8b62429aa4..db1cb503ec 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -21,11 +21,6 @@ typedef struct BlockDriverInfo {
/* offset at which the VM state can be saved (0 if not possible) */
int64_t vm_state_offset;
bool is_dirty;
- /*
- * True if unallocated blocks read back as zeroes. This is equivalent
- * to the LBPRZ flag in the SCSI logical block provisioning page.
- */
- bool unallocated_blocks_are_zero;
/*
* True if this block driver only supports compressed writes
*/
@@ -431,7 +426,6 @@ int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
int bdrv_has_zero_init_1(BlockDriverState *bs);
int bdrv_has_zero_init(BlockDriverState *bs);
int bdrv_has_zero_init_truncate(BlockDriverState *bs);
-bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
int bdrv_block_status(BlockDriverState *bs, int64_t offset,
int64_t bytes, int64_t *pnum, int64_t *map,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 92335f33c7..c156b22c6b 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -115,7 +115,18 @@ struct BlockDriver {
*/
bool bdrv_needs_filename;
- /* Set if a driver can support backing files */
+ /*
+ * Set if a driver can support backing files. This also implies the
+ * following semantics:
+ *
+ * - Return status 0 of .bdrv_co_block_status means that corresponding
+ * blocks are not allocated in this layer of backing-chain
+ * - For such (unallocated) blocks, read will:
+ * - read from backing file if there is one and big enough
+ * - fill buffer with zeroes if there is no backing file
+ * - space after EOF of the backing file considered as zero
+ * (corresponding part of read-buffer must be zeroed by driver)
+ */
bool supports_backing;
/* For handling image reopen for split or non-split files */
diff --git a/block.c b/block.c
index cf5c19b1db..0283fdecbc 100644
--- a/block.c
+++ b/block.c
@@ -5305,21 +5305,6 @@ int bdrv_has_zero_init_truncate(BlockDriverState *bs)
return 0;
}
-bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
-{
- BlockDriverInfo bdi;
-
- if (bs->backing) {
- return false;
- }
-
- if (bdrv_get_info(bs, &bdi) == 0) {
- return bdi.unallocated_blocks_are_zero;
- }
-
- return false;
-}
-
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
{
if (!(bs->open_flags & BDRV_O_UNMAP)) {
diff --git a/block/io.c b/block/io.c
index a4f9714230..484adec5a1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2385,16 +2385,16 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
ret |= BDRV_BLOCK_ALLOCATED;
- } else if (want_zero) {
- if (bdrv_unallocated_blocks_are_zero(bs)) {
- ret |= BDRV_BLOCK_ZERO;
- } else if (bs->backing) {
+ } else if (want_zero && bs->drv->supports_backing) {
+ if (bs->backing) {
BlockDriverState *bs2 = bs->backing->bs;
int64_t size2 = bdrv_getlength(bs2);
if (size2 >= 0 && offset >= size2) {
ret |= BDRV_BLOCK_ZERO;
}
+ } else {
+ ret |= BDRV_BLOCK_ZERO;
}
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 2ba0b17c39..dc3c0aac2b 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4858,7 +4858,6 @@ err:
static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BDRVQcow2State *s = bs->opaque;
- bdi->unallocated_blocks_are_zero = true;
bdi->cluster_size = s->cluster_size;
bdi->vm_state_offset = qcow2_vm_state_offset(s);
return 0;
diff --git a/block/qed.c b/block/qed.c
index b0fdb8f565..fb7833dc8b 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1514,7 +1514,6 @@ static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
memset(bdi, 0, sizeof(*bdi));
bdi->cluster_size = s->header.cluster_size;
bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
- bdi->unallocated_blocks_are_zero = true;
return 0;
}
--
2.21.0
On 5/6/20 4:25 AM, Vladimir Sementsov-Ogievskiy wrote:
> Currently this field only set by qed and qcow2.
Well, only after patches 1-6 (prior to then, it was also set in protocol
drivers). I think you might be able to hoist part of this patch earlier
in the series, to make the changes to the protocol drivers easier to
review, by rewording this sentence:
Currently, the only format drivers that set this field are qed and qcow2.
> But in fact, all
> backing-supporting formats (parallels, qcow, qcow2, qed, vmdk) share
> this semantics: on unallocated blocks, if there is no backing file they
s/this/these/
> just memset the buffer with zeroes.
>
> So, document this behavior for .supports_backing and drop
> .unallocated_blocks_are_zero
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
> include/block/block.h | 6 ------
> include/block/block_int.h | 13 ++++++++++++-
> block.c | 15 ---------------
> block/io.c | 8 ++++----
> block/qcow2.c | 1 -
> block/qed.c | 1 -
> 6 files changed, 16 insertions(+), 28 deletions(-)
>
> diff --git a/include/block/block.h b/include/block/block.h
> index 8b62429aa4..db1cb503ec 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -21,11 +21,6 @@ typedef struct BlockDriverInfo {
> /* offset at which the VM state can be saved (0 if not possible) */
> int64_t vm_state_offset;
> bool is_dirty;
> - /*
> - * True if unallocated blocks read back as zeroes. This is equivalent
> - * to the LBPRZ flag in the SCSI logical block provisioning page.
> - */
> - bool unallocated_blocks_are_zero;
You can't delete this field until all protocol drivers are cleaned up,
so deferring this part of the change to the end of the series makes sense.
> /*
> * True if this block driver only supports compressed writes
> */
> @@ -431,7 +426,6 @@ int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
> int bdrv_has_zero_init_1(BlockDriverState *bs);
> int bdrv_has_zero_init(BlockDriverState *bs);
> int bdrv_has_zero_init_truncate(BlockDriverState *bs);
> -bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
Doing this cleanup makes sense: there is only one caller of this
function pre-patch, and 0 callers post-patch - but whether the cleanup
should be at the same time as you fix the one caller, or deferred to
when you also clean up the field, is less important.
If I were writing the series:
1 - fix qemu-img.c to not use the field
2 - fix block_status to not use the function
3-n - fix protocol drivers that set the field to also return _ZERO
during block status (but not delete the field at that time)
n+1 - delete unused function and field (from ALL drivers)
> bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
> int bdrv_block_status(BlockDriverState *bs, int64_t offset,
> int64_t bytes, int64_t *pnum, int64_t *map,
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 92335f33c7..c156b22c6b 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -115,7 +115,18 @@ struct BlockDriver {
> */
> bool bdrv_needs_filename;
>
> - /* Set if a driver can support backing files */
> + /*
> + * Set if a driver can support backing files. This also implies the
> + * following semantics:
> + *
> + * - Return status 0 of .bdrv_co_block_status means that corresponding
> + * blocks are not allocated in this layer of backing-chain
> + * - For such (unallocated) blocks, read will:
> + * - read from backing file if there is one and big enough
s/and/and it is/
> + * - fill buffer with zeroes if there is no backing file
> + * - space after EOF of the backing file considered as zero
> + * (corresponding part of read-buffer must be zeroed by driver)
Does the driver actually have to do the zeroing? Looking at qcow2.c, I see:
static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
...
case QCOW2_CLUSTER_UNALLOCATED:
assert(bs->backing); /* otherwise handled in
qcow2_co_preadv_part */
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
return bdrv_co_preadv_part(bs->backing, offset, bytes,
qiov, qiov_offset, 0);
which just defers to the block layer to handle read-beyond-EOF, rather
than an explicit memset in the driver.
So maybe you can simplify to:
- For such (unallocated) blocks, read will:
- fill buffer with zeros if there is no backing file
- read from the backing file otherwise, where the block layer
takes care of reading zeros beyond EOF if backing file is short
But the effect is the same.
> +++ b/block/io.c
> @@ -2385,16 +2385,16 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
>
> if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
> ret |= BDRV_BLOCK_ALLOCATED;
> - } else if (want_zero) {
> - if (bdrv_unallocated_blocks_are_zero(bs)) {
> - ret |= BDRV_BLOCK_ZERO;
> - } else if (bs->backing) {
> + } else if (want_zero && bs->drv->supports_backing) {
> + if (bs->backing) {
> BlockDriverState *bs2 = bs->backing->bs;
> int64_t size2 = bdrv_getlength(bs2);
>
> if (size2 >= 0 && offset >= size2) {
> ret |= BDRV_BLOCK_ZERO;
> }
> + } else {
> + ret |= BDRV_BLOCK_ZERO;
> }
I like this part of the change. But if it is done first in the series,
it _does_ have a semantic impact on protocol drivers (previously,
protocol drivers that return 0 but set the field
.unallocated_blocks_are_zero will be changed to report _ZERO; after this
patch, protocol drivers do not do that, because they don't support
backing files, and it is now only backing files that do the _ZERO
magic). So doing _just_ this change, along with a better analysis of
how it changes the semantics of 'qemu-io -c map' on protocol drivers
while mentioning why that is okay, would make a better start to the
series, rather than here at the end. Of course, if you defer it to the
end, then none of the protocol drivers set .unallocated_blocks_are_zero
anyway, but that cost more review work on each altered protocol driver.
--
Eric Blake, Principal Software Engineer
Red Hat, Inc. +1-919-301-3226
Virtualization: qemu.org | libvirt.org
06.05.2020 18:14, Eric Blake wrote:
> On 5/6/20 4:25 AM, Vladimir Sementsov-Ogievskiy wrote:
>> Currently this field only set by qed and qcow2.
>
> Well, only after patches 1-6 (prior to then, it was also set in protocol drivers). I think you might be able to hoist part of this patch earlier in the series, to make the changes to the protocol drivers easier to review, by rewording this sentence:
>
> Currently, the only format drivers that set this field are qed and qcow2.
>
>> But in fact, all
>> backing-supporting formats (parallels, qcow, qcow2, qed, vmdk) share
>> this semantics: on unallocated blocks, if there is no backing file they
>
> s/this/these/
>
>> just memset the buffer with zeroes.
>>
>> So, document this behavior for .supports_backing and drop
>> .unallocated_blocks_are_zero
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>> include/block/block.h | 6 ------
>> include/block/block_int.h | 13 ++++++++++++-
>> block.c | 15 ---------------
>> block/io.c | 8 ++++----
>> block/qcow2.c | 1 -
>> block/qed.c | 1 -
>> 6 files changed, 16 insertions(+), 28 deletions(-)
>>
>> diff --git a/include/block/block.h b/include/block/block.h
>> index 8b62429aa4..db1cb503ec 100644
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -21,11 +21,6 @@ typedef struct BlockDriverInfo {
>> /* offset at which the VM state can be saved (0 if not possible) */
>> int64_t vm_state_offset;
>> bool is_dirty;
>> - /*
>> - * True if unallocated blocks read back as zeroes. This is equivalent
>> - * to the LBPRZ flag in the SCSI logical block provisioning page.
>> - */
>> - bool unallocated_blocks_are_zero;
>
> You can't delete this field until all protocol drivers are cleaned up, so deferring this part of the change to the end of the series makes sense.
>
>> /*
>> * True if this block driver only supports compressed writes
>> */
>> @@ -431,7 +426,6 @@ int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
>> int bdrv_has_zero_init_1(BlockDriverState *bs);
>> int bdrv_has_zero_init(BlockDriverState *bs);
>> int bdrv_has_zero_init_truncate(BlockDriverState *bs);
>> -bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
>
> Doing this cleanup makes sense: there is only one caller of this function pre-patch, and 0 callers post-patch - but whether the cleanup should be at the same time as you fix the one caller, or deferred to when you also clean up the field, is less important.
>
> If I were writing the series:
>
> 1 - fix qemu-img.c to not use the field
> 2 - fix block_status to not use the function
> 3-n - fix protocol drivers that set the field to also return _ZERO
> during block status (but not delete the field at that time)
> n+1 - delete unused function and field (from ALL drivers)
>
>> bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
>> int bdrv_block_status(BlockDriverState *bs, int64_t offset,
>> int64_t bytes, int64_t *pnum, int64_t *map,
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index 92335f33c7..c156b22c6b 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -115,7 +115,18 @@ struct BlockDriver {
>> */
>> bool bdrv_needs_filename;
>> - /* Set if a driver can support backing files */
>> + /*
>> + * Set if a driver can support backing files. This also implies the
>> + * following semantics:
>> + *
>> + * - Return status 0 of .bdrv_co_block_status means that corresponding
>> + * blocks are not allocated in this layer of backing-chain
>> + * - For such (unallocated) blocks, read will:
>> + * - read from backing file if there is one and big enough
>
> s/and/and it is/
>
>> + * - fill buffer with zeroes if there is no backing file
>> + * - space after EOF of the backing file considered as zero
>> + * (corresponding part of read-buffer must be zeroed by driver)
>
> Does the driver actually have to do the zeroing? Looking at qcow2.c, I see:
> static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
> ...
>
> case QCOW2_CLUSTER_UNALLOCATED:
> assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
>
> BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
> return bdrv_co_preadv_part(bs->backing, offset, bytes,
> qiov, qiov_offset, 0);
>
> which just defers to the block layer to handle read-beyond-EOF, rather than an explicit memset in the driver.
Hmm, yes.
>
> So maybe you can simplify to:
> - For such (unallocated) blocks, read will:
> - fill buffer with zeros if there is no backing file
> - read from the backing file otherwise, where the block layer
> takes care of reading zeros beyond EOF if backing file is short
>
OK
> But the effect is the same.
>
>> +++ b/block/io.c
>> @@ -2385,16 +2385,16 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
>> if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
>> ret |= BDRV_BLOCK_ALLOCATED;
>> - } else if (want_zero) {
>> - if (bdrv_unallocated_blocks_are_zero(bs)) {
>> - ret |= BDRV_BLOCK_ZERO;
>> - } else if (bs->backing) {
>> + } else if (want_zero && bs->drv->supports_backing) {
>> + if (bs->backing) {
>> BlockDriverState *bs2 = bs->backing->bs;
>> int64_t size2 = bdrv_getlength(bs2);
>> if (size2 >= 0 && offset >= size2) {
>> ret |= BDRV_BLOCK_ZERO;
>> }
>> + } else {
>> + ret |= BDRV_BLOCK_ZERO;
>> }
>
> I like this part of the change. But if it is done first in the series, it _does_ have a semantic impact on protocol drivers (previously, protocol drivers that return 0 but set the field .unallocated_blocks_are_zero will be changed to report _ZERO; after this patch, protocol drivers do not do that, because they don't support backing files, and it is now only backing files that do the _ZERO magic). So doing _just_ this change, along with a better analysis of how it changes the semantics of 'qemu-io -c map' on protocol drivers while mentioning why that is okay, would make a better start to the series, rather than here at the end. Of course, if you defer it to the end, then none of the protocol drivers set .unallocated_blocks_are_zero anyway, but that cost more review work on each altered protocol driver.
>
I understand the idea.. Ha, it looks like an optimization issue :) I use greedy algorithm, trying to make each patch to be a simple small step to the result. But greedy algorithm now always optimal as we know. Actually, here, making first step larger and more complicated, we achieve less total review-complexity. Good new experience for me, thanks, I'll try the proposed way.
--
Best regards,
Vladimir
06.05.2020 18:14, Eric Blake wrote:
> On 5/6/20 4:25 AM, Vladimir Sementsov-Ogievskiy wrote:
>> Currently this field only set by qed and qcow2.
>
> Well, only after patches 1-6 (prior to then, it was also set in protocol drivers). I think you might be able to hoist part of this patch earlier in the series, to make the changes to the protocol drivers easier to review, by rewording this sentence:
>
> Currently, the only format drivers that set this field are qed and qcow2.
>
>> But in fact, all
>> backing-supporting formats (parallels, qcow, qcow2, qed, vmdk) share
>> this semantics: on unallocated blocks, if there is no backing file they
>
> s/this/these/
>
>> just memset the buffer with zeroes.
>>
>> So, document this behavior for .supports_backing and drop
>> .unallocated_blocks_are_zero
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>> include/block/block.h | 6 ------
>> include/block/block_int.h | 13 ++++++++++++-
>> block.c | 15 ---------------
>> block/io.c | 8 ++++----
>> block/qcow2.c | 1 -
>> block/qed.c | 1 -
>> 6 files changed, 16 insertions(+), 28 deletions(-)
>>
>> diff --git a/include/block/block.h b/include/block/block.h
>> index 8b62429aa4..db1cb503ec 100644
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -21,11 +21,6 @@ typedef struct BlockDriverInfo {
>> /* offset at which the VM state can be saved (0 if not possible) */
>> int64_t vm_state_offset;
>> bool is_dirty;
>> - /*
>> - * True if unallocated blocks read back as zeroes. This is equivalent
>> - * to the LBPRZ flag in the SCSI logical block provisioning page.
>> - */
>> - bool unallocated_blocks_are_zero;
>
> You can't delete this field until all protocol drivers are cleaned up, so deferring this part of the change to the end of the series makes sense.
>
>> /*
>> * True if this block driver only supports compressed writes
>> */
>> @@ -431,7 +426,6 @@ int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
>> int bdrv_has_zero_init_1(BlockDriverState *bs);
>> int bdrv_has_zero_init(BlockDriverState *bs);
>> int bdrv_has_zero_init_truncate(BlockDriverState *bs);
>> -bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
>
> Doing this cleanup makes sense: there is only one caller of this function pre-patch, and 0 callers post-patch - but whether the cleanup should be at the same time as you fix the one caller, or deferred to when you also clean up the field, is less important.
>
> If I were writing the series:
>
> 1 - fix qemu-img.c to not use the field
> 2 - fix block_status to not use the function
Hmm stop. We still need patches 1,2 before modifying block_status, otherwise we'll still need to check unallocated_blocks_are_zero
> 3-n - fix protocol drivers that set the field to also return _ZERO
> during block status (but not delete the field at that time)
> n+1 - delete unused function and field (from ALL drivers)
>
>> bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
>> int bdrv_block_status(BlockDriverState *bs, int64_t offset,
>> int64_t bytes, int64_t *pnum, int64_t *map,
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index 92335f33c7..c156b22c6b 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -115,7 +115,18 @@ struct BlockDriver {
>> */
>> bool bdrv_needs_filename;
>> - /* Set if a driver can support backing files */
>> + /*
>> + * Set if a driver can support backing files. This also implies the
>> + * following semantics:
>> + *
>> + * - Return status 0 of .bdrv_co_block_status means that corresponding
>> + * blocks are not allocated in this layer of backing-chain
>> + * - For such (unallocated) blocks, read will:
>> + * - read from backing file if there is one and big enough
>
> s/and/and it is/
>
>> + * - fill buffer with zeroes if there is no backing file
>> + * - space after EOF of the backing file considered as zero
>> + * (corresponding part of read-buffer must be zeroed by driver)
>
> Does the driver actually have to do the zeroing? Looking at qcow2.c, I see:
> static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
> ...
>
> case QCOW2_CLUSTER_UNALLOCATED:
> assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
>
> BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
> return bdrv_co_preadv_part(bs->backing, offset, bytes,
> qiov, qiov_offset, 0);
>
> which just defers to the block layer to handle read-beyond-EOF, rather than an explicit memset in the driver.
>
> So maybe you can simplify to:
> - For such (unallocated) blocks, read will:
> - fill buffer with zeros if there is no backing file
> - read from the backing file otherwise, where the block layer
> takes care of reading zeros beyond EOF if backing file is short
>
> But the effect is the same.
>
>> +++ b/block/io.c
>> @@ -2385,16 +2385,16 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
>> if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
>> ret |= BDRV_BLOCK_ALLOCATED;
>> - } else if (want_zero) {
>> - if (bdrv_unallocated_blocks_are_zero(bs)) {
>> - ret |= BDRV_BLOCK_ZERO;
>> - } else if (bs->backing) {
>> + } else if (want_zero && bs->drv->supports_backing) {
>> + if (bs->backing) {
>> BlockDriverState *bs2 = bs->backing->bs;
>> int64_t size2 = bdrv_getlength(bs2);
>> if (size2 >= 0 && offset >= size2) {
>> ret |= BDRV_BLOCK_ZERO;
>> }
>> + } else {
>> + ret |= BDRV_BLOCK_ZERO;
>> }
>
> I like this part of the change. But if it is done first in the series, it _does_ have a semantic impact on protocol drivers (previously, protocol drivers that return 0 but set the field .unallocated_blocks_are_zero will be changed to report _ZERO; after this patch, protocol drivers do not do that, because they don't support backing files, and it is now only backing files that do the _ZERO magic). So doing _just_ this change, along with a better analysis of how it changes the semantics of 'qemu-io -c map' on protocol drivers while mentioning why that is okay, would make a better start to the series, rather than here at the end. Of course, if you defer it to the end, then none of the protocol drivers set .unallocated_blocks_are_zero anyway, but that cost more review work on each altered protocol driver.
>
--
Best regards,
Vladimir
07.05.2020 10:05, Vladimir Sementsov-Ogievskiy wrote:
> 06.05.2020 18:14, Eric Blake wrote:
>> On 5/6/20 4:25 AM, Vladimir Sementsov-Ogievskiy wrote:
>>> Currently this field only set by qed and qcow2.
>>
>> Well, only after patches 1-6 (prior to then, it was also set in protocol drivers). I think you might be able to hoist part of this patch earlier in the series, to make the changes to the protocol drivers easier to review, by rewording this sentence:
>>
>> Currently, the only format drivers that set this field are qed and qcow2.
>>
>>> But in fact, all
>>> backing-supporting formats (parallels, qcow, qcow2, qed, vmdk) share
>>> this semantics: on unallocated blocks, if there is no backing file they
>>
>> s/this/these/
>>
>>> just memset the buffer with zeroes.
>>>
>>> So, document this behavior for .supports_backing and drop
>>> .unallocated_blocks_are_zero
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>> ---
>>> include/block/block.h | 6 ------
>>> include/block/block_int.h | 13 ++++++++++++-
>>> block.c | 15 ---------------
>>> block/io.c | 8 ++++----
>>> block/qcow2.c | 1 -
>>> block/qed.c | 1 -
>>> 6 files changed, 16 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/include/block/block.h b/include/block/block.h
>>> index 8b62429aa4..db1cb503ec 100644
>>> --- a/include/block/block.h
>>> +++ b/include/block/block.h
>>> @@ -21,11 +21,6 @@ typedef struct BlockDriverInfo {
>>> /* offset at which the VM state can be saved (0 if not possible) */
>>> int64_t vm_state_offset;
>>> bool is_dirty;
>>> - /*
>>> - * True if unallocated blocks read back as zeroes. This is equivalent
>>> - * to the LBPRZ flag in the SCSI logical block provisioning page.
>>> - */
>>> - bool unallocated_blocks_are_zero;
>>
>> You can't delete this field until all protocol drivers are cleaned up, so deferring this part of the change to the end of the series makes sense.
>>
>>> /*
>>> * True if this block driver only supports compressed writes
>>> */
>>> @@ -431,7 +426,6 @@ int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
>>> int bdrv_has_zero_init_1(BlockDriverState *bs);
>>> int bdrv_has_zero_init(BlockDriverState *bs);
>>> int bdrv_has_zero_init_truncate(BlockDriverState *bs);
>>> -bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
>>
>> Doing this cleanup makes sense: there is only one caller of this function pre-patch, and 0 callers post-patch - but whether the cleanup should be at the same time as you fix the one caller, or deferred to when you also clean up the field, is less important.
>>
>> If I were writing the series:
>>
>> 1 - fix qemu-img.c to not use the field
>> 2 - fix block_status to not use the function
>
> Hmm stop. We still need patches 1,2 before modifying block_status, otherwise we'll still need to check unallocated_blocks_are_zero
Hmm2. This just means that I need to put all commit messages about dropping unallocated_block_are_zero into one commit message rewriting the block_status. I doubt that it simplifies review: instead of analyzing format-by-format, you'll have to analyze all format at once.
>
>> 3-n - fix protocol drivers that set the field to also return _ZERO
>> during block status (but not delete the field at that time)
>> n+1 - delete unused function and field (from ALL drivers)
>>
>>> bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
>>> int bdrv_block_status(BlockDriverState *bs, int64_t offset,
>>> int64_t bytes, int64_t *pnum, int64_t *map,
>>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>>> index 92335f33c7..c156b22c6b 100644
>>> --- a/include/block/block_int.h
>>> +++ b/include/block/block_int.h
>>> @@ -115,7 +115,18 @@ struct BlockDriver {
>>> */
>>> bool bdrv_needs_filename;
>>> - /* Set if a driver can support backing files */
>>> + /*
>>> + * Set if a driver can support backing files. This also implies the
>>> + * following semantics:
>>> + *
>>> + * - Return status 0 of .bdrv_co_block_status means that corresponding
>>> + * blocks are not allocated in this layer of backing-chain
>>> + * - For such (unallocated) blocks, read will:
>>> + * - read from backing file if there is one and big enough
>>
>> s/and/and it is/
>>
>>> + * - fill buffer with zeroes if there is no backing file
>>> + * - space after EOF of the backing file considered as zero
>>> + * (corresponding part of read-buffer must be zeroed by driver)
>>
>> Does the driver actually have to do the zeroing? Looking at qcow2.c, I see:
>> static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
>> ...
>>
>> case QCOW2_CLUSTER_UNALLOCATED:
>> assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
>>
>> BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
>> return bdrv_co_preadv_part(bs->backing, offset, bytes,
>> qiov, qiov_offset, 0);
>>
>> which just defers to the block layer to handle read-beyond-EOF, rather than an explicit memset in the driver.
>>
>> So maybe you can simplify to:
>> - For such (unallocated) blocks, read will:
>> - fill buffer with zeros if there is no backing file
>> - read from the backing file otherwise, where the block layer
>> takes care of reading zeros beyond EOF if backing file is short
>>
>> But the effect is the same.
>>
>>> +++ b/block/io.c
>>> @@ -2385,16 +2385,16 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
>>> if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
>>> ret |= BDRV_BLOCK_ALLOCATED;
>>> - } else if (want_zero) {
>>> - if (bdrv_unallocated_blocks_are_zero(bs)) {
>>> - ret |= BDRV_BLOCK_ZERO;
>>> - } else if (bs->backing) {
>>> + } else if (want_zero && bs->drv->supports_backing) {
>>> + if (bs->backing) {
>>> BlockDriverState *bs2 = bs->backing->bs;
>>> int64_t size2 = bdrv_getlength(bs2);
>>> if (size2 >= 0 && offset >= size2) {
>>> ret |= BDRV_BLOCK_ZERO;
>>> }
>>> + } else {
>>> + ret |= BDRV_BLOCK_ZERO;
>>> }
>>
>> I like this part of the change. But if it is done first in the series, it _does_ have a semantic impact on protocol drivers (previously, protocol drivers that return 0 but set the field .unallocated_blocks_are_zero will be changed to report _ZERO; after this patch, protocol drivers do not do that, because they don't support backing files, and it is now only backing files that do the _ZERO magic). So doing _just_ this change, along with a better analysis of how it changes the semantics of 'qemu-io -c map' on protocol drivers while mentioning why that is okay, would make a better start to the series, rather than here at the end. Of course, if you defer it to the end, then none of the protocol drivers set .unallocated_blocks_are_zero anyway, but that cost more review work on each altered protocol driver.
>>
>
>
--
Best regards,
Vladimir
06.05.2020 18:14, Eric Blake wrote:
> On 5/6/20 4:25 AM, Vladimir Sementsov-Ogievskiy wrote:
>> Currently this field only set by qed and qcow2.
>
> Well, only after patches 1-6 (prior to then, it was also set in protocol drivers). I think you might be able to hoist part of this patch earlier in the series, to make the changes to the protocol drivers easier to review, by rewording this sentence:
>
> Currently, the only format drivers that set this field are qed and qcow2.
>
>> But in fact, all
>> backing-supporting formats (parallels, qcow, qcow2, qed, vmdk) share
>> this semantics: on unallocated blocks, if there is no backing file they
>
> s/this/these/
>
>> just memset the buffer with zeroes.
>>
>> So, document this behavior for .supports_backing and drop
>> .unallocated_blocks_are_zero
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>> include/block/block.h | 6 ------
>> include/block/block_int.h | 13 ++++++++++++-
>> block.c | 15 ---------------
>> block/io.c | 8 ++++----
>> block/qcow2.c | 1 -
>> block/qed.c | 1 -
>> 6 files changed, 16 insertions(+), 28 deletions(-)
>>
>> diff --git a/include/block/block.h b/include/block/block.h
>> index 8b62429aa4..db1cb503ec 100644
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -21,11 +21,6 @@ typedef struct BlockDriverInfo {
>> /* offset at which the VM state can be saved (0 if not possible) */
>> int64_t vm_state_offset;
>> bool is_dirty;
>> - /*
>> - * True if unallocated blocks read back as zeroes. This is equivalent
>> - * to the LBPRZ flag in the SCSI logical block provisioning page.
>> - */
>> - bool unallocated_blocks_are_zero;
>
> You can't delete this field until all protocol drivers are cleaned up, so deferring this part of the change to the end of the series makes sense.
>
>> /*
>> * True if this block driver only supports compressed writes
>> */
>> @@ -431,7 +426,6 @@ int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
>> int bdrv_has_zero_init_1(BlockDriverState *bs);
>> int bdrv_has_zero_init(BlockDriverState *bs);
>> int bdrv_has_zero_init_truncate(BlockDriverState *bs);
>> -bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
>
> Doing this cleanup makes sense: there is only one caller of this function pre-patch, and 0 callers post-patch - but whether the cleanup should be at the same time as you fix the one caller, or deferred to when you also clean up the field, is less important.
>
> If I were writing the series:
>
> 1 - fix qemu-img.c to not use the field
> 2 - fix block_status to not use the function
> 3-n - fix protocol drivers that set the field to also return _ZERO
> during block status (but not delete the field at that time)
> n+1 - delete unused function and field (from ALL drivers)
>
>> bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
>> int bdrv_block_status(BlockDriverState *bs, int64_t offset,
>> int64_t bytes, int64_t *pnum, int64_t *map,
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index 92335f33c7..c156b22c6b 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -115,7 +115,18 @@ struct BlockDriver {
>> */
>> bool bdrv_needs_filename;
>> - /* Set if a driver can support backing files */
>> + /*
>> + * Set if a driver can support backing files. This also implies the
>> + * following semantics:
>> + *
>> + * - Return status 0 of .bdrv_co_block_status means that corresponding
>> + * blocks are not allocated in this layer of backing-chain
>> + * - For such (unallocated) blocks, read will:
>> + * - read from backing file if there is one and big enough
>
> s/and/and it is/
>
>> + * - fill buffer with zeroes if there is no backing file
>> + * - space after EOF of the backing file considered as zero
>> + * (corresponding part of read-buffer must be zeroed by driver)
>
> Does the driver actually have to do the zeroing? Looking at qcow2.c, I see:
> static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
> ...
>
> case QCOW2_CLUSTER_UNALLOCATED:
> assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
>
> BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
> return bdrv_co_preadv_part(bs->backing, offset, bytes,
> qiov, qiov_offset, 0);
>
> which just defers to the block layer to handle read-beyond-EOF, rather than an explicit memset in the driver.
>
> So maybe you can simplify to:
> - For such (unallocated) blocks, read will:
> - fill buffer with zeros if there is no backing file
> - read from the backing file otherwise, where the block layer
> takes care of reading zeros beyond EOF if backing file is short
>
> But the effect is the same.
>
>> +++ b/block/io.c
>> @@ -2385,16 +2385,16 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
>> if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
>> ret |= BDRV_BLOCK_ALLOCATED;
>> - } else if (want_zero) {
>> - if (bdrv_unallocated_blocks_are_zero(bs)) {
>> - ret |= BDRV_BLOCK_ZERO;
>> - } else if (bs->backing) {
>> + } else if (want_zero && bs->drv->supports_backing) {
>> + if (bs->backing) {
>> BlockDriverState *bs2 = bs->backing->bs;
>> int64_t size2 = bdrv_getlength(bs2);
>> if (size2 >= 0 && offset >= size2) {
>> ret |= BDRV_BLOCK_ZERO;
>> }
>> + } else {
>> + ret |= BDRV_BLOCK_ZERO;
>> }
>
> I like this part of the change. But if it is done first in the series, it _does_ have a semantic impact on protocol drivers (previously, protocol drivers that return 0 but set the field .unallocated_blocks_are_zero will be changed to report _ZERO; after this patch, protocol drivers do not do that, because they don't support backing files, and it is now only backing files that do the _ZERO magic). So doing _just_ this change, along with a better analysis of how it changes the semantics of 'qemu-io -c map' on protocol drivers while mentioning why that is okay, would make a better start to the series, rather than here at the end. Of course, if you defer it to the end, then none of the protocol drivers set .unallocated_blocks_are_zero anyway, but that cost more review work on each altered protocol driver.
>
Doing just this change prior to patches 1/2 breaks final BDRV_BLOCK_ZERO produced for vdi and vpc.
--
Best regards,
Vladimir
On 5/6/20 4:25 AM, Vladimir Sementsov-Ogievskiy wrote: > Currently this field only set by qed and qcow2. But in fact, all > backing-supporting formats (parallels, qcow, qcow2, qed, vmdk) share > this semantics: on unallocated blocks, if there is no backing file they > just memset the buffer with zeroes. In checking the behavior of all 5 .supports_backing drivers, I noticed that qed.c:qed_read_backing_file() does a lot of redundant work in computing the backing file size itself, when it could just defer to the block layer like all the other drivers do. That would be a separate patch. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
© 2016 - 2025 Red Hat, Inc.