[v4] migration/block: use blk_pwrite_zeroes for each zero cluster

[Qemu-devel] [PATCH v4] migration/block: use blk_pwrite_zeroes for each zero cluster

Posted by jemmy858585@gmail.com 8 years, 10 months ago

From: Lidong Chen <lidongchen@tencent.com>

BLOCK_SIZE is (1 << 20), qcow2 cluster size is 65536 by default,
this maybe cause the qcow2 file size is bigger after migration.
This patch check each cluster, use blk_pwrite_zeroes for each
zero cluster.

Signed-off-by: Lidong Chen <lidongchen@tencent.com>
---
 migration/block.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/migration/block.c b/migration/block.c
index 7734ff7..5d0635a 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
     int64_t total_sectors = 0;
     int nr_sectors;
     int ret;
+    BlockDriverInfo bdi;
+    int cluster_size;
 
     do {
         addr = qemu_get_be64(f);
@@ -919,6 +921,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
                     error_report_err(local_err);
                     return -EINVAL;
                 }
+
+                ret = bdrv_get_info(blk_bs(blk), &bdi);
+                if (ret == 0 && bdi.cluster_size > 0 &&
+                    bdi.cluster_size <= BLOCK_SIZE &&
+                    BLOCK_SIZE % bdi.cluster_size == 0) {
+                    cluster_size = bdi.cluster_size;
+                } else {
+                    cluster_size = BLOCK_SIZE;
+                }
             }
 
             if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
@@ -932,10 +943,28 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
                                         nr_sectors * BDRV_SECTOR_SIZE,
                                         BDRV_REQ_MAY_UNMAP);
             } else {
+                int i;
+                int64_t cur_addr;
+                uint8_t *cur_buf;
+
                 buf = g_malloc(BLOCK_SIZE);
                 qemu_get_buffer(f, buf, BLOCK_SIZE);
-                ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
-                                 nr_sectors * BDRV_SECTOR_SIZE, 0);
+                for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
+                    cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size;
+                    cur_buf = buf + i * cluster_size;
+
+                    if (buffer_is_zero(cur_buf, cluster_size)) {
+                        ret = blk_pwrite_zeroes(blk, cur_addr,
+                                                cluster_size,
+                                                BDRV_REQ_MAY_UNMAP);
+                    } else {
+                        ret = blk_pwrite(blk, cur_addr, cur_buf,
+                                         cluster_size, 0);
+                    }
+                    if (ret < 0) {
+                        break;
+                    }
+                }
                 g_free(buf);
             }
 
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH v4] migration/block: use blk_pwrite_zeroes for each zero cluster

Posted by Stefan Hajnoczi 8 years, 10 months ago

On Tue, Apr 11, 2017 at 08:05:12PM +0800, jemmy858585@gmail.com wrote:
> From: Lidong Chen <lidongchen@tencent.com>
> 
> BLOCK_SIZE is (1 << 20), qcow2 cluster size is 65536 by default,
> this maybe cause the qcow2 file size is bigger after migration.
> This patch check each cluster, use blk_pwrite_zeroes for each
> zero cluster.
> 
> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> ---
>  migration/block.c | 33 +++++++++++++++++++++++++++++++--
>  1 file changed, 31 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/block.c b/migration/block.c
> index 7734ff7..5d0635a 100644
> --- a/migration/block.c
> +++ b/migration/block.c
> @@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>      int64_t total_sectors = 0;
>      int nr_sectors;
>      int ret;
> +    BlockDriverInfo bdi;
> +    int cluster_size;
>  
>      do {
>          addr = qemu_get_be64(f);
> @@ -919,6 +921,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>                      error_report_err(local_err);
>                      return -EINVAL;
>                  }
> +
> +                ret = bdrv_get_info(blk_bs(blk), &bdi);
> +                if (ret == 0 && bdi.cluster_size > 0 &&
> +                    bdi.cluster_size <= BLOCK_SIZE &&
> +                    BLOCK_SIZE % bdi.cluster_size == 0) {
> +                    cluster_size = bdi.cluster_size;
> +                } else {
> +                    cluster_size = BLOCK_SIZE;

This is a nice trick to unify code paths.  It has a disadvantage though:

If the "zero blocks" migration capability is enabled and the drive has
no cluster_size (e.g. raw files), then the source QEMU process has
already scanned for zeroes.  CPU is wasted scanning for zeroes in the
destination QEMU process.

Given that disk images can be large we should probably avoid unnecessary
scanning.  This is especially true because there's no other reason
(besides zero detection) to pollute the CPU cache with data from the
disk image.

In other words, we should only scan for zeroes when
!block_mig_state.zero_blocks || cluster_size < BLOCK_SIZE.

> +                }
>              }
>  
>              if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
> @@ -932,10 +943,28 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>                                          nr_sectors * BDRV_SECTOR_SIZE,
>                                          BDRV_REQ_MAY_UNMAP);
>              } else {
> +                int i;
> +                int64_t cur_addr;
> +                uint8_t *cur_buf;
> +
>                  buf = g_malloc(BLOCK_SIZE);
>                  qemu_get_buffer(f, buf, BLOCK_SIZE);
> -                ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
> -                                 nr_sectors * BDRV_SECTOR_SIZE, 0);
> +                for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
> +                    cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size;
> +                    cur_buf = buf + i * cluster_size;
> +
> +                    if (buffer_is_zero(cur_buf, cluster_size)) {
> +                        ret = blk_pwrite_zeroes(blk, cur_addr,
> +                                                cluster_size,
> +                                                BDRV_REQ_MAY_UNMAP);
> +                    } else {
> +                        ret = blk_pwrite(blk, cur_addr, cur_buf,
> +                                         cluster_size, 0);
> +                    }
> +                    if (ret < 0) {
> +                        break;
> +                    }
> +                }
>                  g_free(buf);
>              }

Re: [Qemu-devel] [PATCH v4] migration/block: use blk_pwrite_zeroes for each zero cluster

Posted by 858585 jemmy 8 years, 10 months ago

On Tue, Apr 11, 2017 at 11:59 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> On Tue, Apr 11, 2017 at 08:05:12PM +0800, jemmy858585@gmail.com wrote:
>> From: Lidong Chen <lidongchen@tencent.com>
>>
>> BLOCK_SIZE is (1 << 20), qcow2 cluster size is 65536 by default,
>> this maybe cause the qcow2 file size is bigger after migration.
>> This patch check each cluster, use blk_pwrite_zeroes for each
>> zero cluster.
>>
>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
>> ---
>>  migration/block.c | 33 +++++++++++++++++++++++++++++++--
>>  1 file changed, 31 insertions(+), 2 deletions(-)
>>
>> diff --git a/migration/block.c b/migration/block.c
>> index 7734ff7..5d0635a 100644
>> --- a/migration/block.c
>> +++ b/migration/block.c
>> @@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>      int64_t total_sectors = 0;
>>      int nr_sectors;
>>      int ret;
>> +    BlockDriverInfo bdi;
>> +    int cluster_size;
>>
>>      do {
>>          addr = qemu_get_be64(f);
>> @@ -919,6 +921,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>                      error_report_err(local_err);
>>                      return -EINVAL;
>>                  }
>> +
>> +                ret = bdrv_get_info(blk_bs(blk), &bdi);
>> +                if (ret == 0 && bdi.cluster_size > 0 &&
>> +                    bdi.cluster_size <= BLOCK_SIZE &&
>> +                    BLOCK_SIZE % bdi.cluster_size == 0) {
>> +                    cluster_size = bdi.cluster_size;
>> +                } else {
>> +                    cluster_size = BLOCK_SIZE;
>
> This is a nice trick to unify code paths.  It has a disadvantage though:
>
> If the "zero blocks" migration capability is enabled and the drive has
> no cluster_size (e.g. raw files), then the source QEMU process has
> already scanned for zeroes.  CPU is wasted scanning for zeroes in the
> destination QEMU process.
>
> Given that disk images can be large we should probably avoid unnecessary
> scanning.  This is especially true because there's no other reason
> (besides zero detection) to pollute the CPU cache with data from the
> disk image.
>
> In other words, we should only scan for zeroes when
> !block_mig_state.zero_blocks || cluster_size < BLOCK_SIZE.

This case, the source qemu process will add BLK_MIG_FLAG_ZERO_BLOCK flag.
It will call blk_pwrite_zeroes already before apply this patch.
so destination QEMU process will not scanning for zero cluster.

There are two reason cause the destination QEMU process receive the
block which don't have
BLK_MIG_FLAG_ZERO_BLOCK flag.
1.the source QEMU process is old version, or !block_mig_state.zero_blocks.
2.the content of BLOCK_SIZE is not zero.

So if the destination QEMU process receive the block which don't have
BLK_MIG_FLAG_ZERO_BLOCK flag, it already mee the condition
!block_mig_state.zero_blocks || cluster_size < BLOCK_SIZE.

so i think it's unnecessary to check this condition again.

Thanks.

>
>> +                }
>>              }
>>
>>              if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
>> @@ -932,10 +943,28 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>                                          nr_sectors * BDRV_SECTOR_SIZE,
>>                                          BDRV_REQ_MAY_UNMAP);
>>              } else {
>> +                int i;
>> +                int64_t cur_addr;
>> +                uint8_t *cur_buf;
>> +
>>                  buf = g_malloc(BLOCK_SIZE);
>>                  qemu_get_buffer(f, buf, BLOCK_SIZE);
>> -                ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
>> -                                 nr_sectors * BDRV_SECTOR_SIZE, 0);
>> +                for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
>> +                    cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size;
>> +                    cur_buf = buf + i * cluster_size;
>> +
>> +                    if (buffer_is_zero(cur_buf, cluster_size)) {
>> +                        ret = blk_pwrite_zeroes(blk, cur_addr,
>> +                                                cluster_size,
>> +                                                BDRV_REQ_MAY_UNMAP);
>> +                    } else {
>> +                        ret = blk_pwrite(blk, cur_addr, cur_buf,
>> +                                         cluster_size, 0);
>> +                    }
>> +                    if (ret < 0) {
>> +                        break;
>> +                    }
>> +                }
>>                  g_free(buf);
>>              }

Re: [Qemu-devel] [PATCH v4] migration/block: use blk_pwrite_zeroes for each zero cluster

Posted by 858585 jemmy 8 years, 10 months ago

On Wed, Apr 12, 2017 at 9:27 AM, 858585 jemmy <jemmy858585@gmail.com> wrote:
> On Tue, Apr 11, 2017 at 11:59 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>> On Tue, Apr 11, 2017 at 08:05:12PM +0800, jemmy858585@gmail.com wrote:
>>> From: Lidong Chen <lidongchen@tencent.com>
>>>
>>> BLOCK_SIZE is (1 << 20), qcow2 cluster size is 65536 by default,
>>> this maybe cause the qcow2 file size is bigger after migration.
>>> This patch check each cluster, use blk_pwrite_zeroes for each
>>> zero cluster.
>>>
>>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
>>> ---
>>>  migration/block.c | 33 +++++++++++++++++++++++++++++++--
>>>  1 file changed, 31 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/migration/block.c b/migration/block.c
>>> index 7734ff7..5d0635a 100644
>>> --- a/migration/block.c
>>> +++ b/migration/block.c
>>> @@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>>      int64_t total_sectors = 0;
>>>      int nr_sectors;
>>>      int ret;
>>> +    BlockDriverInfo bdi;
>>> +    int cluster_size;
>>>
>>>      do {
>>>          addr = qemu_get_be64(f);
>>> @@ -919,6 +921,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>>                      error_report_err(local_err);
>>>                      return -EINVAL;
>>>                  }
>>> +
>>> +                ret = bdrv_get_info(blk_bs(blk), &bdi);
>>> +                if (ret == 0 && bdi.cluster_size > 0 &&
>>> +                    bdi.cluster_size <= BLOCK_SIZE &&
>>> +                    BLOCK_SIZE % bdi.cluster_size == 0) {
>>> +                    cluster_size = bdi.cluster_size;
>>> +                } else {
>>> +                    cluster_size = BLOCK_SIZE;
>>
>> This is a nice trick to unify code paths.  It has a disadvantage though:
>>
>> If the "zero blocks" migration capability is enabled and the drive has
>> no cluster_size (e.g. raw files), then the source QEMU process has
>> already scanned for zeroes.  CPU is wasted scanning for zeroes in the
>> destination QEMU process.
>>
>> Given that disk images can be large we should probably avoid unnecessary
>> scanning.  This is especially true because there's no other reason
>> (besides zero detection) to pollute the CPU cache with data from the
>> disk image.
>>
>> In other words, we should only scan for zeroes when
>> !block_mig_state.zero_blocks || cluster_size < BLOCK_SIZE.
>
> This case, the source qemu process will add BLK_MIG_FLAG_ZERO_BLOCK flag.
> It will call blk_pwrite_zeroes already before apply this patch.
> so destination QEMU process will not scanning for zero cluster.
>
> There are two reason cause the destination QEMU process receive the
> block which don't have
> BLK_MIG_FLAG_ZERO_BLOCK flag.
> 1.the source QEMU process is old version, or !block_mig_state.zero_blocks.
> 2.the content of BLOCK_SIZE is not zero.
>
> So if the destination QEMU process receive the block which don't have
> BLK_MIG_FLAG_ZERO_BLOCK flag, it already mee the condition
> !block_mig_state.zero_blocks || cluster_size < BLOCK_SIZE.
>
> so i think it's unnecessary to check this condition again.
>
> Thanks.

Sorry,  you are right.
it will cause the destination QEMU process scanning for zeroes again.

How about this?

                for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
                    cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size;
                    cur_buf = buf + i * cluster_size;

                    if ((!block_mig_state.zero_blocks || cluster_size
< BLOCK_SIZE)
                        && buffer_is_zero(cur_buf, cluster_size)) {
                        ret = blk_pwrite_zeroes(blk, cur_addr,
                                                cluster_size,
                                                BDRV_REQ_MAY_UNMAP);
                    } else {
                        ret = blk_pwrite(blk, cur_addr, cur_buf,
                                         cluster_size, 0);
                    }
                    if (ret < 0) {
                        break;
                    }
                }


>
>>
>>> +                }
>>>              }
>>>
>>>              if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
>>> @@ -932,10 +943,28 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>>                                          nr_sectors * BDRV_SECTOR_SIZE,
>>>                                          BDRV_REQ_MAY_UNMAP);
>>>              } else {
>>> +                int i;
>>> +                int64_t cur_addr;
>>> +                uint8_t *cur_buf;
>>> +
>>>                  buf = g_malloc(BLOCK_SIZE);
>>>                  qemu_get_buffer(f, buf, BLOCK_SIZE);
>>> -                ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
>>> -                                 nr_sectors * BDRV_SECTOR_SIZE, 0);
>>> +                for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
>>> +                    cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size;
>>> +                    cur_buf = buf + i * cluster_size;
>>> +
>>> +                    if (buffer_is_zero(cur_buf, cluster_size)) {
>>> +                        ret = blk_pwrite_zeroes(blk, cur_addr,
>>> +                                                cluster_size,
>>> +                                                BDRV_REQ_MAY_UNMAP);
>>> +                    } else {
>>> +                        ret = blk_pwrite(blk, cur_addr, cur_buf,
>>> +                                         cluster_size, 0);
>>> +                    }
>>> +                    if (ret < 0) {
>>> +                        break;
>>> +                    }
>>> +                }
>>>                  g_free(buf);
>>>              }

Re: [Qemu-devel] [PATCH v4] migration/block: use blk_pwrite_zeroes for each zero cluster

Posted by Stefan Hajnoczi 8 years, 10 months ago

On Wed, Apr 12, 2017 at 09:51:23AM +0800, 858585 jemmy wrote:
> On Wed, Apr 12, 2017 at 9:27 AM, 858585 jemmy <jemmy858585@gmail.com> wrote:
> > On Tue, Apr 11, 2017 at 11:59 PM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> >> On Tue, Apr 11, 2017 at 08:05:12PM +0800, jemmy858585@gmail.com wrote:
> >>> From: Lidong Chen <lidongchen@tencent.com>
> >>>
> >>> BLOCK_SIZE is (1 << 20), qcow2 cluster size is 65536 by default,
> >>> this maybe cause the qcow2 file size is bigger after migration.
> >>> This patch check each cluster, use blk_pwrite_zeroes for each
> >>> zero cluster.
> >>>
> >>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> >>> ---
> >>>  migration/block.c | 33 +++++++++++++++++++++++++++++++--
> >>>  1 file changed, 31 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/migration/block.c b/migration/block.c
> >>> index 7734ff7..5d0635a 100644
> >>> --- a/migration/block.c
> >>> +++ b/migration/block.c
> >>> @@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
> >>>      int64_t total_sectors = 0;
> >>>      int nr_sectors;
> >>>      int ret;
> >>> +    BlockDriverInfo bdi;
> >>> +    int cluster_size;
> >>>
> >>>      do {
> >>>          addr = qemu_get_be64(f);
> >>> @@ -919,6 +921,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
> >>>                      error_report_err(local_err);
> >>>                      return -EINVAL;
> >>>                  }
> >>> +
> >>> +                ret = bdrv_get_info(blk_bs(blk), &bdi);
> >>> +                if (ret == 0 && bdi.cluster_size > 0 &&
> >>> +                    bdi.cluster_size <= BLOCK_SIZE &&
> >>> +                    BLOCK_SIZE % bdi.cluster_size == 0) {
> >>> +                    cluster_size = bdi.cluster_size;
> >>> +                } else {
> >>> +                    cluster_size = BLOCK_SIZE;
> >>
> >> This is a nice trick to unify code paths.  It has a disadvantage though:
> >>
> >> If the "zero blocks" migration capability is enabled and the drive has
> >> no cluster_size (e.g. raw files), then the source QEMU process has
> >> already scanned for zeroes.  CPU is wasted scanning for zeroes in the
> >> destination QEMU process.
> >>
> >> Given that disk images can be large we should probably avoid unnecessary
> >> scanning.  This is especially true because there's no other reason
> >> (besides zero detection) to pollute the CPU cache with data from the
> >> disk image.
> >>
> >> In other words, we should only scan for zeroes when
> >> !block_mig_state.zero_blocks || cluster_size < BLOCK_SIZE.
> >
> > This case, the source qemu process will add BLK_MIG_FLAG_ZERO_BLOCK flag.
> > It will call blk_pwrite_zeroes already before apply this patch.
> > so destination QEMU process will not scanning for zero cluster.
> >
> > There are two reason cause the destination QEMU process receive the
> > block which don't have
> > BLK_MIG_FLAG_ZERO_BLOCK flag.
> > 1.the source QEMU process is old version, or !block_mig_state.zero_blocks.
> > 2.the content of BLOCK_SIZE is not zero.
> >
> > So if the destination QEMU process receive the block which don't have
> > BLK_MIG_FLAG_ZERO_BLOCK flag, it already mee the condition
> > !block_mig_state.zero_blocks || cluster_size < BLOCK_SIZE.
> >
> > so i think it's unnecessary to check this condition again.
> >
> > Thanks.
> 
> Sorry,  you are right.
> it will cause the destination QEMU process scanning for zeroes again.
> 
> How about this?
> 
>                 for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
>                     cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size;
>                     cur_buf = buf + i * cluster_size;
> 
>                     if ((!block_mig_state.zero_blocks || cluster_size
> < BLOCK_SIZE)
>                         && buffer_is_zero(cur_buf, cluster_size)) {
>                         ret = blk_pwrite_zeroes(blk, cur_addr,
>                                                 cluster_size,
>                                                 BDRV_REQ_MAY_UNMAP);
>                     } else {
>                         ret = blk_pwrite(blk, cur_addr, cur_buf,
>                                          cluster_size, 0);
>                     }
>                     if (ret < 0) {
>                         break;
>                     }
>                 }

Thanks, looks good to me.  Please send a new revision of the patch.

You can already add my:

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>