[Qemu-devel] [PATCH v4 10/10] block/blklogwrites: Use the block device logical sector size when logging writes

Ari Sundholm posted 10 patches 7 years, 8 months ago
There is a newer version of this series
[Qemu-devel] [PATCH v4 10/10] block/blklogwrites: Use the block device logical sector size when logging writes
Posted by Ari Sundholm 7 years, 8 months ago
The guest OS may perform writes which are aligned to the logical
sector size instead of the physical one, so logging at this granularity
records the writes performed on the block device most faithfully.

Signed-off-by: Ari Sundholm <ari@tuxera.com>
---
 block/blklogwrites.c | 47 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 15 deletions(-)

diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index 216367f..decf5e5 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -47,6 +47,8 @@ struct log_write_entry {
 
 typedef struct {
     BdrvChild *log_file;
+    uint32_t sectorsize;
+    uint32_t sectorbits;
     uint64_t cur_log_sector;
     uint64_t nr_entries;
 } BDRVBlkLogWritesState;
@@ -67,6 +69,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
+    s->sectorsize = BDRV_SECTOR_SIZE; /* May be updated later */
+    s->sectorbits = BDRV_SECTOR_BITS;
     s->cur_log_sector = 1;
     s->nr_entries = 0;
 
@@ -173,11 +177,20 @@ static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp)
     }
 }
 
+static inline uint32_t blk_log_writes_log2(uint32_t value)
+{
+    assert(value > 0);
+    return 31 - clz32(value);
+}
+
 static void blk_log_writes_apply_blkconf(BlockDriverState *bs, BlockConf *conf)
 {
-    assert(bs && conf && conf->blk);
+    BDRVBlkLogWritesState *s = bs->opaque;
+    assert(bs && conf && conf->blk && s);
 
-    bs->bl.request_alignment = conf->logical_block_size;
+    s->sectorsize = conf->logical_block_size;
+    s->sectorbits = blk_log_writes_log2(s->sectorsize);
+    bs->bl.request_alignment = s->sectorsize;
     if (conf->discard_granularity != (uint32_t)-1) {
         bs->bl.pdiscard_alignment = conf->discard_granularity;
     }
@@ -220,20 +233,20 @@ typedef struct {
 static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
 {
     BDRVBlkLogWritesState *s = lr->bs->opaque;
-    uint64_t cur_log_offset = s->cur_log_sector << BDRV_SECTOR_BITS;
+    uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits;
 
     s->nr_entries++;
     s->cur_log_sector +=
-            ROUND_UP(lr->qiov->size, BDRV_SECTOR_SIZE) >> BDRV_SECTOR_BITS;
+            ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits;
 
     lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size,
                                   lr->qiov, 0);
 
     /* Logging for the "write zeroes" operation */
     if (lr->log_ret == 0 && lr->zero_size) {
-        cur_log_offset = s->cur_log_sector << BDRV_SECTOR_BITS;
+        cur_log_offset = s->cur_log_sector << s->sectorbits;
         s->cur_log_sector +=
-                ROUND_UP(lr->zero_size, BDRV_SECTOR_SIZE) >> BDRV_SECTOR_BITS;
+                ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits;
 
         lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset,
                                             lr->zero_size, 0);
@@ -245,21 +258,22 @@ static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
             .magic      = cpu_to_le64(WRITE_LOG_MAGIC),
             .version    = cpu_to_le64(WRITE_LOG_VERSION),
             .nr_entries = cpu_to_le64(s->nr_entries),
-            .sectorsize = cpu_to_le32(1 << BDRV_SECTOR_BITS),
+            .sectorsize = cpu_to_le32(s->sectorsize),
         };
-        static const char zeroes[BDRV_SECTOR_SIZE - sizeof(super)] = { '\0' };
+        void *zeroes = g_malloc0(s->sectorsize - sizeof(super));
         QEMUIOVector qiov;
 
         qemu_iovec_init(&qiov, 2);
         qemu_iovec_add(&qiov, &super, sizeof(super));
-        qemu_iovec_add(&qiov, (void *)zeroes, sizeof(zeroes));
+        qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super));
 
         lr->log_ret =
-            bdrv_co_pwritev(s->log_file, 0, BDRV_SECTOR_SIZE, &qiov, 0);
+            bdrv_co_pwritev(s->log_file, 0, s->sectorsize, &qiov, 0);
         if (lr->log_ret == 0) {
             lr->log_ret = bdrv_co_flush(s->log_file->bs);
         }
         qemu_iovec_destroy(&qiov);
+        g_free(zeroes);
     }
 }
 
@@ -277,6 +291,7 @@ blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     QEMUIOVector log_qiov;
     size_t niov = qiov ? qiov->niov : 0;
     size_t i;
+    BDRVBlkLogWritesState *s = bs->opaque;
     BlkLogWritesFileReq fr = {
         .bs     = bs,
         .offset = offset,
@@ -289,22 +304,23 @@ blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
         .bs             = bs,
         .qiov           = &log_qiov,
         .entry = {
-            .sector     = cpu_to_le64(offset >> BDRV_SECTOR_BITS),
-            .nr_sectors = cpu_to_le64(bytes >> BDRV_SECTOR_BITS),
+            .sector     = cpu_to_le64(offset >> s->sectorbits),
+            .nr_sectors = cpu_to_le64(bytes >> s->sectorbits),
             .flags      = cpu_to_le64(entry_flags),
             .data_len   = 0,
         },
         .zero_size = is_zero_write ? bytes : 0,
     };
-    static const char zeroes[BDRV_SECTOR_SIZE - sizeof(struct log_write_entry)]
-        = { '\0' };
+    void *zeroes = g_malloc0(s->sectorsize - sizeof(lr.entry));
 
+    assert((1 << s->sectorbits) == s->sectorsize);
+    assert(bs->bl.request_alignment == s->sectorsize);
     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
 
     qemu_iovec_init(&log_qiov, niov + 2);
     qemu_iovec_add(&log_qiov, &lr.entry, sizeof(lr.entry));
-    qemu_iovec_add(&log_qiov, (void *)zeroes, sizeof(zeroes));
+    qemu_iovec_add(&log_qiov, zeroes, s->sectorsize - sizeof(lr.entry));
     for (i = 0; i < niov; ++i) {
         qemu_iovec_add(&log_qiov, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
     }
@@ -313,6 +329,7 @@ blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     blk_log_writes_co_do_log(&lr);
 
     qemu_iovec_destroy(&log_qiov);
+    g_free(zeroes);
 
     if (lr.log_ret < 0) {
         return lr.log_ret;
-- 
2.7.4


Re: [Qemu-devel] [Qemu-block] [PATCH v4 10/10] block/blklogwrites: Use the block device logical sector size when logging writes
Posted by Alberto Garcia 7 years, 7 months ago
On Fri 08 Jun 2018 02:32:28 PM CEST, Ari Sundholm wrote:
> The guest OS may perform writes which are aligned to the logical
> sector size instead of the physical one, so logging at this granularity
> records the writes performed on the block device most faithfully.
>
> Signed-off-by: Ari Sundholm <ari@tuxera.com>
> ---
>  block/blklogwrites.c | 47 ++++++++++++++++++++++++++++++++---------------
>  1 file changed, 32 insertions(+), 15 deletions(-)
>
> diff --git a/block/blklogwrites.c b/block/blklogwrites.c
> index 216367f..decf5e5 100644
> --- a/block/blklogwrites.c
> +++ b/block/blklogwrites.c
> @@ -47,6 +47,8 @@ struct log_write_entry {
>  
>  typedef struct {
>      BdrvChild *log_file;
> +    uint32_t sectorsize;
> +    uint32_t sectorbits;
>      uint64_t cur_log_sector;
>      uint64_t nr_entries;
>  } BDRVBlkLogWritesState;
> @@ -67,6 +69,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>          goto fail;
>      }
>  
> +    s->sectorsize = BDRV_SECTOR_SIZE; /* May be updated later */
> +    s->sectorbits = BDRV_SECTOR_BITS;
>      s->cur_log_sector = 1;
>      s->nr_entries = 0;

I haven't looked closely into this series so sorry if there's something
that I'm overlooking, but this caught my attention: why do you have a
patch that improves a driver that you introduced earlier in this same
series?

Berto

Re: [Qemu-devel] [Qemu-block] [PATCH v4 10/10] block/blklogwrites: Use the block device logical sector size when logging writes
Posted by Ari Sundholm 7 years, 7 months ago
On 06/18/2018 06:36 PM, Alberto Garcia wrote:
> On Fri 08 Jun 2018 02:32:28 PM CEST, Ari Sundholm wrote:
>> The guest OS may perform writes which are aligned to the logical
>> sector size instead of the physical one, so logging at this granularity
>> records the writes performed on the block device most faithfully.
>>
>> Signed-off-by: Ari Sundholm <ari@tuxera.com>
>> ---
>>   block/blklogwrites.c | 47 ++++++++++++++++++++++++++++++++---------------
>>   1 file changed, 32 insertions(+), 15 deletions(-)
>>
>> diff --git a/block/blklogwrites.c b/block/blklogwrites.c
>> index 216367f..decf5e5 100644
>> --- a/block/blklogwrites.c
>> +++ b/block/blklogwrites.c
>> @@ -47,6 +47,8 @@ struct log_write_entry {
>>   
>>   typedef struct {
>>       BdrvChild *log_file;
>> +    uint32_t sectorsize;
>> +    uint32_t sectorbits;
>>       uint64_t cur_log_sector;
>>       uint64_t nr_entries;
>>   } BDRVBlkLogWritesState;
>> @@ -67,6 +69,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>>           goto fail;
>>       }
>>   
>> +    s->sectorsize = BDRV_SECTOR_SIZE; /* May be updated later */
>> +    s->sectorbits = BDRV_SECTOR_BITS;
>>       s->cur_log_sector = 1;
>>       s->nr_entries = 0;
> 
> I haven't looked closely into this series so sorry if there's something
> that I'm overlooking, but this caught my attention: why do you have a
> patch that improves a driver that you introduced earlier in this same
> series?
> 

I guess there is no other real reason than that it reflects the 
development history of the driver more closely: the code by the original 
author did not contain proper support for non-512 sector sizes. I then 
added the parts needed for this.

The series could be restructured in the following manner if that makes 
it more reviewable (prerequisites first, then the entire driver):
- Patch 1: as before
- Patches 2-7: introduction of the mechanism to pass block 
configurations to drivers + changes to block device implementations to 
make them pass on this information (current patches 3-8)
- Patch 8: Introduction of the blklogwrites driver (current patches 2, 9 
and 10, squashed)

Does that sound like a good idea?

> Berto
> 

Thank you,
Ari Sundholm
ari@tuxera.com

Re: [Qemu-devel] [Qemu-block] [PATCH v4 10/10] block/blklogwrites: Use the block device logical sector size when logging writes
Posted by Alberto Garcia 7 years, 7 months ago
On Mon 18 Jun 2018 05:53:50 PM CEST, Ari Sundholm wrote:
> On 06/18/2018 06:36 PM, Alberto Garcia wrote:
>> On Fri 08 Jun 2018 02:32:28 PM CEST, Ari Sundholm wrote:
>>> The guest OS may perform writes which are aligned to the logical
>>> sector size instead of the physical one, so logging at this granularity
>>> records the writes performed on the block device most faithfully.
>>>
>>> Signed-off-by: Ari Sundholm <ari@tuxera.com>
>>> ---
>>>   block/blklogwrites.c | 47 ++++++++++++++++++++++++++++++++---------------
>>>   1 file changed, 32 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/block/blklogwrites.c b/block/blklogwrites.c
>>> index 216367f..decf5e5 100644
>>> --- a/block/blklogwrites.c
>>> +++ b/block/blklogwrites.c
>>> @@ -47,6 +47,8 @@ struct log_write_entry {
>>>   
>>>   typedef struct {
>>>       BdrvChild *log_file;
>>> +    uint32_t sectorsize;
>>> +    uint32_t sectorbits;
>>>       uint64_t cur_log_sector;
>>>       uint64_t nr_entries;
>>>   } BDRVBlkLogWritesState;
>>> @@ -67,6 +69,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
>>>           goto fail;
>>>       }
>>>   
>>> +    s->sectorsize = BDRV_SECTOR_SIZE; /* May be updated later */
>>> +    s->sectorbits = BDRV_SECTOR_BITS;
>>>       s->cur_log_sector = 1;
>>>       s->nr_entries = 0;
>> 
>> I haven't looked closely into this series so sorry if there's something
>> that I'm overlooking, but this caught my attention: why do you have a
>> patch that improves a driver that you introduced earlier in this same
>> series?
>> 
>
> I guess there is no other real reason than that it reflects the
> development history of the driver more closely: the code by the
> original author did not contain proper support for non-512 sector
> sizes. I then added the parts needed for this.

Ah I see, so the driver was originally written by someone else and you
improved it.

As I said I haven't looked closely into the code (I hope I'll have time
to do it after I'm done with my blockdev-reopen work) so perhaps there's
good reason for this in this case, but in general I think that reviews
are simpler if one doesn't need to review code that is going to be
modified in the following patch.

> The series could be restructured in the following manner if that makes
> it more reviewable (prerequisites first, then the entire driver):

I just want to clarify that the code looks reviewable as it is now, it's
not like this is a blocker (for reviews at least) :)

> - Patch 1: as before
> - Patches 2-7: introduction of the mechanism to pass block
> configurations to drivers + changes to block device implementations to
> make them pass on this information (current patches 3-8)
> - Patch 8: Introduction of the blklogwrites driver (current patches 2,
> 9 and 10, squashed)
>
> Does that sound like a good idea?

It does, yes.

Thanks!

Berto