[PATCH] block/io: accept NULL qiov in bdrv_pad_request

Fiona Ebner posted 1 patch 1 month, 1 week ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20240319091341.303414-1-f.ebner@proxmox.com
Maintainers: Stefan Hajnoczi <stefanha@redhat.com>, Fam Zheng <fam@euphon.net>, Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>
block/io.c | 31 +++++++++++++++++++------------
1 file changed, 19 insertions(+), 12 deletions(-)
[PATCH] block/io: accept NULL qiov in bdrv_pad_request
Posted by Fiona Ebner 1 month, 1 week ago
From: Stefan Reiter <s.reiter@proxmox.com>

Some operations, e.g. block-stream, perform reads while discarding the
results (only copy-on-read matters). In this case, they will pass NULL
as the target QEMUIOVector, which will however trip bdrv_pad_request,
since it wants to extend its passed vector. In particular, this is the
case for the blk_co_preadv() call in stream_populate().

If there is no qiov, no operation can be done with it, but the bytes
and offset still need to be updated, so the subsequent aligned read
will actually be aligned and not run into an assertion failure.

In particular, this can happen when the request alignment of the top
node is larger than the allocated part of the bottom node, in which
case padding becomes necessary. For example:

> ./qemu-img create /tmp/backing.qcow2 -f qcow2 64M -o cluster_size=32768
> ./qemu-io -c "write -P42 0x0 0x1" /tmp/backing.qcow2
> ./qemu-img create /tmp/top.qcow2 -f qcow2 64M -b /tmp/backing.qcow2 -F qcow2
> ./qemu-system-x86_64 --qmp stdio \
> --blockdev qcow2,node-name=node0,file.driver=file,file.filename=/tmp/top.qcow2 \
> <<EOF
> {"execute": "qmp_capabilities"}
> {"execute": "blockdev-add", "arguments": { "driver": "compress", "file": "node0", "node-name": "node1" } }
> {"execute": "block-stream", "arguments": { "job-id": "stream0", "device": "node1" } }
> EOF

Originally-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: do update bytes and offset in any case
     add reproducer to commit message]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 block/io.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/block/io.c b/block/io.c
index 33150c0359..395bea3bac 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1726,22 +1726,29 @@ static int bdrv_pad_request(BlockDriverState *bs,
         return 0;
     }
 
-    sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
-                                  &sliced_head, &sliced_tail,
-                                  &sliced_niov);
+    /*
+     * For prefetching in stream_populate(), no qiov is passed along, because
+     * only copy-on-read matters.
+     */
+    if (qiov && *qiov) {
+        sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
+                                      &sliced_head, &sliced_tail,
+                                      &sliced_niov);
 
-    /* Guaranteed by bdrv_check_request32() */
-    assert(*bytes <= SIZE_MAX);
-    ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
-                                  sliced_head, *bytes);
-    if (ret < 0) {
-        bdrv_padding_finalize(pad);
-        return ret;
+        /* Guaranteed by bdrv_check_request32() */
+        assert(*bytes <= SIZE_MAX);
+        ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
+                                      sliced_head, *bytes);
+        if (ret < 0) {
+            bdrv_padding_finalize(pad);
+            return ret;
+        }
+        *qiov = &pad->local_qiov;
+        *qiov_offset = 0;
     }
+
     *bytes += pad->head + pad->tail;
     *offset -= pad->head;
-    *qiov = &pad->local_qiov;
-    *qiov_offset = 0;
     if (padded) {
         *padded = true;
     }
-- 
2.39.2
Re: [PATCH] block/io: accept NULL qiov in bdrv_pad_request
Posted by Stefan Hajnoczi 1 month, 1 week ago
On Tue, Mar 19, 2024 at 10:13:41AM +0100, Fiona Ebner wrote:
> From: Stefan Reiter <s.reiter@proxmox.com>
> 
> Some operations, e.g. block-stream, perform reads while discarding the
> results (only copy-on-read matters). In this case, they will pass NULL
> as the target QEMUIOVector, which will however trip bdrv_pad_request,
> since it wants to extend its passed vector. In particular, this is the
> case for the blk_co_preadv() call in stream_populate().
> 
> If there is no qiov, no operation can be done with it, but the bytes
> and offset still need to be updated, so the subsequent aligned read
> will actually be aligned and not run into an assertion failure.
> 
> In particular, this can happen when the request alignment of the top
> node is larger than the allocated part of the bottom node, in which
> case padding becomes necessary. For example:
> 
> > ./qemu-img create /tmp/backing.qcow2 -f qcow2 64M -o cluster_size=32768
> > ./qemu-io -c "write -P42 0x0 0x1" /tmp/backing.qcow2
> > ./qemu-img create /tmp/top.qcow2 -f qcow2 64M -b /tmp/backing.qcow2 -F qcow2
> > ./qemu-system-x86_64 --qmp stdio \
> > --blockdev qcow2,node-name=node0,file.driver=file,file.filename=/tmp/top.qcow2 \
> > <<EOF
> > {"execute": "qmp_capabilities"}
> > {"execute": "blockdev-add", "arguments": { "driver": "compress", "file": "node0", "node-name": "node1" } }
> > {"execute": "block-stream", "arguments": { "job-id": "stream0", "device": "node1" } }
> > EOF

Hi Fiona,
Can you add a qemu-iotests test case for this issue?

Thanks,
Stefan

> 
> Originally-by: Stefan Reiter <s.reiter@proxmox.com>
> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
> [FE: do update bytes and offset in any case
>      add reproducer to commit message]
> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
> ---
>  block/io.c | 31 +++++++++++++++++++------------
>  1 file changed, 19 insertions(+), 12 deletions(-)
> 
> diff --git a/block/io.c b/block/io.c
> index 33150c0359..395bea3bac 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -1726,22 +1726,29 @@ static int bdrv_pad_request(BlockDriverState *bs,
>          return 0;
>      }
>  
> -    sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
> -                                  &sliced_head, &sliced_tail,
> -                                  &sliced_niov);
> +    /*
> +     * For prefetching in stream_populate(), no qiov is passed along, because
> +     * only copy-on-read matters.
> +     */
> +    if (qiov && *qiov) {
> +        sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
> +                                      &sliced_head, &sliced_tail,
> +                                      &sliced_niov);
>  
> -    /* Guaranteed by bdrv_check_request32() */
> -    assert(*bytes <= SIZE_MAX);
> -    ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
> -                                  sliced_head, *bytes);
> -    if (ret < 0) {
> -        bdrv_padding_finalize(pad);
> -        return ret;
> +        /* Guaranteed by bdrv_check_request32() */
> +        assert(*bytes <= SIZE_MAX);
> +        ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
> +                                      sliced_head, *bytes);
> +        if (ret < 0) {
> +            bdrv_padding_finalize(pad);
> +            return ret;
> +        }
> +        *qiov = &pad->local_qiov;
> +        *qiov_offset = 0;
>      }
> +
>      *bytes += pad->head + pad->tail;
>      *offset -= pad->head;
> -    *qiov = &pad->local_qiov;
> -    *qiov_offset = 0;
>      if (padded) {
>          *padded = true;
>      }
> -- 
> 2.39.2
> 
>