[RFC PATCH] block/io.c: Flush parent for quorum in generic code

Zhang Chen posted 1 patch 2 years, 11 months ago
Test checkpatch passed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20210512074957.763711-1-chen.zhang@intel.com
block/io.c | 31 ++++++++++++++++++++++---------
1 file changed, 22 insertions(+), 9 deletions(-)
[RFC PATCH] block/io.c: Flush parent for quorum in generic code
Posted by Zhang Chen 2 years, 11 months ago
Fix the issue from this patch:
[PATCH] block: Flush all children in generic code
From 883833e29cb800b4d92b5d4736252f4004885191

Quorum driver do not have the primary child.
It will caused guest block flush issue when use quorum and NBD.
The vm guest flushes failed,and then guest filesystem is shutdown.

Signed-off-by: Zhang Chen <chen.zhang@intel.com>
Reported-by: Minghao Yuan <meeho@qq.com>
---
 block/io.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/block/io.c b/block/io.c
index 35b6c56efc..4dc1873cb9 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2849,6 +2849,13 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
     BdrvChild *child;
     int current_gen;
     int ret = 0;
+    bool no_primary_child = false;
+
+    /* Quorum drivers do not have the primary child. */
+    if (!primary_child) {
+        primary_child = bs->file;
+        no_primary_child = true;
+    }
 
     bdrv_inc_in_flight(bs);
 
@@ -2886,12 +2893,12 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 
     /* But don't actually force it to the disk with cache=unsafe */
     if (bs->open_flags & BDRV_O_NO_FLUSH) {
-        goto flush_children;
+        goto flush_data;
     }
 
     /* Check if we really need to flush anything */
     if (bs->flushed_gen == current_gen) {
-        goto flush_children;
+        goto flush_data;
     }
 
     BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
@@ -2938,13 +2945,19 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
     /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
      * in the case of cache=unsafe, so there are no useless flushes.
      */
-flush_children:
-    ret = 0;
-    QLIST_FOREACH(child, &bs->children, next) {
-        if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
-            int this_child_ret = bdrv_co_flush(child->bs);
-            if (!ret) {
-                ret = this_child_ret;
+flush_data:
+    if (no_primary_child) {
+        /* Flush parent */
+        ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
+    } else {
+        /* Flush childrens */
+        ret = 0;
+        QLIST_FOREACH(child, &bs->children, next) {
+            if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
+                int this_child_ret = bdrv_co_flush(child->bs);
+                if (!ret) {
+                    ret = this_child_ret;
+                }
             }
         }
     }
-- 
2.25.1


Re: [RFC PATCH] block/io.c: Flush parent for quorum in generic code
Posted by Stefan Hajnoczi 2 years, 11 months ago
On Wed, May 12, 2021 at 03:49:57PM +0800, Zhang Chen wrote:
> Fix the issue from this patch:
> [PATCH] block: Flush all children in generic code
> From 883833e29cb800b4d92b5d4736252f4004885191
> 
> Quorum driver do not have the primary child.
> It will caused guest block flush issue when use quorum and NBD.
> The vm guest flushes failed,and then guest filesystem is shutdown.
> 
> Signed-off-by: Zhang Chen <chen.zhang@intel.com>
> Reported-by: Minghao Yuan <meeho@qq.com>
> ---
>  block/io.c | 31 ++++++++++++++++++++++---------
>  1 file changed, 22 insertions(+), 9 deletions(-)
...
> +flush_data:
> +    if (no_primary_child) {
> +        /* Flush parent */
> +        ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
> +    } else {
> +        /* Flush childrens */
> +        ret = 0;
> +        QLIST_FOREACH(child, &bs->children, next) {
> +            if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
> +                int this_child_ret = bdrv_co_flush(child->bs);
> +                if (!ret) {
> +                    ret = this_child_ret;
> +                }
>              }
>          }

I'm missing something:

The quorum driver has a valid bs->children list even though it does not
have a primary child. Why does QLIST_FOREACH() loop fail for you?

Does this patch effectively skip bdrv_co_flush() calls on all quorum
children? That seems wrong since children need to be flushed so that
data is persisted.

Stefan
RE: [RFC PATCH] block/io.c: Flush parent for quorum in generic code
Posted by Zhang, Chen 2 years, 11 months ago

> -----Original Message-----
> From: Stefan Hajnoczi <stefanha@redhat.com>
> Sent: Thursday, May 13, 2021 10:26 PM
> To: Zhang, Chen <chen.zhang@intel.com>
> Cc: Kevin Wolf <kwolf@redhat.com>; Max Reitz <mreitz@redhat.com>; Fam
> Zheng <fam@euphon.net>; qemu-dev <qemu-devel@nongnu.org>; qemu-
> block <qemu-block@nongnu.org>; Zhang Chen <zhangckid@gmail.com>;
> Minghao Yuan <meeho@qq.com>
> Subject: Re: [RFC PATCH] block/io.c: Flush parent for quorum in generic code
> 
> On Wed, May 12, 2021 at 03:49:57PM +0800, Zhang Chen wrote:
> > Fix the issue from this patch:
> > [PATCH] block: Flush all children in generic code From
> > 883833e29cb800b4d92b5d4736252f4004885191
> >
> > Quorum driver do not have the primary child.
> > It will caused guest block flush issue when use quorum and NBD.
> > The vm guest flushes failed,and then guest filesystem is shutdown.
> >
> > Signed-off-by: Zhang Chen <chen.zhang@intel.com>
> > Reported-by: Minghao Yuan <meeho@qq.com>
> > ---
> >  block/io.c | 31 ++++++++++++++++++++++---------
> >  1 file changed, 22 insertions(+), 9 deletions(-)
> ...
> > +flush_data:
> > +    if (no_primary_child) {
> > +        /* Flush parent */
> > +        ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
> > +    } else {
> > +        /* Flush childrens */
> > +        ret = 0;
> > +        QLIST_FOREACH(child, &bs->children, next) {
> > +            if (child->perm & (BLK_PERM_WRITE |
> BLK_PERM_WRITE_UNCHANGED)) {
> > +                int this_child_ret = bdrv_co_flush(child->bs);
> > +                if (!ret) {
> > +                    ret = this_child_ret;
> > +                }
> >              }
> >          }
> 
> I'm missing something:
> 
> The quorum driver has a valid bs->children list even though it does not have a
> primary child. Why does QLIST_FOREACH() loop fail for you?
> 

Yes, in most cases QLIST_FOREACH() works for me.
But not work when one of the child disconnected, the original patch changed
the default behavior of quorum driver when occurs issue.

For example:
VM quorum driver have two children, local disk1 and NBD disk2.
When network down and NBD disk2 disconnected, current code will report 
"event": "QUORUM_REPORT_BAD" "type": "flush", "error": "Input/output error"
And
"event": "BLOCK_IO_ERROR" "#block008", "reason": "Input/output error"

The guest even cannot read/write the normal local disk1. VM IO will crashed caused by NDB disk2 input/output error.
I think we do need report the event about we lose a child(NBD disk2) at this time, but no need crash all IO system.
Because we can fix it by x-blockdev-change and drive_add/drive_del for new children. 
Before the original patch(883833e2), VM still can read/write the local disk1.

This patch just the RFC version, please give me more comments to fix this issue.
 
Thanks
Chen


> Does this patch effectively skip bdrv_co_flush() calls on all quorum children?
> That seems wrong since children need to be flushed so that data is persisted.
> 

Yes, 

> Stefan
Re: [RFC PATCH] block/io.c: Flush parent for quorum in generic code
Posted by Lukas Straub 2 years, 11 months ago
On Wed, 12 May 2021 15:49:57 +0800
Zhang Chen <chen.zhang@intel.com> wrote:

> Fix the issue from this patch:
> [PATCH] block: Flush all children in generic code
> From 883833e29cb800b4d92b5d4736252f4004885191
> 
> Quorum driver do not have the primary child.
> It will caused guest block flush issue when use quorum and NBD.
> The vm guest flushes failed,and then guest filesystem is shutdown.

Hi,
I think the problem is rather that the quorum driver provides
.bdrv_co_flush_to_disk (which predates .bdrv_co_flush) instead of
.bdrv_co_flush. Can you try with the following patch instead?

diff --git a/block/quorum.c b/block/quorum.c
index cfc1436abb..f2c0805000 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1279,7 +1279,7 @@ static BlockDriver bdrv_quorum = {
     .bdrv_dirname                       = quorum_dirname,
     .bdrv_co_block_status               = quorum_co_block_status,
 
-    .bdrv_co_flush_to_disk              = quorum_co_flush,
+    .bdrv_co_flush                      = quorum_co_flush,
 
     .bdrv_getlength                     = quorum_getlength,


> Signed-off-by: Zhang Chen <chen.zhang@intel.com>
> Reported-by: Minghao Yuan <meeho@qq.com>
> ---
>  block/io.c | 31 ++++++++++++++++++++++---------
>  1 file changed, 22 insertions(+), 9 deletions(-)
> 
> diff --git a/block/io.c b/block/io.c
> index 35b6c56efc..4dc1873cb9 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -2849,6 +2849,13 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
>      BdrvChild *child;
>      int current_gen;
>      int ret = 0;
> +    bool no_primary_child = false;
> +
> +    /* Quorum drivers do not have the primary child. */
> +    if (!primary_child) {
> +        primary_child = bs->file;
> +        no_primary_child = true;
> +    }
>  
>      bdrv_inc_in_flight(bs);
>  
> @@ -2886,12 +2893,12 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
>  
>      /* But don't actually force it to the disk with cache=unsafe */
>      if (bs->open_flags & BDRV_O_NO_FLUSH) {
> -        goto flush_children;
> +        goto flush_data;
>      }
>  
>      /* Check if we really need to flush anything */
>      if (bs->flushed_gen == current_gen) {
> -        goto flush_children;
> +        goto flush_data;
>      }
>  
>      BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
> @@ -2938,13 +2945,19 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
>      /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
>       * in the case of cache=unsafe, so there are no useless flushes.
>       */
> -flush_children:
> -    ret = 0;
> -    QLIST_FOREACH(child, &bs->children, next) {
> -        if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
> -            int this_child_ret = bdrv_co_flush(child->bs);
> -            if (!ret) {
> -                ret = this_child_ret;
> +flush_data:
> +    if (no_primary_child) {
> +        /* Flush parent */
> +        ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
> +    } else {
> +        /* Flush childrens */
> +        ret = 0;
> +        QLIST_FOREACH(child, &bs->children, next) {
> +            if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
> +                int this_child_ret = bdrv_co_flush(child->bs);
> +                if (!ret) {
> +                    ret = this_child_ret;
> +                }
>              }
>          }
>      }



--