[PATCH] block-migration: Ensure we don't crash during migration cleanup

Fabiano Rosas posted 1 patch 9 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20230731203338.27581-1-farosas@suse.de
Maintainers: Stefan Hajnoczi <stefanha@redhat.com>, Fam Zheng <fam@euphon.net>, Juan Quintela <quintela@redhat.com>, Peter Xu <peterx@redhat.com>, Leonardo Bras <leobras@redhat.com>
migration/block.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
[PATCH] block-migration: Ensure we don't crash during migration cleanup
Posted by Fabiano Rosas 9 months ago
blk_insert_bs() can fail in init_blk_migration(), leaving the
BlkMigDevState without a dirty_bitmap and a BlockDriverState. Account
for these possibly missing elements when doing cleanup.

Fix the following crashes:

Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
359         BlockDriverState *bs = bitmap->bs;
 #0  0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
 #1  0x0000555555bba331 in unset_dirty_tracking () at ../migration/block.c:371
 #2  0x0000555555bbad98 in block_migration_cleanup_bmds () at ../migration/block.c:681

Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
7073        QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
 #0  0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
 #1  0x0000555555e9734a in bdrv_op_unblock_all (bs=0x0, reason=0x0) at ../block.c:7095
 #2  0x0000555555bbae13 in block_migration_cleanup_bmds () at ../migration/block.c:690

Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
 migration/block.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/migration/block.c b/migration/block.c
index b9580a6c7e..86c2256a2b 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -368,7 +368,9 @@ static void unset_dirty_tracking(void)
     BlkMigDevState *bmds;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
+        if (bmds->dirty_bitmap) {
+            bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
+        }
     }
 }
 
@@ -676,13 +678,18 @@ static int64_t get_remaining_dirty(void)
 static void block_migration_cleanup_bmds(void)
 {
     BlkMigDevState *bmds;
+    BlockDriverState *bs;
     AioContext *ctx;
 
     unset_dirty_tracking();
 
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
-        bdrv_op_unblock_all(blk_bs(bmds->blk), bmds->blocker);
+
+        bs = blk_bs(bmds->blk);
+        if (bs) {
+            bdrv_op_unblock_all(bs, bmds->blocker);
+        }
         error_free(bmds->blocker);
 
         /* Save ctx, because bmds->blk can disappear during blk_unref.  */
-- 
2.35.3
Re: [PATCH] block-migration: Ensure we don't crash during migration cleanup
Posted by Stefan Hajnoczi 8 months, 3 weeks ago
On Mon, Jul 31, 2023 at 05:33:38PM -0300, Fabiano Rosas wrote:
> We can fail the blk_insert_bs() at init_blk_migration(), leaving the
> BlkMigDevState without a dirty_bitmap and BlockDriverState. Account
> for the possibly missing elements when doing cleanup.
> 
> Fix the following crashes:
> 
> Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
> 0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
> 359         BlockDriverState *bs = bitmap->bs;
>  #0  0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
>  #1  0x0000555555bba331 in unset_dirty_tracking () at ../migration/block.c:371
>  #2  0x0000555555bbad98 in block_migration_cleanup_bmds () at ../migration/block.c:681
> 
> Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
> 0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
> 7073        QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
>  #0  0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
>  #1  0x0000555555e9734a in bdrv_op_unblock_all (bs=0x0, reason=0x0) at ../block.c:7095
>  #2  0x0000555555bbae13 in block_migration_cleanup_bmds () at ../migration/block.c:690
> 
> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> ---
>  migration/block.c | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)

Sorry, I missed this patch!

If this needs to be in QEMU 8.1 (-rc3 is being tagged today), please
reply and provide a justification. At this point only security fixes and
showstoppers will be merged. Thanks!

Applied to my block-next tree for QEMU 8.2:
https://gitlab.com/stefanha/qemu/commits/block-next

Stefan
Re: [PATCH] block-migration: Ensure we don't crash during migration cleanup
Posted by Claudio Fontana 8 months, 3 weeks ago
On 8/8/23 19:08, Stefan Hajnoczi wrote:
> On Mon, Jul 31, 2023 at 05:33:38PM -0300, Fabiano Rosas wrote:
>> We can fail the blk_insert_bs() at init_blk_migration(), leaving the
>> BlkMigDevState without a dirty_bitmap and BlockDriverState. Account
>> for the possibly missing elements when doing cleanup.
>>
>> Fix the following crashes:
>>
>> Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
>> 0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
>> 359         BlockDriverState *bs = bitmap->bs;
>>  #0  0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
>>  #1  0x0000555555bba331 in unset_dirty_tracking () at ../migration/block.c:371
>>  #2  0x0000555555bbad98 in block_migration_cleanup_bmds () at ../migration/block.c:681
>>
>> Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
>> 0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
>> 7073        QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
>>  #0  0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
>>  #1  0x0000555555e9734a in bdrv_op_unblock_all (bs=0x0, reason=0x0) at ../block.c:7095
>>  #2  0x0000555555bbae13 in block_migration_cleanup_bmds () at ../migration/block.c:690
>>
>> Signed-off-by: Fabiano Rosas <farosas@suse.de>
>> ---
>>  migration/block.c | 11 +++++++++--
>>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> Sorry, I missed this patch!
> 
> If this needs to be in QEMU 8.1 (-rc3 is being tagged today), please
> reply and provide a justification. At this point only security fixes and
> showstoppers will be merged. Thanks!
> 
> Applied to my block-next tree for QEMU 8.2:
> https://gitlab.com/stefanha/qemu/commits/block-next
> 
> Stefan

Thanks. In my personal view it's OK for 8.2: IIUC it happens during the migration-to-file work, which is not in 8.1 anyway.
Fabiano, correct me here if I am wrong.

Ciao,

Claudio
Re: [PATCH] block-migration: Ensure we don't crash during migration cleanup
Posted by Claudio Fontana 8 months, 4 weeks ago
Added Kevin and Hanna for block, since this still seems untouched?

Thanks,

Claudio

On 7/31/23 22:33, Fabiano Rosas wrote:
> We can fail the blk_insert_bs() at init_blk_migration(), leaving the
> BlkMigDevState without a dirty_bitmap and BlockDriverState. Account
> for the possibly missing elements when doing cleanup.
> 
> Fix the following crashes:
> 
> Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
> 0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
> 359         BlockDriverState *bs = bitmap->bs;
>  #0  0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
>  #1  0x0000555555bba331 in unset_dirty_tracking () at ../migration/block.c:371
>  #2  0x0000555555bbad98 in block_migration_cleanup_bmds () at ../migration/block.c:681
> 
> Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
> 0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
> 7073        QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
>  #0  0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
>  #1  0x0000555555e9734a in bdrv_op_unblock_all (bs=0x0, reason=0x0) at ../block.c:7095
>  #2  0x0000555555bbae13 in block_migration_cleanup_bmds () at ../migration/block.c:690
> 
> Signed-off-by: Fabiano Rosas <farosas@suse.de>
> ---
>  migration/block.c | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/block.c b/migration/block.c
> index b9580a6c7e..86c2256a2b 100644
> --- a/migration/block.c
> +++ b/migration/block.c
> @@ -368,7 +368,9 @@ static void unset_dirty_tracking(void)
>      BlkMigDevState *bmds;
>  
>      QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> -        bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
> +        if (bmds->dirty_bitmap) {
> +            bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
> +        }
>      }
>  }
>  
> @@ -676,13 +678,18 @@ static int64_t get_remaining_dirty(void)
>  static void block_migration_cleanup_bmds(void)
>  {
>      BlkMigDevState *bmds;
> +    BlockDriverState *bs;
>      AioContext *ctx;
>  
>      unset_dirty_tracking();
>  
>      while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
>          QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
> -        bdrv_op_unblock_all(blk_bs(bmds->blk), bmds->blocker);
> +
> +        bs = blk_bs(bmds->blk);
> +        if (bs) {
> +            bdrv_op_unblock_all(bs, bmds->blocker);
> +        }
>          error_free(bmds->blocker);
>  
>          /* Save ctx, because bmds->blk can disappear during blk_unref.  */