[Qemu-devel] [PATCH] blockjob: leak fix, remove from txn when failing early

Marc-André Lureau posted 1 patch 6 years ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20180327160736.24309-1-marcandre.lureau@redhat.com
There is a newer version of this series
blockjob.c | 5 +++++
1 file changed, 5 insertions(+)
[Qemu-devel] [PATCH] blockjob: leak fix, remove from txn when failing early
Posted by Marc-André Lureau 6 years ago
This fixes leaks found by ASAN such as:
  GTESTER tests/test-blockjob
=================================================================
==31442==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 24 byte(s) in 1 object(s) allocated from:
    #0 0x7f88483cba38 in __interceptor_calloc (/lib64/libasan.so.4+0xdea38)
    #1 0x7f8845e1bd77 in g_malloc0 ../glib/gmem.c:129
    #2 0x7f8845e1c04b in g_malloc0_n ../glib/gmem.c:360
    #3 0x5584d2732498 in block_job_txn_new /home/elmarco/src/qemu/blockjob.c:172
    #4 0x5584d2739b28 in block_job_create /home/elmarco/src/qemu/blockjob.c:973
    #5 0x5584d270ae31 in mk_job /home/elmarco/src/qemu/tests/test-blockjob.c:34
    #6 0x5584d270b1c1 in do_test_id /home/elmarco/src/qemu/tests/test-blockjob.c:57
    #7 0x5584d270b65c in test_job_ids /home/elmarco/src/qemu/tests/test-blockjob.c:118
    #8 0x7f8845e40b69 in test_case_run ../glib/gtestutils.c:2255
    #9 0x7f8845e40f29 in g_test_run_suite_internal ../glib/gtestutils.c:2339
    #10 0x7f8845e40fd2 in g_test_run_suite_internal ../glib/gtestutils.c:2351
    #11 0x7f8845e411e9 in g_test_run_suite ../glib/gtestutils.c:2426
    #12 0x7f8845e3fe72 in g_test_run ../glib/gtestutils.c:1692
    #13 0x5584d270d6e2 in main /home/elmarco/src/qemu/tests/test-blockjob.c:377
    #14 0x7f8843641f29 in __libc_start_main (/lib64/libc.so.6+0x20f29)

Add an assert to make sure that the job no longer has an associated txn before it is freed.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 blockjob.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/blockjob.c b/blockjob.c
index 11c9ce124d..bb75386515 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -228,6 +228,7 @@ void block_job_unref(BlockJob *job)
 {
     if (--job->refcnt == 0) {
         assert(job->status == BLOCK_JOB_STATUS_NULL);
+        assert(!job->txn);
         BlockDriverState *bs = blk_bs(job->blk);
         QLIST_REMOVE(job, job_list);
         bs->job = NULL;
@@ -479,6 +480,7 @@ static int block_job_finalize_single(BlockJob *job)
 
     QLIST_REMOVE(job, txn_list);
     block_job_txn_unref(job->txn);
+    job->txn = NULL;
     block_job_conclude(job);
     return 0;
 }
@@ -994,6 +996,9 @@ void block_job_pause_all(void)
 void block_job_early_fail(BlockJob *job)
 {
     assert(job->status == BLOCK_JOB_STATUS_CREATED);
+    QLIST_REMOVE(job, txn_list);
+    block_job_txn_unref(job->txn);
+    job->txn = NULL;
     block_job_decommission(job);
 }
 
-- 
2.17.0.rc1.1.g4c4f2b46a3


Re: [Qemu-devel] [Qemu-block] [PATCH] blockjob: leak fix, remove from txn when failing early
Posted by John Snow 6 years ago

On 03/27/2018 12:07 PM, Marc-André Lureau wrote:
> This fixes leaks found by ASAN such as:
>   GTESTER tests/test-blockjob
> =================================================================
> ==31442==ERROR: LeakSanitizer: detected memory leaks
> 
> Direct leak of 24 byte(s) in 1 object(s) allocated from:
>     #0 0x7f88483cba38 in __interceptor_calloc (/lib64/libasan.so.4+0xdea38)
>     #1 0x7f8845e1bd77 in g_malloc0 ../glib/gmem.c:129
>     #2 0x7f8845e1c04b in g_malloc0_n ../glib/gmem.c:360
>     #3 0x5584d2732498 in block_job_txn_new /home/elmarco/src/qemu/blockjob.c:172
>     #4 0x5584d2739b28 in block_job_create /home/elmarco/src/qemu/blockjob.c:973
>     #5 0x5584d270ae31 in mk_job /home/elmarco/src/qemu/tests/test-blockjob.c:34
>     #6 0x5584d270b1c1 in do_test_id /home/elmarco/src/qemu/tests/test-blockjob.c:57
>     #7 0x5584d270b65c in test_job_ids /home/elmarco/src/qemu/tests/test-blockjob.c:118
>     #8 0x7f8845e40b69 in test_case_run ../glib/gtestutils.c:2255
>     #9 0x7f8845e40f29 in g_test_run_suite_internal ../glib/gtestutils.c:2339
>     #10 0x7f8845e40fd2 in g_test_run_suite_internal ../glib/gtestutils.c:2351
>     #11 0x7f8845e411e9 in g_test_run_suite ../glib/gtestutils.c:2426
>     #12 0x7f8845e3fe72 in g_test_run ../glib/gtestutils.c:1692
>     #13 0x5584d270d6e2 in main /home/elmarco/src/qemu/tests/test-blockjob.c:377
>     #14 0x7f8843641f29 in __libc_start_main (/lib64/libc.so.6+0x20f29)
> 
> Add an assert to make sure that the job doesn't have associated txn before free().
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  blockjob.c | 5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/blockjob.c b/blockjob.c
> index 11c9ce124d..bb75386515 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -228,6 +228,7 @@ void block_job_unref(BlockJob *job)
>  {
>      if (--job->refcnt == 0) {
>          assert(job->status == BLOCK_JOB_STATUS_NULL);
> +        assert(!job->txn);
>          BlockDriverState *bs = blk_bs(job->blk);
>          QLIST_REMOVE(job, job_list);
>          bs->job = NULL;
> @@ -479,6 +480,7 @@ static int block_job_finalize_single(BlockJob *job)
>  
>      QLIST_REMOVE(job, txn_list);
>      block_job_txn_unref(job->txn);
> +    job->txn = NULL;
>      block_job_conclude(job);
>      return 0;
>  }
> @@ -994,6 +996,9 @@ void block_job_pause_all(void)
>  void block_job_early_fail(BlockJob *job)
>  {
>      assert(job->status == BLOCK_JOB_STATUS_CREATED);
> +    QLIST_REMOVE(job, txn_list);
> +    block_job_txn_unref(job->txn);
> +    job->txn = NULL;
>      block_job_decommission(job);
>  }
>  
> 

Shame on me.

I might have moved this into block_job_decommission() instead, so that if
there is a txn we unlink ourselves from it there (especially with the
assertion added), but this patch is fine.

Reviewed-by: John Snow <jsnow@redhat.com>

cc: Jeff Cody

Re: [Qemu-devel] [PATCH] blockjob: leak fix, remove from txn when failing early
Posted by Jeff Cody 6 years ago
On Tue, Mar 27, 2018 at 06:07:36PM +0200, Marc-André Lureau wrote:
> This fixes leaks found by ASAN such as:
>   GTESTER tests/test-blockjob
> =================================================================
> ==31442==ERROR: LeakSanitizer: detected memory leaks
> 
> Direct leak of 24 byte(s) in 1 object(s) allocated from:
>     #0 0x7f88483cba38 in __interceptor_calloc (/lib64/libasan.so.4+0xdea38)
>     #1 0x7f8845e1bd77 in g_malloc0 ../glib/gmem.c:129
>     #2 0x7f8845e1c04b in g_malloc0_n ../glib/gmem.c:360
>     #3 0x5584d2732498 in block_job_txn_new /home/elmarco/src/qemu/blockjob.c:172
>     #4 0x5584d2739b28 in block_job_create /home/elmarco/src/qemu/blockjob.c:973
>     #5 0x5584d270ae31 in mk_job /home/elmarco/src/qemu/tests/test-blockjob.c:34
>     #6 0x5584d270b1c1 in do_test_id /home/elmarco/src/qemu/tests/test-blockjob.c:57
>     #7 0x5584d270b65c in test_job_ids /home/elmarco/src/qemu/tests/test-blockjob.c:118
>     #8 0x7f8845e40b69 in test_case_run ../glib/gtestutils.c:2255
>     #9 0x7f8845e40f29 in g_test_run_suite_internal ../glib/gtestutils.c:2339
>     #10 0x7f8845e40fd2 in g_test_run_suite_internal ../glib/gtestutils.c:2351
>     #11 0x7f8845e411e9 in g_test_run_suite ../glib/gtestutils.c:2426
>     #12 0x7f8845e3fe72 in g_test_run ../glib/gtestutils.c:1692
>     #13 0x5584d270d6e2 in main /home/elmarco/src/qemu/tests/test-blockjob.c:377
>     #14 0x7f8843641f29 in __libc_start_main (/lib64/libc.so.6+0x20f29)
> 
> Add an assert to make sure that the job doesn't have associated txn before free().
> 
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  blockjob.c | 5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/blockjob.c b/blockjob.c
> index 11c9ce124d..bb75386515 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -228,6 +228,7 @@ void block_job_unref(BlockJob *job)
>  {
>      if (--job->refcnt == 0) {
>          assert(job->status == BLOCK_JOB_STATUS_NULL);
> +        assert(!job->txn);
>          BlockDriverState *bs = blk_bs(job->blk);
>          QLIST_REMOVE(job, job_list);
>          bs->job = NULL;
> @@ -479,6 +480,7 @@ static int block_job_finalize_single(BlockJob *job)
>  
>      QLIST_REMOVE(job, txn_list);
>      block_job_txn_unref(job->txn);
> +    job->txn = NULL;
>      block_job_conclude(job);
>      return 0;
>  }
> @@ -994,6 +996,9 @@ void block_job_pause_all(void)
>  void block_job_early_fail(BlockJob *job)
>  {
>      assert(job->status == BLOCK_JOB_STATUS_CREATED);
> +    QLIST_REMOVE(job, txn_list);
> +    block_job_txn_unref(job->txn);
> +    job->txn = NULL;
>      block_job_decommission(job);
>  }
>  
> -- 
> 2.17.0.rc1.1.g4c4f2b46a3
> 

This patch causes a segfault/assert in iotests 031 041 055:

e.g., from 031:

test_set_speed_invalid (__main__.TestSetSpeed) ... DEBUG:QMP:>>> {'execute': 'qmp_capabilities'}
DEBUG:QMP:<<< {u'return': {}}
DEBUG:QMP:>>> {'execute': 'query-block-jobs'}
DEBUG:QMP:<<< {u'return': []}
DEBUG:QMP:>>> {'execute': 'block-stream', 'arguments': {'device': 'drive0', 'speed': -1}}
DEBUG:QMP:<<< None
WARNING:qemu:qemu received signal -11: [...]


Re: [Qemu-devel] [Qemu-block] [PATCH] blockjob: leak fix, remove from txn when failing early
Posted by John Snow 6 years ago

On 03/27/2018 04:10 PM, Jeff Cody wrote:
> On Tue, Mar 27, 2018 at 06:07:36PM +0200, Marc-André Lureau wrote:
>> This fixes leaks found by ASAN such as:
>>   GTESTER tests/test-blockjob
>> =================================================================
>> ==31442==ERROR: LeakSanitizer: detected memory leaks
>>
>> Direct leak of 24 byte(s) in 1 object(s) allocated from:
>>     #0 0x7f88483cba38 in __interceptor_calloc (/lib64/libasan.so.4+0xdea38)
>>     #1 0x7f8845e1bd77 in g_malloc0 ../glib/gmem.c:129
>>     #2 0x7f8845e1c04b in g_malloc0_n ../glib/gmem.c:360
>>     #3 0x5584d2732498 in block_job_txn_new /home/elmarco/src/qemu/blockjob.c:172
>>     #4 0x5584d2739b28 in block_job_create /home/elmarco/src/qemu/blockjob.c:973
>>     #5 0x5584d270ae31 in mk_job /home/elmarco/src/qemu/tests/test-blockjob.c:34
>>     #6 0x5584d270b1c1 in do_test_id /home/elmarco/src/qemu/tests/test-blockjob.c:57
>>     #7 0x5584d270b65c in test_job_ids /home/elmarco/src/qemu/tests/test-blockjob.c:118
>>     #8 0x7f8845e40b69 in test_case_run ../glib/gtestutils.c:2255
>>     #9 0x7f8845e40f29 in g_test_run_suite_internal ../glib/gtestutils.c:2339
>>     #10 0x7f8845e40fd2 in g_test_run_suite_internal ../glib/gtestutils.c:2351
>>     #11 0x7f8845e411e9 in g_test_run_suite ../glib/gtestutils.c:2426
>>     #12 0x7f8845e3fe72 in g_test_run ../glib/gtestutils.c:1692
>>     #13 0x5584d270d6e2 in main /home/elmarco/src/qemu/tests/test-blockjob.c:377
>>     #14 0x7f8843641f29 in __libc_start_main (/lib64/libc.so.6+0x20f29)
>>
>> Add an assert to make sure that the job doesn't have associated txn before free().
>>
>> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
>> ---
>>  blockjob.c | 5 +++++
>>  1 file changed, 5 insertions(+)
>>
>> diff --git a/blockjob.c b/blockjob.c
>> index 11c9ce124d..bb75386515 100644
>> --- a/blockjob.c
>> +++ b/blockjob.c
>> @@ -228,6 +228,7 @@ void block_job_unref(BlockJob *job)
>>  {
>>      if (--job->refcnt == 0) {
>>          assert(job->status == BLOCK_JOB_STATUS_NULL);
>> +        assert(!job->txn);
>>          BlockDriverState *bs = blk_bs(job->blk);
>>          QLIST_REMOVE(job, job_list);
>>          bs->job = NULL;
>> @@ -479,6 +480,7 @@ static int block_job_finalize_single(BlockJob *job)
>>  
>>      QLIST_REMOVE(job, txn_list);
>>      block_job_txn_unref(job->txn);
>> +    job->txn = NULL;
>>      block_job_conclude(job);
>>      return 0;
>>  }
>> @@ -994,6 +996,9 @@ void block_job_pause_all(void)
>>  void block_job_early_fail(BlockJob *job)
>>  {
>>      assert(job->status == BLOCK_JOB_STATUS_CREATED);
>> +    QLIST_REMOVE(job, txn_list);
>> +    block_job_txn_unref(job->txn);
>> +    job->txn = NULL;
>>      block_job_decommission(job);
>>  }
>>  
>> -- 
>> 2.17.0.rc1.1.g4c4f2b46a3
>>
> 
> This patch causes a segfault/assert in iotests 031 041 055:
> 
> e.g., from 031:
> 
> test_set_speed_invalid (__main__.TestSetSpeed) ... DEBUG:QMP:>>> {'execute': 'qmp_capabilities'}
> DEBUG:QMP:<<< {u'return': {}}
> DEBUG:QMP:>>> {'execute': 'query-block-jobs'}
> DEBUG:QMP:<<< {u'return': []}
> DEBUG:QMP:>>> {'execute': 'block-stream', 'arguments': {'device': 'drive0', 'speed': -1}}
> DEBUG:QMP:<<< None
> WARNING:qemu:qemu received signal -11: [...]
> 
> 

Oh, because block_job_early_fail() can be called from block_job_create()
before we've established a transaction (even if it's the dummy transaction),
so job->txn is still NULL there and the unconditional unlink crashes.

It's a *really* early failure.

I patched it out like this; JTC: take whichever one, credit Marc-Andre
regardless of which you choose.

--js


diff --git a/blockjob.c b/blockjob.c
index ef3ed69ff1..c510a9fde5 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -204,6 +204,15 @@ void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job)
     block_job_txn_ref(txn);
 }

+static void block_job_txn_del_job(BlockJob *job)
+{
+    if (job->txn) {
+        QLIST_REMOVE(job, txn_list);
+        block_job_txn_unref(job->txn);
+        job->txn = NULL;
+    }
+}
+
 static void block_job_pause(BlockJob *job)
 {
     job->pause_count++;
@@ -232,6 +241,7 @@ void block_job_unref(BlockJob *job)
 {
     if (--job->refcnt == 0) {
         assert(job->status == BLOCK_JOB_STATUS_NULL);
+        assert(!job->txn);
         BlockDriverState *bs = blk_bs(job->blk);
         QLIST_REMOVE(job, job_list);
         bs->job = NULL;
@@ -392,6 +402,7 @@ static void block_job_decommission(BlockJob *job)
     job->busy = false;
     job->paused = false;
     job->deferred_to_main_loop = true;
+    block_job_txn_del_job(job);
     block_job_state_transition(job, BLOCK_JOB_STATUS_NULL);
     block_job_unref(job);
 }
@@ -481,8 +492,7 @@ static int block_job_finalize_single(BlockJob *job)
         }
     }

-    QLIST_REMOVE(job, txn_list);
-    block_job_txn_unref(job->txn);
+    block_job_txn_del_job(job);
     block_job_conclude(job);
     return 0;
 }