[PATCH v11 11/21] jobs: group together API calls under the same job lock

Emanuele Giuseppe Esposito posted 21 patches 3 years, 5 months ago
Maintainers: Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>, John Snow <jsnow@redhat.com>, Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, Wen Congyang <wencongyang2@huawei.com>, Xie Changlong <xiechanglong.d@gmail.com>, Markus Armbruster <armbru@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, Fam Zheng <fam@euphon.net>
Posted by Emanuele Giuseppe Esposito 3 years, 5 months ago
Now that the API also offers _locked() functions, take advantage
of them and give the caller control to take the lock and call the
_locked() functions.

This is especially relevant for loops, where it makes little sense
to have:

for (job = job_next(); ...)

with each job_next() call taking the lock internally.
Instead we want:

JOB_LOCK_GUARD();
for (job = job_next_locked(); ...)
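
As an illustration only (not QEMU code), here is a self-contained
sketch of how such a scope guard behaves; it assumes GCC/clang's
cleanup attribute, the same mechanism QEMU's lock guards are built
on, and the sketch_* names are hypothetical:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t job_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Runs automatically when the guard variable goes out of scope. */
static void sketch_guard_cleanup(pthread_mutex_t **m)
{
    pthread_mutex_unlock(*m);
}

/* Lock now; unlock when the enclosing scope exits. */
#define SKETCH_JOB_LOCK_GUARD() \
    __attribute__((cleanup(sketch_guard_cleanup))) \
    pthread_mutex_t *sketch_guard_ = \
        (pthread_mutex_lock(&job_mutex), &job_mutex)

int main(void)
{
    int jobs[] = { 1, 2, 3 };

    SKETCH_JOB_LOCK_GUARD();          /* one lock for the whole loop */
    for (int i = 0; i < 3; i++) {
        printf("job %d\n", jobs[i]);  /* not one lock per iteration */
    }
    return 0;                         /* unlocked here, at scope exit */
}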

In addition, also protect direct field accesses, either by creating
a new critical section or by widening the existing ones.
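
For instance, the child_job_drained_poll() hunk below turns what used
to be a bare read of job->busy into its own (for now nop) critical
section:

WITH_JOB_LOCK_GUARD() {
    if (!job->busy || job_is_completed_locked(job)) {
        return false;
    }
}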

Note: at this stage, job_{lock/unlock} and the job lock guard
macros are still a *nop*.
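
Concretely, the transitional primitives from the earlier patches of
this series amount to roughly the following (a sketch, not an excerpt
of this patch):

void job_lock(void)
{
    /* nop */
}

void job_unlock(void)
{
    /* nop */
}

#define JOB_LOCK_GUARD() /* nop */
#define WITH_JOB_LOCK_GUARD() /* nop */

so this patch only moves where the lock will eventually be taken;
runtime behavior does not change until a later patch in the series
makes the lock real.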

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
---
 block.c            | 17 ++++++++++-------
 blockdev.c         | 14 ++++++++++----
 blockjob.c         | 35 ++++++++++++++++++++++-------------
 job-qmp.c          |  9 ++++++---
 job.c              |  7 +++++--
 monitor/qmp-cmds.c |  7 +++++--
 qemu-img.c         | 17 +++++++++++------
 7 files changed, 69 insertions(+), 37 deletions(-)

diff --git a/block.c b/block.c
index bc85f46eed..2c6a4f62c9 100644
--- a/block.c
+++ b/block.c
@@ -4980,8 +4980,8 @@ static void bdrv_close(BlockDriverState *bs)
 
 void bdrv_close_all(void)
 {
-    assert(job_next(NULL) == NULL);
     GLOBAL_STATE_CODE();
+    assert(job_next(NULL) == NULL);
 
     /* Drop references from requests still in flight, such as canceled block
      * jobs whose AIO context has not been polled yet */
@@ -6167,13 +6167,16 @@ XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
         }
     }
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
-        GSList *el;
+    WITH_JOB_LOCK_GUARD() {
+        for (job = block_job_next_locked(NULL); job;
+             job = block_job_next_locked(job)) {
+            GSList *el;
 
-        xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
-                           job->job.id);
-        for (el = job->nodes; el; el = el->next) {
-            xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
+            xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
+                                job->job.id);
+            for (el = job->nodes; el; el = el->next) {
+                xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
+            }
         }
     }
 
diff --git a/blockdev.c b/blockdev.c
index 71f793c4ab..dddf5699a0 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -150,12 +150,15 @@ void blockdev_mark_auto_del(BlockBackend *blk)
         return;
     }
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
+    JOB_LOCK_GUARD();
+
+    for (job = block_job_next_locked(NULL); job;
+         job = block_job_next_locked(job)) {
         if (block_job_has_bdrv(job, blk_bs(blk))) {
             AioContext *aio_context = job->job.aio_context;
             aio_context_acquire(aio_context);
 
-            job_cancel(&job->job, false);
+            job_cancel_locked(&job->job, false);
 
             aio_context_release(aio_context);
         }
@@ -3745,7 +3748,10 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
     BlockJobInfoList *head = NULL, **tail = &head;
     BlockJob *job;
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
+    JOB_LOCK_GUARD();
+
+    for (job = block_job_next_locked(NULL); job;
+         job = block_job_next_locked(job)) {
         BlockJobInfo *value;
         AioContext *aio_context;
 
@@ -3754,7 +3760,7 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
         }
         aio_context = block_job_get_aio_context(job);
         aio_context_acquire(aio_context);
-        value = block_job_query(job, errp);
+        value = block_job_query_locked(job, errp);
         aio_context_release(aio_context);
         if (!value) {
             qapi_free_BlockJobInfoList(head);
diff --git a/blockjob.c b/blockjob.c
index 0d59aba439..96fb9d9f73 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -111,8 +111,10 @@ static bool child_job_drained_poll(BdrvChild *c)
     /* An inactive or completed job doesn't have any pending requests. Jobs
      * with !job->busy are either already paused or have a pause point after
      * being reentered, so no job driver code will run before they pause. */
-    if (!job->busy || job_is_completed(job)) {
-        return false;
+    WITH_JOB_LOCK_GUARD() {
+        if (!job->busy || job_is_completed_locked(job)) {
+            return false;
+        }
     }
 
     /* Otherwise, assume that it isn't fully stopped yet, but allow the job to
@@ -475,13 +477,15 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
     job->ready_notifier.notify = block_job_event_ready;
     job->idle_notifier.notify = block_job_on_idle;
 
-    notifier_list_add(&job->job.on_finalize_cancelled,
-                      &job->finalize_cancelled_notifier);
-    notifier_list_add(&job->job.on_finalize_completed,
-                      &job->finalize_completed_notifier);
-    notifier_list_add(&job->job.on_pending, &job->pending_notifier);
-    notifier_list_add(&job->job.on_ready, &job->ready_notifier);
-    notifier_list_add(&job->job.on_idle, &job->idle_notifier);
+    WITH_JOB_LOCK_GUARD() {
+        notifier_list_add(&job->job.on_finalize_cancelled,
+                          &job->finalize_cancelled_notifier);
+        notifier_list_add(&job->job.on_finalize_completed,
+                          &job->finalize_completed_notifier);
+        notifier_list_add(&job->job.on_pending, &job->pending_notifier);
+        notifier_list_add(&job->job.on_ready, &job->ready_notifier);
+        notifier_list_add(&job->job.on_idle, &job->idle_notifier);
+    }
 
     error_setg(&job->blocker, "block device is in use by block job: %s",
                job_type_str(&job->job));
@@ -558,10 +562,15 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                         action);
     }
     if (action == BLOCK_ERROR_ACTION_STOP) {
-        if (!job->job.user_paused) {
-            job_pause(&job->job);
-            /* make the pause user visible, which will be resumed from QMP. */
-            job->job.user_paused = true;
+        WITH_JOB_LOCK_GUARD() {
+            if (!job->job.user_paused) {
+                job_pause_locked(&job->job);
+                /*
+                 * make the pause user visible, which will be
+                 * resumed from QMP.
+                 */
+                job->job.user_paused = true;
+            }
         }
         block_job_iostatus_set_err(job, error);
     }
diff --git a/job-qmp.c b/job-qmp.c
index b1c456482a..393d3a5b81 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -164,7 +164,8 @@ void qmp_job_dismiss(const char *id, Error **errp)
     aio_context_release(aio_context);
 }
 
-static JobInfo *job_query_single(Job *job, Error **errp)
+/* Called with job_mutex held. */
+static JobInfo *job_query_single_locked(Job *job, Error **errp)
 {
     JobInfo *info;
     uint64_t progress_current;
@@ -194,7 +195,9 @@ JobInfoList *qmp_query_jobs(Error **errp)
     JobInfoList *head = NULL, **tail = &head;
     Job *job;
 
-    for (job = job_next(NULL); job; job = job_next(job)) {
+    JOB_LOCK_GUARD();
+
+    for (job = job_next_locked(NULL); job; job = job_next_locked(job)) {
         JobInfo *value;
         AioContext *aio_context;
 
@@ -203,7 +206,7 @@ JobInfoList *qmp_query_jobs(Error **errp)
         }
         aio_context = job->aio_context;
         aio_context_acquire(aio_context);
-        value = job_query_single(job, errp);
+        value = job_query_single_locked(job, errp);
         aio_context_release(aio_context);
         if (!value) {
             qapi_free_JobInfoList(head);
diff --git a/job.c b/job.c
index dcd561fd46..e336af0c1c 100644
--- a/job.c
+++ b/job.c
@@ -672,7 +672,7 @@ void coroutine_fn job_pause_point(Job *job)
     job_pause_point_locked(job);
 }
 
-void job_yield_locked(Job *job)
+static void job_yield_locked(Job *job)
 {
     assert(job->busy);
 
@@ -1046,11 +1046,14 @@ static void job_completed_txn_abort_locked(Job *job)
 /* Called with job_mutex held, but releases it temporarily */
 static int job_prepare_locked(Job *job)
 {
+    int ret;
+
     GLOBAL_STATE_CODE();
     if (job->ret == 0 && job->driver->prepare) {
         job_unlock();
-        job->ret = job->driver->prepare(job);
+        ret = job->driver->prepare(job);
         job_lock();
+        job->ret = ret;
         job_update_rc_locked(job);
     }
     return job->ret;
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index 7314cd813d..81c8fdadf8 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -135,8 +135,11 @@ void qmp_cont(Error **errp)
         blk_iostatus_reset(blk);
     }
 
-    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
-        block_job_iostatus_reset(job);
+    WITH_JOB_LOCK_GUARD() {
+        for (job = block_job_next_locked(NULL); job;
+             job = block_job_next_locked(job)) {
+            block_job_iostatus_reset_locked(job);
+        }
     }
 
     /* Continuing after completed migration. Images have been inactivated to
diff --git a/qemu-img.c b/qemu-img.c
index 7d4b33b3da..c8ae704166 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -912,25 +912,30 @@ static void run_block_job(BlockJob *job, Error **errp)
     int ret = 0;
 
     aio_context_acquire(aio_context);
-    job_ref(&job->job);
+    job_lock();
+    job_ref_locked(&job->job);
     do {
         float progress = 0.0f;
+        job_unlock();
         aio_poll(aio_context, true);
 
         progress_get_snapshot(&job->job.progress, &progress_current,
-                              &progress_total);
+                                &progress_total);
         if (progress_total) {
             progress = (float)progress_current / progress_total * 100.f;
         }
         qemu_progress_print(progress, 0);
-    } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
+        job_lock();
+    } while (!job_is_ready_locked(&job->job) &&
+                !job_is_completed_locked(&job->job));
 
-    if (!job_is_completed(&job->job)) {
-        ret = job_complete_sync(&job->job, errp);
+    if (!job_is_completed_locked(&job->job)) {
+        ret = job_complete_sync_locked(&job->job, errp);
     } else {
         ret = job->job.ret;
     }
-    job_unref(&job->job);
+    job_unref_locked(&job->job);
+    job_unlock();
     aio_context_release(aio_context);
 
     /* publish completion progress only when success */
-- 
2.31.1
Re: [PATCH v11 11/21] jobs: group together API calls under the same job lock
Posted by Vladimir Sementsov-Ogievskiy 3 years, 4 months ago
On 8/26/22 16:20, Emanuele Giuseppe Esposito wrote:
> Now that the API also offers _locked() functions, take advantage
> of them and give the caller control to take the lock and call the
> _locked() functions.
> 
> This is especially relevant for loops, where it makes little sense
> to have:
>
> for (job = job_next(); ...)
>
> with each job_next() call taking the lock internally.
> Instead we want:
> 
> JOB_LOCK_GUARD();
> for (job = job_next_locked(); ...)
> 
> In addition, also protect direct field accesses, either by creating
> a new critical section or by widening the existing ones.
> 
> Note: at this stage, job_{lock/unlock} and the job lock guard
> macros are still a *nop*.
> 
> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> ---
>   block.c            | 17 ++++++++++-------
>   blockdev.c         | 14 ++++++++++----
>   blockjob.c         | 35 ++++++++++++++++++++++-------------
>   job-qmp.c          |  9 ++++++---
>   job.c              |  7 +++++--
>   monitor/qmp-cmds.c |  7 +++++--
>   qemu-img.c         | 17 +++++++++++------
>   7 files changed, 69 insertions(+), 37 deletions(-)
> 

[..]

> diff --git a/job.c b/job.c
> index dcd561fd46..e336af0c1c 100644
> --- a/job.c
> +++ b/job.c

The job.c hunks are unrelated to this patch; they should go to patch 05.

> @@ -672,7 +672,7 @@ void coroutine_fn job_pause_point(Job *job)
>       job_pause_point_locked(job);
>   }
>   
> -void job_yield_locked(Job *job)
> +static void job_yield_locked(Job *job)
>   {
>       assert(job->busy);
>   
> @@ -1046,11 +1046,14 @@ static void job_completed_txn_abort_locked(Job *job)
>   /* Called with job_mutex held, but releases it temporarily */
>   static int job_prepare_locked(Job *job)
>   {
> +    int ret;
> +
>       GLOBAL_STATE_CODE();
>       if (job->ret == 0 && job->driver->prepare) {
>           job_unlock();
> -        job->ret = job->driver->prepare(job);
> +        ret = job->driver->prepare(job);
>           job_lock();
> +        job->ret = ret;
>           job_update_rc_locked(job);
>       }
>       return job->ret;
> diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
> index 7314cd813d..81c8fdadf8 100644
> --- a/monitor/qmp-cmds.c
> +++ b/monitor/qmp-cmds.c
> @@ -135,8 +135,11 @@ void qmp_cont(Error **errp)
>           blk_iostatus_reset(blk);
>       }
>   
> -    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
> -        block_job_iostatus_reset(job);
> +    WITH_JOB_LOCK_GUARD() {
> +        for (job = block_job_next_locked(NULL); job;
> +             job = block_job_next_locked(job)) {
> +            block_job_iostatus_reset_locked(job);
> +        }
>       }
>   
>       /* Continuing after completed migration. Images have been inactivated to
> diff --git a/qemu-img.c b/qemu-img.c
> index 7d4b33b3da..c8ae704166 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -912,25 +912,30 @@ static void run_block_job(BlockJob *job, Error **errp)
>       int ret = 0;
>   
>       aio_context_acquire(aio_context);
> -    job_ref(&job->job);
> +    job_lock();
> +    job_ref_locked(&job->job);
>       do {
>           float progress = 0.0f;
> +        job_unlock();
>           aio_poll(aio_context, true);
>   
>           progress_get_snapshot(&job->job.progress, &progress_current,
> -                              &progress_total);
> +                                &progress_total);

broken indentation

>           if (progress_total) {
>               progress = (float)progress_current / progress_total * 100.f;
>           }
>           qemu_progress_print(progress, 0);
> -    } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
> +        job_lock();
> +    } while (!job_is_ready_locked(&job->job) &&
> +                !job_is_completed_locked(&job->job));

and here

>   
> -    if (!job_is_completed(&job->job)) {
> -        ret = job_complete_sync(&job->job, errp);
> +    if (!job_is_completed_locked(&job->job)) {
> +        ret = job_complete_sync_locked(&job->job, errp);
>       } else {
>           ret = job->job.ret;
>       }
> -    job_unref(&job->job);
> +    job_unref_locked(&job->job);
> +    job_unlock();
>       aio_context_release(aio_context);
>   
>       /* publish completion progress only when success */


-- 
Best regards,
Vladimir
Re: [PATCH v11 11/21] jobs: group together API calls under the same job lock
Posted by Emanuele Giuseppe Esposito 3 years, 4 months ago

On 14/09/2022 at 14:36, Vladimir Sementsov-Ogievskiy wrote:
> On 8/26/22 16:20, Emanuele Giuseppe Esposito wrote:
>> Now that the API also offers _locked() functions, take advantage
>> of them and give the caller control to take the lock and call the
>> _locked() functions.
>>
>> This is especially relevant for loops, where it makes little sense
>> to have:
>>
>> for (job = job_next(); ...)
>>
>> with each job_next() call taking the lock internally.
>> Instead we want:
>>
>> JOB_LOCK_GUARD();
>> for (job = job_next_locked(); ...)
>>
>> In addition, also protect direct field accesses, either by creating
>> a new critical section or by widening the existing ones.
>>
>> Note: at this stage, job_{lock/unlock} and the job lock guard
>> macros are still a *nop*.
>>
>> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
>> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
>> ---
>>   block.c            | 17 ++++++++++-------
>>   blockdev.c         | 14 ++++++++++----
>>   blockjob.c         | 35 ++++++++++++++++++++++-------------
>>   job-qmp.c          |  9 ++++++---
>>   job.c              |  7 +++++--
>>   monitor/qmp-cmds.c |  7 +++++--
>>   qemu-img.c         | 17 +++++++++++------
>>   7 files changed, 69 insertions(+), 37 deletions(-)
>>
> 
> [..]
> 
>> diff --git a/job.c b/job.c
>> index dcd561fd46..e336af0c1c 100644
>> --- a/job.c
>> +++ b/job.c
> 
> The job.c hunks are unrelated to this patch; they should go to patch 05.
> 
>> @@ -672,7 +672,7 @@ void coroutine_fn job_pause_point(Job *job)
>>       job_pause_point_locked(job);
>>   }
>>
>> -void job_yield_locked(Job *job)
>> +static void job_yield_locked(Job *job)
>>   {
>>       assert(job->busy);
>>
>> @@ -1046,11 +1046,14 @@ static void job_completed_txn_abort_locked(Job *job)
>>   /* Called with job_mutex held, but releases it temporarily */
>>   static int job_prepare_locked(Job *job)
>>   {
>> +    int ret;
>> +
>>       GLOBAL_STATE_CODE();
>>       if (job->ret == 0 && job->driver->prepare) {
>>           job_unlock();
>> -        job->ret = job->driver->prepare(job);
>> +        ret = job->driver->prepare(job);
>>           job_lock();
>> +        job->ret = ret;
>>           job_update_rc_locked(job);
>>       }
>>       return job->ret;
>> diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
>> index 7314cd813d..81c8fdadf8 100644
>> --- a/monitor/qmp-cmds.c
>> +++ b/monitor/qmp-cmds.c
>> @@ -135,8 +135,11 @@ void qmp_cont(Error **errp)
>>           blk_iostatus_reset(blk);
>>       }
>>
>> -    for (job = block_job_next(NULL); job; job = block_job_next(job)) {
>> -        block_job_iostatus_reset(job);
>> +    WITH_JOB_LOCK_GUARD() {
>> +        for (job = block_job_next_locked(NULL); job;
>> +             job = block_job_next_locked(job)) {
>> +            block_job_iostatus_reset_locked(job);
>> +        }
>>       }
>>
>>       /* Continuing after completed migration. Images have been inactivated to
>> diff --git a/qemu-img.c b/qemu-img.c
>> index 7d4b33b3da..c8ae704166 100644
>> --- a/qemu-img.c
>> +++ b/qemu-img.c
>> @@ -912,25 +912,30 @@ static void run_block_job(BlockJob *job, Error **errp)
>>       int ret = 0;
>>
>>       aio_context_acquire(aio_context);
>> -    job_ref(&job->job);
>> +    job_lock();
>> +    job_ref_locked(&job->job);
>>       do {
>>           float progress = 0.0f;
>> +        job_unlock();
>>           aio_poll(aio_context, true);
>>
>>           progress_get_snapshot(&job->job.progress, &progress_current,
>> -                              &progress_total);
>> +                                &progress_total);
> 
> broken indentation
> 
>>           if (progress_total) {
>>               progress = (float)progress_current / progress_total * 100.f;
>>           }
>>           qemu_progress_print(progress, 0);
>> -    } while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
>> +        job_lock();
>> +    } while (!job_is_ready_locked(&job->job) &&
>> +                !job_is_completed_locked(&job->job));
> 
> and here

Makes sense, I'll fix it.

Thank you,
Emanuele
> 
>>
>> -    if (!job_is_completed(&job->job)) {
>> -        ret = job_complete_sync(&job->job, errp);
>> +    if (!job_is_completed_locked(&job->job)) {
>> +        ret = job_complete_sync_locked(&job->job, errp);
>>       } else {
>>           ret = job->job.ret;
>>       }
>> -    job_unref(&job->job);
>> +    job_unref_locked(&job->job);
>> +    job_unlock();
>>       aio_context_release(aio_context);
>>
>>       /* publish completion progress only when success */
> 
>