If the job->aio_context is accessed under job_mutex,
leave as it is. Otherwise use job_get_aio_context().
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
block/commit.c | 4 ++--
block/mirror.c | 2 +-
block/replication.c | 2 +-
blockjob.c | 18 +++++++++++-------
job.c | 8 ++++----
tests/unit/test-block-iothread.c | 6 +++---
6 files changed, 22 insertions(+), 18 deletions(-)
diff --git a/block/commit.c b/block/commit.c
index f639eb49c5..961b57edf0 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -369,7 +369,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
goto fail;
}
- s->base = blk_new(s->common.job.aio_context,
+ s->base = blk_new(job_get_aio_context(&s->common.job),
base_perms,
BLK_PERM_CONSISTENT_READ
| BLK_PERM_GRAPH_MOD
@@ -382,7 +382,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
s->base_bs = base;
/* Required permissions are already taken with block_job_add_bdrv() */
- s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL);
+ s->top = blk_new(job_get_aio_context(&s->common.job), 0, BLK_PERM_ALL);
ret = blk_insert_bs(s->top, top, errp);
if (ret < 0) {
goto fail;
diff --git a/block/mirror.c b/block/mirror.c
index 41450df55c..72b4367b4e 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1743,7 +1743,7 @@ static BlockJob *mirror_start_job(
target_perms |= BLK_PERM_GRAPH_MOD;
}
- s->target = blk_new(s->common.job.aio_context,
+ s->target = blk_new(job_get_aio_context(&s->common.job),
target_perms, target_shared_perms);
ret = blk_insert_bs(s->target, target, errp);
if (ret < 0) {
diff --git a/block/replication.c b/block/replication.c
index 50ea778937..68018948b9 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -148,8 +148,8 @@ static void replication_close(BlockDriverState *bs)
}
if (s->stage == BLOCK_REPLICATION_FAILOVER) {
commit_job = &s->commit_job->job;
- assert(commit_job->aio_context == qemu_get_current_aio_context());
WITH_JOB_LOCK_GUARD() {
+ assert(commit_job->aio_context == qemu_get_current_aio_context());
job_cancel_sync_locked(commit_job, false);
}
}
diff --git a/blockjob.c b/blockjob.c
index cf1f49f6c2..468ba735c5 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -155,14 +155,16 @@ static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx,
bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore);
}
- job->job.aio_context = ctx;
+ WITH_JOB_LOCK_GUARD() {
+ job->job.aio_context = ctx;
+ }
}
static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
{
BlockJob *job = c->opaque;
- return job->job.aio_context;
+ return job_get_aio_context(&job->job);
}
static const BdrvChildClass child_job = {
@@ -218,19 +220,21 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
{
BdrvChild *c;
bool need_context_ops;
+ AioContext *job_aiocontext;
assert(qemu_in_main_thread());
bdrv_ref(bs);
- need_context_ops = bdrv_get_aio_context(bs) != job->job.aio_context;
+ job_aiocontext = job_get_aio_context(&job->job);
+ need_context_ops = bdrv_get_aio_context(bs) != job_aiocontext;
- if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
- aio_context_release(job->job.aio_context);
+ if (need_context_ops && job_aiocontext != qemu_get_aio_context()) {
+ aio_context_release(job_aiocontext);
}
c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job,
errp);
- if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
- aio_context_acquire(job->job.aio_context);
+ if (need_context_ops && job_aiocontext != qemu_get_aio_context()) {
+ aio_context_acquire(job_aiocontext);
}
if (c == NULL) {
return -EPERM;
diff --git a/job.c b/job.c
index f16a4ef542..8a5b710d9b 100644
--- a/job.c
+++ b/job.c
@@ -566,7 +566,7 @@ void job_enter_cond_locked(Job *job, bool(*fn)(Job *job))
job->busy = true;
real_job_unlock();
job_unlock();
- aio_co_enter(job->aio_context, job->co);
+ aio_co_enter(job_get_aio_context(job), job->co);
job_lock();
}
@@ -1138,7 +1138,6 @@ static void coroutine_fn job_co_entry(void *opaque)
Job *job = opaque;
int ret;
- assert(job->aio_context == qemu_get_current_aio_context());
assert(job && job->driver && job->driver->run);
job_pause_point(job);
ret = job->driver->run(job, &job->err);
@@ -1177,7 +1176,7 @@ void job_start(Job *job)
job->paused = false;
job_state_transition_locked(job, JOB_STATUS_RUNNING);
}
- aio_co_enter(job->aio_context, job->co);
+ aio_co_enter(job_get_aio_context(job), job->co);
}
void job_cancel_locked(Job *job, bool force)
@@ -1303,7 +1302,8 @@ int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp),
}
job_unlock();
- AIO_WAIT_WHILE(job->aio_context, (job_enter(job), !job_is_completed(job)));
+ AIO_WAIT_WHILE(job_get_aio_context(job),
+ (job_enter(job), !job_is_completed(job)));
job_lock();
ret = (job_is_cancelled_locked(job) && job->ret == 0) ?
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index b9309beec2..addcb5846b 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -379,7 +379,7 @@ static int coroutine_fn test_job_run(Job *job, Error **errp)
job_transition_to_ready(&s->common.job);
while (!s->should_complete) {
s->n++;
- g_assert(qemu_get_current_aio_context() == job->aio_context);
+ g_assert(qemu_get_current_aio_context() == job_get_aio_context(job));
/* Avoid job_sleep_ns() because it marks the job as !busy. We want to
* emulate some actual activity (probably some I/O) here so that the
@@ -390,7 +390,7 @@ static int coroutine_fn test_job_run(Job *job, Error **errp)
job_pause_point(&s->common.job);
}
- g_assert(qemu_get_current_aio_context() == job->aio_context);
+ g_assert(qemu_get_current_aio_context() == job_get_aio_context(job));
return 0;
}
@@ -642,7 +642,7 @@ static void test_propagate_mirror(void)
g_assert(bdrv_get_aio_context(src) == ctx);
g_assert(bdrv_get_aio_context(target) == ctx);
g_assert(bdrv_get_aio_context(filter) == ctx);
- g_assert(job->aio_context == ctx);
+ g_assert(job_get_aio_context(job) == ctx);
/* Change the AioContext of target */
aio_context_acquire(ctx);
--
2.31.1
Getters such as job_get_aio_context are often wrong because the
AioContext can change immediately after returning.
So, I wonder if job.aio_context should be protected with a kind of "fake
rwlock": read under BQL or job_lock, write under BQL+job_lock. For this
to work, you can add an assertion for qemu_in_main_thread() to
child_job_set_aio_ctx, or even better have the assertion in a wrapper
API job_set_aio_context_locked().
And then, we can remove job_get_aio_context().
Let's look at all cases individually:
On 1/5/22 15:02, Emanuele Giuseppe Esposito wrote:
> diff --git a/block/commit.c b/block/commit.c
> index f639eb49c5..961b57edf0 100644
> --- a/block/commit.c
> +++ b/block/commit.c
> @@ -369,7 +369,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
> goto fail;
> }
>
> - s->base = blk_new(s->common.job.aio_context,
> + s->base = blk_new(job_get_aio_context(&s->common.job),
> base_perms,
> BLK_PERM_CONSISTENT_READ
> | BLK_PERM_GRAPH_MOD
Here the AioContext is that of bs. It cannot change because we're
holding the BQL. Replace with bdrv_get_aio_context.
> @@ -382,7 +382,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
> s->base_bs = base;
>
> /* Required permissions are already taken with block_job_add_bdrv() */
> - s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL);
> + s->top = blk_new(job_get_aio_context(&s->common.job), 0, BLK_PERM_ALL);
> ret = blk_insert_bs(s->top, top, errp);
> if (ret < 0) {
> goto fail;
Same.
> diff --git a/block/mirror.c b/block/mirror.c
> index 41450df55c..72b4367b4e 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -1743,7 +1743,7 @@ static BlockJob *mirror_start_job(
> target_perms |= BLK_PERM_GRAPH_MOD;
> }
>
> - s->target = blk_new(s->common.job.aio_context,
> + s->target = blk_new(job_get_aio_context(&s->common.job),
> target_perms, target_shared_perms);
> ret = blk_insert_bs(s->target, target, errp);
> if (ret < 0) {
Same.
> diff --git a/block/replication.c b/block/replication.c
> index 50ea778937..68018948b9 100644
> --- a/block/replication.c
> +++ b/block/replication.c
> @@ -148,8 +148,8 @@ static void replication_close(BlockDriverState *bs)
> }
> if (s->stage == BLOCK_REPLICATION_FAILOVER) {
> commit_job = &s->commit_job->job;
> - assert(commit_job->aio_context == qemu_get_current_aio_context());
> WITH_JOB_LOCK_GUARD() {
> + assert(commit_job->aio_context == qemu_get_current_aio_context());
> job_cancel_sync_locked(commit_job, false);
> }
> }
Ok.
> diff --git a/blockjob.c b/blockjob.c
> index cf1f49f6c2..468ba735c5 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -155,14 +155,16 @@ static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx,
> bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore);
> }
>
> - job->job.aio_context = ctx;
> + WITH_JOB_LOCK_GUARD() {
> + job->job.aio_context = ctx;
> + }
> }
>
> static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
> {
> BlockJob *job = c->opaque;
>
> - return job->job.aio_context;
> + return job_get_aio_context(&job->job);
> }
>
> static const BdrvChildClass child_job = {
Both called with BQL held, I think.
> @@ -218,19 +220,21 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
> {
> BdrvChild *c;
> bool need_context_ops;
> + AioContext *job_aiocontext;
> assert(qemu_in_main_thread());
>
> bdrv_ref(bs);
>
> - need_context_ops = bdrv_get_aio_context(bs) != job->job.aio_context;
> + job_aiocontext = job_get_aio_context(&job->job);
> + need_context_ops = bdrv_get_aio_context(bs) != job_aiocontext;
>
> - if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
> - aio_context_release(job->job.aio_context);
> + if (need_context_ops && job_aiocontext != qemu_get_aio_context()) {
> + aio_context_release(job_aiocontext);
> }
> c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job,
> errp);
> - if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
> - aio_context_acquire(job->job.aio_context);
> + if (need_context_ops && job_aiocontext != qemu_get_aio_context()) {
> + aio_context_acquire(job_aiocontext);
> }
> if (c == NULL) {
> return -EPERM;
BQL held, too.
> diff --git a/job.c b/job.c
> index f16a4ef542..8a5b710d9b 100644
> --- a/job.c
> +++ b/job.c
> @@ -566,7 +566,7 @@ void job_enter_cond_locked(Job *job, bool(*fn)(Job *job))
> job->busy = true;
> real_job_unlock();
> job_unlock();
> - aio_co_enter(job->aio_context, job->co);
> + aio_co_enter(job_get_aio_context(job), job->co);
> job_lock();
> }
>
If you replace aio_co_enter with aio_co_schedule, you can call it
without dropping the lock. The difference being that aio_co_schedule
will always go through a bottom half.
> @@ -1138,7 +1138,6 @@ static void coroutine_fn job_co_entry(void *opaque)
> Job *job = opaque;
> int ret;
>
> - assert(job->aio_context == qemu_get_current_aio_context());
> assert(job && job->driver && job->driver->run);
> job_pause_point(job);
> ret = job->driver->run(job, &job->err);
> @@ -1177,7 +1176,7 @@ void job_start(Job *job)
> job->paused = false;
> job_state_transition_locked(job, JOB_STATUS_RUNNING);
> }
> - aio_co_enter(job->aio_context, job->co);
> + aio_co_enter(job_get_aio_context(job), job->co);
Better to use aio_co_schedule here, too, and move it under the previous
WITH_JOB_LOCK_GUARD.
> }
>
> void job_cancel_locked(Job *job, bool force)
> @@ -1303,7 +1302,8 @@ int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp),
> }
>
> job_unlock();
> - AIO_WAIT_WHILE(job->aio_context, (job_enter(job), !job_is_completed(job)));
> + AIO_WAIT_WHILE(job_get_aio_context(job),
> + (job_enter(job), !job_is_completed(job)));
> job_lock();
Here I think we are also holding the BQL, because this function is
"sync", so it's safe to use job->aio_context.
Paolo
On 19/01/2022 11:31, Paolo Bonzini wrote:
>> diff --git a/blockjob.c b/blockjob.c
>> index cf1f49f6c2..468ba735c5 100644
>> --- a/blockjob.c
>> +++ b/blockjob.c
>> @@ -155,14 +155,16 @@ static void child_job_set_aio_ctx(BdrvChild *c,
>> AioContext *ctx,
>> bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore);
>> }
>> - job->job.aio_context = ctx;
>> + WITH_JOB_LOCK_GUARD() {
>> + job->job.aio_context = ctx;
>> + }
>> }
>> static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
>> {
>> BlockJob *job = c->opaque;
>> - return job->job.aio_context;
>> + return job_get_aio_context(&job->job);
>> }
>> static const BdrvChildClass child_job = {
>
> Both called with BQL held, I think.
Yes, as their callbacks .get_parent_aio_context and .set_aio_context are
defined as GS functions in block_int-common.h
>
>> @@ -218,19 +220,21 @@ int block_job_add_bdrv(BlockJob *job, const char
>> *name, BlockDriverState *bs,
>> {
>> BdrvChild *c;
>> bool need_context_ops;
>> + AioContext *job_aiocontext;
>> assert(qemu_in_main_thread());
>> bdrv_ref(bs);
>> - need_context_ops = bdrv_get_aio_context(bs) != job->job.aio_context;
>> + job_aiocontext = job_get_aio_context(&job->job);
>> + need_context_ops = bdrv_get_aio_context(bs) != job_aiocontext;
>> - if (need_context_ops && job->job.aio_context !=
>> qemu_get_aio_context()) {
>> - aio_context_release(job->job.aio_context);
>> + if (need_context_ops && job_aiocontext != qemu_get_aio_context()) {
>> + aio_context_release(job_aiocontext);
>> }
>> c = bdrv_root_attach_child(bs, name, &child_job, 0, perm,
>> shared_perm, job,
>> errp);
>> - if (need_context_ops && job->job.aio_context !=
>> qemu_get_aio_context()) {
>> - aio_context_acquire(job->job.aio_context);
>> + if (need_context_ops && job_aiocontext != qemu_get_aio_context()) {
>> + aio_context_acquire(job_aiocontext);
>> }
>> if (c == NULL) {
>> return -EPERM;
>
> BQL held, too.
Wouldn't it be better to keep job_get_aio_context and implement like this:
AioContext *job_get_aio_context(Job *job)
{
/*
* Job AioContext can be written under BQL+job_mutex,
* but can be read with just the BQL held.
*/
assert(qemu_in_main_thread());
return job->aio_context;
}
and instead job_set_aio_context:
void job_set_aio_context(Job *job, AioContext *ctx)
{
JOB_LOCK_GUARD();
assert(qemu_in_main_thread());
job->aio_context = ctx;
}
(obviously implement also _locked version, if needed, and probably move
the comment in get_aio_context in job.h).
Thank you,
Emanuele
On 21/01/2022 13:33, Emanuele Giuseppe Esposito wrote:
>
>
> On 19/01/2022 11:31, Paolo Bonzini wrote:
>>> diff --git a/blockjob.c b/blockjob.c
>>> index cf1f49f6c2..468ba735c5 100644
>>> --- a/blockjob.c
>>> +++ b/blockjob.c
>>> @@ -155,14 +155,16 @@ static void child_job_set_aio_ctx(BdrvChild *c,
>>> AioContext *ctx,
>>> bdrv_set_aio_context_ignore(sibling->bs, ctx, ignore);
>>> }
>>> - job->job.aio_context = ctx;
>>> + WITH_JOB_LOCK_GUARD() {
>>> + job->job.aio_context = ctx;
>>> + }
>>> }
>>> static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
>>> {
>>> BlockJob *job = c->opaque;
>>> - return job->job.aio_context;
>>> + return job_get_aio_context(&job->job);
>>> }
>>> static const BdrvChildClass child_job = {
>>
>> Both called with BQL held, I think.
>
> Yes, as their callbacks .get_parent_aio_context and .set_aio_context are
> defined as GS functions in block_int-common.h
>>
>>> @@ -218,19 +220,21 @@ int block_job_add_bdrv(BlockJob *job, const
>>> char *name, BlockDriverState *bs,
>>> {
>>> BdrvChild *c;
>>> bool need_context_ops;
>>> + AioContext *job_aiocontext;
>>> assert(qemu_in_main_thread());
>>> bdrv_ref(bs);
>>> - need_context_ops = bdrv_get_aio_context(bs) !=
>>> job->job.aio_context;
>>> + job_aiocontext = job_get_aio_context(&job->job);
>>> + need_context_ops = bdrv_get_aio_context(bs) != job_aiocontext;
>>> - if (need_context_ops && job->job.aio_context !=
>>> qemu_get_aio_context()) {
>>> - aio_context_release(job->job.aio_context);
>>> + if (need_context_ops && job_aiocontext != qemu_get_aio_context()) {
>>> + aio_context_release(job_aiocontext);
>>> }
>>> c = bdrv_root_attach_child(bs, name, &child_job, 0, perm,
>>> shared_perm, job,
>>> errp);
>>> - if (need_context_ops && job->job.aio_context !=
>>> qemu_get_aio_context()) {
>>> - aio_context_acquire(job->job.aio_context);
>>> + if (need_context_ops && job_aiocontext != qemu_get_aio_context()) {
>>> + aio_context_acquire(job_aiocontext);
>>> }
>>> if (c == NULL) {
>>> return -EPERM;
>>
>> BQL held, too.
>
> Wouldn't it be better to keep job_get_aio_context and implement like this:
>
> AioContext *job_get_aio_context(Job *job)
> {
> /*
> * Job AioContext can be written under BQL+job_mutex,
> * but can be read with just the BQL held.
> */
> assert(qemu_in_main_thread());
> return job->aio_context;
> }
Uhm ok this one doesn't really work, because it's ok to read it under
either BQL or job lock. So I will get rid of job_get_aio_context, but
add job_set_aio_context (and use it in child_job_set_aio_ctx).
Emanuele
>
> and instead job_set_aio_context:
>
> void job_set_aio_context(Job *job, AioContext *ctx)
> {
> JOB_LOCK_GUARD();
> assert(qemu_in_main_thread());
> job->aio_context = ctx;
> }
>
> (obviously implement also _locked version, if needed, and probably move
> the comment in get_aio_context in job.h).
>
> Thank you,
> Emanuele
On 19/01/2022 11:31, Paolo Bonzini wrote:
>
>> diff --git a/job.c b/job.c
>> index f16a4ef542..8a5b710d9b 100644
>> --- a/job.c
>> +++ b/job.c
>> @@ -566,7 +566,7 @@ void job_enter_cond_locked(Job *job, bool(*fn)(Job *job))
>>         job->busy = true;
>>         real_job_unlock();
>>         job_unlock();
>> -       aio_co_enter(job->aio_context, job->co);
>> +       aio_co_enter(job_get_aio_context(job), job->co);
>>         job_lock();
>>     }
>
> If you replace aio_co_enter with aio_co_schedule, you can call it
> without dropping the lock. The difference being that aio_co_schedule
> will always go through a bottom half.
>
>> @@ -1138,7 +1138,6 @@ static void coroutine_fn job_co_entry(void *opaque)
>>     Job *job = opaque;
>>     int ret;
>> -   assert(job->aio_context == qemu_get_current_aio_context());
>>     assert(job && job->driver && job->driver->run);
>>     job_pause_point(job);
>>     ret = job->driver->run(job, &job->err);
>> @@ -1177,7 +1176,7 @@ void job_start(Job *job)
>>         job->paused = false;
>>         job_state_transition_locked(job, JOB_STATUS_RUNNING);
>>     }
>> -   aio_co_enter(job->aio_context, job->co);
>> +   aio_co_enter(job_get_aio_context(job), job->co);
>
> Better to use aio_co_schedule here, too, and move it under the previous
> WITH_JOB_LOCK_GUARD.

Unfortunately this does not work straightforward: aio_co_enter invokes
aio_co_schedule only if the context is different from the main loop,
otherwise it can directly enter the coroutine with
qemu_aio_coroutine_enter. So always replacing it with aio_co_schedule
breaks the unit tests assumptions, as they expect that when control is
returned the job has already executed.

A possible solution is to aio_poll() on the condition we want to assert,
waiting for the bh to be scheduled. But I don't know if this is then
useful to test something.

Thank you,
Emanuele
On 1/21/22 16:18, Emanuele Giuseppe Esposito wrote:
>>>
>>
>> Better to use aio_co_schedule here, too, and move it under the
>> previous WITH_JOB_LOCK_GUARD.
>
> Unfortunately this does not work straightforward: aio_co_enter invokes
> aio_co_schedule only if the context is different from the main loop,
> otherwise it can directly enter the coroutine with
> qemu_aio_coroutine_enter. So always replacing it with aio_co_schedule
> breaks the unit tests assumptions, as they expect that when control is
> returned the job has already executed.
>
> A possible solution is to aio_poll() on the condition we want to assert,
> waiting for the bh to be scheduled. But I don't know if this is then
> useful to test something.

I think you sorted that out, based on IRC conversation?

Paolo
On 24/01/2022 15:22, Paolo Bonzini wrote:
> On 1/21/22 16:18, Emanuele Giuseppe Esposito wrote:
>>>>
>>>
>>> Better to use aio_co_schedule here, too, and move it under the
>>> previous WITH_JOB_LOCK_GUARD.
>>
>> Unfortunately this does not work straightforward: aio_co_enter invokes
>> aio_co_schedule only if the context is different from the main loop,
>> otherwise it can directly enter the coroutine with
>> qemu_aio_coroutine_enter. So always replacing it with aio_co_schedule
>> breaks the unit tests assumptions, as they expect that when control is
>> returned the job has already executed.
>>
>> A possible solution is to aio_poll() on the condition we want to
>> assert, waiting for the bh to be scheduled. But I don't know if this
>> is then useful to test something.
>
> I think you sorted that out, based on IRC conversation?
>

Yes.

Thank you,
Emanuele
© 2016 - 2026 Red Hat, Inc.