1
The following changes since commit 506e4a00de01e0b29fa83db5cbbc3d154253b4ea:
1
The following changes since commit 8bac3ba57eecc466b7e73dabf7d19328a59f684e:
2
2
3
Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-3.1-20180925' into staging (2018-09-25 13:30:45 +0100)
3
Merge remote-tracking branch 'remotes/rth/tags/pull-rx-20200408' into staging (2020-04-09 13:23:30 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://git.xanclic.moe/XanClic/qemu.git tags/pull-block-2018-09-25
7
https://github.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 9c76ff9c16be890e70fce30754b096ff9950d1ee:
9
for you to fetch changes up to 5710a3e09f9b85801e5ce70797a4a511e5fc9e2c:
10
10
11
Merge remote-tracking branch 'kevin/tags/for-upstream' into block (2018-09-25 16:12:44 +0200)
11
async: use explicit memory barriers (2020-04-09 16:17:14 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block layer patches:
14
Pull request
15
- Drain fixes
15
16
- node-name parameters for block-commit
16
Fixes for QEMU on aarch64 ARM hosts and fdmon-io_uring.
17
- Refactor block jobs to use transactional callbacks for exiting
18
17
19
----------------------------------------------------------------
18
----------------------------------------------------------------
20
Alberto Garcia (2):
21
block: Fix use after free error in bdrv_open_inherit()
22
qemu-iotests: Test snapshot=on with nonexistent TMPDIR
23
19
24
Fam Zheng (1):
20
Paolo Bonzini (2):
25
job: Fix nested aio_poll() hanging in job_txn_apply
21
aio-wait: delegate polling of main AioContext if BQL not held
22
async: use explicit memory barriers
26
23
27
John Snow (16):
24
Stefan Hajnoczi (1):
28
block/commit: add block job creation flags
25
aio-posix: signal-proof fdmon-io_uring
29
block/mirror: add block job creation flags
30
block/stream: add block job creation flags
31
block/commit: refactor commit to use job callbacks
32
block/mirror: don't install backing chain on abort
33
block/mirror: conservative mirror_exit refactor
34
block/stream: refactor stream to use job callbacks
35
tests/blockjob: replace Blockjob with Job
36
tests/test-blockjob: remove exit callback
37
tests/test-blockjob-txn: move .exit to .clean
38
jobs: remove .exit callback
39
qapi/block-commit: expose new job properties
40
qapi/block-mirror: expose new job properties
41
qapi/block-stream: expose new job properties
42
block/backup: qapi documentation fixup
43
blockdev: document transactional shortcomings
44
26
45
Kevin Wolf (21):
27
include/block/aio-wait.h | 22 ++++++++++++++++++++++
46
commit: Add top-node/base-node options
28
include/block/aio.h | 29 ++++++++++-------------------
47
qemu-iotests: Test commit with top-node/base-node
29
util/aio-posix.c | 16 ++++++++++++++--
48
job: Fix missing locking due to mismerge
30
util/aio-win32.c | 17 ++++++++++++++---
49
blockjob: Wake up BDS when job becomes idle
31
util/async.c | 16 ++++++++++++----
50
aio-wait: Increase num_waiters even in home thread
32
util/fdmon-io_uring.c | 10 ++++++++--
51
test-bdrv-drain: Drain with block jobs in an I/O thread
33
6 files changed, 80 insertions(+), 30 deletions(-)
52
test-blockjob: Acquire AioContext around job_cancel_sync()
53
job: Use AIO_WAIT_WHILE() in job_finish_sync()
54
test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback
55
block: Add missing locking in bdrv_co_drain_bh_cb()
56
block-backend: Add .drained_poll callback
57
block-backend: Fix potential double blk_delete()
58
block-backend: Decrease in_flight only after callback
59
blockjob: Lie better in child_job_drained_poll()
60
block: Remove aio_poll() in bdrv_drain_poll variants
61
test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level()
62
job: Avoid deadlocks in job_completed_txn_abort()
63
test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort
64
test-bdrv-drain: Fix outdated comments
65
block: Use a single global AioWait
66
test-bdrv-drain: Test draining job source child and parent
67
68
Max Reitz (1):
69
Merge remote-tracking branch 'kevin/tags/for-upstream' into block
70
71
Sergio Lopez (2):
72
block/linux-aio: acquire AioContext before qemu_laio_process_completions
73
util/async: use qemu_aio_coroutine_enter in co_schedule_bh_cb
74
75
qapi/block-core.json | 104 ++++++++++++---
76
include/block/aio-wait.h | 28 ++--
77
include/block/block.h | 6 +-
78
include/block/block_int.h | 18 ++-
79
include/block/blockjob.h | 3 +
80
include/qemu/coroutine.h | 5 +
81
include/qemu/job.h | 23 ++--
82
block.c | 6 +-
83
block/block-backend.c | 31 +++--
84
block/commit.c | 97 ++++++++------
85
block/io.c | 30 +++--
86
block/linux-aio.c | 2 +-
87
block/mirror.c | 49 +++++--
88
block/stream.c | 28 ++--
89
blockdev.c | 84 ++++++++++--
90
blockjob.c | 9 +-
91
hmp.c | 5 +-
92
job.c | 144 +++++++++++----------
93
tests/test-bdrv-drain.c | 294 +++++++++++++++++++++++++++++++++++++++---
94
tests/test-blockjob-txn.c | 4 +-
95
tests/test-blockjob.c | 120 ++++++++---------
96
util/aio-wait.c | 11 +-
97
util/async.c | 2 +-
98
util/qemu-coroutine.c | 5 +
99
tests/qemu-iotests/040 | 52 +++++++-
100
tests/qemu-iotests/040.out | 4 +-
101
tests/qemu-iotests/051 | 3 +
102
tests/qemu-iotests/051.out | 3 +
103
tests/qemu-iotests/051.pc.out | 3 +
104
29 files changed, 856 insertions(+), 317 deletions(-)
105
34
106
--
35
--
107
2.17.1
36
2.25.1
108
37
109
diff view generated by jsdifflib
1
From: John Snow <jsnow@redhat.com>
1
The io_uring_enter(2) syscall returns with errno=EINTR when interrupted
2
by a signal. Retry the syscall in this case.
2
3
3
Add support for taking and passing forward job creation flags.
4
It's essential to do this in the io_uring_submit_and_wait() case. My
5
interpretation of the Linux v5.5 io_uring_enter(2) code is that it
6
shouldn't affect the io_uring_submit() case, but there is no guarantee
7
this will always be the case. Let's check for -EINTR around both APIs.
4
8
5
Signed-off-by: John Snow <jsnow@redhat.com>
9
Note that the liburing APIs have -errno return values.
6
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
7
Reviewed-by: Jeff Cody <jcody@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20180906130225.5118-2-jsnow@redhat.com
12
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
13
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
14
Message-id: 20200408091139.273851-1-stefanha@redhat.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
16
---
11
include/block/block_int.h | 5 ++++-
17
util/fdmon-io_uring.c | 10 ++++++++--
12
block/commit.c | 5 +++--
18
1 file changed, 8 insertions(+), 2 deletions(-)
13
blockdev.c | 7 ++++---
14
3 files changed, 11 insertions(+), 6 deletions(-)
15
19
16
diff --git a/include/block/block_int.h b/include/block/block_int.h
20
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
17
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
18
--- a/include/block/block_int.h
22
--- a/util/fdmon-io_uring.c
19
+++ b/include/block/block_int.h
23
+++ b/util/fdmon-io_uring.c
20
@@ -XXX,XX +XXX,XX @@ void stream_start(const char *job_id, BlockDriverState *bs,
24
@@ -XXX,XX +XXX,XX @@ static struct io_uring_sqe *get_sqe(AioContext *ctx)
21
* @bs: Active block device.
22
* @top: Top block device to be committed.
23
* @base: Block device that will be written into, and become the new top.
24
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
25
+ * See @BlockJobCreateFlags
26
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
27
* @on_error: The action to take upon error.
28
* @backing_file_str: String to use as the backing file in @top's overlay
29
@@ -XXX,XX +XXX,XX @@ void stream_start(const char *job_id, BlockDriverState *bs,
30
*
31
*/
32
void commit_start(const char *job_id, BlockDriverState *bs,
33
- BlockDriverState *base, BlockDriverState *top, int64_t speed,
34
+ BlockDriverState *base, BlockDriverState *top,
35
+ int creation_flags, int64_t speed,
36
BlockdevOnError on_error, const char *backing_file_str,
37
const char *filter_node_name, Error **errp);
38
/**
39
diff --git a/block/commit.c b/block/commit.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/block/commit.c
42
+++ b/block/commit.c
43
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_commit_top = {
44
};
45
46
void commit_start(const char *job_id, BlockDriverState *bs,
47
- BlockDriverState *base, BlockDriverState *top, int64_t speed,
48
+ BlockDriverState *base, BlockDriverState *top,
49
+ int creation_flags, int64_t speed,
50
BlockdevOnError on_error, const char *backing_file_str,
51
const char *filter_node_name, Error **errp)
52
{
53
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
54
}
25
}
55
26
56
s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
27
/* No free sqes left, submit pending sqes first */
57
- speed, JOB_DEFAULT, NULL, NULL, errp);
28
- ret = io_uring_submit(ring);
58
+ speed, creation_flags, NULL, NULL, errp);
29
+ do {
59
if (!s) {
30
+ ret = io_uring_submit(ring);
60
return;
31
+ } while (ret == -EINTR);
61
}
32
+
62
diff --git a/blockdev.c b/blockdev.c
33
assert(ret > 1);
63
index XXXXXXX..XXXXXXX 100644
34
sqe = io_uring_get_sqe(ring);
64
--- a/blockdev.c
35
assert(sqe);
65
+++ b/blockdev.c
36
@@ -XXX,XX +XXX,XX @@ static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
66
@@ -XXX,XX +XXX,XX @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
37
67
* BlockdevOnError change for blkmirror makes it in
38
fill_sq_ring(ctx);
68
*/
39
69
BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
40
- ret = io_uring_submit_and_wait(&ctx->fdmon_io_uring, wait_nr);
70
+ int job_flags = JOB_DEFAULT;
41
+ do {
71
42
+ ret = io_uring_submit_and_wait(&ctx->fdmon_io_uring, wait_nr);
72
if (!has_speed) {
43
+ } while (ret == -EINTR);
73
speed = 0;
44
+
74
@@ -XXX,XX +XXX,XX @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
45
assert(ret >= 0);
75
goto out;
46
76
}
47
return process_cq_ring(ctx, ready_list);
77
commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
78
- JOB_DEFAULT, speed, on_error,
79
+ job_flags, speed, on_error,
80
filter_node_name, NULL, NULL, false, &local_err);
81
} else {
82
BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
83
if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
84
goto out;
85
}
86
- commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
87
- on_error, has_backing_file ? backing_file : NULL,
88
+ commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, job_flags,
89
+ speed, on_error, has_backing_file ? backing_file : NULL,
90
filter_node_name, &local_err);
91
}
92
if (local_err != NULL) {
93
--
48
--
94
2.17.1
49
2.25.1
95
50
96
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
Add support for taking and passing forward job creation flags.
4
5
Signed-off-by: John Snow <jsnow@redhat.com>
6
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Reviewed-by: Jeff Cody <jcody@redhat.com>
8
Message-id: 20180906130225.5118-3-jsnow@redhat.com
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
---
11
include/block/block_int.h | 5 ++++-
12
block/mirror.c | 5 +++--
13
blockdev.c | 3 ++-
14
3 files changed, 9 insertions(+), 4 deletions(-)
15
16
diff --git a/include/block/block_int.h b/include/block/block_int.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/include/block/block_int.h
19
+++ b/include/block/block_int.h
20
@@ -XXX,XX +XXX,XX @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
21
* @target: Block device to write to.
22
* @replaces: Block graph node name to replace once the mirror is done. Can
23
* only be used when full mirroring is selected.
24
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
25
+ * See @BlockJobCreateFlags
26
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
27
* @granularity: The chosen granularity for the dirty bitmap.
28
* @buf_size: The amount of data that can be in flight at one time.
29
@@ -XXX,XX +XXX,XX @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
30
*/
31
void mirror_start(const char *job_id, BlockDriverState *bs,
32
BlockDriverState *target, const char *replaces,
33
- int64_t speed, uint32_t granularity, int64_t buf_size,
34
+ int creation_flags, int64_t speed,
35
+ uint32_t granularity, int64_t buf_size,
36
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
37
BlockdevOnError on_source_error,
38
BlockdevOnError on_target_error,
39
diff --git a/block/mirror.c b/block/mirror.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/block/mirror.c
42
+++ b/block/mirror.c
43
@@ -XXX,XX +XXX,XX @@ fail:
44
45
void mirror_start(const char *job_id, BlockDriverState *bs,
46
BlockDriverState *target, const char *replaces,
47
- int64_t speed, uint32_t granularity, int64_t buf_size,
48
+ int creation_flags, int64_t speed,
49
+ uint32_t granularity, int64_t buf_size,
50
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
51
BlockdevOnError on_source_error,
52
BlockdevOnError on_target_error,
53
@@ -XXX,XX +XXX,XX @@ void mirror_start(const char *job_id, BlockDriverState *bs,
54
}
55
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
56
base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
57
- mirror_start_job(job_id, bs, JOB_DEFAULT, target, replaces,
58
+ mirror_start_job(job_id, bs, creation_flags, target, replaces,
59
speed, granularity, buf_size, backing_mode,
60
on_source_error, on_target_error, unmap, NULL, NULL,
61
&mirror_job_driver, is_none_mode, base, false,
62
diff --git a/blockdev.c b/blockdev.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/blockdev.c
65
+++ b/blockdev.c
66
@@ -XXX,XX +XXX,XX @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
67
bool has_copy_mode, MirrorCopyMode copy_mode,
68
Error **errp)
69
{
70
+ int job_flags = JOB_DEFAULT;
71
72
if (!has_speed) {
73
speed = 0;
74
@@ -XXX,XX +XXX,XX @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
75
* and will allow to check whether the node still exist at mirror completion
76
*/
77
mirror_start(job_id, bs, target,
78
- has_replaces ? replaces : NULL,
79
+ has_replaces ? replaces : NULL, job_flags,
80
speed, granularity, buf_size, sync, backing_mode,
81
on_source_error, on_target_error, unmap, filter_node_name,
82
copy_mode, errp);
83
--
84
2.17.1
85
86
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
Add support for taking and passing forward job creation flags.
4
5
Signed-off-by: John Snow <jsnow@redhat.com>
6
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Reviewed-by: Jeff Cody <jcody@redhat.com>
8
Message-id: 20180906130225.5118-4-jsnow@redhat.com
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
---
11
include/block/block_int.h | 5 ++++-
12
block/stream.c | 5 +++--
13
blockdev.c | 3 ++-
14
3 files changed, 9 insertions(+), 4 deletions(-)
15
16
diff --git a/include/block/block_int.h b/include/block/block_int.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/include/block/block_int.h
19
+++ b/include/block/block_int.h
20
@@ -XXX,XX +XXX,XX @@ int is_windows_drive(const char *filename);
21
* flatten the whole backing file chain onto @bs.
22
* @backing_file_str: The file name that will be written to @bs as the
23
* the new backing file if the job completes. Ignored if @base is %NULL.
24
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
25
+ * See @BlockJobCreateFlags
26
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
27
* @on_error: The action to take upon error.
28
* @errp: Error object.
29
@@ -XXX,XX +XXX,XX @@ int is_windows_drive(const char *filename);
30
*/
31
void stream_start(const char *job_id, BlockDriverState *bs,
32
BlockDriverState *base, const char *backing_file_str,
33
- int64_t speed, BlockdevOnError on_error, Error **errp);
34
+ int creation_flags, int64_t speed,
35
+ BlockdevOnError on_error, Error **errp);
36
37
/**
38
* commit_start:
39
diff --git a/block/stream.c b/block/stream.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/block/stream.c
42
+++ b/block/stream.c
43
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver stream_job_driver = {
44
45
void stream_start(const char *job_id, BlockDriverState *bs,
46
BlockDriverState *base, const char *backing_file_str,
47
- int64_t speed, BlockdevOnError on_error, Error **errp)
48
+ int creation_flags, int64_t speed,
49
+ BlockdevOnError on_error, Error **errp)
50
{
51
StreamBlockJob *s;
52
BlockDriverState *iter;
53
@@ -XXX,XX +XXX,XX @@ void stream_start(const char *job_id, BlockDriverState *bs,
54
BLK_PERM_GRAPH_MOD,
55
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
56
BLK_PERM_WRITE,
57
- speed, JOB_DEFAULT, NULL, NULL, errp);
58
+ speed, creation_flags, NULL, NULL, errp);
59
if (!s) {
60
goto fail;
61
}
62
diff --git a/blockdev.c b/blockdev.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/blockdev.c
65
+++ b/blockdev.c
66
@@ -XXX,XX +XXX,XX @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
67
AioContext *aio_context;
68
Error *local_err = NULL;
69
const char *base_name = NULL;
70
+ int job_flags = JOB_DEFAULT;
71
72
if (!has_on_error) {
73
on_error = BLOCKDEV_ON_ERROR_REPORT;
74
@@ -XXX,XX +XXX,XX @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
75
base_name = has_backing_file ? backing_file : base_name;
76
77
stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
78
- has_speed ? speed : 0, on_error, &local_err);
79
+ job_flags, has_speed ? speed : 0, on_error, &local_err);
80
if (local_err) {
81
error_propagate(errp, local_err);
82
goto out;
83
--
84
2.17.1
85
86
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
Use the component callbacks; prepare, abort, and clean.
4
5
NB: prepare is only called when the job has not yet failed;
6
and abort can be called after prepare.
7
8
complete -> prepare -> abort -> clean
9
complete -> abort -> clean
10
11
During refactor, a potential problem with bdrv_drop_intermediate
12
was identified, the patched behavior is no worse than the pre-patch
13
behavior, so leave a FIXME for now to be fixed in a future patch.
14
15
Signed-off-by: John Snow <jsnow@redhat.com>
16
Reviewed-by: Max Reitz <mreitz@redhat.com>
17
Message-id: 20180906130225.5118-5-jsnow@redhat.com
18
Reviewed-by: Jeff Cody <jcody@redhat.com>
19
Signed-off-by: Max Reitz <mreitz@redhat.com>
20
---
21
block/commit.c | 92 ++++++++++++++++++++++++++++----------------------
22
1 file changed, 51 insertions(+), 41 deletions(-)
23
24
diff --git a/block/commit.c b/block/commit.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/block/commit.c
27
+++ b/block/commit.c
28
@@ -XXX,XX +XXX,XX @@ typedef struct CommitBlockJob {
29
BlockDriverState *commit_top_bs;
30
BlockBackend *top;
31
BlockBackend *base;
32
+ BlockDriverState *base_bs;
33
BlockdevOnError on_error;
34
int base_flags;
35
char *backing_file_str;
36
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
37
return 0;
38
}
39
40
-static void commit_exit(Job *job)
41
+static int commit_prepare(Job *job)
42
{
43
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
44
- BlockJob *bjob = &s->common;
45
- BlockDriverState *top = blk_bs(s->top);
46
- BlockDriverState *base = blk_bs(s->base);
47
- BlockDriverState *commit_top_bs = s->commit_top_bs;
48
- bool remove_commit_top_bs = false;
49
-
50
- /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
51
- bdrv_ref(top);
52
- bdrv_ref(commit_top_bs);
53
54
/* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
55
* the normal backing chain can be restored. */
56
blk_unref(s->base);
57
+ s->base = NULL;
58
+
59
+ /* FIXME: bdrv_drop_intermediate treats total failures and partial failures
60
+ * identically. Further work is needed to disambiguate these cases. */
61
+ return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs,
62
+ s->backing_file_str);
63
+}
64
65
- if (!job_is_cancelled(job) && job->ret == 0) {
66
- /* success */
67
- job->ret = bdrv_drop_intermediate(s->commit_top_bs, base,
68
- s->backing_file_str);
69
- } else {
70
- /* XXX Can (or should) we somehow keep 'consistent read' blocked even
71
- * after the failed/cancelled commit job is gone? If we already wrote
72
- * something to base, the intermediate images aren't valid any more. */
73
- remove_commit_top_bs = true;
74
+static void commit_abort(Job *job)
75
+{
76
+ CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
77
+ BlockDriverState *top_bs = blk_bs(s->top);
78
+
79
+ /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
80
+ bdrv_ref(top_bs);
81
+ bdrv_ref(s->commit_top_bs);
82
+
83
+ if (s->base) {
84
+ blk_unref(s->base);
85
}
86
87
+ /* free the blockers on the intermediate nodes so that bdrv_replace_nodes
88
+ * can succeed */
89
+ block_job_remove_all_bdrv(&s->common);
90
+
91
+ /* If bdrv_drop_intermediate() failed (or was not invoked), remove the
92
+ * commit filter driver from the backing chain now. Do this as the final
93
+ * step so that the 'consistent read' permission can be granted.
94
+ *
95
+ * XXX Can (or should) we somehow keep 'consistent read' blocked even
96
+ * after the failed/cancelled commit job is gone? If we already wrote
97
+ * something to base, the intermediate images aren't valid any more. */
98
+ bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL,
99
+ &error_abort);
100
+ bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs),
101
+ &error_abort);
102
+
103
+ bdrv_unref(s->commit_top_bs);
104
+ bdrv_unref(top_bs);
105
+}
106
+
107
+static void commit_clean(Job *job)
108
+{
109
+ CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
110
+
111
/* restore base open flags here if appropriate (e.g., change the base back
112
* to r/o). These reopens do not need to be atomic, since we won't abort
113
* even on failure here */
114
- if (s->base_flags != bdrv_get_flags(base)) {
115
- bdrv_reopen(base, s->base_flags, NULL);
116
+ if (s->base_flags != bdrv_get_flags(s->base_bs)) {
117
+ bdrv_reopen(s->base_bs, s->base_flags, NULL);
118
}
119
+
120
g_free(s->backing_file_str);
121
blk_unref(s->top);
122
-
123
- /* If there is more than one reference to the job (e.g. if called from
124
- * job_finish_sync()), job_completed() won't free it and therefore the
125
- * blockers on the intermediate nodes remain. This would cause
126
- * bdrv_set_backing_hd() to fail. */
127
- block_job_remove_all_bdrv(bjob);
128
-
129
- /* If bdrv_drop_intermediate() didn't already do that, remove the commit
130
- * filter driver from the backing chain. Do this as the final step so that
131
- * the 'consistent read' permission can be granted. */
132
- if (remove_commit_top_bs) {
133
- bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL,
134
- &error_abort);
135
- bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs),
136
- &error_abort);
137
- }
138
-
139
- bdrv_unref(commit_top_bs);
140
- bdrv_unref(top);
141
}
142
143
static int coroutine_fn commit_run(Job *job, Error **errp)
144
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver commit_job_driver = {
145
.user_resume = block_job_user_resume,
146
.drain = block_job_drain,
147
.run = commit_run,
148
- .exit = commit_exit,
149
+ .prepare = commit_prepare,
150
+ .abort = commit_abort,
151
+ .clean = commit_clean
152
},
153
};
154
155
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
156
if (ret < 0) {
157
goto fail;
158
}
159
+ s->base_bs = base;
160
161
/* Required permissions are already taken with block_job_add_bdrv() */
162
s->top = blk_new(0, BLK_PERM_ALL);
163
--
164
2.17.1
165
166
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
In cases where we abort the block/mirror job, there's no point in
4
installing the new backing chain before we finish aborting.
5
6
Signed-off-by: John Snow <jsnow@redhat.com>
7
Message-id: 20180906130225.5118-6-jsnow@redhat.com
8
Reviewed-by: Jeff Cody <jcody@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
---
12
block/mirror.c | 2 +-
13
1 file changed, 1 insertion(+), 1 deletion(-)
14
15
diff --git a/block/mirror.c b/block/mirror.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/mirror.c
18
+++ b/block/mirror.c
19
@@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job)
20
* required before it could become a backing file of target_bs. */
21
bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
22
&error_abort);
23
- if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
24
+ if (ret == 0 && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
25
BlockDriverState *backing = s->is_none_mode ? src : s->base;
26
if (backing_bs(target_bs) != backing) {
27
bdrv_set_backing_hd(target_bs, backing, &local_err);
28
--
29
2.17.1
30
31
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
For purposes of minimum code movement, refactor the mirror_exit
4
callback to use the post-finalization callbacks in a trivial way.
5
6
Signed-off-by: John Snow <jsnow@redhat.com>
7
Message-id: 20180906130225.5118-7-jsnow@redhat.com
8
Reviewed-by: Jeff Cody <jcody@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
[mreitz: Added comment for the mirror_exit() function]
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
12
---
13
block/mirror.c | 44 +++++++++++++++++++++++++++++++++-----------
14
1 file changed, 33 insertions(+), 11 deletions(-)
15
16
diff --git a/block/mirror.c b/block/mirror.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/mirror.c
19
+++ b/block/mirror.c
20
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorBlockJob {
21
int max_iov;
22
bool initial_zeroing_ongoing;
23
int in_active_write_counter;
24
+ bool prepared;
25
} MirrorBlockJob;
26
27
typedef struct MirrorBDSOpaque {
28
@@ -XXX,XX +XXX,XX @@ static void mirror_wait_for_all_io(MirrorBlockJob *s)
29
}
30
}
31
32
-static void mirror_exit(Job *job)
33
+/**
34
+ * mirror_exit_common: handle both abort() and prepare() cases.
35
+ * for .prepare, returns 0 on success and -errno on failure.
36
+ * for .abort cases, denoted by abort = true, MUST return 0.
37
+ */
38
+static int mirror_exit_common(Job *job)
39
{
40
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
41
BlockJob *bjob = &s->common;
42
@@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job)
43
BlockDriverState *target_bs = blk_bs(s->target);
44
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
45
Error *local_err = NULL;
46
- int ret = job->ret;
47
+ bool abort = job->ret < 0;
48
+ int ret = 0;
49
+
50
+ if (s->prepared) {
51
+ return 0;
52
+ }
53
+ s->prepared = true;
54
55
bdrv_release_dirty_bitmap(src, s->dirty_bitmap);
56
57
@@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job)
58
* required before it could become a backing file of target_bs. */
59
bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
60
&error_abort);
61
- if (ret == 0 && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
62
+ if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
63
BlockDriverState *backing = s->is_none_mode ? src : s->base;
64
if (backing_bs(target_bs) != backing) {
65
bdrv_set_backing_hd(target_bs, backing, &local_err);
66
@@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job)
67
aio_context_acquire(replace_aio_context);
68
}
69
70
- if (s->should_complete && ret == 0) {
71
- BlockDriverState *to_replace = src;
72
- if (s->to_replace) {
73
- to_replace = s->to_replace;
74
- }
75
+ if (s->should_complete && !abort) {
76
+ BlockDriverState *to_replace = s->to_replace ?: src;
77
78
if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) {
79
bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL);
80
@@ -XXX,XX +XXX,XX @@ static void mirror_exit(Job *job)
81
bdrv_unref(mirror_top_bs);
82
bdrv_unref(src);
83
84
- job->ret = ret;
85
+ return ret;
86
+}
87
+
88
+static int mirror_prepare(Job *job)
89
+{
90
+ return mirror_exit_common(job);
91
+}
92
+
93
+static void mirror_abort(Job *job)
94
+{
95
+ int ret = mirror_exit_common(job);
96
+ assert(ret == 0);
97
}
98
99
static void mirror_throttle(MirrorBlockJob *s)
100
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver mirror_job_driver = {
101
.user_resume = block_job_user_resume,
102
.drain = block_job_drain,
103
.run = mirror_run,
104
- .exit = mirror_exit,
105
+ .prepare = mirror_prepare,
106
+ .abort = mirror_abort,
107
.pause = mirror_pause,
108
.complete = mirror_complete,
109
},
110
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver commit_active_job_driver = {
111
.user_resume = block_job_user_resume,
112
.drain = block_job_drain,
113
.run = mirror_run,
114
- .exit = mirror_exit,
115
+ .prepare = mirror_prepare,
116
+ .abort = mirror_abort,
117
.pause = mirror_pause,
118
.complete = mirror_complete,
119
},
120
--
121
2.17.1
122
123
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
Signed-off-by: John Snow <jsnow@redhat.com>
4
Reviewed-by: Max Reitz <mreitz@redhat.com>
5
Message-id: 20180906130225.5118-8-jsnow@redhat.com
6
Reviewed-by: Jeff Cody <jcody@redhat.com>
7
Signed-off-by: Max Reitz <mreitz@redhat.com>
8
---
9
block/stream.c | 23 +++++++++++++++--------
10
1 file changed, 15 insertions(+), 8 deletions(-)
11
12
diff --git a/block/stream.c b/block/stream.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/block/stream.c
15
+++ b/block/stream.c
16
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn stream_populate(BlockBackend *blk,
17
return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
18
}
19
20
-static void stream_exit(Job *job)
21
+static int stream_prepare(Job *job)
22
{
23
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
24
BlockJob *bjob = &s->common;
25
BlockDriverState *bs = blk_bs(bjob->blk);
26
BlockDriverState *base = s->base;
27
Error *local_err = NULL;
28
- int ret = job->ret;
29
+ int ret = 0;
30
31
- if (!job_is_cancelled(job) && bs->backing && ret == 0) {
32
+ if (bs->backing) {
33
const char *base_id = NULL, *base_fmt = NULL;
34
if (base) {
35
base_id = s->backing_file_str;
36
@@ -XXX,XX +XXX,XX @@ static void stream_exit(Job *job)
37
bdrv_set_backing_hd(bs, base, &local_err);
38
if (local_err) {
39
error_report_err(local_err);
40
- ret = -EPERM;
41
- goto out;
42
+ return -EPERM;
43
}
44
}
45
46
-out:
47
+ return ret;
48
+}
49
+
50
+static void stream_clean(Job *job)
51
+{
52
+ StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
53
+ BlockJob *bjob = &s->common;
54
+ BlockDriverState *bs = blk_bs(bjob->blk);
55
+
56
/* Reopen the image back in read-only mode if necessary */
57
if (s->bs_flags != bdrv_get_flags(bs)) {
58
/* Give up write permissions before making it read-only */
59
@@ -XXX,XX +XXX,XX @@ out:
60
}
61
62
g_free(s->backing_file_str);
63
- job->ret = ret;
64
}
65
66
static int coroutine_fn stream_run(Job *job, Error **errp)
67
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver stream_job_driver = {
68
.job_type = JOB_TYPE_STREAM,
69
.free = block_job_free,
70
.run = stream_run,
71
- .exit = stream_exit,
72
+ .prepare = stream_prepare,
73
+ .clean = stream_clean,
74
.user_resume = block_job_user_resume,
75
.drain = block_job_drain,
76
},
77
--
78
2.17.1
79
80
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
These tests don't actually test blockjobs anymore, they test
4
generic Job lifetimes. Change the types accordingly.
5
6
Signed-off-by: John Snow <jsnow@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Message-id: 20180906130225.5118-9-jsnow@redhat.com
9
Reviewed-by: Jeff Cody <jcody@redhat.com>
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
---
12
tests/test-blockjob.c | 98 ++++++++++++++++++++++---------------------
13
1 file changed, 50 insertions(+), 48 deletions(-)
14
15
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tests/test-blockjob.c
18
+++ b/tests/test-blockjob.c
19
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver test_cancel_driver = {
20
},
21
};
22
23
-static CancelJob *create_common(BlockJob **pjob)
24
+static CancelJob *create_common(Job **pjob)
25
{
26
BlockBackend *blk;
27
- BlockJob *job;
28
+ Job *job;
29
+ BlockJob *bjob;
30
CancelJob *s;
31
32
blk = create_blk(NULL);
33
- job = mk_job(blk, "Steve", &test_cancel_driver, true,
34
- JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS);
35
- job_ref(&job->job);
36
- assert(job->job.status == JOB_STATUS_CREATED);
37
- s = container_of(job, CancelJob, common);
38
+ bjob = mk_job(blk, "Steve", &test_cancel_driver, true,
39
+ JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS);
40
+ job = &bjob->job;
41
+ job_ref(job);
42
+ assert(job->status == JOB_STATUS_CREATED);
43
+ s = container_of(bjob, CancelJob, common);
44
s->blk = blk;
45
46
*pjob = job;
47
@@ -XXX,XX +XXX,XX @@ static void cancel_common(CancelJob *s)
48
49
static void test_cancel_created(void)
50
{
51
- BlockJob *job;
52
+ Job *job;
53
CancelJob *s;
54
55
s = create_common(&job);
56
@@ -XXX,XX +XXX,XX @@ static void test_cancel_created(void)
57
58
static void test_cancel_running(void)
59
{
60
- BlockJob *job;
61
+ Job *job;
62
CancelJob *s;
63
64
s = create_common(&job);
65
66
- job_start(&job->job);
67
- assert(job->job.status == JOB_STATUS_RUNNING);
68
+ job_start(job);
69
+ assert(job->status == JOB_STATUS_RUNNING);
70
71
cancel_common(s);
72
}
73
74
static void test_cancel_paused(void)
75
{
76
- BlockJob *job;
77
+ Job *job;
78
CancelJob *s;
79
80
s = create_common(&job);
81
82
- job_start(&job->job);
83
- assert(job->job.status == JOB_STATUS_RUNNING);
84
+ job_start(job);
85
+ assert(job->status == JOB_STATUS_RUNNING);
86
87
- job_user_pause(&job->job, &error_abort);
88
- job_enter(&job->job);
89
- assert(job->job.status == JOB_STATUS_PAUSED);
90
+ job_user_pause(job, &error_abort);
91
+ job_enter(job);
92
+ assert(job->status == JOB_STATUS_PAUSED);
93
94
cancel_common(s);
95
}
96
97
static void test_cancel_ready(void)
98
{
99
- BlockJob *job;
100
+ Job *job;
101
CancelJob *s;
102
103
s = create_common(&job);
104
105
- job_start(&job->job);
106
- assert(job->job.status == JOB_STATUS_RUNNING);
107
+ job_start(job);
108
+ assert(job->status == JOB_STATUS_RUNNING);
109
110
s->should_converge = true;
111
- job_enter(&job->job);
112
- assert(job->job.status == JOB_STATUS_READY);
113
+ job_enter(job);
114
+ assert(job->status == JOB_STATUS_READY);
115
116
cancel_common(s);
117
}
118
119
static void test_cancel_standby(void)
120
{
121
- BlockJob *job;
122
+ Job *job;
123
CancelJob *s;
124
125
s = create_common(&job);
126
127
- job_start(&job->job);
128
- assert(job->job.status == JOB_STATUS_RUNNING);
129
+ job_start(job);
130
+ assert(job->status == JOB_STATUS_RUNNING);
131
132
s->should_converge = true;
133
- job_enter(&job->job);
134
- assert(job->job.status == JOB_STATUS_READY);
135
+ job_enter(job);
136
+ assert(job->status == JOB_STATUS_READY);
137
138
- job_user_pause(&job->job, &error_abort);
139
- job_enter(&job->job);
140
- assert(job->job.status == JOB_STATUS_STANDBY);
141
+ job_user_pause(job, &error_abort);
142
+ job_enter(job);
143
+ assert(job->status == JOB_STATUS_STANDBY);
144
145
cancel_common(s);
146
}
147
148
static void test_cancel_pending(void)
149
{
150
- BlockJob *job;
151
+ Job *job;
152
CancelJob *s;
153
154
s = create_common(&job);
155
156
- job_start(&job->job);
157
- assert(job->job.status == JOB_STATUS_RUNNING);
158
+ job_start(job);
159
+ assert(job->status == JOB_STATUS_RUNNING);
160
161
s->should_converge = true;
162
- job_enter(&job->job);
163
- assert(job->job.status == JOB_STATUS_READY);
164
+ job_enter(job);
165
+ assert(job->status == JOB_STATUS_READY);
166
167
- job_complete(&job->job, &error_abort);
168
- job_enter(&job->job);
169
+ job_complete(job, &error_abort);
170
+ job_enter(job);
171
while (!s->completed) {
172
aio_poll(qemu_get_aio_context(), true);
173
}
174
- assert(job->job.status == JOB_STATUS_PENDING);
175
+ assert(job->status == JOB_STATUS_PENDING);
176
177
cancel_common(s);
178
}
179
180
static void test_cancel_concluded(void)
181
{
182
- BlockJob *job;
183
+ Job *job;
184
CancelJob *s;
185
186
s = create_common(&job);
187
188
- job_start(&job->job);
189
- assert(job->job.status == JOB_STATUS_RUNNING);
190
+ job_start(job);
191
+ assert(job->status == JOB_STATUS_RUNNING);
192
193
s->should_converge = true;
194
- job_enter(&job->job);
195
- assert(job->job.status == JOB_STATUS_READY);
196
+ job_enter(job);
197
+ assert(job->status == JOB_STATUS_READY);
198
199
- job_complete(&job->job, &error_abort);
200
- job_enter(&job->job);
201
+ job_complete(job, &error_abort);
202
+ job_enter(job);
203
while (!s->completed) {
204
aio_poll(qemu_get_aio_context(), true);
205
}
206
- assert(job->job.status == JOB_STATUS_PENDING);
207
+ assert(job->status == JOB_STATUS_PENDING);
208
209
- job_finalize(&job->job, &error_abort);
210
- assert(job->job.status == JOB_STATUS_CONCLUDED);
211
+ job_finalize(job, &error_abort);
212
+ assert(job->status == JOB_STATUS_CONCLUDED);
213
214
cancel_common(s);
215
}
216
--
217
2.17.1
218
219
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
We remove the exit callback and the completed boolean along with it.
4
We can simulate it just fine by waiting for the job to defer to the
5
main loop, and then giving it one final kick to get the main loop
6
portion to run.
7
8
Signed-off-by: John Snow <jsnow@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
10
Message-id: 20180906130225.5118-10-jsnow@redhat.com
11
Reviewed-by: Jeff Cody <jcody@redhat.com>
12
Signed-off-by: Max Reitz <mreitz@redhat.com>
13
---
14
tests/test-blockjob.c | 16 ++++++----------
15
1 file changed, 6 insertions(+), 10 deletions(-)
16
17
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/tests/test-blockjob.c
20
+++ b/tests/test-blockjob.c
21
@@ -XXX,XX +XXX,XX @@ typedef struct CancelJob {
22
BlockBackend *blk;
23
bool should_converge;
24
bool should_complete;
25
- bool completed;
26
} CancelJob;
27
28
-static void cancel_job_exit(Job *job)
29
-{
30
- CancelJob *s = container_of(job, CancelJob, common.job);
31
- s->completed = true;
32
-}
33
-
34
static void cancel_job_complete(Job *job, Error **errp)
35
{
36
CancelJob *s = container_of(job, CancelJob, common.job);
37
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver test_cancel_driver = {
38
.user_resume = block_job_user_resume,
39
.drain = block_job_drain,
40
.run = cancel_job_run,
41
- .exit = cancel_job_exit,
42
.complete = cancel_job_complete,
43
},
44
};
45
@@ -XXX,XX +XXX,XX @@ static void test_cancel_pending(void)
46
47
job_complete(job, &error_abort);
48
job_enter(job);
49
- while (!s->completed) {
50
+ while (!job->deferred_to_main_loop) {
51
aio_poll(qemu_get_aio_context(), true);
52
}
53
+ assert(job->status == JOB_STATUS_READY);
54
+ aio_poll(qemu_get_aio_context(), true);
55
assert(job->status == JOB_STATUS_PENDING);
56
57
cancel_common(s);
58
@@ -XXX,XX +XXX,XX @@ static void test_cancel_concluded(void)
59
60
job_complete(job, &error_abort);
61
job_enter(job);
62
- while (!s->completed) {
63
+ while (!job->deferred_to_main_loop) {
64
aio_poll(qemu_get_aio_context(), true);
65
}
66
+ assert(job->status == JOB_STATUS_READY);
67
+ aio_poll(qemu_get_aio_context(), true);
68
assert(job->status == JOB_STATUS_PENDING);
69
70
job_finalize(job, &error_abort);
71
--
72
2.17.1
73
74
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
The exit callback in this test actually only performs cleanup.
4
5
Signed-off-by: John Snow <jsnow@redhat.com>
6
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Message-id: 20180906130225.5118-11-jsnow@redhat.com
8
Reviewed-by: Jeff Cody <jcody@redhat.com>
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
---
11
tests/test-blockjob-txn.c | 4 ++--
12
1 file changed, 2 insertions(+), 2 deletions(-)
13
14
diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/test-blockjob-txn.c
17
+++ b/tests/test-blockjob-txn.c
18
@@ -XXX,XX +XXX,XX @@ typedef struct {
19
int *result;
20
} TestBlockJob;
21
22
-static void test_block_job_exit(Job *job)
23
+static void test_block_job_clean(Job *job)
24
{
25
BlockJob *bjob = container_of(job, BlockJob, job);
26
BlockDriverState *bs = blk_bs(bjob->blk);
27
@@ -XXX,XX +XXX,XX @@ static const BlockJobDriver test_block_job_driver = {
28
.user_resume = block_job_user_resume,
29
.drain = block_job_drain,
30
.run = test_block_job_run,
31
- .exit = test_block_job_exit,
32
+ .clean = test_block_job_clean,
33
},
34
};
35
36
--
37
2.17.1
38
39
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
Now that all of the jobs use the component finalization callbacks,
4
there's no use for the heavy-hammer .exit callback anymore.
5
6
job_exit becomes a glorified type shim so that we can call
7
job_completed from aio_bh_schedule_oneshot.
8
9
Move these three functions down into job.c to eliminate a
10
forward reference.
11
12
Signed-off-by: John Snow <jsnow@redhat.com>
13
Reviewed-by: Max Reitz <mreitz@redhat.com>
14
Message-id: 20180906130225.5118-12-jsnow@redhat.com
15
Reviewed-by: Jeff Cody <jcody@redhat.com>
16
Signed-off-by: Max Reitz <mreitz@redhat.com>
17
---
18
include/qemu/job.h | 11 -------
19
job.c | 77 ++++++++++++++++++++--------------------------
20
2 files changed, 34 insertions(+), 54 deletions(-)
21
22
diff --git a/include/qemu/job.h b/include/qemu/job.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/include/qemu/job.h
25
+++ b/include/qemu/job.h
26
@@ -XXX,XX +XXX,XX @@ struct JobDriver {
27
*/
28
void (*drain)(Job *job);
29
30
- /**
31
- * If the callback is not NULL, exit will be invoked from the main thread
32
- * when the job's coroutine has finished, but before transactional
33
- * convergence; before @prepare or @abort.
34
- *
35
- * FIXME TODO: This callback is only temporary to transition remaining jobs
36
- * to prepare/commit/abort/clean callbacks and will be removed before 3.1.
37
- * is released.
38
- */
39
- void (*exit)(Job *job);
40
-
41
/**
42
* If the callback is not NULL, prepare will be invoked when all the jobs
43
* belonging to the same transaction complete; or upon this job's completion
44
diff --git a/job.c b/job.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/job.c
47
+++ b/job.c
48
@@ -XXX,XX +XXX,XX @@ void job_drain(Job *job)
49
}
50
}
51
52
-static void job_completed(Job *job);
53
-
54
-static void job_exit(void *opaque)
55
-{
56
- Job *job = (Job *)opaque;
57
- AioContext *aio_context = job->aio_context;
58
-
59
- if (job->driver->exit) {
60
- aio_context_acquire(aio_context);
61
- job->driver->exit(job);
62
- aio_context_release(aio_context);
63
- }
64
- job_completed(job);
65
-}
66
-
67
-/**
68
- * All jobs must allow a pause point before entering their job proper. This
69
- * ensures that jobs can be paused prior to being started, then resumed later.
70
- */
71
-static void coroutine_fn job_co_entry(void *opaque)
72
-{
73
- Job *job = opaque;
74
-
75
- assert(job && job->driver && job->driver->run);
76
- job_pause_point(job);
77
- job->ret = job->driver->run(job, &job->err);
78
- job->deferred_to_main_loop = true;
79
- aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
80
-}
81
-
82
-
83
-void job_start(Job *job)
84
-{
85
- assert(job && !job_started(job) && job->paused &&
86
- job->driver && job->driver->run);
87
- job->co = qemu_coroutine_create(job_co_entry, job);
88
- job->pause_count--;
89
- job->busy = true;
90
- job->paused = false;
91
- job_state_transition(job, JOB_STATUS_RUNNING);
92
- aio_co_enter(job->aio_context, job->co);
93
-}
94
-
95
/* Assumes the block_job_mutex is held */
96
static bool job_timer_not_pending(Job *job)
97
{
98
@@ -XXX,XX +XXX,XX @@ static void job_completed(Job *job)
99
}
100
}
101
102
+/** Useful only as a type shim for aio_bh_schedule_oneshot. */
103
+static void job_exit(void *opaque)
104
+{
105
+ Job *job = (Job *)opaque;
106
+ job_completed(job);
107
+}
108
+
109
+/**
110
+ * All jobs must allow a pause point before entering their job proper. This
111
+ * ensures that jobs can be paused prior to being started, then resumed later.
112
+ */
113
+static void coroutine_fn job_co_entry(void *opaque)
114
+{
115
+ Job *job = opaque;
116
+
117
+ assert(job && job->driver && job->driver->run);
118
+ job_pause_point(job);
119
+ job->ret = job->driver->run(job, &job->err);
120
+ job->deferred_to_main_loop = true;
121
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
122
+}
123
+
124
+void job_start(Job *job)
125
+{
126
+ assert(job && !job_started(job) && job->paused &&
127
+ job->driver && job->driver->run);
128
+ job->co = qemu_coroutine_create(job_co_entry, job);
129
+ job->pause_count--;
130
+ job->busy = true;
131
+ job->paused = false;
132
+ job_state_transition(job, JOB_STATUS_RUNNING);
133
+ aio_co_enter(job->aio_context, job->co);
134
+}
135
+
136
void job_cancel(Job *job, bool force)
137
{
138
if (job->status == JOB_STATUS_CONCLUDED) {
139
--
140
2.17.1
141
142
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
Signed-off-by: John Snow <jsnow@redhat.com>
4
Reviewed-by: Max Reitz <mreitz@redhat.com>
5
Message-id: 20180906130225.5118-13-jsnow@redhat.com
6
Reviewed-by: Jeff Cody <jcody@redhat.com>
7
Signed-off-by: Max Reitz <mreitz@redhat.com>
8
---
9
qapi/block-core.json | 16 +++++++++++++++-
10
blockdev.c | 8 ++++++++
11
2 files changed, 23 insertions(+), 1 deletion(-)
12
13
diff --git a/qapi/block-core.json b/qapi/block-core.json
14
index XXXXXXX..XXXXXXX 100644
15
--- a/qapi/block-core.json
16
+++ b/qapi/block-core.json
17
@@ -XXX,XX +XXX,XX @@
18
# above @top. If this option is not given, a node name is
19
# autogenerated. (Since: 2.9)
20
#
21
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
22
+# finished its work, waiting for @block-job-finalize before
23
+# making any block graph changes.
24
+# When true, this job will automatically
25
+# perform its abort or commit actions.
26
+# Defaults to true. (Since 3.1)
27
+#
28
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
29
+# has completely ceased all work, and awaits @block-job-dismiss.
30
+# When true, this job will automatically disappear from the query
31
+# list without user intervention.
32
+# Defaults to true. (Since 3.1)
33
+#
34
# Returns: Nothing on success
35
# If @device does not exist, DeviceNotFound
36
# Any other error returns a GenericError.
37
@@ -XXX,XX +XXX,XX @@
38
{ 'command': 'block-commit',
39
'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str',
40
'*backing-file': 'str', '*speed': 'int',
41
- '*filter-node-name': 'str' } }
42
+ '*filter-node-name': 'str',
43
+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
44
45
##
46
# @drive-backup:
47
diff --git a/blockdev.c b/blockdev.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/blockdev.c
50
+++ b/blockdev.c
51
@@ -XXX,XX +XXX,XX @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
52
bool has_backing_file, const char *backing_file,
53
bool has_speed, int64_t speed,
54
bool has_filter_node_name, const char *filter_node_name,
55
+ bool has_auto_finalize, bool auto_finalize,
56
+ bool has_auto_dismiss, bool auto_dismiss,
57
Error **errp)
58
{
59
BlockDriverState *bs;
60
@@ -XXX,XX +XXX,XX @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
61
if (!has_filter_node_name) {
62
filter_node_name = NULL;
63
}
64
+ if (has_auto_finalize && !auto_finalize) {
65
+ job_flags |= JOB_MANUAL_FINALIZE;
66
+ }
67
+ if (has_auto_dismiss && !auto_dismiss) {
68
+ job_flags |= JOB_MANUAL_DISMISS;
69
+ }
70
71
/* Important Note:
72
* libvirt relies on the DeviceNotFound error class in order to probe for
73
--
74
2.17.1
75
76
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
Signed-off-by: John Snow <jsnow@redhat.com>
4
Reviewed-by: Max Reitz <mreitz@redhat.com>
5
Message-id: 20180906130225.5118-14-jsnow@redhat.com
6
Reviewed-by: Jeff Cody <jcody@redhat.com>
7
Signed-off-by: Max Reitz <mreitz@redhat.com>
8
---
9
qapi/block-core.json | 30 ++++++++++++++++++++++++++++--
10
blockdev.c | 14 ++++++++++++++
11
2 files changed, 42 insertions(+), 2 deletions(-)
12
13
diff --git a/qapi/block-core.json b/qapi/block-core.json
14
index XXXXXXX..XXXXXXX 100644
15
--- a/qapi/block-core.json
16
+++ b/qapi/block-core.json
17
@@ -XXX,XX +XXX,XX @@
18
# @copy-mode: when to copy data to the destination; defaults to 'background'
19
# (Since: 3.0)
20
#
21
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
22
+# finished its work, waiting for @block-job-finalize before
23
+# making any block graph changes.
24
+# When true, this job will automatically
25
+# perform its abort or commit actions.
26
+# Defaults to true. (Since 3.1)
27
+#
28
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
29
+# has completely ceased all work, and awaits @block-job-dismiss.
30
+# When true, this job will automatically disappear from the query
31
+# list without user intervention.
32
+# Defaults to true. (Since 3.1)
33
# Since: 1.3
34
##
35
{ 'struct': 'DriveMirror',
36
@@ -XXX,XX +XXX,XX @@
37
'*speed': 'int', '*granularity': 'uint32',
38
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
39
'*on-target-error': 'BlockdevOnError',
40
- '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode' } }
41
+ '*unmap': 'bool', '*copy-mode': 'MirrorCopyMode',
42
+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
43
44
##
45
# @BlockDirtyBitmap:
46
@@ -XXX,XX +XXX,XX @@
47
# @copy-mode: when to copy data to the destination; defaults to 'background'
48
# (Since: 3.0)
49
#
50
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
51
+# finished its work, waiting for @block-job-finalize before
52
+# making any block graph changes.
53
+# When true, this job will automatically
54
+# perform its abort or commit actions.
55
+# Defaults to true. (Since 3.1)
56
+#
57
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
58
+# has completely ceased all work, and awaits @block-job-dismiss.
59
+# When true, this job will automatically disappear from the query
60
+# list without user intervention.
61
+# Defaults to true. (Since 3.1)
62
# Returns: nothing on success.
63
#
64
# Since: 2.6
65
@@ -XXX,XX +XXX,XX @@
66
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
67
'*on-target-error': 'BlockdevOnError',
68
'*filter-node-name': 'str',
69
- '*copy-mode': 'MirrorCopyMode' } }
70
+ '*copy-mode': 'MirrorCopyMode',
71
+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
72
73
##
74
# @block_set_io_throttle:
75
diff --git a/blockdev.c b/blockdev.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/blockdev.c
78
+++ b/blockdev.c
79
@@ -XXX,XX +XXX,XX @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
80
bool has_filter_node_name,
81
const char *filter_node_name,
82
bool has_copy_mode, MirrorCopyMode copy_mode,
83
+ bool has_auto_finalize, bool auto_finalize,
84
+ bool has_auto_dismiss, bool auto_dismiss,
85
Error **errp)
86
{
87
int job_flags = JOB_DEFAULT;
88
@@ -XXX,XX +XXX,XX @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
89
if (!has_copy_mode) {
90
copy_mode = MIRROR_COPY_MODE_BACKGROUND;
91
}
92
+ if (has_auto_finalize && !auto_finalize) {
93
+ job_flags |= JOB_MANUAL_FINALIZE;
94
+ }
95
+ if (has_auto_dismiss && !auto_dismiss) {
96
+ job_flags |= JOB_MANUAL_DISMISS;
97
+ }
98
99
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
100
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
101
@@ -XXX,XX +XXX,XX @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
102
arg->has_unmap, arg->unmap,
103
false, NULL,
104
arg->has_copy_mode, arg->copy_mode,
105
+ arg->has_auto_finalize, arg->auto_finalize,
106
+ arg->has_auto_dismiss, arg->auto_dismiss,
107
&local_err);
108
bdrv_unref(target_bs);
109
error_propagate(errp, local_err);
110
@@ -XXX,XX +XXX,XX @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
111
bool has_filter_node_name,
112
const char *filter_node_name,
113
bool has_copy_mode, MirrorCopyMode copy_mode,
114
+ bool has_auto_finalize, bool auto_finalize,
115
+ bool has_auto_dismiss, bool auto_dismiss,
116
Error **errp)
117
{
118
BlockDriverState *bs;
119
@@ -XXX,XX +XXX,XX @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
120
true, true,
121
has_filter_node_name, filter_node_name,
122
has_copy_mode, copy_mode,
123
+ has_auto_finalize, auto_finalize,
124
+ has_auto_dismiss, auto_dismiss,
125
&local_err);
126
error_propagate(errp, local_err);
127
128
--
129
2.17.1
130
131
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
Signed-off-by: John Snow <jsnow@redhat.com>
4
Reviewed-by: Max Reitz <mreitz@redhat.com>
5
Message-id: 20180906130225.5118-15-jsnow@redhat.com
6
Reviewed-by: Jeff Cody <jcody@redhat.com>
7
Signed-off-by: Max Reitz <mreitz@redhat.com>
8
---
9
qapi/block-core.json | 16 +++++++++++++++-
10
blockdev.c | 9 +++++++++
11
hmp.c | 5 +++--
12
3 files changed, 27 insertions(+), 3 deletions(-)
13
14
diff --git a/qapi/block-core.json b/qapi/block-core.json
15
index XXXXXXX..XXXXXXX 100644
16
--- a/qapi/block-core.json
17
+++ b/qapi/block-core.json
18
@@ -XXX,XX +XXX,XX @@
19
# 'stop' and 'enospc' can only be used if the block device
20
# supports io-status (see BlockInfo). Since 1.3.
21
#
22
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
23
+# finished its work, waiting for @block-job-finalize before
24
+# making any block graph changes.
25
+# When true, this job will automatically
26
+# perform its abort or commit actions.
27
+# Defaults to true. (Since 3.1)
28
+#
29
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
30
+# has completely ceased all work, and awaits @block-job-dismiss.
31
+# When true, this job will automatically disappear from the query
32
+# list without user intervention.
33
+# Defaults to true. (Since 3.1)
34
+#
35
# Returns: Nothing on success. If @device does not exist, DeviceNotFound.
36
#
37
# Since: 1.1
38
@@ -XXX,XX +XXX,XX @@
39
{ 'command': 'block-stream',
40
'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
41
'*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
42
- '*on-error': 'BlockdevOnError' } }
43
+ '*on-error': 'BlockdevOnError',
44
+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
45
46
##
47
# @block-job-set-speed:
48
diff --git a/blockdev.c b/blockdev.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/blockdev.c
51
+++ b/blockdev.c
52
@@ -XXX,XX +XXX,XX @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
53
bool has_backing_file, const char *backing_file,
54
bool has_speed, int64_t speed,
55
bool has_on_error, BlockdevOnError on_error,
56
+ bool has_auto_finalize, bool auto_finalize,
57
+ bool has_auto_dismiss, bool auto_dismiss,
58
Error **errp)
59
{
60
BlockDriverState *bs, *iter;
61
@@ -XXX,XX +XXX,XX @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
62
/* backing_file string overrides base bs filename */
63
base_name = has_backing_file ? backing_file : base_name;
64
65
+ if (has_auto_finalize && !auto_finalize) {
66
+ job_flags |= JOB_MANUAL_FINALIZE;
67
+ }
68
+ if (has_auto_dismiss && !auto_dismiss) {
69
+ job_flags |= JOB_MANUAL_DISMISS;
70
+ }
71
+
72
stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
73
job_flags, has_speed ? speed : 0, on_error, &local_err);
74
if (local_err) {
75
diff --git a/hmp.c b/hmp.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/hmp.c
78
+++ b/hmp.c
79
@@ -XXX,XX +XXX,XX @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
80
int64_t speed = qdict_get_try_int(qdict, "speed", 0);
81
82
qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
83
- false, NULL, qdict_haskey(qdict, "speed"), speed,
84
- true, BLOCKDEV_ON_ERROR_REPORT, &error);
85
+ false, NULL, qdict_haskey(qdict, "speed"), speed, true,
86
+ BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
87
+ &error);
88
89
hmp_handle_error(mon, &error);
90
}
91
--
92
2.17.1
93
94
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
Fix documentation to match the other jobs amended for 3.1.
4
5
Signed-off-by: John Snow <jsnow@redhat.com>
6
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Message-id: 20180906130225.5118-16-jsnow@redhat.com
8
Reviewed-by: Jeff Cody <jcody@redhat.com>
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
---
11
qapi/block-core.json | 18 ++++++++++--------
12
1 file changed, 10 insertions(+), 8 deletions(-)
13
14
diff --git a/qapi/block-core.json b/qapi/block-core.json
15
index XXXXXXX..XXXXXXX 100644
16
--- a/qapi/block-core.json
17
+++ b/qapi/block-core.json
18
@@ -XXX,XX +XXX,XX @@
19
# a different block device than @device).
20
#
21
# @auto-finalize: When false, this job will wait in a PENDING state after it has
22
-# finished its work, waiting for @block-job-finalize.
23
-# When true, this job will automatically perform its abort or
24
-# commit actions.
25
+# finished its work, waiting for @block-job-finalize before
26
+# making any block graph changes.
27
+# When true, this job will automatically
28
+# perform its abort or commit actions.
29
# Defaults to true. (Since 2.12)
30
#
31
# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
32
-# has completed ceased all work, and wait for @block-job-dismiss.
33
+# has completely ceased all work, and awaits @block-job-dismiss.
34
# When true, this job will automatically disappear from the query
35
# list without user intervention.
36
# Defaults to true. (Since 2.12)
37
@@ -XXX,XX +XXX,XX @@
38
# a different block device than @device).
39
#
40
# @auto-finalize: When false, this job will wait in a PENDING state after it has
41
-# finished its work, waiting for @block-job-finalize.
42
-# When true, this job will automatically perform its abort or
43
-# commit actions.
44
+# finished its work, waiting for @block-job-finalize before
45
+# making any block graph changes.
46
+# When true, this job will automatically
47
+# perform its abort or commit actions.
48
# Defaults to true. (Since 2.12)
49
#
50
# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
51
-# has completed ceased all work, and wait for @block-job-dismiss.
52
+# has completely ceased all work, and awaits @block-job-dismiss.
53
# When true, this job will automatically disappear from the query
54
# list without user intervention.
55
# Defaults to true. (Since 2.12)
56
--
57
2.17.1
58
59
diff view generated by jsdifflib
Deleted patch
1
From: John Snow <jsnow@redhat.com>
2
1
3
Presently only the backup job really guarantees what one would consider
4
transactional semantics. To guard against someone helpfully adding them
5
in the future, document that there are shortcomings in the model that
6
would need to be audited at that time.
7
8
Signed-off-by: John Snow <jsnow@redhat.com>
9
Message-id: 20180906130225.5118-17-jsnow@redhat.com
10
Reviewed-by: Jeff Cody <jcody@redhat.com>
11
Reviewed-by: Max Reitz <mreitz@redhat.com>
12
Signed-off-by: Max Reitz <mreitz@redhat.com>
13
---
14
blockdev.c | 8 +++++++-
15
1 file changed, 7 insertions(+), 1 deletion(-)
16
17
diff --git a/blockdev.c b/blockdev.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/blockdev.c
20
+++ b/blockdev.c
21
@@ -XXX,XX +XXX,XX @@ static const BlkActionOps actions[] = {
22
.instance_size = sizeof(BlockDirtyBitmapState),
23
.prepare = block_dirty_bitmap_disable_prepare,
24
.abort = block_dirty_bitmap_disable_abort,
25
- }
26
+ },
27
+ /* Where are transactions for MIRROR, COMMIT and STREAM?
28
+ * Although these blockjobs use transaction callbacks like the backup job,
29
+ * these jobs do not necessarily adhere to transaction semantics.
30
+ * These jobs may not fully undo all of their actions on abort, nor do they
31
+ * necessarily work in transactions with more than one job in them.
32
+ */
33
};
34
35
/**
36
--
37
2.17.1
38
39
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
The block-commit QMP command required specifying the top and base nodes
4
of the commit jobs using the file name of that node. While this works
5
in simple cases (local files with absolute paths), the file names
6
generated for more complicated setups can be hard to predict.
7
8
The block-commit command has more problems than just this, so we want to
9
replace it altogether in the long run, but libvirt needs a reliable way
10
to address nodes now. So we don't want to wait for a new, cleaner
11
command, but just add the minimal thing needed right now.
12
13
This adds two new options top-node and base-node to the command, which
14
allow specifying node names instead. They are mutually exclusive with
15
the old options.
16
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
---
19
qapi/block-core.json | 24 ++++++++++++++++++------
20
blockdev.c | 32 ++++++++++++++++++++++++++++++--
21
2 files changed, 48 insertions(+), 8 deletions(-)
22
23
diff --git a/qapi/block-core.json b/qapi/block-core.json
24
index XXXXXXX..XXXXXXX 100644
25
--- a/qapi/block-core.json
26
+++ b/qapi/block-core.json
27
@@ -XXX,XX +XXX,XX @@
28
#
29
# @device: the device name or node-name of a root node
30
#
31
-# @base: The file name of the backing image to write data into.
32
-# If not specified, this is the deepest backing image.
33
+# @base-node: The node name of the backing image to write data into.
34
+# If not specified, this is the deepest backing image.
35
+# (since: 3.1)
36
#
37
-# @top: The file name of the backing image within the image chain,
38
-# which contains the topmost data to be committed down. If
39
-# not specified, this is the active layer.
40
+# @base: Same as @base-node, except that it is a file name rather than a node
41
+# name. This must be the exact filename string that was used to open the
42
+# node; other strings, even if addressing the same file, are not
43
+# accepted (deprecated, use @base-node instead)
44
+#
45
+# @top-node: The node name of the backing image within the image chain
46
+# which contains the topmost data to be committed down. If
47
+# not specified, this is the active layer. (since: 3.1)
48
+#
49
+# @top: Same as @top-node, except that it is a file name rather than a node
50
+# name. This must be the exact filename string that was used to open the
51
+# node; other strings, even if addressing the same file, are not
52
+# accepted (deprecated, use @base-node instead)
53
#
54
# @backing-file: The backing file string to write into the overlay
55
# image of 'top'. If 'top' is the active layer,
56
@@ -XXX,XX +XXX,XX @@
57
#
58
##
59
{ 'command': 'block-commit',
60
- 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str',
61
+ 'data': { '*job-id': 'str', 'device': 'str', '*base-node': 'str',
62
+ '*base': 'str', '*top-node': 'str', '*top': 'str',
63
'*backing-file': 'str', '*speed': 'int',
64
'*filter-node-name': 'str',
65
'*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
66
diff --git a/blockdev.c b/blockdev.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/blockdev.c
69
+++ b/blockdev.c
70
@@ -XXX,XX +XXX,XX @@ out:
71
}
72
73
void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
74
+ bool has_base_node, const char *base_node,
75
bool has_base, const char *base,
76
+ bool has_top_node, const char *top_node,
77
bool has_top, const char *top,
78
bool has_backing_file, const char *backing_file,
79
bool has_speed, int64_t speed,
80
@@ -XXX,XX +XXX,XX @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
81
/* default top_bs is the active layer */
82
top_bs = bs;
83
84
- if (has_top && top) {
85
+ if (has_top_node && has_top) {
86
+ error_setg(errp, "'top-node' and 'top' are mutually exclusive");
87
+ goto out;
88
+ } else if (has_top_node) {
89
+ top_bs = bdrv_lookup_bs(NULL, top_node, errp);
90
+ if (top_bs == NULL) {
91
+ goto out;
92
+ }
93
+ if (!bdrv_chain_contains(bs, top_bs)) {
94
+ error_setg(errp, "'%s' is not in this backing file chain",
95
+ top_node);
96
+ goto out;
97
+ }
98
+ } else if (has_top && top) {
99
if (strcmp(bs->filename, top) != 0) {
100
top_bs = bdrv_find_backing_image(bs, top);
101
}
102
@@ -XXX,XX +XXX,XX @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
103
104
assert(bdrv_get_aio_context(top_bs) == aio_context);
105
106
- if (has_base && base) {
107
+ if (has_base_node && has_base) {
108
+ error_setg(errp, "'base-node' and 'base' are mutually exclusive");
109
+ goto out;
110
+ } else if (has_base_node) {
111
+ base_bs = bdrv_lookup_bs(NULL, base_node, errp);
112
+ if (base_bs == NULL) {
113
+ goto out;
114
+ }
115
+ if (!bdrv_chain_contains(top_bs, base_bs)) {
116
+ error_setg(errp, "'%s' is not in this backing file chain",
117
+ base_node);
118
+ goto out;
119
+ }
120
+ } else if (has_base && base) {
121
base_bs = bdrv_find_backing_image(top_bs, base);
122
} else {
123
base_bs = bdrv_find_base(top_bs);
124
--
125
2.17.1
126
127
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
This adds some tests for block-commit with the new options top-node and
4
base-node (taking node names) instead of top and base (taking file
5
names).
6
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
---
9
tests/qemu-iotests/040 | 52 ++++++++++++++++++++++++++++++++++++--
10
tests/qemu-iotests/040.out | 4 +--
11
2 files changed, 52 insertions(+), 4 deletions(-)
12
13
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
14
index XXXXXXX..XXXXXXX 100755
15
--- a/tests/qemu-iotests/040
16
+++ b/tests/qemu-iotests/040
17
@@ -XXX,XX +XXX,XX @@ class ImageCommitTestCase(iotests.QMPTestCase):
18
self.assert_no_active_block_jobs()
19
self.vm.shutdown()
20
21
- def run_commit_test(self, top, base, need_ready=False):
22
+ def run_commit_test(self, top, base, need_ready=False, node_names=False):
23
self.assert_no_active_block_jobs()
24
- result = self.vm.qmp('block-commit', device='drive0', top=top, base=base)
25
+ if node_names:
26
+ result = self.vm.qmp('block-commit', device='drive0', top_node=top, base_node=base)
27
+ else:
28
+ result = self.vm.qmp('block-commit', device='drive0', top=top, base=base)
29
self.assert_qmp(result, 'return', {})
30
self.wait_for_complete(need_ready)
31
32
@@ -XXX,XX +XXX,XX @@ class TestSingleDrive(ImageCommitTestCase):
33
self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
34
self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
35
36
+ def test_commit_node(self):
37
+ self.run_commit_test("mid", "base", node_names=True)
38
+ self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
39
+ self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
40
+
41
def test_device_not_found(self):
42
result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img)
43
self.assert_qmp(result, 'error/class', 'DeviceNotFound')
44
@@ -XXX,XX +XXX,XX @@ class TestSingleDrive(ImageCommitTestCase):
45
self.assert_qmp(result, 'error/class', 'GenericError')
46
self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
47
48
+ def test_top_node_invalid(self):
49
+ self.assert_no_active_block_jobs()
50
+ result = self.vm.qmp('block-commit', device='drive0', top_node='badfile', base_node='base')
51
+ self.assert_qmp(result, 'error/class', 'GenericError')
52
+ self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile")
53
+
54
+ def test_base_node_invalid(self):
55
+ self.assert_no_active_block_jobs()
56
+ result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='badfile')
57
+ self.assert_qmp(result, 'error/class', 'GenericError')
58
+ self.assert_qmp(result, 'error/desc', "Cannot find device= nor node_name=badfile")
59
+
60
+ def test_top_path_and_node(self):
61
+ self.assert_no_active_block_jobs()
62
+ result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='base', top='%s' % mid_img)
63
+ self.assert_qmp(result, 'error/class', 'GenericError')
64
+ self.assert_qmp(result, 'error/desc', "'top-node' and 'top' are mutually exclusive")
65
+
66
+ def test_base_path_and_node(self):
67
+ self.assert_no_active_block_jobs()
68
+ result = self.vm.qmp('block-commit', device='drive0', top_node='mid', base_node='base', base='%s' % backing_img)
69
+ self.assert_qmp(result, 'error/class', 'GenericError')
70
+ self.assert_qmp(result, 'error/desc', "'base-node' and 'base' are mutually exclusive")
71
+
72
def test_top_is_active(self):
73
self.run_commit_test(test_img, backing_img, need_ready=True)
74
self.assertEqual(-1, qemu_io('-f', 'raw', '-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
75
@@ -XXX,XX +XXX,XX @@ class TestSingleDrive(ImageCommitTestCase):
76
self.assert_qmp(result, 'error/class', 'GenericError')
77
self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % mid_img)
78
79
+ def test_top_and_base_node_reversed(self):
80
+ self.assert_no_active_block_jobs()
81
+ result = self.vm.qmp('block-commit', device='drive0', top_node='base', base_node='top')
82
+ self.assert_qmp(result, 'error/class', 'GenericError')
83
+ self.assert_qmp(result, 'error/desc', "'top' is not in this backing file chain")
84
+
85
+ def test_top_node_in_wrong_chain(self):
86
+ self.assert_no_active_block_jobs()
87
+
88
+ result = self.vm.qmp('blockdev-add', driver='null-co', node_name='null')
89
+ self.assert_qmp(result, 'return', {})
90
+
91
+ result = self.vm.qmp('block-commit', device='drive0', top_node='null', base_node='base')
92
+ self.assert_qmp(result, 'error/class', 'GenericError')
93
+ self.assert_qmp(result, 'error/desc', "'null' is not in this backing file chain")
94
+
95
# When the job is running on a BB that is automatically deleted on hot
96
# unplug, the job is cancelled when the device disappears
97
def test_hot_unplug(self):
98
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
99
index XXXXXXX..XXXXXXX 100644
100
--- a/tests/qemu-iotests/040.out
101
+++ b/tests/qemu-iotests/040.out
102
@@ -XXX,XX +XXX,XX @@
103
-.............................
104
+...........................................
105
----------------------------------------------------------------------
106
-Ran 29 tests
107
+Ran 43 tests
108
109
OK
110
--
111
2.17.1
112
113
diff view generated by jsdifflib
Deleted patch
1
From: Sergio Lopez <slp@redhat.com>
2
1
3
In qemu_laio_process_completions_and_submit, the AioContext is acquired
4
before the ioq_submit iteration and after qemu_laio_process_completions,
5
but the latter is not thread safe either.
6
7
This change avoids a number of random crashes when the Main Thread and
8
an IO Thread collide processing completions for the same AioContext.
9
This is an example of such crash:
10
11
- The IO Thread is trying to acquire the AioContext at aio_co_enter,
12
which evidences that it didn't lock it before:
13
14
Thread 3 (Thread 0x7fdfd8bd8700 (LWP 36743)):
15
#0 0x00007fdfe0dd542d in __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
16
#1 0x00007fdfe0dd0de6 in _L_lock_870 () at /lib64/libpthread.so.0
17
#2 0x00007fdfe0dd0cdf in __GI___pthread_mutex_lock (mutex=mutex@entry=0x5631fde0e6c0)
18
at ../nptl/pthread_mutex_lock.c:114
19
#3 0x00005631fc0603a7 in qemu_mutex_lock_impl (mutex=0x5631fde0e6c0, file=0x5631fc23520f "util/async.c", line=511) at util/qemu-thread-posix.c:66
20
#4 0x00005631fc05b558 in aio_co_enter (ctx=0x5631fde0e660, co=0x7fdfcc0c2b40) at util/async.c:493
21
#5 0x00005631fc05b5ac in aio_co_wake (co=<optimized out>) at util/async.c:478
22
#6 0x00005631fbfc51ad in qemu_laio_process_completion (laiocb=<optimized out>) at block/linux-aio.c:104
23
#7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670)
24
at block/linux-aio.c:222
25
#8 0x00005631fbfc5499 in qemu_laio_process_completions_and_submit (s=0x7fdfc0297670)
26
at block/linux-aio.c:237
27
#9 0x00005631fc05d978 in aio_dispatch_handlers (ctx=ctx@entry=0x5631fde0e660) at util/aio-posix.c:406
28
#10 0x00005631fc05e3ea in aio_poll (ctx=0x5631fde0e660, blocking=blocking@entry=true)
29
at util/aio-posix.c:693
30
#11 0x00005631fbd7ad96 in iothread_run (opaque=0x5631fde0e1c0) at iothread.c:64
31
#12 0x00007fdfe0dcee25 in start_thread (arg=0x7fdfd8bd8700) at pthread_create.c:308
32
#13 0x00007fdfe0afc34d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113
33
34
- The Main Thread is also processing completions from the same
35
AioContext, and crashes due to failed assertion at util/iov.c:78:
36
37
Thread 1 (Thread 0x7fdfeb5eac80 (LWP 36740)):
38
#0 0x00007fdfe0a391f7 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
39
#1 0x00007fdfe0a3a8e8 in __GI_abort () at abort.c:90
40
#2 0x00007fdfe0a32266 in __assert_fail_base (fmt=0x7fdfe0b84e68 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=assertion@entry=0x5631fc238ccb "offset == 0", file=file@entry=0x5631fc23698e "util/iov.c", line=line@entry=78, function=function@entry=0x5631fc236adc <__PRETTY_FUNCTION__.15220> "iov_memset")
41
at assert.c:92
42
#3 0x00007fdfe0a32312 in __GI___assert_fail (assertion=assertion@entry=0x5631fc238ccb "offset == 0", file=file@entry=0x5631fc23698e "util/iov.c", line=line@entry=78, function=function@entry=0x5631fc236adc <__PRETTY_FUNCTION__.15220> "iov_memset") at assert.c:101
43
#4 0x00005631fc065287 in iov_memset (iov=<optimized out>, iov_cnt=<optimized out>, offset=<optimized out>, offset@entry=65536, fillc=fillc@entry=0, bytes=15515191315812405248) at util/iov.c:78
44
#5 0x00005631fc065a63 in qemu_iovec_memset (qiov=<optimized out>, offset=offset@entry=65536, fillc=fillc@entry=0, bytes=<optimized out>) at util/iov.c:410
45
#6 0x00005631fbfc5178 in qemu_laio_process_completion (laiocb=0x7fdd920df630) at block/linux-aio.c:88
46
#7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670)
47
at block/linux-aio.c:222
48
#8 0x00005631fbfc5499 in qemu_laio_process_completions_and_submit (s=0x7fdfc0297670)
49
at block/linux-aio.c:237
50
#9 0x00005631fbfc54ed in qemu_laio_poll_cb (opaque=<optimized out>) at block/linux-aio.c:272
51
#10 0x00005631fc05d85e in run_poll_handlers_once (ctx=ctx@entry=0x5631fde0e660) at util/aio-posix.c:497
52
#11 0x00005631fc05e2ca in aio_poll (blocking=false, ctx=0x5631fde0e660) at util/aio-posix.c:574
53
#12 0x00005631fc05e2ca in aio_poll (ctx=0x5631fde0e660, blocking=blocking@entry=false)
54
at util/aio-posix.c:604
55
#13 0x00005631fbfcb8a3 in bdrv_do_drained_begin (ignore_parent=<optimized out>, recursive=<optimized out>, bs=<optimized out>) at block/io.c:273
56
#14 0x00005631fbfcb8a3 in bdrv_do_drained_begin (bs=0x5631fe8b6200, recursive=<optimized out>, parent=0x0, ignore_bds_parents=<optimized out>, poll=<optimized out>) at block/io.c:390
57
#15 0x00005631fbfbcd2e in blk_drain (blk=0x5631fe83ac80) at block/block-backend.c:1590
58
#16 0x00005631fbfbe138 in blk_remove_bs (blk=blk@entry=0x5631fe83ac80) at block/block-backend.c:774
59
#17 0x00005631fbfbe3d6 in blk_unref (blk=0x5631fe83ac80) at block/block-backend.c:401
60
#18 0x00005631fbfbe3d6 in blk_unref (blk=0x5631fe83ac80) at block/block-backend.c:449
61
#19 0x00005631fbfc9a69 in commit_complete (job=0x5631fe8b94b0, opaque=0x7fdfcc1bb080)
62
at block/commit.c:92
63
#20 0x00005631fbf7d662 in job_defer_to_main_loop_bh (opaque=0x7fdfcc1b4560) at job.c:973
64
#21 0x00005631fc05ad41 in aio_bh_poll (bh=0x7fdfcc01ad90) at util/async.c:90
65
#22 0x00005631fc05ad41 in aio_bh_poll (ctx=ctx@entry=0x5631fddffdb0) at util/async.c:118
66
#23 0x00005631fc05e210 in aio_dispatch (ctx=0x5631fddffdb0) at util/aio-posix.c:436
67
#24 0x00005631fc05ac1e in aio_ctx_dispatch (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at util/async.c:261
68
#25 0x00007fdfeaae44c9 in g_main_context_dispatch (context=0x5631fde00140) at gmain.c:3201
69
#26 0x00007fdfeaae44c9 in g_main_context_dispatch (context=context@entry=0x5631fde00140) at gmain.c:3854
70
#27 0x00005631fc05d503 in main_loop_wait () at util/main-loop.c:215
71
#28 0x00005631fc05d503 in main_loop_wait (timeout=<optimized out>) at util/main-loop.c:238
72
#29 0x00005631fc05d503 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:497
73
#30 0x00005631fbd81412 in main_loop () at vl.c:1866
74
#31 0x00005631fbc18ff3 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>)
75
at vl.c:4647
76
77
- A closer examination shows that s->io_q.in_flight appears to have
78
gone backwards:
79
80
(gdb) frame 7
81
#7 0x00005631fbfc523c in qemu_laio_process_completions (s=s@entry=0x7fdfc0297670)
82
at block/linux-aio.c:222
83
222     qemu_laio_process_completion(laiocb);
84
(gdb) p s
85
$2 = (LinuxAioState *) 0x7fdfc0297670
86
(gdb) p *s
87
$3 = {aio_context = 0x5631fde0e660, ctx = 0x7fdfeb43b000, e = {rfd = 33, wfd = 33}, io_q = {plugged = 0,
88
in_queue = 0, in_flight = 4294967280, blocked = false, pending = {sqh_first = 0x0,
89
sqh_last = 0x7fdfc0297698}}, completion_bh = 0x7fdfc0280ef0, event_idx = 21, event_max = 241}
90
(gdb) p/x s->io_q.in_flight
91
$4 = 0xfffffff0
92
93
Signed-off-by: Sergio Lopez <slp@redhat.com>
94
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
95
---
96
block/linux-aio.c | 2 +-
97
1 file changed, 1 insertion(+), 1 deletion(-)
98
99
diff --git a/block/linux-aio.c b/block/linux-aio.c
100
index XXXXXXX..XXXXXXX 100644
101
--- a/block/linux-aio.c
102
+++ b/block/linux-aio.c
103
@@ -XXX,XX +XXX,XX @@ static void qemu_laio_process_completions(LinuxAioState *s)
104
105
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
106
{
107
+ aio_context_acquire(s->aio_context);
108
qemu_laio_process_completions(s);
109
110
- aio_context_acquire(s->aio_context);
111
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
112
ioq_submit(s);
113
}
114
--
115
2.17.1
116
117
diff view generated by jsdifflib
Deleted patch
1
From: Alberto Garcia <berto@igalia.com>
2
1
3
When a block device is opened with BDRV_O_SNAPSHOT and the
4
bdrv_append_temp_snapshot() call fails then the error code path tries
5
to unref the already destroyed 'options' QDict.
6
7
This can be reproduced easily by setting TMPDIR to a location where
8
the QEMU process can't write:
9
10
$ TMPDIR=/nonexistent $QEMU -drive driver=null-co,snapshot=on
11
12
Signed-off-by: Alberto Garcia <berto@igalia.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
---
15
block.c | 1 +
16
1 file changed, 1 insertion(+)
17
18
diff --git a/block.c b/block.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/block.c
21
+++ b/block.c
22
@@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
23
bdrv_parent_cb_change_media(bs, true);
24
25
qobject_unref(options);
26
+ options = NULL;
27
28
/* For snapshot=on, create a temporary qcow2 overlay. bs points to the
29
* temporary snapshot afterwards. */
30
--
31
2.17.1
32
33
diff view generated by jsdifflib
Deleted patch
1
From: Alberto Garcia <berto@igalia.com>
2
1
3
We just fixed a bug that was causing a use-after-free when QEMU was
4
unable to create a temporary snapshot. This is a test case for this
5
scenario.
6
7
Signed-off-by: Alberto Garcia <berto@igalia.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
---
10
tests/qemu-iotests/051 | 3 +++
11
tests/qemu-iotests/051.out | 3 +++
12
tests/qemu-iotests/051.pc.out | 3 +++
13
3 files changed, 9 insertions(+)
14
15
diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
16
index XXXXXXX..XXXXXXX 100755
17
--- a/tests/qemu-iotests/051
18
+++ b/tests/qemu-iotests/051
19
@@ -XXX,XX +XXX,XX @@ printf %b "qemu-io $device_id \"write -P 0x33 0 4k\"\ncommit $device_id\n" |
20
21
$QEMU_IO -c "read -P 0x33 0 4k" "$TEST_IMG" | _filter_qemu_io
22
23
+# Using snapshot=on with a non-existent TMPDIR
24
+TMPDIR=/nonexistent run_qemu -drive driver=null-co,snapshot=on
25
+
26
# success, all done
27
echo "*** done"
28
rm -f $seq.full
29
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
30
index XXXXXXX..XXXXXXX 100644
31
--- a/tests/qemu-iotests/051.out
32
+++ b/tests/qemu-iotests/051.out
33
@@ -XXX,XX +XXX,XX @@ wrote 4096/4096 bytes at offset 0
34
35
read 4096/4096 bytes at offset 0
36
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
37
+Testing: -drive driver=null-co,snapshot=on
38
+QEMU_PROG: -drive driver=null-co,snapshot=on: Could not get temporary filename: No such file or directory
39
+
40
*** done
41
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
42
index XXXXXXX..XXXXXXX 100644
43
--- a/tests/qemu-iotests/051.pc.out
44
+++ b/tests/qemu-iotests/051.pc.out
45
@@ -XXX,XX +XXX,XX @@ wrote 4096/4096 bytes at offset 0
46
47
read 4096/4096 bytes at offset 0
48
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
49
+Testing: -drive driver=null-co,snapshot=on
50
+QEMU_PROG: -drive driver=null-co,snapshot=on: Could not get temporary filename: No such file or directory
51
+
52
*** done
53
--
54
2.17.1
55
56
diff view generated by jsdifflib
1
From: Kevin Wolf <kwolf@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
When draining a block node, we recurse to its parent and for subtree
3
Any thread that is not a iothread returns NULL for qemu_get_current_aio_context().
4
drains also to its children. A single AIO_WAIT_WHILE() is then used to
4
As a result, it would also return true for
5
wait for bdrv_drain_poll() to become true, which depends on all of the
5
in_aio_context_home_thread(qemu_get_aio_context()), causing
6
nodes we recursed to. However, if the respective child or parent becomes
6
AIO_WAIT_WHILE to invoke aio_poll() directly. This is incorrect
7
quiescent and calls bdrv_wakeup(), only the AioWait of the child/parent
7
if the BQL is not held, because aio_poll() does not expect to
8
is checked, while AIO_WAIT_WHILE() depends on the AioWait of the
8
run concurrently from multiple threads, and it can actually
9
original node.
9
happen when savevm writes to the vmstate file from the
10
migration thread.
10
11
11
Fix this by using a single AioWait for all callers of AIO_WAIT_WHILE().
12
Therefore, restrict in_aio_context_home_thread to return true
13
for the main AioContext only if the BQL is held.
12
14
13
This may mean that the draining thread gets a few more unnecessary
15
The function is moved to aio-wait.h because it is mostly used
14
wakeups because an unrelated operation got completed, but we already
16
there and to avoid a circular reference between main-loop.h
15
wake it up when something _could_ have changed rather than only if it
17
and block/aio.h.
16
has certainly changed.
17
18
18
Apart from that, drain is a slow path anyway. In theory it would be
19
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
19
possible to use wakeups more selectively and still correctly, but the
20
Message-Id: <20200407140746.8041-5-pbonzini@redhat.com>
20
gains are likely not worth the additional complexity. In fact, this
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
21
patch is a nice simplification for some places in the code.
22
23
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
24
Reviewed-by: Eric Blake <eblake@redhat.com>
25
Reviewed-by: Max Reitz <mreitz@redhat.com>
26
---
22
---
27
include/block/aio-wait.h | 22 +++++++++++-----------
23
include/block/aio-wait.h | 22 ++++++++++++++++++++++
28
include/block/block.h | 6 +-----
24
include/block/aio.h | 29 ++++++++++-------------------
29
include/block/block_int.h | 3 ---
25
2 files changed, 32 insertions(+), 19 deletions(-)
30
include/block/blockjob.h | 10 ----------
31
block.c | 5 -----
32
block/block-backend.c | 11 ++++-------
33
block/io.c | 7 ++-----
34
blockjob.c | 13 +------------
35
job.c | 3 +--
36
util/aio-wait.c | 11 ++++++-----
37
10 files changed, 26 insertions(+), 65 deletions(-)
38
26
39
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
27
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
40
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
41
--- a/include/block/aio-wait.h
29
--- a/include/block/aio-wait.h
42
+++ b/include/block/aio-wait.h
30
+++ b/include/block/aio-wait.h
43
@@ -XXX,XX +XXX,XX @@
31
@@ -XXX,XX +XXX,XX @@
32
#define QEMU_AIO_WAIT_H
33
34
#include "block/aio.h"
35
+#include "qemu/main-loop.h"
36
44
/**
37
/**
45
* AioWait:
38
* AioWait:
46
*
39
@@ -XXX,XX +XXX,XX @@ void aio_wait_kick(void);
47
- * An object that facilitates synchronous waiting on a condition. The main
40
*/
48
- * loop can wait on an operation running in an IOThread as follows:
41
void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
49
+ * An object that facilitates synchronous waiting on a condition. A single
42
50
+ * global AioWait object (global_aio_wait) is used internally.
43
+/**
44
+ * in_aio_context_home_thread:
45
+ * @ctx: the aio context
51
+ *
46
+ *
52
+ * The main loop can wait on an operation running in an IOThread as follows:
47
+ * Return whether we are running in the thread that normally runs @ctx. Note
53
*
48
+ * that acquiring/releasing ctx does not affect the outcome, each AioContext
54
- * AioWait *wait = ...;
49
+ * still only has one home thread that is responsible for running it.
55
* AioContext *ctx = ...;
50
+ */
56
* MyWork work = { .done = false };
51
+static inline bool in_aio_context_home_thread(AioContext *ctx)
57
* schedule_my_work_in_iothread(ctx, &work);
52
+{
58
- * AIO_WAIT_WHILE(wait, ctx, !work.done);
53
+ if (ctx == qemu_get_current_aio_context()) {
59
+ * AIO_WAIT_WHILE(ctx, !work.done);
54
+ return true;
60
*
55
+ }
61
* The IOThread must call aio_wait_kick() to notify the main loop when
56
+
62
* work.done changes:
57
+ if (ctx == qemu_get_aio_context()) {
63
@@ -XXX,XX +XXX,XX @@
58
+ return qemu_mutex_iothread_locked();
64
* {
59
+ } else {
65
* ...
60
+ return false;
66
* work.done = true;
61
+ }
67
- * aio_wait_kick(wait);
62
+}
68
+ * aio_wait_kick();
63
+
69
* }
64
#endif /* QEMU_AIO_WAIT_H */
65
diff --git a/include/block/aio.h b/include/block/aio.h
66
index XXXXXXX..XXXXXXX 100644
67
--- a/include/block/aio.h
68
+++ b/include/block/aio.h
69
@@ -XXX,XX +XXX,XX @@ struct AioContext {
70
AioHandlerList deleted_aio_handlers;
71
72
/* Used to avoid unnecessary event_notifier_set calls in aio_notify;
73
- * accessed with atomic primitives. If this field is 0, everything
74
- * (file descriptors, bottom halves, timers) will be re-evaluated
75
- * before the next blocking poll(), thus the event_notifier_set call
76
- * can be skipped. If it is non-zero, you may need to wake up a
77
- * concurrent aio_poll or the glib main event loop, making
78
- * event_notifier_set necessary.
79
+ * only written from the AioContext home thread, or under the BQL in
80
+ * the case of the main AioContext. However, it is read from any
81
+ * thread so it is still accessed with atomic primitives.
82
+ *
83
+ * If this field is 0, everything (file descriptors, bottom halves,
84
+ * timers) will be re-evaluated before the next blocking poll() or
85
+ * io_uring wait; therefore, the event_notifier_set call can be
86
+ * skipped. If it is non-zero, you may need to wake up a concurrent
87
+ * aio_poll or the glib main event loop, making event_notifier_set
88
+ * necessary.
89
*
90
* Bit 0 is reserved for GSource usage of the AioContext, and is 1
91
* between a call to aio_ctx_prepare and the next call to aio_ctx_check.
92
@@ -XXX,XX +XXX,XX @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
70
*/
93
*/
71
typedef struct {
94
AioContext *qemu_get_current_aio_context(void);
72
@@ -XXX,XX +XXX,XX @@ typedef struct {
95
73
unsigned num_waiters;
96
-/**
74
} AioWait;
97
- * in_aio_context_home_thread:
75
98
- * @ctx: the aio context
76
+extern AioWait global_aio_wait;
77
+
78
/**
79
* AIO_WAIT_WHILE:
80
- * @wait: the aio wait object
81
* @ctx: the aio context, or NULL if multiple aio contexts (for which the
82
* caller does not hold a lock) are involved in the polling condition.
83
* @cond: wait while this conditional expression is true
84
@@ -XXX,XX +XXX,XX @@ typedef struct {
85
* wait on conditions between two IOThreads since that could lead to deadlock,
86
* go via the main loop instead.
87
*/
88
-#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \
89
+#define AIO_WAIT_WHILE(ctx, cond) ({ \
90
bool waited_ = false; \
91
- AioWait *wait_ = (wait); \
92
+ AioWait *wait_ = &global_aio_wait; \
93
AioContext *ctx_ = (ctx); \
94
/* Increment wait_->num_waiters before evaluating cond. */ \
95
atomic_inc(&wait_->num_waiters); \
96
@@ -XXX,XX +XXX,XX @@ typedef struct {
97
98
/**
99
* aio_wait_kick:
100
- * @wait: the aio wait object that should re-evaluate its condition
101
- *
99
- *
102
* Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During
100
- * Return whether we are running in the thread that normally runs @ctx. Note
103
* synchronous operations performed in an IOThread, the main thread lets the
101
- * that acquiring/releasing ctx does not affect the outcome, each AioContext
104
* IOThread's event loop run, waiting for the operation to complete. A
102
- * still only has one home thread that is responsible for running it.
105
* aio_wait_kick() call will wake up the main thread.
106
*/
107
-void aio_wait_kick(AioWait *wait);
108
+void aio_wait_kick(void);
109
110
/**
111
* aio_wait_bh_oneshot:
112
diff --git a/include/block/block.h b/include/block/block.h
113
index XXXXXXX..XXXXXXX 100644
114
--- a/include/block/block.h
115
+++ b/include/block/block.h
116
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void);
117
void bdrv_drain_all_end(void);
118
void bdrv_drain_all(void);
119
120
-/* Returns NULL when bs == NULL */
121
-AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
122
-
123
#define BDRV_POLL_WHILE(bs, cond) ({ \
124
BlockDriverState *bs_ = (bs); \
125
- AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \
126
- bdrv_get_aio_context(bs_), \
127
+ AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
128
cond); })
129
130
int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes);
131
diff --git a/include/block/block_int.h b/include/block/block_int.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/include/block/block_int.h
134
+++ b/include/block/block_int.h
135
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
136
unsigned int in_flight;
137
unsigned int serialising_in_flight;
138
139
- /* Kicked to signal main loop when a request completes. */
140
- AioWait wait;
141
-
142
/* counter for nested bdrv_io_plug.
143
* Accessed with atomic ops.
144
*/
145
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
146
index XXXXXXX..XXXXXXX 100644
147
--- a/include/block/blockjob.h
148
+++ b/include/block/blockjob.h
149
@@ -XXX,XX +XXX,XX @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
150
*/
151
void block_job_remove_all_bdrv(BlockJob *job);
152
153
-/**
154
- * block_job_wakeup_all_bdrv:
155
- * @job: The block job
156
- *
157
- * Calls bdrv_wakeup() for all BlockDriverStates that have been added to the
158
- * job. This function is to be called whenever child_job_drained_poll() would
159
- * go from true to false to notify waiting drain requests.
160
- */
103
- */
161
-void block_job_wakeup_all_bdrv(BlockJob *job);
104
-static inline bool in_aio_context_home_thread(AioContext *ctx)
105
-{
106
- return ctx == qemu_get_current_aio_context();
107
-}
162
-
108
-
163
/**
109
/**
164
* block_job_set_speed:
110
* aio_context_setup:
165
* @job: The job to set the speed for.
111
* @ctx: the aio context
166
diff --git a/block.c b/block.c
167
index XXXXXXX..XXXXXXX 100644
168
--- a/block.c
169
+++ b/block.c
170
@@ -XXX,XX +XXX,XX @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
171
return bs ? bs->aio_context : qemu_get_aio_context();
172
}
173
174
-AioWait *bdrv_get_aio_wait(BlockDriverState *bs)
175
-{
176
- return bs ? &bs->wait : NULL;
177
-}
178
-
179
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
180
{
181
aio_co_enter(bdrv_get_aio_context(bs), co);
182
diff --git a/block/block-backend.c b/block/block-backend.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/block/block-backend.c
185
+++ b/block/block-backend.c
186
@@ -XXX,XX +XXX,XX @@ struct BlockBackend {
187
* Accessed with atomic ops.
188
*/
189
unsigned int in_flight;
190
- AioWait wait;
191
};
192
193
typedef struct BlockBackendAIOCB {
194
@@ -XXX,XX +XXX,XX @@ static void blk_inc_in_flight(BlockBackend *blk)
195
static void blk_dec_in_flight(BlockBackend *blk)
196
{
197
atomic_dec(&blk->in_flight);
198
- aio_wait_kick(&blk->wait);
199
+ aio_wait_kick();
200
}
201
202
static void error_callback_bh(void *opaque)
203
@@ -XXX,XX +XXX,XX @@ void blk_drain(BlockBackend *blk)
204
}
205
206
/* We may have -ENOMEDIUM completions in flight */
207
- AIO_WAIT_WHILE(&blk->wait,
208
- blk_get_aio_context(blk),
209
- atomic_mb_read(&blk->in_flight) > 0);
210
+ AIO_WAIT_WHILE(blk_get_aio_context(blk),
211
+ atomic_mb_read(&blk->in_flight) > 0);
212
213
if (bs) {
214
bdrv_drained_end(bs);
215
@@ -XXX,XX +XXX,XX @@ void blk_drain_all(void)
216
aio_context_acquire(ctx);
217
218
/* We may have -ENOMEDIUM completions in flight */
219
- AIO_WAIT_WHILE(&blk->wait, ctx,
220
- atomic_mb_read(&blk->in_flight) > 0);
221
+ AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0);
222
223
aio_context_release(ctx);
224
}
225
diff --git a/block/io.c b/block/io.c
226
index XXXXXXX..XXXXXXX 100644
227
--- a/block/io.c
228
+++ b/block/io.c
229
@@ -XXX,XX +XXX,XX @@
230
/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
231
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
232
233
-static AioWait drain_all_aio_wait;
234
-
235
static void bdrv_parent_cb_resize(BlockDriverState *bs);
236
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
237
int64_t offset, int bytes, BdrvRequestFlags flags);
238
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
239
}
240
241
/* Now poll the in-flight requests */
242
- AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
243
+ AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
244
245
while ((bs = bdrv_next_all_states(bs))) {
246
bdrv_drain_assert_idle(bs);
247
@@ -XXX,XX +XXX,XX @@ void bdrv_inc_in_flight(BlockDriverState *bs)
248
249
void bdrv_wakeup(BlockDriverState *bs)
250
{
251
- aio_wait_kick(bdrv_get_aio_wait(bs));
252
- aio_wait_kick(&drain_all_aio_wait);
253
+ aio_wait_kick();
254
}
255
256
void bdrv_dec_in_flight(BlockDriverState *bs)
257
diff --git a/blockjob.c b/blockjob.c
258
index XXXXXXX..XXXXXXX 100644
259
--- a/blockjob.c
260
+++ b/blockjob.c
261
@@ -XXX,XX +XXX,XX @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
262
return 0;
263
}
264
265
-void block_job_wakeup_all_bdrv(BlockJob *job)
266
-{
267
- GSList *l;
268
-
269
- for (l = job->nodes; l; l = l->next) {
270
- BdrvChild *c = l->data;
271
- bdrv_wakeup(c->bs);
272
- }
273
-}
274
-
275
static void block_job_on_idle(Notifier *n, void *opaque)
276
{
277
- BlockJob *job = opaque;
278
- block_job_wakeup_all_bdrv(job);
279
+ aio_wait_kick();
280
}
281
282
bool block_job_is_internal(BlockJob *job)
283
diff --git a/job.c b/job.c
284
index XXXXXXX..XXXXXXX 100644
285
--- a/job.c
286
+++ b/job.c
287
@@ -XXX,XX +XXX,XX @@ void job_complete(Job *job, Error **errp)
288
int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
289
{
290
Error *local_err = NULL;
291
- AioWait dummy_wait = {};
292
int ret;
293
294
job_ref(job);
295
@@ -XXX,XX +XXX,XX @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
296
return -EBUSY;
297
}
298
299
- AIO_WAIT_WHILE(&dummy_wait, job->aio_context,
300
+ AIO_WAIT_WHILE(job->aio_context,
301
(job_drain(job), !job_is_completed(job)));
302
303
ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret;
304
diff --git a/util/aio-wait.c b/util/aio-wait.c
305
index XXXXXXX..XXXXXXX 100644
306
--- a/util/aio-wait.c
307
+++ b/util/aio-wait.c
308
@@ -XXX,XX +XXX,XX @@
309
#include "qemu/main-loop.h"
310
#include "block/aio-wait.h"
311
312
+AioWait global_aio_wait;
313
+
314
static void dummy_bh_cb(void *opaque)
315
{
316
/* The point is to make AIO_WAIT_WHILE()'s aio_poll() return */
317
}
318
319
-void aio_wait_kick(AioWait *wait)
320
+void aio_wait_kick(void)
321
{
322
/* The barrier (or an atomic op) is in the caller. */
323
- if (atomic_read(&wait->num_waiters)) {
324
+ if (atomic_read(&global_aio_wait.num_waiters)) {
325
aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
326
}
327
}
328
329
typedef struct {
330
- AioWait wait;
331
bool done;
332
QEMUBHFunc *cb;
333
void *opaque;
334
@@ -XXX,XX +XXX,XX @@ static void aio_wait_bh(void *opaque)
335
data->cb(data->opaque);
336
337
data->done = true;
338
- aio_wait_kick(&data->wait);
339
+ aio_wait_kick();
340
}
341
342
void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
343
@@ -XXX,XX +XXX,XX @@ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
344
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
345
346
aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
347
- AIO_WAIT_WHILE(&data.wait, ctx, !data.done);
348
+ AIO_WAIT_WHILE(ctx, !data.done);
349
}
350
--
112
--
351
2.17.1
113
2.25.1
352
114
353
diff view generated by jsdifflib
1
From: Sergio Lopez <slp@redhat.com>
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
2
3
AIO Coroutines shouldn't by managed by an AioContext different than the
3
When using C11 atomics, non-seqcst reads and writes do not participate
4
one assigned when they are created. aio_co_enter avoids entering a
4
in the total order of seqcst operations. In util/async.c and util/aio-posix.c,
5
coroutine from a different AioContext, calling aio_co_schedule instead.
5
in particular, the pattern that we use
6
6
7
Scheduled coroutines are then entered by co_schedule_bh_cb using
7
write ctx->notify_me write bh->scheduled
8
qemu_coroutine_enter, which just calls qemu_aio_coroutine_enter with the
8
read bh->scheduled read ctx->notify_me
9
current AioContext obtained with qemu_get_current_aio_context.
9
if !bh->scheduled, sleep if ctx->notify_me, notify
10
Eventually, co->ctx will be set to the AioContext passed as an argument
11
to qemu_aio_coroutine_enter.
12
10
13
This means that, if an IO Thread's AioConext is being processed by the
11
needs to use seqcst operations for both the write and the read. In
14
Main Thread (due to aio_poll being called with a BDS AioContext, as it
12
general this is something that we do not want, because there can be
15
happens in AIO_WAIT_WHILE among other places), the AioContext from some
13
many sources that are polled in addition to bottom halves. The
16
coroutines may be wrongly replaced with the one from the Main Thread.
14
alternative is to place a seqcst memory barrier between the write
15
and the read. This also comes with a disadvantage, in that the
16
memory barrier is implicit on strongly-ordered architectures and
17
it wastes a few dozen clock cycles.
17
18
18
This is the root cause behind some crashes, mainly triggered by the
19
Fortunately, ctx->notify_me is never written concurrently by two
19
drain code at block/io.c. The most common are these abort and failed
20
threads, so we can assert that and relax the writes to ctx->notify_me.
20
assertion:
21
The resulting solution works and performs well on both aarch64 and x86.
21
22
22
util/async.c:aio_co_schedule
23
Note that the atomic_set/atomic_read combination is not an atomic
23
456 if (scheduled) {
24
read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED;
24
457 fprintf(stderr,
25
on x86, ATOMIC_RELAXED compiles to a locked operation.
25
458 "%s: Co-routine was already scheduled in '%s'\n",
26
459 __func__, scheduled);
27
460 abort();
28
461 }
29
26
30
util/qemu-coroutine-lock.c:
27
Analyzed-by: Ying Fang <fangying1@huawei.com>
31
286 assert(mutex->holder == self);
28
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
29
Tested-by: Ying Fang <fangying1@huawei.com>
30
Message-Id: <20200407140746.8041-6-pbonzini@redhat.com>
31
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
32
---
33
util/aio-posix.c | 16 ++++++++++++++--
34
util/aio-win32.c | 17 ++++++++++++++---
35
util/async.c | 16 ++++++++++++----
36
3 files changed, 40 insertions(+), 9 deletions(-)
32
37
33
But it's also known to cause random errors at different locations, and
38
diff --git a/util/aio-posix.c b/util/aio-posix.c
34
even SIGSEGV with broken coroutine backtraces.
39
index XXXXXXX..XXXXXXX 100644
35
40
--- a/util/aio-posix.c
36
By using qemu_aio_coroutine_enter directly in co_schedule_bh_cb, we can
41
+++ b/util/aio-posix.c
37
pass the correct AioContext as an argument, making sure co->ctx is not
42
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
38
wrongly altered.
43
int64_t timeout;
39
44
int64_t start = 0;
40
Signed-off-by: Sergio Lopez <slp@redhat.com>
45
41
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
46
+ /*
42
---
47
+ * There cannot be two concurrent aio_poll calls for the same AioContext (or
43
util/async.c | 2 +-
48
+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
44
1 file changed, 1 insertion(+), 1 deletion(-)
49
+ * We rely on this below to avoid slow locked accesses to ctx->notify_me.
45
50
+ */
51
assert(in_aio_context_home_thread(ctx));
52
53
/* aio_notify can avoid the expensive event_notifier_set if
54
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
55
* so disable the optimization now.
56
*/
57
if (blocking) {
58
- atomic_add(&ctx->notify_me, 2);
59
+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
60
+ /*
61
+ * Write ctx->notify_me before computing the timeout
62
+ * (reading bottom half flags, etc.). Pairs with
63
+ * smp_mb in aio_notify().
64
+ */
65
+ smp_mb();
66
}
67
68
qemu_lockcnt_inc(&ctx->list_lock);
69
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
70
}
71
72
if (blocking) {
73
- atomic_sub(&ctx->notify_me, 2);
74
+ /* Finish the poll before clearing the flag. */
75
+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
76
aio_notify_accept(ctx);
77
}
78
79
diff --git a/util/aio-win32.c b/util/aio-win32.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/util/aio-win32.c
82
+++ b/util/aio-win32.c
83
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
84
int count;
85
int timeout;
86
87
+ /*
88
+ * There cannot be two concurrent aio_poll calls for the same AioContext (or
89
+ * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
90
+ * We rely on this below to avoid slow locked accesses to ctx->notify_me.
91
+ */
92
+ assert(in_aio_context_home_thread(ctx));
93
progress = false;
94
95
/* aio_notify can avoid the expensive event_notifier_set if
96
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
97
* so disable the optimization now.
98
*/
99
if (blocking) {
100
- atomic_add(&ctx->notify_me, 2);
101
+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
102
+ /*
103
+ * Write ctx->notify_me before computing the timeout
104
+ * (reading bottom half flags, etc.). Pairs with
105
+ * smp_mb in aio_notify().
106
+ */
107
+ smp_mb();
108
}
109
110
qemu_lockcnt_inc(&ctx->list_lock);
111
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
112
ret = WaitForMultipleObjects(count, events, FALSE, timeout);
113
if (blocking) {
114
assert(first);
115
- assert(in_aio_context_home_thread(ctx));
116
- atomic_sub(&ctx->notify_me, 2);
117
+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
118
aio_notify_accept(ctx);
119
}
120
46
diff --git a/util/async.c b/util/async.c
121
diff --git a/util/async.c b/util/async.c
47
index XXXXXXX..XXXXXXX 100644
122
index XXXXXXX..XXXXXXX 100644
48
--- a/util/async.c
123
--- a/util/async.c
49
+++ b/util/async.c
124
+++ b/util/async.c
50
@@ -XXX,XX +XXX,XX @@ static void co_schedule_bh_cb(void *opaque)
125
@@ -XXX,XX +XXX,XX @@ aio_ctx_prepare(GSource *source, gint *timeout)
51
126
{
52
/* Protected by write barrier in qemu_aio_coroutine_enter */
127
AioContext *ctx = (AioContext *) source;
53
atomic_set(&co->scheduled, NULL);
128
54
- qemu_coroutine_enter(co);
129
- atomic_or(&ctx->notify_me, 1);
55
+ qemu_aio_coroutine_enter(ctx, co);
130
+ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
56
aio_context_release(ctx);
131
+
132
+ /*
133
+ * Write ctx->notify_me before computing the timeout
134
+ * (reading bottom half flags, etc.). Pairs with
135
+ * smp_mb in aio_notify().
136
+ */
137
+ smp_mb();
138
139
/* We assume there is no timeout already supplied */
140
*timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
141
@@ -XXX,XX +XXX,XX @@ aio_ctx_check(GSource *source)
142
QEMUBH *bh;
143
BHListSlice *s;
144
145
- atomic_and(&ctx->notify_me, ~1);
146
+ /* Finish computing the timeout before clearing the flag. */
147
+ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
148
aio_notify_accept(ctx);
149
150
QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
151
@@ -XXX,XX +XXX,XX @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
152
void aio_notify(AioContext *ctx)
153
{
154
/* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
155
- * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
156
+ * with smp_mb in aio_ctx_prepare or aio_poll.
157
*/
158
smp_mb();
159
- if (ctx->notify_me) {
160
+ if (atomic_read(&ctx->notify_me)) {
161
event_notifier_set(&ctx->notifier);
162
atomic_mb_set(&ctx->notified, true);
57
}
163
}
58
}
59
--
164
--
60
2.17.1
165
2.25.1
61
166
62
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
All callers have acquired ctx already. Doing that again results in
4
aio_poll() hang. This fixes the problem that a BDRV_POLL_WHILE() in the
5
callback cannot make progress because ctx is recursively locked, for
6
example, when drive-backup finishes.
7
8
There are two callers of job_finalize():
9
10
fam@lemon:~/work/qemu [master]$ git grep -w -A1 '^\s*job_finalize'
11
blockdev.c: job_finalize(&job->job, errp);
12
blockdev.c- aio_context_release(aio_context);
13
--
14
job-qmp.c: job_finalize(job, errp);
15
job-qmp.c- aio_context_release(aio_context);
16
--
17
tests/test-blockjob.c: job_finalize(&job->job, &error_abort);
18
tests/test-blockjob.c- assert(job->job.status == JOB_STATUS_CONCLUDED);
19
20
Ignoring the test, it's easy to see both callers to job_finalize (and
21
job_do_finalize) have acquired the context.
22
23
Cc: qemu-stable@nongnu.org
24
Reported-by: Gu Nini <ngu@redhat.com>
25
Reviewed-by: Eric Blake <eblake@redhat.com>
26
Signed-off-by: Fam Zheng <famz@redhat.com>
27
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
28
---
29
job.c | 18 +++++-------------
30
1 file changed, 5 insertions(+), 13 deletions(-)
31
32
diff --git a/job.c b/job.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/job.c
35
+++ b/job.c
36
@@ -XXX,XX +XXX,XX @@ static void job_txn_del_job(Job *job)
37
}
38
}
39
40
-static int job_txn_apply(JobTxn *txn, int fn(Job *), bool lock)
41
+static int job_txn_apply(JobTxn *txn, int fn(Job *))
42
{
43
- AioContext *ctx;
44
Job *job, *next;
45
int rc = 0;
46
47
QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
48
- if (lock) {
49
- ctx = job->aio_context;
50
- aio_context_acquire(ctx);
51
- }
52
rc = fn(job);
53
- if (lock) {
54
- aio_context_release(ctx);
55
- }
56
if (rc) {
57
break;
58
}
59
@@ -XXX,XX +XXX,XX @@ static void job_do_finalize(Job *job)
60
assert(job && job->txn);
61
62
/* prepare the transaction to complete */
63
- rc = job_txn_apply(job->txn, job_prepare, true);
64
+ rc = job_txn_apply(job->txn, job_prepare);
65
if (rc) {
66
job_completed_txn_abort(job);
67
} else {
68
- job_txn_apply(job->txn, job_finalize_single, true);
69
+ job_txn_apply(job->txn, job_finalize_single);
70
}
71
}
72
73
@@ -XXX,XX +XXX,XX @@ static void job_completed_txn_success(Job *job)
74
assert(other_job->ret == 0);
75
}
76
77
- job_txn_apply(txn, job_transition_to_pending, false);
78
+ job_txn_apply(txn, job_transition_to_pending);
79
80
/* If no jobs need manual finalization, automatically do so */
81
- if (job_txn_apply(txn, job_needs_finalize, false) == 0) {
82
+ if (job_txn_apply(txn, job_needs_finalize) == 0) {
83
job_do_finalize(job);
84
}
85
}
86
--
87
2.17.1
88
89
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
job_completed() had a problem with double locking that was recently
4
fixed independently by two different commits:
5
6
"job: Fix nested aio_poll() hanging in job_txn_apply"
7
"jobs: add exit shim"
8
9
One fix removed the first aio_context_acquire(), the other fix removed
10
the other one. Now we have a bug again and the code is run without any
11
locking.
12
13
Add it back in one of the places.
14
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
16
Reviewed-by: Max Reitz <mreitz@redhat.com>
17
Reviewed-by: John Snow <jsnow@redhat.com>
18
---
19
job.c | 4 ++++
20
1 file changed, 4 insertions(+)
21
22
diff --git a/job.c b/job.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/job.c
25
+++ b/job.c
26
@@ -XXX,XX +XXX,XX @@ static void job_completed(Job *job)
27
static void job_exit(void *opaque)
28
{
29
Job *job = (Job *)opaque;
30
+ AioContext *ctx = job->aio_context;
31
+
32
+ aio_context_acquire(ctx);
33
job_completed(job);
34
+ aio_context_release(ctx);
35
}
36
37
/**
38
--
39
2.17.1
40
41
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
In the context of draining a BDS, the .drained_poll callback of block
4
jobs is called. If this returns true (i.e. there is still some activity
5
pending), the drain operation may call aio_poll() with blocking=true to
6
wait for completion.
7
8
As soon as the pending activity is completed and the job finally arrives
9
in a quiescent state (i.e. its coroutine either yields with busy=false
10
or terminates), the block job must notify the aio_poll() loop to wake
11
up, otherwise we get a deadlock if both are running in different
12
threads.
13
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Fam Zheng <famz@redhat.com>
16
Reviewed-by: Max Reitz <mreitz@redhat.com>
17
---
18
include/block/blockjob.h | 13 +++++++++++++
19
include/qemu/job.h | 3 +++
20
blockjob.c | 18 ++++++++++++++++++
21
job.c | 7 +++++++
22
4 files changed, 41 insertions(+)
23
24
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/block/blockjob.h
27
+++ b/include/block/blockjob.h
28
@@ -XXX,XX +XXX,XX @@ typedef struct BlockJob {
29
/** Called when the job transitions to READY */
30
Notifier ready_notifier;
31
32
+ /** Called when the job coroutine yields or terminates */
33
+ Notifier idle_notifier;
34
+
35
/** BlockDriverStates that are involved in this block job */
36
GSList *nodes;
37
} BlockJob;
38
@@ -XXX,XX +XXX,XX @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
39
*/
40
void block_job_remove_all_bdrv(BlockJob *job);
41
42
+/**
43
+ * block_job_wakeup_all_bdrv:
44
+ * @job: The block job
45
+ *
46
+ * Calls bdrv_wakeup() for all BlockDriverStates that have been added to the
47
+ * job. This function is to be called whenever child_job_drained_poll() would
48
+ * go from true to false to notify waiting drain requests.
49
+ */
50
+void block_job_wakeup_all_bdrv(BlockJob *job);
51
+
52
/**
53
* block_job_set_speed:
54
* @job: The job to set the speed for.
55
diff --git a/include/qemu/job.h b/include/qemu/job.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/include/qemu/job.h
58
+++ b/include/qemu/job.h
59
@@ -XXX,XX +XXX,XX @@ typedef struct Job {
60
/** Notifiers called when the job transitions to READY */
61
NotifierList on_ready;
62
63
+ /** Notifiers called when the job coroutine yields or terminates */
64
+ NotifierList on_idle;
65
+
66
/** Element of the list of jobs */
67
QLIST_ENTRY(Job) job_list;
68
69
diff --git a/blockjob.c b/blockjob.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/blockjob.c
72
+++ b/blockjob.c
73
@@ -XXX,XX +XXX,XX @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
74
return 0;
75
}
76
77
+void block_job_wakeup_all_bdrv(BlockJob *job)
78
+{
79
+ GSList *l;
80
+
81
+ for (l = job->nodes; l; l = l->next) {
82
+ BdrvChild *c = l->data;
83
+ bdrv_wakeup(c->bs);
84
+ }
85
+}
86
+
87
+static void block_job_on_idle(Notifier *n, void *opaque)
88
+{
89
+ BlockJob *job = opaque;
90
+ block_job_wakeup_all_bdrv(job);
91
+}
92
+
93
bool block_job_is_internal(BlockJob *job)
94
{
95
return (job->job.id == NULL);
96
@@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
97
job->finalize_completed_notifier.notify = block_job_event_completed;
98
job->pending_notifier.notify = block_job_event_pending;
99
job->ready_notifier.notify = block_job_event_ready;
100
+ job->idle_notifier.notify = block_job_on_idle;
101
102
notifier_list_add(&job->job.on_finalize_cancelled,
103
&job->finalize_cancelled_notifier);
104
@@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
105
&job->finalize_completed_notifier);
106
notifier_list_add(&job->job.on_pending, &job->pending_notifier);
107
notifier_list_add(&job->job.on_ready, &job->ready_notifier);
108
+ notifier_list_add(&job->job.on_idle, &job->idle_notifier);
109
110
error_setg(&job->blocker, "block device is in use by block job: %s",
111
job_type_str(&job->job));
112
diff --git a/job.c b/job.c
113
index XXXXXXX..XXXXXXX 100644
114
--- a/job.c
115
+++ b/job.c
116
@@ -XXX,XX +XXX,XX @@ static void job_event_ready(Job *job)
117
notifier_list_notify(&job->on_ready, job);
118
}
119
120
+static void job_event_idle(Job *job)
121
+{
122
+ notifier_list_notify(&job->on_idle, job);
123
+}
124
+
125
void job_enter_cond(Job *job, bool(*fn)(Job *job))
126
{
127
if (!job_started(job)) {
128
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn job_do_yield(Job *job, uint64_t ns)
129
timer_mod(&job->sleep_timer, ns);
130
}
131
job->busy = false;
132
+ job_event_idle(job);
133
job_unlock();
134
qemu_coroutine_yield();
135
136
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn job_co_entry(void *opaque)
137
assert(job && job->driver && job->driver->run);
138
job_pause_point(job);
139
job->ret = job->driver->run(job, &job->err);
140
+ job_event_idle(job);
141
job->deferred_to_main_loop = true;
142
aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
143
}
144
--
145
2.17.1
146
147
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
Even if AIO_WAIT_WHILE() is called in the home context of the
4
AioContext, we still want to allow the condition to change depending on
5
other threads as long as they kick the AioWait. Specfically block jobs
6
can be running in an I/O thread and should then be able to kick a drain
7
in the main loop context.
8
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
11
---
12
include/block/aio-wait.h | 6 +++---
13
1 file changed, 3 insertions(+), 3 deletions(-)
14
15
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/block/aio-wait.h
18
+++ b/include/block/aio-wait.h
19
@@ -XXX,XX +XXX,XX @@ typedef struct {
20
bool waited_ = false; \
21
AioWait *wait_ = (wait); \
22
AioContext *ctx_ = (ctx); \
23
+ /* Increment wait_->num_waiters before evaluating cond. */ \
24
+ atomic_inc(&wait_->num_waiters); \
25
if (ctx_ && in_aio_context_home_thread(ctx_)) { \
26
while ((cond)) { \
27
aio_poll(ctx_, true); \
28
@@ -XXX,XX +XXX,XX @@ typedef struct {
29
} else { \
30
assert(qemu_get_current_aio_context() == \
31
qemu_get_aio_context()); \
32
- /* Increment wait_->num_waiters before evaluating cond. */ \
33
- atomic_inc(&wait_->num_waiters); \
34
while ((cond)) { \
35
if (ctx_) { \
36
aio_context_release(ctx_); \
37
@@ -XXX,XX +XXX,XX @@ typedef struct {
38
} \
39
waited_ = true; \
40
} \
41
- atomic_dec(&wait_->num_waiters); \
42
} \
43
+ atomic_dec(&wait_->num_waiters); \
44
waited_; })
45
46
/**
47
--
48
2.17.1
49
50
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
This extends the existing drain test with a block job to include
4
variants where the block job runs in a different AioContext.
5
6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
Reviewed-by: Fam Zheng <famz@redhat.com>
8
---
9
tests/test-bdrv-drain.c | 92 ++++++++++++++++++++++++++++++++++++++---
10
1 file changed, 86 insertions(+), 6 deletions(-)
11
12
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tests/test-bdrv-drain.c
15
+++ b/tests/test-bdrv-drain.c
16
@@ -XXX,XX +XXX,XX @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
17
}
18
}
19
20
+static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs)
21
+{
22
+ if (drain_type != BDRV_DRAIN_ALL) {
23
+ aio_context_acquire(bdrv_get_aio_context(bs));
24
+ }
25
+ do_drain_begin(drain_type, bs);
26
+ if (drain_type != BDRV_DRAIN_ALL) {
27
+ aio_context_release(bdrv_get_aio_context(bs));
28
+ }
29
+}
30
+
31
+static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs)
32
+{
33
+ if (drain_type != BDRV_DRAIN_ALL) {
34
+ aio_context_acquire(bdrv_get_aio_context(bs));
35
+ }
36
+ do_drain_end(drain_type, bs);
37
+ if (drain_type != BDRV_DRAIN_ALL) {
38
+ aio_context_release(bdrv_get_aio_context(bs));
39
+ }
40
+}
41
+
42
static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
43
{
44
BlockBackend *blk;
45
@@ -XXX,XX +XXX,XX @@ BlockJobDriver test_job_driver = {
46
},
47
};
48
49
-static void test_blockjob_common(enum drain_type drain_type)
50
+static void test_blockjob_common(enum drain_type drain_type, bool use_iothread)
51
{
52
BlockBackend *blk_src, *blk_target;
53
BlockDriverState *src, *target;
54
BlockJob *job;
55
+ IOThread *iothread = NULL;
56
+ AioContext *ctx;
57
int ret;
58
59
src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
60
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
61
blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
62
blk_insert_bs(blk_src, src, &error_abort);
63
64
+ if (use_iothread) {
65
+ iothread = iothread_new();
66
+ ctx = iothread_get_aio_context(iothread);
67
+ blk_set_aio_context(blk_src, ctx);
68
+ } else {
69
+ ctx = qemu_get_aio_context();
70
+ }
71
+
72
target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
73
&error_abort);
74
blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
75
blk_insert_bs(blk_target, target, &error_abort);
76
77
+ aio_context_acquire(ctx);
78
job = block_job_create("job0", &test_job_driver, NULL, src, 0, BLK_PERM_ALL,
79
0, 0, NULL, NULL, &error_abort);
80
block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
81
job_start(&job->job);
82
+ aio_context_release(ctx);
83
84
g_assert_cmpint(job->job.pause_count, ==, 0);
85
g_assert_false(job->job.paused);
86
g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
87
88
- do_drain_begin(drain_type, src);
89
+ do_drain_begin_unlocked(drain_type, src);
90
91
if (drain_type == BDRV_DRAIN_ALL) {
92
/* bdrv_drain_all() drains both src and target */
93
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
94
g_assert_true(job->job.paused);
95
g_assert_false(job->job.busy); /* The job is paused */
96
97
- do_drain_end(drain_type, src);
98
+ do_drain_end_unlocked(drain_type, src);
99
+
100
+ if (use_iothread) {
101
+ /* paused is reset in the I/O thread, wait for it */
102
+ while (job->job.paused) {
103
+ aio_poll(qemu_get_aio_context(), false);
104
+ }
105
+ }
106
107
g_assert_cmpint(job->job.pause_count, ==, 0);
108
g_assert_false(job->job.paused);
109
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
110
111
do_drain_end(drain_type, target);
112
113
+ if (use_iothread) {
114
+ /* paused is reset in the I/O thread, wait for it */
115
+ while (job->job.paused) {
116
+ aio_poll(qemu_get_aio_context(), false);
117
+ }
118
+ }
119
+
120
g_assert_cmpint(job->job.pause_count, ==, 0);
121
g_assert_false(job->job.paused);
122
g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
123
124
+ aio_context_acquire(ctx);
125
ret = job_complete_sync(&job->job, &error_abort);
126
g_assert_cmpint(ret, ==, 0);
127
128
+ if (use_iothread) {
129
+ blk_set_aio_context(blk_src, qemu_get_aio_context());
130
+ }
131
+ aio_context_release(ctx);
132
+
133
blk_unref(blk_src);
134
blk_unref(blk_target);
135
bdrv_unref(src);
136
bdrv_unref(target);
137
+
138
+ if (iothread) {
139
+ iothread_join(iothread);
140
+ }
141
}
142
143
static void test_blockjob_drain_all(void)
144
{
145
- test_blockjob_common(BDRV_DRAIN_ALL);
146
+ test_blockjob_common(BDRV_DRAIN_ALL, false);
147
}
148
149
static void test_blockjob_drain(void)
150
{
151
- test_blockjob_common(BDRV_DRAIN);
152
+ test_blockjob_common(BDRV_DRAIN, false);
153
}
154
155
static void test_blockjob_drain_subtree(void)
156
{
157
- test_blockjob_common(BDRV_SUBTREE_DRAIN);
158
+ test_blockjob_common(BDRV_SUBTREE_DRAIN, false);
159
+}
160
+
161
+static void test_blockjob_iothread_drain_all(void)
162
+{
163
+ test_blockjob_common(BDRV_DRAIN_ALL, true);
164
+}
165
+
166
+static void test_blockjob_iothread_drain(void)
167
+{
168
+ test_blockjob_common(BDRV_DRAIN, true);
169
+}
170
+
171
+static void test_blockjob_iothread_drain_subtree(void)
172
+{
173
+ test_blockjob_common(BDRV_SUBTREE_DRAIN, true);
174
}
175
176
177
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
178
g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
179
test_blockjob_drain_subtree);
180
181
+ g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
182
+ test_blockjob_iothread_drain_all);
183
+ g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
184
+ test_blockjob_iothread_drain);
185
+ g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
186
+ test_blockjob_iothread_drain_subtree);
187
+
188
g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
189
g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
190
g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
191
--
192
2.17.1
193
194
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
All callers in QEMU proper hold the AioContext lock when calling
4
job_finish_sync(). test-blockjob should do the same when it calls the
5
function indirectly through job_cancel_sync().
6
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Reviewed-by: Fam Zheng <famz@redhat.com>
9
---
10
include/qemu/job.h | 6 ++++++
11
tests/test-blockjob.c | 6 ++++++
12
2 files changed, 12 insertions(+)
13
14
diff --git a/include/qemu/job.h b/include/qemu/job.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/qemu/job.h
17
+++ b/include/qemu/job.h
18
@@ -XXX,XX +XXX,XX @@ void job_user_cancel(Job *job, bool force, Error **errp);
19
*
20
* Returns the return value from the job if the job actually completed
21
* during the call, or -ECANCELED if it was canceled.
22
+ *
23
+ * Callers must hold the AioContext lock of job->aio_context.
24
*/
25
int job_cancel_sync(Job *job);
26
27
@@ -XXX,XX +XXX,XX @@ void job_cancel_sync_all(void);
28
* function).
29
*
30
* Returns the return value from the job.
31
+ *
32
+ * Callers must hold the AioContext lock of job->aio_context.
33
*/
34
int job_complete_sync(Job *job, Error **errp);
35
36
@@ -XXX,XX +XXX,XX @@ void job_dismiss(Job **job, Error **errp);
37
*
38
* Returns 0 if the job is successfully completed, -ECANCELED if the job was
39
* cancelled before completing, and -errno in other error cases.
40
+ *
41
+ * Callers must hold the AioContext lock of job->aio_context.
42
*/
43
int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp);
44
45
diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tests/test-blockjob.c
48
+++ b/tests/test-blockjob.c
49
@@ -XXX,XX +XXX,XX @@ static void cancel_common(CancelJob *s)
50
BlockJob *job = &s->common;
51
BlockBackend *blk = s->blk;
52
JobStatus sts = job->job.status;
53
+ AioContext *ctx;
54
+
55
+ ctx = job->job.aio_context;
56
+ aio_context_acquire(ctx);
57
58
job_cancel_sync(&job->job);
59
if (sts != JOB_STATUS_CREATED && sts != JOB_STATUS_CONCLUDED) {
60
@@ -XXX,XX +XXX,XX @@ static void cancel_common(CancelJob *s)
61
assert(job->job.status == JOB_STATUS_NULL);
62
job_unref(&job->job);
63
destroy_blk(blk);
64
+
65
+ aio_context_release(ctx);
66
}
67
68
static void test_cancel_created(void)
69
--
70
2.17.1
71
72
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
job_finish_sync() needs to release the AioContext lock of the job before
4
calling aio_poll(). Otherwise, callbacks called by aio_poll() would
5
possibly take the lock a second time and run into a deadlock with a
6
nested AIO_WAIT_WHILE() call.
7
8
Also, job_drain() without aio_poll() isn't necessarily enough to make
9
progress on a job, it could depend on bottom halves to be executed.
10
11
Combine both open-coded while loops into a single AIO_WAIT_WHILE() call
12
that solves both of these problems.
13
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Fam Zheng <famz@redhat.com>
16
Reviewed-by: Max Reitz <mreitz@redhat.com>
17
---
18
job.c | 14 ++++++--------
19
1 file changed, 6 insertions(+), 8 deletions(-)
20
21
diff --git a/job.c b/job.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/job.c
24
+++ b/job.c
25
@@ -XXX,XX +XXX,XX @@
26
#include "qemu/job.h"
27
#include "qemu/id.h"
28
#include "qemu/main-loop.h"
29
+#include "block/aio-wait.h"
30
#include "trace-root.h"
31
#include "qapi/qapi-events-job.h"
32
33
@@ -XXX,XX +XXX,XX @@ void job_complete(Job *job, Error **errp)
34
int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
35
{
36
Error *local_err = NULL;
37
+ AioWait dummy_wait = {};
38
int ret;
39
40
job_ref(job);
41
@@ -XXX,XX +XXX,XX @@ int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp)
42
job_unref(job);
43
return -EBUSY;
44
}
45
- /* job_drain calls job_enter, and it should be enough to induce progress
46
- * until the job completes or moves to the main thread. */
47
- while (!job->deferred_to_main_loop && !job_is_completed(job)) {
48
- job_drain(job);
49
- }
50
- while (!job_is_completed(job)) {
51
- aio_poll(qemu_get_aio_context(), true);
52
- }
53
+
54
+ AIO_WAIT_WHILE(&dummy_wait, job->aio_context,
55
+ (job_drain(job), !job_is_completed(job)));
56
+
57
ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret;
58
job_unref(job);
59
return ret;
60
--
61
2.17.1
62
63
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
This is a regression test for a deadlock that occurred in block job
4
completion callbacks (via job_defer_to_main_loop) because the AioContext
5
lock was taken twice: once in job_finish_sync() and then again in
6
job_defer_to_main_loop_bh(). This would cause AIO_WAIT_WHILE() to hang.
7
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Fam Zheng <famz@redhat.com>
10
---
11
tests/test-bdrv-drain.c | 10 ++++++++++
12
1 file changed, 10 insertions(+)
13
14
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/test-bdrv-drain.c
17
+++ b/tests/test-bdrv-drain.c
18
@@ -XXX,XX +XXX,XX @@ typedef struct TestBlockJob {
19
bool should_complete;
20
} TestBlockJob;
21
22
+static int test_job_prepare(Job *job)
23
+{
24
+ TestBlockJob *s = container_of(job, TestBlockJob, common.job);
25
+
26
+ /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
27
+ blk_flush(s->common.blk);
28
+ return 0;
29
+}
30
+
31
static int coroutine_fn test_job_run(Job *job, Error **errp)
32
{
33
TestBlockJob *s = container_of(job, TestBlockJob, common.job);
34
@@ -XXX,XX +XXX,XX @@ BlockJobDriver test_job_driver = {
35
.drain = block_job_drain,
36
.run = test_job_run,
37
.complete = test_job_complete,
38
+ .prepare = test_job_prepare,
39
},
40
};
41
42
--
43
2.17.1
44
45
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
bdrv_do_drained_begin/end() assume that they are called with the
4
AioContext lock of bs held. If we call drain functions from a coroutine
5
with the AioContext lock held, we yield and schedule a BH to move out of
6
coroutine context. This means that the lock for the home context of the
7
coroutine is released and must be re-acquired in the bottom half.
8
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
---
12
include/qemu/coroutine.h | 5 +++++
13
block/io.c | 15 +++++++++++++++
14
util/qemu-coroutine.c | 5 +++++
15
3 files changed, 25 insertions(+)
16
17
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/qemu/coroutine.h
20
+++ b/include/qemu/coroutine.h
21
@@ -XXX,XX +XXX,XX @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
22
*/
23
void coroutine_fn qemu_coroutine_yield(void);
24
25
+/**
26
+ * Get the AioContext of the given coroutine
27
+ */
28
+AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co);
29
+
30
/**
31
* Get the currently executing coroutine
32
*/
33
diff --git a/block/io.c b/block/io.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/block/io.c
36
+++ b/block/io.c
37
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
38
BlockDriverState *bs = data->bs;
39
40
if (bs) {
41
+ AioContext *ctx = bdrv_get_aio_context(bs);
42
+ AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
43
+
44
+ /*
45
+ * When the coroutine yielded, the lock for its home context was
46
+ * released, so we need to re-acquire it here. If it explicitly
47
+ * acquired a different context, the lock is still held and we don't
48
+ * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
49
+ */
50
+ if (ctx == co_ctx) {
51
+ aio_context_acquire(ctx);
52
+ }
53
bdrv_dec_in_flight(bs);
54
if (data->begin) {
55
bdrv_do_drained_begin(bs, data->recursive, data->parent,
56
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
57
bdrv_do_drained_end(bs, data->recursive, data->parent,
58
data->ignore_bds_parents);
59
}
60
+ if (ctx == co_ctx) {
61
+ aio_context_release(ctx);
62
+ }
63
} else {
64
assert(data->begin);
65
bdrv_drain_all_begin();
66
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/util/qemu-coroutine.c
69
+++ b/util/qemu-coroutine.c
70
@@ -XXX,XX +XXX,XX @@ bool qemu_coroutine_entered(Coroutine *co)
71
{
72
return co->caller;
73
}
74
+
75
+AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
76
+{
77
+ return co->ctx;
78
+}
79
--
80
2.17.1
81
82
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
A bdrv_drain operation must ensure that all parents are quiesced, this
4
includes BlockBackends. Otherwise, callbacks called by requests that are
5
completed on the BDS layer, but not quite yet on the BlockBackend layer
6
could still create new requests.
7
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
---
12
block/block-backend.c | 9 +++++++++
13
1 file changed, 9 insertions(+)
14
15
diff --git a/block/block-backend.c b/block/block-backend.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block/block-backend.c
18
+++ b/block/block-backend.c
19
@@ -XXX,XX +XXX,XX @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options,
20
abort();
21
}
22
static void blk_root_drained_begin(BdrvChild *child);
23
+static bool blk_root_drained_poll(BdrvChild *child);
24
static void blk_root_drained_end(BdrvChild *child);
25
26
static void blk_root_change_media(BdrvChild *child, bool load);
27
@@ -XXX,XX +XXX,XX @@ static const BdrvChildRole child_root = {
28
.get_parent_desc = blk_root_get_parent_desc,
29
30
.drained_begin = blk_root_drained_begin,
31
+ .drained_poll = blk_root_drained_poll,
32
.drained_end = blk_root_drained_end,
33
34
.activate = blk_root_activate,
35
@@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child)
36
}
37
}
38
39
+static bool blk_root_drained_poll(BdrvChild *child)
40
+{
41
+ BlockBackend *blk = child->opaque;
42
+ assert(blk->quiesce_counter);
43
+ return !!blk->in_flight;
44
+}
45
+
46
static void blk_root_drained_end(BdrvChild *child)
47
{
48
BlockBackend *blk = child->opaque;
49
--
50
2.17.1
51
52
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
blk_unref() first decreases the refcount of the BlockBackend and calls
4
blk_delete() if the refcount reaches zero. Requests can still be in
5
flight at this point, they are only drained during blk_delete():
6
7
At this point, arbitrary callbacks can run. If any callback takes a
8
temporary BlockBackend reference, it will first increase the refcount to
9
1 and then decrease it to 0 again, triggering another blk_delete(). This
10
will cause a use-after-free crash in the outer blk_delete().
11
12
Fix it by draining the BlockBackend before decreasing to refcount to 0.
13
Assert in blk_ref() that it never takes the first refcount (which would
14
mean that the BlockBackend is already being deleted).
15
16
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
Reviewed-by: Fam Zheng <famz@redhat.com>
18
Reviewed-by: Max Reitz <mreitz@redhat.com>
19
---
20
block/block-backend.c | 9 ++++++++-
21
1 file changed, 8 insertions(+), 1 deletion(-)
22
23
diff --git a/block/block-backend.c b/block/block-backend.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/block/block-backend.c
26
+++ b/block/block-backend.c
27
@@ -XXX,XX +XXX,XX @@ int blk_get_refcnt(BlockBackend *blk)
28
*/
29
void blk_ref(BlockBackend *blk)
30
{
31
+ assert(blk->refcnt > 0);
32
blk->refcnt++;
33
}
34
35
@@ -XXX,XX +XXX,XX @@ void blk_unref(BlockBackend *blk)
36
{
37
if (blk) {
38
assert(blk->refcnt > 0);
39
- if (!--blk->refcnt) {
40
+ if (blk->refcnt > 1) {
41
+ blk->refcnt--;
42
+ } else {
43
+ blk_drain(blk);
44
+ /* blk_drain() cannot resurrect blk, nobody held a reference */
45
+ assert(blk->refcnt == 1);
46
+ blk->refcnt = 0;
47
blk_delete(blk);
48
}
49
}
50
--
51
2.17.1
52
53
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
Request callbacks can do pretty much anything, including operations that
4
will yield from the coroutine (such as draining the backend). In that
5
case, a decreased in_flight would be visible to other code and could
6
lead to a drain completing while the callback hasn't actually completed
7
yet.
8
9
Note that reordering these operations forbids calling drain directly
10
inside an AIO callback. As Paolo explains, indirectly calling it is
11
okay:
12
13
- Calling it through a coroutine is okay, because then
14
bdrv_drained_begin() goes through bdrv_co_yield_to_drain() and you
15
have in_flight=2 when bdrv_co_yield_to_drain() yields, then soon
16
in_flight=1 when the aio_co_wake() in the AIO callback completes, then
17
in_flight=0 after the bottom half starts.
18
19
- Calling it through a bottom half would be okay too, as long as the AIO
20
callback remembers to do inc_in_flight/dec_in_flight just like
21
bdrv_co_yield_to_drain() and bdrv_co_drain_bh_cb() do
22
23
A few more important cases that come to mind:
24
25
- A coroutine that yields because of I/O is okay, with a sequence
26
similar to bdrv_co_yield_to_drain().
27
28
- A coroutine that yields with no I/O pending will correctly decrease
29
in_flight to zero before yielding.
30
31
- Calling more AIO from the callback won't overflow the counter just
32
because of mutual recursion, because AIO functions always yield at
33
least once before invoking the callback.
34
35
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
36
Reviewed-by: Fam Zheng <famz@redhat.com>
37
Reviewed-by: Max Reitz <mreitz@redhat.com>
38
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
39
---
40
block/block-backend.c | 2 +-
41
1 file changed, 1 insertion(+), 1 deletion(-)
42
43
diff --git a/block/block-backend.c b/block/block-backend.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/block/block-backend.c
46
+++ b/block/block-backend.c
47
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
48
static void blk_aio_complete(BlkAioEmAIOCB *acb)
49
{
50
if (acb->has_returned) {
51
- blk_dec_in_flight(acb->rwco.blk);
52
acb->common.cb(acb->common.opaque, acb->rwco.ret);
53
+ blk_dec_in_flight(acb->rwco.blk);
54
qemu_aio_unref(acb);
55
}
56
}
57
--
58
2.17.1
59
60
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
Block jobs claim in .drained_poll() that they are in a quiescent state
4
as soon as job->deferred_to_main_loop is true. This is obviously wrong,
5
they still have a completion BH to run. We only get away with this
6
because commit 91af091f923 added an unconditional aio_poll(false) to the
7
drain functions, but this is bypassing the regular drain mechanisms.
8
9
However, just removing this and telling that the job is still active
10
doesn't work either: The completion callbacks themselves call drain
11
functions (directly, or indirectly with bdrv_reopen), so they would
12
deadlock then.
13
14
As a better lie, tell that the job is active as long as the BH is
15
pending, but falsely call it quiescent from the point in the BH when the
16
completion callback is called. At this point, nested drain calls won't
17
deadlock because they ignore the job, and outer drains will wait for the
18
job to really reach a quiescent state because the callback is already
19
running.
20
21
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
22
Reviewed-by: Max Reitz <mreitz@redhat.com>
23
---
24
include/qemu/job.h | 3 +++
25
blockjob.c | 2 +-
26
job.c | 11 ++++++++++-
27
3 files changed, 14 insertions(+), 2 deletions(-)
28
29
diff --git a/include/qemu/job.h b/include/qemu/job.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/qemu/job.h
32
+++ b/include/qemu/job.h
33
@@ -XXX,XX +XXX,XX @@ typedef struct Job {
34
* Set to false by the job while the coroutine has yielded and may be
35
* re-entered by job_enter(). There may still be I/O or event loop activity
36
* pending. Accessed under block_job_mutex (in blockjob.c).
37
+ *
38
+ * When the job is deferred to the main loop, busy is true as long as the
39
+ * bottom half is still pending.
40
*/
41
bool busy;
42
43
diff --git a/blockjob.c b/blockjob.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/blockjob.c
46
+++ b/blockjob.c
47
@@ -XXX,XX +XXX,XX @@ static bool child_job_drained_poll(BdrvChild *c)
48
/* An inactive or completed job doesn't have any pending requests. Jobs
49
* with !job->busy are either already paused or have a pause point after
50
* being reentered, so no job driver code will run before they pause. */
51
- if (!job->busy || job_is_completed(job) || job->deferred_to_main_loop) {
52
+ if (!job->busy || job_is_completed(job)) {
53
return false;
54
}
55
56
diff --git a/job.c b/job.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/job.c
59
+++ b/job.c
60
@@ -XXX,XX +XXX,XX @@ static void job_exit(void *opaque)
61
AioContext *ctx = job->aio_context;
62
63
aio_context_acquire(ctx);
64
+
65
+ /* This is a lie, we're not quiescent, but still doing the completion
66
+ * callbacks. However, completion callbacks tend to involve operations that
67
+ * drain block nodes, and if .drained_poll still returned true, we would
68
+ * deadlock. */
69
+ job->busy = false;
70
+ job_event_idle(job);
71
+
72
job_completed(job);
73
+
74
aio_context_release(ctx);
75
}
76
77
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn job_co_entry(void *opaque)
78
assert(job && job->driver && job->driver->run);
79
job_pause_point(job);
80
job->ret = job->driver->run(job, &job->err);
81
- job_event_idle(job);
82
job->deferred_to_main_loop = true;
83
+ job->busy = true;
84
aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job);
85
}
86
87
--
88
2.17.1
89
90
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
bdrv_drain_poll_top_level() was buggy because it didn't release the
4
AioContext lock of the node to be drained before calling aio_poll().
5
This way, callbacks called by aio_poll() would possibly take the lock a
6
second time and run into a deadlock with a nested AIO_WAIT_WHILE() call.
7
8
However, it turns out that the aio_poll() call isn't actually needed any
9
more. It was introduced in commit 91af091f923, which is effectively
10
reverted by this patch. The cases it was supposed to fix are now covered
11
by bdrv_drain_poll(), which waits for block jobs to reach a quiescent
12
state.
13
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Fam Zheng <famz@redhat.com>
16
Reviewed-by: Max Reitz <mreitz@redhat.com>
17
---
18
block/io.c | 8 --------
19
1 file changed, 8 deletions(-)
20
21
diff --git a/block/io.c b/block/io.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/block/io.c
24
+++ b/block/io.c
25
@@ -XXX,XX +XXX,XX @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
26
static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
27
BdrvChild *ignore_parent)
28
{
29
- /* Execute pending BHs first and check everything else only after the BHs
30
- * have executed. */
31
- while (aio_poll(bs->aio_context, false));
32
-
33
return bdrv_drain_poll(bs, recursive, ignore_parent, false);
34
}
35
36
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_all_poll(void)
37
BlockDriverState *bs = NULL;
38
bool result = false;
39
40
- /* Execute pending BHs first (may modify the graph) and check everything
41
- * else only after the BHs have executed. */
42
- while (aio_poll(qemu_get_aio_context(), false));
43
-
44
/* bdrv_drain_poll() can't make changes to the graph and we are holding the
45
* main AioContext lock, so iterating bdrv_next_all_states() is safe. */
46
while ((bs = bdrv_next_all_states(bs))) {
47
--
48
2.17.1
49
50
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
This is a regression test for a deadlock that could occur in callbacks
4
called from the aio_poll() in bdrv_drain_poll_top_level(). The
5
AioContext lock wasn't released and therefore would be taken a second
6
time in the callback. This would cause a possible AIO_WAIT_WHILE() in
7
the callback to hang.
8
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
11
---
12
tests/test-bdrv-drain.c | 13 +++++++++++++
13
1 file changed, 13 insertions(+)
14
15
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tests/test-bdrv-drain.c
18
+++ b/tests/test-bdrv-drain.c
19
@@ -XXX,XX +XXX,XX @@ static void test_iothread_aio_cb(void *opaque, int ret)
20
qemu_event_set(&done_event);
21
}
22
23
+static void test_iothread_main_thread_bh(void *opaque)
24
+{
25
+ struct test_iothread_data *data = opaque;
26
+
27
+ /* Test that the AioContext is not yet locked in a random BH that is
28
+ * executed during drain, otherwise this would deadlock. */
29
+ aio_context_acquire(bdrv_get_aio_context(data->bs));
30
+ bdrv_flush(data->bs);
31
+ aio_context_release(bdrv_get_aio_context(data->bs));
32
+}
33
+
34
/*
35
* Starts an AIO request on a BDS that runs in the AioContext of iothread 1.
36
* The request involves a BH on iothread 2 before it can complete.
37
@@ -XXX,XX +XXX,XX @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
38
aio_context_acquire(ctx_a);
39
}
40
41
+ aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, &data);
42
+
43
/* The request is running on the IOThread a. Draining its block device
44
* will make sure that it has completed as far as the BDS is concerned,
45
* but the drain in this thread can continue immediately after
46
--
47
2.17.1
48
49
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
Amongst others, job_finalize_single() calls the .prepare/.commit/.abort
4
callbacks of the individual job driver. Recently, their use was adapted
5
for all block jobs so that they involve code calling AIO_WAIT_WHILE()
6
now. Such code must be called under the AioContext lock for the
7
respective job, but without holding any other AioContext lock.
8
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
---
12
job.c | 16 +++++++++++-----
13
1 file changed, 11 insertions(+), 5 deletions(-)
14
15
diff --git a/job.c b/job.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/job.c
18
+++ b/job.c
19
@@ -XXX,XX +XXX,XX @@ static void job_cancel_async(Job *job, bool force)
20
21
static void job_completed_txn_abort(Job *job)
22
{
23
+ AioContext *outer_ctx = job->aio_context;
24
AioContext *ctx;
25
JobTxn *txn = job->txn;
26
Job *other_job;
27
@@ -XXX,XX +XXX,XX @@ static void job_completed_txn_abort(Job *job)
28
txn->aborting = true;
29
job_txn_ref(txn);
30
31
- /* We are the first failed job. Cancel other jobs. */
32
- QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
33
- ctx = other_job->aio_context;
34
- aio_context_acquire(ctx);
35
- }
36
+ /* We can only hold the single job's AioContext lock while calling
37
+ * job_finalize_single() because the finalization callbacks can involve
38
+ * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */
39
+ aio_context_release(outer_ctx);
40
41
/* Other jobs are effectively cancelled by us, set the status for
42
* them; this job, however, may or may not be cancelled, depending
43
* on the caller, so leave it. */
44
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
45
if (other_job != job) {
46
+ ctx = other_job->aio_context;
47
+ aio_context_acquire(ctx);
48
job_cancel_async(other_job, false);
49
+ aio_context_release(ctx);
50
}
51
}
52
while (!QLIST_EMPTY(&txn->jobs)) {
53
other_job = QLIST_FIRST(&txn->jobs);
54
ctx = other_job->aio_context;
55
+ aio_context_acquire(ctx);
56
if (!job_is_completed(other_job)) {
57
assert(job_is_cancelled(other_job));
58
job_finish_sync(other_job, NULL, NULL);
59
@@ -XXX,XX +XXX,XX @@ static void job_completed_txn_abort(Job *job)
60
aio_context_release(ctx);
61
}
62
63
+ aio_context_acquire(outer_ctx);
64
+
65
job_txn_unref(txn);
66
}
67
68
--
69
2.17.1
70
71
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
This adds tests for calling AIO_WAIT_WHILE() in the .commit and .abort
4
callbacks. Both reasons why .abort could be called for a single job are
5
tested: Either .run or .prepare could return an error.
6
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Reviewed-by: Max Reitz <mreitz@redhat.com>
9
---
10
tests/test-bdrv-drain.c | 116 +++++++++++++++++++++++++++++++++++-----
11
1 file changed, 104 insertions(+), 12 deletions(-)
12
13
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tests/test-bdrv-drain.c
16
+++ b/tests/test-bdrv-drain.c
17
@@ -XXX,XX +XXX,XX @@ static void test_iothread_drain_subtree(void)
18
19
typedef struct TestBlockJob {
20
BlockJob common;
21
+ int run_ret;
22
+ int prepare_ret;
23
bool should_complete;
24
} TestBlockJob;
25
26
@@ -XXX,XX +XXX,XX @@ static int test_job_prepare(Job *job)
27
28
/* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
29
blk_flush(s->common.blk);
30
- return 0;
31
+ return s->prepare_ret;
32
+}
33
+
34
+static void test_job_commit(Job *job)
35
+{
36
+ TestBlockJob *s = container_of(job, TestBlockJob, common.job);
37
+
38
+ /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
39
+ blk_flush(s->common.blk);
40
+}
41
+
42
+static void test_job_abort(Job *job)
43
+{
44
+ TestBlockJob *s = container_of(job, TestBlockJob, common.job);
45
+
46
+ /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
47
+ blk_flush(s->common.blk);
48
}
49
50
static int coroutine_fn test_job_run(Job *job, Error **errp)
51
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn test_job_run(Job *job, Error **errp)
52
job_pause_point(&s->common.job);
53
}
54
55
- return 0;
56
+ return s->run_ret;
57
}
58
59
static void test_job_complete(Job *job, Error **errp)
60
@@ -XXX,XX +XXX,XX @@ BlockJobDriver test_job_driver = {
61
.run = test_job_run,
62
.complete = test_job_complete,
63
.prepare = test_job_prepare,
64
+ .commit = test_job_commit,
65
+ .abort = test_job_abort,
66
},
67
};
68
69
-static void test_blockjob_common(enum drain_type drain_type, bool use_iothread)
70
+enum test_job_result {
71
+ TEST_JOB_SUCCESS,
72
+ TEST_JOB_FAIL_RUN,
73
+ TEST_JOB_FAIL_PREPARE,
74
+};
75
+
76
+static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
77
+ enum test_job_result result)
78
{
79
BlockBackend *blk_src, *blk_target;
80
BlockDriverState *src, *target;
81
BlockJob *job;
82
+ TestBlockJob *tjob;
83
IOThread *iothread = NULL;
84
AioContext *ctx;
85
int ret;
86
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread)
87
blk_insert_bs(blk_target, target, &error_abort);
88
89
aio_context_acquire(ctx);
90
- job = block_job_create("job0", &test_job_driver, NULL, src, 0, BLK_PERM_ALL,
91
- 0, 0, NULL, NULL, &error_abort);
92
+ tjob = block_job_create("job0", &test_job_driver, NULL, src,
93
+ 0, BLK_PERM_ALL,
94
+ 0, 0, NULL, NULL, &error_abort);
95
+ job = &tjob->common;
96
block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
97
+
98
+ switch (result) {
99
+ case TEST_JOB_SUCCESS:
100
+ break;
101
+ case TEST_JOB_FAIL_RUN:
102
+ tjob->run_ret = -EIO;
103
+ break;
104
+ case TEST_JOB_FAIL_PREPARE:
105
+ tjob->prepare_ret = -EIO;
106
+ break;
107
+ }
108
+
109
job_start(&job->job);
110
aio_context_release(ctx);
111
112
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread)
113
114
aio_context_acquire(ctx);
115
ret = job_complete_sync(&job->job, &error_abort);
116
- g_assert_cmpint(ret, ==, 0);
117
+ g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO));
118
119
if (use_iothread) {
120
blk_set_aio_context(blk_src, qemu_get_aio_context());
121
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread)
122
123
static void test_blockjob_drain_all(void)
124
{
125
- test_blockjob_common(BDRV_DRAIN_ALL, false);
126
+ test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS);
127
}
128
129
static void test_blockjob_drain(void)
130
{
131
- test_blockjob_common(BDRV_DRAIN, false);
132
+ test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
133
}
134
135
static void test_blockjob_drain_subtree(void)
136
{
137
- test_blockjob_common(BDRV_SUBTREE_DRAIN, false);
138
+ test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
139
+}
140
+
141
+static void test_blockjob_error_drain_all(void)
142
+{
143
+ test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
144
+ test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_PREPARE);
145
+}
146
+
147
+static void test_blockjob_error_drain(void)
148
+{
149
+ test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_RUN);
150
+ test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
151
+}
152
+
153
+static void test_blockjob_error_drain_subtree(void)
154
+{
155
+ test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
156
+ test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
157
}
158
159
static void test_blockjob_iothread_drain_all(void)
160
{
161
- test_blockjob_common(BDRV_DRAIN_ALL, true);
162
+ test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
163
}
164
165
static void test_blockjob_iothread_drain(void)
166
{
167
- test_blockjob_common(BDRV_DRAIN, true);
168
+ test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
169
}
170
171
static void test_blockjob_iothread_drain_subtree(void)
172
{
173
- test_blockjob_common(BDRV_SUBTREE_DRAIN, true);
174
+ test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
175
+}
176
+
177
+static void test_blockjob_iothread_error_drain_all(void)
178
+{
179
+ test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
180
+ test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_PREPARE);
181
+}
182
+
183
+static void test_blockjob_iothread_error_drain(void)
184
+{
185
+ test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_RUN);
186
+ test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
187
+}
188
+
189
+static void test_blockjob_iothread_error_drain_subtree(void)
190
+{
191
+ test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
192
+ test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
193
}
194
195
196
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
197
g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
198
test_blockjob_drain_subtree);
199
200
+ g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
201
+ test_blockjob_error_drain_all);
202
+ g_test_add_func("/bdrv-drain/blockjob/error/drain",
203
+ test_blockjob_error_drain);
204
+ g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
205
+ test_blockjob_error_drain_subtree);
206
+
207
g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
208
test_blockjob_iothread_drain_all);
209
g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
210
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
211
g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
212
test_blockjob_iothread_drain_subtree);
213
214
+ g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
215
+ test_blockjob_iothread_error_drain_all);
216
+ g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
217
+ test_blockjob_iothread_error_drain);
218
+ g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
219
+ test_blockjob_iothread_error_drain_subtree);
220
+
221
g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
222
g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
223
g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
224
--
225
2.17.1
226
227
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
Commit 89bd030533e changed the test case from using job_sleep_ns() to
4
using qemu_co_sleep_ns() instead. Also, block_job_sleep_ns() became
5
job_sleep_ns() in commit 5d43e86e11f.
6
7
In both cases, some comments in the test case were not updated. Do that
8
now.
9
10
Reported-by: Max Reitz <mreitz@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Reviewed-by: Eric Blake <eblake@redhat.com>
13
---
14
tests/test-bdrv-drain.c | 10 +++++-----
15
1 file changed, 5 insertions(+), 5 deletions(-)
16
17
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/tests/test-bdrv-drain.c
20
+++ b/tests/test-bdrv-drain.c
21
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn test_job_run(Job *job, Error **errp)
22
23
job_transition_to_ready(&s->common.job);
24
while (!s->should_complete) {
25
- /* Avoid block_job_sleep_ns() because it marks the job as !busy. We
26
- * want to emulate some actual activity (probably some I/O) here so
27
- * that drain has to wait for this acitivity to stop. */
28
+ /* Avoid job_sleep_ns() because it marks the job as !busy. We want to
29
+ * emulate some actual activity (probably some I/O) here so that drain
30
+ * has to wait for this activity to stop. */
31
qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
32
job_pause_point(&s->common.job);
33
}
34
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
35
36
g_assert_cmpint(job->job.pause_count, ==, 0);
37
g_assert_false(job->job.paused);
38
- g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
39
+ g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
40
41
do_drain_begin_unlocked(drain_type, src);
42
43
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
44
45
g_assert_cmpint(job->job.pause_count, ==, 0);
46
g_assert_false(job->job.paused);
47
- g_assert_true(job->job.busy); /* We're in job_sleep_ns() */
48
+ g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
49
50
aio_context_acquire(ctx);
51
ret = job_complete_sync(&job->job, &error_abort);
52
--
53
2.17.1
54
55
diff view generated by jsdifflib
Deleted patch
1
From: Kevin Wolf <kwolf@redhat.com>
2
1
3
For the block job drain test, don't only test draining the source and
4
the target node, but create a backing chain for the source
5
(source_backing <- source <- source_overlay) and test draining each of
6
the nodes in it.
7
8
When using iothreads, the source node (and therefore the job) is in a
9
different AioContext than the drain, which happens from the main
10
thread. This way, the main thread waits in AIO_WAIT_WHILE() for the
11
iothread to make process and aio_wait_kick() is required to notify it.
12
The test validates that calling bdrv_wakeup() for a child or a parent
13
node will actually notify AIO_WAIT_WHILE() instead of letting it hang.
14
15
Increase the sleep time a bit (to 1 ms) because the test case is racy
16
and with the shorter sleep, it didn't reproduce the bug it is supposed
17
to test for me under 'rr record -n'.
18
19
This was because bdrv_drain_invoke_entry() (in the main thread) was only
20
called after the job had already reached the pause point, so we got a
21
bdrv_dec_in_flight() from the main thread and the additional
22
aio_wait_kick() when the job becomes idle (that we really wanted to test
23
here) wasn't even necessary any more to make progress.
24
25
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
26
Reviewed-by: Eric Blake <eblake@redhat.com>
27
Reviewed-by: Max Reitz <mreitz@redhat.com>
28
---
29
tests/test-bdrv-drain.c | 77 ++++++++++++++++++++++++++++++++++++-----
30
1 file changed, 69 insertions(+), 8 deletions(-)
31
32
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tests/test-bdrv-drain.c
35
+++ b/tests/test-bdrv-drain.c
36
@@ -XXX,XX +XXX,XX @@ typedef struct TestBlockJob {
37
BlockJob common;
38
int run_ret;
39
int prepare_ret;
40
+ bool running;
41
bool should_complete;
42
} TestBlockJob;
43
44
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn test_job_run(Job *job, Error **errp)
45
{
46
TestBlockJob *s = container_of(job, TestBlockJob, common.job);
47
48
+ /* We are running the actual job code past the pause point in
49
+ * job_co_entry(). */
50
+ s->running = true;
51
+
52
job_transition_to_ready(&s->common.job);
53
while (!s->should_complete) {
54
/* Avoid job_sleep_ns() because it marks the job as !busy. We want to
55
* emulate some actual activity (probably some I/O) here so that drain
56
* has to wait for this activity to stop. */
57
- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
58
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
59
+
60
job_pause_point(&s->common.job);
61
}
62
63
@@ -XXX,XX +XXX,XX @@ enum test_job_result {
64
TEST_JOB_FAIL_PREPARE,
65
};
66
67
-static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
68
- enum test_job_result result)
69
+enum test_job_drain_node {
70
+ TEST_JOB_DRAIN_SRC,
71
+ TEST_JOB_DRAIN_SRC_CHILD,
72
+ TEST_JOB_DRAIN_SRC_PARENT,
73
+};
74
+
75
+static void test_blockjob_common_drain_node(enum drain_type drain_type,
76
+ bool use_iothread,
77
+ enum test_job_result result,
78
+ enum test_job_drain_node drain_node)
79
{
80
BlockBackend *blk_src, *blk_target;
81
- BlockDriverState *src, *target;
82
+ BlockDriverState *src, *src_backing, *src_overlay, *target, *drain_bs;
83
BlockJob *job;
84
TestBlockJob *tjob;
85
IOThread *iothread = NULL;
86
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
87
88
src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
89
&error_abort);
90
+ src_backing = bdrv_new_open_driver(&bdrv_test, "source-backing",
91
+ BDRV_O_RDWR, &error_abort);
92
+ src_overlay = bdrv_new_open_driver(&bdrv_test, "source-overlay",
93
+ BDRV_O_RDWR, &error_abort);
94
+
95
+ bdrv_set_backing_hd(src_overlay, src, &error_abort);
96
+ bdrv_unref(src);
97
+ bdrv_set_backing_hd(src, src_backing, &error_abort);
98
+ bdrv_unref(src_backing);
99
+
100
blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
101
- blk_insert_bs(blk_src, src, &error_abort);
102
+ blk_insert_bs(blk_src, src_overlay, &error_abort);
103
+
104
+ switch (drain_node) {
105
+ case TEST_JOB_DRAIN_SRC:
106
+ drain_bs = src;
107
+ break;
108
+ case TEST_JOB_DRAIN_SRC_CHILD:
109
+ drain_bs = src_backing;
110
+ break;
111
+ case TEST_JOB_DRAIN_SRC_PARENT:
112
+ drain_bs = src_overlay;
113
+ break;
114
+ default:
115
+ g_assert_not_reached();
116
+ }
117
118
if (use_iothread) {
119
iothread = iothread_new();
120
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
121
job_start(&job->job);
122
aio_context_release(ctx);
123
124
+ if (use_iothread) {
125
+ /* job_co_entry() is run in the I/O thread, wait for the actual job
126
+ * code to start (we don't want to catch the job in the pause point in
127
+ * job_co_entry(). */
128
+ while (!tjob->running) {
129
+ aio_poll(qemu_get_aio_context(), false);
130
+ }
131
+ }
132
+
133
g_assert_cmpint(job->job.pause_count, ==, 0);
134
g_assert_false(job->job.paused);
135
+ g_assert_true(tjob->running);
136
g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
137
138
- do_drain_begin_unlocked(drain_type, src);
139
+ do_drain_begin_unlocked(drain_type, drain_bs);
140
141
if (drain_type == BDRV_DRAIN_ALL) {
142
/* bdrv_drain_all() drains both src and target */
143
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
144
g_assert_true(job->job.paused);
145
g_assert_false(job->job.busy); /* The job is paused */
146
147
- do_drain_end_unlocked(drain_type, src);
148
+ do_drain_end_unlocked(drain_type, drain_bs);
149
150
if (use_iothread) {
151
/* paused is reset in the I/O thread, wait for it */
152
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
153
154
blk_unref(blk_src);
155
blk_unref(blk_target);
156
- bdrv_unref(src);
157
+ bdrv_unref(src_overlay);
158
bdrv_unref(target);
159
160
if (iothread) {
161
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
162
}
163
}
164
165
+static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
166
+ enum test_job_result result)
167
+{
168
+ test_blockjob_common_drain_node(drain_type, use_iothread, result,
169
+ TEST_JOB_DRAIN_SRC);
170
+ test_blockjob_common_drain_node(drain_type, use_iothread, result,
171
+ TEST_JOB_DRAIN_SRC_CHILD);
172
+ if (drain_type == BDRV_SUBTREE_DRAIN) {
173
+ test_blockjob_common_drain_node(drain_type, use_iothread, result,
174
+ TEST_JOB_DRAIN_SRC_PARENT);
175
+ }
176
+}
177
+
178
static void test_blockjob_drain_all(void)
179
{
180
test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS);
181
--
182
2.17.1
183
184
diff view generated by jsdifflib