1
The following changes since commit 16aaacb307ed607b9780c12702c44f0fe52edc7e:
1
The following changes since commit aa9bbd865502ed517624ab6fe7d4b5d89ca95e43:
2
2
3
Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20200430' into staging (2020-04-30 14:00:36 +0100)
3
Merge tag 'pull-ppc-20230528' of https://gitlab.com/danielhb/qemu into staging (2023-05-29 14:31:52 -0700)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
7
https://repo.or.cz/qemu/kevin.git tags/for-upstream
8
8
9
for you to fetch changes up to eaae29ef89d498d0eac553c77b554f310a47f809:
9
for you to fetch changes up to 60f782b6b78211c125970768be726c9f380dbd61:
10
10
11
qemu-storage-daemon: Fix non-string --object properties (2020-04-30 17:51:07 +0200)
11
aio: remove aio_disable_external() API (2023-05-30 17:37:26 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block layer patches:
14
Block layer patches
15
15
16
- Fix resize (extending) of short overlays
16
- Fix blockdev-create with iothreads
17
- nvme: introduce PMR support from NVMe 1.4 spec
17
- Remove aio_disable_external() API
18
- qemu-storage-daemon: Fix non-string --object properties
19
18
20
----------------------------------------------------------------
19
----------------------------------------------------------------
21
Alberto Garcia (1):
20
Kevin Wolf (12):
22
qcow2: Add incompatibility note between backing files and raw external data files
21
block-coroutine-wrapper: Take AioContext lock in no_co_wrappers
22
block: Clarify locking rules for bdrv_open(_inherit)()
23
block: Take main AioContext lock when calling bdrv_open()
24
block-backend: Fix blk_new_open() for iothreads
25
mirror: Hold main AioContext lock for calling bdrv_open_backing_file()
26
qcow2: Fix open with 'file' in iothread
27
raw-format: Fix open with 'file' in iothread
28
copy-before-write: Fix open with child in iothread
29
block: Take AioContext lock in bdrv_open_driver()
30
block: Fix AioContext locking in bdrv_insert_node()
31
iotests: Make verify_virtio_scsi_pci_or_ccw() public
32
iotests: Test blockdev-create in iothread
23
33
24
Andrzej Jakowski (1):
34
Stefan Hajnoczi (20):
25
nvme: introduce PMR support from NVMe 1.4 spec
35
block-backend: split blk_do_set_aio_context()
36
hw/qdev: introduce qdev_is_realized() helper
37
virtio-scsi: avoid race between unplug and transport event
38
virtio-scsi: stop using aio_disable_external() during unplug
39
util/vhost-user-server: rename refcount to in_flight counter
40
block/export: wait for vhost-user-blk requests when draining
41
block/export: stop using is_external in vhost-user-blk server
42
hw/xen: do not use aio_set_fd_handler(is_external=true) in xen_xenstore
43
block: add blk_in_drain() API
44
block: drain from main loop thread in bdrv_co_yield_to_drain()
45
xen-block: implement BlockDevOps->drained_begin()
46
hw/xen: do not set is_external=true on evtchn fds
47
block/export: rewrite vduse-blk drain code
48
block/export: don't require AioContext lock around blk_exp_ref/unref()
49
block/fuse: do not set is_external=true on FUSE fd
50
virtio: make it possible to detach host notifier from any thread
51
virtio-blk: implement BlockDevOps->drained_begin()
52
virtio-scsi: implement BlockDevOps->drained_begin()
53
virtio: do not set is_external=true on host notifiers
54
aio: remove aio_disable_external() API
26
55
27
Kevin Wolf (12):
56
hw/block/dataplane/xen-block.h | 2 +
28
block: Add flags to BlockDriver.bdrv_co_truncate()
57
include/block/aio.h | 57 ------------
29
block: Add flags to bdrv(_co)_truncate()
58
include/block/block-common.h | 3 +
30
block-backend: Add flags to blk_truncate()
59
include/block/block_int-common.h | 72 +++++++--------
31
qcow2: Support BDRV_REQ_ZERO_WRITE for truncate
60
include/block/export.h | 2 +
32
raw-format: Support BDRV_REQ_ZERO_WRITE for truncate
61
include/hw/qdev-core.h | 17 +++-
33
file-posix: Support BDRV_REQ_ZERO_WRITE for truncate
62
include/hw/scsi/scsi.h | 14 +++
34
block: truncate: Don't make backing file data visible
63
include/qemu/vhost-user-server.h | 8 +-
35
iotests: Filter testfiles out in filter_img_info()
64
include/sysemu/block-backend-common.h | 25 ++---
36
iotests: Test committing to short backing file
65
include/sysemu/block-backend-global-state.h | 1 +
37
qcow2: Forward ZERO_WRITE flag for full preallocation
66
util/aio-posix.h | 1 -
38
qom: Factor out user_creatable_add_dict()
67
block.c | 46 ++++++---
39
qemu-storage-daemon: Fix non-string --object properties
68
block/blkio.c | 15 +--
40
69
block/block-backend.c | 104 ++++++++++++---------
41
Paolo Bonzini (1):
70
block/copy-before-write.c | 21 ++++-
42
qemu-iotests: allow qcow2 external discarded clusters to contain stale data
71
block/curl.c | 10 +-
43
72
block/export/export.c | 13 ++-
44
docs/interop/qcow2.txt | 3 +
73
block/export/fuse.c | 56 ++++++++++-
45
hw/block/nvme.h | 2 +
74
block/export/vduse-blk.c | 128 ++++++++++++++++++--------
46
include/block/block.h | 5 +-
75
block/export/vhost-user-blk-server.c | 52 +++++++++--
47
include/block/block_int.h | 10 +-
76
block/io.c | 16 ++--
48
include/block/nvme.h | 172 ++++++++++++++++++++++++++
77
block/io_uring.c | 4 +-
49
include/qom/object_interfaces.h | 16 +++
78
block/iscsi.c | 3 +-
50
include/sysemu/block-backend.h | 2 +-
79
block/linux-aio.c | 4 +-
51
block.c | 3 +-
80
block/mirror.c | 6 ++
52
block/block-backend.c | 4 +-
81
block/nfs.c | 5 +-
53
block/commit.c | 4 +-
82
block/nvme.c | 8 +-
54
block/crypto.c | 7 +-
83
block/qapi-sysemu.c | 3 +
55
block/file-posix.c | 6 +-
84
block/qcow2.c | 8 +-
56
block/file-win32.c | 2 +-
85
block/raw-format.c | 5 +
57
block/gluster.c | 1 +
86
block/ssh.c | 4 +-
58
block/io.c | 43 ++++++-
87
block/win32-aio.c | 6 +-
59
block/iscsi.c | 2 +-
88
blockdev.c | 29 ++++--
60
block/mirror.c | 2 +-
89
hw/block/dataplane/virtio-blk.c | 23 +++--
61
block/nfs.c | 3 +-
90
hw/block/dataplane/xen-block.c | 42 ++++++---
62
block/parallels.c | 6 +-
91
hw/block/virtio-blk.c | 38 +++++++-
63
block/qcow.c | 4 +-
92
hw/block/xen-block.c | 24 ++++-
64
block/qcow2-cluster.c | 2 +-
93
hw/i386/kvm/xen_xenstore.c | 2 +-
65
block/qcow2-refcount.c | 2 +-
94
hw/scsi/scsi-bus.c | 46 ++++++++-
66
block/qcow2.c | 73 +++++++++--
95
hw/scsi/scsi-disk.c | 27 +++++-
67
block/qed.c | 3 +-
96
hw/scsi/virtio-scsi-dataplane.c | 32 +++++--
68
block/raw-format.c | 6 +-
97
hw/scsi/virtio-scsi.c | 127 +++++++++++++++++++------
69
block/rbd.c | 1 +
98
hw/virtio/virtio.c | 9 +-
70
block/sheepdog.c | 4 +-
99
hw/xen/xen-bus.c | 11 ++-
71
block/ssh.c | 2 +-
100
io/channel-command.c | 6 +-
72
block/vdi.c | 2 +-
101
io/channel-file.c | 3 +-
73
block/vhdx-log.c | 2 +-
102
io/channel-socket.c | 3 +-
74
block/vhdx.c | 6 +-
103
migration/rdma.c | 16 ++--
75
block/vmdk.c | 8 +-
104
qemu-nbd.c | 4 +
76
block/vpc.c | 2 +-
105
tests/unit/test-aio.c | 27 +-----
77
blockdev.c | 2 +-
106
tests/unit/test-bdrv-drain.c | 15 +--
78
hw/block/nvme.c | 109 ++++++++++++++++
107
tests/unit/test-block-iothread.c | 4 +-
79
qemu-img.c | 2 +-
108
tests/unit/test-fdmon-epoll.c | 73 ---------------
80
qemu-io-cmds.c | 2 +-
109
tests/unit/test-nested-aio-poll.c | 9 +-
81
qemu-storage-daemon.c | 4 +-
110
util/aio-posix.c | 20 +---
82
qom/object_interfaces.c | 31 +++++
111
util/aio-win32.c | 8 +-
83
qom/qom-qmp-cmds.c | 24 +---
112
util/async.c | 3 +-
84
tests/test-block-iothread.c | 9 +-
113
util/fdmon-epoll.c | 10 --
85
tests/qemu-iotests/iotests.py | 5 +-
114
util/fdmon-io_uring.c | 8 +-
86
hw/block/Makefile.objs | 2 +-
115
util/fdmon-poll.c | 3 +-
87
hw/block/trace-events | 4 +
116
util/main-loop.c | 7 +-
88
tests/qemu-iotests/244 | 10 +-
117
util/qemu-coroutine-io.c | 7 +-
89
tests/qemu-iotests/244.out | 9 +-
118
util/vhost-user-server.c | 33 ++++---
90
tests/qemu-iotests/274 | 155 +++++++++++++++++++++++
119
scripts/block-coroutine-wrapper.py | 25 +++--
91
tests/qemu-iotests/274.out | 268 ++++++++++++++++++++++++++++++++++++++++
120
tests/qemu-iotests/iotests.py | 2 +-
92
tests/qemu-iotests/group | 1 +
121
hw/scsi/trace-events | 2 +
93
49 files changed, 951 insertions(+), 96 deletions(-)
122
tests/qemu-iotests/256 | 2 +-
94
create mode 100755 tests/qemu-iotests/274
123
tests/qemu-iotests/tests/iothreads-create | 67 ++++++++++++++
95
create mode 100644 tests/qemu-iotests/274.out
124
tests/qemu-iotests/tests/iothreads-create.out | 4 +
96
125
tests/unit/meson.build | 3 -
97
126
70 files changed, 931 insertions(+), 562 deletions(-)
127
delete mode 100644 tests/unit/test-fdmon-epoll.c
128
create mode 100755 tests/qemu-iotests/tests/iothreads-create
129
create mode 100644 tests/qemu-iotests/tests/iothreads-create.out
diff view generated by jsdifflib
New patch
1
All of the functions that currently take a BlockDriverState, BdrvChild
2
or BlockBackend as their first parameter expect the associated
3
AioContext to be locked when they are called. In the case of
4
no_co_wrappers, they are called from bottom halves directly in the main
5
loop, so no other caller can be expected to take the lock for them. This
6
can result in assertion failures because a lock that isn't taken is
7
released in nested event loops.
1
8
9
Looking at the first parameter is already done by co_wrappers to decide
10
where the coroutine should run, so doing the same in no_co_wrappers is
11
only consistent. Take the lock in the generated bottom halves to fix the
12
problem.
13
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Message-Id: <20230525124713.401149-2-kwolf@redhat.com>
16
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
---
19
include/block/block-common.h | 3 +++
20
block/block-backend.c | 7 ++++++-
21
scripts/block-coroutine-wrapper.py | 25 +++++++++++++++----------
22
3 files changed, 24 insertions(+), 11 deletions(-)
23
24
diff --git a/include/block/block-common.h b/include/block/block-common.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/include/block/block-common.h
27
+++ b/include/block/block-common.h
28
@@ -XXX,XX +XXX,XX @@
29
* scheduling a BH in the bottom half that runs the respective non-coroutine
30
* function. The coroutine yields after scheduling the BH and is reentered when
31
* the wrapped function returns.
32
+ *
33
+ * If the first parameter of the function is a BlockDriverState, BdrvChild or
34
+ * BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
35
*/
36
#define no_co_wrapper
37
38
diff --git a/block/block-backend.c b/block/block-backend.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/block/block-backend.c
41
+++ b/block/block-backend.c
42
@@ -XXX,XX +XXX,XX @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
43
44
AioContext *blk_get_aio_context(BlockBackend *blk)
45
{
46
- BlockDriverState *bs = blk_bs(blk);
47
+ BlockDriverState *bs;
48
IO_CODE();
49
50
+ if (!blk) {
51
+ return qemu_get_aio_context();
52
+ }
53
+
54
+ bs = blk_bs(blk);
55
if (bs) {
56
AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
57
assert(ctx == blk->ctx);
58
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
59
index XXXXXXX..XXXXXXX 100644
60
--- a/scripts/block-coroutine-wrapper.py
61
+++ b/scripts/block-coroutine-wrapper.py
62
@@ -XXX,XX +XXX,XX @@ def __init__(self, wrapper_type: str, return_type: str, name: str,
63
raise ValueError(f"no_co function can't be rdlock: {self.name}")
64
self.target_name = f'{subsystem}_{subname}'
65
66
- t = self.args[0].type
67
- if t == 'BlockDriverState *':
68
- ctx = 'bdrv_get_aio_context(bs)'
69
- elif t == 'BdrvChild *':
70
- ctx = 'bdrv_get_aio_context(child->bs)'
71
- elif t == 'BlockBackend *':
72
- ctx = 'blk_get_aio_context(blk)'
73
- else:
74
- ctx = 'qemu_get_aio_context()'
75
- self.ctx = ctx
76
+ self.ctx = self.gen_ctx()
77
78
self.get_result = 's->ret = '
79
self.ret = 'return s.ret;'
80
@@ -XXX,XX +XXX,XX @@ def __init__(self, wrapper_type: str, return_type: str, name: str,
81
self.co_ret = ''
82
self.return_field = ''
83
84
+ def gen_ctx(self, prefix: str = '') -> str:
85
+ t = self.args[0].type
86
+ if t == 'BlockDriverState *':
87
+ return f'bdrv_get_aio_context({prefix}bs)'
88
+ elif t == 'BdrvChild *':
89
+ return f'bdrv_get_aio_context({prefix}child->bs)'
90
+ elif t == 'BlockBackend *':
91
+ return f'blk_get_aio_context({prefix}blk)'
92
+ else:
93
+ return 'qemu_get_aio_context()'
94
+
95
def gen_list(self, format: str) -> str:
96
return ', '.join(format.format_map(arg.__dict__) for arg in self.args)
97
98
@@ -XXX,XX +XXX,XX @@ def gen_no_co_wrapper(func: FuncDecl) -> str:
99
static void {name}_bh(void *opaque)
100
{{
101
{struct_name} *s = opaque;
102
+ AioContext *ctx = {func.gen_ctx('s->')};
103
104
+ aio_context_acquire(ctx);
105
{func.get_result}{name}({ func.gen_list('s->{name}') });
106
+ aio_context_release(ctx);
107
108
aio_co_wake(s->co);
109
}}
110
--
111
2.40.1
diff view generated by jsdifflib
New patch
1
These functions specify that the caller must hold the "@filename
2
AioContext lock". This doesn't make sense: file names don't have an
3
AioContext. New BlockDriverStates always start in the main AioContext,
4
so this is what we really need here.
1
5
6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
Message-Id: <20230525124713.401149-3-kwolf@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
11
block.c | 10 ++--------
12
1 file changed, 2 insertions(+), 8 deletions(-)
13
14
diff --git a/block.c b/block.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block.c
17
+++ b/block.c
18
@@ -XXX,XX +XXX,XX @@ out:
19
* should be opened. If specified, neither options nor a filename may be given,
20
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
21
*
22
- * The caller must always hold @filename AioContext lock, because this
23
- * function eventually calls bdrv_refresh_total_sectors() which polls
24
- * when called from non-coroutine context.
25
+ * The caller must always hold the main AioContext lock.
26
*/
27
static BlockDriverState * no_coroutine_fn
28
bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
29
@@ -XXX,XX +XXX,XX @@ close_and_fail:
30
return NULL;
31
}
32
33
-/*
34
- * The caller must always hold @filename AioContext lock, because this
35
- * function eventually calls bdrv_refresh_total_sectors() which polls
36
- * when called from non-coroutine context.
37
- */
38
+/* The caller must always hold the main AioContext lock. */
39
BlockDriverState *bdrv_open(const char *filename, const char *reference,
40
QDict *options, int flags, Error **errp)
41
{
42
--
43
2.40.1
diff view generated by jsdifflib
1
Now that the node-level interface bdrv_truncate() supports passing request
1
The function documentation already says that all callers must hold the
2
flags to the block driver, expose this on the BlockBackend level, too.
2
main AioContext lock, but not all of them do. This can cause assertion
3
failures when functions called by bdrv_open() try to drop the lock. Fix
4
a few more callers to take the lock before calling bdrv_open().
3
5
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Message-Id: <20230525124713.401149-4-kwolf@redhat.com>
6
Reviewed-by: Alberto Garcia <berto@igalia.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Message-Id: <20200424125448.63318-4-kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
10
---
11
include/sysemu/block-backend.h | 2 +-
11
block.c | 3 +++
12
block.c | 3 ++-
12
block/block-backend.c | 2 ++
13
block/block-backend.c | 4 ++--
13
block/qapi-sysemu.c | 3 +++
14
block/commit.c | 4 ++--
14
blockdev.c | 29 +++++++++++++++++++++++------
15
block/crypto.c | 2 +-
15
qemu-nbd.c | 4 ++++
16
block/mirror.c | 2 +-
16
tests/unit/test-block-iothread.c | 3 +++
17
block/qcow2.c | 4 ++--
17
6 files changed, 38 insertions(+), 6 deletions(-)
18
block/qed.c | 2 +-
19
block/vdi.c | 2 +-
20
block/vhdx.c | 4 ++--
21
block/vmdk.c | 6 +++---
22
block/vpc.c | 2 +-
23
blockdev.c | 2 +-
24
qemu-img.c | 2 +-
25
qemu-io-cmds.c | 2 +-
26
15 files changed, 22 insertions(+), 21 deletions(-)
27
18
28
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/include/sysemu/block-backend.h
31
+++ b/include/sysemu/block-backend.h
32
@@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
33
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
34
int bytes);
35
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
36
- PreallocMode prealloc, Error **errp);
37
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
38
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes);
39
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
40
int64_t pos, int size);
41
diff --git a/block.c b/block.c
19
diff --git a/block.c b/block.c
42
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
43
--- a/block.c
21
--- a/block.c
44
+++ b/block.c
22
+++ b/block.c
45
@@ -XXX,XX +XXX,XX @@ static int64_t create_file_fallback_truncate(BlockBackend *blk,
23
@@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt,
46
int64_t size;
24
return;
47
int ret;
25
}
48
26
49
- ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, &local_err);
27
+ aio_context_acquire(qemu_get_aio_context());
50
+ ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
28
+
51
+ &local_err);
29
/* Create parameter list */
52
if (ret < 0 && ret != -ENOTSUP) {
30
create_opts = qemu_opts_append(create_opts, drv->create_opts);
53
error_propagate(errp, local_err);
31
create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
54
return ret;
32
@@ -XXX,XX +XXX,XX @@ out:
33
qemu_opts_del(opts);
34
qemu_opts_free(create_opts);
35
error_propagate(errp, local_err);
36
+ aio_context_release(qemu_get_aio_context());
37
}
38
39
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
55
diff --git a/block/block-backend.c b/block/block-backend.c
40
diff --git a/block/block-backend.c b/block/block-backend.c
56
index XXXXXXX..XXXXXXX 100644
41
index XXXXXXX..XXXXXXX 100644
57
--- a/block/block-backend.c
42
--- a/block/block-backend.c
58
+++ b/block/block-backend.c
43
+++ b/block/block-backend.c
59
@@ -XXX,XX +XXX,XX @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
44
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
60
}
61
62
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
63
- PreallocMode prealloc, Error **errp)
64
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
65
{
66
if (!blk_is_available(blk)) {
67
error_setg(errp, "No medium inserted");
68
return -ENOMEDIUM;
69
}
45
}
70
46
71
- return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp);
47
blk = blk_new(qemu_get_aio_context(), perm, shared);
72
+ return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp);
48
+ aio_context_acquire(qemu_get_aio_context());
73
}
49
bs = bdrv_open(filename, reference, options, flags, errp);
74
50
+ aio_context_release(qemu_get_aio_context());
75
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
51
if (!bs) {
76
diff --git a/block/commit.c b/block/commit.c
52
blk_unref(blk);
53
return NULL;
54
diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c
77
index XXXXXXX..XXXXXXX 100644
55
index XXXXXXX..XXXXXXX 100644
78
--- a/block/commit.c
56
--- a/block/qapi-sysemu.c
79
+++ b/block/commit.c
57
+++ b/block/qapi-sysemu.c
80
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn commit_run(Job *job, Error **errp)
58
@@ -XXX,XX +XXX,XX @@ void qmp_blockdev_change_medium(const char *device,
59
qdict_put_str(options, "driver", format);
81
}
60
}
82
61
83
if (base_len < len) {
62
+ aio_context_acquire(qemu_get_aio_context());
84
- ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, NULL);
63
medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp);
85
+ ret = blk_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
64
+ aio_context_release(qemu_get_aio_context());
86
if (ret) {
65
+
87
goto out;
66
if (!medium_bs) {
88
}
67
goto fail;
89
@@ -XXX,XX +XXX,XX @@ int bdrv_commit(BlockDriverState *bs)
90
* grow the backing file image if possible. If not possible,
91
* we must return an error */
92
if (length > backing_length) {
93
- ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF,
94
+ ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0,
95
&local_err);
96
if (ret < 0) {
97
error_report_err(local_err);
98
diff --git a/block/crypto.c b/block/crypto.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/block/crypto.c
101
+++ b/block/crypto.c
102
@@ -XXX,XX +XXX,XX @@ static ssize_t block_crypto_init_func(QCryptoBlock *block,
103
* which will be used by the crypto header
104
*/
105
return blk_truncate(data->blk, data->size + headerlen, false,
106
- data->prealloc, errp);
107
+ data->prealloc, 0, errp);
108
}
109
110
111
diff --git a/block/mirror.c b/block/mirror.c
112
index XXXXXXX..XXXXXXX 100644
113
--- a/block/mirror.c
114
+++ b/block/mirror.c
115
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
116
117
if (s->bdev_length > base_length) {
118
ret = blk_truncate(s->target, s->bdev_length, false,
119
- PREALLOC_MODE_OFF, NULL);
120
+ PREALLOC_MODE_OFF, 0, NULL);
121
if (ret < 0) {
122
goto immediate_exit;
123
}
124
diff --git a/block/qcow2.c b/block/qcow2.c
125
index XXXXXXX..XXXXXXX 100644
126
--- a/block/qcow2.c
127
+++ b/block/qcow2.c
128
@@ -XXX,XX +XXX,XX @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
129
130
/* Okay, now that we have a valid image, let's give it the right size */
131
ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
132
- errp);
133
+ 0, errp);
134
if (ret < 0) {
135
error_prepend(errp, "Could not resize image: ");
136
goto out;
137
@@ -XXX,XX +XXX,XX @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
138
* Amending image options should ensure that the image has
139
* exactly the given new values, so pass exact=true here.
140
*/
141
- ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp);
142
+ ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, 0, errp);
143
blk_unref(blk);
144
if (ret < 0) {
145
return ret;
146
diff --git a/block/qed.c b/block/qed.c
147
index XXXXXXX..XXXXXXX 100644
148
--- a/block/qed.c
149
+++ b/block/qed.c
150
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts,
151
* The QED format associates file length with allocation status,
152
* so a new file (which is empty) must have a length of 0.
153
*/
154
- ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, errp);
155
+ ret = blk_truncate(blk, 0, true, PREALLOC_MODE_OFF, 0, errp);
156
if (ret < 0) {
157
goto out;
158
}
159
diff --git a/block/vdi.c b/block/vdi.c
160
index XXXXXXX..XXXXXXX 100644
161
--- a/block/vdi.c
162
+++ b/block/vdi.c
163
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options,
164
165
if (image_type == VDI_TYPE_STATIC) {
166
ret = blk_truncate(blk, offset + blocks * block_size, false,
167
- PREALLOC_MODE_OFF, errp);
168
+ PREALLOC_MODE_OFF, 0, errp);
169
if (ret < 0) {
170
error_prepend(errp, "Failed to statically allocate file");
171
goto exit;
172
diff --git a/block/vhdx.c b/block/vhdx.c
173
index XXXXXXX..XXXXXXX 100644
174
--- a/block/vhdx.c
175
+++ b/block/vhdx.c
176
@@ -XXX,XX +XXX,XX @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
177
/* All zeroes, so we can just extend the file - the end of the BAT
178
* is the furthest thing we have written yet */
179
ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF,
180
- errp);
181
+ 0, errp);
182
if (ret < 0) {
183
goto exit;
184
}
185
} else if (type == VHDX_TYPE_FIXED) {
186
ret = blk_truncate(blk, data_file_offset + image_size, false,
187
- PREALLOC_MODE_OFF, errp);
188
+ PREALLOC_MODE_OFF, 0, errp);
189
if (ret < 0) {
190
goto exit;
191
}
192
diff --git a/block/vmdk.c b/block/vmdk.c
193
index XXXXXXX..XXXXXXX 100644
194
--- a/block/vmdk.c
195
+++ b/block/vmdk.c
196
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk,
197
int gd_buf_size;
198
199
if (flat) {
200
- ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, errp);
201
+ ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp);
202
goto exit;
203
}
204
magic = cpu_to_be32(VMDK4_MAGIC);
205
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_extent(BlockBackend *blk,
206
}
207
208
ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false,
209
- PREALLOC_MODE_OFF, errp);
210
+ PREALLOC_MODE_OFF, 0, errp);
211
if (ret < 0) {
212
goto exit;
213
}
214
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn vmdk_co_do_create(int64_t size,
215
/* bdrv_pwrite write padding zeros to align to sector, we don't need that
216
* for description file */
217
if (desc_offset == 0) {
218
- ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, errp);
219
+ ret = blk_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp);
220
if (ret < 0) {
221
goto exit;
222
}
223
diff --git a/block/vpc.c b/block/vpc.c
224
index XXXXXXX..XXXXXXX 100644
225
--- a/block/vpc.c
226
+++ b/block/vpc.c
227
@@ -XXX,XX +XXX,XX @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
228
/* Add footer to total size */
229
total_size += HEADER_SIZE;
230
231
- ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, errp);
232
+ ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp);
233
if (ret < 0) {
234
return ret;
235
}
68
}
236
diff --git a/blockdev.c b/blockdev.c
69
diff --git a/blockdev.c b/blockdev.c
237
index XXXXXXX..XXXXXXX 100644
70
index XXXXXXX..XXXXXXX 100644
238
--- a/blockdev.c
71
--- a/blockdev.c
239
+++ b/blockdev.c
72
+++ b/blockdev.c
240
@@ -XXX,XX +XXX,XX @@ void qmp_block_resize(bool has_device, const char *device,
73
@@ -XXX,XX +XXX,XX @@ err_no_opts:
74
/* Takes the ownership of bs_opts */
75
BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
76
{
77
+ BlockDriverState *bs;
78
int bdrv_flags = 0;
79
80
GLOBAL_STATE_CODE();
81
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
82
bdrv_flags |= BDRV_O_INACTIVE;
241
}
83
}
242
84
243
bdrv_drained_begin(bs);
85
- return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
244
- ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, errp);
86
+ aio_context_acquire(qemu_get_aio_context());
245
+ ret = blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
87
+ bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
246
bdrv_drained_end(bs);
88
+ aio_context_release(qemu_get_aio_context());
247
89
+
248
out:
90
+ return bs;
249
diff --git a/qemu-img.c b/qemu-img.c
91
}
92
93
void blockdev_close_all_bdrv_states(void)
94
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_action(TransactionAction *action,
95
}
96
qdict_put_str(options, "driver", format);
97
}
98
+ aio_context_release(aio_context);
99
100
+ aio_context_acquire(qemu_get_aio_context());
101
state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
102
errp);
103
+ aio_context_release(qemu_get_aio_context());
104
+
105
/* We will manually add the backing_hd field to the bs later */
106
if (!state->new_bs) {
107
- goto out;
108
+ return;
109
}
110
111
+ aio_context_acquire(aio_context);
112
+
113
/*
114
* Allow attaching a backing file to an overlay that's already in use only
115
* if the parents don't assume that they are already seeing a valid image.
116
@@ -XXX,XX +XXX,XX @@ static void drive_backup_action(DriveBackup *backup,
117
if (format) {
118
qdict_put_str(options, "driver", format);
119
}
120
+ aio_context_release(aio_context);
121
122
+ aio_context_acquire(qemu_get_aio_context());
123
target_bs = bdrv_open(backup->target, NULL, options, flags, errp);
124
+ aio_context_release(qemu_get_aio_context());
125
+
126
if (!target_bs) {
127
- goto out;
128
+ return;
129
}
130
131
/* Honor bdrv_try_change_aio_context() context acquisition requirements. */
132
old_context = bdrv_get_aio_context(target_bs);
133
- aio_context_release(aio_context);
134
aio_context_acquire(old_context);
135
136
ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
137
@@ -XXX,XX +XXX,XX @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
138
if (format) {
139
qdict_put_str(options, "driver", format);
140
}
141
+ aio_context_release(aio_context);
142
143
/* Mirroring takes care of copy-on-write using the source's backing
144
* file.
145
*/
146
+ aio_context_acquire(qemu_get_aio_context());
147
target_bs = bdrv_open(arg->target, NULL, options, flags, errp);
148
+ aio_context_release(qemu_get_aio_context());
149
+
150
if (!target_bs) {
151
- goto out;
152
+ return;
153
}
154
155
zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL &&
156
@@ -XXX,XX +XXX,XX @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
157
158
/* Honor bdrv_try_change_aio_context() context acquisition requirements. */
159
old_context = bdrv_get_aio_context(target_bs);
160
- aio_context_release(aio_context);
161
aio_context_acquire(old_context);
162
163
ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp);
164
diff --git a/qemu-nbd.c b/qemu-nbd.c
250
index XXXXXXX..XXXXXXX 100644
165
index XXXXXXX..XXXXXXX 100644
251
--- a/qemu-img.c
166
--- a/qemu-nbd.c
252
+++ b/qemu-img.c
167
+++ b/qemu-nbd.c
253
@@ -XXX,XX +XXX,XX @@ static int img_resize(int argc, char **argv)
168
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
254
* resizing, so pass @exact=true. It is of no use to report
169
qdict_put_str(raw_opts, "driver", "raw");
255
* success when the image has not actually been resized.
170
qdict_put_str(raw_opts, "file", bs->node_name);
256
*/
171
qdict_put_int(raw_opts, "offset", dev_offset);
257
- ret = blk_truncate(blk, total_size, true, prealloc, &err);
172
+
258
+ ret = blk_truncate(blk, total_size, true, prealloc, 0, &err);
173
+ aio_context_acquire(qemu_get_aio_context());
259
if (!ret) {
174
bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal);
260
qprintf(quiet, "Image resized.\n");
175
+ aio_context_release(qemu_get_aio_context());
261
} else {
176
+
262
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
177
blk_remove_bs(blk);
178
blk_insert_bs(blk, bs, &error_fatal);
179
bdrv_unref(bs);
180
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
263
index XXXXXXX..XXXXXXX 100644
181
index XXXXXXX..XXXXXXX 100644
264
--- a/qemu-io-cmds.c
182
--- a/tests/unit/test-block-iothread.c
265
+++ b/qemu-io-cmds.c
183
+++ b/tests/unit/test-block-iothread.c
266
@@ -XXX,XX +XXX,XX @@ static int truncate_f(BlockBackend *blk, int argc, char **argv)
184
@@ -XXX,XX +XXX,XX @@ static void test_attach_second_node(void)
267
* exact=true. It is better to err on the "emit more errors" side
185
qdict_put_str(options, "driver", "raw");
268
* than to be overly permissive.
186
qdict_put_str(options, "file", "base");
269
*/
187
270
- ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, &local_err);
188
+ /* FIXME raw_open() should take ctx's lock internally */
271
+ ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, &local_err);
189
aio_context_acquire(ctx);
272
if (ret < 0) {
190
+ aio_context_acquire(main_ctx);
273
error_report_err(local_err);
191
filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
274
return ret;
192
+ aio_context_release(main_ctx);
193
aio_context_release(ctx);
194
195
g_assert(blk_get_aio_context(blk) == ctx);
275
--
196
--
276
2.25.3
197
2.40.1
277
278
diff view generated by jsdifflib
New patch
1
This fixes blk_new_open() to not assume that bs is in the main context.
1
2
3
In particular, the BlockBackend must be created with the right
4
AioContext because it will refuse to move to a different context
5
afterwards. (blk->allow_aio_context_change is false.)
6
7
Use this opportunity to use blk_insert_bs() instead of duplicating the
8
bdrv_root_attach_child() call. This is consistent with what
9
blk_new_with_bs() does. Add comments to document the locking rules.
10
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Message-Id: <20230525124713.401149-5-kwolf@redhat.com>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
---
16
block/block-backend.c | 27 +++++++++++++++++++++------
17
1 file changed, 21 insertions(+), 6 deletions(-)
18
19
diff --git a/block/block-backend.c b/block/block-backend.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/block-backend.c
22
+++ b/block/block-backend.c
23
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
24
* Both sets of permissions can be changed later using blk_set_perm().
25
*
26
* Return the new BlockBackend on success, null on failure.
27
+ *
28
+ * Callers must hold the AioContext lock of @bs.
29
*/
30
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
31
uint64_t shared_perm, Error **errp)
32
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
33
34
/*
35
* Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
36
- * The new BlockBackend is in the main AioContext.
37
+ * By default, the new BlockBackend is in the main AioContext, but if the
38
+ * parameters connect it with any existing node in a different AioContext, it
39
+ * may end up there instead.
40
*
41
* Just as with bdrv_open(), after having called this function the reference to
42
* @options belongs to the block layer (even on failure).
43
*
44
+ * Called without holding an AioContext lock.
45
+ *
46
* TODO: Remove @filename and @flags; it should be possible to specify a whole
47
* BDS tree just by specifying the @options QDict (or @reference,
48
* alternatively). At the time of adding this function, this is not possible,
49
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
50
{
51
BlockBackend *blk;
52
BlockDriverState *bs;
53
+ AioContext *ctx;
54
uint64_t perm = 0;
55
uint64_t shared = BLK_PERM_ALL;
56
57
@@ -XXX,XX +XXX,XX @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
58
shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
59
}
60
61
- blk = blk_new(qemu_get_aio_context(), perm, shared);
62
aio_context_acquire(qemu_get_aio_context());
63
bs = bdrv_open(filename, reference, options, flags, errp);
64
aio_context_release(qemu_get_aio_context());
65
if (!bs) {
66
- blk_unref(blk);
67
return NULL;
68
}
69
70
- blk->root = bdrv_root_attach_child(bs, "root", &child_root,
71
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
72
- perm, shared, blk, errp);
73
+ /* bdrv_open() could have moved bs to a different AioContext */
74
+ ctx = bdrv_get_aio_context(bs);
75
+ blk = blk_new(bdrv_get_aio_context(bs), perm, shared);
76
+ blk->perm = perm;
77
+ blk->shared_perm = shared;
78
+
79
+ aio_context_acquire(ctx);
80
+ blk_insert_bs(blk, bs, errp);
81
+ bdrv_unref(bs);
82
+ aio_context_release(ctx);
83
+
84
if (!blk->root) {
85
blk_unref(blk);
86
return NULL;
87
@@ -XXX,XX +XXX,XX @@ void blk_remove_bs(BlockBackend *blk)
88
89
/*
90
* Associates a new BlockDriverState with @blk.
91
+ *
92
+ * Callers must hold the AioContext lock of @bs.
93
*/
94
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
95
{
96
--
97
2.40.1
diff view generated by jsdifflib
New patch
1
bdrv_open_backing_file() calls bdrv_open_inherit(), so all callers must
2
hold the main AioContext lock.
1
3
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Message-Id: <20230525124713.401149-6-kwolf@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
---
9
block.c | 2 ++
10
block/mirror.c | 6 ++++++
11
2 files changed, 8 insertions(+)
12
13
diff --git a/block.c b/block.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/block.c
16
+++ b/block.c
17
@@ -XXX,XX +XXX,XX @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
18
* itself, all options starting with "${bdref_key}." are considered part of the
19
* BlockdevRef.
20
*
21
+ * The caller must hold the main AioContext lock.
22
+ *
23
* TODO Can this be unified with bdrv_open_image()?
24
*/
25
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
26
diff --git a/block/mirror.c b/block/mirror.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/block/mirror.c
29
+++ b/block/mirror.c
30
@@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job)
31
bool abort = job->ret < 0;
32
int ret = 0;
33
34
+ GLOBAL_STATE_CODE();
35
+
36
if (s->prepared) {
37
return 0;
38
}
39
s->prepared = true;
40
41
+ aio_context_acquire(qemu_get_aio_context());
42
+
43
mirror_top_bs = s->mirror_top_bs;
44
bs_opaque = mirror_top_bs->opaque;
45
src = mirror_top_bs->backing->bs;
46
@@ -XXX,XX +XXX,XX @@ static int mirror_exit_common(Job *job)
47
bdrv_unref(mirror_top_bs);
48
bdrv_unref(src);
49
50
+ aio_context_release(qemu_get_aio_context());
51
+
52
return ret;
53
}
54
55
--
56
2.40.1
diff view generated by jsdifflib
1
The BDRV_REQ_ZERO_WRITE is currently implemented in a way that first the
1
qcow2_open() doesn't work correctly when opening the 'file' child moves
2
image is possibly preallocated and then the zero flag is added to all
2
bs to an iothread, for several reasons:
3
clusters. This means that a copy-on-write operation may be needed when
4
writing to these clusters, despite having used preallocation, negating
5
one of the major benefits of preallocation.
6
3
7
Instead, try to forward the BDRV_REQ_ZERO_WRITE to the protocol driver,
4
- It uses BDRV_POLL_WHILE() to wait for the qcow2_open_entry()
8
and if the protocol driver can ensure that the new area reads as zeros,
5
coroutine, which involves dropping the AioContext lock for bs when it
9
we can skip setting the zero flag in the qcow2 layer.
6
is not in the main context - but we don't hold it, so this crashes.
10
7
11
Unfortunately, the same approach doesn't work for metadata
8
- It runs the qcow2_open_entry() coroutine in the current thread instead
12
preallocation, so we'll still set the zero flag there.
9
of the new AioContext of bs.
10
11
- qcow2_open_entry() doesn't notify the main loop when it's done.
12
13
This patch fixes these issues around delegating work to a coroutine.
14
Temporarily dropping the main AioContext lock is not necessary because
15
we know we run in the main thread.
13
16
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
18
Message-Id: <20230525124713.401149-7-kwolf@redhat.com>
16
Message-Id: <20200424142701.67053-1-kwolf@redhat.com>
19
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
18
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
20
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
19
---
21
---
20
block/qcow2.c | 22 +++++++++++++++++++---
22
block.c | 6 ++++++
21
tests/qemu-iotests/274.out | 4 ++--
23
block/qcow2.c | 8 ++++++--
22
2 files changed, 21 insertions(+), 5 deletions(-)
24
2 files changed, 12 insertions(+), 2 deletions(-)
23
25
26
diff --git a/block.c b/block.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/block.c
29
+++ b/block.c
30
@@ -XXX,XX +XXX,XX @@ done:
31
* BlockdevRef.
32
*
33
* The BlockdevRef will be removed from the options QDict.
34
+ *
35
+ * @parent can move to a different AioContext in this function. Callers must
36
+ * make sure that their AioContext locking is still correct after this.
37
*/
38
BdrvChild *bdrv_open_child(const char *filename,
39
QDict *options, const char *bdref_key,
40
@@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_open_child(const char *filename,
41
42
/*
43
* Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
44
+ *
45
+ * @parent can move to a different AioContext in this function. Callers must
46
+ * make sure that their AioContext locking is still correct after this.
47
*/
48
int bdrv_open_file_child(const char *filename,
49
QDict *options, const char *bdref_key,
24
diff --git a/block/qcow2.c b/block/qcow2.c
50
diff --git a/block/qcow2.c b/block/qcow2.c
25
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
26
--- a/block/qcow2.c
52
--- a/block/qcow2.c
27
+++ b/block/qcow2.c
53
+++ b/block/qcow2.c
28
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
54
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn qcow2_open_entry(void *opaque)
29
/* Allocate the data area */
55
qoc->ret = qcow2_do_open(qoc->bs, qoc->options, qoc->flags, true,
30
new_file_size = allocation_start +
56
qoc->errp);
31
nb_new_data_clusters * s->cluster_size;
57
qemu_co_mutex_unlock(&s->lock);
32
- /* Image file grows, so @exact does not matter */
58
+
33
- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
59
+ aio_wait_kick();
34
- errp);
60
}
35
+ /*
61
36
+ * Image file grows, so @exact does not matter.
62
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
37
+ *
63
@@ -XXX,XX +XXX,XX @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
38
+ * If we need to zero out the new area, try first whether the protocol
64
39
+ * driver can already take care of this.
65
assert(!qemu_in_coroutine());
40
+ */
66
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
41
+ if (flags & BDRV_REQ_ZERO_WRITE) {
67
- qemu_coroutine_enter(qemu_coroutine_create(qcow2_open_entry, &qoc));
42
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc,
68
- BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
43
+ BDRV_REQ_ZERO_WRITE, NULL);
69
+
44
+ if (ret >= 0) {
70
+ aio_co_enter(bdrv_get_aio_context(bs),
45
+ flags &= ~BDRV_REQ_ZERO_WRITE;
71
+ qemu_coroutine_create(qcow2_open_entry, &qoc));
46
+ }
72
+ AIO_WAIT_WHILE_UNLOCKED(NULL, qoc.ret == -EINPROGRESS);
47
+ } else {
73
48
+ ret = -1;
74
return qoc.ret;
49
+ }
75
}
50
+ if (ret < 0) {
51
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
52
+ errp);
53
+ }
54
if (ret < 0) {
55
error_prepend(errp, "Failed to resize underlying file: ");
56
qcow2_free_clusters(bs, allocation_start,
57
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
58
index XXXXXXX..XXXXXXX 100644
59
--- a/tests/qemu-iotests/274.out
60
+++ b/tests/qemu-iotests/274.out
61
@@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 9437184
62
10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000)
63
64
[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false},
65
-{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}]
66
+{ "start": 5242880, "length": 10485760, "depth": 0, "zero": false, "data": true, "offset": 327680}]
67
68
=== preallocation=full ===
69
Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
70
@@ -XXX,XX +XXX,XX @@ read 65536/65536 bytes at offset 11534336
71
4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000)
72
73
[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false},
74
-{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}]
75
+{ "start": 8388608, "length": 4194304, "depth": 0, "zero": false, "data": true, "offset": 327680}]
76
77
=== preallocation=off ===
78
Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
79
--
76
--
80
2.25.3
77
2.40.1
81
82
diff view generated by jsdifflib
1
The raw format driver can simply forward the flag and let its bs->file
1
When opening the 'file' child moves bs to an iothread, we need to hold
2
child take care of actually providing the zeros.
2
the AioContext lock of it before we can call raw_apply_options() (and
3
more specifically, bdrv_getlength() inside of it).
3
4
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Reviewed-by: Max Reitz <mreitz@redhat.com>
6
Message-Id: <20230525124713.401149-8-kwolf@redhat.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Message-Id: <20200424125448.63318-6-kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
9
---
11
block/raw-format.c | 4 +++-
10
block/raw-format.c | 5 +++++
12
1 file changed, 3 insertions(+), 1 deletion(-)
11
tests/unit/test-block-iothread.c | 3 ---
12
2 files changed, 5 insertions(+), 3 deletions(-)
13
13
14
diff --git a/block/raw-format.c b/block/raw-format.c
14
diff --git a/block/raw-format.c b/block/raw-format.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/raw-format.c
16
--- a/block/raw-format.c
17
+++ b/block/raw-format.c
17
+++ b/block/raw-format.c
18
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
19
20
s->size = offset;
21
offset += s->offset;
22
- return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
23
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
24
}
25
26
static void raw_eject(BlockDriverState *bs, bool eject_flag)
27
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
18
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
28
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
19
Error **errp)
29
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
20
{
30
bs->file->bs->supported_zero_flags);
21
BDRVRawState *s = bs->opaque;
31
+ bs->supported_truncate_flags = bs->file->bs->supported_truncate_flags &
22
+ AioContext *ctx;
32
+ BDRV_REQ_ZERO_WRITE;
23
bool has_size;
33
24
uint64_t offset, size;
34
if (bs->probed && !bdrv_is_read_only(bs)) {
25
BdrvChildRole file_role;
35
bdrv_refresh_filename(bs->file->bs);
26
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
27
bs->file->bs->filename);
28
}
29
30
+ ctx = bdrv_get_aio_context(bs);
31
+ aio_context_acquire(ctx);
32
ret = raw_apply_options(bs, s, offset, has_size, size, errp);
33
+ aio_context_release(ctx);
34
+
35
if (ret < 0) {
36
return ret;
37
}
38
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/tests/unit/test-block-iothread.c
41
+++ b/tests/unit/test-block-iothread.c
42
@@ -XXX,XX +XXX,XX @@ static void test_attach_second_node(void)
43
qdict_put_str(options, "driver", "raw");
44
qdict_put_str(options, "file", "base");
45
46
- /* FIXME raw_open() should take ctx's lock internally */
47
- aio_context_acquire(ctx);
48
aio_context_acquire(main_ctx);
49
filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
50
aio_context_release(main_ctx);
51
- aio_context_release(ctx);
52
53
g_assert(blk_get_aio_context(blk) == ctx);
54
g_assert(bdrv_get_aio_context(bs) == ctx);
36
--
55
--
37
2.25.3
56
2.40.1
38
39
diff view generated by jsdifflib
New patch
1
The AioContext lock must not be held for bdrv_open_child(), but it is
2
necessary for the following operations, in particular those using nested
3
event loops in coroutine wrappers.
1
4
5
Temporarily dropping the main AioContext lock is not necessary because
6
we know we run in the main thread.
7
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Message-Id: <20230525124713.401149-9-kwolf@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
block/copy-before-write.c | 21 ++++++++++++++++-----
14
1 file changed, 16 insertions(+), 5 deletions(-)
15
16
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/copy-before-write.c
19
+++ b/block/copy-before-write.c
20
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
21
int64_t cluster_size;
22
g_autoptr(BlockdevOptions) full_opts = NULL;
23
BlockdevOptionsCbw *opts;
24
+ AioContext *ctx;
25
int ret;
26
27
full_opts = cbw_parse_options(options, errp);
28
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
29
return -EINVAL;
30
}
31
32
+ ctx = bdrv_get_aio_context(bs);
33
+ aio_context_acquire(ctx);
34
+
35
if (opts->bitmap) {
36
bitmap = block_dirty_bitmap_lookup(opts->bitmap->node,
37
opts->bitmap->name, NULL, errp);
38
if (!bitmap) {
39
- return -EINVAL;
40
+ ret = -EINVAL;
41
+ goto out;
42
}
43
}
44
s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error :
45
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
46
s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
47
if (!s->bcs) {
48
error_prepend(errp, "Cannot create block-copy-state: ");
49
- return -EINVAL;
50
+ ret = -EINVAL;
51
+ goto out;
52
}
53
54
cluster_size = block_copy_cluster_size(s->bcs);
55
56
s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
57
if (!s->done_bitmap) {
58
- return -EINVAL;
59
+ ret = -EINVAL;
60
+ goto out;
61
}
62
bdrv_disable_dirty_bitmap(s->done_bitmap);
63
64
/* s->access_bitmap starts equal to bcs bitmap */
65
s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
66
if (!s->access_bitmap) {
67
- return -EINVAL;
68
+ ret = -EINVAL;
69
+ goto out;
70
}
71
bdrv_disable_dirty_bitmap(s->access_bitmap);
72
bdrv_dirty_bitmap_merge_internal(s->access_bitmap,
73
@@ -XXX,XX +XXX,XX @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
74
qemu_co_mutex_init(&s->lock);
75
QLIST_INIT(&s->frozen_read_reqs);
76
77
- return 0;
78
+ ret = 0;
79
+out:
80
+ aio_context_release(ctx);
81
+ return ret;
82
}
83
84
static void cbw_close(BlockDriverState *bs)
85
--
86
2.40.1
diff view generated by jsdifflib
1
If BDRV_REQ_ZERO_WRITE is set and we're extending the image, calling
1
bdrv_refresh_total_sectors() and bdrv_refresh_limits() expect to be
2
qcow2_cluster_zeroize() with flags=0 does the right thing: It doesn't
2
called under the AioContext lock of the node. Take the lock.
3
undo any previous preallocation, but just adds the zero flag to all
4
relevant L2 entries. If an external data file is in use, a write_zeroes
5
request to the data file is made instead.
6
3
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Message-Id: <20200424125448.63318-5-kwolf@redhat.com>
5
Message-Id: <20230525124713.401149-10-kwolf@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
8
---
13
block/qcow2-cluster.c | 2 +-
9
block.c | 7 +++++++
14
block/qcow2.c | 34 ++++++++++++++++++++++++++++++++++
10
1 file changed, 7 insertions(+)
15
2 files changed, 35 insertions(+), 1 deletion(-)
16
11
17
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
12
diff --git a/block.c b/block.c
18
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
19
--- a/block/qcow2-cluster.c
14
--- a/block.c
20
+++ b/block/qcow2-cluster.c
15
+++ b/block.c
21
@@ -XXX,XX +XXX,XX @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
16
@@ -XXX,XX +XXX,XX @@ static int no_coroutine_fn GRAPH_UNLOCKED
22
/* Caller must pass aligned values, except at image end */
17
bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
23
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
18
QDict *options, int open_flags, Error **errp)
24
assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
19
{
25
- end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
20
+ AioContext *ctx;
26
+ end_offset >= bs->total_sectors << BDRV_SECTOR_BITS);
21
Error *local_err = NULL;
27
22
int i, ret;
28
/* The zero flag is only supported by version 3 and newer */
23
GLOBAL_STATE_CODE();
29
if (s->qcow_version < 3) {
24
@@ -XXX,XX +XXX,XX @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
30
diff --git a/block/qcow2.c b/block/qcow2.c
25
bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
31
index XXXXXXX..XXXXXXX 100644
26
bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;
32
--- a/block/qcow2.c
27
33
+++ b/block/qcow2.c
28
+ /* Get the context after .bdrv_open, it can change the context */
34
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
29
+ ctx = bdrv_get_aio_context(bs);
35
30
+ aio_context_acquire(ctx);
36
bs->supported_zero_flags = header.version >= 3 ?
31
+
37
BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0;
32
ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
38
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
33
if (ret < 0) {
39
34
error_setg_errno(errp, -ret, "Could not refresh total sector count");
40
/* Repair image if dirty */
35
+ aio_context_release(ctx);
41
if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
36
return ret;
42
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
43
g_assert_not_reached();
44
}
37
}
45
38
46
+ if ((flags & BDRV_REQ_ZERO_WRITE) && offset > old_length) {
39
bdrv_graph_rdlock_main_loop();
47
+ uint64_t zero_start = QEMU_ALIGN_UP(old_length, s->cluster_size);
40
bdrv_refresh_limits(bs, NULL, &local_err);
48
+
41
bdrv_graph_rdunlock_main_loop();
49
+ /*
42
+ aio_context_release(ctx);
50
+ * Use zero clusters as much as we can. qcow2_cluster_zeroize()
43
51
+ * requires a cluster-aligned start. The end may be unaligned if it is
44
if (local_err) {
52
+ * at the end of the image (which it is here).
45
error_propagate(errp, local_err);
53
+ */
54
+ ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0);
55
+ if (ret < 0) {
56
+ error_setg_errno(errp, -ret, "Failed to zero out new clusters");
57
+ goto fail;
58
+ }
59
+
60
+ /* Write explicit zeros for the unaligned head */
61
+ if (zero_start > old_length) {
62
+ uint64_t len = zero_start - old_length;
63
+ uint8_t *buf = qemu_blockalign0(bs, len);
64
+ QEMUIOVector qiov;
65
+ qemu_iovec_init_buf(&qiov, buf, len);
66
+
67
+ qemu_co_mutex_unlock(&s->lock);
68
+ ret = qcow2_co_pwritev_part(bs, old_length, len, &qiov, 0, 0);
69
+ qemu_co_mutex_lock(&s->lock);
70
+
71
+ qemu_vfree(buf);
72
+ if (ret < 0) {
73
+ error_setg_errno(errp, -ret, "Failed to zero out the new area");
74
+ goto fail;
75
+ }
76
+ }
77
+ }
78
+
79
if (prealloc != PREALLOC_MODE_OFF) {
80
/* Flush metadata before actually changing the image size */
81
ret = qcow2_write_caches(bs);
82
--
46
--
83
2.25.3
47
2.40.1
84
85
diff view generated by jsdifflib
1
For regular files, we always get BDRV_REQ_ZERO_WRITE behaviour from the
1
While calling bdrv_new_open_driver_opts(), the main AioContext lock must
2
OS, so we can advertise the flag and just ignore it.
2
be held, not the lock of the AioContext of the block subtree it will be
3
added to afterwards.
3
4
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Message-Id: <20230525124713.401149-11-kwolf@redhat.com>
6
Reviewed-by: Alberto Garcia <berto@igalia.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Message-Id: <20200424125448.63318-7-kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
9
---
11
block/file-posix.c | 4 ++++
10
block.c | 11 +++++++++++
12
1 file changed, 4 insertions(+)
11
1 file changed, 11 insertions(+)
13
12
14
diff --git a/block/file-posix.c b/block/file-posix.c
13
diff --git a/block.c b/block.c
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/block/file-posix.c
15
--- a/block.c
17
+++ b/block/file-posix.c
16
+++ b/block.c
18
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
17
@@ -XXX,XX +XXX,XX @@ static void bdrv_delete(BlockDriverState *bs)
19
#endif
18
* empty set of options. The reference to the QDict belongs to the block layer
20
19
* after the call (even on failure), so if the caller intends to reuse the
21
bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
20
* dictionary, it needs to use qobject_ref() before calling bdrv_open.
22
+ if (S_ISREG(st.st_mode)) {
21
+ *
23
+ /* When extending regular files, we get zeros from the OS */
22
+ * The caller holds the AioContext lock for @bs. It must make sure that @bs
24
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
23
+ * stays in the same AioContext, i.e. @options must not refer to nodes in a
25
+ }
24
+ * different AioContext.
26
ret = 0;
25
*/
27
fail:
26
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
28
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
27
int flags, Error **errp)
28
{
29
ERRP_GUARD();
30
int ret;
31
+ AioContext *ctx = bdrv_get_aio_context(bs);
32
BlockDriverState *new_node_bs = NULL;
33
const char *drvname, *node_name;
34
BlockDriver *drv;
35
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
36
37
GLOBAL_STATE_CODE();
38
39
+ aio_context_release(ctx);
40
+ aio_context_acquire(qemu_get_aio_context());
41
new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
42
errp);
43
+ aio_context_release(qemu_get_aio_context());
44
+ aio_context_acquire(ctx);
45
+ assert(bdrv_get_aio_context(bs) == ctx);
46
+
47
options = NULL; /* bdrv_new_open_driver() eats options */
48
if (!new_node_bs) {
49
error_prepend(errp, "Could not create node: ");
29
--
50
--
30
2.25.3
51
2.40.1
31
32
diff view generated by jsdifflib
1
We want to keep TEST_IMG for the full path of the main test image, but
1
It has no internal callers, so its only use is being called from
2
filter_testfiles() must be called for other test images before replacing
2
individual test cases. If the name starts with an underscore, it is
3
other things like the image format because the test directory path could
3
considered private and linters warn against calling it. 256 only gets
4
contain the format as a substring.
4
away with it currently because it's on the exception list for linters.
5
6
Insert a filter_testfiles() call between both.
7
5
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Message-Id: <20230525124713.401149-12-kwolf@redhat.com>
10
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Message-Id: <20200424125448.63318-9-kwolf@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
10
---
14
tests/qemu-iotests/iotests.py | 5 +++--
11
tests/qemu-iotests/iotests.py | 2 +-
15
1 file changed, 3 insertions(+), 2 deletions(-)
12
tests/qemu-iotests/256 | 2 +-
13
2 files changed, 2 insertions(+), 2 deletions(-)
16
14
17
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
15
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/tests/qemu-iotests/iotests.py
17
--- a/tests/qemu-iotests/iotests.py
20
+++ b/tests/qemu-iotests/iotests.py
18
+++ b/tests/qemu-iotests/iotests.py
21
@@ -XXX,XX +XXX,XX @@ def filter_img_info(output, filename):
19
@@ -XXX,XX +XXX,XX @@ def _verify_virtio_blk() -> None:
22
for line in output.split('\n'):
20
if 'virtio-blk' not in out:
23
if 'disk size' in line or 'actual-size' in line:
21
notrun('Missing virtio-blk in QEMU binary')
24
continue
22
25
- line = line.replace(filename, 'TEST_IMG') \
23
-def _verify_virtio_scsi_pci_or_ccw() -> None:
26
- .replace(imgfmt, 'IMGFMT')
24
+def verify_virtio_scsi_pci_or_ccw() -> None:
27
+ line = line.replace(filename, 'TEST_IMG')
25
out = qemu_pipe('-M', 'none', '-device', 'help')
28
+ line = filter_testfiles(line)
26
if 'virtio-scsi-pci' not in out and 'virtio-scsi-ccw' not in out:
29
+ line = line.replace(imgfmt, 'IMGFMT')
27
notrun('Missing virtio-scsi-pci or virtio-scsi-ccw in QEMU binary')
30
line = re.sub('iters: [0-9]+', 'iters: XXX', line)
28
diff --git a/tests/qemu-iotests/256 b/tests/qemu-iotests/256
31
line = re.sub('uuid: [-a-f0-9]+', 'uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX', line)
29
index XXXXXXX..XXXXXXX 100755
32
line = re.sub('cid: [0-9]+', 'cid: XXXXXXXXXX', line)
30
--- a/tests/qemu-iotests/256
31
+++ b/tests/qemu-iotests/256
32
@@ -XXX,XX +XXX,XX @@ import os
33
import iotests
34
from iotests import log
35
36
-iotests._verify_virtio_scsi_pci_or_ccw()
37
+iotests.verify_virtio_scsi_pci_or_ccw()
38
39
iotests.script_initialize(supported_fmts=['qcow2'])
40
size = 64 * 1024 * 1024
33
--
41
--
34
2.25.3
42
2.40.1
35
36
diff view generated by jsdifflib
1
If blockdev-create references an existing node in an iothread (e.g. as
2
its 'file' child), then suddenly all of the image creation code must
3
run in that AioContext, too. Test that this actually works.
4
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2
Message-Id: <20200424125448.63318-10-kwolf@redhat.com>
6
Message-Id: <20230525124713.401149-13-kwolf@redhat.com>
3
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
---
9
---
7
tests/qemu-iotests/274 | 155 +++++++++++++++++++++
10
tests/qemu-iotests/tests/iothreads-create | 67 +++++++++++++++++++
8
tests/qemu-iotests/274.out | 268 +++++++++++++++++++++++++++++++++++++
11
tests/qemu-iotests/tests/iothreads-create.out | 4 ++
9
tests/qemu-iotests/group | 1 +
12
2 files changed, 71 insertions(+)
10
3 files changed, 424 insertions(+)
13
create mode 100755 tests/qemu-iotests/tests/iothreads-create
11
create mode 100755 tests/qemu-iotests/274
14
create mode 100644 tests/qemu-iotests/tests/iothreads-create.out
12
create mode 100644 tests/qemu-iotests/274.out
13
15
14
diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274
16
diff --git a/tests/qemu-iotests/tests/iothreads-create b/tests/qemu-iotests/tests/iothreads-create
15
new file mode 100755
17
new file mode 100755
16
index XXXXXXX..XXXXXXX
18
index XXXXXXX..XXXXXXX
17
--- /dev/null
19
--- /dev/null
18
+++ b/tests/qemu-iotests/274
20
+++ b/tests/qemu-iotests/tests/iothreads-create
19
@@ -XXX,XX +XXX,XX @@
21
@@ -XXX,XX +XXX,XX @@
20
+#!/usr/bin/env python3
22
+#!/usr/bin/env python3
23
+# group: rw quick
21
+#
24
+#
22
+# Copyright (C) 2019 Red Hat, Inc.
25
+# Copyright (C) 2023 Red Hat, Inc.
23
+#
26
+#
24
+# This program is free software; you can redistribute it and/or modify
27
+# This program is free software; you can redistribute it and/or modify
25
+# it under the terms of the GNU General Public License as published by
28
+# it under the terms of the GNU General Public License as published by
26
+# the Free Software Foundation; either version 2 of the License, or
29
+# the Free Software Foundation; either version 2 of the License, or
27
+# (at your option) any later version.
30
+# (at your option) any later version.
...
...
33
+#
36
+#
34
+# You should have received a copy of the GNU General Public License
37
+# You should have received a copy of the GNU General Public License
35
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
38
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
36
+#
39
+#
37
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
40
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
38
+#
39
+# Some tests for short backing files and short overlays
40
+
41
+
42
+import asyncio
41
+import iotests
43
+import iotests
42
+
44
+
43
+iotests.verify_image_format(supported_fmts=['qcow2'])
45
+iotests.script_initialize(supported_fmts=['qcow2', 'qcow', 'qed', 'vdi',
44
+iotests.verify_platform(['linux'])
46
+ 'vmdk', 'parallels'])
47
+iotests.verify_virtio_scsi_pci_or_ccw()
45
+
48
+
46
+size_short = 1 * 1024 * 1024
49
+with iotests.FilePath('disk.img') as img_path, \
47
+size_long = 2 * 1024 * 1024
50
+ iotests.VM() as vm:
48
+size_diff = size_long - size_short
49
+
51
+
50
+def create_chain() -> None:
52
+ iotests.qemu_img_create('-f', 'raw', img_path, '0')
51
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base,
52
+ str(size_long))
53
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, mid,
54
+ str(size_short))
55
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', mid, top,
56
+ str(size_long))
57
+
53
+
58
+ iotests.qemu_io_log('-c', 'write -P 1 0 %d' % size_long, base)
54
+ vm.add_object('iothread,id=iothread0')
55
+ vm.add_blockdev(f'file,node-name=img-file,read-only=on,'
56
+ f'filename={img_path}')
57
+ vm.add_device('virtio-scsi,iothread=iothread0')
58
+ vm.add_device('scsi-hd,drive=img-file,share-rw=on')
59
+
59
+
60
+def create_vm() -> iotests.VM:
60
+ vm.launch()
61
+ vm = iotests.VM()
62
+ vm.add_blockdev('file,filename=%s,node-name=base-file' % base)
63
+ vm.add_blockdev('%s,file=base-file,node-name=base' % iotests.imgfmt)
64
+ vm.add_blockdev('file,filename=%s,node-name=mid-file' % mid)
65
+ vm.add_blockdev('%s,file=mid-file,node-name=mid,backing=base'
66
+ % iotests.imgfmt)
67
+ vm.add_drive(top, 'backing=mid,node-name=top')
68
+ return vm
69
+
61
+
70
+with iotests.FilePath('base') as base, \
62
+ iotests.log(vm.qmp(
71
+ iotests.FilePath('mid') as mid, \
63
+ 'blockdev-reopen',
72
+ iotests.FilePath('top') as top:
64
+ options=[{
65
+ 'driver': 'file',
66
+ 'filename': img_path,
67
+ 'node-name': 'img-file',
68
+ 'read-only': False,
69
+ }],
70
+ ))
71
+ iotests.log(vm.qmp(
72
+ 'blockdev-create',
73
+ job_id='job0',
74
+ options={
75
+ 'driver': iotests.imgfmt,
76
+ 'file': 'img-file',
77
+ 'size': 1024 * 1024,
78
+ },
79
+ ))
73
+
80
+
74
+ iotests.log('== Commit tests ==')
81
+ # Should succeed and not time out
75
+
82
+ try:
76
+ create_chain()
83
+ vm.run_job('job0', wait=5.0)
77
+
84
+ vm.shutdown()
78
+ iotests.log('=== Check visible data ===')
85
+ except asyncio.TimeoutError:
79
+
86
+ # VM may be stuck, kill it
80
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, top)
87
+ vm.kill()
81
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), top)
88
+ raise
82
+
89
diff --git a/tests/qemu-iotests/tests/iothreads-create.out b/tests/qemu-iotests/tests/iothreads-create.out
83
+ iotests.log('=== Checking allocation status ===')
84
+
85
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
86
+ '-c', 'alloc %d %d' % (size_short, size_diff),
87
+ base)
88
+
89
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
90
+ '-c', 'alloc %d %d' % (size_short, size_diff),
91
+ mid)
92
+
93
+ iotests.qemu_io_log('-c', 'alloc 0 %d' % size_short,
94
+ '-c', 'alloc %d %d' % (size_short, size_diff),
95
+ top)
96
+
97
+ iotests.log('=== Checking map ===')
98
+
99
+ iotests.qemu_img_log('map', '--output=json', base)
100
+ iotests.qemu_img_log('map', '--output=human', base)
101
+ iotests.qemu_img_log('map', '--output=json', mid)
102
+ iotests.qemu_img_log('map', '--output=human', mid)
103
+ iotests.qemu_img_log('map', '--output=json', top)
104
+ iotests.qemu_img_log('map', '--output=human', top)
105
+
106
+ iotests.log('=== Testing qemu-img commit (top -> mid) ===')
107
+
108
+ iotests.qemu_img_log('commit', top)
109
+ iotests.img_info_log(mid)
110
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
111
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
112
+
113
+ iotests.log('=== Testing HMP commit (top -> mid) ===')
114
+
115
+ create_chain()
116
+ with create_vm() as vm:
117
+ vm.launch()
118
+ vm.qmp_log('human-monitor-command', command_line='commit drive0')
119
+
120
+ iotests.img_info_log(mid)
121
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
122
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
123
+
124
+ iotests.log('=== Testing QMP active commit (top -> mid) ===')
125
+
126
+ create_chain()
127
+ with create_vm() as vm:
128
+ vm.launch()
129
+ vm.qmp_log('block-commit', device='top', base_node='mid',
130
+ job_id='job0', auto_dismiss=False)
131
+ vm.run_job('job0', wait=5)
132
+
133
+ iotests.img_info_log(mid)
134
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
135
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
136
+
137
+
138
+ iotests.log('== Resize tests ==')
139
+
140
+ # Use different sizes for different allocation modes:
141
+ #
142
+ # We want to have at least one test where 32 bit truncation in the size of
143
+ # the overlapping area becomes visible. This is covered by the
144
+ # prealloc='off' case (1G to 6G is an overlap of 5G).
145
+ #
146
+ # However, we can only do this for modes that don't preallocate data
147
+ # because otherwise we might run out of space on the test host.
148
+ #
149
+ # We also want to test some unaligned combinations.
150
+ for (prealloc, base_size, top_size_old, top_size_new, off) in [
151
+ ('off', '6G', '1G', '8G', '5G'),
152
+ ('metadata', '32G', '30G', '33G', '31G'),
153
+ ('falloc', '10M', '5M', '15M', '9M'),
154
+ ('full', '16M', '8M', '12M', '11M'),
155
+ ('off', '384k', '253k', '512k', '253k'),
156
+ ('off', '400k', '256k', '512k', '336k'),
157
+ ('off', '512k', '256k', '500k', '436k')]:
158
+
159
+ iotests.log('=== preallocation=%s ===' % prealloc)
160
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, base, base_size)
161
+ iotests.qemu_img_log('create', '-f', iotests.imgfmt, '-b', base, top,
162
+ top_size_old)
163
+ iotests.qemu_io_log('-c', 'write -P 1 %s 64k' % off, base)
164
+
165
+ # After this, top_size_old to base_size should be allocated/zeroed.
166
+ #
167
+ # In theory, leaving base_size to top_size_new unallocated would be
168
+ # correct, but in practice, if we zero out anything, we zero out
169
+ # everything up to top_size_new.
170
+ iotests.qemu_img_log('resize', '-f', iotests.imgfmt,
171
+ '--preallocation', prealloc, top, top_size_new)
172
+ iotests.qemu_io_log('-c', 'read -P 0 %s 64k' % off, top)
173
+ iotests.qemu_io_log('-c', 'map', top)
174
+ iotests.qemu_img_log('map', '--output=json', top)
175
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
176
new file mode 100644
90
new file mode 100644
177
index XXXXXXX..XXXXXXX
91
index XXXXXXX..XXXXXXX
178
--- /dev/null
92
--- /dev/null
179
+++ b/tests/qemu-iotests/274.out
93
+++ b/tests/qemu-iotests/tests/iothreads-create.out
180
@@ -XXX,XX +XXX,XX @@
94
@@ -XXX,XX +XXX,XX @@
181
+== Commit tests ==
182
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
183
+
184
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
185
+
186
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
187
+
188
+wrote 2097152/2097152 bytes at offset 0
189
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
190
+
191
+=== Check visible data ===
192
+read 1048576/1048576 bytes at offset 0
193
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
194
+
195
+read 1048576/1048576 bytes at offset 1048576
196
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
197
+
198
+=== Checking allocation status ===
199
+1048576/1048576 bytes allocated at offset 0 bytes
200
+1048576/1048576 bytes allocated at offset 1 MiB
201
+
202
+0/1048576 bytes allocated at offset 0 bytes
203
+0/0 bytes allocated at offset 1 MiB
204
+
205
+0/1048576 bytes allocated at offset 0 bytes
206
+0/1048576 bytes allocated at offset 1 MiB
207
+
208
+=== Checking map ===
209
+[{ "start": 0, "length": 2097152, "depth": 0, "zero": false, "data": true, "offset": 327680}]
210
+
211
+Offset Length Mapped to File
212
+0 0x200000 0x50000 TEST_DIR/PID-base
213
+
214
+[{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, "offset": 327680}]
215
+
216
+Offset Length Mapped to File
217
+0 0x100000 0x50000 TEST_DIR/PID-base
218
+
219
+[{ "start": 0, "length": 1048576, "depth": 2, "zero": false, "data": true, "offset": 327680},
220
+{ "start": 1048576, "length": 1048576, "depth": 0, "zero": true, "data": false}]
221
+
222
+Offset Length Mapped to File
223
+0 0x100000 0x50000 TEST_DIR/PID-base
224
+
225
+=== Testing qemu-img commit (top -> mid) ===
226
+Image committed.
227
+
228
+image: TEST_IMG
229
+file format: IMGFMT
230
+virtual size: 2 MiB (2097152 bytes)
231
+cluster_size: 65536
232
+backing file: TEST_DIR/PID-base
233
+Format specific information:
234
+ compat: 1.1
235
+ lazy refcounts: false
236
+ refcount bits: 16
237
+ corrupt: false
238
+
239
+read 1048576/1048576 bytes at offset 0
240
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
241
+
242
+read 1048576/1048576 bytes at offset 1048576
243
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
244
+
245
+=== Testing HMP commit (top -> mid) ===
246
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
247
+
248
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
249
+
250
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
251
+
252
+wrote 2097152/2097152 bytes at offset 0
253
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
254
+
255
+{"execute": "human-monitor-command", "arguments": {"command-line": "commit drive0"}}
256
+{"return": ""}
257
+image: TEST_IMG
258
+file format: IMGFMT
259
+virtual size: 2 MiB (2097152 bytes)
260
+cluster_size: 65536
261
+backing file: TEST_DIR/PID-base
262
+Format specific information:
263
+ compat: 1.1
264
+ lazy refcounts: false
265
+ refcount bits: 16
266
+ corrupt: false
267
+
268
+read 1048576/1048576 bytes at offset 0
269
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
270
+
271
+read 1048576/1048576 bytes at offset 1048576
272
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
273
+
274
+=== Testing QMP active commit (top -> mid) ===
275
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=2097152 cluster_size=65536 lazy_refcounts=off refcount_bits=16
276
+
277
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 size=1048576 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
278
+
279
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=2097152 backing_file=TEST_DIR/PID-mid cluster_size=65536 lazy_refcounts=off refcount_bits=16
280
+
281
+wrote 2097152/2097152 bytes at offset 0
282
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
283
+
284
+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "mid", "device": "top", "job-id": "job0"}}
285
+{"return": {}}
95
+{"return": {}}
286
+{"execute": "job-complete", "arguments": {"id": "job0"}}
287
+{"return": {}}
96
+{"return": {}}
288
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
289
+{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
290
+{"execute": "job-dismiss", "arguments": {"id": "job0"}}
97
+{"execute": "job-dismiss", "arguments": {"id": "job0"}}
291
+{"return": {}}
98
+{"return": {}}
292
+image: TEST_IMG
293
+file format: IMGFMT
294
+virtual size: 2 MiB (2097152 bytes)
295
+cluster_size: 65536
296
+backing file: TEST_DIR/PID-base
297
+Format specific information:
298
+ compat: 1.1
299
+ lazy refcounts: false
300
+ refcount bits: 16
301
+ corrupt: false
302
+
303
+read 1048576/1048576 bytes at offset 0
304
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
305
+
306
+read 1048576/1048576 bytes at offset 1048576
307
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
308
+
309
+== Resize tests ==
310
+=== preallocation=off ===
311
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=6442450944 cluster_size=65536 lazy_refcounts=off refcount_bits=16
312
+
313
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=1073741824 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
314
+
315
+wrote 65536/65536 bytes at offset 5368709120
316
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
317
+
318
+Image resized.
319
+
320
+read 65536/65536 bytes at offset 5368709120
321
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
322
+
323
+1 GiB (0x40000000) bytes not allocated at offset 0 bytes (0x0)
324
+7 GiB (0x1c0000000) bytes allocated at offset 1 GiB (0x40000000)
325
+
326
+[{ "start": 0, "length": 1073741824, "depth": 1, "zero": true, "data": false},
327
+{ "start": 1073741824, "length": 7516192768, "depth": 0, "zero": true, "data": false}]
328
+
329
+=== preallocation=metadata ===
330
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=34359738368 cluster_size=65536 lazy_refcounts=off refcount_bits=16
331
+
332
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=32212254720 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
333
+
334
+wrote 65536/65536 bytes at offset 33285996544
335
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
336
+
337
+Image resized.
338
+
339
+read 65536/65536 bytes at offset 33285996544
340
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
341
+
342
+30 GiB (0x780000000) bytes not allocated at offset 0 bytes (0x0)
343
+3 GiB (0xc0000000) bytes allocated at offset 30 GiB (0x780000000)
344
+
345
+[{ "start": 0, "length": 32212254720, "depth": 1, "zero": true, "data": false},
346
+{ "start": 32212254720, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 327680},
347
+{ "start": 32749125632, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 537264128},
348
+{ "start": 33285996544, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1074200576},
349
+{ "start": 33822867456, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 1611137024},
350
+{ "start": 34359738368, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2148139008},
351
+{ "start": 34896609280, "length": 536870912, "depth": 0, "zero": true, "data": false, "offset": 2685075456}]
352
+
353
+=== preallocation=falloc ===
354
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=10485760 cluster_size=65536 lazy_refcounts=off refcount_bits=16
355
+
356
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=5242880 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
357
+
358
+wrote 65536/65536 bytes at offset 9437184
359
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
360
+
361
+Image resized.
362
+
363
+read 65536/65536 bytes at offset 9437184
364
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
365
+
366
+5 MiB (0x500000) bytes not allocated at offset 0 bytes (0x0)
367
+10 MiB (0xa00000) bytes allocated at offset 5 MiB (0x500000)
368
+
369
+[{ "start": 0, "length": 5242880, "depth": 1, "zero": true, "data": false},
370
+{ "start": 5242880, "length": 10485760, "depth": 0, "zero": true, "data": false, "offset": 327680}]
371
+
372
+=== preallocation=full ===
373
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=16777216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
374
+
375
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=8388608 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
376
+
377
+wrote 65536/65536 bytes at offset 11534336
378
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
379
+
380
+Image resized.
381
+
382
+read 65536/65536 bytes at offset 11534336
383
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
384
+
385
+8 MiB (0x800000) bytes not allocated at offset 0 bytes (0x0)
386
+4 MiB (0x400000) bytes allocated at offset 8 MiB (0x800000)
387
+
388
+[{ "start": 0, "length": 8388608, "depth": 1, "zero": true, "data": false},
389
+{ "start": 8388608, "length": 4194304, "depth": 0, "zero": true, "data": false, "offset": 327680}]
390
+
391
+=== preallocation=off ===
392
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=393216 cluster_size=65536 lazy_refcounts=off refcount_bits=16
393
+
394
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=259072 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
395
+
396
+wrote 65536/65536 bytes at offset 259072
397
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
398
+
399
+Image resized.
400
+
401
+read 65536/65536 bytes at offset 259072
402
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
403
+
404
+192 KiB (0x30000) bytes not allocated at offset 0 bytes (0x0)
405
+320 KiB (0x50000) bytes allocated at offset 192 KiB (0x30000)
406
+
407
+[{ "start": 0, "length": 196608, "depth": 1, "zero": true, "data": false},
408
+{ "start": 196608, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": 327680},
409
+{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}]
410
+
411
+=== preallocation=off ===
412
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=409600 cluster_size=65536 lazy_refcounts=off refcount_bits=16
413
+
414
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
415
+
416
+wrote 65536/65536 bytes at offset 344064
417
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
418
+
419
+Image resized.
420
+
421
+read 65536/65536 bytes at offset 344064
422
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
423
+
424
+256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0)
425
+256 KiB (0x40000) bytes allocated at offset 256 KiB (0x40000)
426
+
427
+[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false},
428
+{ "start": 262144, "length": 262144, "depth": 0, "zero": true, "data": false}]
429
+
430
+=== preallocation=off ===
431
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 size=524288 cluster_size=65536 lazy_refcounts=off refcount_bits=16
432
+
433
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 size=262144 backing_file=TEST_DIR/PID-base cluster_size=65536 lazy_refcounts=off refcount_bits=16
434
+
435
+wrote 65536/65536 bytes at offset 446464
436
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
437
+
438
+Image resized.
439
+
440
+read 65536/65536 bytes at offset 446464
441
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
442
+
443
+256 KiB (0x40000) bytes not allocated at offset 0 bytes (0x0)
444
+244 KiB (0x3d000) bytes allocated at offset 256 KiB (0x40000)
445
+
446
+[{ "start": 0, "length": 262144, "depth": 1, "zero": true, "data": false},
447
+{ "start": 262144, "length": 249856, "depth": 0, "zero": true, "data": false}]
448
+
449
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
450
index XXXXXXX..XXXXXXX 100644
451
--- a/tests/qemu-iotests/group
452
+++ b/tests/qemu-iotests/group
453
@@ -XXX,XX +XXX,XX @@
454
270 rw backing quick
455
272 rw
456
273 backing quick
457
+274 rw backing
458
277 rw quick
459
279 rw backing quick
460
280 rw migration quick
461
--
99
--
462
2.25.3
100
2.40.1
463
464
diff view generated by jsdifflib
1
Now that block drivers can support flags for .bdrv_co_truncate, expose
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
the parameter in the node level interfaces bdrv_co_truncate() and
3
bdrv_truncate().
4
2
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
blk_set_aio_context() is not fully transactional because
6
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
4
blk_do_set_aio_context() updates blk->ctx outside the transaction. Most
7
Reviewed-by: Alberto Garcia <berto@igalia.com>
5
of the time this goes unnoticed but a BlockDevOps.drained_end() callback
8
Reviewed-by: Max Reitz <mreitz@redhat.com>
6
that invokes blk_get_aio_context() fails assert(ctx == blk->ctx). This
9
Message-Id: <20200424125448.63318-3-kwolf@redhat.com>
7
happens because blk->ctx is only assigned after
8
BlockDevOps.drained_end() is called and we're in an intermediate state
9
where BlockDriverState nodes already have the new context and the
10
BlockBackend still has the old context.
11
12
Making blk_set_aio_context() fully transactional solves this assertion
13
failure because the BlockBackend's context is updated as part of the
14
transaction (before BlockDevOps.drained_end() is called).
15
16
Split blk_do_set_aio_context() in order to solve this assertion failure.
17
This helper function actually serves two different purposes:
18
1. It drives blk_set_aio_context().
19
2. It responds to BdrvChildClass->change_aio_ctx().
20
21
Get rid of the helper function. Do #1 inside blk_set_aio_context() and
22
do #2 inside blk_root_set_aio_ctx_commit(). This simplifies the code.
23
24
The only drawback of the fully transactional approach is that
25
blk_set_aio_context() must contend with blk_root_set_aio_ctx_commit()
26
being invoked as part of the AioContext change propagation. This can be
27
solved by temporarily setting blk->allow_aio_context_change to true.
28
29
Future patches call blk_get_aio_context() from
30
BlockDevOps->drained_end(), so this patch will become necessary.
31
32
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
33
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
34
Message-Id: <20230516190238.8401-2-stefanha@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
35
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
36
---
12
include/block/block.h | 5 +++--
37
block/block-backend.c | 61 ++++++++++++++++---------------------------
13
block/block-backend.c | 2 +-
38
1 file changed, 23 insertions(+), 38 deletions(-)
14
block/crypto.c | 2 +-
15
block/io.c | 12 +++++++-----
16
block/parallels.c | 6 +++---
17
block/qcow.c | 4 ++--
18
block/qcow2-refcount.c | 2 +-
19
block/qcow2.c | 15 +++++++++------
20
block/raw-format.c | 2 +-
21
block/vhdx-log.c | 2 +-
22
block/vhdx.c | 2 +-
23
block/vmdk.c | 2 +-
24
tests/test-block-iothread.c | 6 +++---
25
13 files changed, 34 insertions(+), 28 deletions(-)
26
39
27
diff --git a/include/block/block.h b/include/block/block.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/block/block.h
30
+++ b/include/block/block.h
31
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
32
void bdrv_refresh_filename(BlockDriverState *bs);
33
34
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
35
- PreallocMode prealloc, Error **errp);
36
+ PreallocMode prealloc, BdrvRequestFlags flags,
37
+ Error **errp);
38
int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
39
- PreallocMode prealloc, Error **errp);
40
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
41
42
int64_t bdrv_nb_sectors(BlockDriverState *bs);
43
int64_t bdrv_getlength(BlockDriverState *bs);
44
diff --git a/block/block-backend.c b/block/block-backend.c
40
diff --git a/block/block-backend.c b/block/block-backend.c
45
index XXXXXXX..XXXXXXX 100644
41
index XXXXXXX..XXXXXXX 100644
46
--- a/block/block-backend.c
42
--- a/block/block-backend.c
47
+++ b/block/block-backend.c
43
+++ b/block/block-backend.c
48
@@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
44
@@ -XXX,XX +XXX,XX @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
49
return -ENOMEDIUM;
45
return blk_get_aio_context(blk_acb->blk);
46
}
47
48
-static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
49
- bool update_root_node, Error **errp)
50
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
51
+ Error **errp)
52
{
53
+ bool old_allow_change;
54
BlockDriverState *bs = blk_bs(blk);
55
- ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
56
int ret;
57
58
- if (bs) {
59
- bdrv_ref(bs);
60
-
61
- if (update_root_node) {
62
- /*
63
- * update_root_node MUST be false for blk_root_set_aio_ctx_commit(),
64
- * as we are already in the commit function of a transaction.
65
- */
66
- ret = bdrv_try_change_aio_context(bs, new_context, blk->root, errp);
67
- if (ret < 0) {
68
- bdrv_unref(bs);
69
- return ret;
70
- }
71
- }
72
- /*
73
- * Make blk->ctx consistent with the root node before we invoke any
74
- * other operations like drain that might inquire blk->ctx
75
- */
76
- blk->ctx = new_context;
77
- if (tgm->throttle_state) {
78
- bdrv_drained_begin(bs);
79
- throttle_group_detach_aio_context(tgm);
80
- throttle_group_attach_aio_context(tgm, new_context);
81
- bdrv_drained_end(bs);
82
- }
83
+ GLOBAL_STATE_CODE();
84
85
- bdrv_unref(bs);
86
- } else {
87
+ if (!bs) {
88
blk->ctx = new_context;
89
+ return 0;
50
}
90
}
51
91
52
- return bdrv_truncate(blk->root, offset, exact, prealloc, errp);
92
- return 0;
53
+ return bdrv_truncate(blk->root, offset, exact, prealloc, 0, errp);
93
-}
94
+ bdrv_ref(bs);
95
96
-int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
97
- Error **errp)
98
-{
99
- GLOBAL_STATE_CODE();
100
- return blk_do_set_aio_context(blk, new_context, true, errp);
101
+ old_allow_change = blk->allow_aio_context_change;
102
+ blk->allow_aio_context_change = true;
103
+
104
+ ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp);
105
+
106
+ blk->allow_aio_context_change = old_allow_change;
107
+
108
+ bdrv_unref(bs);
109
+ return ret;
54
}
110
}
55
111
56
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
112
typedef struct BdrvStateBlkRootContext {
57
diff --git a/block/crypto.c b/block/crypto.c
113
@@ -XXX,XX +XXX,XX @@ static void blk_root_set_aio_ctx_commit(void *opaque)
58
index XXXXXXX..XXXXXXX 100644
114
{
59
--- a/block/crypto.c
115
BdrvStateBlkRootContext *s = opaque;
60
+++ b/block/crypto.c
116
BlockBackend *blk = s->blk;
61
@@ -XXX,XX +XXX,XX @@ block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
117
+ AioContext *new_context = s->new_ctx;
62
118
+ ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
63
offset += payload_offset;
119
64
120
- blk_do_set_aio_context(blk, s->new_ctx, false, &error_abort);
65
- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
121
+ blk->ctx = new_context;
66
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
122
+ if (tgm->throttle_state) {
123
+ throttle_group_detach_aio_context(tgm);
124
+ throttle_group_attach_aio_context(tgm, new_context);
125
+ }
67
}
126
}
68
127
69
static void block_crypto_close(BlockDriverState *bs)
128
static TransactionActionDrv set_blk_root_context = {
70
diff --git a/block/io.c b/block/io.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/block/io.c
73
+++ b/block/io.c
74
@@ -XXX,XX +XXX,XX @@ static void bdrv_parent_cb_resize(BlockDriverState *bs)
75
* 'offset' bytes in length.
76
*/
77
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
78
- PreallocMode prealloc, Error **errp)
79
+ PreallocMode prealloc, BdrvRequestFlags flags,
80
+ Error **errp)
81
{
82
BlockDriverState *bs = child->bs;
83
BlockDriver *drv = bs->drv;
84
BdrvTrackedRequest req;
85
- BdrvRequestFlags flags = 0;
86
int64_t old_size, new_bytes;
87
int ret;
88
89
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
90
}
91
ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
92
} else if (bs->file && drv->is_filter) {
93
- ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
94
+ ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp);
95
} else {
96
error_setg(errp, "Image format driver does not support resize");
97
ret = -ENOTSUP;
98
@@ -XXX,XX +XXX,XX @@ typedef struct TruncateCo {
99
int64_t offset;
100
bool exact;
101
PreallocMode prealloc;
102
+ BdrvRequestFlags flags;
103
Error **errp;
104
int ret;
105
} TruncateCo;
106
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_truncate_co_entry(void *opaque)
107
{
108
TruncateCo *tco = opaque;
109
tco->ret = bdrv_co_truncate(tco->child, tco->offset, tco->exact,
110
- tco->prealloc, tco->errp);
111
+ tco->prealloc, tco->flags, tco->errp);
112
aio_wait_kick();
113
}
114
115
int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
116
- PreallocMode prealloc, Error **errp)
117
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
118
{
119
Coroutine *co;
120
TruncateCo tco = {
121
@@ -XXX,XX +XXX,XX @@ int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
122
.offset = offset,
123
.exact = exact,
124
.prealloc = prealloc,
125
+ .flags = flags,
126
.errp = errp,
127
.ret = NOT_DONE,
128
};
129
diff --git a/block/parallels.c b/block/parallels.c
130
index XXXXXXX..XXXXXXX 100644
131
--- a/block/parallels.c
132
+++ b/block/parallels.c
133
@@ -XXX,XX +XXX,XX @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
134
} else {
135
ret = bdrv_truncate(bs->file,
136
(s->data_end + space) << BDRV_SECTOR_BITS,
137
- false, PREALLOC_MODE_OFF, NULL);
138
+ false, PREALLOC_MODE_OFF, 0, NULL);
139
}
140
if (ret < 0) {
141
return ret;
142
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs,
143
* That means we have to pass exact=true.
144
*/
145
ret = bdrv_truncate(bs->file, res->image_end_offset, true,
146
- PREALLOC_MODE_OFF, &local_err);
147
+ PREALLOC_MODE_OFF, 0, &local_err);
148
if (ret < 0) {
149
error_report_err(local_err);
150
res->check_errors++;
151
@@ -XXX,XX +XXX,XX @@ static void parallels_close(BlockDriverState *bs)
152
153
/* errors are ignored, so we might as well pass exact=true */
154
bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true,
155
- PREALLOC_MODE_OFF, NULL);
156
+ PREALLOC_MODE_OFF, 0, NULL);
157
}
158
159
g_free(s->bat_dirty_bmap);
160
diff --git a/block/qcow.c b/block/qcow.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/block/qcow.c
163
+++ b/block/qcow.c
164
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
165
return -E2BIG;
166
}
167
ret = bdrv_truncate(bs->file, cluster_offset + s->cluster_size,
168
- false, PREALLOC_MODE_OFF, NULL);
169
+ false, PREALLOC_MODE_OFF, 0, NULL);
170
if (ret < 0) {
171
return ret;
172
}
173
@@ -XXX,XX +XXX,XX @@ static int qcow_make_empty(BlockDriverState *bs)
174
l1_length) < 0)
175
return -1;
176
ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length, false,
177
- PREALLOC_MODE_OFF, NULL);
178
+ PREALLOC_MODE_OFF, 0, NULL);
179
if (ret < 0)
180
return ret;
181
182
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
183
index XXXXXXX..XXXXXXX 100644
184
--- a/block/qcow2-refcount.c
185
+++ b/block/qcow2-refcount.c
186
@@ -XXX,XX +XXX,XX @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
187
}
188
189
ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
190
- PREALLOC_MODE_OFF, &local_err);
191
+ PREALLOC_MODE_OFF, 0, &local_err);
192
if (ret < 0) {
193
error_report_err(local_err);
194
goto resize_fail;
195
diff --git a/block/qcow2.c b/block/qcow2.c
196
index XXXXXXX..XXXXXXX 100644
197
--- a/block/qcow2.c
198
+++ b/block/qcow2.c
199
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn preallocate_co(BlockDriverState *bs, uint64_t offset,
200
mode = PREALLOC_MODE_OFF;
201
}
202
ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
203
- mode, errp);
204
+ mode, 0, errp);
205
if (ret < 0) {
206
return ret;
207
}
208
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
209
* always fulfilled, so there is no need to pass it on.)
210
*/
211
bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
212
- false, PREALLOC_MODE_OFF, &local_err);
213
+ false, PREALLOC_MODE_OFF, 0, &local_err);
214
if (local_err) {
215
warn_reportf_err(local_err,
216
"Failed to truncate the tail of the image: ");
217
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
218
* file should be resized to the exact target size, too,
219
* so we pass @exact here.
220
*/
221
- ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp);
222
+ ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, 0,
223
+ errp);
224
if (ret < 0) {
225
goto fail;
226
}
227
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
228
new_file_size = allocation_start +
229
nb_new_data_clusters * s->cluster_size;
230
/* Image file grows, so @exact does not matter */
231
- ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp);
232
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, 0,
233
+ errp);
234
if (ret < 0) {
235
error_prepend(errp, "Failed to resize underlying file: ");
236
qcow2_free_clusters(bs, allocation_start,
237
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
238
if (len < 0) {
239
return len;
240
}
241
- return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
242
+ return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, 0,
243
+ NULL);
244
}
245
246
if (offset_into_cluster(s, offset)) {
247
@@ -XXX,XX +XXX,XX @@ static int make_completely_empty(BlockDriverState *bs)
248
}
249
250
ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
251
- PREALLOC_MODE_OFF, &local_err);
252
+ PREALLOC_MODE_OFF, 0, &local_err);
253
if (ret < 0) {
254
error_report_err(local_err);
255
goto fail;
256
diff --git a/block/raw-format.c b/block/raw-format.c
257
index XXXXXXX..XXXXXXX 100644
258
--- a/block/raw-format.c
259
+++ b/block/raw-format.c
260
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
261
262
s->size = offset;
263
offset += s->offset;
264
- return bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
265
+ return bdrv_co_truncate(bs->file, offset, exact, prealloc, 0, errp);
266
}
267
268
static void raw_eject(BlockDriverState *bs, bool eject_flag)
269
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
270
index XXXXXXX..XXXXXXX 100644
271
--- a/block/vhdx-log.c
272
+++ b/block/vhdx-log.c
273
@@ -XXX,XX +XXX,XX @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
274
goto exit;
275
}
276
ret = bdrv_truncate(bs->file, new_file_size, false,
277
- PREALLOC_MODE_OFF, NULL);
278
+ PREALLOC_MODE_OFF, 0, NULL);
279
if (ret < 0) {
280
goto exit;
281
}
282
diff --git a/block/vhdx.c b/block/vhdx.c
283
index XXXXXXX..XXXXXXX 100644
284
--- a/block/vhdx.c
285
+++ b/block/vhdx.c
286
@@ -XXX,XX +XXX,XX @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
287
}
288
289
return bdrv_truncate(bs->file, *new_offset + s->block_size, false,
290
- PREALLOC_MODE_OFF, NULL);
291
+ PREALLOC_MODE_OFF, 0, NULL);
292
}
293
294
/*
295
diff --git a/block/vmdk.c b/block/vmdk.c
296
index XXXXXXX..XXXXXXX 100644
297
--- a/block/vmdk.c
298
+++ b/block/vmdk.c
299
@@ -XXX,XX +XXX,XX @@ vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
300
}
301
length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE);
302
ret = bdrv_truncate(s->extents[i].file, length, false,
303
- PREALLOC_MODE_OFF, NULL);
304
+ PREALLOC_MODE_OFF, 0, NULL);
305
if (ret < 0) {
306
return ret;
307
}
308
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
309
index XXXXXXX..XXXXXXX 100644
310
--- a/tests/test-block-iothread.c
311
+++ b/tests/test-block-iothread.c
312
@@ -XXX,XX +XXX,XX @@ static void test_sync_op_truncate(BdrvChild *c)
313
int ret;
314
315
/* Normal success path */
316
- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL);
317
+ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL);
318
g_assert_cmpint(ret, ==, 0);
319
320
/* Early error: Negative offset */
321
- ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, NULL);
322
+ ret = bdrv_truncate(c, -2, false, PREALLOC_MODE_OFF, 0, NULL);
323
g_assert_cmpint(ret, ==, -EINVAL);
324
325
/* Error: Read-only image */
326
c->bs->read_only = true;
327
c->bs->open_flags &= ~BDRV_O_RDWR;
328
329
- ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, NULL);
330
+ ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL);
331
g_assert_cmpint(ret, ==, -EACCES);
332
333
c->bs->read_only = false;
334
--
129
--
335
2.25.3
130
2.40.1
336
337
diff view generated by jsdifflib
1
From: Alberto Garcia <berto@igalia.com>
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
2
3
Backing files and raw external data files are mutually exclusive.
3
Add a helper function to check whether the device is realized without
4
The documentation of the raw external data bit (in autoclear_features)
4
requiring the Big QEMU Lock. The next patch adds a second caller. The
5
already indicates that, but we should also mention it on the other
5
goal is to avoid spreading DeviceState field accesses throughout the
6
side.
6
code.
7
7
8
Suggested-by: Eric Blake <eblake@redhat.com>
8
Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Signed-off-by: Alberto Garcia <berto@igalia.com>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
10
Message-Id: <20200410121816.8334-1-berto@igalia.com>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Reviewed-by: Eric Blake <eblake@redhat.com>
11
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
12
Message-Id: <20230516190238.8401-3-stefanha@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
14
---
14
docs/interop/qcow2.txt | 3 +++
15
include/hw/qdev-core.h | 17 ++++++++++++++---
15
1 file changed, 3 insertions(+)
16
hw/scsi/scsi-bus.c | 3 +--
17
2 files changed, 15 insertions(+), 5 deletions(-)
16
18
17
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
19
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
18
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
19
--- a/docs/interop/qcow2.txt
21
--- a/include/hw/qdev-core.h
20
+++ b/docs/interop/qcow2.txt
22
+++ b/include/hw/qdev-core.h
21
@@ -XXX,XX +XXX,XX @@ The first cluster of a qcow2 image contains the file header:
23
@@ -XXX,XX +XXX,XX @@
22
is stored (NB: The string is not null terminated). 0 if the
24
#ifndef QDEV_CORE_H
23
image doesn't have a backing file.
25
#define QDEV_CORE_H
24
26
25
+ Note: backing files are incompatible with raw external data
27
+#include "qemu/atomic.h"
26
+ files (auto-clear feature bit 1).
28
#include "qemu/queue.h"
29
#include "qemu/bitmap.h"
30
#include "qemu/rcu.h"
31
@@ -XXX,XX +XXX,XX @@ typedef struct {
32
33
/**
34
* DeviceState:
35
- * @realized: Indicates whether the device has been fully constructed.
36
- * When accessed outside big qemu lock, must be accessed with
37
- * qatomic_load_acquire()
38
* @reset: ResettableState for the device; handled by Resettable interface.
39
*
40
* This structure should not be accessed directly. We declare it here
41
@@ -XXX,XX +XXX,XX @@ DeviceState *qdev_new(const char *name);
42
*/
43
DeviceState *qdev_try_new(const char *name);
44
45
+/**
46
+ * qdev_is_realized:
47
+ * @dev: The device to check.
48
+ *
49
+ * May be called outside big qemu lock.
50
+ *
51
+ * Returns: %true% if the device has been fully constructed, %false% otherwise.
52
+ */
53
+static inline bool qdev_is_realized(DeviceState *dev)
54
+{
55
+ return qatomic_load_acquire(&dev->realized);
56
+}
27
+
57
+
28
16 - 19: backing_file_size
58
/**
29
Length of the backing file name in bytes. Must not be
59
* qdev_realize: Realize @dev.
30
longer than 1023 bytes. Undefined if the image doesn't have
60
* @dev: device to realize
61
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/hw/scsi/scsi-bus.c
64
+++ b/hw/scsi/scsi-bus.c
65
@@ -XXX,XX +XXX,XX @@ static SCSIDevice *do_scsi_device_find(SCSIBus *bus,
66
* the user access the device.
67
*/
68
69
- if (retval && !include_unrealized &&
70
- !qatomic_load_acquire(&retval->qdev.realized)) {
71
+ if (retval && !include_unrealized && !qdev_is_realized(&retval->qdev)) {
72
retval = NULL;
73
}
74
31
--
75
--
32
2.25.3
76
2.40.1
33
77
34
78
diff view generated by jsdifflib
New patch
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
2
3
Only report a transport reset event to the guest after the SCSIDevice
4
has been unrealized by qdev_simple_device_unplug_cb().
5
6
qdev_simple_device_unplug_cb() sets the SCSIDevice's qdev.realized field
7
to false so that scsi_device_find/get() no longer see it.
8
9
scsi_target_emulate_report_luns() also needs to be updated to filter out
10
SCSIDevices that are unrealized.
11
12
Change virtio_scsi_push_event() to take event information as an argument
13
instead of the SCSIDevice. This allows virtio_scsi_hotunplug() to emit a
14
VIRTIO_SCSI_T_TRANSPORT_RESET event after the SCSIDevice has already
15
been unrealized.
16
17
These changes ensure that the guest driver does not see the SCSIDevice
18
that's being unplugged if it responds very quickly to the transport
19
reset event.
20
21
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
22
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
23
Reviewed-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
24
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
25
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
26
Message-Id: <20230516190238.8401-4-stefanha@redhat.com>
27
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
28
---
29
hw/scsi/scsi-bus.c | 3 +-
30
hw/scsi/virtio-scsi.c | 86 ++++++++++++++++++++++++++++++-------------
31
2 files changed, 63 insertions(+), 26 deletions(-)
32
33
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/hw/scsi/scsi-bus.c
36
+++ b/hw/scsi/scsi-bus.c
37
@@ -XXX,XX +XXX,XX @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r)
38
DeviceState *qdev = kid->child;
39
SCSIDevice *dev = SCSI_DEVICE(qdev);
40
41
- if (dev->channel == channel && dev->id == id && dev->lun != 0) {
42
+ if (dev->channel == channel && dev->id == id && dev->lun != 0 &&
43
+ qdev_is_realized(&dev->qdev)) {
44
store_lun(tmp, dev->lun);
45
g_byte_array_append(buf, tmp, 8);
46
len += 8;
47
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/hw/scsi/virtio-scsi.c
50
+++ b/hw/scsi/virtio-scsi.c
51
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)
52
s->events_dropped = false;
53
}
54
55
-static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
56
- uint32_t event, uint32_t reason)
57
+typedef struct {
58
+ uint32_t event;
59
+ uint32_t reason;
60
+ union {
61
+ /* Used by messages specific to a device */
62
+ struct {
63
+ uint32_t id;
64
+ uint32_t lun;
65
+ } address;
66
+ };
67
+} VirtIOSCSIEventInfo;
68
+
69
+static void virtio_scsi_push_event(VirtIOSCSI *s,
70
+ const VirtIOSCSIEventInfo *info)
71
{
72
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
73
VirtIOSCSIReq *req;
74
VirtIOSCSIEvent *evt;
75
VirtIODevice *vdev = VIRTIO_DEVICE(s);
76
+ uint32_t event = info->event;
77
+ uint32_t reason = info->reason;
78
79
if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
80
return;
81
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
82
memset(evt, 0, sizeof(VirtIOSCSIEvent));
83
evt->event = virtio_tswap32(vdev, event);
84
evt->reason = virtio_tswap32(vdev, reason);
85
- if (!dev) {
86
- assert(event == VIRTIO_SCSI_T_EVENTS_MISSED);
87
- } else {
88
+ if (event != VIRTIO_SCSI_T_EVENTS_MISSED) {
89
evt->lun[0] = 1;
90
- evt->lun[1] = dev->id;
91
+ evt->lun[1] = info->address.id;
92
93
/* Linux wants us to keep the same encoding we use for REPORT LUNS. */
94
- if (dev->lun >= 256) {
95
- evt->lun[2] = (dev->lun >> 8) | 0x40;
96
+ if (info->address.lun >= 256) {
97
+ evt->lun[2] = (info->address.lun >> 8) | 0x40;
98
}
99
- evt->lun[3] = dev->lun & 0xFF;
100
+ evt->lun[3] = info->address.lun & 0xFF;
101
}
102
trace_virtio_scsi_event(virtio_scsi_get_lun(evt->lun), event, reason);
103
-
104
+
105
virtio_scsi_complete_req(req);
106
}
107
108
static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
109
{
110
if (s->events_dropped) {
111
- virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
112
+ VirtIOSCSIEventInfo info = {
113
+ .event = VIRTIO_SCSI_T_NO_EVENT,
114
+ };
115
+ virtio_scsi_push_event(s, &info);
116
}
117
}
118
119
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
120
121
if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_CHANGE) &&
122
dev->type != TYPE_ROM) {
123
+ VirtIOSCSIEventInfo info = {
124
+ .event = VIRTIO_SCSI_T_PARAM_CHANGE,
125
+ .reason = sense.asc | (sense.ascq << 8),
126
+ .address = {
127
+ .id = dev->id,
128
+ .lun = dev->lun,
129
+ },
130
+ };
131
+
132
virtio_scsi_acquire(s);
133
- virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
134
- sense.asc | (sense.ascq << 8));
135
+ virtio_scsi_push_event(s, &info);
136
virtio_scsi_release(s);
137
}
138
}
139
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
140
}
141
142
if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
143
+ VirtIOSCSIEventInfo info = {
144
+ .event = VIRTIO_SCSI_T_TRANSPORT_RESET,
145
+ .reason = VIRTIO_SCSI_EVT_RESET_RESCAN,
146
+ .address = {
147
+ .id = sd->id,
148
+ .lun = sd->lun,
149
+ },
150
+ };
151
+
152
virtio_scsi_acquire(s);
153
- virtio_scsi_push_event(s, sd,
154
- VIRTIO_SCSI_T_TRANSPORT_RESET,
155
- VIRTIO_SCSI_EVT_RESET_RESCAN);
156
+ virtio_scsi_push_event(s, &info);
157
scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
158
virtio_scsi_release(s);
159
}
160
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
161
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
162
SCSIDevice *sd = SCSI_DEVICE(dev);
163
AioContext *ctx = s->ctx ?: qemu_get_aio_context();
164
-
165
- if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
166
- virtio_scsi_acquire(s);
167
- virtio_scsi_push_event(s, sd,
168
- VIRTIO_SCSI_T_TRANSPORT_RESET,
169
- VIRTIO_SCSI_EVT_RESET_REMOVED);
170
- scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
171
- virtio_scsi_release(s);
172
- }
173
+ VirtIOSCSIEventInfo info = {
174
+ .event = VIRTIO_SCSI_T_TRANSPORT_RESET,
175
+ .reason = VIRTIO_SCSI_EVT_RESET_REMOVED,
176
+ .address = {
177
+ .id = sd->id,
178
+ .lun = sd->lun,
179
+ },
180
+ };
181
182
aio_disable_external(ctx);
183
qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
184
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
185
blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
186
virtio_scsi_release(s);
187
}
188
+
189
+ if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
190
+ virtio_scsi_acquire(s);
191
+ virtio_scsi_push_event(s, &info);
192
+ scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
193
+ virtio_scsi_release(s);
194
+ }
195
}
196
197
static struct SCSIBusInfo virtio_scsi_scsi_info = {
198
--
199
2.40.1
diff view generated by jsdifflib
New patch
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
2
3
This patch is part of an effort to remove the aio_disable_external()
4
API because it does not fit in a multi-queue block layer world where
5
many AioContexts may be submitting requests to the same disk.
6
7
The SCSI emulation code is already in good shape to stop using
8
aio_disable_external(). It was only used by commit 9c5aad84da1c
9
("virtio-scsi: fixed virtio_scsi_ctx_check failed when detaching scsi
10
disk") to ensure that virtio_scsi_hotunplug() works while the guest
11
driver is submitting I/O.
12
13
Ensure virtio_scsi_hotunplug() is safe as follows:
14
15
1. qdev_simple_device_unplug_cb() -> qdev_unrealize() ->
16
device_set_realized() calls qatomic_set(&dev->realized, false) so
17
that future scsi_device_get() calls return NULL because they exclude
18
SCSIDevices with realized=false.
19
20
That means virtio-scsi will reject new I/O requests to this
21
SCSIDevice with VIRTIO_SCSI_S_BAD_TARGET even while
22
virtio_scsi_hotunplug() is still executing. We are protected against
23
new requests!
24
25
2. scsi_qdev_unrealize() already contains a call to
26
scsi_device_purge_requests() so that in-flight requests are cancelled
27
synchronously. This ensures that no in-flight requests remain once
28
qdev_simple_device_unplug_cb() returns.
29
30
Thanks to these two conditions we don't need aio_disable_external()
31
anymore.
32
33
Cc: Zhengui Li <lizhengui@huawei.com>
34
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
35
Reviewed-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
36
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
37
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
38
Message-Id: <20230516190238.8401-5-stefanha@redhat.com>
39
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
40
---
41
hw/scsi/virtio-scsi.c | 3 ---
42
1 file changed, 3 deletions(-)
43
44
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
45
index XXXXXXX..XXXXXXX 100644
46
--- a/hw/scsi/virtio-scsi.c
47
+++ b/hw/scsi/virtio-scsi.c
48
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
49
VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
50
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
51
SCSIDevice *sd = SCSI_DEVICE(dev);
52
- AioContext *ctx = s->ctx ?: qemu_get_aio_context();
53
VirtIOSCSIEventInfo info = {
54
.event = VIRTIO_SCSI_T_TRANSPORT_RESET,
55
.reason = VIRTIO_SCSI_EVT_RESET_REMOVED,
56
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
57
},
58
};
59
60
- aio_disable_external(ctx);
61
qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
62
- aio_enable_external(ctx);
63
64
if (s->ctx) {
65
virtio_scsi_acquire(s);
66
--
67
2.40.1
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
2
3
Test 244 checks the expected behavior of qcow2 external data files
3
The VuServer object has a refcount field and ref/unref APIs. The name is
4
with respect to zero and discarded clusters. Filesystems however
4
confusing because it's actually an in-flight request counter instead of
5
are free to ignore discard requests, and this seems to be the
5
a refcount.
6
case for overlayfs. Relax the tests to skip checks on the
7
external data file for discarded areas, which implies not using
8
qemu-img compare in the data_file_raw=on case.
9
6
10
This fixes docker tests on RHEL8.
7
Normally a refcount destroys the object upon reaching zero. The VuServer
8
counter is used to wake up the vhost-user coroutine when there are no
9
more requests.
11
10
12
Cc: Kevin Wolf <kwolf@redhat.com>
11
Avoid confusion by renaming refcount and ref/unref to in_flight and
13
Cc: qemu-block@nongnu.org
12
inc/dec.
14
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
13
15
Message-Id: <20200409191006.24429-1-pbonzini@redhat.com>
14
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
15
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
18
Message-Id: <20230516190238.8401-6-stefanha@redhat.com>
16
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
19
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
---
20
---
18
tests/qemu-iotests/244 | 10 ++++++++--
21
include/qemu/vhost-user-server.h | 6 +++---
19
tests/qemu-iotests/244.out | 9 ++++++---
22
block/export/vhost-user-blk-server.c | 11 +++++++----
20
2 files changed, 14 insertions(+), 5 deletions(-)
23
util/vhost-user-server.c | 14 +++++++-------
24
3 files changed, 17 insertions(+), 14 deletions(-)
21
25
22
diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244
26
diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
23
index XXXXXXX..XXXXXXX 100755
24
--- a/tests/qemu-iotests/244
25
+++ b/tests/qemu-iotests/244
26
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \
27
echo
28
$QEMU_IO -c 'read -P 0 0 1M' \
29
-c 'read -P 0x11 1M 1M' \
30
- -c 'read -P 0 2M 2M' \
31
-c 'read -P 0x11 4M 1M' \
32
-c 'read -P 0 5M 1M' \
33
-f raw "$TEST_IMG.data" |
34
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'read -P 0 0 1M' \
35
-f $IMGFMT "$TEST_IMG" |
36
_filter_qemu_io
37
38
+# Discarded clusters are only marked as such in the qcow2 metadata, but
39
+# they can contain stale data in the external data file. Instead, zero
40
+# clusters must be zeroed in the external data file too.
41
echo
42
-$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.data"
43
+$QEMU_IO -c 'read -P 0 0 1M' \
44
+ -c 'read -P 0x11 1M 1M' \
45
+ -c 'read -P 0 3M 3M' \
46
+ -f raw "$TEST_IMG".data |
47
+ _filter_qemu_io
48
49
echo -n "qcow2 file size after I/O: "
50
du -b $TEST_IMG | cut -f1
51
diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out
52
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
53
--- a/tests/qemu-iotests/244.out
28
--- a/include/qemu/vhost-user-server.h
54
+++ b/tests/qemu-iotests/244.out
29
+++ b/include/qemu/vhost-user-server.h
55
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0
30
@@ -XXX,XX +XXX,XX @@ typedef struct {
56
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
31
const VuDevIface *vu_iface;
57
read 1048576/1048576 bytes at offset 1048576
32
58
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
33
/* Protected by ctx lock */
59
-read 2097152/2097152 bytes at offset 2097152
34
- unsigned int refcount;
60
-2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
35
+ unsigned int in_flight;
61
read 1048576/1048576 bytes at offset 4194304
36
bool wait_idle;
62
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
37
VuDev vu_dev;
63
read 1048576/1048576 bytes at offset 5242880
38
QIOChannel *ioc; /* The I/O channel with the client */
64
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 1048576
39
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
65
read 4194304/4194304 bytes at offset 2097152
40
66
4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
41
void vhost_user_server_stop(VuServer *server);
67
42
68
-Images are identical.
43
-void vhost_user_server_ref(VuServer *server);
69
+read 1048576/1048576 bytes at offset 0
44
-void vhost_user_server_unref(VuServer *server);
70
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
45
+void vhost_user_server_inc_in_flight(VuServer *server);
71
+read 1048576/1048576 bytes at offset 1048576
46
+void vhost_user_server_dec_in_flight(VuServer *server);
72
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
47
73
+read 3145728/3145728 bytes at offset 3145728
48
void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
74
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
49
void vhost_user_server_detach_aio_context(VuServer *server);
75
qcow2 file size after I/O: 327680
50
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
76
51
index XXXXXXX..XXXXXXX 100644
77
=== bdrv_co_block_status test for file and offset=0 ===
52
--- a/block/export/vhost-user-blk-server.c
53
+++ b/block/export/vhost-user-blk-server.c
54
@@ -XXX,XX +XXX,XX @@ static void vu_blk_req_complete(VuBlkReq *req, size_t in_len)
55
free(req);
56
}
57
58
-/* Called with server refcount increased, must decrease before returning */
59
+/*
60
+ * Called with server in_flight counter increased, must decrease before
61
+ * returning.
62
+ */
63
static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
64
{
65
VuBlkReq *req = opaque;
66
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
67
in_num, out_num);
68
if (in_len < 0) {
69
free(req);
70
- vhost_user_server_unref(server);
71
+ vhost_user_server_dec_in_flight(server);
72
return;
73
}
74
75
vu_blk_req_complete(req, in_len);
76
- vhost_user_server_unref(server);
77
+ vhost_user_server_dec_in_flight(server);
78
}
79
80
static void vu_blk_process_vq(VuDev *vu_dev, int idx)
81
@@ -XXX,XX +XXX,XX @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
82
Coroutine *co =
83
qemu_coroutine_create(vu_blk_virtio_process_req, req);
84
85
- vhost_user_server_ref(server);
86
+ vhost_user_server_inc_in_flight(server);
87
qemu_coroutine_enter(co);
88
}
89
}
90
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/util/vhost-user-server.c
93
+++ b/util/vhost-user-server.c
94
@@ -XXX,XX +XXX,XX @@ static void panic_cb(VuDev *vu_dev, const char *buf)
95
error_report("vu_panic: %s", buf);
96
}
97
98
-void vhost_user_server_ref(VuServer *server)
99
+void vhost_user_server_inc_in_flight(VuServer *server)
100
{
101
assert(!server->wait_idle);
102
- server->refcount++;
103
+ server->in_flight++;
104
}
105
106
-void vhost_user_server_unref(VuServer *server)
107
+void vhost_user_server_dec_in_flight(VuServer *server)
108
{
109
- server->refcount--;
110
- if (server->wait_idle && !server->refcount) {
111
+ server->in_flight--;
112
+ if (server->wait_idle && !server->in_flight) {
113
aio_co_wake(server->co_trip);
114
}
115
}
116
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void vu_client_trip(void *opaque)
117
/* Keep running */
118
}
119
120
- if (server->refcount) {
121
+ if (server->in_flight) {
122
/* Wait for requests to complete before we can unmap the memory */
123
server->wait_idle = true;
124
qemu_coroutine_yield();
125
server->wait_idle = false;
126
}
127
- assert(server->refcount == 0);
128
+ assert(server->in_flight == 0);
129
130
vu_deinit(vu_dev);
131
78
--
132
--
79
2.25.3
133
2.40.1
80
134
81
135
diff view generated by jsdifflib
New patch
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
2
3
Each vhost-user-blk request runs in a coroutine. When the BlockBackend
4
enters a drained section we need to enter a quiescent state. Currently
5
any in-flight requests race with bdrv_drained_begin() because it is
6
unaware of vhost-user-blk requests.
7
8
When blk_co_preadv/pwritev()/etc returns it wakes the
9
bdrv_drained_begin() thread but vhost-user-blk request processing has
10
not yet finished. The request coroutine continues executing while the
11
main loop thread thinks it is in a drained section.
12
13
One example where this is unsafe is for blk_set_aio_context() where
14
bdrv_drained_begin() is called before .aio_context_detached() and
15
.aio_context_attach(). If request coroutines are still running after
16
bdrv_drained_begin(), then the AioContext could change underneath them
17
and they race with new requests processed in the new AioContext. This
18
could lead to virtqueue corruption, for example.
19
20
(This example is theoretical, I came across this while reading the
21
code and have not tried to reproduce it.)
22
23
It's easy to make bdrv_drained_begin() wait for in-flight requests: add
24
a .drained_poll() callback that checks the VuServer's in-flight counter.
25
VuServer just needs an API that returns true when there are requests in
26
flight. The in-flight counter needs to be atomic.
27
28
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
29
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
30
Message-Id: <20230516190238.8401-7-stefanha@redhat.com>
31
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
32
---
33
include/qemu/vhost-user-server.h | 4 +++-
34
block/export/vhost-user-blk-server.c | 13 +++++++++++++
35
util/vhost-user-server.c | 18 ++++++++++++------
36
3 files changed, 28 insertions(+), 7 deletions(-)
37
38
diff --git a/include/qemu/vhost-user-server.h b/include/qemu/vhost-user-server.h
39
index XXXXXXX..XXXXXXX 100644
40
--- a/include/qemu/vhost-user-server.h
41
+++ b/include/qemu/vhost-user-server.h
42
@@ -XXX,XX +XXX,XX @@ typedef struct {
43
int max_queues;
44
const VuDevIface *vu_iface;
45
46
+ unsigned int in_flight; /* atomic */
47
+
48
/* Protected by ctx lock */
49
- unsigned int in_flight;
50
bool wait_idle;
51
VuDev vu_dev;
52
QIOChannel *ioc; /* The I/O channel with the client */
53
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_stop(VuServer *server);
54
55
void vhost_user_server_inc_in_flight(VuServer *server);
56
void vhost_user_server_dec_in_flight(VuServer *server);
57
+bool vhost_user_server_has_in_flight(VuServer *server);
58
59
void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
60
void vhost_user_server_detach_aio_context(VuServer *server);
61
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/block/export/vhost-user-blk-server.c
64
+++ b/block/export/vhost-user-blk-server.c
65
@@ -XXX,XX +XXX,XX @@ static void vu_blk_exp_resize(void *opaque)
66
vu_config_change_msg(&vexp->vu_server.vu_dev);
67
}
68
69
+/*
70
+ * Ensures that bdrv_drained_begin() waits until in-flight requests complete.
71
+ *
72
+ * Called with vexp->export.ctx acquired.
73
+ */
74
+static bool vu_blk_drained_poll(void *opaque)
75
+{
76
+ VuBlkExport *vexp = opaque;
77
+
78
+ return vhost_user_server_has_in_flight(&vexp->vu_server);
79
+}
80
+
81
static const BlockDevOps vu_blk_dev_ops = {
82
+ .drained_poll = vu_blk_drained_poll,
83
.resize_cb = vu_blk_exp_resize,
84
};
85
86
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/util/vhost-user-server.c
89
+++ b/util/vhost-user-server.c
90
@@ -XXX,XX +XXX,XX @@ static void panic_cb(VuDev *vu_dev, const char *buf)
91
void vhost_user_server_inc_in_flight(VuServer *server)
92
{
93
assert(!server->wait_idle);
94
- server->in_flight++;
95
+ qatomic_inc(&server->in_flight);
96
}
97
98
void vhost_user_server_dec_in_flight(VuServer *server)
99
{
100
- server->in_flight--;
101
- if (server->wait_idle && !server->in_flight) {
102
- aio_co_wake(server->co_trip);
103
+ if (qatomic_fetch_dec(&server->in_flight) == 1) {
104
+ if (server->wait_idle) {
105
+ aio_co_wake(server->co_trip);
106
+ }
107
}
108
}
109
110
+bool vhost_user_server_has_in_flight(VuServer *server)
111
+{
112
+ return qatomic_load_acquire(&server->in_flight) > 0;
113
+}
114
+
115
static bool coroutine_fn
116
vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
117
{
118
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void vu_client_trip(void *opaque)
119
/* Keep running */
120
}
121
122
- if (server->in_flight) {
123
+ if (vhost_user_server_has_in_flight(server)) {
124
/* Wait for requests to complete before we can unmap the memory */
125
server->wait_idle = true;
126
qemu_coroutine_yield();
127
server->wait_idle = false;
128
}
129
- assert(server->in_flight == 0);
130
+ assert(!vhost_user_server_has_in_flight(server));
131
132
vu_deinit(vu_dev);
133
134
--
135
2.40.1
diff view generated by jsdifflib
New patch
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
2
3
vhost-user activity must be suspended during bdrv_drained_begin/end().
4
This prevents new requests from interfering with whatever is happening
5
in the drained section.
6
7
Previously this was done using aio_set_fd_handler()'s is_external
8
argument. In a multi-queue block layer world the aio_disable_external()
9
API cannot be used since multiple AioContexts may be processing I/O, not
10
just one.
11
12
Switch to BlockDevOps->drained_begin/end() callbacks.
13
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
16
Message-Id: <20230516190238.8401-8-stefanha@redhat.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
---
19
block/export/vhost-user-blk-server.c | 28 ++++++++++++++++++++++++++--
20
util/vhost-user-server.c | 10 +++++-----
21
2 files changed, 31 insertions(+), 7 deletions(-)
22
23
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/block/export/vhost-user-blk-server.c
26
+++ b/block/export/vhost-user-blk-server.c
27
@@ -XXX,XX +XXX,XX @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
28
{
29
VuBlkExport *vexp = opaque;
30
31
+ /*
32
+ * The actual attach will happen in vu_blk_drained_end() and we just
33
+ * restore ctx here.
34
+ */
35
vexp->export.ctx = ctx;
36
- vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
37
}
38
39
static void blk_aio_detach(void *opaque)
40
{
41
VuBlkExport *vexp = opaque;
42
43
- vhost_user_server_detach_aio_context(&vexp->vu_server);
44
+ /*
45
+ * The actual detach already happened in vu_blk_drained_begin() but from
46
+ * this point on we must not access ctx anymore.
47
+ */
48
vexp->export.ctx = NULL;
49
}
50
51
@@ -XXX,XX +XXX,XX @@ static void vu_blk_exp_resize(void *opaque)
52
vu_config_change_msg(&vexp->vu_server.vu_dev);
53
}
54
55
+/* Called with vexp->export.ctx acquired */
56
+static void vu_blk_drained_begin(void *opaque)
57
+{
58
+ VuBlkExport *vexp = opaque;
59
+
60
+ vhost_user_server_detach_aio_context(&vexp->vu_server);
61
+}
62
+
63
+/* Called with vexp->export.blk AioContext acquired */
64
+static void vu_blk_drained_end(void *opaque)
65
+{
66
+ VuBlkExport *vexp = opaque;
67
+
68
+ vhost_user_server_attach_aio_context(&vexp->vu_server, vexp->export.ctx);
69
+}
70
+
71
/*
72
* Ensures that bdrv_drained_begin() waits until in-flight requests complete.
73
*
74
@@ -XXX,XX +XXX,XX @@ static bool vu_blk_drained_poll(void *opaque)
75
}
76
77
static const BlockDevOps vu_blk_dev_ops = {
78
+ .drained_begin = vu_blk_drained_begin,
79
+ .drained_end = vu_blk_drained_end,
80
.drained_poll = vu_blk_drained_poll,
81
.resize_cb = vu_blk_exp_resize,
82
};
83
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/util/vhost-user-server.c
86
+++ b/util/vhost-user-server.c
87
@@ -XXX,XX +XXX,XX @@ set_watch(VuDev *vu_dev, int fd, int vu_evt,
88
vu_fd_watch->fd = fd;
89
vu_fd_watch->cb = cb;
90
qemu_socket_set_nonblock(fd);
91
- aio_set_fd_handler(server->ioc->ctx, fd, true, kick_handler,
92
+ aio_set_fd_handler(server->ioc->ctx, fd, false, kick_handler,
93
NULL, NULL, NULL, vu_fd_watch);
94
vu_fd_watch->vu_dev = vu_dev;
95
vu_fd_watch->pvt = pvt;
96
@@ -XXX,XX +XXX,XX @@ static void remove_watch(VuDev *vu_dev, int fd)
97
if (!vu_fd_watch) {
98
return;
99
}
100
- aio_set_fd_handler(server->ioc->ctx, fd, true,
101
+ aio_set_fd_handler(server->ioc->ctx, fd, false,
102
NULL, NULL, NULL, NULL, NULL);
103
104
QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
105
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_stop(VuServer *server)
106
VuFdWatch *vu_fd_watch;
107
108
QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
109
- aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
110
+ aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false,
111
NULL, NULL, NULL, NULL, vu_fd_watch);
112
}
113
114
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
115
qio_channel_attach_aio_context(server->ioc, ctx);
116
117
QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
118
- aio_set_fd_handler(ctx, vu_fd_watch->fd, true, kick_handler, NULL,
119
+ aio_set_fd_handler(ctx, vu_fd_watch->fd, false, kick_handler, NULL,
120
NULL, NULL, vu_fd_watch);
121
}
122
123
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_detach_aio_context(VuServer *server)
124
VuFdWatch *vu_fd_watch;
125
126
QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
127
- aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
128
+ aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false,
129
NULL, NULL, NULL, NULL, vu_fd_watch);
130
}
131
132
--
133
2.40.1
diff view generated by jsdifflib
New patch
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
2
3
There is no need to suspend activity between aio_disable_external() and
4
aio_enable_external(), which is mainly used for the block layer's drain
5
operation.
6
7
This is part of ongoing work to remove the aio_disable_external() API.
8
9
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
10
Reviewed-by: Paul Durrant <paul@xen.org>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
13
Message-Id: <20230516190238.8401-9-stefanha@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
---
16
hw/i386/kvm/xen_xenstore.c | 2 +-
17
1 file changed, 1 insertion(+), 1 deletion(-)
18
19
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/i386/kvm/xen_xenstore.c
22
+++ b/hw/i386/kvm/xen_xenstore.c
23
@@ -XXX,XX +XXX,XX @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp)
24
error_setg(errp, "Xenstore evtchn port init failed");
25
return;
26
}
27
- aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), true,
28
+ aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), false,
29
xen_xenstore_event, NULL, NULL, NULL, s);
30
31
s->impl = xs_impl_create(xen_domid);
32
--
33
2.40.1
diff view generated by jsdifflib
New patch
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
2
3
The BlockBackend quiesce_counter is greater than zero during drained
4
sections. Add an API to check whether the BlockBackend is in a drained
5
section.
6
7
The next patch will use this API.
8
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
11
Message-Id: <20230516190238.8401-10-stefanha@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
14
include/sysemu/block-backend-global-state.h | 1 +
15
block/block-backend.c | 7 +++++++
16
2 files changed, 8 insertions(+)
17
18
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/sysemu/block-backend-global-state.h
21
+++ b/include/sysemu/block-backend-global-state.h
22
@@ -XXX,XX +XXX,XX @@ void blk_activate(BlockBackend *blk, Error **errp);
23
int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
24
void blk_aio_cancel(BlockAIOCB *acb);
25
int blk_commit_all(void);
26
+bool blk_in_drain(BlockBackend *blk);
27
void blk_drain(BlockBackend *blk);
28
void blk_drain_all(void);
29
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
30
diff --git a/block/block-backend.c b/block/block-backend.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/block/block-backend.c
33
+++ b/block/block-backend.c
34
@@ -XXX,XX +XXX,XX @@ blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes)
35
return 0;
36
}
37
38
+/* Are we currently in a drained section? */
39
+bool blk_in_drain(BlockBackend *blk)
40
+{
41
+ GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */
42
+ return qatomic_read(&blk->quiesce_counter);
43
+}
44
+
45
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
46
static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
47
{
48
--
49
2.40.1
diff view generated by jsdifflib
1
When extending the size of an image that has a backing file larger than
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
its old size, make sure that the backing file data doesn't become
2
3
visible in the guest, but the added area is properly zeroed out.
3
For simplicity, always run BlockDevOps .drained_begin/end/poll()
4
4
callbacks in the main loop thread. This makes it easier to implement the
5
Consider the following scenario where the overlay is shorter than its
5
callbacks and avoids extra locks.
6
backing file:
6
7
7
Move the function pointer declarations from the I/O Code section to the
8
base.qcow2: AAAAAAAA
8
Global State section for BlockDevOps, BdrvChildClass, and BlockDriver.
9
overlay.qcow2: BBBB
9
10
10
Narrow IO_OR_GS_CODE() to GLOBAL_STATE_CODE() where appropriate.
11
When resizing (extending) overlay.qcow2, the new blocks should not stay
11
12
unallocated and make the additional As from base.qcow2 visible like
12
The test-bdrv-drain test case calls bdrv_drain() from an IOThread. This
13
before this patch, but zeros should be read.
13
is now only allowed from coroutine context, so update the test case to
14
14
run in a coroutine.
15
A similar case happens with the various variants of a commit job when an
15
16
intermediate file is short (- for unallocated):
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
17
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
18
base.qcow2: A-A-AAAA
18
Message-Id: <20230516190238.8401-11-stefanha@redhat.com>
19
mid.qcow2: BB-B
20
top.qcow2: C--C--C-
21
22
After committing top.qcow2 to mid.qcow2, the following happens:
23
24
mid.qcow2: CB-C00C0 (correct result)
25
mid.qcow2: CB-C--C- (before this fix)
26
27
Without the fix, blocks that previously read as zeros on top.qcow2
28
suddenly turn into A.
29
30
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
31
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
32
Message-Id: <20200424125448.63318-8-kwolf@redhat.com>
33
Reviewed-by: Max Reitz <mreitz@redhat.com>
34
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
19
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
35
---
20
---
36
block/io.c | 25 +++++++++++++++++++++++++
21
include/block/block_int-common.h | 72 +++++++++++++--------------
37
1 file changed, 25 insertions(+)
22
include/sysemu/block-backend-common.h | 25 +++++-----
38
23
block/io.c | 14 ++++--
24
tests/unit/test-bdrv-drain.c | 14 +++---
25
4 files changed, 67 insertions(+), 58 deletions(-)
26
27
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/block/block_int-common.h
30
+++ b/include/block/block_int-common.h
31
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
32
void (*bdrv_attach_aio_context)(BlockDriverState *bs,
33
AioContext *new_context);
34
35
+ /**
36
+ * bdrv_drain_begin is called if implemented in the beginning of a
37
+ * drain operation to drain and stop any internal sources of requests in
38
+ * the driver.
39
+ * bdrv_drain_end is called if implemented at the end of the drain.
40
+ *
41
+ * They should be used by the driver to e.g. manage scheduled I/O
42
+ * requests, or toggle an internal state. After the end of the drain new
43
+ * requests will continue normally.
44
+ *
45
+ * Implementations of both functions must not call aio_poll().
46
+ */
47
+ void (*bdrv_drain_begin)(BlockDriverState *bs);
48
+ void (*bdrv_drain_end)(BlockDriverState *bs);
49
+
50
/**
51
* Try to get @bs's logical and physical block size.
52
* On success, store them in @bsz and return zero.
53
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
54
void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_io_unplug)(
55
BlockDriverState *bs);
56
57
- /**
58
- * bdrv_drain_begin is called if implemented in the beginning of a
59
- * drain operation to drain and stop any internal sources of requests in
60
- * the driver.
61
- * bdrv_drain_end is called if implemented at the end of the drain.
62
- *
63
- * They should be used by the driver to e.g. manage scheduled I/O
64
- * requests, or toggle an internal state. After the end of the drain new
65
- * requests will continue normally.
66
- *
67
- * Implementations of both functions must not call aio_poll().
68
- */
69
- void (*bdrv_drain_begin)(BlockDriverState *bs);
70
- void (*bdrv_drain_end)(BlockDriverState *bs);
71
-
72
bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
73
74
bool coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_can_store_new_dirty_bitmap)(
75
@@ -XXX,XX +XXX,XX @@ struct BdrvChildClass {
76
void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child);
77
void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child);
78
79
+ /*
80
+ * If this pair of functions is implemented, the parent doesn't issue new
81
+ * requests after returning from .drained_begin() until .drained_end() is
82
+ * called.
83
+ *
84
+ * These functions must not change the graph (and therefore also must not
85
+ * call aio_poll(), which could change the graph indirectly).
86
+ *
87
+ * Note that this can be nested. If drained_begin() was called twice, new
88
+ * I/O is allowed only after drained_end() was called twice, too.
89
+ */
90
+ void (*drained_begin)(BdrvChild *child);
91
+ void (*drained_end)(BdrvChild *child);
92
+
93
+ /*
94
+ * Returns whether the parent has pending requests for the child. This
95
+ * callback is polled after .drained_begin() has been called until all
96
+ * activity on the child has stopped.
97
+ */
98
+ bool (*drained_poll)(BdrvChild *child);
99
+
100
/*
101
* Notifies the parent that the filename of its child has changed (e.g.
102
* because the direct child was removed from the backing chain), so that it
103
@@ -XXX,XX +XXX,XX @@ struct BdrvChildClass {
104
const char *(*get_name)(BdrvChild *child);
105
106
AioContext *(*get_parent_aio_context)(BdrvChild *child);
107
-
108
- /*
109
- * If this pair of functions is implemented, the parent doesn't issue new
110
- * requests after returning from .drained_begin() until .drained_end() is
111
- * called.
112
- *
113
- * These functions must not change the graph (and therefore also must not
114
- * call aio_poll(), which could change the graph indirectly).
115
- *
116
- * Note that this can be nested. If drained_begin() was called twice, new
117
- * I/O is allowed only after drained_end() was called twice, too.
118
- */
119
- void (*drained_begin)(BdrvChild *child);
120
- void (*drained_end)(BdrvChild *child);
121
-
122
- /*
123
- * Returns whether the parent has pending requests for the child. This
124
- * callback is polled after .drained_begin() has been called until all
125
- * activity on the child has stopped.
126
- */
127
- bool (*drained_poll)(BdrvChild *child);
128
};
129
130
extern const BdrvChildClass child_of_bds;
131
diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/include/sysemu/block-backend-common.h
134
+++ b/include/sysemu/block-backend-common.h
135
@@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps {
136
*/
137
bool (*is_medium_locked)(void *opaque);
138
139
+ /*
140
+ * Runs when the backend receives a drain request.
141
+ */
142
+ void (*drained_begin)(void *opaque);
143
+ /*
144
+ * Runs when the backend's last drain request ends.
145
+ */
146
+ void (*drained_end)(void *opaque);
147
+ /*
148
+ * Is the device still busy?
149
+ */
150
+ bool (*drained_poll)(void *opaque);
151
+
152
/*
153
* I/O API functions. These functions are thread-safe.
154
*
155
@@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps {
156
* Runs when the size changed (e.g. monitor command block_resize)
157
*/
158
void (*resize_cb)(void *opaque);
159
- /*
160
- * Runs when the backend receives a drain request.
161
- */
162
- void (*drained_begin)(void *opaque);
163
- /*
164
- * Runs when the backend's last drain request ends.
165
- */
166
- void (*drained_end)(void *opaque);
167
- /*
168
- * Is the device still busy?
169
- */
170
- bool (*drained_poll)(void *opaque);
171
} BlockDevOps;
172
173
/*
39
diff --git a/block/io.c b/block/io.c
174
diff --git a/block/io.c b/block/io.c
40
index XXXXXXX..XXXXXXX 100644
175
index XXXXXXX..XXXXXXX 100644
41
--- a/block/io.c
176
--- a/block/io.c
42
+++ b/block/io.c
177
+++ b/block/io.c
43
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
178
@@ -XXX,XX +XXX,XX @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
44
goto out;
179
180
void bdrv_parent_drained_end_single(BdrvChild *c)
181
{
182
- IO_OR_GS_CODE();
183
+ GLOBAL_STATE_CODE();
184
185
assert(c->quiesced_parent);
186
c->quiesced_parent = false;
187
@@ -XXX,XX +XXX,XX @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
188
189
void bdrv_parent_drained_begin_single(BdrvChild *c)
190
{
191
- IO_OR_GS_CODE();
192
+ GLOBAL_STATE_CODE();
193
194
assert(!c->quiesced_parent);
195
c->quiesced_parent = true;
196
@@ -XXX,XX +XXX,XX @@ typedef struct {
197
bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
198
bool ignore_bds_parents)
199
{
200
- IO_OR_GS_CODE();
201
+ GLOBAL_STATE_CODE();
202
203
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
204
return true;
205
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
206
if (ctx != co_ctx) {
207
aio_context_release(ctx);
45
}
208
}
46
209
- replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
47
+ /*
210
+ replay_bh_schedule_oneshot_event(qemu_get_aio_context(),
48
+ * If the image has a backing file that is large enough that it would
211
+ bdrv_co_drain_bh_cb, &data);
49
+ * provide data for the new area, we cannot leave it unallocated because
212
50
+ * then the backing file content would become visible. Instead, zero-fill
213
qemu_coroutine_yield();
51
+ * the new area.
214
/* If we are resumed from some other event (such as an aio completion or a
52
+ *
215
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
53
+ * Note that if the image has a backing file, but was opened without the
216
return;
54
+ * backing file, taking care of keeping things consistent with that backing
217
}
55
+ * file is the user's responsibility.
218
56
+ */
219
+ GLOBAL_STATE_CODE();
57
+ if (new_bytes && bs->backing) {
220
+
58
+ int64_t backing_len;
221
/* Stop things in parent-to-child order */
59
+
222
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
60
+ backing_len = bdrv_getlength(backing_bs(bs));
223
aio_disable_external(bdrv_get_aio_context(bs));
61
+ if (backing_len < 0) {
224
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
62
+ ret = backing_len;
225
{
63
+ error_setg_errno(errp, -ret, "Could not get backing file size");
226
int old_quiesce_counter;
64
+ goto out;
227
65
+ }
228
+ IO_OR_GS_CODE();
66
+
229
+
67
+ if (backing_len > old_size) {
230
if (qemu_in_coroutine()) {
68
+ flags |= BDRV_REQ_ZERO_WRITE;
231
bdrv_co_yield_to_drain(bs, false, parent, false);
69
+ }
232
return;
70
+ }
233
}
71
+
234
assert(bs->quiesce_counter > 0);
72
if (drv->bdrv_co_truncate) {
235
+ GLOBAL_STATE_CODE();
73
if (flags & ~bs->supported_truncate_flags) {
236
74
error_setg(errp, "Block driver does not support requested flags");
237
/* Re-enable things in child-to-parent order */
238
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
239
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
240
index XXXXXXX..XXXXXXX 100644
241
--- a/tests/unit/test-bdrv-drain.c
242
+++ b/tests/unit/test-bdrv-drain.c
243
@@ -XXX,XX +XXX,XX @@ struct test_iothread_data {
244
BlockDriverState *bs;
245
enum drain_type drain_type;
246
int *aio_ret;
247
+ bool co_done;
248
};
249
250
-static void test_iothread_drain_entry(void *opaque)
251
+static void coroutine_fn test_iothread_drain_co_entry(void *opaque)
252
{
253
struct test_iothread_data *data = opaque;
254
255
- aio_context_acquire(bdrv_get_aio_context(data->bs));
256
do_drain_begin(data->drain_type, data->bs);
257
g_assert_cmpint(*data->aio_ret, ==, 0);
258
do_drain_end(data->drain_type, data->bs);
259
- aio_context_release(bdrv_get_aio_context(data->bs));
260
261
- qemu_event_set(&done_event);
262
+ data->co_done = true;
263
+ aio_wait_kick();
264
}
265
266
static void test_iothread_aio_cb(void *opaque, int ret)
267
@@ -XXX,XX +XXX,XX @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
268
BlockDriverState *bs;
269
BDRVTestState *s;
270
BlockAIOCB *acb;
271
+ Coroutine *co;
272
int aio_ret;
273
struct test_iothread_data data;
274
275
@@ -XXX,XX +XXX,XX @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
276
}
277
break;
278
case 1:
279
- aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data);
280
- qemu_event_wait(&done_event);
281
+ co = qemu_coroutine_create(test_iothread_drain_co_entry, &data);
282
+ aio_co_enter(ctx_a, co);
283
+ AIO_WAIT_WHILE_UNLOCKED(NULL, !data.co_done);
284
break;
285
default:
286
g_assert_not_reached();
75
--
287
--
76
2.25.3
288
2.40.1
77
78
diff view generated by jsdifflib
1
The QMP handler qmp_object_add() and the implementation of --object in
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
qemu-storage-daemon can share most of the code. Currently,
3
qemu-storage-daemon calls qmp_object_add(), but this is not correct
4
because different visitors need to be used.
5
2
6
As a first step towards a fix, make qmp_object_add() a wrapper around a
3
Detach event channels during drained sections to stop I/O submission
7
new function user_creatable_add_dict() that can get an additional
4
from the ring. xen-block is no longer reliant on aio_disable_external()
8
parameter. The handling of "props" is only required for compatibility
5
after this patch. This will allow us to remove the
9
and not required for the qemu-storage-daemon command line, so it stays
6
aio_disable_external() API once all other code that relies on it is
10
in qmp_object_add().
7
converted.
11
8
9
Extend xen_device_set_event_channel_context() to allow ctx=NULL. The
10
event channel still exists but the event loop does not monitor the file
11
descriptor. Event channel processing can resume by calling
12
xen_device_set_event_channel_context() with a non-NULL ctx.
13
14
Factor out xen_device_set_event_channel_context() calls in
15
hw/block/dataplane/xen-block.c into attach/detach helper functions.
16
Incidentally, these don't require the AioContext lock because
17
aio_set_fd_handler() is thread-safe.
18
19
It's safer to register BlockDevOps after the dataplane instance has been
20
created. The BlockDevOps .drained_begin/end() callbacks depend on the
21
dataplane instance, so move the blk_set_dev_ops() call after
22
xen_block_dataplane_create().
23
24
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
25
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
26
Message-Id: <20230516190238.8401-12-stefanha@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
27
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
28
---
14
include/qom/object_interfaces.h | 12 ++++++++++++
29
hw/block/dataplane/xen-block.h | 2 ++
15
qom/object_interfaces.c | 27 +++++++++++++++++++++++++++
30
hw/block/dataplane/xen-block.c | 42 +++++++++++++++++++++++++---------
16
qom/qom-qmp-cmds.c | 24 +-----------------------
31
hw/block/xen-block.c | 24 ++++++++++++++++---
17
3 files changed, 40 insertions(+), 23 deletions(-)
32
hw/xen/xen-bus.c | 7 ++++--
33
4 files changed, 59 insertions(+), 16 deletions(-)
18
34
19
diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h
35
diff --git a/hw/block/dataplane/xen-block.h b/hw/block/dataplane/xen-block.h
20
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
21
--- a/include/qom/object_interfaces.h
37
--- a/hw/block/dataplane/xen-block.h
22
+++ b/include/qom/object_interfaces.h
38
+++ b/hw/block/dataplane/xen-block.h
23
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
39
@@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
24
const QDict *qdict,
40
unsigned int protocol,
25
Visitor *v, Error **errp);
41
Error **errp);
26
42
void xen_block_dataplane_stop(XenBlockDataPlane *dataplane);
27
+/**
43
+void xen_block_dataplane_attach(XenBlockDataPlane *dataplane);
28
+ * user_creatable_add_dict:
44
+void xen_block_dataplane_detach(XenBlockDataPlane *dataplane);
29
+ * @qdict: the object definition
45
30
+ * @errp: if an error occurs, a pointer to an area to store the error
46
#endif /* HW_BLOCK_DATAPLANE_XEN_BLOCK_H */
31
+ *
47
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
32
+ * Create an instance of the user creatable object that is defined by
33
+ * @qdict. The object type is taken from the QDict key 'qom-type', its
34
+ * ID from the key 'id'. The remaining entries in @qdict are used to
35
+ * initialize the object properties.
36
+ */
37
+void user_creatable_add_dict(QDict *qdict, Error **errp);
38
+
39
/**
40
* user_creatable_add_opts:
41
* @opts: the object definition
42
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
43
index XXXXXXX..XXXXXXX 100644
48
index XXXXXXX..XXXXXXX 100644
44
--- a/qom/object_interfaces.c
49
--- a/hw/block/dataplane/xen-block.c
45
+++ b/qom/object_interfaces.c
50
+++ b/hw/block/dataplane/xen-block.c
46
@@ -XXX,XX +XXX,XX @@
51
@@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane)
47
#include "qapi/qmp/qerror.h"
52
g_free(dataplane);
48
#include "qapi/qmp/qjson.h"
49
#include "qapi/qmp/qstring.h"
50
+#include "qapi/qobject-input-visitor.h"
51
#include "qom/object_interfaces.h"
52
#include "qemu/help_option.h"
53
#include "qemu/module.h"
54
@@ -XXX,XX +XXX,XX @@ out:
55
return obj;
56
}
53
}
57
54
58
+void user_creatable_add_dict(QDict *qdict, Error **errp)
55
+void xen_block_dataplane_detach(XenBlockDataPlane *dataplane)
59
+{
56
+{
60
+ Visitor *v;
57
+ if (!dataplane || !dataplane->event_channel) {
61
+ Object *obj;
62
+ g_autofree char *type = NULL;
63
+ g_autofree char *id = NULL;
64
+
65
+ type = g_strdup(qdict_get_try_str(qdict, "qom-type"));
66
+ if (!type) {
67
+ error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
68
+ return;
58
+ return;
69
+ }
59
+ }
70
+ qdict_del(qdict, "qom-type");
71
+
60
+
72
+ id = g_strdup(qdict_get_try_str(qdict, "id"));
61
+ /* Only reason for failure is a NULL channel */
73
+ if (!id) {
62
+ xen_device_set_event_channel_context(dataplane->xendev,
74
+ error_setg(errp, QERR_MISSING_PARAMETER, "id");
63
+ dataplane->event_channel,
64
+ NULL, &error_abort);
65
+}
66
+
67
+void xen_block_dataplane_attach(XenBlockDataPlane *dataplane)
68
+{
69
+ if (!dataplane || !dataplane->event_channel) {
75
+ return;
70
+ return;
76
+ }
71
+ }
77
+ qdict_del(qdict, "id");
78
+
72
+
79
+ v = qobject_input_visitor_new(QOBJECT(qdict));
73
+ /* Only reason for failure is a NULL channel */
80
+ obj = user_creatable_add_type(type, id, qdict, v, errp);
74
+ xen_device_set_event_channel_context(dataplane->xendev,
81
+ visit_free(v);
75
+ dataplane->event_channel,
82
+ object_unref(obj);
76
+ dataplane->ctx, &error_abort);
83
+}
77
+}
84
78
+
85
Object *user_creatable_add_opts(QemuOpts *opts, Error **errp)
79
void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
86
{
80
{
87
diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c
81
XenDevice *xendev;
82
@@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
83
84
xendev = dataplane->xendev;
85
86
- aio_context_acquire(dataplane->ctx);
87
- if (dataplane->event_channel) {
88
- /* Only reason for failure is a NULL channel */
89
- xen_device_set_event_channel_context(xendev, dataplane->event_channel,
90
- qemu_get_aio_context(),
91
- &error_abort);
92
+ if (!blk_in_drain(dataplane->blk)) {
93
+ xen_block_dataplane_detach(dataplane);
94
}
95
+
96
+ aio_context_acquire(dataplane->ctx);
97
/* Xen doesn't have multiple users for nodes, so this can't fail */
98
blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort);
99
aio_context_release(dataplane->ctx);
100
@@ -XXX,XX +XXX,XX @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
101
blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL);
102
aio_context_release(old_context);
103
104
- /* Only reason for failure is a NULL channel */
105
- aio_context_acquire(dataplane->ctx);
106
- xen_device_set_event_channel_context(xendev, dataplane->event_channel,
107
- dataplane->ctx, &error_abort);
108
- aio_context_release(dataplane->ctx);
109
+ if (!blk_in_drain(dataplane->blk)) {
110
+ xen_block_dataplane_attach(dataplane);
111
+ }
112
113
return;
114
115
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
88
index XXXXXXX..XXXXXXX 100644
116
index XXXXXXX..XXXXXXX 100644
89
--- a/qom/qom-qmp-cmds.c
117
--- a/hw/block/xen-block.c
90
+++ b/qom/qom-qmp-cmds.c
118
+++ b/hw/block/xen-block.c
91
@@ -XXX,XX +XXX,XX @@
119
@@ -XXX,XX +XXX,XX @@ static void xen_block_resize_cb(void *opaque)
92
#include "qapi/qapi-commands-qom.h"
120
xen_device_backend_printf(xendev, "state", "%u", state);
93
#include "qapi/qmp/qdict.h"
121
}
94
#include "qapi/qmp/qerror.h"
122
95
-#include "qapi/qobject-input-visitor.h"
123
+/* Suspend request handling */
96
#include "qemu/cutils.h"
124
+static void xen_block_drained_begin(void *opaque)
97
#include "qom/object_interfaces.h"
125
+{
98
#include "qom/qom-qobject.h"
126
+ XenBlockDevice *blockdev = opaque;
99
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
127
+
100
{
128
+ xen_block_dataplane_detach(blockdev->dataplane);
101
QObject *props;
129
+}
102
QDict *pdict;
130
+
103
- Visitor *v;
131
+/* Resume request handling */
104
- Object *obj;
132
+static void xen_block_drained_end(void *opaque)
105
- g_autofree char *type = NULL;
133
+{
106
- g_autofree char *id = NULL;
134
+ XenBlockDevice *blockdev = opaque;
135
+
136
+ xen_block_dataplane_attach(blockdev->dataplane);
137
+}
138
+
139
static const BlockDevOps xen_block_dev_ops = {
140
- .resize_cb = xen_block_resize_cb,
141
+ .resize_cb = xen_block_resize_cb,
142
+ .drained_begin = xen_block_drained_begin,
143
+ .drained_end = xen_block_drained_end,
144
};
145
146
static void xen_block_realize(XenDevice *xendev, Error **errp)
147
@@ -XXX,XX +XXX,XX @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
148
return;
149
}
150
151
- blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
107
-
152
-
108
- type = g_strdup(qdict_get_try_str(qdict, "qom-type"));
153
if (conf->discard_granularity == -1) {
109
- if (!type) {
154
conf->discard_granularity = conf->physical_block_size;
110
- error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
111
- return;
112
- }
113
- qdict_del(qdict, "qom-type");
114
-
115
- id = g_strdup(qdict_get_try_str(qdict, "id"));
116
- if (!id) {
117
- error_setg(errp, QERR_MISSING_PARAMETER, "id");
118
- return;
119
- }
120
- qdict_del(qdict, "id");
121
122
props = qdict_get(qdict, "props");
123
if (props) {
124
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
125
qobject_unref(pdict);
126
}
155
}
127
156
@@ -XXX,XX +XXX,XX @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
128
- v = qobject_input_visitor_new(QOBJECT(qdict));
157
blockdev->dataplane =
129
- obj = user_creatable_add_type(type, id, qdict, v, errp);
158
xen_block_dataplane_create(xendev, blk, conf->logical_block_size,
130
- visit_free(v);
159
blockdev->props.iothread);
131
- object_unref(obj);
160
+
132
+ user_creatable_add_dict(qdict, errp);
161
+ blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
133
}
162
}
134
163
135
void qmp_object_del(const char *id, Error **errp)
164
static void xen_block_frontend_changed(XenDevice *xendev,
165
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
166
index XXXXXXX..XXXXXXX 100644
167
--- a/hw/xen/xen-bus.c
168
+++ b/hw/xen/xen-bus.c
169
@@ -XXX,XX +XXX,XX @@ void xen_device_set_event_channel_context(XenDevice *xendev,
170
NULL, NULL, NULL, NULL, NULL);
171
172
channel->ctx = ctx;
173
- aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
174
- xen_device_event, NULL, xen_device_poll, NULL, channel);
175
+ if (ctx) {
176
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh),
177
+ true, xen_device_event, NULL, xen_device_poll, NULL,
178
+ channel);
179
+ }
180
}
181
182
XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
136
--
183
--
137
2.25.3
184
2.40.1
138
139
diff view generated by jsdifflib
New patch
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
2
3
is_external=true suspends fd handlers between aio_disable_external() and
4
aio_enable_external(). The block layer's drain operation uses this
5
mechanism to prevent new I/O from sneaking in between
6
bdrv_drained_begin() and bdrv_drained_end().
7
8
The previous commit converted the xen-block device to use BlockDevOps
9
.drained_begin/end() callbacks. It no longer relies on is_external=true
10
so it is safe to pass is_external=false.
11
12
This is part of ongoing work to remove the aio_disable_external() API.
13
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
16
Message-Id: <20230516190238.8401-13-stefanha@redhat.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
---
19
hw/xen/xen-bus.c | 8 ++++----
20
1 file changed, 4 insertions(+), 4 deletions(-)
21
22
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/hw/xen/xen-bus.c
25
+++ b/hw/xen/xen-bus.c
26
@@ -XXX,XX +XXX,XX @@ void xen_device_set_event_channel_context(XenDevice *xendev,
27
}
28
29
if (channel->ctx)
30
- aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
31
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false,
32
NULL, NULL, NULL, NULL, NULL);
33
34
channel->ctx = ctx;
35
if (ctx) {
36
aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh),
37
- true, xen_device_event, NULL, xen_device_poll, NULL,
38
- channel);
39
+ false, xen_device_event, NULL, xen_device_poll,
40
+ NULL, channel);
41
}
42
}
43
44
@@ -XXX,XX +XXX,XX @@ void xen_device_unbind_event_channel(XenDevice *xendev,
45
46
QLIST_REMOVE(channel, list);
47
48
- aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
49
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false,
50
NULL, NULL, NULL, NULL, NULL);
51
52
if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) {
53
--
54
2.40.1
diff view generated by jsdifflib
New patch
1
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
3
vduse_blk_detach_ctx() waits for in-flight requests using
4
AIO_WAIT_WHILE(). This is not allowed according to a comment in
5
bdrv_set_aio_context_commit():
6
7
/*
8
* Take the old AioContex when detaching it from bs.
9
* At this point, new_context lock is already acquired, and we are now
10
* also taking old_context. This is safe as long as bdrv_detach_aio_context
11
* does not call AIO_POLL_WHILE().
12
*/
13
14
Use this opportunity to rewrite the drain code in vduse-blk:
15
16
- Use the BlockExport refcount so that vduse_blk_exp_delete() is only
17
called when there are no more requests in flight.
18
19
- Implement .drained_poll() so in-flight request coroutines are stopped
20
by the time .bdrv_detach_aio_context() is called.
21
22
- Remove AIO_WAIT_WHILE() from vduse_blk_detach_ctx() to solve the
23
.bdrv_detach_aio_context() constraint violation. It's no longer
24
needed due to the previous changes.
25
26
- Always handle the VDUSE file descriptor, even in drained sections. The
27
VDUSE file descriptor doesn't submit I/O, so it's safe to handle it in
28
drained sections. This ensures that the VDUSE kernel code gets a fast
29
response.
30
31
- Suspend virtqueue fd handlers in .drained_begin() and resume them in
32
.drained_end(). This eliminates the need for the
33
aio_set_fd_handler(is_external=true) flag, which is being removed from
34
QEMU.
35
36
This is a long list but splitting it into individual commits would
37
probably lead to git bisect failures - the changes are all related.
38
39
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
40
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
41
Message-Id: <20230516190238.8401-14-stefanha@redhat.com>
42
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
43
---
44
block/export/vduse-blk.c | 132 +++++++++++++++++++++++++++------------
45
1 file changed, 93 insertions(+), 39 deletions(-)
46
47
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/block/export/vduse-blk.c
50
+++ b/block/export/vduse-blk.c
51
@@ -XXX,XX +XXX,XX @@ typedef struct VduseBlkExport {
52
VduseDev *dev;
53
uint16_t num_queues;
54
char *recon_file;
55
- unsigned int inflight;
56
+ unsigned int inflight; /* atomic */
57
+ bool vqs_started;
58
} VduseBlkExport;
59
60
typedef struct VduseBlkReq {
61
@@ -XXX,XX +XXX,XX @@ typedef struct VduseBlkReq {
62
63
static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
64
{
65
- vblk_exp->inflight++;
66
+ if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) {
67
+ /* Prevent export from being deleted */
68
+ aio_context_acquire(vblk_exp->export.ctx);
69
+ blk_exp_ref(&vblk_exp->export);
70
+ aio_context_release(vblk_exp->export.ctx);
71
+ }
72
}
73
74
static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
75
{
76
- if (--vblk_exp->inflight == 0) {
77
+ if (qatomic_fetch_dec(&vblk_exp->inflight) == 1) {
78
+ /* Wake AIO_WAIT_WHILE() */
79
aio_wait_kick();
80
+
81
+ /* Now the export can be deleted */
82
+ aio_context_acquire(vblk_exp->export.ctx);
83
+ blk_exp_unref(&vblk_exp->export);
84
+ aio_context_release(vblk_exp->export.ctx);
85
}
86
}
87
88
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
89
{
90
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
91
92
+ if (!vblk_exp->vqs_started) {
93
+ return; /* vduse_blk_drained_end() will start vqs later */
94
+ }
95
+
96
aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
97
- true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
98
+ false, on_vduse_vq_kick, NULL, NULL, NULL, vq);
99
/* Make sure we don't miss any kick afer reconnecting */
100
eventfd_write(vduse_queue_get_fd(vq), 1);
101
}
102
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
103
static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
104
{
105
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
106
+ int fd = vduse_queue_get_fd(vq);
107
108
- aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
109
- true, NULL, NULL, NULL, NULL, NULL);
110
+ if (fd < 0) {
111
+ return;
112
+ }
113
+
114
+ aio_set_fd_handler(vblk_exp->export.ctx, fd, false,
115
+ NULL, NULL, NULL, NULL, NULL);
116
}
117
118
static const VduseOps vduse_blk_ops = {
119
@@ -XXX,XX +XXX,XX @@ static void on_vduse_dev_kick(void *opaque)
120
121
static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
122
{
123
- int i;
124
-
125
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
126
- true, on_vduse_dev_kick, NULL, NULL, NULL,
127
+ false, on_vduse_dev_kick, NULL, NULL, NULL,
128
vblk_exp->dev);
129
130
- for (i = 0; i < vblk_exp->num_queues; i++) {
131
- VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
132
- int fd = vduse_queue_get_fd(vq);
133
-
134
- if (fd < 0) {
135
- continue;
136
- }
137
- aio_set_fd_handler(vblk_exp->export.ctx, fd, true,
138
- on_vduse_vq_kick, NULL, NULL, NULL, vq);
139
- }
140
+ /* Virtqueues are handled by vduse_blk_drained_end() */
141
}
142
143
static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
144
{
145
- int i;
146
-
147
- for (i = 0; i < vblk_exp->num_queues; i++) {
148
- VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
149
- int fd = vduse_queue_get_fd(vq);
150
-
151
- if (fd < 0) {
152
- continue;
153
- }
154
- aio_set_fd_handler(vblk_exp->export.ctx, fd,
155
- true, NULL, NULL, NULL, NULL, NULL);
156
- }
157
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
158
- true, NULL, NULL, NULL, NULL, NULL);
159
+ false, NULL, NULL, NULL, NULL, NULL);
160
161
- AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
162
+ /* Virtqueues are handled by vduse_blk_drained_begin() */
163
}
164
165
166
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_resize(void *opaque)
167
(char *)&config.capacity);
168
}
169
170
+static void vduse_blk_stop_virtqueues(VduseBlkExport *vblk_exp)
171
+{
172
+ for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
173
+ VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
174
+ vduse_blk_disable_queue(vblk_exp->dev, vq);
175
+ }
176
+
177
+ vblk_exp->vqs_started = false;
178
+}
179
+
180
+static void vduse_blk_start_virtqueues(VduseBlkExport *vblk_exp)
181
+{
182
+ vblk_exp->vqs_started = true;
183
+
184
+ for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
185
+ VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
186
+ vduse_blk_enable_queue(vblk_exp->dev, vq);
187
+ }
188
+}
189
+
190
+static void vduse_blk_drained_begin(void *opaque)
191
+{
192
+ BlockExport *exp = opaque;
193
+ VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
194
+
195
+ vduse_blk_stop_virtqueues(vblk_exp);
196
+}
197
+
198
+static void vduse_blk_drained_end(void *opaque)
199
+{
200
+ BlockExport *exp = opaque;
201
+ VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
202
+
203
+ vduse_blk_start_virtqueues(vblk_exp);
204
+}
205
+
206
+static bool vduse_blk_drained_poll(void *opaque)
207
+{
208
+ BlockExport *exp = opaque;
209
+ VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
210
+
211
+ return qatomic_read(&vblk_exp->inflight) > 0;
212
+}
213
+
214
static const BlockDevOps vduse_block_ops = {
215
- .resize_cb = vduse_blk_resize,
216
+ .resize_cb = vduse_blk_resize,
217
+ .drained_begin = vduse_blk_drained_begin,
218
+ .drained_end = vduse_blk_drained_end,
219
+ .drained_poll = vduse_blk_drained_poll,
220
};
221
222
static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
223
@@ -XXX,XX +XXX,XX @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
224
vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: "");
225
vblk_exp->handler.logical_block_size = logical_block_size;
226
vblk_exp->handler.writable = opts->writable;
227
+ vblk_exp->vqs_started = true;
228
229
config.capacity =
230
cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
231
@@ -XXX,XX +XXX,XX @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
232
vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
233
}
234
235
- aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true,
236
+ aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), false,
237
on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
238
239
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
240
vblk_exp);
241
-
242
blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
243
244
+ /*
245
+ * We handle draining ourselves using an in-flight counter and by disabling
246
+ * virtqueue fd handlers. Do not queue BlockBackend requests, they need to
247
+ * complete so the in-flight counter reaches zero.
248
+ */
249
+ blk_set_disable_request_queuing(exp->blk, true);
250
+
251
return 0;
252
err:
253
vduse_dev_destroy(vblk_exp->dev);
254
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_exp_delete(BlockExport *exp)
255
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
256
int ret;
257
258
+ assert(qatomic_read(&vblk_exp->inflight) == 0);
259
+
260
+ vduse_blk_detach_ctx(vblk_exp);
261
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
262
vblk_exp);
263
ret = vduse_dev_destroy(vblk_exp->dev);
264
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_exp_delete(BlockExport *exp)
265
g_free(vblk_exp->handler.serial);
266
}
267
268
+/* Called with exp->ctx acquired */
269
static void vduse_blk_exp_request_shutdown(BlockExport *exp)
270
{
271
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
272
273
- aio_context_acquire(vblk_exp->export.ctx);
274
- vduse_blk_detach_ctx(vblk_exp);
275
- aio_context_acquire(vblk_exp->export.ctx);
276
+ vduse_blk_stop_virtqueues(vblk_exp);
277
}
278
279
const BlockExportDriver blk_exp_vduse_blk = {
280
--
281
2.40.1
diff view generated by jsdifflib
New patch
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
2
3
The FUSE export calls blk_exp_ref/unref() without the AioContext lock.
4
Instead of fixing the FUSE export, adjust blk_exp_ref/unref() so they
5
work without the AioContext lock. This way it's less error-prone.
6
7
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
10
Message-Id: <20230516190238.8401-15-stefanha@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
include/block/export.h | 2 ++
14
block/export/export.c | 13 ++++++-------
15
block/export/vduse-blk.c | 4 ----
16
3 files changed, 8 insertions(+), 11 deletions(-)
17
18
diff --git a/include/block/export.h b/include/block/export.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/block/export.h
21
+++ b/include/block/export.h
22
@@ -XXX,XX +XXX,XX @@ struct BlockExport {
23
* Reference count for this block export. This includes strong references
24
* both from the owner (qemu-nbd or the monitor) and clients connected to
25
* the export.
26
+ *
27
+ * Use atomics to access this field.
28
*/
29
int refcount;
30
31
diff --git a/block/export/export.c b/block/export/export.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/block/export/export.c
34
+++ b/block/export/export.c
35
@@ -XXX,XX +XXX,XX @@ fail:
36
return NULL;
37
}
38
39
-/* Callers must hold exp->ctx lock */
40
void blk_exp_ref(BlockExport *exp)
41
{
42
- assert(exp->refcount > 0);
43
- exp->refcount++;
44
+ assert(qatomic_read(&exp->refcount) > 0);
45
+ qatomic_inc(&exp->refcount);
46
}
47
48
/* Runs in the main thread */
49
@@ -XXX,XX +XXX,XX @@ static void blk_exp_delete_bh(void *opaque)
50
aio_context_release(aio_context);
51
}
52
53
-/* Callers must hold exp->ctx lock */
54
void blk_exp_unref(BlockExport *exp)
55
{
56
- assert(exp->refcount > 0);
57
- if (--exp->refcount == 0) {
58
+ assert(qatomic_read(&exp->refcount) > 0);
59
+ if (qatomic_fetch_dec(&exp->refcount) == 1) {
60
/* Touch the block_exports list only in the main thread */
61
aio_bh_schedule_oneshot(qemu_get_aio_context(), blk_exp_delete_bh,
62
exp);
63
@@ -XXX,XX +XXX,XX @@ void qmp_block_export_del(const char *id,
64
if (!has_mode) {
65
mode = BLOCK_EXPORT_REMOVE_MODE_SAFE;
66
}
67
- if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE && exp->refcount > 1) {
68
+ if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE &&
69
+ qatomic_read(&exp->refcount) > 1) {
70
error_setg(errp, "export '%s' still in use", exp->id);
71
error_append_hint(errp, "Use mode='hard' to force client "
72
"disconnect\n");
73
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/block/export/vduse-blk.c
76
+++ b/block/export/vduse-blk.c
77
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
78
{
79
if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) {
80
/* Prevent export from being deleted */
81
- aio_context_acquire(vblk_exp->export.ctx);
82
blk_exp_ref(&vblk_exp->export);
83
- aio_context_release(vblk_exp->export.ctx);
84
}
85
}
86
87
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
88
aio_wait_kick();
89
90
/* Now the export can be deleted */
91
- aio_context_acquire(vblk_exp->export.ctx);
92
blk_exp_unref(&vblk_exp->export);
93
- aio_context_release(vblk_exp->export.ctx);
94
}
95
}
96
97
--
98
2.40.1
diff view generated by jsdifflib
1
From: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
2
3
This patch introduces support for PMR that has been defined as part of NVMe 1.4
3
This is part of ongoing work to remove the aio_disable_external() API.
4
spec. Users can now specify a pmrdev option that should point to a HostMemoryBackend.
5
pmrdev memory region will subsequently be exposed as PCI BAR 2 in emulated NVMe
6
device. The guest OS can perform MMIO reads and writes to the PMR region that will stay
7
persistent across system reboot.
8
4
9
Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
5
Use BlockDevOps .drained_begin/end/poll() instead of
10
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
6
aio_set_fd_handler(is_external=true).
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
12
Message-Id: <20200330164656.9348-1-andrzej.jakowski@linux.intel.com>
8
As a side-effect the FUSE export now follows AioContext changes like the
13
Reviewed-by: Keith Busch <kbusch@kernel.org>
9
other export types.
10
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
13
Message-Id: <20230516190238.8401-16-stefanha@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
---
15
---
16
hw/block/nvme.h | 2 +
16
block/export/fuse.c | 56 +++++++++++++++++++++++++++++++++++++++++++--
17
include/block/nvme.h | 172 +++++++++++++++++++++++++++++++++++++++++
17
1 file changed, 54 insertions(+), 2 deletions(-)
18
hw/block/nvme.c | 109 ++++++++++++++++++++++++++
19
hw/block/Makefile.objs | 2 +-
20
hw/block/trace-events | 4 +
21
5 files changed, 288 insertions(+), 1 deletion(-)
22
18
23
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
19
diff --git a/block/export/fuse.c b/block/export/fuse.c
24
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
25
--- a/hw/block/nvme.h
21
--- a/block/export/fuse.c
26
+++ b/hw/block/nvme.h
22
+++ b/block/export/fuse.c
27
@@ -XXX,XX +XXX,XX @@ typedef struct NvmeCtrl {
23
@@ -XXX,XX +XXX,XX @@ typedef struct FuseExport {
28
uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
24
29
25
struct fuse_session *fuse_session;
30
char *serial;
26
struct fuse_buf fuse_buf;
31
+ HostMemoryBackend *pmrdev;
27
+ unsigned int in_flight; /* atomic */
28
bool mounted, fd_handler_set_up;
29
30
char *mountpoint;
31
@@ -XXX,XX +XXX,XX @@ static void read_from_fuse_export(void *opaque);
32
static bool is_regular_file(const char *path, Error **errp);
33
34
35
+static void fuse_export_drained_begin(void *opaque)
36
+{
37
+ FuseExport *exp = opaque;
32
+
38
+
33
NvmeNamespace *namespaces;
39
+ aio_set_fd_handler(exp->common.ctx,
34
NvmeSQueue **sq;
40
+ fuse_session_fd(exp->fuse_session), false,
35
NvmeCQueue **cq;
41
+ NULL, NULL, NULL, NULL, NULL);
36
diff --git a/include/block/nvme.h b/include/block/nvme.h
42
+ exp->fd_handler_set_up = false;
37
index XXXXXXX..XXXXXXX 100644
43
+}
38
--- a/include/block/nvme.h
44
+
39
+++ b/include/block/nvme.h
45
+static void fuse_export_drained_end(void *opaque)
40
@@ -XXX,XX +XXX,XX @@ typedef struct NvmeBar {
46
+{
41
uint64_t acq;
47
+ FuseExport *exp = opaque;
42
uint32_t cmbloc;
48
+
43
uint32_t cmbsz;
49
+ /* Refresh AioContext in case it changed */
44
+ uint8_t padding[3520]; /* not used by QEMU */
50
+ exp->common.ctx = blk_get_aio_context(exp->common.blk);
45
+ uint32_t pmrcap;
51
+
46
+ uint32_t pmrctl;
52
+ aio_set_fd_handler(exp->common.ctx,
47
+ uint32_t pmrsts;
53
+ fuse_session_fd(exp->fuse_session), false,
48
+ uint32_t pmrebs;
54
+ read_from_fuse_export, NULL, NULL, NULL, exp);
49
+ uint32_t pmrswtp;
55
+ exp->fd_handler_set_up = true;
50
+ uint32_t pmrmsc;
56
+}
51
} NvmeBar;
57
+
52
58
+static bool fuse_export_drained_poll(void *opaque)
53
enum NvmeCapShift {
59
+{
54
@@ -XXX,XX +XXX,XX @@ enum NvmeCapShift {
60
+ FuseExport *exp = opaque;
55
CAP_CSS_SHIFT = 37,
61
+
56
CAP_MPSMIN_SHIFT = 48,
62
+ return qatomic_read(&exp->in_flight) > 0;
57
CAP_MPSMAX_SHIFT = 52,
63
+}
58
+ CAP_PMR_SHIFT = 56,
64
+
59
};
65
+static const BlockDevOps fuse_export_blk_dev_ops = {
60
66
+ .drained_begin = fuse_export_drained_begin,
61
enum NvmeCapMask {
67
+ .drained_end = fuse_export_drained_end,
62
@@ -XXX,XX +XXX,XX @@ enum NvmeCapMask {
68
+ .drained_poll = fuse_export_drained_poll,
63
CAP_CSS_MASK = 0xff,
64
CAP_MPSMIN_MASK = 0xf,
65
CAP_MPSMAX_MASK = 0xf,
66
+ CAP_PMR_MASK = 0x1,
67
};
68
69
#define NVME_CAP_MQES(cap) (((cap) >> CAP_MQES_SHIFT) & CAP_MQES_MASK)
70
@@ -XXX,XX +XXX,XX @@ enum NvmeCapMask {
71
<< CAP_MPSMIN_SHIFT)
72
#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\
73
<< CAP_MPSMAX_SHIFT)
74
+#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\
75
+ << CAP_PMR_SHIFT)
76
77
enum NvmeCcShift {
78
CC_EN_SHIFT = 0,
79
@@ -XXX,XX +XXX,XX @@ enum NvmeCmbszMask {
80
#define NVME_CMBSZ_GETSIZE(cmbsz) \
81
(NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz))))
82
83
+enum NvmePmrcapShift {
84
+ PMRCAP_RDS_SHIFT = 3,
85
+ PMRCAP_WDS_SHIFT = 4,
86
+ PMRCAP_BIR_SHIFT = 5,
87
+ PMRCAP_PMRTU_SHIFT = 8,
88
+ PMRCAP_PMRWBM_SHIFT = 10,
89
+ PMRCAP_PMRTO_SHIFT = 16,
90
+ PMRCAP_CMSS_SHIFT = 24,
91
+};
69
+};
92
+
70
+
93
+enum NvmePmrcapMask {
71
static int fuse_export_create(BlockExport *blk_exp,
94
+ PMRCAP_RDS_MASK = 0x1,
72
BlockExportOptions *blk_exp_args,
95
+ PMRCAP_WDS_MASK = 0x1,
73
Error **errp)
96
+ PMRCAP_BIR_MASK = 0x7,
74
@@ -XXX,XX +XXX,XX @@ static int fuse_export_create(BlockExport *blk_exp,
97
+ PMRCAP_PMRTU_MASK = 0x3,
75
}
98
+ PMRCAP_PMRWBM_MASK = 0xf,
76
}
99
+ PMRCAP_PMRTO_MASK = 0xff,
77
100
+ PMRCAP_CMSS_MASK = 0x1,
78
+ blk_set_dev_ops(exp->common.blk, &fuse_export_blk_dev_ops, exp);
101
+};
102
+
79
+
103
+#define NVME_PMRCAP_RDS(pmrcap) \
80
+ /*
104
+ ((pmrcap >> PMRCAP_RDS_SHIFT) & PMRCAP_RDS_MASK)
81
+ * We handle draining ourselves using an in-flight counter and by disabling
105
+#define NVME_PMRCAP_WDS(pmrcap) \
82
+ * the FUSE fd handler. Do not queue BlockBackend requests, they need to
106
+ ((pmrcap >> PMRCAP_WDS_SHIFT) & PMRCAP_WDS_MASK)
83
+ * complete so the in-flight counter reaches zero.
107
+#define NVME_PMRCAP_BIR(pmrcap) \
84
+ */
108
+ ((pmrcap >> PMRCAP_BIR_SHIFT) & PMRCAP_BIR_MASK)
85
+ blk_set_disable_request_queuing(exp->common.blk, true);
109
+#define NVME_PMRCAP_PMRTU(pmrcap) \
110
+ ((pmrcap >> PMRCAP_PMRTU_SHIFT) & PMRCAP_PMRTU_MASK)
111
+#define NVME_PMRCAP_PMRWBM(pmrcap) \
112
+ ((pmrcap >> PMRCAP_PMRWBM_SHIFT) & PMRCAP_PMRWBM_MASK)
113
+#define NVME_PMRCAP_PMRTO(pmrcap) \
114
+ ((pmrcap >> PMRCAP_PMRTO_SHIFT) & PMRCAP_PMRTO_MASK)
115
+#define NVME_PMRCAP_CMSS(pmrcap) \
116
+ ((pmrcap >> PMRCAP_CMSS_SHIFT) & PMRCAP_CMSS_MASK)
117
+
86
+
118
+#define NVME_PMRCAP_SET_RDS(pmrcap, val) \
87
init_exports_table();
119
+ (pmrcap |= (uint64_t)(val & PMRCAP_RDS_MASK) << PMRCAP_RDS_SHIFT)
88
120
+#define NVME_PMRCAP_SET_WDS(pmrcap, val) \
89
/*
121
+ (pmrcap |= (uint64_t)(val & PMRCAP_WDS_MASK) << PMRCAP_WDS_SHIFT)
90
@@ -XXX,XX +XXX,XX @@ static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
122
+#define NVME_PMRCAP_SET_BIR(pmrcap, val) \
91
g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
123
+ (pmrcap |= (uint64_t)(val & PMRCAP_BIR_MASK) << PMRCAP_BIR_SHIFT)
92
124
+#define NVME_PMRCAP_SET_PMRTU(pmrcap, val) \
93
aio_set_fd_handler(exp->common.ctx,
125
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRTU_MASK) << PMRCAP_PMRTU_SHIFT)
94
- fuse_session_fd(exp->fuse_session), true,
126
+#define NVME_PMRCAP_SET_PMRWBM(pmrcap, val) \
95
+ fuse_session_fd(exp->fuse_session), false,
127
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRWBM_MASK) << PMRCAP_PMRWBM_SHIFT)
96
read_from_fuse_export, NULL, NULL, NULL, exp);
128
+#define NVME_PMRCAP_SET_PMRTO(pmrcap, val) \
97
exp->fd_handler_set_up = true;
129
+ (pmrcap |= (uint64_t)(val & PMRCAP_PMRTO_MASK) << PMRCAP_PMRTO_SHIFT)
98
130
+#define NVME_PMRCAP_SET_CMSS(pmrcap, val) \
99
@@ -XXX,XX +XXX,XX @@ static void read_from_fuse_export(void *opaque)
131
+ (pmrcap |= (uint64_t)(val & PMRCAP_CMSS_MASK) << PMRCAP_CMSS_SHIFT)
100
101
blk_exp_ref(&exp->common);
102
103
+ qatomic_inc(&exp->in_flight);
132
+
104
+
133
+enum NvmePmrctlShift {
105
do {
134
+ PMRCTL_EN_SHIFT = 0,
106
ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
135
+};
107
} while (ret == -EINTR);
136
+
108
@@ -XXX,XX +XXX,XX @@ static void read_from_fuse_export(void *opaque)
137
+enum NvmePmrctlMask {
109
fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
138
+ PMRCTL_EN_MASK = 0x1,
110
139
+};
111
out:
140
+
112
+ if (qatomic_fetch_dec(&exp->in_flight) == 1) {
141
+#define NVME_PMRCTL_EN(pmrctl) ((pmrctl >> PMRCTL_EN_SHIFT) & PMRCTL_EN_MASK)
113
+ aio_wait_kick(); /* wake AIO_WAIT_WHILE() */
142
+
143
+#define NVME_PMRCTL_SET_EN(pmrctl, val) \
144
+ (pmrctl |= (uint64_t)(val & PMRCTL_EN_MASK) << PMRCTL_EN_SHIFT)
145
+
146
+enum NvmePmrstsShift {
147
+ PMRSTS_ERR_SHIFT = 0,
148
+ PMRSTS_NRDY_SHIFT = 8,
149
+ PMRSTS_HSTS_SHIFT = 9,
150
+ PMRSTS_CBAI_SHIFT = 12,
151
+};
152
+
153
+enum NvmePmrstsMask {
154
+ PMRSTS_ERR_MASK = 0xff,
155
+ PMRSTS_NRDY_MASK = 0x1,
156
+ PMRSTS_HSTS_MASK = 0x7,
157
+ PMRSTS_CBAI_MASK = 0x1,
158
+};
159
+
160
+#define NVME_PMRSTS_ERR(pmrsts) \
161
+ ((pmrsts >> PMRSTS_ERR_SHIFT) & PMRSTS_ERR_MASK)
162
+#define NVME_PMRSTS_NRDY(pmrsts) \
163
+ ((pmrsts >> PMRSTS_NRDY_SHIFT) & PMRSTS_NRDY_MASK)
164
+#define NVME_PMRSTS_HSTS(pmrsts) \
165
+ ((pmrsts >> PMRSTS_HSTS_SHIFT) & PMRSTS_HSTS_MASK)
166
+#define NVME_PMRSTS_CBAI(pmrsts) \
167
+ ((pmrsts >> PMRSTS_CBAI_SHIFT) & PMRSTS_CBAI_MASK)
168
+
169
+#define NVME_PMRSTS_SET_ERR(pmrsts, val) \
170
+ (pmrsts |= (uint64_t)(val & PMRSTS_ERR_MASK) << PMRSTS_ERR_SHIFT)
171
+#define NVME_PMRSTS_SET_NRDY(pmrsts, val) \
172
+ (pmrsts |= (uint64_t)(val & PMRSTS_NRDY_MASK) << PMRSTS_NRDY_SHIFT)
173
+#define NVME_PMRSTS_SET_HSTS(pmrsts, val) \
174
+ (pmrsts |= (uint64_t)(val & PMRSTS_HSTS_MASK) << PMRSTS_HSTS_SHIFT)
175
+#define NVME_PMRSTS_SET_CBAI(pmrsts, val) \
176
+ (pmrsts |= (uint64_t)(val & PMRSTS_CBAI_MASK) << PMRSTS_CBAI_SHIFT)
177
+
178
+enum NvmePmrebsShift {
179
+ PMREBS_PMRSZU_SHIFT = 0,
180
+ PMREBS_RBB_SHIFT = 4,
181
+ PMREBS_PMRWBZ_SHIFT = 8,
182
+};
183
+
184
+enum NvmePmrebsMask {
185
+ PMREBS_PMRSZU_MASK = 0xf,
186
+ PMREBS_RBB_MASK = 0x1,
187
+ PMREBS_PMRWBZ_MASK = 0xffffff,
188
+};
189
+
190
+#define NVME_PMREBS_PMRSZU(pmrebs) \
191
+ ((pmrebs >> PMREBS_PMRSZU_SHIFT) & PMREBS_PMRSZU_MASK)
192
+#define NVME_PMREBS_RBB(pmrebs) \
193
+ ((pmrebs >> PMREBS_RBB_SHIFT) & PMREBS_RBB_MASK)
194
+#define NVME_PMREBS_PMRWBZ(pmrebs) \
195
+ ((pmrebs >> PMREBS_PMRWBZ_SHIFT) & PMREBS_PMRWBZ_MASK)
196
+
197
+#define NVME_PMREBS_SET_PMRSZU(pmrebs, val) \
198
+ (pmrebs |= (uint64_t)(val & PMREBS_PMRSZU_MASK) << PMREBS_PMRSZU_SHIFT)
199
+#define NVME_PMREBS_SET_RBB(pmrebs, val) \
200
+ (pmrebs |= (uint64_t)(val & PMREBS_RBB_MASK) << PMREBS_RBB_SHIFT)
201
+#define NVME_PMREBS_SET_PMRWBZ(pmrebs, val) \
202
+ (pmrebs |= (uint64_t)(val & PMREBS_PMRWBZ_MASK) << PMREBS_PMRWBZ_SHIFT)
203
+
204
+enum NvmePmrswtpShift {
205
+ PMRSWTP_PMRSWTU_SHIFT = 0,
206
+ PMRSWTP_PMRSWTV_SHIFT = 8,
207
+};
208
+
209
+enum NvmePmrswtpMask {
210
+ PMRSWTP_PMRSWTU_MASK = 0xf,
211
+ PMRSWTP_PMRSWTV_MASK = 0xffffff,
212
+};
213
+
214
+#define NVME_PMRSWTP_PMRSWTU(pmrswtp) \
215
+ ((pmrswtp >> PMRSWTP_PMRSWTU_SHIFT) & PMRSWTP_PMRSWTU_MASK)
216
+#define NVME_PMRSWTP_PMRSWTV(pmrswtp) \
217
+ ((pmrswtp >> PMRSWTP_PMRSWTV_SHIFT) & PMRSWTP_PMRSWTV_MASK)
218
+
219
+#define NVME_PMRSWTP_SET_PMRSWTU(pmrswtp, val) \
220
+ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTU_MASK) << PMRSWTP_PMRSWTU_SHIFT)
221
+#define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val) \
222
+ (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT)
223
+
224
+enum NvmePmrmscShift {
225
+ PMRMSC_CMSE_SHIFT = 1,
226
+ PMRMSC_CBA_SHIFT = 12,
227
+};
228
+
229
+enum NvmePmrmscMask {
230
+ PMRMSC_CMSE_MASK = 0x1,
231
+ PMRMSC_CBA_MASK = 0xfffffffffffff,
232
+};
233
+
234
+#define NVME_PMRMSC_CMSE(pmrmsc) \
235
+ ((pmrmsc >> PMRMSC_CMSE_SHIFT) & PMRMSC_CMSE_MASK)
236
+#define NVME_PMRMSC_CBA(pmrmsc) \
237
+ ((pmrmsc >> PMRMSC_CBA_SHIFT) & PMRMSC_CBA_MASK)
238
+
239
+#define NVME_PMRMSC_SET_CMSE(pmrmsc, val) \
240
+ (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT)
241
+#define NVME_PMRMSC_SET_CBA(pmrmsc, val) \
242
+ (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
243
+
244
typedef struct NvmeCmd {
245
uint8_t opcode;
246
uint8_t fuse;
247
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
248
index XXXXXXX..XXXXXXX 100644
249
--- a/hw/block/nvme.c
250
+++ b/hw/block/nvme.c
251
@@ -XXX,XX +XXX,XX @@
252
* -drive file=<file>,if=none,id=<drive_id>
253
* -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
254
* cmb_size_mb=<cmb_size_mb[optional]>, \
255
+ * [pmrdev=<mem_backend_file_id>,] \
256
* num_queues=<N[optional]>
257
*
258
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
259
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
260
+ *
261
+ * cmb_size_mb= and pmrdev= options are mutually exclusive due to limitation
262
+ * in available BARs. cmb_size_mb= will take precedence over pmrdev= when
263
+ * both are provided.
264
+ * Enabling pmr emulation can be achieved by pointing to memory-backend-file.
265
+ * For example:
266
+ * -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \
267
+ * size=<size> .... -device nvme,...,pmrdev=<mem_id>
268
*/
269
270
#include "qemu/osdep.h"
271
@@ -XXX,XX +XXX,XX @@
272
#include "sysemu/sysemu.h"
273
#include "qapi/error.h"
274
#include "qapi/visitor.h"
275
+#include "sysemu/hostmem.h"
276
#include "sysemu/block-backend.h"
277
+#include "exec/ram_addr.h"
278
279
#include "qemu/log.h"
280
#include "qemu/module.h"
281
@@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
282
NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
283
"invalid write to read only CMBSZ, ignored");
284
return;
285
+ case 0xE00: /* PMRCAP */
286
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly,
287
+ "invalid write to PMRCAP register, ignored");
288
+ return;
289
+ case 0xE04: /* TODO PMRCTL */
290
+ break;
291
+ case 0xE08: /* PMRSTS */
292
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly,
293
+ "invalid write to PMRSTS register, ignored");
294
+ return;
295
+ case 0xE0C: /* PMREBS */
296
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly,
297
+ "invalid write to PMREBS register, ignored");
298
+ return;
299
+ case 0xE10: /* PMRSWTP */
300
+ NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly,
301
+ "invalid write to PMRSWTP register, ignored");
302
+ return;
303
+ case 0xE14: /* TODO PMRMSC */
304
+ break;
305
default:
306
NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
307
"invalid MMIO write,"
308
@@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
309
}
310
311
if (addr < sizeof(n->bar)) {
312
+ /*
313
+ * When PMRWBM bit 1 is set then read from
314
+ * PMRSTS should ensure prior writes
315
+ * made it to persistent media
316
+ */
317
+ if (addr == 0xE08 &&
318
+ (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
319
+ qemu_ram_writeback(n->pmrdev->mr.ram_block,
320
+ 0, n->pmrdev->size);
321
+ }
322
memcpy(&val, ptr + addr, size);
323
} else {
324
NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
325
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
326
error_setg(errp, "serial property not set");
327
return;
328
}
329
+
330
+ if (!n->cmb_size_mb && n->pmrdev) {
331
+ if (host_memory_backend_is_mapped(n->pmrdev)) {
332
+ char *path = object_get_canonical_path_component(OBJECT(n->pmrdev));
333
+ error_setg(errp, "can't use already busy memdev: %s", path);
334
+ g_free(path);
335
+ return;
336
+ }
337
+
338
+ if (!is_power_of_2(n->pmrdev->size)) {
339
+ error_setg(errp, "pmr backend size needs to be power of 2 in size");
340
+ return;
341
+ }
342
+
343
+ host_memory_backend_set_mapped(n->pmrdev, true);
344
+ }
114
+ }
345
+
115
+
346
blkconf_blocksizes(&n->conf);
116
blk_exp_unref(&exp->common);
347
if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
348
false, errp)) {
349
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
350
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
351
PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
352
353
+ } else if (n->pmrdev) {
354
+ /* Controller Capabilities register */
355
+ NVME_CAP_SET_PMRS(n->bar.cap, 1);
356
+
357
+ /* PMR Capabilities register */
358
+ n->bar.pmrcap = 0;
359
+ NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
360
+ NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
361
+ NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2);
362
+ NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
363
+ /* Turn on bit 1 support */
364
+ NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02);
365
+ NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0);
366
+ NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0);
367
+
368
+ /* PMR Control register */
369
+ n->bar.pmrctl = 0;
370
+ NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0);
371
+
372
+ /* PMR Status register */
373
+ n->bar.pmrsts = 0;
374
+ NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0);
375
+ NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0);
376
+ NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0);
377
+ NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0);
378
+
379
+ /* PMR Elasticity Buffer Size register */
380
+ n->bar.pmrebs = 0;
381
+ NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0);
382
+ NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0);
383
+ NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0);
384
+
385
+ /* PMR Sustained Write Throughput register */
386
+ n->bar.pmrswtp = 0;
387
+ NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0);
388
+ NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0);
389
+
390
+ /* PMR Memory Space Control register */
391
+ n->bar.pmrmsc = 0;
392
+ NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0);
393
+ NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0);
394
+
395
+ pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
396
+ PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
397
+ PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr);
398
}
399
400
for (i = 0; i < n->num_namespaces; i++) {
401
@@ -XXX,XX +XXX,XX @@ static void nvme_exit(PCIDevice *pci_dev)
402
if (n->cmb_size_mb) {
403
g_free(n->cmbuf);
404
}
405
+
406
+ if (n->pmrdev) {
407
+ host_memory_backend_set_mapped(n->pmrdev, false);
408
+ }
409
msix_uninit_exclusive_bar(pci_dev);
410
}
117
}
411
118
412
static Property nvme_props[] = {
119
@@ -XXX,XX +XXX,XX @@ static void fuse_export_shutdown(BlockExport *blk_exp)
413
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
120
414
+ DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND,
121
if (exp->fd_handler_set_up) {
415
+ HostMemoryBackend *),
122
aio_set_fd_handler(exp->common.ctx,
416
DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
123
- fuse_session_fd(exp->fuse_session), true,
417
DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
124
+ fuse_session_fd(exp->fuse_session), false,
418
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
125
NULL, NULL, NULL, NULL, NULL);
419
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
126
exp->fd_handler_set_up = false;
420
index XXXXXXX..XXXXXXX 100644
127
}
421
--- a/hw/block/Makefile.objs
422
+++ b/hw/block/Makefile.objs
423
@@ -XXX,XX +XXX,XX @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
424
common-obj-$(CONFIG_XEN) += xen-block.o
425
common-obj-$(CONFIG_ECC) += ecc.o
426
common-obj-$(CONFIG_ONENAND) += onenand.o
427
-common-obj-$(CONFIG_NVME_PCI) += nvme.o
428
common-obj-$(CONFIG_SWIM) += swim.o
429
430
common-obj-$(CONFIG_SH4) += tc58128.o
431
432
obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o
433
obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o
434
+obj-$(CONFIG_NVME_PCI) += nvme.o
435
436
obj-y += dataplane/
437
diff --git a/hw/block/trace-events b/hw/block/trace-events
438
index XXXXXXX..XXXXXXX 100644
439
--- a/hw/block/trace-events
440
+++ b/hw/block/trace-events
441
@@ -XXX,XX +XXX,XX @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA
442
nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
443
nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
444
nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
445
+nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored"
446
+nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored"
447
+nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored"
448
+nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored"
449
nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
450
nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
451
nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
452
--
128
--
453
2.25.3
129
2.40.1
454
455
diff view generated by jsdifflib
New patch
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
2
3
virtio_queue_aio_detach_host_notifier() does two things:
4
1. It removes the fd handler from the event loop.
5
2. It processes the virtqueue one last time.
6
7
The first step can be performed by any thread and without taking the
8
AioContext lock.
9
10
The second step may need the AioContext lock (depending on the device
11
implementation) and runs in the thread where request processing takes
12
place. virtio-blk and virtio-scsi therefore call
13
virtio_queue_aio_detach_host_notifier() from a BH that is scheduled in
14
AioContext.
15
16
The next patch will introduce a .drained_begin() function that needs to
17
call virtio_queue_aio_detach_host_notifier(). .drained_begin() functions
18
cannot call aio_poll() to wait synchronously for the BH. It is possible
19
for a .drained_poll() callback to asynchronously wait for the BH, but
20
that is more complex than necessary here.
21
22
Move the virtqueue processing out to the callers of
23
virtio_queue_aio_detach_host_notifier() so that the function can be
24
called from any thread. This is in preparation for the next patch.
25
26
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
27
Message-Id: <20230516190238.8401-17-stefanha@redhat.com>
28
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
29
---
30
hw/block/dataplane/virtio-blk.c | 7 +++++++
31
hw/scsi/virtio-scsi-dataplane.c | 14 ++++++++++++++
32
hw/virtio/virtio.c | 3 ---
33
3 files changed, 21 insertions(+), 3 deletions(-)
34
35
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/hw/block/dataplane/virtio-blk.c
38
+++ b/hw/block/dataplane/virtio-blk.c
39
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_data_plane_stop_bh(void *opaque)
40
41
for (i = 0; i < s->conf->num_queues; i++) {
42
VirtQueue *vq = virtio_get_queue(s->vdev, i);
43
+ EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq);
44
45
virtio_queue_aio_detach_host_notifier(vq, s->ctx);
46
+
47
+ /*
48
+ * Test and clear notifier after disabling event, in case poll callback
49
+ * didn't have time to run.
50
+ */
51
+ virtio_queue_host_notifier_read(host_notifier);
52
}
53
}
54
55
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/hw/scsi/virtio-scsi-dataplane.c
58
+++ b/hw/scsi/virtio-scsi-dataplane.c
59
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_dataplane_stop_bh(void *opaque)
60
{
61
VirtIOSCSI *s = opaque;
62
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
63
+ EventNotifier *host_notifier;
64
int i;
65
66
virtio_queue_aio_detach_host_notifier(vs->ctrl_vq, s->ctx);
67
+ host_notifier = virtio_queue_get_host_notifier(vs->ctrl_vq);
68
+
69
+ /*
70
+ * Test and clear notifier after disabling event, in case poll callback
71
+ * didn't have time to run.
72
+ */
73
+ virtio_queue_host_notifier_read(host_notifier);
74
+
75
virtio_queue_aio_detach_host_notifier(vs->event_vq, s->ctx);
76
+ host_notifier = virtio_queue_get_host_notifier(vs->event_vq);
77
+ virtio_queue_host_notifier_read(host_notifier);
78
+
79
for (i = 0; i < vs->conf.num_queues; i++) {
80
virtio_queue_aio_detach_host_notifier(vs->cmd_vqs[i], s->ctx);
81
+ host_notifier = virtio_queue_get_host_notifier(vs->cmd_vqs[i]);
82
+ virtio_queue_host_notifier_read(host_notifier);
83
}
84
}
85
86
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/hw/virtio/virtio.c
89
+++ b/hw/virtio/virtio.c
90
@@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct
91
void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
92
{
93
aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
94
- /* Test and clear notifier before after disabling event,
95
- * in case poll callback didn't have time to run. */
96
- virtio_queue_host_notifier_read(&vq->host_notifier);
97
}
98
99
void virtio_queue_host_notifier_read(EventNotifier *n)
100
--
101
2.40.1
diff view generated by jsdifflib
New patch
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
2
3
Detach ioeventfds during drained sections to stop I/O submission from
4
the guest. virtio-blk is no longer reliant on aio_disable_external()
5
after this patch. This will allow us to remove the
6
aio_disable_external() API once all other code that relies on it is
7
converted.
8
9
Take extra care to avoid attaching/detaching ioeventfds if the data
10
plane is started/stopped during a drained section. This should be rare,
11
but maybe the mirror block job can trigger it.
12
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Message-Id: <20230516190238.8401-18-stefanha@redhat.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
16
---
17
hw/block/dataplane/virtio-blk.c | 16 ++++++++------
18
hw/block/virtio-blk.c | 38 ++++++++++++++++++++++++++++++++-
19
2 files changed, 47 insertions(+), 7 deletions(-)
20
21
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/block/dataplane/virtio-blk.c
24
+++ b/hw/block/dataplane/virtio-blk.c
25
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
26
}
27
28
/* Get this show started by hooking up our callbacks */
29
- aio_context_acquire(s->ctx);
30
- for (i = 0; i < nvqs; i++) {
31
- VirtQueue *vq = virtio_get_queue(s->vdev, i);
32
+ if (!blk_in_drain(s->conf->conf.blk)) {
33
+ aio_context_acquire(s->ctx);
34
+ for (i = 0; i < nvqs; i++) {
35
+ VirtQueue *vq = virtio_get_queue(s->vdev, i);
36
37
- virtio_queue_aio_attach_host_notifier(vq, s->ctx);
38
+ virtio_queue_aio_attach_host_notifier(vq, s->ctx);
39
+ }
40
+ aio_context_release(s->ctx);
41
}
42
- aio_context_release(s->ctx);
43
return 0;
44
45
fail_aio_context:
46
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
47
s->stopping = true;
48
trace_virtio_blk_data_plane_stop(s);
49
50
- aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
51
+ if (!blk_in_drain(s->conf->conf.blk)) {
52
+ aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
53
+ }
54
55
aio_context_acquire(s->ctx);
56
57
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/hw/block/virtio-blk.c
60
+++ b/hw/block/virtio-blk.c
61
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_resize(void *opaque)
62
aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
63
}
64
65
+/* Suspend virtqueue ioeventfd processing during drain */
66
+static void virtio_blk_drained_begin(void *opaque)
67
+{
68
+ VirtIOBlock *s = opaque;
69
+ VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
70
+ AioContext *ctx = blk_get_aio_context(s->conf.conf.blk);
71
+
72
+ if (!s->dataplane || !s->dataplane_started) {
73
+ return;
74
+ }
75
+
76
+ for (uint16_t i = 0; i < s->conf.num_queues; i++) {
77
+ VirtQueue *vq = virtio_get_queue(vdev, i);
78
+ virtio_queue_aio_detach_host_notifier(vq, ctx);
79
+ }
80
+}
81
+
82
+/* Resume virtqueue ioeventfd processing after drain */
83
+static void virtio_blk_drained_end(void *opaque)
84
+{
85
+ VirtIOBlock *s = opaque;
86
+ VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
87
+ AioContext *ctx = blk_get_aio_context(s->conf.conf.blk);
88
+
89
+ if (!s->dataplane || !s->dataplane_started) {
90
+ return;
91
+ }
92
+
93
+ for (uint16_t i = 0; i < s->conf.num_queues; i++) {
94
+ VirtQueue *vq = virtio_get_queue(vdev, i);
95
+ virtio_queue_aio_attach_host_notifier(vq, ctx);
96
+ }
97
+}
98
+
99
static const BlockDevOps virtio_block_ops = {
100
- .resize_cb = virtio_blk_resize,
101
+ .resize_cb = virtio_blk_resize,
102
+ .drained_begin = virtio_blk_drained_begin,
103
+ .drained_end = virtio_blk_drained_end,
104
};
105
106
static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
107
--
108
2.40.1
diff view generated by jsdifflib
New patch
1
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
3
The virtio-scsi Host Bus Adapter provides access to devices on a SCSI
4
bus. Those SCSI devices typically have a BlockBackend. When the
5
BlockBackend enters a drained section, the SCSI device must temporarily
6
stop submitting new I/O requests.
7
8
Implement this behavior by temporarily stopping virtio-scsi virtqueue
9
processing when one of the SCSI devices enters a drained section. The
10
new scsi_device_drained_begin() API allows scsi-disk to message the
11
virtio-scsi HBA.
12
13
scsi_device_drained_begin() uses a drain counter so that multiple SCSI
14
devices can have overlapping drained sections. The HBA only sees one
15
pair of .drained_begin/end() calls.
16
17
After this commit, virtio-scsi no longer depends on hw/virtio's
18
ioeventfd aio_set_event_notifier(is_external=true). This commit is a
19
step towards removing the aio_disable_external() API.
20
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
Message-Id: <20230516190238.8401-19-stefanha@redhat.com>
23
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
24
---
25
include/hw/scsi/scsi.h | 14 ++++++++++++
26
hw/scsi/scsi-bus.c | 40 +++++++++++++++++++++++++++++++++
27
hw/scsi/scsi-disk.c | 27 +++++++++++++++++-----
28
hw/scsi/virtio-scsi-dataplane.c | 18 +++++++++------
29
hw/scsi/virtio-scsi.c | 38 +++++++++++++++++++++++++++++++
30
hw/scsi/trace-events | 2 ++
31
6 files changed, 127 insertions(+), 12 deletions(-)
32
33
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/include/hw/scsi/scsi.h
36
+++ b/include/hw/scsi/scsi.h
37
@@ -XXX,XX +XXX,XX @@ struct SCSIBusInfo {
38
void (*save_request)(QEMUFile *f, SCSIRequest *req);
39
void *(*load_request)(QEMUFile *f, SCSIRequest *req);
40
void (*free_request)(SCSIBus *bus, void *priv);
41
+
42
+ /*
43
+ * Temporarily stop submitting new requests between drained_begin() and
44
+ * drained_end(). Called from the main loop thread with the BQL held.
45
+ *
46
+ * Implement these callbacks if request processing is triggered by a file
47
+ * descriptor like an EventNotifier. Otherwise set them to NULL.
48
+ */
49
+ void (*drained_begin)(SCSIBus *bus);
50
+ void (*drained_end)(SCSIBus *bus);
51
};
52
53
#define TYPE_SCSI_BUS "SCSI"
54
@@ -XXX,XX +XXX,XX @@ struct SCSIBus {
55
56
SCSISense unit_attention;
57
const SCSIBusInfo *info;
58
+
59
+ int drain_count; /* protected by BQL */
60
};
61
62
/**
63
@@ -XXX,XX +XXX,XX @@ void scsi_req_cancel_complete(SCSIRequest *req);
64
void scsi_req_cancel(SCSIRequest *req);
65
void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier);
66
void scsi_req_retry(SCSIRequest *req);
67
+void scsi_device_drained_begin(SCSIDevice *sdev);
68
+void scsi_device_drained_end(SCSIDevice *sdev);
69
void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense);
70
void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense);
71
void scsi_device_report_change(SCSIDevice *dev, SCSISense sense);
72
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/hw/scsi/scsi-bus.c
75
+++ b/hw/scsi/scsi-bus.c
76
@@ -XXX,XX +XXX,XX @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
77
scsi_device_set_ua(sdev, sense);
78
}
79
80
+void scsi_device_drained_begin(SCSIDevice *sdev)
81
+{
82
+ SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, sdev->qdev.parent_bus);
83
+ if (!bus) {
84
+ return;
85
+ }
86
+
87
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
88
+ assert(bus->drain_count < INT_MAX);
89
+
90
+ /*
91
+ * Multiple BlockBackends can be on a SCSIBus and each may begin/end
92
+ * draining at any time. Keep a counter so HBAs only see begin/end once.
93
+ */
94
+ if (bus->drain_count++ == 0) {
95
+ trace_scsi_bus_drained_begin(bus, sdev);
96
+ if (bus->info->drained_begin) {
97
+ bus->info->drained_begin(bus);
98
+ }
99
+ }
100
+}
101
+
102
+void scsi_device_drained_end(SCSIDevice *sdev)
103
+{
104
+ SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, sdev->qdev.parent_bus);
105
+ if (!bus) {
106
+ return;
107
+ }
108
+
109
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
110
+ assert(bus->drain_count > 0);
111
+
112
+ if (bus->drain_count-- == 1) {
113
+ trace_scsi_bus_drained_end(bus, sdev);
114
+ if (bus->info->drained_end) {
115
+ bus->info->drained_end(bus);
116
+ }
117
+ }
118
+}
119
+
120
static char *scsibus_get_dev_path(DeviceState *dev)
121
{
122
SCSIDevice *d = SCSI_DEVICE(dev);
123
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
124
index XXXXXXX..XXXXXXX 100644
125
--- a/hw/scsi/scsi-disk.c
126
+++ b/hw/scsi/scsi-disk.c
127
@@ -XXX,XX +XXX,XX @@ static void scsi_disk_reset(DeviceState *dev)
128
s->qdev.scsi_version = s->qdev.default_scsi_version;
129
}
130
131
+static void scsi_disk_drained_begin(void *opaque)
132
+{
133
+ SCSIDiskState *s = opaque;
134
+
135
+ scsi_device_drained_begin(&s->qdev);
136
+}
137
+
138
+static void scsi_disk_drained_end(void *opaque)
139
+{
140
+ SCSIDiskState *s = opaque;
141
+
142
+ scsi_device_drained_end(&s->qdev);
143
+}
144
+
145
static void scsi_disk_resize_cb(void *opaque)
146
{
147
SCSIDiskState *s = opaque;
148
@@ -XXX,XX +XXX,XX @@ static bool scsi_cd_is_medium_locked(void *opaque)
149
}
150
151
static const BlockDevOps scsi_disk_removable_block_ops = {
152
- .change_media_cb = scsi_cd_change_media_cb,
153
+ .change_media_cb = scsi_cd_change_media_cb,
154
+ .drained_begin = scsi_disk_drained_begin,
155
+ .drained_end = scsi_disk_drained_end,
156
.eject_request_cb = scsi_cd_eject_request_cb,
157
- .is_tray_open = scsi_cd_is_tray_open,
158
.is_medium_locked = scsi_cd_is_medium_locked,
159
-
160
- .resize_cb = scsi_disk_resize_cb,
161
+ .is_tray_open = scsi_cd_is_tray_open,
162
+ .resize_cb = scsi_disk_resize_cb,
163
};
164
165
static const BlockDevOps scsi_disk_block_ops = {
166
- .resize_cb = scsi_disk_resize_cb,
167
+ .drained_begin = scsi_disk_drained_begin,
168
+ .drained_end = scsi_disk_drained_end,
169
+ .resize_cb = scsi_disk_resize_cb,
170
};
171
172
static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
173
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
174
index XXXXXXX..XXXXXXX 100644
175
--- a/hw/scsi/virtio-scsi-dataplane.c
176
+++ b/hw/scsi/virtio-scsi-dataplane.c
177
@@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
178
s->dataplane_starting = false;
179
s->dataplane_started = true;
180
181
- aio_context_acquire(s->ctx);
182
- virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
183
- virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
184
+ if (s->bus.drain_count == 0) {
185
+ aio_context_acquire(s->ctx);
186
+ virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
187
+ virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
188
189
- for (i = 0; i < vs->conf.num_queues; i++) {
190
- virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
191
+ for (i = 0; i < vs->conf.num_queues; i++) {
192
+ virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
193
+ }
194
+ aio_context_release(s->ctx);
195
}
196
- aio_context_release(s->ctx);
197
return 0;
198
199
fail_host_notifiers:
200
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
201
}
202
s->dataplane_stopping = true;
203
204
- aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
205
+ if (s->bus.drain_count == 0) {
206
+ aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
207
+ }
208
209
blk_drain_all(); /* ensure there are no in-flight requests */
210
211
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
212
index XXXXXXX..XXXXXXX 100644
213
--- a/hw/scsi/virtio-scsi.c
214
+++ b/hw/scsi/virtio-scsi.c
215
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
216
}
217
}
218
219
+/* Suspend virtqueue ioeventfd processing during drain */
220
+static void virtio_scsi_drained_begin(SCSIBus *bus)
221
+{
222
+ VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
223
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
224
+ uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
225
+ s->parent_obj.conf.num_queues;
226
+
227
+ if (!s->dataplane_started) {
228
+ return;
229
+ }
230
+
231
+ for (uint32_t i = 0; i < total_queues; i++) {
232
+ VirtQueue *vq = virtio_get_queue(vdev, i);
233
+ virtio_queue_aio_detach_host_notifier(vq, s->ctx);
234
+ }
235
+}
236
+
237
+/* Resume virtqueue ioeventfd processing after drain */
238
+static void virtio_scsi_drained_end(SCSIBus *bus)
239
+{
240
+ VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
241
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
242
+ uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED +
243
+ s->parent_obj.conf.num_queues;
244
+
245
+ if (!s->dataplane_started) {
246
+ return;
247
+ }
248
+
249
+ for (uint32_t i = 0; i < total_queues; i++) {
250
+ VirtQueue *vq = virtio_get_queue(vdev, i);
251
+ virtio_queue_aio_attach_host_notifier(vq, s->ctx);
252
+ }
253
+}
254
+
255
static struct SCSIBusInfo virtio_scsi_scsi_info = {
256
.tcq = true,
257
.max_channel = VIRTIO_SCSI_MAX_CHANNEL,
258
@@ -XXX,XX +XXX,XX @@ static struct SCSIBusInfo virtio_scsi_scsi_info = {
259
.get_sg_list = virtio_scsi_get_sg_list,
260
.save_request = virtio_scsi_save_request,
261
.load_request = virtio_scsi_load_request,
262
+ .drained_begin = virtio_scsi_drained_begin,
263
+ .drained_end = virtio_scsi_drained_end,
264
};
265
266
void virtio_scsi_common_realize(DeviceState *dev,
267
diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events
268
index XXXXXXX..XXXXXXX 100644
269
--- a/hw/scsi/trace-events
270
+++ b/hw/scsi/trace-events
271
@@ -XXX,XX +XXX,XX @@ scsi_req_cancel(int target, int lun, int tag) "target %d lun %d tag %d"
272
scsi_req_data(int target, int lun, int tag, int len) "target %d lun %d tag %d len %d"
273
scsi_req_data_canceled(int target, int lun, int tag, int len) "target %d lun %d tag %d len %d"
274
scsi_req_dequeue(int target, int lun, int tag) "target %d lun %d tag %d"
275
+scsi_bus_drained_begin(void *bus, void *sdev) "bus %p sdev %p"
276
+scsi_bus_drained_end(void *bus, void *sdev) "bus %p sdev %p"
277
scsi_req_continue(int target, int lun, int tag) "target %d lun %d tag %d"
278
scsi_req_continue_canceled(int target, int lun, int tag) "target %d lun %d tag %d"
279
scsi_req_parsed(int target, int lun, int tag, int cmd, int mode, int xfer) "target %d lun %d tag %d command %d dir %d length %d"
280
--
281
2.40.1
diff view generated by jsdifflib
1
After processing the option string with the keyval parser, we get a
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
QDict that contains only strings. This QDict must be fed to a keyval
3
visitor which converts the strings into the right data types.
4
2
5
qmp_object_add(), however, uses the normal QObject input visitor, which
3
Host notifiers can now use is_external=false since virtio-blk and
6
expects a QDict where all properties already have the QType that matches
4
virtio-scsi no longer rely on is_external=true for drained sections.
7
the data type required by the QOM object type.
8
5
9
Change the --object implementation in qemu-storage-daemon so that it
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
doesn't call qmp_object_add(), but calls user_creatable_add_dict()
7
Message-Id: <20230516190238.8401-20-stefanha@redhat.com>
11
directly instead and passes it a new keyval boolean that decides which
12
visitor must be used.
13
14
Reported-by: Coiby Xu <coiby.xu@gmail.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
16
---
9
---
17
include/qom/object_interfaces.h | 6 +++++-
10
hw/virtio/virtio.c | 6 +++---
18
qemu-storage-daemon.c | 4 +---
11
1 file changed, 3 insertions(+), 3 deletions(-)
19
qom/object_interfaces.c | 8 ++++++--
20
qom/qom-qmp-cmds.c | 2 +-
21
4 files changed, 13 insertions(+), 7 deletions(-)
22
12
23
diff --git a/include/qom/object_interfaces.h b/include/qom/object_interfaces.h
13
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
24
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
25
--- a/include/qom/object_interfaces.h
15
--- a/hw/virtio/virtio.c
26
+++ b/include/qom/object_interfaces.h
16
+++ b/hw/virtio/virtio.c
27
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
17
@@ -XXX,XX +XXX,XX @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
28
/**
18
29
* user_creatable_add_dict:
19
void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
30
* @qdict: the object definition
20
{
31
+ * @keyval: if true, use a keyval visitor for processing @qdict (i.e.
21
- aio_set_event_notifier(ctx, &vq->host_notifier, true,
32
+ * assume that all @qdict values are strings); otherwise, use
22
+ aio_set_event_notifier(ctx, &vq->host_notifier, false,
33
+ * the normal QObject visitor (i.e. assume all @qdict values
23
virtio_queue_host_notifier_read,
34
+ * have the QType expected by the QOM object type)
24
virtio_queue_host_notifier_aio_poll,
35
* @errp: if an error occurs, a pointer to an area to store the error
25
virtio_queue_host_notifier_aio_poll_ready);
36
*
26
@@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
37
* Create an instance of the user creatable object that is defined by
38
@@ -XXX,XX +XXX,XX @@ Object *user_creatable_add_type(const char *type, const char *id,
39
* ID from the key 'id'. The remaining entries in @qdict are used to
40
* initialize the object properties.
41
*/
27
*/
42
-void user_creatable_add_dict(QDict *qdict, Error **errp);
28
void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
43
+void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp);
29
{
44
30
- aio_set_event_notifier(ctx, &vq->host_notifier, true,
45
/**
31
+ aio_set_event_notifier(ctx, &vq->host_notifier, false,
46
* user_creatable_add_opts:
32
virtio_queue_host_notifier_read,
47
diff --git a/qemu-storage-daemon.c b/qemu-storage-daemon.c
33
NULL, NULL);
48
index XXXXXXX..XXXXXXX 100644
49
--- a/qemu-storage-daemon.c
50
+++ b/qemu-storage-daemon.c
51
@@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[])
52
QemuOpts *opts;
53
const char *type;
54
QDict *args;
55
- QObject *ret_data = NULL;
56
57
/* FIXME The keyval parser rejects 'help' arguments, so we must
58
* unconditionall try QemuOpts first. */
59
@@ -XXX,XX +XXX,XX @@ static void process_options(int argc, char *argv[])
60
qemu_opts_del(opts);
61
62
args = keyval_parse(optarg, "qom-type", &error_fatal);
63
- qmp_object_add(args, &ret_data, &error_fatal);
64
+ user_creatable_add_dict(args, true, &error_fatal);
65
qobject_unref(args);
66
- qobject_unref(ret_data);
67
break;
68
}
69
default:
70
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
71
index XXXXXXX..XXXXXXX 100644
72
--- a/qom/object_interfaces.c
73
+++ b/qom/object_interfaces.c
74
@@ -XXX,XX +XXX,XX @@ out:
75
return obj;
76
}
34
}
77
35
78
-void user_creatable_add_dict(QDict *qdict, Error **errp)
36
void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
79
+void user_creatable_add_dict(QDict *qdict, bool keyval, Error **errp)
80
{
37
{
81
Visitor *v;
38
- aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
82
Object *obj;
39
+ aio_set_event_notifier(ctx, &vq->host_notifier, false, NULL, NULL, NULL);
83
@@ -XXX,XX +XXX,XX @@ void user_creatable_add_dict(QDict *qdict, Error **errp)
84
}
85
qdict_del(qdict, "id");
86
87
- v = qobject_input_visitor_new(QOBJECT(qdict));
88
+ if (keyval) {
89
+ v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
90
+ } else {
91
+ v = qobject_input_visitor_new(QOBJECT(qdict));
92
+ }
93
obj = user_creatable_add_type(type, id, qdict, v, errp);
94
visit_free(v);
95
object_unref(obj);
96
diff --git a/qom/qom-qmp-cmds.c b/qom/qom-qmp-cmds.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/qom/qom-qmp-cmds.c
99
+++ b/qom/qom-qmp-cmds.c
100
@@ -XXX,XX +XXX,XX @@ void qmp_object_add(QDict *qdict, QObject **ret_data, Error **errp)
101
qobject_unref(pdict);
102
}
103
104
- user_creatable_add_dict(qdict, errp);
105
+ user_creatable_add_dict(qdict, false, errp);
106
}
40
}
107
41
108
void qmp_object_del(const char *id, Error **errp)
42
void virtio_queue_host_notifier_read(EventNotifier *n)
109
--
43
--
110
2.25.3
44
2.40.1
111
112
diff view generated by jsdifflib
1
This adds a new BdrvRequestFlags parameter to the .bdrv_co_truncate()
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
driver callbacks, and a supported_truncate_flags field in
3
BlockDriverState that allows drivers to advertise support for request
4
flags in the context of truncate.
5
2
6
For now, we always pass 0 and no drivers declare support for any flag.
3
All callers now pass is_external=false to aio_set_fd_handler() and
4
aio_set_event_notifier(). The aio_disable_external() API that
5
temporarily disables fd handlers that were registered is_external=true
6
is therefore dead code.
7
7
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Remove aio_disable_external(), aio_enable_external(), and the
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
is_external arguments to aio_set_fd_handler() and
10
Reviewed-by: Alberto Garcia <berto@igalia.com>
10
aio_set_event_notifier().
11
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
12
Message-Id: <20200424125448.63318-2-kwolf@redhat.com>
12
The entire test-fdmon-epoll test is removed because its sole purpose was
13
testing aio_disable_external().
14
15
Parts of this patch were generated using the following coccinelle
16
(https://coccinelle.lip6.fr/) semantic patch:
17
18
@@
19
expression ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque;
20
@@
21
- aio_set_fd_handler(ctx, fd, is_external, io_read, io_write, io_poll, io_poll_ready, opaque)
22
+ aio_set_fd_handler(ctx, fd, io_read, io_write, io_poll, io_poll_ready, opaque)
23
24
@@
25
expression ctx, notifier, is_external, io_read, io_poll, io_poll_ready;
26
@@
27
- aio_set_event_notifier(ctx, notifier, is_external, io_read, io_poll, io_poll_ready)
28
+ aio_set_event_notifier(ctx, notifier, io_read, io_poll, io_poll_ready)
29
30
Reviewed-by: Juan Quintela <quintela@redhat.com>
31
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
32
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
33
Message-Id: <20230516190238.8401-21-stefanha@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
34
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
---
35
---
15
include/block/block_int.h | 10 +++++++++-
36
include/block/aio.h | 57 ------------------------
16
block/crypto.c | 3 ++-
37
util/aio-posix.h | 1 -
17
block/file-posix.c | 2 +-
38
block.c | 7 ---
18
block/file-win32.c | 2 +-
39
block/blkio.c | 15 +++----
19
block/gluster.c | 1 +
40
block/curl.c | 10 ++---
20
block/io.c | 8 +++++++-
41
block/export/fuse.c | 8 ++--
21
block/iscsi.c | 2 +-
42
block/export/vduse-blk.c | 10 ++---
22
block/nfs.c | 3 ++-
43
block/io.c | 2 -
23
block/qcow2.c | 2 +-
44
block/io_uring.c | 4 +-
24
block/qed.c | 1 +
45
block/iscsi.c | 3 +-
25
block/raw-format.c | 2 +-
46
block/linux-aio.c | 4 +-
26
block/rbd.c | 1 +
47
block/nfs.c | 5 +--
27
block/sheepdog.c | 4 ++--
48
block/nvme.c | 8 ++--
28
block/ssh.c | 2 +-
49
block/ssh.c | 4 +-
29
tests/test-block-iothread.c | 3 ++-
50
block/win32-aio.c | 6 +--
30
15 files changed, 33 insertions(+), 13 deletions(-)
51
hw/i386/kvm/xen_xenstore.c | 2 +-
52
hw/virtio/virtio.c | 6 +--
53
hw/xen/xen-bus.c | 8 ++--
54
io/channel-command.c | 6 +--
55
io/channel-file.c | 3 +-
56
io/channel-socket.c | 3 +-
57
migration/rdma.c | 16 +++----
58
tests/unit/test-aio.c | 27 +-----------
59
tests/unit/test-bdrv-drain.c | 1 -
60
tests/unit/test-fdmon-epoll.c | 73 -------------------------------
61
tests/unit/test-nested-aio-poll.c | 9 ++--
62
util/aio-posix.c | 20 +++------
63
util/aio-win32.c | 8 +---
64
util/async.c | 3 +-
65
util/fdmon-epoll.c | 10 -----
66
util/fdmon-io_uring.c | 8 +---
67
util/fdmon-poll.c | 3 +-
68
util/main-loop.c | 7 ++-
69
util/qemu-coroutine-io.c | 7 ++-
70
util/vhost-user-server.c | 11 +++--
71
tests/unit/meson.build | 3 --
72
36 files changed, 80 insertions(+), 298 deletions(-)
73
delete mode 100644 tests/unit/test-fdmon-epoll.c
31
74
32
diff --git a/include/block/block_int.h b/include/block/block_int.h
75
diff --git a/include/block/aio.h b/include/block/aio.h
33
index XXXXXXX..XXXXXXX 100644
76
index XXXXXXX..XXXXXXX 100644
34
--- a/include/block/block_int.h
77
--- a/include/block/aio.h
35
+++ b/include/block/block_int.h
78
+++ b/include/block/aio.h
36
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
79
@@ -XXX,XX +XXX,XX @@ struct AioContext {
37
*/
80
*/
38
int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
81
QEMUTimerListGroup tlg;
39
bool exact, PreallocMode prealloc,
82
40
- Error **errp);
83
- int external_disable_cnt;
41
+ BdrvRequestFlags flags, Error **errp);
84
-
42
85
/* Number of AioHandlers without .io_poll() */
43
int64_t (*bdrv_getlength)(BlockDriverState *bs);
86
int poll_disable_cnt;
44
bool has_variable_length;
87
45
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
88
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking);
46
/* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
89
*/
47
* BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
90
void aio_set_fd_handler(AioContext *ctx,
48
unsigned int supported_zero_flags;
91
int fd,
49
+ /*
92
- bool is_external,
50
+ * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
93
IOHandler *io_read,
51
+ *
94
IOHandler *io_write,
52
+ * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
95
AioPollFn *io_poll,
53
+ * that any added space reads as all zeros. If this can't be guaranteed,
96
@@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx,
54
+ * the operation must fail.
97
*/
55
+ */
98
void aio_set_event_notifier(AioContext *ctx,
56
+ unsigned int supported_truncate_flags;
99
EventNotifier *notifier,
57
100
- bool is_external,
58
/* the following member gives a name to every node on the bs graph. */
101
EventNotifierHandler *io_read,
59
char node_name[32];
102
AioPollFn *io_poll,
60
diff --git a/block/crypto.c b/block/crypto.c
103
EventNotifierHandler *io_poll_ready);
61
index XXXXXXX..XXXXXXX 100644
104
@@ -XXX,XX +XXX,XX @@ static inline void aio_timer_init(AioContext *ctx,
62
--- a/block/crypto.c
105
*/
63
+++ b/block/crypto.c
106
int64_t aio_compute_timeout(AioContext *ctx);
64
@@ -XXX,XX +XXX,XX @@ static int block_crypto_co_create_generic(BlockDriverState *bs,
107
65
108
-/**
66
static int coroutine_fn
109
- * aio_disable_external:
67
block_crypto_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
110
- * @ctx: the aio context
68
- PreallocMode prealloc, Error **errp)
111
- *
69
+ PreallocMode prealloc, BdrvRequestFlags flags,
112
- * Disable the further processing of external clients.
70
+ Error **errp)
113
- */
71
{
114
-static inline void aio_disable_external(AioContext *ctx)
72
BlockCrypto *crypto = bs->opaque;
115
-{
73
uint64_t payload_offset =
116
- qatomic_inc(&ctx->external_disable_cnt);
74
diff --git a/block/file-posix.c b/block/file-posix.c
117
-}
75
index XXXXXXX..XXXXXXX 100644
118
-
76
--- a/block/file-posix.c
119
-/**
77
+++ b/block/file-posix.c
120
- * aio_enable_external:
78
@@ -XXX,XX +XXX,XX @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset,
121
- * @ctx: the aio context
79
122
- *
80
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
123
- * Enable the processing of external clients.
81
bool exact, PreallocMode prealloc,
124
- */
82
- Error **errp)
125
-static inline void aio_enable_external(AioContext *ctx)
83
+ BdrvRequestFlags flags, Error **errp)
126
-{
84
{
127
- int old;
85
BDRVRawState *s = bs->opaque;
128
-
86
struct stat st;
129
- old = qatomic_fetch_dec(&ctx->external_disable_cnt);
87
diff --git a/block/file-win32.c b/block/file-win32.c
130
- assert(old > 0);
88
index XXXXXXX..XXXXXXX 100644
131
- if (old == 1) {
89
--- a/block/file-win32.c
132
- /* Kick event loop so it re-arms file descriptors */
90
+++ b/block/file-win32.c
133
- aio_notify(ctx);
91
@@ -XXX,XX +XXX,XX @@ static void raw_close(BlockDriverState *bs)
134
- }
92
135
-}
93
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
136
-
94
bool exact, PreallocMode prealloc,
137
-/**
95
- Error **errp)
138
- * aio_external_disabled:
96
+ BdrvRequestFlags flags, Error **errp)
139
- * @ctx: the aio context
97
{
140
- *
98
BDRVRawState *s = bs->opaque;
141
- * Return true if the external clients are disabled.
99
LONG low, high;
142
- */
100
diff --git a/block/gluster.c b/block/gluster.c
143
-static inline bool aio_external_disabled(AioContext *ctx)
101
index XXXXXXX..XXXXXXX 100644
144
-{
102
--- a/block/gluster.c
145
- return qatomic_read(&ctx->external_disable_cnt);
103
+++ b/block/gluster.c
146
-}
104
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qemu_gluster_co_truncate(BlockDriverState *bs,
147
-
105
int64_t offset,
148
-/**
106
bool exact,
149
- * aio_node_check:
107
PreallocMode prealloc,
150
- * @ctx: the aio context
108
+ BdrvRequestFlags flags,
151
- * @is_external: Whether or not the checked node is an external event source.
109
Error **errp)
152
- *
110
{
153
- * Check if the node's is_external flag is okay to be polled by the ctx at this
111
BDRVGlusterState *s = bs->opaque;
154
- * moment. True means green light.
155
- */
156
-static inline bool aio_node_check(AioContext *ctx, bool is_external)
157
-{
158
- return !is_external || !qatomic_read(&ctx->external_disable_cnt);
159
-}
160
-
161
/**
162
* aio_co_schedule:
163
* @ctx: the aio context
164
diff --git a/util/aio-posix.h b/util/aio-posix.h
165
index XXXXXXX..XXXXXXX 100644
166
--- a/util/aio-posix.h
167
+++ b/util/aio-posix.h
168
@@ -XXX,XX +XXX,XX @@ struct AioHandler {
169
#endif
170
int64_t poll_idle_timeout; /* when to stop userspace polling */
171
bool poll_ready; /* has polling detected an event? */
172
- bool is_external;
173
};
174
175
/* Add a handler to a ready list */
176
diff --git a/block.c b/block.c
177
index XXXXXXX..XXXXXXX 100644
178
--- a/block.c
179
+++ b/block.c
180
@@ -XXX,XX +XXX,XX @@ static void bdrv_detach_aio_context(BlockDriverState *bs)
181
bs->drv->bdrv_detach_aio_context(bs);
182
}
183
184
- if (bs->quiesce_counter) {
185
- aio_enable_external(bs->aio_context);
186
- }
187
bs->aio_context = NULL;
188
}
189
190
@@ -XXX,XX +XXX,XX @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
191
BdrvAioNotifier *ban, *ban_tmp;
192
GLOBAL_STATE_CODE();
193
194
- if (bs->quiesce_counter) {
195
- aio_disable_external(new_context);
196
- }
197
-
198
bs->aio_context = new_context;
199
200
if (bs->drv && bs->drv->bdrv_attach_aio_context) {
201
diff --git a/block/blkio.c b/block/blkio.c
202
index XXXXXXX..XXXXXXX 100644
203
--- a/block/blkio.c
204
+++ b/block/blkio.c
205
@@ -XXX,XX +XXX,XX @@ static void blkio_attach_aio_context(BlockDriverState *bs,
206
{
207
BDRVBlkioState *s = bs->opaque;
208
209
- aio_set_fd_handler(new_context,
210
- s->completion_fd,
211
- false,
212
- blkio_completion_fd_read,
213
- NULL,
214
+ aio_set_fd_handler(new_context, s->completion_fd,
215
+ blkio_completion_fd_read, NULL,
216
blkio_completion_fd_poll,
217
- blkio_completion_fd_poll_ready,
218
- bs);
219
+ blkio_completion_fd_poll_ready, bs);
220
}
221
222
static void blkio_detach_aio_context(BlockDriverState *bs)
223
{
224
BDRVBlkioState *s = bs->opaque;
225
226
- aio_set_fd_handler(bdrv_get_aio_context(bs),
227
- s->completion_fd,
228
- false, NULL, NULL, NULL, NULL, NULL);
229
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->completion_fd, NULL, NULL,
230
+ NULL, NULL, NULL);
231
}
232
233
/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */
234
diff --git a/block/curl.c b/block/curl.c
235
index XXXXXXX..XXXXXXX 100644
236
--- a/block/curl.c
237
+++ b/block/curl.c
238
@@ -XXX,XX +XXX,XX @@ static gboolean curl_drop_socket(void *key, void *value, void *opaque)
239
CURLSocket *socket = value;
240
BDRVCURLState *s = socket->s;
241
242
- aio_set_fd_handler(s->aio_context, socket->fd, false,
243
+ aio_set_fd_handler(s->aio_context, socket->fd,
244
NULL, NULL, NULL, NULL, NULL);
245
return true;
246
}
247
@@ -XXX,XX +XXX,XX @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
248
trace_curl_sock_cb(action, (int)fd);
249
switch (action) {
250
case CURL_POLL_IN:
251
- aio_set_fd_handler(s->aio_context, fd, false,
252
+ aio_set_fd_handler(s->aio_context, fd,
253
curl_multi_do, NULL, NULL, NULL, socket);
254
break;
255
case CURL_POLL_OUT:
256
- aio_set_fd_handler(s->aio_context, fd, false,
257
+ aio_set_fd_handler(s->aio_context, fd,
258
NULL, curl_multi_do, NULL, NULL, socket);
259
break;
260
case CURL_POLL_INOUT:
261
- aio_set_fd_handler(s->aio_context, fd, false,
262
+ aio_set_fd_handler(s->aio_context, fd,
263
curl_multi_do, curl_multi_do,
264
NULL, NULL, socket);
265
break;
266
case CURL_POLL_REMOVE:
267
- aio_set_fd_handler(s->aio_context, fd, false,
268
+ aio_set_fd_handler(s->aio_context, fd,
269
NULL, NULL, NULL, NULL, NULL);
270
break;
271
}
272
diff --git a/block/export/fuse.c b/block/export/fuse.c
273
index XXXXXXX..XXXXXXX 100644
274
--- a/block/export/fuse.c
275
+++ b/block/export/fuse.c
276
@@ -XXX,XX +XXX,XX @@ static void fuse_export_drained_begin(void *opaque)
277
FuseExport *exp = opaque;
278
279
aio_set_fd_handler(exp->common.ctx,
280
- fuse_session_fd(exp->fuse_session), false,
281
+ fuse_session_fd(exp->fuse_session),
282
NULL, NULL, NULL, NULL, NULL);
283
exp->fd_handler_set_up = false;
284
}
285
@@ -XXX,XX +XXX,XX @@ static void fuse_export_drained_end(void *opaque)
286
exp->common.ctx = blk_get_aio_context(exp->common.blk);
287
288
aio_set_fd_handler(exp->common.ctx,
289
- fuse_session_fd(exp->fuse_session), false,
290
+ fuse_session_fd(exp->fuse_session),
291
read_from_fuse_export, NULL, NULL, NULL, exp);
292
exp->fd_handler_set_up = true;
293
}
294
@@ -XXX,XX +XXX,XX @@ static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
295
g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
296
297
aio_set_fd_handler(exp->common.ctx,
298
- fuse_session_fd(exp->fuse_session), false,
299
+ fuse_session_fd(exp->fuse_session),
300
read_from_fuse_export, NULL, NULL, NULL, exp);
301
exp->fd_handler_set_up = true;
302
303
@@ -XXX,XX +XXX,XX @@ static void fuse_export_shutdown(BlockExport *blk_exp)
304
305
if (exp->fd_handler_set_up) {
306
aio_set_fd_handler(exp->common.ctx,
307
- fuse_session_fd(exp->fuse_session), false,
308
+ fuse_session_fd(exp->fuse_session),
309
NULL, NULL, NULL, NULL, NULL);
310
exp->fd_handler_set_up = false;
311
}
312
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
313
index XXXXXXX..XXXXXXX 100644
314
--- a/block/export/vduse-blk.c
315
+++ b/block/export/vduse-blk.c
316
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
317
}
318
319
aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
320
- false, on_vduse_vq_kick, NULL, NULL, NULL, vq);
321
+ on_vduse_vq_kick, NULL, NULL, NULL, vq);
322
/* Make sure we don't miss any kick afer reconnecting */
323
eventfd_write(vduse_queue_get_fd(vq), 1);
324
}
325
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
326
return;
327
}
328
329
- aio_set_fd_handler(vblk_exp->export.ctx, fd, false,
330
+ aio_set_fd_handler(vblk_exp->export.ctx, fd,
331
NULL, NULL, NULL, NULL, NULL);
332
}
333
334
@@ -XXX,XX +XXX,XX @@ static void on_vduse_dev_kick(void *opaque)
335
static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
336
{
337
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
338
- false, on_vduse_dev_kick, NULL, NULL, NULL,
339
+ on_vduse_dev_kick, NULL, NULL, NULL,
340
vblk_exp->dev);
341
342
/* Virtqueues are handled by vduse_blk_drained_end() */
343
@@ -XXX,XX +XXX,XX @@ static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
344
static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
345
{
346
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
347
- false, NULL, NULL, NULL, NULL, NULL);
348
+ NULL, NULL, NULL, NULL, NULL);
349
350
/* Virtqueues are handled by vduse_blk_drained_begin() */
351
}
352
@@ -XXX,XX +XXX,XX @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
353
vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
354
}
355
356
- aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), false,
357
+ aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev),
358
on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
359
360
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
112
diff --git a/block/io.c b/block/io.c
361
diff --git a/block/io.c b/block/io.c
113
index XXXXXXX..XXXXXXX 100644
362
index XXXXXXX..XXXXXXX 100644
114
--- a/block/io.c
363
--- a/block/io.c
115
+++ b/block/io.c
364
+++ b/block/io.c
116
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
365
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
117
BlockDriverState *bs = child->bs;
366
118
BlockDriver *drv = bs->drv;
367
/* Stop things in parent-to-child order */
119
BdrvTrackedRequest req;
368
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
120
+ BdrvRequestFlags flags = 0;
369
- aio_disable_external(bdrv_get_aio_context(bs));
121
int64_t old_size, new_bytes;
370
bdrv_parent_drained_begin(bs, parent);
122
int ret;
371
if (bs->drv && bs->drv->bdrv_drain_begin) {
123
372
bs->drv->bdrv_drain_begin(bs);
124
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
373
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
125
}
374
bs->drv->bdrv_drain_end(bs);
126
375
}
127
if (drv->bdrv_co_truncate) {
376
bdrv_parent_drained_end(bs, parent);
128
- ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, errp);
377
- aio_enable_external(bdrv_get_aio_context(bs));
129
+ if (flags & ~bs->supported_truncate_flags) {
378
}
130
+ error_setg(errp, "Block driver does not support requested flags");
379
}
131
+ ret = -ENOTSUP;
380
132
+ goto out;
381
diff --git a/block/io_uring.c b/block/io_uring.c
133
+ }
382
index XXXXXXX..XXXXXXX 100644
134
+ ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
383
--- a/block/io_uring.c
135
} else if (bs->file && drv->is_filter) {
384
+++ b/block/io_uring.c
136
ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, errp);
385
@@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
137
} else {
386
387
void luring_detach_aio_context(LuringState *s, AioContext *old_context)
388
{
389
- aio_set_fd_handler(old_context, s->ring.ring_fd, false,
390
+ aio_set_fd_handler(old_context, s->ring.ring_fd,
391
NULL, NULL, NULL, NULL, s);
392
qemu_bh_delete(s->completion_bh);
393
s->aio_context = NULL;
394
@@ -XXX,XX +XXX,XX @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context)
395
{
396
s->aio_context = new_context;
397
s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
398
- aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false,
399
+ aio_set_fd_handler(s->aio_context, s->ring.ring_fd,
400
qemu_luring_completion_cb, NULL,
401
qemu_luring_poll_cb, qemu_luring_poll_ready, s);
402
}
138
diff --git a/block/iscsi.c b/block/iscsi.c
403
diff --git a/block/iscsi.c b/block/iscsi.c
139
index XXXXXXX..XXXXXXX 100644
404
index XXXXXXX..XXXXXXX 100644
140
--- a/block/iscsi.c
405
--- a/block/iscsi.c
141
+++ b/block/iscsi.c
406
+++ b/block/iscsi.c
142
@@ -XXX,XX +XXX,XX @@ static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
407
@@ -XXX,XX +XXX,XX @@ iscsi_set_events(IscsiLun *iscsilun)
143
408
144
static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset,
409
if (ev != iscsilun->events) {
145
bool exact, PreallocMode prealloc,
410
aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
146
- Error **errp)
411
- false,
147
+ BdrvRequestFlags flags, Error **errp)
412
(ev & POLLIN) ? iscsi_process_read : NULL,
148
{
413
(ev & POLLOUT) ? iscsi_process_write : NULL,
414
NULL, NULL,
415
@@ -XXX,XX +XXX,XX @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
149
IscsiLun *iscsilun = bs->opaque;
416
IscsiLun *iscsilun = bs->opaque;
150
int64_t cur_length;
417
418
aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
419
- false, NULL, NULL, NULL, NULL, NULL);
420
+ NULL, NULL, NULL, NULL, NULL);
421
iscsilun->events = 0;
422
423
if (iscsilun->nop_timer) {
424
diff --git a/block/linux-aio.c b/block/linux-aio.c
425
index XXXXXXX..XXXXXXX 100644
426
--- a/block/linux-aio.c
427
+++ b/block/linux-aio.c
428
@@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
429
430
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
431
{
432
- aio_set_event_notifier(old_context, &s->e, false, NULL, NULL, NULL);
433
+ aio_set_event_notifier(old_context, &s->e, NULL, NULL, NULL);
434
qemu_bh_delete(s->completion_bh);
435
s->aio_context = NULL;
436
}
437
@@ -XXX,XX +XXX,XX @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
438
{
439
s->aio_context = new_context;
440
s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
441
- aio_set_event_notifier(new_context, &s->e, false,
442
+ aio_set_event_notifier(new_context, &s->e,
443
qemu_laio_completion_cb,
444
qemu_laio_poll_cb,
445
qemu_laio_poll_ready);
151
diff --git a/block/nfs.c b/block/nfs.c
446
diff --git a/block/nfs.c b/block/nfs.c
152
index XXXXXXX..XXXXXXX 100644
447
index XXXXXXX..XXXXXXX 100644
153
--- a/block/nfs.c
448
--- a/block/nfs.c
154
+++ b/block/nfs.c
449
+++ b/block/nfs.c
155
@@ -XXX,XX +XXX,XX @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
450
@@ -XXX,XX +XXX,XX @@ static void nfs_set_events(NFSClient *client)
156
451
int ev = nfs_which_events(client->context);
157
static int coroutine_fn
452
if (ev != client->events) {
158
nfs_file_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
453
aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
159
- PreallocMode prealloc, Error **errp)
454
- false,
160
+ PreallocMode prealloc, BdrvRequestFlags flags,
455
(ev & POLLIN) ? nfs_process_read : NULL,
161
+ Error **errp)
456
(ev & POLLOUT) ? nfs_process_write : NULL,
162
{
457
NULL, NULL, client);
458
@@ -XXX,XX +XXX,XX @@ static void nfs_detach_aio_context(BlockDriverState *bs)
163
NFSClient *client = bs->opaque;
459
NFSClient *client = bs->opaque;
164
int ret;
460
165
diff --git a/block/qcow2.c b/block/qcow2.c
461
aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
166
index XXXXXXX..XXXXXXX 100644
462
- false, NULL, NULL, NULL, NULL, NULL);
167
--- a/block/qcow2.c
463
+ NULL, NULL, NULL, NULL, NULL);
168
+++ b/block/qcow2.c
464
client->events = 0;
169
@@ -XXX,XX +XXX,XX @@ fail:
465
}
170
466
171
static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
467
@@ -XXX,XX +XXX,XX @@ static void nfs_client_close(NFSClient *client)
172
bool exact, PreallocMode prealloc,
468
if (client->context) {
173
- Error **errp)
469
qemu_mutex_lock(&client->mutex);
174
+ BdrvRequestFlags flags, Error **errp)
470
aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
175
{
471
- false, NULL, NULL, NULL, NULL, NULL);
176
BDRVQcow2State *s = bs->opaque;
472
+ NULL, NULL, NULL, NULL, NULL);
177
uint64_t old_length;
473
qemu_mutex_unlock(&client->mutex);
178
diff --git a/block/qed.c b/block/qed.c
474
if (client->fh) {
179
index XXXXXXX..XXXXXXX 100644
475
nfs_close(client->context, client->fh);
180
--- a/block/qed.c
476
diff --git a/block/nvme.c b/block/nvme.c
181
+++ b/block/qed.c
477
index XXXXXXX..XXXXXXX 100644
182
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_truncate(BlockDriverState *bs,
478
--- a/block/nvme.c
183
int64_t offset,
479
+++ b/block/nvme.c
184
bool exact,
480
@@ -XXX,XX +XXX,XX @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
185
PreallocMode prealloc,
481
}
186
+ BdrvRequestFlags flags,
482
aio_set_event_notifier(bdrv_get_aio_context(bs),
187
Error **errp)
483
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
188
{
484
- false, nvme_handle_event, nvme_poll_cb,
189
BDRVQEDState *s = bs->opaque;
485
+ nvme_handle_event, nvme_poll_cb,
190
diff --git a/block/raw-format.c b/block/raw-format.c
486
nvme_poll_ready);
191
index XXXXXXX..XXXXXXX 100644
487
192
--- a/block/raw-format.c
488
if (!nvme_identify(bs, namespace, errp)) {
193
+++ b/block/raw-format.c
489
@@ -XXX,XX +XXX,XX @@ static void nvme_close(BlockDriverState *bs)
194
@@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
490
g_free(s->queues);
195
491
aio_set_event_notifier(bdrv_get_aio_context(bs),
196
static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
492
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
197
bool exact, PreallocMode prealloc,
493
- false, NULL, NULL, NULL);
198
- Error **errp)
494
+ NULL, NULL, NULL);
199
+ BdrvRequestFlags flags, Error **errp)
495
event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
200
{
496
qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map,
201
BDRVRawState *s = bs->opaque;
497
0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE);
202
498
@@ -XXX,XX +XXX,XX @@ static void nvme_detach_aio_context(BlockDriverState *bs)
203
diff --git a/block/rbd.c b/block/rbd.c
499
204
index XXXXXXX..XXXXXXX 100644
500
aio_set_event_notifier(bdrv_get_aio_context(bs),
205
--- a/block/rbd.c
501
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
206
+++ b/block/rbd.c
502
- false, NULL, NULL, NULL);
207
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
503
+ NULL, NULL, NULL);
208
int64_t offset,
504
}
209
bool exact,
505
210
PreallocMode prealloc,
506
static void nvme_attach_aio_context(BlockDriverState *bs,
211
+ BdrvRequestFlags flags,
507
@@ -XXX,XX +XXX,XX @@ static void nvme_attach_aio_context(BlockDriverState *bs,
212
Error **errp)
508
213
{
509
s->aio_context = new_context;
214
int r;
510
aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
215
diff --git a/block/sheepdog.c b/block/sheepdog.c
511
- false, nvme_handle_event, nvme_poll_cb,
216
index XXXXXXX..XXXXXXX 100644
512
+ nvme_handle_event, nvme_poll_cb,
217
--- a/block/sheepdog.c
513
nvme_poll_ready);
218
+++ b/block/sheepdog.c
514
219
@@ -XXX,XX +XXX,XX @@ static int64_t sd_getlength(BlockDriverState *bs)
515
for (unsigned i = 0; i < s->queue_count; i++) {
220
221
static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset,
222
bool exact, PreallocMode prealloc,
223
- Error **errp)
224
+ BdrvRequestFlags flags, Error **errp)
225
{
226
BDRVSheepdogState *s = bs->opaque;
227
int ret, fd;
228
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
229
230
assert(!flags);
231
if (offset > s->inode.vdi_size) {
232
- ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, NULL);
233
+ ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL);
234
if (ret < 0) {
235
return ret;
236
}
237
diff --git a/block/ssh.c b/block/ssh.c
516
diff --git a/block/ssh.c b/block/ssh.c
238
index XXXXXXX..XXXXXXX 100644
517
index XXXXXXX..XXXXXXX 100644
239
--- a/block/ssh.c
518
--- a/block/ssh.c
240
+++ b/block/ssh.c
519
+++ b/block/ssh.c
241
@@ -XXX,XX +XXX,XX @@ static int64_t ssh_getlength(BlockDriverState *bs)
520
@@ -XXX,XX +XXX,XX @@ static void restart_coroutine(void *opaque)
242
521
AioContext *ctx = bdrv_get_aio_context(bs);
243
static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
522
244
bool exact, PreallocMode prealloc,
523
trace_ssh_restart_coroutine(restart->co);
245
- Error **errp)
524
- aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL, NULL);
246
+ BdrvRequestFlags flags, Error **errp)
525
+ aio_set_fd_handler(ctx, s->sock, NULL, NULL, NULL, NULL, NULL);
247
{
526
248
BDRVSSHState *s = bs->opaque;
527
aio_co_wake(restart->co);
249
528
}
250
diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c
529
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
251
index XXXXXXX..XXXXXXX 100644
530
trace_ssh_co_yield(s->sock, rd_handler, wr_handler);
252
--- a/tests/test-block-iothread.c
531
253
+++ b/tests/test-block-iothread.c
532
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
254
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_test_co_pdiscard(BlockDriverState *bs,
533
- false, rd_handler, wr_handler, NULL, NULL, &restart);
255
534
+ rd_handler, wr_handler, NULL, NULL, &restart);
256
static int coroutine_fn
535
qemu_coroutine_yield();
257
bdrv_test_co_truncate(BlockDriverState *bs, int64_t offset, bool exact,
536
trace_ssh_co_yield_back(s->sock);
258
- PreallocMode prealloc, Error **errp)
537
}
259
+ PreallocMode prealloc, BdrvRequestFlags flags,
538
diff --git a/block/win32-aio.c b/block/win32-aio.c
260
+ Error **errp)
539
index XXXXXXX..XXXXXXX 100644
261
{
540
--- a/block/win32-aio.c
262
return 0;
541
+++ b/block/win32-aio.c
263
}
542
@@ -XXX,XX +XXX,XX @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
543
void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
544
AioContext *old_context)
545
{
546
- aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL, NULL);
547
+ aio_set_event_notifier(old_context, &aio->e, NULL, NULL, NULL);
548
aio->aio_ctx = NULL;
549
}
550
551
@@ -XXX,XX +XXX,XX @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
552
AioContext *new_context)
553
{
554
aio->aio_ctx = new_context;
555
- aio_set_event_notifier(new_context, &aio->e, false,
556
- win32_aio_completion_cb, NULL, NULL);
557
+ aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb,
558
+ NULL, NULL);
559
}
560
561
QEMUWin32AIOState *win32_aio_init(void)
562
diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
563
index XXXXXXX..XXXXXXX 100644
564
--- a/hw/i386/kvm/xen_xenstore.c
565
+++ b/hw/i386/kvm/xen_xenstore.c
566
@@ -XXX,XX +XXX,XX @@ static void xen_xenstore_realize(DeviceState *dev, Error **errp)
567
error_setg(errp, "Xenstore evtchn port init failed");
568
return;
569
}
570
- aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh), false,
571
+ aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh),
572
xen_xenstore_event, NULL, NULL, NULL, s);
573
574
s->impl = xs_impl_create(xen_domid);
575
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
576
index XXXXXXX..XXXXXXX 100644
577
--- a/hw/virtio/virtio.c
578
+++ b/hw/virtio/virtio.c
579
@@ -XXX,XX +XXX,XX @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
580
581
void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
582
{
583
- aio_set_event_notifier(ctx, &vq->host_notifier, false,
584
+ aio_set_event_notifier(ctx, &vq->host_notifier,
585
virtio_queue_host_notifier_read,
586
virtio_queue_host_notifier_aio_poll,
587
virtio_queue_host_notifier_aio_poll_ready);
588
@@ -XXX,XX +XXX,XX @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx)
589
*/
590
void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx)
591
{
592
- aio_set_event_notifier(ctx, &vq->host_notifier, false,
593
+ aio_set_event_notifier(ctx, &vq->host_notifier,
594
virtio_queue_host_notifier_read,
595
NULL, NULL);
596
}
597
598
void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
599
{
600
- aio_set_event_notifier(ctx, &vq->host_notifier, false, NULL, NULL, NULL);
601
+ aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL);
602
}
603
604
void virtio_queue_host_notifier_read(EventNotifier *n)
605
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
606
index XXXXXXX..XXXXXXX 100644
607
--- a/hw/xen/xen-bus.c
608
+++ b/hw/xen/xen-bus.c
609
@@ -XXX,XX +XXX,XX @@ void xen_device_set_event_channel_context(XenDevice *xendev,
610
}
611
612
if (channel->ctx)
613
- aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false,
614
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh),
615
NULL, NULL, NULL, NULL, NULL);
616
617
channel->ctx = ctx;
618
if (ctx) {
619
aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh),
620
- false, xen_device_event, NULL, xen_device_poll,
621
- NULL, channel);
622
+ xen_device_event, NULL, xen_device_poll, NULL,
623
+ channel);
624
}
625
}
626
627
@@ -XXX,XX +XXX,XX @@ void xen_device_unbind_event_channel(XenDevice *xendev,
628
629
QLIST_REMOVE(channel, list);
630
631
- aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), false,
632
+ aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh),
633
NULL, NULL, NULL, NULL, NULL);
634
635
if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) {
636
diff --git a/io/channel-command.c b/io/channel-command.c
637
index XXXXXXX..XXXXXXX 100644
638
--- a/io/channel-command.c
639
+++ b/io/channel-command.c
640
@@ -XXX,XX +XXX,XX @@ static void qio_channel_command_set_aio_fd_handler(QIOChannel *ioc,
641
void *opaque)
642
{
643
QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
644
- aio_set_fd_handler(ctx, cioc->readfd, false,
645
- io_read, NULL, NULL, NULL, opaque);
646
- aio_set_fd_handler(ctx, cioc->writefd, false,
647
- NULL, io_write, NULL, NULL, opaque);
648
+ aio_set_fd_handler(ctx, cioc->readfd, io_read, NULL, NULL, NULL, opaque);
649
+ aio_set_fd_handler(ctx, cioc->writefd, NULL, io_write, NULL, NULL, opaque);
650
}
651
652
653
diff --git a/io/channel-file.c b/io/channel-file.c
654
index XXXXXXX..XXXXXXX 100644
655
--- a/io/channel-file.c
656
+++ b/io/channel-file.c
657
@@ -XXX,XX +XXX,XX @@ static void qio_channel_file_set_aio_fd_handler(QIOChannel *ioc,
658
void *opaque)
659
{
660
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
661
- aio_set_fd_handler(ctx, fioc->fd, false, io_read, io_write,
662
- NULL, NULL, opaque);
663
+ aio_set_fd_handler(ctx, fioc->fd, io_read, io_write, NULL, NULL, opaque);
664
}
665
666
static GSource *qio_channel_file_create_watch(QIOChannel *ioc,
667
diff --git a/io/channel-socket.c b/io/channel-socket.c
668
index XXXXXXX..XXXXXXX 100644
669
--- a/io/channel-socket.c
670
+++ b/io/channel-socket.c
671
@@ -XXX,XX +XXX,XX @@ static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc,
672
void *opaque)
673
{
674
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
675
- aio_set_fd_handler(ctx, sioc->fd, false,
676
- io_read, io_write, NULL, NULL, opaque);
677
+ aio_set_fd_handler(ctx, sioc->fd, io_read, io_write, NULL, NULL, opaque);
678
}
679
680
static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
681
diff --git a/migration/rdma.c b/migration/rdma.c
682
index XXXXXXX..XXXXXXX 100644
683
--- a/migration/rdma.c
684
+++ b/migration/rdma.c
685
@@ -XXX,XX +XXX,XX @@ static void qio_channel_rdma_set_aio_fd_handler(QIOChannel *ioc,
686
{
687
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
688
if (io_read) {
689
- aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd,
690
- false, io_read, io_write, NULL, NULL, opaque);
691
- aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd,
692
- false, io_read, io_write, NULL, NULL, opaque);
693
+ aio_set_fd_handler(ctx, rioc->rdmain->recv_comp_channel->fd, io_read,
694
+ io_write, NULL, NULL, opaque);
695
+ aio_set_fd_handler(ctx, rioc->rdmain->send_comp_channel->fd, io_read,
696
+ io_write, NULL, NULL, opaque);
697
} else {
698
- aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd,
699
- false, io_read, io_write, NULL, NULL, opaque);
700
- aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd,
701
- false, io_read, io_write, NULL, NULL, opaque);
702
+ aio_set_fd_handler(ctx, rioc->rdmaout->recv_comp_channel->fd, io_read,
703
+ io_write, NULL, NULL, opaque);
704
+ aio_set_fd_handler(ctx, rioc->rdmaout->send_comp_channel->fd, io_read,
705
+ io_write, NULL, NULL, opaque);
706
}
707
}
708
709
diff --git a/tests/unit/test-aio.c b/tests/unit/test-aio.c
710
index XXXXXXX..XXXXXXX 100644
711
--- a/tests/unit/test-aio.c
712
+++ b/tests/unit/test-aio.c
713
@@ -XXX,XX +XXX,XX @@ static void *test_acquire_thread(void *opaque)
714
static void set_event_notifier(AioContext *ctx, EventNotifier *notifier,
715
EventNotifierHandler *handler)
716
{
717
- aio_set_event_notifier(ctx, notifier, false, handler, NULL, NULL);
718
+ aio_set_event_notifier(ctx, notifier, handler, NULL, NULL);
719
}
720
721
static void dummy_notifier_read(EventNotifier *n)
722
@@ -XXX,XX +XXX,XX @@ static void test_flush_event_notifier(void)
723
event_notifier_cleanup(&data.e);
724
}
725
726
-static void test_aio_external_client(void)
727
-{
728
- int i, j;
729
-
730
- for (i = 1; i < 3; i++) {
731
- EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true };
732
- event_notifier_init(&data.e, false);
733
- aio_set_event_notifier(ctx, &data.e, true, event_ready_cb, NULL, NULL);
734
- event_notifier_set(&data.e);
735
- for (j = 0; j < i; j++) {
736
- aio_disable_external(ctx);
737
- }
738
- for (j = 0; j < i; j++) {
739
- assert(!aio_poll(ctx, false));
740
- assert(event_notifier_test_and_clear(&data.e));
741
- event_notifier_set(&data.e);
742
- aio_enable_external(ctx);
743
- }
744
- assert(aio_poll(ctx, false));
745
- set_event_notifier(ctx, &data.e, NULL);
746
- event_notifier_cleanup(&data.e);
747
- }
748
-}
749
-
750
static void test_wait_event_notifier_noflush(void)
751
{
752
EventNotifierTestData data = { .n = 0 };
753
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
754
g_test_add_func("/aio/event/wait", test_wait_event_notifier);
755
g_test_add_func("/aio/event/wait/no-flush-cb", test_wait_event_notifier_noflush);
756
g_test_add_func("/aio/event/flush", test_flush_event_notifier);
757
- g_test_add_func("/aio/external-client", test_aio_external_client);
758
g_test_add_func("/aio/timer/schedule", test_timer_schedule);
759
760
g_test_add_func("/aio/coroutine/queue-chaining", test_queue_chaining);
761
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
762
index XXXXXXX..XXXXXXX 100644
763
--- a/tests/unit/test-bdrv-drain.c
764
+++ b/tests/unit/test-bdrv-drain.c
765
@@ -XXX,XX +XXX,XX @@ static void test_graph_change_drain_all(void)
766
767
g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
768
g_assert_cmpint(b_s->drain_count, ==, 0);
769
- g_assert_cmpint(qemu_get_aio_context()->external_disable_cnt, ==, 0);
770
771
bdrv_unref(bs_b);
772
blk_unref(blk_b);
773
diff --git a/tests/unit/test-fdmon-epoll.c b/tests/unit/test-fdmon-epoll.c
774
deleted file mode 100644
775
index XXXXXXX..XXXXXXX
776
--- a/tests/unit/test-fdmon-epoll.c
777
+++ /dev/null
778
@@ -XXX,XX +XXX,XX @@
779
-/* SPDX-License-Identifier: GPL-2.0-or-later */
780
-/*
781
- * fdmon-epoll tests
782
- *
783
- * Copyright (c) 2020 Red Hat, Inc.
784
- */
785
-
786
-#include "qemu/osdep.h"
787
-#include "block/aio.h"
788
-#include "qapi/error.h"
789
-#include "qemu/main-loop.h"
790
-
791
-static AioContext *ctx;
792
-
793
-static void dummy_fd_handler(EventNotifier *notifier)
794
-{
795
- event_notifier_test_and_clear(notifier);
796
-}
797
-
798
-static void add_event_notifiers(EventNotifier *notifiers, size_t n)
799
-{
800
- for (size_t i = 0; i < n; i++) {
801
- event_notifier_init(&notifiers[i], false);
802
- aio_set_event_notifier(ctx, &notifiers[i], false,
803
- dummy_fd_handler, NULL, NULL);
804
- }
805
-}
806
-
807
-static void remove_event_notifiers(EventNotifier *notifiers, size_t n)
808
-{
809
- for (size_t i = 0; i < n; i++) {
810
- aio_set_event_notifier(ctx, &notifiers[i], false, NULL, NULL, NULL);
811
- event_notifier_cleanup(&notifiers[i]);
812
- }
813
-}
814
-
815
-/* Check that fd handlers work when external clients are disabled */
816
-static void test_external_disabled(void)
817
-{
818
- EventNotifier notifiers[100];
819
-
820
- /* fdmon-epoll is only enabled when many fd handlers are registered */
821
- add_event_notifiers(notifiers, G_N_ELEMENTS(notifiers));
822
-
823
- event_notifier_set(&notifiers[0]);
824
- assert(aio_poll(ctx, true));
825
-
826
- aio_disable_external(ctx);
827
- event_notifier_set(&notifiers[0]);
828
- assert(aio_poll(ctx, true));
829
- aio_enable_external(ctx);
830
-
831
- remove_event_notifiers(notifiers, G_N_ELEMENTS(notifiers));
832
-}
833
-
834
-int main(int argc, char **argv)
835
-{
836
- /*
837
- * This code relies on the fact that fdmon-io_uring disables itself when
838
- * the glib main loop is in use. The main loop uses fdmon-poll and upgrades
839
- * to fdmon-epoll when the number of fds exceeds a threshold.
840
- */
841
- qemu_init_main_loop(&error_fatal);
842
- ctx = qemu_get_aio_context();
843
-
844
- while (g_main_context_iteration(NULL, false)) {
845
- /* Do nothing */
846
- }
847
-
848
- g_test_init(&argc, &argv, NULL);
849
- g_test_add_func("/fdmon-epoll/external-disabled", test_external_disabled);
850
- return g_test_run();
851
-}
852
diff --git a/tests/unit/test-nested-aio-poll.c b/tests/unit/test-nested-aio-poll.c
853
index XXXXXXX..XXXXXXX 100644
854
--- a/tests/unit/test-nested-aio-poll.c
855
+++ b/tests/unit/test-nested-aio-poll.c
856
@@ -XXX,XX +XXX,XX @@ static void test(void)
857
858
/* Make the event notifier active (set) right away */
859
event_notifier_init(&td.poll_notifier, 1);
860
- aio_set_event_notifier(td.ctx, &td.poll_notifier, false,
861
+ aio_set_event_notifier(td.ctx, &td.poll_notifier,
862
io_read, io_poll_true, io_poll_ready);
863
864
/* This event notifier will be used later */
865
event_notifier_init(&td.dummy_notifier, 0);
866
- aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
867
+ aio_set_event_notifier(td.ctx, &td.dummy_notifier,
868
io_read, io_poll_false, io_poll_never_ready);
869
870
/* Consume aio_notify() */
871
@@ -XXX,XX +XXX,XX @@ static void test(void)
872
/* Run io_poll()/io_poll_ready() one more time to show it keeps working */
873
g_assert(aio_poll(td.ctx, true));
874
875
- aio_set_event_notifier(td.ctx, &td.dummy_notifier, false,
876
- NULL, NULL, NULL);
877
- aio_set_event_notifier(td.ctx, &td.poll_notifier, false, NULL, NULL, NULL);
878
+ aio_set_event_notifier(td.ctx, &td.dummy_notifier, NULL, NULL, NULL);
879
+ aio_set_event_notifier(td.ctx, &td.poll_notifier, NULL, NULL, NULL);
880
event_notifier_cleanup(&td.dummy_notifier);
881
event_notifier_cleanup(&td.poll_notifier);
882
aio_context_unref(td.ctx);
883
diff --git a/util/aio-posix.c b/util/aio-posix.c
884
index XXXXXXX..XXXXXXX 100644
885
--- a/util/aio-posix.c
886
+++ b/util/aio-posix.c
887
@@ -XXX,XX +XXX,XX @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
888
889
void aio_set_fd_handler(AioContext *ctx,
890
int fd,
891
- bool is_external,
892
IOHandler *io_read,
893
IOHandler *io_write,
894
AioPollFn *io_poll,
895
@@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx,
896
new_node->io_poll = io_poll;
897
new_node->io_poll_ready = io_poll_ready;
898
new_node->opaque = opaque;
899
- new_node->is_external = is_external;
900
901
if (is_new) {
902
new_node->pfd.fd = fd;
903
@@ -XXX,XX +XXX,XX @@ static void aio_set_fd_poll(AioContext *ctx, int fd,
904
905
void aio_set_event_notifier(AioContext *ctx,
906
EventNotifier *notifier,
907
- bool is_external,
908
EventNotifierHandler *io_read,
909
AioPollFn *io_poll,
910
EventNotifierHandler *io_poll_ready)
911
{
912
- aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
913
+ aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
914
(IOHandler *)io_read, NULL, io_poll,
915
(IOHandler *)io_poll_ready, notifier);
916
}
917
@@ -XXX,XX +XXX,XX @@ bool aio_pending(AioContext *ctx)
918
919
/* TODO should this check poll ready? */
920
revents = node->pfd.revents & node->pfd.events;
921
- if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
922
- aio_node_check(ctx, node->is_external)) {
923
+ if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
924
result = true;
925
break;
926
}
927
- if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
928
- aio_node_check(ctx, node->is_external)) {
929
+ if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
930
result = true;
931
break;
932
}
933
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
934
QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
935
}
936
if (!QLIST_IS_INSERTED(node, node_deleted) &&
937
- poll_ready && revents == 0 &&
938
- aio_node_check(ctx, node->is_external) &&
939
- node->io_poll_ready) {
940
+ poll_ready && revents == 0 && node->io_poll_ready) {
941
/*
942
* Remove temporarily to avoid infinite loops when ->io_poll_ready()
943
* calls aio_poll() before clearing the condition that made the poll
944
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
945
946
if (!QLIST_IS_INSERTED(node, node_deleted) &&
947
(revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
948
- aio_node_check(ctx, node->is_external) &&
949
node->io_read) {
950
node->io_read(node->opaque);
951
952
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
953
}
954
if (!QLIST_IS_INSERTED(node, node_deleted) &&
955
(revents & (G_IO_OUT | G_IO_ERR)) &&
956
- aio_node_check(ctx, node->is_external) &&
957
node->io_write) {
958
node->io_write(node->opaque);
959
progress = true;
960
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers_once(AioContext *ctx,
961
AioHandler *tmp;
962
963
QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
964
- if (aio_node_check(ctx, node->is_external) &&
965
- node->io_poll(node->opaque)) {
966
+ if (node->io_poll(node->opaque)) {
967
aio_add_poll_ready_handler(ready_list, node);
968
969
node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
970
diff --git a/util/aio-win32.c b/util/aio-win32.c
971
index XXXXXXX..XXXXXXX 100644
972
--- a/util/aio-win32.c
973
+++ b/util/aio-win32.c
974
@@ -XXX,XX +XXX,XX @@ struct AioHandler {
975
GPollFD pfd;
976
int deleted;
977
void *opaque;
978
- bool is_external;
979
QLIST_ENTRY(AioHandler) node;
980
};
981
982
@@ -XXX,XX +XXX,XX @@ static void aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
983
984
void aio_set_fd_handler(AioContext *ctx,
985
int fd,
986
- bool is_external,
987
IOHandler *io_read,
988
IOHandler *io_write,
989
AioPollFn *io_poll,
990
@@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx,
991
node->opaque = opaque;
992
node->io_read = io_read;
993
node->io_write = io_write;
994
- node->is_external = is_external;
995
996
if (io_read) {
997
bitmask |= FD_READ | FD_ACCEPT | FD_CLOSE;
998
@@ -XXX,XX +XXX,XX @@ void aio_set_fd_handler(AioContext *ctx,
999
1000
void aio_set_event_notifier(AioContext *ctx,
1001
EventNotifier *e,
1002
- bool is_external,
1003
EventNotifierHandler *io_notify,
1004
AioPollFn *io_poll,
1005
EventNotifierHandler *io_poll_ready)
1006
@@ -XXX,XX +XXX,XX @@ void aio_set_event_notifier(AioContext *ctx,
1007
node->e = e;
1008
node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
1009
node->pfd.events = G_IO_IN;
1010
- node->is_external = is_external;
1011
QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, node, node);
1012
1013
g_source_add_poll(&ctx->source, &node->pfd);
1014
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
1015
/* fill fd sets */
1016
count = 0;
1017
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
1018
- if (!node->deleted && node->io_notify
1019
- && aio_node_check(ctx, node->is_external)) {
1020
+ if (!node->deleted && node->io_notify) {
1021
assert(count < MAXIMUM_WAIT_OBJECTS);
1022
events[count++] = event_notifier_get_handle(node->e);
1023
}
1024
diff --git a/util/async.c b/util/async.c
1025
index XXXXXXX..XXXXXXX 100644
1026
--- a/util/async.c
1027
+++ b/util/async.c
1028
@@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source)
1029
g_free(bh);
1030
}
1031
1032
- aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL, NULL);
1033
+ aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL, NULL);
1034
event_notifier_cleanup(&ctx->notifier);
1035
qemu_rec_mutex_destroy(&ctx->lock);
1036
qemu_lockcnt_destroy(&ctx->list_lock);
1037
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
1038
QSLIST_INIT(&ctx->scheduled_coroutines);
1039
1040
aio_set_event_notifier(ctx, &ctx->notifier,
1041
- false,
1042
aio_context_notifier_cb,
1043
aio_context_notifier_poll,
1044
aio_context_notifier_poll_ready);
1045
diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c
1046
index XXXXXXX..XXXXXXX 100644
1047
--- a/util/fdmon-epoll.c
1048
+++ b/util/fdmon-epoll.c
1049
@@ -XXX,XX +XXX,XX @@ static int fdmon_epoll_wait(AioContext *ctx, AioHandlerList *ready_list,
1050
int i, ret = 0;
1051
struct epoll_event events[128];
1052
1053
- /* Fall back while external clients are disabled */
1054
- if (qatomic_read(&ctx->external_disable_cnt)) {
1055
- return fdmon_poll_ops.wait(ctx, ready_list, timeout);
1056
- }
1057
-
1058
if (timeout > 0) {
1059
ret = qemu_poll_ns(&pfd, 1, timeout);
1060
if (ret > 0) {
1061
@@ -XXX,XX +XXX,XX @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd)
1062
return false;
1063
}
1064
1065
- /* Do not upgrade while external clients are disabled */
1066
- if (qatomic_read(&ctx->external_disable_cnt)) {
1067
- return false;
1068
- }
1069
-
1070
if (npfd < EPOLL_ENABLE_THRESHOLD) {
1071
return false;
1072
}
1073
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
1074
index XXXXXXX..XXXXXXX 100644
1075
--- a/util/fdmon-io_uring.c
1076
+++ b/util/fdmon-io_uring.c
1077
@@ -XXX,XX +XXX,XX @@ static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
1078
unsigned wait_nr = 1; /* block until at least one cqe is ready */
1079
int ret;
1080
1081
- /* Fall back while external clients are disabled */
1082
- if (qatomic_read(&ctx->external_disable_cnt)) {
1083
- return fdmon_poll_ops.wait(ctx, ready_list, timeout);
1084
- }
1085
-
1086
if (timeout == 0) {
1087
wait_nr = 0; /* non-blocking */
1088
} else if (timeout > 0) {
1089
@@ -XXX,XX +XXX,XX @@ static bool fdmon_io_uring_need_wait(AioContext *ctx)
1090
return true;
1091
}
1092
1093
- /* Are we falling back to fdmon-poll? */
1094
- return qatomic_read(&ctx->external_disable_cnt);
1095
+ return false;
1096
}
1097
1098
static const FDMonOps fdmon_io_uring_ops = {
1099
diff --git a/util/fdmon-poll.c b/util/fdmon-poll.c
1100
index XXXXXXX..XXXXXXX 100644
1101
--- a/util/fdmon-poll.c
1102
+++ b/util/fdmon-poll.c
1103
@@ -XXX,XX +XXX,XX @@ static int fdmon_poll_wait(AioContext *ctx, AioHandlerList *ready_list,
1104
assert(npfd == 0);
1105
1106
QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
1107
- if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
1108
- && aio_node_check(ctx, node->is_external)) {
1109
+ if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events) {
1110
add_pollfd(node);
1111
}
1112
}
1113
diff --git a/util/main-loop.c b/util/main-loop.c
1114
index XXXXXXX..XXXXXXX 100644
1115
--- a/util/main-loop.c
1116
+++ b/util/main-loop.c
1117
@@ -XXX,XX +XXX,XX @@ void qemu_set_fd_handler(int fd,
1118
void *opaque)
1119
{
1120
iohandler_init();
1121
- aio_set_fd_handler(iohandler_ctx, fd, false,
1122
- fd_read, fd_write, NULL, NULL, opaque);
1123
+ aio_set_fd_handler(iohandler_ctx, fd, fd_read, fd_write, NULL, NULL,
1124
+ opaque);
1125
}
1126
1127
void event_notifier_set_handler(EventNotifier *e,
1128
EventNotifierHandler *handler)
1129
{
1130
iohandler_init();
1131
- aio_set_event_notifier(iohandler_ctx, e, false,
1132
- handler, NULL, NULL);
1133
+ aio_set_event_notifier(iohandler_ctx, e, handler, NULL, NULL);
1134
}
1135
diff --git a/util/qemu-coroutine-io.c b/util/qemu-coroutine-io.c
1136
index XXXXXXX..XXXXXXX 100644
1137
--- a/util/qemu-coroutine-io.c
1138
+++ b/util/qemu-coroutine-io.c
1139
@@ -XXX,XX +XXX,XX @@ typedef struct {
1140
static void fd_coroutine_enter(void *opaque)
1141
{
1142
FDYieldUntilData *data = opaque;
1143
- aio_set_fd_handler(data->ctx, data->fd, false,
1144
- NULL, NULL, NULL, NULL, NULL);
1145
+ aio_set_fd_handler(data->ctx, data->fd, NULL, NULL, NULL, NULL, NULL);
1146
qemu_coroutine_enter(data->co);
1147
}
1148
1149
@@ -XXX,XX +XXX,XX @@ void coroutine_fn yield_until_fd_readable(int fd)
1150
data.ctx = qemu_get_current_aio_context();
1151
data.co = qemu_coroutine_self();
1152
data.fd = fd;
1153
- aio_set_fd_handler(
1154
- data.ctx, fd, false, fd_coroutine_enter, NULL, NULL, NULL, &data);
1155
+ aio_set_fd_handler(data.ctx, fd, fd_coroutine_enter, NULL, NULL, NULL,
1156
+ &data);
1157
qemu_coroutine_yield();
1158
}
1159
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
1160
index XXXXXXX..XXXXXXX 100644
1161
--- a/util/vhost-user-server.c
1162
+++ b/util/vhost-user-server.c
1163
@@ -XXX,XX +XXX,XX @@ set_watch(VuDev *vu_dev, int fd, int vu_evt,
1164
vu_fd_watch->fd = fd;
1165
vu_fd_watch->cb = cb;
1166
qemu_socket_set_nonblock(fd);
1167
- aio_set_fd_handler(server->ioc->ctx, fd, false, kick_handler,
1168
+ aio_set_fd_handler(server->ioc->ctx, fd, kick_handler,
1169
NULL, NULL, NULL, vu_fd_watch);
1170
vu_fd_watch->vu_dev = vu_dev;
1171
vu_fd_watch->pvt = pvt;
1172
@@ -XXX,XX +XXX,XX @@ static void remove_watch(VuDev *vu_dev, int fd)
1173
if (!vu_fd_watch) {
1174
return;
1175
}
1176
- aio_set_fd_handler(server->ioc->ctx, fd, false,
1177
- NULL, NULL, NULL, NULL, NULL);
1178
+ aio_set_fd_handler(server->ioc->ctx, fd, NULL, NULL, NULL, NULL, NULL);
1179
1180
QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
1181
g_free(vu_fd_watch);
1182
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_stop(VuServer *server)
1183
VuFdWatch *vu_fd_watch;
1184
1185
QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
1186
- aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false,
1187
+ aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
1188
NULL, NULL, NULL, NULL, vu_fd_watch);
1189
}
1190
1191
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
1192
qio_channel_attach_aio_context(server->ioc, ctx);
1193
1194
QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
1195
- aio_set_fd_handler(ctx, vu_fd_watch->fd, false, kick_handler, NULL,
1196
+ aio_set_fd_handler(ctx, vu_fd_watch->fd, kick_handler, NULL,
1197
NULL, NULL, vu_fd_watch);
1198
}
1199
1200
@@ -XXX,XX +XXX,XX @@ void vhost_user_server_detach_aio_context(VuServer *server)
1201
VuFdWatch *vu_fd_watch;
1202
1203
QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
1204
- aio_set_fd_handler(server->ctx, vu_fd_watch->fd, false,
1205
+ aio_set_fd_handler(server->ctx, vu_fd_watch->fd,
1206
NULL, NULL, NULL, NULL, vu_fd_watch);
1207
}
1208
1209
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
1210
index XXXXXXX..XXXXXXX 100644
1211
--- a/tests/unit/meson.build
1212
+++ b/tests/unit/meson.build
1213
@@ -XXX,XX +XXX,XX @@ if have_block
1214
if nettle.found() or gcrypt.found()
1215
tests += {'test-crypto-pbkdf': [io]}
1216
endif
1217
- if config_host_data.get('CONFIG_EPOLL_CREATE1')
1218
- tests += {'test-fdmon-epoll': [testblock]}
1219
- endif
1220
endif
1221
1222
if have_system
264
--
1223
--
265
2.25.3
1224
2.40.1
266
1225
267
1226
diff view generated by jsdifflib