1
The following changes since commit dd2db39d78431ab5a0b78777afaab3d61e94533e:
1
The following changes since commit 8cb41fda78c7ebde0dd248c6afe1d336efb0de50:
2
2
3
Merge remote-tracking branch 'remotes/ehabkost-gl/tags/x86-next-pull-request' into staging (2021-06-01 21:23:26 +0100)
3
Merge remote-tracking branch 'remotes/philmd/tags/machine-20211101' into staging (2021-11-02 05:53:45 -0400)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
8
8
9
for you to fetch changes up to b317006a3f1f04191a7981cef83417cb2477213b:
9
for you to fetch changes up to a8951438946d72d74c9bdbdb38fce95aa2973a88:
10
10
11
docs/secure-coding-practices: Describe how to use 'null-co' block driver (2021-06-02 14:29:14 +0200)
11
block/nvme: Extract nvme_free_queue() from nvme_free_queue_pair() (2021-11-02 15:49:13 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block layer patches
14
Block layer patches
15
15
16
- NBD server: Fix crashes related to switching between AioContexts
16
- Fail gracefully when blockdev-snapshot creates loops
17
- file-posix: Workaround for discard/write_zeroes on buggy filesystems
17
- ide: Fix IDENTIFY DEVICE for disks > 128 GiB
18
- Follow-up fixes for the reopen vs. permission changes
18
- file-posix: Fix return value translation for AIO discards
19
- quorum: Fix error handling for flush
19
- file-posix: add 'aio-max-batch' option
20
- block-copy: Refactor copy_range handling
20
- rbd: implement bdrv_co_block_status
21
- docs: Describe how to use 'null-co' block driver
21
- Code cleanups and build fixes
22
22
23
----------------------------------------------------------------
23
----------------------------------------------------------------
24
Lukas Straub (1):
24
Ari Sundholm (1):
25
block/quorum: Provide .bdrv_co_flush instead of .bdrv_co_flush_to_disk
25
block/file-posix: Fix return value translation for AIO discards
26
26
27
Philippe Mathieu-Daudé (1):
27
Fabrice Fontaine (1):
28
docs/secure-coding-practices: Describe how to use 'null-co' block driver
28
block/export/fuse.c: fix musl build
29
29
30
Sergio Lopez (2):
30
Hanna Reitz (1):
31
block-backend: add drained_poll
31
block-backend: Silence clang -m32 compiler warning
32
nbd/server: Use drained block ops to quiesce the server
33
32
34
Thomas Huth (2):
33
Kevin Wolf (1):
35
block/file-posix: Fix problem with fallocate(PUNCH_HOLE) on GPFS
34
block: Fail gracefully when blockdev-snapshot creates loops
36
block/file-posix: Try other fallbacks after invalid FALLOC_FL_ZERO_RANGE
37
35
38
Vladimir Sementsov-Ogievskiy (14):
36
Peter Lieven (1):
39
qemu-io-cmds: assert that we don't have .perm requested in no-blk case
37
block/rbd: implement bdrv_co_block_status
40
block/vvfat: child_vvfat_qcow: add .get_parent_aio_context, fix crash
41
block/vvfat: fix vvfat_child_perm crash
42
block: consistently use bdrv_is_read_only()
43
block: drop BlockDriverState::read_only
44
block: drop BlockBackendRootState::read_only
45
block: document child argument of bdrv_attach_child_common()
46
block-backend: improve blk_root_get_parent_desc()
47
block: improve bdrv_child_get_parent_desc()
48
block/vvfat: inherit child_vvfat_qcow from child_of_bds
49
block: simplify bdrv_child_user_desc()
50
block: improve permission conflict error message
51
block-copy: fix block_copy_task_entry() progress update
52
block-copy: refactor copy_range handling
53
38
54
docs/devel/secure-coding-practices.rst | 9 ++++
39
Philippe Mathieu-Daudé (3):
55
include/block/block.h | 1 +
40
block/nvme: Automatically free qemu_memalign() with QEMU_AUTO_VFREE
56
include/block/block_int.h | 2 -
41
block/nvme: Display CQ/SQ pointer in nvme_free_queue_pair()
57
include/sysemu/block-backend.h | 4 ++
42
block/nvme: Extract nvme_free_queue() from nvme_free_queue_pair()
58
block.c | 82 ++++++++++++++++++++--------------
43
59
block/block-backend.c | 26 +++++------
44
Samuel Thibault (1):
60
block/block-copy.c | 80 ++++++++++++++++++++++-----------
45
ide: Cap LBA28 capacity announcement to 2^28-1
61
block/commit.c | 2 +-
46
62
block/file-posix.c | 29 ++++++++----
47
Stefano Garzarella (3):
63
block/io.c | 4 +-
48
file-posix: add `aio-max-batch` option
64
block/qapi.c | 2 +-
49
linux-aio: add `dev_max_batch` parameter to laio_co_submit()
65
block/qcow2-snapshot.c | 2 +-
50
linux-aio: add `dev_max_batch` parameter to laio_io_unplug()
66
block/qcow2.c | 5 +--
51
67
block/quorum.c | 2 +-
52
qapi/block-core.json | 7 +++
68
block/snapshot.c | 2 +-
53
include/block/raw-aio.h | 6 ++-
69
block/vhdx-log.c | 2 +-
54
block.c | 10 ++++
70
block/vvfat.c | 14 +++---
55
block/block-backend.c | 2 +-
71
blockdev.c | 3 +-
56
block/export/fuse.c | 4 ++
72
nbd/server.c | 82 +++++++++++++++++++++++++---------
57
block/file-posix.c | 18 ++++++--
73
qemu-io-cmds.c | 14 +++++-
58
block/linux-aio.c | 38 ++++++++++-----
74
tests/unit/test-block-iothread.c | 6 ---
59
block/nvme.c | 22 +++++----
75
tests/qemu-iotests/283.out | 2 +-
60
block/rbd.c | 112 +++++++++++++++++++++++++++++++++++++++++++++
76
tests/qemu-iotests/307.out | 2 +-
61
hw/ide/core.c | 8 +++-
77
tests/qemu-iotests/tests/qsd-jobs.out | 2 +-
62
block/trace-events | 2 +-
78
24 files changed, 241 insertions(+), 138 deletions(-)
63
tests/qemu-iotests/085 | 31 ++++++++++++-
64
tests/qemu-iotests/085.out | 33 +++++++++++--
65
13 files changed, 258 insertions(+), 35 deletions(-)
79
66
80
67
diff view generated by jsdifflib
Deleted patch
1
From: Lukas Straub <lukasstraub2@web.de>
2
1
3
The quorum block driver uses a custom flush callback to handle the
4
case when some children return io errors. In that case it still
5
returns success if enough children are healthy.
6
However, it provides it as the .bdrv_co_flush_to_disk callback, not
7
as .bdrv_co_flush. This causes the block layer to do it's own
8
generic flushing for the children instead, which doesn't handle
9
errors properly.
10
11
Fix this by providing .bdrv_co_flush instead of
12
.bdrv_co_flush_to_disk so the block layer uses the custom flush
13
callback.
14
15
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
16
Reported-by: Minghao Yuan <meeho@qq.com>
17
Message-Id: <20210518134214.11ccf05f@gecko.fritz.box>
18
Tested-by: Zhang Chen <chen.zhang@intel.com>
19
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
20
---
21
block/quorum.c | 2 +-
22
1 file changed, 1 insertion(+), 1 deletion(-)
23
24
diff --git a/block/quorum.c b/block/quorum.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/block/quorum.c
27
+++ b/block/quorum.c
28
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_quorum = {
29
.bdrv_dirname = quorum_dirname,
30
.bdrv_co_block_status = quorum_co_block_status,
31
32
- .bdrv_co_flush_to_disk = quorum_co_flush,
33
+ .bdrv_co_flush = quorum_co_flush,
34
35
.bdrv_getlength = quorum_getlength,
36
37
--
38
2.30.2
39
40
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Coverity thinks blk may be NULL. It's a false-positive, as described in
4
a new comment.
5
6
Fixes: Coverity CID 1453194
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Message-Id: <20210519090532.3753-1-vsementsov@virtuozzo.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
11
qemu-io-cmds.c | 14 ++++++++++++--
12
1 file changed, 12 insertions(+), 2 deletions(-)
13
14
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/qemu-io-cmds.c
17
+++ b/qemu-io-cmds.c
18
@@ -XXX,XX +XXX,XX @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc,
19
return -EINVAL;
20
}
21
22
- /* Request additional permissions if necessary for this command. The caller
23
+ /*
24
+ * Request additional permissions if necessary for this command. The caller
25
* is responsible for restoring the original permissions afterwards if this
26
- * is what it wants. */
27
+ * is what it wants.
28
+ *
29
+ * Coverity thinks that blk may be NULL in the following if condition. It's
30
+ * not so: in init_check_command() we fail if blk is NULL for command with
31
+ * both CMD_FLAG_GLOBAL and CMD_NOFILE_OK flags unset. And in
32
+ * qemuio_add_command() we assert that command with non-zero .perm field
33
+ * doesn't set this flags. So, the following assertion is to silence
34
+ * Coverity:
35
+ */
36
+ assert(blk || !ct->perm);
37
if (ct->perm && blk_is_available(blk)) {
38
uint64_t orig_perm, orig_shared_perm;
39
blk_get_perm(blk, &orig_perm, &orig_shared_perm);
40
--
41
2.30.2
42
43
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Commit 3ca1f3225727419ba573673b744edac10904276f
4
"block: BdrvChildClass: add .get_parent_aio_context handler" introduced
5
new handler and commit 228ca37e12f97788e05bd0c92f89b3e5e4019607
6
"block: drop ctx argument from bdrv_root_attach_child" made a generic
7
use of it. But 3ca1f3225727419ba573673b744edac10904276f didn't update
8
child_vvfat_qcow. Fix that.
9
10
Before that fix the command
11
12
./build/qemu-system-x86_64 -usb -device usb-storage,drive=fat16 \
13
-drive file=fat:rw:fat-type=16:"<path of a host folder>",id=fat16,format=raw,if=none
14
15
crashes:
16
17
1 bdrv_child_get_parent_aio_context (c=0x559d62426d20)
18
at ../block.c:1440
19
2 bdrv_attach_child_common
20
(child_bs=0x559d62468190, child_name=0x559d606f9e3d "write-target",
21
child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3,
22
perm=3, shared_perm=4, opaque=0x559d62445690,
23
child=0x7ffc74c2acc8, tran=0x559d6246ddd0, errp=0x7ffc74c2ae60)
24
at ../block.c:2795
25
3 bdrv_attach_child_noperm
26
(parent_bs=0x559d62445690, child_bs=0x559d62468190,
27
child_name=0x559d606f9e3d "write-target",
28
child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3,
29
child=0x7ffc74c2acc8, tran=0x559d6246ddd0, errp=0x7ffc74c2ae60) at
30
../block.c:2855
31
4 bdrv_attach_child
32
(parent_bs=0x559d62445690, child_bs=0x559d62468190,
33
child_name=0x559d606f9e3d "write-target",
34
child_class=0x559d60c58d20 <child_vvfat_qcow>, child_role=3,
35
errp=0x7ffc74c2ae60) at ../block.c:2953
36
5 bdrv_open_child
37
(filename=0x559d62464b80 "/var/tmp/vl.h3TIS4",
38
options=0x559d6246ec20, bdref_key=0x559d606f9e3d "write-target",
39
parent=0x559d62445690, child_class=0x559d60c58d20
40
<child_vvfat_qcow>, child_role=3, allow_none=false,
41
errp=0x7ffc74c2ae60) at ../block.c:3351
42
6 enable_write_target (bs=0x559d62445690, errp=0x7ffc74c2ae60) at
43
../block/vvfat.c:3176
44
7 vvfat_open (bs=0x559d62445690, options=0x559d6244adb0, flags=155650,
45
errp=0x7ffc74c2ae60) at ../block/vvfat.c:1236
46
8 bdrv_open_driver (bs=0x559d62445690, drv=0x559d60d4f7e0
47
<bdrv_vvfat>, node_name=0x0,
48
options=0x559d6244adb0, open_flags=155650,
49
errp=0x7ffc74c2af70) at ../block.c:1557
50
9 bdrv_open_common (bs=0x559d62445690, file=0x0,
51
options=0x559d6244adb0, errp=0x7ffc74c2af70) at
52
...
53
54
(gdb) fr 1
55
#1 0x0000559d603ea3bf in bdrv_child_get_parent_aio_context
56
(c=0x559d62426d20) at ../block.c:1440
57
1440 return c->klass->get_parent_aio_context(c);
58
(gdb) p c->klass
59
$1 = (const BdrvChildClass *) 0x559d60c58d20 <child_vvfat_qcow>
60
(gdb) p c->klass->get_parent_aio_context
61
$2 = (AioContext *(*)(BdrvChild *)) 0x0
62
63
Fixes: 3ca1f3225727419ba573673b744edac10904276f
64
Fixes: 228ca37e12f97788e05bd0c92f89b3e5e4019607
65
Reported-by: John Arbuckle <programmingkidx@gmail.com>
66
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
67
Message-Id: <20210524101257.119377-2-vsementsov@virtuozzo.com>
68
Tested-by: John Arbuckle <programmingkidx@gmail.com>
69
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
70
---
71
include/block/block.h | 1 +
72
block.c | 4 ++--
73
block/vvfat.c | 1 +
74
3 files changed, 4 insertions(+), 2 deletions(-)
75
76
diff --git a/include/block/block.h b/include/block/block.h
77
index XXXXXXX..XXXXXXX 100644
78
--- a/include/block/block.h
79
+++ b/include/block/block.h
80
@@ -XXX,XX +XXX,XX @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
81
bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
82
GSList **ignore, Error **errp);
83
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
84
+AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
85
86
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
87
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
88
diff --git a/block.c b/block.c
89
index XXXXXXX..XXXXXXX 100644
90
--- a/block.c
91
+++ b/block.c
92
@@ -XXX,XX +XXX,XX @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
93
return 0;
94
}
95
96
-static AioContext *bdrv_child_cb_get_parent_aio_context(BdrvChild *c)
97
+AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
98
{
99
BlockDriverState *bs = c->opaque;
100
101
@@ -XXX,XX +XXX,XX @@ const BdrvChildClass child_of_bds = {
102
.can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
103
.set_aio_ctx = bdrv_child_cb_set_aio_ctx,
104
.update_filename = bdrv_child_cb_update_filename,
105
- .get_parent_aio_context = bdrv_child_cb_get_parent_aio_context,
106
+ .get_parent_aio_context = child_of_bds_get_parent_aio_context,
107
};
108
109
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
110
diff --git a/block/vvfat.c b/block/vvfat.c
111
index XXXXXXX..XXXXXXX 100644
112
--- a/block/vvfat.c
113
+++ b/block/vvfat.c
114
@@ -XXX,XX +XXX,XX @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format,
115
static const BdrvChildClass child_vvfat_qcow = {
116
.parent_is_bds = true,
117
.inherit_options = vvfat_qcow_options,
118
+ .get_parent_aio_context = child_of_bds_get_parent_aio_context,
119
};
120
121
static int enable_write_target(BlockDriverState *bs, Error **errp)
122
--
123
2.30.2
124
125
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
It's wrong to rely on s->qcow in vvfat_child_perm, as on permission
4
update during bdrv_open_child() call this field is not set yet.
5
6
Still prior to aa5a04c7db27eea6b36de32f241b155f0d9ce34d, it didn't
7
crash, as bdrv_open_child passed NULL as child to bdrv_child_perm(),
8
and NULL was equal to NULL in assertion (still, it was bad guarantee
9
for child being s->qcow, not backing :).
10
11
Since aa5a04c7db27eea6b36de32f241b155f0d9ce34d
12
"add bdrv_attach_child_noperm" bdrv_refresh_perms called on parent node
13
when attaching child, and new correct child pointer is passed to
14
.bdrv_child_perm. Still, s->qcow is NULL at the moment. Let's rely only
15
on role instead.
16
17
Without that fix,
18
./build/qemu-system-x86_64 -usb -device usb-storage,drive=fat16 \
19
-drive \
20
file=fat:rw:fat-type=16:"<path of a host folder>",id=fat16,format=raw,if=none
21
22
crashes:
23
(gdb) bt
24
0 raise () at /lib64/libc.so.6
25
1 abort () at /lib64/libc.so.6
26
2 _nl_load_domain.cold () at /lib64/libc.so.6
27
3 annobin_assert.c_end () at /lib64/libc.so.6
28
4 vvfat_child_perm (bs=0x559186f3d690, c=0x559186f1ed20, role=3,
29
reopen_queue=0x0, perm=0, shared=31,
30
nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at
31
../block/vvfat.c:3214
32
5 bdrv_child_perm (bs=0x559186f3d690, child_bs=0x559186f60190,
33
c=0x559186f1ed20, role=3, reopen_queue=0x0,
34
parent_perm=0, parent_shared=31,
35
nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0)
36
at ../block.c:2094
37
6 bdrv_node_refresh_perm (bs=0x559186f3d690, q=0x0,
38
tran=0x559186f65850, errp=0x7ffe56f28530) at
39
../block.c:2336
40
7 bdrv_list_refresh_perms (list=0x559186db5b90 = {...}, q=0x0,
41
tran=0x559186f65850, errp=0x7ffe56f28530)
42
at ../block.c:2358
43
8 bdrv_refresh_perms (bs=0x559186f3d690, errp=0x7ffe56f28530) at
44
../block.c:2419
45
9 bdrv_attach_child
46
(parent_bs=0x559186f3d690, child_bs=0x559186f60190,
47
child_name=0x559184d83e3d "write-target",
48
child_class=0x5591852f3b00 <child_vvfat_qcow>, child_role=3,
49
errp=0x7ffe56f28530) at ../block.c:2959
50
10 bdrv_open_child
51
(filename=0x559186f5cb80 "/var/tmp/vl.7WYmFU",
52
options=0x559186f66c20, bdref_key=0x559184d83e3d "write-target",
53
parent=0x559186f3d690, child_class=0x5591852f3b00
54
<child_vvfat_qcow>, child_role=3, allow_none=false,
55
errp=0x7ffe56f28530) at ../block.c:3351
56
11 enable_write_target (bs=0x559186f3d690, errp=0x7ffe56f28530) at
57
../block/vvfat.c:3177
58
12 vvfat_open (bs=0x559186f3d690, options=0x559186f42db0, flags=155650,
59
errp=0x7ffe56f28530) at ../block/vvfat.c:1236
60
13 bdrv_open_driver (bs=0x559186f3d690, drv=0x5591853d97e0
61
<bdrv_vvfat>, node_name=0x0,
62
options=0x559186f42db0, open_flags=155650,
63
errp=0x7ffe56f28640) at ../block.c:1557
64
14 bdrv_open_common (bs=0x559186f3d690, file=0x0,
65
options=0x559186f42db0, errp=0x7ffe56f28640) at
66
../block.c:1833
67
...
68
69
(gdb) fr 4
70
#4 vvfat_child_perm (bs=0x559186f3d690, c=0x559186f1ed20, role=3,
71
reopen_queue=0x0, perm=0, shared=31,
72
nperm=0x7ffe56f28298, nshared=0x7ffe56f282a0) at
73
../block/vvfat.c:3214
74
3214 assert(c == s->qcow || (role & BDRV_CHILD_COW));
75
(gdb) p role
76
$1 = 3 # BDRV_CHILD_DATA | BDRV_CHILD_METADATA
77
(gdb) p *c
78
$2 = {bs = 0x559186f60190, name = 0x559186f669d0 "write-target", klass
79
= 0x5591852f3b00 <child_vvfat_qcow>, role = 3, opaque =
80
0x559186f3d690, perm = 3, shared_perm = 4, frozen = false,
81
parent_quiesce_counter = 0, next = {le_next = 0x0, le_prev =
82
0x559186f41818}, next_parent = {le_next = 0x0, le_prev =
83
0x559186f64320}}
84
(gdb) p s->qcow
85
$3 = (BdrvChild *) 0x0
86
87
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
88
Message-Id: <20210524101257.119377-3-vsementsov@virtuozzo.com>
89
Tested-by: John Arbuckle <programmingkidx@gmail.com>
90
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
91
---
92
block/vvfat.c | 7 ++-----
93
1 file changed, 2 insertions(+), 5 deletions(-)
94
95
diff --git a/block/vvfat.c b/block/vvfat.c
96
index XXXXXXX..XXXXXXX 100644
97
--- a/block/vvfat.c
98
+++ b/block/vvfat.c
99
@@ -XXX,XX +XXX,XX @@ static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
100
uint64_t perm, uint64_t shared,
101
uint64_t *nperm, uint64_t *nshared)
102
{
103
- BDRVVVFATState *s = bs->opaque;
104
-
105
- assert(c == s->qcow || (role & BDRV_CHILD_COW));
106
-
107
- if (c == s->qcow) {
108
+ if (role & BDRV_CHILD_DATA) {
109
/* This is a private node, nobody should try to attach to it */
110
*nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
111
*nshared = BLK_PERM_WRITE_UNCHANGED;
112
} else {
113
+ assert(role & BDRV_CHILD_COW);
114
/* The backing file is there so 'commit' can use it. vvfat doesn't
115
* access it in any way. */
116
*nperm = 0;
117
--
118
2.30.2
119
120
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
It's better to use accessor function instead of bs->read_only directly.
4
In some places use bdrv_is_writable() instead of
5
checking both BDRV_O_RDWR set and BDRV_O_INACTIVE not set.
6
7
In bdrv_open_common() it's a bit strange to add one more variable, but
8
we are going to drop bs->read_only in the next patch, so new ro local
9
variable substitutes it here.
10
11
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
Message-Id: <20210527154056.70294-2-vsementsov@virtuozzo.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
---
15
block.c | 11 +++++++----
16
block/block-backend.c | 2 +-
17
block/commit.c | 2 +-
18
block/io.c | 4 ++--
19
block/qapi.c | 2 +-
20
block/qcow2-snapshot.c | 2 +-
21
block/qcow2.c | 5 ++---
22
block/snapshot.c | 2 +-
23
block/vhdx-log.c | 2 +-
24
9 files changed, 17 insertions(+), 15 deletions(-)
25
26
diff --git a/block.c b/block.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/block.c
29
+++ b/block.c
30
@@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
31
QemuOpts *opts;
32
BlockDriver *drv;
33
Error *local_err = NULL;
34
+ bool ro;
35
36
assert(bs->file == NULL);
37
assert(options != NULL && bs->options != options);
38
@@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
39
40
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
41
42
- if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
43
- if (!bs->read_only && bdrv_is_whitelisted(drv, true)) {
44
+ ro = bdrv_is_read_only(bs);
45
+
46
+ if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
47
+ if (!ro && bdrv_is_whitelisted(drv, true)) {
48
ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
49
} else {
50
ret = -ENOTSUP;
51
}
52
if (ret < 0) {
53
error_setg(errp,
54
- !bs->read_only && bdrv_is_whitelisted(drv, true)
55
+ !ro && bdrv_is_whitelisted(drv, true)
56
? "Driver '%s' can only be used for read-only devices"
57
: "Driver '%s' is not whitelisted",
58
drv->format_name);
59
@@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
60
assert(qatomic_read(&bs->copy_on_read) == 0);
61
62
if (bs->open_flags & BDRV_O_COPY_ON_READ) {
63
- if (!bs->read_only) {
64
+ if (!ro) {
65
bdrv_enable_copy_on_read(bs);
66
} else {
67
error_setg(errp, "Can't use copy-on-read on read-only device");
68
diff --git a/block/block-backend.c b/block/block-backend.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/block/block-backend.c
71
+++ b/block/block-backend.c
72
@@ -XXX,XX +XXX,XX @@ void blk_update_root_state(BlockBackend *blk)
73
assert(blk->root);
74
75
blk->root_state.open_flags = blk->root->bs->open_flags;
76
- blk->root_state.read_only = blk->root->bs->read_only;
77
+ blk->root_state.read_only = bdrv_is_read_only(blk->root->bs);
78
blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
79
}
80
81
diff --git a/block/commit.c b/block/commit.c
82
index XXXXXXX..XXXXXXX 100644
83
--- a/block/commit.c
84
+++ b/block/commit.c
85
@@ -XXX,XX +XXX,XX @@ int bdrv_commit(BlockDriverState *bs)
86
return -EBUSY;
87
}
88
89
- ro = backing_file_bs->read_only;
90
+ ro = bdrv_is_read_only(backing_file_bs);
91
92
if (ro) {
93
if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) {
94
diff --git a/block/io.c b/block/io.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/block/io.c
97
+++ b/block/io.c
98
@@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
99
100
bdrv_check_request(offset, bytes, &error_abort);
101
102
- if (bs->read_only) {
103
+ if (bdrv_is_read_only(bs)) {
104
return -EPERM;
105
}
106
107
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
108
if (new_bytes) {
109
bdrv_make_request_serialising(&req, 1);
110
}
111
- if (bs->read_only) {
112
+ if (bdrv_is_read_only(bs)) {
113
error_setg(errp, "Image is read-only");
114
ret = -EACCES;
115
goto out;
116
diff --git a/block/qapi.c b/block/qapi.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/block/qapi.c
119
+++ b/block/qapi.c
120
@@ -XXX,XX +XXX,XX @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
121
122
info = g_malloc0(sizeof(*info));
123
info->file = g_strdup(bs->filename);
124
- info->ro = bs->read_only;
125
+ info->ro = bdrv_is_read_only(bs);
126
info->drv = g_strdup(bs->drv->format_name);
127
info->encrypted = bs->encrypted;
128
129
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
130
index XXXXXXX..XXXXXXX 100644
131
--- a/block/qcow2-snapshot.c
132
+++ b/block/qcow2-snapshot.c
133
@@ -XXX,XX +XXX,XX @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
134
int new_l1_bytes;
135
int ret;
136
137
- assert(bs->read_only);
138
+ assert(bdrv_is_read_only(bs));
139
140
/* Search the snapshot */
141
snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
142
diff --git a/block/qcow2.c b/block/qcow2.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/block/qcow2.c
145
+++ b/block/qcow2.c
146
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
147
148
/* Clear unknown autoclear feature bits */
149
update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
150
- update_header =
151
- update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
152
+ update_header = update_header && bdrv_is_writable(bs);
153
if (update_header) {
154
s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
155
}
156
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
157
bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
158
159
/* Repair image if dirty */
160
- if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
161
+ if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) &&
162
(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
163
BdrvCheckResult result = {0};
164
165
diff --git a/block/snapshot.c b/block/snapshot.c
166
index XXXXXXX..XXXXXXX 100644
167
--- a/block/snapshot.c
168
+++ b/block/snapshot.c
169
@@ -XXX,XX +XXX,XX @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
170
error_setg(errp, "snapshot_id and name are both NULL");
171
return -EINVAL;
172
}
173
- if (!bs->read_only) {
174
+ if (!bdrv_is_read_only(bs)) {
175
error_setg(errp, "Device is not readonly");
176
return -EINVAL;
177
}
178
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
179
index XXXXXXX..XXXXXXX 100644
180
--- a/block/vhdx-log.c
181
+++ b/block/vhdx-log.c
182
@@ -XXX,XX +XXX,XX @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
183
}
184
185
if (logs.valid) {
186
- if (bs->read_only) {
187
+ if (bdrv_is_read_only(bs)) {
188
bdrv_refresh_filename(bs);
189
ret = -EPERM;
190
error_setg(errp,
191
--
192
2.30.2
193
194
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
This variable is just a cache for !(bs->open_flags & BDRV_O_RDWR),
4
which we have to synchronize everywhere. Let's just drop it and
5
consistently use bdrv_is_read_only().
6
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Message-Id: <20210527154056.70294-3-vsementsov@virtuozzo.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
11
include/block/block_int.h | 1 -
12
block.c | 7 +------
13
tests/unit/test-block-iothread.c | 6 ------
14
3 files changed, 1 insertion(+), 13 deletions(-)
15
16
diff --git a/include/block/block_int.h b/include/block/block_int.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/include/block/block_int.h
19
+++ b/include/block/block_int.h
20
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
21
* locking needed during I/O...
22
*/
23
int open_flags; /* flags used to open the file, re-used for re-open */
24
- bool read_only; /* if true, the media is read only */
25
bool encrypted; /* if true, the media is encrypted */
26
bool sg; /* if true, the device is a /dev/sg* */
27
bool probed; /* if true, format was probed rather than specified */
28
diff --git a/block.c b/block.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/block.c
31
+++ b/block.c
32
@@ -XXX,XX +XXX,XX @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
33
* image is inactivated. */
34
bool bdrv_is_read_only(BlockDriverState *bs)
35
{
36
- return bs->read_only;
37
+ return !(bs->open_flags & BDRV_O_RDWR);
38
}
39
40
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
41
@@ -XXX,XX +XXX,XX @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
42
goto fail;
43
}
44
45
- bs->read_only = true;
46
bs->open_flags &= ~BDRV_O_RDWR;
47
48
return 0;
49
@@ -XXX,XX +XXX,XX @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
50
}
51
52
bs->drv = drv;
53
- bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
54
bs->opaque = g_malloc0(drv->instance_size);
55
56
if (drv->bdrv_file_open) {
57
@@ -XXX,XX +XXX,XX @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
58
trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
59
drv->format_name);
60
61
- bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
62
-
63
ro = bdrv_is_read_only(bs);
64
65
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
66
@@ -XXX,XX +XXX,XX @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
67
bs->explicit_options = reopen_state->explicit_options;
68
bs->options = reopen_state->options;
69
bs->open_flags = reopen_state->flags;
70
- bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
71
bs->detect_zeroes = reopen_state->detect_zeroes;
72
73
if (reopen_state->replace_backing_bs) {
74
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/tests/unit/test-block-iothread.c
77
+++ b/tests/unit/test-block-iothread.c
78
@@ -XXX,XX +XXX,XX @@ static void test_sync_op_truncate(BdrvChild *c)
79
g_assert_cmpint(ret, ==, -EINVAL);
80
81
/* Error: Read-only image */
82
- c->bs->read_only = true;
83
c->bs->open_flags &= ~BDRV_O_RDWR;
84
85
ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL);
86
g_assert_cmpint(ret, ==, -EACCES);
87
88
- c->bs->read_only = false;
89
c->bs->open_flags |= BDRV_O_RDWR;
90
}
91
92
@@ -XXX,XX +XXX,XX @@ static void test_sync_op_flush(BdrvChild *c)
93
g_assert_cmpint(ret, ==, 0);
94
95
/* Early success: Read-only image */
96
- c->bs->read_only = true;
97
c->bs->open_flags &= ~BDRV_O_RDWR;
98
99
ret = bdrv_flush(c->bs);
100
g_assert_cmpint(ret, ==, 0);
101
102
- c->bs->read_only = false;
103
c->bs->open_flags |= BDRV_O_RDWR;
104
}
105
106
@@ -XXX,XX +XXX,XX @@ static void test_sync_op_blk_flush(BlockBackend *blk)
107
g_assert_cmpint(ret, ==, 0);
108
109
/* Early success: Read-only image */
110
- bs->read_only = true;
111
bs->open_flags &= ~BDRV_O_RDWR;
112
113
ret = blk_flush(blk);
114
g_assert_cmpint(ret, ==, 0);
115
116
- bs->read_only = false;
117
bs->open_flags |= BDRV_O_RDWR;
118
}
119
120
--
121
2.30.2
122
123
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Ari Sundholm <ari@tuxera.com>
2
2
3
A customer reported that running
3
AIO discards regressed as a result of the following commit:
4
    0dfc7af2 block/file-posix: Optimize for macOS
4
5
5
qemu-img convert -t none -O qcow2 -f qcow2 input.qcow2 output.qcow2
6
When trying to run blkdiscard within a Linux guest, the request would
7
fail, with some errors in dmesg:
6
8
7
fails for them with the following error message when the images are
9
---- [ snip ] ----
8
stored on a GPFS file system :
10
[ 4.010070] sd 2:0:0:0: [sda] tag#0 FAILED Result: hostbyte=DID_OK
11
driverbyte=DRIVER_SENSE
12
[ 4.011061] sd 2:0:0:0: [sda] tag#0 Sense Key : Aborted Command
13
[current]
14
[ 4.011061] sd 2:0:0:0: [sda] tag#0 Add. Sense: I/O process
15
terminated
16
[ 4.011061] sd 2:0:0:0: [sda] tag#0 CDB: Unmap/Read sub-channel 42
17
00 00 00 00 00 00 00 18 00
18
[ 4.011061] blk_update_request: I/O error, dev sda, sector 0
19
---- [ snip ] ----
9
20
10
qemu-img: error while writing sector 0: Invalid argument
21
This turns out to be a result of a flaw in changes to the error value
22
translation logic in handle_aiocb_discard(). The default return value
23
may be left untranslated in some configurations, and the wrong variable
24
is used in one translation.
11
25
12
After analyzing the strace output, it seems like the problem is in
26
Fix both issues.
13
handle_aiocb_write_zeroes(): The call to fallocate(FALLOC_FL_PUNCH_HOLE)
14
returns EINVAL, which can apparently happen if the file system has
15
a different idea of the granularity of the operation. It's arguably
16
a bug in GPFS, since the PUNCH_HOLE mode should not result in EINVAL
17
according to the man-page of fallocate(), but the file system is out
18
there in production and so we have to deal with it. In commit 294682cc3a
19
("block: workaround for unaligned byte range in fallocate()") we also
20
already applied the a work-around for the same problem to the earlier
21
fallocate(FALLOC_FL_ZERO_RANGE) call, so do it now similar with the
22
PUNCH_HOLE call. But instead of silently catching and returning
23
-ENOTSUP (which causes the caller to fall back to writing zeroes),
24
let's rather inform the user once about the buggy file system and
25
try the other fallback instead.
26
27
27
Signed-off-by: Thomas Huth <thuth@redhat.com>
28
Fixes: 0dfc7af2b28 ("block/file-posix: Optimize for macOS")
28
Message-Id: <20210527172020.847617-2-thuth@redhat.com>
29
Cc: qemu-stable@nongnu.org
30
Signed-off-by: Ari Sundholm <ari@tuxera.com>
31
Signed-off-by: Emil Karlson <jkarlson@tuxera.com>
32
Reviewed-by: Akihiko Odaki <akihiko.odaki@gmail.com>
33
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
34
Message-Id: <20211019110954.4170931-1-ari@tuxera.com>
29
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
35
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
30
---
36
---
31
block/file-posix.c | 11 +++++++++++
37
block/file-posix.c | 4 ++--
32
1 file changed, 11 insertions(+)
38
1 file changed, 2 insertions(+), 2 deletions(-)
33
39
34
diff --git a/block/file-posix.c b/block/file-posix.c
40
diff --git a/block/file-posix.c b/block/file-posix.c
35
index XXXXXXX..XXXXXXX 100644
41
index XXXXXXX..XXXXXXX 100644
36
--- a/block/file-posix.c
42
--- a/block/file-posix.c
37
+++ b/block/file-posix.c
43
+++ b/block/file-posix.c
38
@@ -XXX,XX +XXX,XX @@ static int handle_aiocb_write_zeroes(void *opaque)
44
@@ -XXX,XX +XXX,XX @@ static int handle_aiocb_copy_range(void *opaque)
39
return ret;
45
static int handle_aiocb_discard(void *opaque)
40
}
46
{
41
s->has_fallocate = false;
47
RawPosixAIOData *aiocb = opaque;
42
+ } else if (ret == -EINVAL) {
48
- int ret = -EOPNOTSUPP;
43
+ /*
49
+ int ret = -ENOTSUP;
44
+ * Some file systems like older versions of GPFS do not like un-
50
BDRVRawState *s = aiocb->bs->opaque;
45
+ * aligned byte ranges, and return EINVAL in such a case, though
51
46
+ * they should not do it according to the man-page of fallocate().
52
if (!s->has_discard) {
47
+ * Warn about the bad filesystem and try the final fallback instead.
53
@@ -XXX,XX +XXX,XX @@ static int handle_aiocb_discard(void *opaque)
48
+ */
54
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
49
+ warn_report_once("Your file system is misbehaving: "
55
ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
50
+ "fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. "
56
aiocb->aio_offset, aiocb->aio_nbytes);
51
+ "Please report this bug to your file sytem "
57
- ret = translate_err(-errno);
52
+ "vendor.");
58
+ ret = translate_err(ret);
53
} else if (ret != -ENOTSUP) {
59
#elif defined(__APPLE__) && (__MACH__)
54
return ret;
60
fpunchhole_t fpunchhole;
55
} else {
61
fpunchhole.fp_flags = 0;
56
--
62
--
57
2.30.2
63
2.31.1
58
64
59
65
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
Using blockdev-snapshot to append a node as an overlay to itself, or to
2
any of its parents, causes crashes. Catch the condition and return an
3
error for these cases instead.
2
4
3
The logic around **child is not obvious: this reference is used not
5
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1824363
4
only to return resulting child, but also to rollback NULL value on
6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
transaction abort.
7
Message-Id: <20211018134714.48438-1-kwolf@redhat.com>
6
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
So, let's add documentation and some assertions.
8
9
While being here, drop extra declaration of bdrv_attach_child_noperm().
10
11
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
Message-Id: <20210601075218.79249-2-vsementsov@virtuozzo.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
---
10
---
15
block.c | 24 +++++++++++++++---------
11
block.c | 10 ++++++++++
16
1 file changed, 15 insertions(+), 9 deletions(-)
12
tests/qemu-iotests/085 | 31 ++++++++++++++++++++++++++++++-
13
tests/qemu-iotests/085.out | 33 ++++++++++++++++++++++++++++++---
14
3 files changed, 70 insertions(+), 4 deletions(-)
17
15
18
diff --git a/block.c b/block.c
16
diff --git a/block.c b/block.c
19
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
20
--- a/block.c
18
--- a/block.c
21
+++ b/block.c
19
+++ b/block.c
22
@@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
20
@@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
23
21
BdrvChildRole child_role,
22
Error **errp);
23
24
+static bool bdrv_recurse_has_child(BlockDriverState *bs,
25
+ BlockDriverState *child);
26
+
24
static void bdrv_replace_child_noperm(BdrvChild *child,
27
static void bdrv_replace_child_noperm(BdrvChild *child,
25
BlockDriverState *new_bs);
28
BlockDriverState *new_bs);
26
-static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
29
static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
27
- BlockDriverState *child_bs,
30
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
28
- const char *child_name,
31
int drain_saldo;
29
- const BdrvChildClass *child_class,
32
30
- BdrvChildRole child_role,
33
assert(!child->frozen);
31
- BdrvChild **child,
34
+ assert(old_bs != new_bs);
32
- Transaction *tran,
35
33
- Error **errp);
36
if (old_bs && new_bs) {
34
static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
37
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
35
Transaction *tran);
38
@@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
36
39
37
@@ -XXX,XX +XXX,XX @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
40
assert(parent_bs->drv);
38
41
39
/*
42
+ if (bdrv_recurse_has_child(child_bs, parent_bs)) {
40
* Common part of attaching bdrv child to bs or to blk or to job
43
+ error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
41
+ *
44
+ child_bs->node_name, child_name, parent_bs->node_name);
42
+ * Resulting new child is returned through @child.
45
+ return -EINVAL;
43
+ * At start *@child must be NULL.
46
+ }
44
+ * @child is saved to a new entry of @tran, so that *@child could be reverted to
47
+
45
+ * NULL on abort(). So referenced variable must live at least until transaction
48
bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
46
+ * end.
49
bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
47
*/
50
perm, shared_perm, &perm, &shared_perm);
48
static int bdrv_attach_child_common(BlockDriverState *child_bs,
51
diff --git a/tests/qemu-iotests/085 b/tests/qemu-iotests/085
49
const char *child_name,
52
index XXXXXXX..XXXXXXX 100755
50
@@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs,
53
--- a/tests/qemu-iotests/085
51
return 0;
54
+++ b/tests/qemu-iotests/085
55
@@ -XXX,XX +XXX,XX @@ do_blockdev_add()
52
}
56
}
53
57
54
+/*
58
# ${1}: unique identifier for the snapshot filename
55
+ * Variable referenced by @child must live at least until transaction end.
59
-add_snapshot_image()
56
+ * (see bdrv_attach_child_common() doc for details)
60
+create_snapshot_image()
57
+ */
61
{
58
static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
62
base_image="${TEST_DIR}/$((${1}-1))-${snapshot_virt0}"
59
BlockDriverState *child_bs,
63
snapshot_file="${TEST_DIR}/${1}-${snapshot_virt0}"
60
const char *child_name,
64
TEST_IMG=$snapshot_file _make_test_img -u -b "${base_image}" -F $IMGFMT "$size"
61
@@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
65
+}
62
child_role, perm, shared_perm, opaque,
63
&child, tran, errp);
64
if (ret < 0) {
65
- assert(child == NULL);
66
goto out;
67
}
68
69
@@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
70
71
out:
72
tran_finalize(tran, ret);
73
+ /* child is unset on failure by bdrv_attach_child_common_abort() */
74
+ assert((ret < 0) == !child);
75
+
66
+
76
bdrv_unref(child_bs);
67
+# ${1}: unique identifier for the snapshot filename
77
return child;
68
+add_snapshot_image()
69
+{
70
+ snapshot_file="${TEST_DIR}/${1}-${snapshot_virt0}"
71
+ create_snapshot_image "$1"
72
do_blockdev_add "$1" "'backing': null, " "${snapshot_file}"
78
}
73
}
79
@@ -XXX,XX +XXX,XX @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
74
80
75
@@ -XXX,XX +XXX,XX @@ _make_test_img -b "${TEST_IMG}.base" -F $IMGFMT "$size"
81
out:
76
do_blockdev_add ${SNAPSHOTS} "" "${TEST_IMG}"
82
tran_finalize(tran, ret);
77
blockdev_snapshot ${SNAPSHOTS} error
83
+ /* child is unset on failure by bdrv_attach_child_common_abort() */
78
84
+ assert((ret < 0) == !child);
79
+echo
85
80
+echo === Invalid command - creating loops ===
86
bdrv_unref(child_bs);
81
+echo
87
82
+
83
+SNAPSHOTS=$((${SNAPSHOTS}+1))
84
+add_snapshot_image ${SNAPSHOTS}
85
+
86
+_send_qemu_cmd $h "{ 'execute': 'blockdev-snapshot',
87
+ 'arguments': { 'node':'snap_${SNAPSHOTS}',
88
+ 'overlay':'snap_${SNAPSHOTS}' }
89
+ }" "error"
90
+
91
+SNAPSHOTS=$((${SNAPSHOTS}+1))
92
+create_snapshot_image ${SNAPSHOTS}
93
+do_blockdev_add ${SNAPSHOTS} "'backing': 'snap_$((${SNAPSHOTS}-1))', " \
94
+ "${TEST_DIR}/${SNAPSHOTS}-${snapshot_virt0}"
95
+
96
+_send_qemu_cmd $h "{ 'execute': 'blockdev-snapshot',
97
+ 'arguments': { 'node':'snap_${SNAPSHOTS}',
98
+ 'overlay':'snap_$((${SNAPSHOTS}-1))' }
99
+ }" "error"
100
+
101
echo
102
echo === Invalid command - The node does not exist ===
103
echo
104
diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out
105
index XXXXXXX..XXXXXXX 100644
106
--- a/tests/qemu-iotests/085.out
107
+++ b/tests/qemu-iotests/085.out
108
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/
109
'overlay':'snap_13' } }
110
{"error": {"class": "GenericError", "desc": "The overlay already has a backing image"}}
111
112
+=== Invalid command - creating loops ===
113
+
114
+Formatting 'TEST_DIR/14-snapshot-v0.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/13-snapshot-v0.IMGFMT backing_fmt=IMGFMT
115
+{ 'execute': 'blockdev-add', 'arguments':
116
+ { 'driver': 'IMGFMT', 'node-name': 'snap_14', 'backing': null,
117
+ 'file':
118
+ { 'driver': 'file', 'filename': 'TEST_DIR/14-snapshot-v0.IMGFMT',
119
+ 'node-name': 'file_14' } } }
120
+{"return": {}}
121
+{ 'execute': 'blockdev-snapshot',
122
+ 'arguments': { 'node':'snap_14',
123
+ 'overlay':'snap_14' }
124
+ }
125
+{"error": {"class": "GenericError", "desc": "Making 'snap_14' a backing child of 'snap_14' would create a cycle"}}
126
+Formatting 'TEST_DIR/15-snapshot-v0.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/14-snapshot-v0.IMGFMT backing_fmt=IMGFMT
127
+{ 'execute': 'blockdev-add', 'arguments':
128
+ { 'driver': 'IMGFMT', 'node-name': 'snap_15', 'backing': 'snap_14',
129
+ 'file':
130
+ { 'driver': 'file', 'filename': 'TEST_DIR/15-snapshot-v0.IMGFMT',
131
+ 'node-name': 'file_15' } } }
132
+{"return": {}}
133
+{ 'execute': 'blockdev-snapshot',
134
+ 'arguments': { 'node':'snap_15',
135
+ 'overlay':'snap_14' }
136
+ }
137
+{"error": {"class": "GenericError", "desc": "Making 'snap_15' a backing child of 'snap_14' would create a cycle"}}
138
+
139
=== Invalid command - The node does not exist ===
140
141
{ 'execute': 'blockdev-snapshot',
142
'arguments': { 'node': 'virtio0',
143
- 'overlay':'snap_14' } }
144
-{"error": {"class": "GenericError", "desc": "Cannot find device='snap_14' nor node-name='snap_14'"}}
145
+ 'overlay':'snap_16' } }
146
+{"error": {"class": "GenericError", "desc": "Cannot find device='snap_16' nor node-name='snap_16'"}}
147
{ 'execute': 'blockdev-snapshot',
148
'arguments': { 'node':'nodevice',
149
- 'overlay':'snap_13' }
150
+ 'overlay':'snap_15' }
151
}
152
{"error": {"class": "GenericError", "desc": "Cannot find device='nodevice' nor node-name='nodevice'"}}
153
*** done
88
--
154
--
89
2.30.2
155
2.31.1
90
156
91
157
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Peter Lieven <pl@kamp.de>
2
2
3
Now permissions are updated as follows:
3
the qemu rbd driver currently lacks support for bdrv_co_block_status.
4
1. do graph modifications ignoring permissions
4
This results mainly in incorrect progress during block operations (e.g.
5
2. do permission update
5
qemu-img convert with an rbd image as source).
6
6
7
(of course, we rollback [1] if [2] fails)
7
This patch utilizes the rbd_diff_iterate2 call from librbd to detect
8
allocated and unallocated (all zero areas).
8
9
9
So, on stage [2] we can't say which users are "old" and which are
10
To avoid querying the ceph OSDs for the answer this is only done if
10
"new" and exist only since [1]. And current error message is a bit
11
the image has the fast-diff feature which depends on the object-map and
11
outdated. Let's improve it, to make everything clean.
12
exclusive-lock features. In this case it is guaranteed that the information
13
is present in memory in the librbd client and thus very fast.
12
14
13
While being here, add also a comment and some good assertions.
15
If fast-diff is not available all areas are reported to be allocated
16
which is the current behaviour if bdrv_co_block_status is not implemented.
14
17
15
iotests 283, 307, qsd-jobs outputs are updated.
18
Signed-off-by: Peter Lieven <pl@kamp.de>
16
19
Message-Id: <20211012152231.24868-1-pl@kamp.de>
17
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
20
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
18
Message-Id: <20210601075218.79249-7-vsementsov@virtuozzo.com>
19
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
21
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
20
---
22
---
21
block.c | 29 ++++++++++++++++++++-------
23
block/rbd.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++
22
tests/qemu-iotests/283.out | 2 +-
24
1 file changed, 112 insertions(+)
23
tests/qemu-iotests/307.out | 2 +-
24
tests/qemu-iotests/tests/qsd-jobs.out | 2 +-
25
4 files changed, 25 insertions(+), 10 deletions(-)
26
25
27
diff --git a/block.c b/block.c
26
diff --git a/block/rbd.c b/block/rbd.c
28
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
29
--- a/block.c
28
--- a/block/rbd.c
30
+++ b/block.c
29
+++ b/block/rbd.c
31
@@ -XXX,XX +XXX,XX @@ static char *bdrv_child_user_desc(BdrvChild *c)
30
@@ -XXX,XX +XXX,XX @@ typedef struct RBDTask {
32
return c->klass->get_parent_desc(c);
31
int64_t ret;
32
} RBDTask;
33
34
+typedef struct RBDDiffIterateReq {
35
+ uint64_t offs;
36
+ uint64_t bytes;
37
+ bool exists;
38
+} RBDDiffIterateReq;
39
+
40
static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
41
BlockdevOptionsRbd *opts, bool cache,
42
const char *keypairs, const char *secretid,
43
@@ -XXX,XX +XXX,XX @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
44
return spec_info;
33
}
45
}
34
46
35
+/*
47
+/*
36
+ * Check that @a allows everything that @b needs. @a and @b must reference same
48
+ * rbd_diff_iterate2 allows to interrupt the exection by returning a negative
37
+ * child node.
49
+ * value in the callback routine. Choose a value that does not conflict with
50
+ * an existing exitcode and return it if we want to prematurely stop the
51
+ * execution because we detected a change in the allocation status.
38
+ */
52
+ */
39
static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
53
+#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000
54
+
55
+static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
56
+ int exists, void *opaque)
57
+{
58
+ RBDDiffIterateReq *req = opaque;
59
+
60
+ assert(req->offs + req->bytes <= offs);
61
+ /*
62
+ * we do not diff against a snapshot so we should never receive a callback
63
+ * for a hole.
64
+ */
65
+ assert(exists);
66
+
67
+ if (!req->exists && offs > req->offs) {
68
+ /*
69
+ * we started in an unallocated area and hit the first allocated
70
+ * block. req->bytes must be set to the length of the unallocated area
71
+ * before the allocated area. stop further processing.
72
+ */
73
+ req->bytes = offs - req->offs;
74
+ return QEMU_RBD_EXIT_DIFF_ITERATE2;
75
+ }
76
+
77
+ if (req->exists && offs > req->offs + req->bytes) {
78
+ /*
79
+ * we started in an allocated area and jumped over an unallocated area,
80
+ * req->bytes contains the length of the allocated area before the
81
+ * unallocated area. stop further processing.
82
+ */
83
+ return QEMU_RBD_EXIT_DIFF_ITERATE2;
84
+ }
85
+
86
+ req->bytes += len;
87
+ req->exists = true;
88
+
89
+ return 0;
90
+}
91
+
92
+static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
93
+ bool want_zero, int64_t offset,
94
+ int64_t bytes, int64_t *pnum,
95
+ int64_t *map,
96
+ BlockDriverState **file)
97
+{
98
+ BDRVRBDState *s = bs->opaque;
99
+ int status, r;
100
+ RBDDiffIterateReq req = { .offs = offset };
101
+ uint64_t features, flags;
102
+
103
+ assert(offset + bytes <= s->image_size);
104
+
105
+ /* default to all sectors allocated */
106
+ status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
107
+ *map = offset;
108
+ *file = bs;
109
+ *pnum = bytes;
110
+
111
+ /* check if RBD image supports fast-diff */
112
+ r = rbd_get_features(s->image, &features);
113
+ if (r < 0) {
114
+ return status;
115
+ }
116
+ if (!(features & RBD_FEATURE_FAST_DIFF)) {
117
+ return status;
118
+ }
119
+
120
+ /* check if RBD fast-diff result is valid */
121
+ r = rbd_get_flags(s->image, &flags);
122
+ if (r < 0) {
123
+ return status;
124
+ }
125
+ if (flags & RBD_FLAG_FAST_DIFF_INVALID) {
126
+ return status;
127
+ }
128
+
129
+ r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
130
+ qemu_rbd_diff_iterate_cb, &req);
131
+ if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
132
+ return status;
133
+ }
134
+ assert(req.bytes <= bytes);
135
+ if (!req.exists) {
136
+ if (r == 0) {
137
+ /*
138
+ * rbd_diff_iterate2 does not invoke callbacks for unallocated
139
+ * areas. This here catches the case where no callback was
140
+ * invoked at all (req.bytes == 0).
141
+ */
142
+ assert(req.bytes == 0);
143
+ req.bytes = bytes;
144
+ }
145
+ status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
146
+ }
147
+
148
+ *pnum = req.bytes;
149
+ return status;
150
+}
151
+
152
static int64_t qemu_rbd_getlength(BlockDriverState *bs)
40
{
153
{
41
- g_autofree char *user = NULL;
154
BDRVRBDState *s = bs->opaque;
42
- g_autofree char *perm_names = NULL;
155
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_rbd = {
43
+ const char *child_bs_name;
156
#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
44
+ g_autofree char *a_user = NULL;
157
.bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
45
+ g_autofree char *b_user = NULL;
158
#endif
46
+ g_autofree char *perms = NULL;
159
+ .bdrv_co_block_status = qemu_rbd_co_block_status,
47
+
160
48
+ assert(a->bs);
161
.bdrv_snapshot_create = qemu_rbd_snap_create,
49
+ assert(a->bs == b->bs);
162
.bdrv_snapshot_delete = qemu_rbd_snap_remove,
50
51
if ((b->perm & a->shared_perm) == b->perm) {
52
return true;
53
}
54
55
- perm_names = bdrv_perm_names(b->perm & ~a->shared_perm);
56
- user = bdrv_child_user_desc(a);
57
- error_setg(errp, "Conflicts with use by %s as '%s', which does not "
58
- "allow '%s' on %s",
59
- user, a->name, perm_names, bdrv_get_node_name(b->bs));
60
+ child_bs_name = bdrv_get_node_name(b->bs);
61
+ a_user = bdrv_child_user_desc(a);
62
+ b_user = bdrv_child_user_desc(b);
63
+ perms = bdrv_perm_names(b->perm & ~a->shared_perm);
64
+
65
+ error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
66
+ "both required by %s (uses node '%s' as '%s' child) and "
67
+ "unshared by %s (uses node '%s' as '%s' child).",
68
+ child_bs_name, perms,
69
+ b_user, child_bs_name, b->name,
70
+ a_user, child_bs_name, a->name);
71
72
return false;
73
}
74
diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out
75
index XXXXXXX..XXXXXXX 100644
76
--- a/tests/qemu-iotests/283.out
77
+++ b/tests/qemu-iotests/283.out
78
@@ -XXX,XX +XXX,XX @@
79
{"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}}
80
{"return": {}}
81
{"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}}
82
-{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by node 'source' as 'image', which does not allow 'write' on base"}}
83
+{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Permission conflict on node 'base': permissions 'write' are both required by node 'other' (uses node 'base' as 'image' child) and unshared by node 'source' (uses node 'base' as 'image' child)."}}
84
85
=== backup-top should be gone after job-finalize ===
86
87
diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out
88
index XXXXXXX..XXXXXXX 100644
89
--- a/tests/qemu-iotests/307.out
90
+++ b/tests/qemu-iotests/307.out
91
@@ -XXX,XX +XXX,XX @@ exports available: 1
92
93
=== Add a writable export ===
94
{"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}}
95
-{"error": {"class": "GenericError", "desc": "Conflicts with use by block device 'sda' as 'root', which does not allow 'write' on fmt"}}
96
+{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt': permissions 'write' are both required by an unnamed block device (uses node 'fmt' as 'root' child) and unshared by block device 'sda' (uses node 'fmt' as 'root' child)."}}
97
{"execute": "device_del", "arguments": {"id": "sda"}}
98
{"return": {}}
99
{"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
100
diff --git a/tests/qemu-iotests/tests/qsd-jobs.out b/tests/qemu-iotests/tests/qsd-jobs.out
101
index XXXXXXX..XXXXXXX 100644
102
--- a/tests/qemu-iotests/tests/qsd-jobs.out
103
+++ b/tests/qemu-iotests/tests/qsd-jobs.out
104
@@ -XXX,XX +XXX,XX @@ QMP_VERSION
105
{"return": {}}
106
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
107
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
108
-{"error": {"class": "GenericError", "desc": "Conflicts with use by stream job 'job0' as 'intermediate node', which does not allow 'write' on fmt_base"}}
109
+{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt_base': permissions 'write' are both required by an unnamed block device (uses node 'fmt_base' as 'root' child) and unshared by stream job 'job0' (uses node 'fmt_base' as 'intermediate node' child)."}}
110
{"return": {}}
111
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}}
112
*** done
113
--
163
--
114
2.30.2
164
2.31.1
115
165
116
166
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
2
2
3
Don't report successful progress on failure, when call_state->ret is
3
The LBA28 capacity (at offsets 60/61 of identification) is supposed to
4
set.
4
express the maximum size supported by LBA28 commands. If the device is
5
larger than this, we have to cap it to 2^28-1.
5
6
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
At least NetBSD happens to be using this value to determine whether to use
7
Message-Id: <20210528141628.44287-2-vsementsov@virtuozzo.com>
8
LBA28 or LBA48 for its commands, using LBA28 for sectors that don't need
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
LBA48. This commit thus fixes NetBSD access to disks larger than 128GiB.
10
11
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
12
Message-Id: <20210824104344.3878849-1-samuel.thibault@ens-lyon.org>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
14
---
11
block/block-copy.c | 8 +++++---
15
hw/ide/core.c | 8 ++++++--
12
1 file changed, 5 insertions(+), 3 deletions(-)
16
1 file changed, 6 insertions(+), 2 deletions(-)
13
17
14
diff --git a/block/block-copy.c b/block/block-copy.c
18
diff --git a/hw/ide/core.c b/hw/ide/core.c
15
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
16
--- a/block/block-copy.c
20
--- a/hw/ide/core.c
17
+++ b/block/block-copy.c
21
+++ b/hw/ide/core.c
18
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
22
@@ -XXX,XX +XXX,XX @@ static void put_le16(uint16_t *p, unsigned int v)
19
23
static void ide_identify_size(IDEState *s)
20
ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes,
24
{
21
&error_is_read);
25
uint16_t *p = (uint16_t *)s->identify_data;
22
- if (ret < 0 && !t->call_state->ret) {
26
- put_le16(p + 60, s->nb_sectors);
23
- t->call_state->ret = ret;
27
- put_le16(p + 61, s->nb_sectors >> 16);
24
- t->call_state->error_is_read = error_is_read;
28
+ int64_t nb_sectors_lba28 = s->nb_sectors;
25
+ if (ret < 0) {
29
+ if (nb_sectors_lba28 >= 1 << 28) {
26
+ if (!t->call_state->ret) {
30
+ nb_sectors_lba28 = (1 << 28) - 1;
27
+ t->call_state->ret = ret;
31
+ }
28
+ t->call_state->error_is_read = error_is_read;
32
+ put_le16(p + 60, nb_sectors_lba28);
29
+ }
33
+ put_le16(p + 61, nb_sectors_lba28 >> 16);
30
} else {
34
put_le16(p + 100, s->nb_sectors);
31
progress_work_done(t->s->progress, t->bytes);
35
put_le16(p + 101, s->nb_sectors >> 16);
32
}
36
put_le16(p + 102, s->nb_sectors >> 32);
33
--
37
--
34
2.30.2
38
2.31.1
35
39
36
40
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Fabrice Fontaine <fontaine.fabrice@gmail.com>
2
2
3
All child classes have this callback. So, drop unreachable code.
3
Include linux/falloc.h if CONFIG_FALLOCATE_ZERO_RANGE is defined to fix
4
https://gitlab.com/qemu-project/qemu/-/commit/50482fda98bd62e072c30b7ea73c985c4e9d9bbb
5
and avoid the following build failure on musl:
4
6
5
Still add an assertion to bdrv_attach_child_common(), to early detect
7
../block/export/fuse.c: In function 'fuse_fallocate':
6
bad classes.
8
../block/export/fuse.c:643:21: error: 'FALLOC_FL_ZERO_RANGE' undeclared (first use in this function)
9
643 | else if (mode & FALLOC_FL_ZERO_RANGE) {
10
| ^~~~~~~~~~~~~~~~~~~~
7
11
8
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
12
Fixes:
9
Message-Id: <20210601075218.79249-6-vsementsov@virtuozzo.com>
13
- http://autobuild.buildroot.org/results/be24433a429fda681fb66698160132c1c99bc53b
14
15
Fixes: 50482fda98b ("block/export/fuse.c: fix musl build")
16
Signed-off-by: Fabrice Fontaine <fontaine.fabrice@gmail.com>
17
Message-Id: <20211022095209.1319671-1-fontaine.fabrice@gmail.com>
18
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
19
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
20
---
12
block.c | 7 ++-----
21
block/export/fuse.c | 4 ++++
13
1 file changed, 2 insertions(+), 5 deletions(-)
22
1 file changed, 4 insertions(+)
14
23
15
diff --git a/block.c b/block.c
24
diff --git a/block/export/fuse.c b/block/export/fuse.c
16
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
17
--- a/block.c
26
--- a/block/export/fuse.c
18
+++ b/block.c
27
+++ b/block/export/fuse.c
19
@@ -XXX,XX +XXX,XX @@ bool bdrv_is_writable(BlockDriverState *bs)
28
@@ -XXX,XX +XXX,XX @@
20
29
#include <fuse.h>
21
static char *bdrv_child_user_desc(BdrvChild *c)
30
#include <fuse_lowlevel.h>
22
{
31
23
- if (c->klass->get_parent_desc) {
32
+#if defined(CONFIG_FALLOCATE_ZERO_RANGE)
24
- return c->klass->get_parent_desc(c);
33
+#include <linux/falloc.h>
25
- }
34
+#endif
26
-
35
+
27
- return g_strdup("another user");
36
#ifdef __linux__
28
+ return c->klass->get_parent_desc(c);
37
#include <linux/fs.h>
29
}
38
#endif
30
31
static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
32
@@ -XXX,XX +XXX,XX @@ static int bdrv_attach_child_common(BlockDriverState *child_bs,
33
34
assert(child);
35
assert(*child == NULL);
36
+ assert(child_class->get_parent_desc);
37
38
new_child = g_new(BdrvChild, 1);
39
*new_child = (BdrvChild) {
40
--
39
--
41
2.30.2
40
2.31.1
42
41
43
42
diff view generated by jsdifflib
1
From: Sergio Lopez <slp@redhat.com>
1
From: Stefano Garzarella <sgarzare@redhat.com>
2
2
3
Allow block backends to poll their devices/users to check if they have
3
Commit d7ddd0a161 ("linux-aio: limit the batch size using
4
been quiesced when entering a drained section.
4
`aio-max-batch` parameter") added a way to limit the batch size
5
of Linux AIO backend for the entire AIO context.
5
6
6
This will be used in the next patch to wait for the NBD server to be
7
The same AIO context can be shared by multiple devices, so
7
completely quiesced.
8
latency-sensitive devices may want to limit the batch size even
9
more to avoid increasing latency.
10
11
For this reason we add the `aio-max-batch` option to the file
12
backend, which will be used by the next commits to limit the size of
13
batches including requests generated by this device.
8
14
9
Suggested-by: Kevin Wolf <kwolf@redhat.com>
15
Suggested-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
16
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
11
Reviewed-by: Eric Blake <eblake@redhat.com>
17
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
12
Signed-off-by: Sergio Lopez <slp@redhat.com>
18
Message-Id: <20211026162346.253081-2-sgarzare@redhat.com>
13
Message-Id: <20210602060552.17433-2-slp@redhat.com>
19
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
20
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
16
---
21
---
17
include/sysemu/block-backend.h | 4 ++++
22
qapi/block-core.json | 7 +++++++
18
block/block-backend.c | 7 ++++++-
23
block/file-posix.c | 9 +++++++++
19
2 files changed, 10 insertions(+), 1 deletion(-)
24
2 files changed, 16 insertions(+)
20
25
21
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
26
diff --git a/qapi/block-core.json b/qapi/block-core.json
22
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
23
--- a/include/sysemu/block-backend.h
28
--- a/qapi/block-core.json
24
+++ b/include/sysemu/block-backend.h
29
+++ b/qapi/block-core.json
25
@@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps {
30
@@ -XXX,XX +XXX,XX @@
26
* Runs when the backend's last drain request ends.
31
# for this device (default: none, forward the commands via SG_IO;
27
*/
32
# since 2.11)
28
void (*drained_end)(void *opaque);
33
# @aio: AIO backend (default: threads) (since: 2.8)
29
+ /*
34
+# @aio-max-batch: maximum number of requests to batch together into a single
30
+ * Is the device still busy?
35
+# submission in the AIO backend. The smallest value between
31
+ */
36
+# this and the aio-max-batch value of the IOThread object is
32
+ bool (*drained_poll)(void *opaque);
37
+# chosen.
33
} BlockDevOps;
38
+# 0 means that the AIO backend will handle it automatically.
34
39
+# (default: 0, since 6.2)
35
/* This struct is embedded in (the private) BlockBackend struct and contains
40
# @locking: whether to enable file locking. If set to 'auto', only enable
36
diff --git a/block/block-backend.c b/block/block-backend.c
41
# when Open File Descriptor (OFD) locking API is available
42
# (default: auto, since 2.10)
43
@@ -XXX,XX +XXX,XX @@
44
'*pr-manager': 'str',
45
'*locking': 'OnOffAuto',
46
'*aio': 'BlockdevAioOptions',
47
+ '*aio-max-batch': 'int',
48
'*drop-cache': {'type': 'bool',
49
'if': 'CONFIG_LINUX'},
50
'*x-check-cache-dropped': { 'type': 'bool',
51
diff --git a/block/file-posix.c b/block/file-posix.c
37
index XXXXXXX..XXXXXXX 100644
52
index XXXXXXX..XXXXXXX 100644
38
--- a/block/block-backend.c
53
--- a/block/file-posix.c
39
+++ b/block/block-backend.c
54
+++ b/block/file-posix.c
40
@@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child)
55
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVRawState {
41
static bool blk_root_drained_poll(BdrvChild *child)
56
uint64_t locked_perm;
42
{
57
uint64_t locked_shared_perm;
43
BlockBackend *blk = child->opaque;
58
44
+ bool busy = false;
59
+ uint64_t aio_max_batch;
45
assert(blk->quiesce_counter);
46
- return !!blk->in_flight;
47
+
60
+
48
+ if (blk->dev_ops && blk->dev_ops->drained_poll) {
61
int perm_change_fd;
49
+ busy = blk->dev_ops->drained_poll(blk->dev_opaque);
62
int perm_change_flags;
50
+ }
63
BDRVReopenState *reopen_state;
51
+ return busy || !!blk->in_flight;
64
@@ -XXX,XX +XXX,XX @@ static QemuOptsList raw_runtime_opts = {
52
}
65
.type = QEMU_OPT_STRING,
53
66
.help = "host AIO implementation (threads, native, io_uring)",
54
static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
67
},
68
+ {
69
+ .name = "aio-max-batch",
70
+ .type = QEMU_OPT_NUMBER,
71
+ .help = "AIO max batch size (0 = auto handled by AIO backend, default: 0)",
72
+ },
73
{
74
.name = "locking",
75
.type = QEMU_OPT_STRING,
76
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
77
s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING);
78
#endif
79
80
+ s->aio_max_batch = qemu_opt_get_number(opts, "aio-max-batch", 0);
81
+
82
locking = qapi_enum_parse(&OnOffAuto_lookup,
83
qemu_opt_get(opts, "locking"),
84
ON_OFF_AUTO_AUTO, &local_err);
55
--
85
--
56
2.30.2
86
2.31.1
57
87
58
88
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Stefano Garzarella <sgarzare@redhat.com>
2
2
3
Instead of keeping additional boolean field, let's store the
3
This new parameter can be used by block devices to limit the
4
information in BDRV_O_RDWR bit of BlockBackendRootState::open_flags.
4
Linux AIO batch size more than the limit set by the AIO context.
5
5
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
file-posix backend supports this, passing its `aio-max-batch` option
7
Message-Id: <20210527154056.70294-4-vsementsov@virtuozzo.com>
7
previously added.
8
9
Add an helper function to calculate the maximum batch size.
10
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
13
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
14
Message-Id: <20211026162346.253081-3-sgarzare@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
---
16
---
10
include/block/block_int.h | 1 -
17
include/block/raw-aio.h | 3 ++-
11
block/block-backend.c | 10 ++--------
18
block/file-posix.c | 3 ++-
12
blockdev.c | 3 +--
19
block/linux-aio.c | 30 ++++++++++++++++++++++--------
13
3 files changed, 3 insertions(+), 11 deletions(-)
20
3 files changed, 26 insertions(+), 10 deletions(-)
14
21
15
diff --git a/include/block/block_int.h b/include/block/block_int.h
22
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
16
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
17
--- a/include/block/block_int.h
24
--- a/include/block/raw-aio.h
18
+++ b/include/block/block_int.h
25
+++ b/include/block/raw-aio.h
19
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
26
@@ -XXX,XX +XXX,XX @@ typedef struct LinuxAioState LinuxAioState;
20
27
LinuxAioState *laio_init(Error **errp);
21
struct BlockBackendRootState {
28
void laio_cleanup(LinuxAioState *s);
22
int open_flags;
29
int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
23
- bool read_only;
30
- uint64_t offset, QEMUIOVector *qiov, int type);
24
BlockdevDetectZeroesOptions detect_zeroes;
31
+ uint64_t offset, QEMUIOVector *qiov, int type,
25
};
32
+ uint64_t dev_max_batch);
26
33
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
27
diff --git a/block/block-backend.c b/block/block-backend.c
34
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
35
void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
36
diff --git a/block/file-posix.c b/block/file-posix.c
28
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
29
--- a/block/block-backend.c
38
--- a/block/file-posix.c
30
+++ b/block/block-backend.c
39
+++ b/block/file-posix.c
31
@@ -XXX,XX +XXX,XX @@ bool blk_supports_write_perm(BlockBackend *blk)
40
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
32
if (bs) {
41
} else if (s->use_linux_aio) {
33
return !bdrv_is_read_only(bs);
42
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
34
} else {
43
assert(qiov->size == bytes);
35
- return !blk->root_state.read_only;
44
- return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
36
+ return blk->root_state.open_flags & BDRV_O_RDWR;
45
+ return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
46
+ s->aio_max_batch);
47
#endif
48
}
49
50
diff --git a/block/linux-aio.c b/block/linux-aio.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/block/linux-aio.c
53
+++ b/block/linux-aio.c
54
@@ -XXX,XX +XXX,XX @@ static void ioq_submit(LinuxAioState *s)
37
}
55
}
38
}
56
}
39
57
40
@@ -XXX,XX +XXX,XX @@ void blk_update_root_state(BlockBackend *blk)
58
+static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch)
41
assert(blk->root);
59
+{
42
60
+ uint64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;
43
blk->root_state.open_flags = blk->root->bs->open_flags;
61
+
44
- blk->root_state.read_only = bdrv_is_read_only(blk->root->bs);
62
+ /*
45
blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
63
+ * AIO context can be shared between multiple block devices, so
64
+ * `dev_max_batch` allows reducing the batch size for latency-sensitive
65
+ * devices.
66
+ */
67
+ max_batch = MIN_NON_ZERO(dev_max_batch, max_batch);
68
+
69
+ /* limit the batch with the number of available events */
70
+ max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch);
71
+
72
+ return max_batch;
73
+}
74
+
75
void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
76
{
77
s->io_q.plugged++;
78
@@ -XXX,XX +XXX,XX @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
46
}
79
}
47
80
48
@@ -XXX,XX +XXX,XX @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
81
static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
49
*/
82
- int type)
50
int blk_get_open_flags_from_root_state(BlockBackend *blk)
83
+ int type, uint64_t dev_max_batch)
51
{
84
{
52
- int bs_flags;
85
LinuxAioState *s = laiocb->ctx;
86
struct iocb *iocbs = &laiocb->iocb;
87
QEMUIOVector *qiov = laiocb->qiov;
88
- int64_t max_batch = s->aio_context->aio_max_batch ?: DEFAULT_MAX_BATCH;
53
-
89
-
54
- bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
90
- /* limit the batch with the number of available events */
55
- bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
91
- max_batch = MIN_NON_ZERO(MAX_EVENTS - s->io_q.in_flight, max_batch);
56
-
92
57
- return bs_flags;
93
switch (type) {
58
+ return blk->root_state.open_flags;
94
case QEMU_AIO_WRITE:
95
@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
96
s->io_q.in_queue++;
97
if (!s->io_q.blocked &&
98
(!s->io_q.plugged ||
99
- s->io_q.in_queue >= max_batch)) {
100
+ s->io_q.in_queue >= laio_max_batch(s, dev_max_batch))) {
101
ioq_submit(s);
102
}
103
104
@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
59
}
105
}
60
106
61
BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
107
int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
62
diff --git a/blockdev.c b/blockdev.c
108
- uint64_t offset, QEMUIOVector *qiov, int type)
63
index XXXXXXX..XXXXXXX 100644
109
+ uint64_t offset, QEMUIOVector *qiov, int type,
64
--- a/blockdev.c
110
+ uint64_t dev_max_batch)
65
+++ b/blockdev.c
111
{
66
@@ -XXX,XX +XXX,XX @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
112
int ret;
67
113
struct qemu_laiocb laiocb = {
68
blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
114
@@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
69
blk_rs = blk_get_root_state(blk);
115
.qiov = qiov,
70
- blk_rs->open_flags = bdrv_flags;
116
};
71
- blk_rs->read_only = read_only;
117
72
+ blk_rs->open_flags = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR);
118
- ret = laio_do_submit(fd, &laiocb, offset, type);
73
blk_rs->detect_zeroes = detect_zeroes;
119
+ ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch);
74
120
if (ret < 0) {
75
qobject_unref(bs_opts);
121
return ret;
122
}
76
--
123
--
77
2.30.2
124
2.31.1
78
125
79
126
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Stefano Garzarella <sgarzare@redhat.com>
2
2
3
If fallocate(... FALLOC_FL_ZERO_RANGE ...) returns EINVAL, it's likely
3
Between the submission of a request and the unplug, other devices
4
an indication that the file system is buggy and does not implement
4
with larger limits may have been queued new requests without flushing
5
unaligned accesses right. We still might be lucky with the other
5
the batch.
6
fallback fallocate() calls later in this function, though, so we should
7
not return immediately and try the others first.
8
Since FALLOC_FL_ZERO_RANGE could also return EINVAL if the file descriptor
9
is not a regular file, we ignore this filesystem bug silently, without
10
printing an error message for the user.
11
6
12
Signed-off-by: Thomas Huth <thuth@redhat.com>
7
Using the new `dev_max_batch` parameter, laio_io_unplug() can check
13
Message-Id: <20210527172020.847617-3-thuth@redhat.com>
8
if the batch exceeds the device limit to flush the current batch.
9
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
12
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
13
Message-Id: <20211026162346.253081-4-sgarzare@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
---
15
---
16
block/file-posix.c | 18 +++++++++---------
16
include/block/raw-aio.h | 3 ++-
17
1 file changed, 9 insertions(+), 9 deletions(-)
17
block/file-posix.c | 2 +-
18
block/linux-aio.c | 8 +++++---
19
3 files changed, 8 insertions(+), 5 deletions(-)
18
20
21
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/include/block/raw-aio.h
24
+++ b/include/block/raw-aio.h
25
@@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
26
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
27
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
28
void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
29
-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s);
30
+void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
31
+ uint64_t dev_max_batch);
32
#endif
33
/* io_uring.c - Linux io_uring implementation */
34
#ifdef CONFIG_LINUX_IO_URING
19
diff --git a/block/file-posix.c b/block/file-posix.c
35
diff --git a/block/file-posix.c b/block/file-posix.c
20
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
21
--- a/block/file-posix.c
37
--- a/block/file-posix.c
22
+++ b/block/file-posix.c
38
+++ b/block/file-posix.c
23
@@ -XXX,XX +XXX,XX @@ static int handle_aiocb_write_zeroes(void *opaque)
39
@@ -XXX,XX +XXX,XX @@ static void raw_aio_unplug(BlockDriverState *bs)
24
if (s->has_write_zeroes) {
40
#ifdef CONFIG_LINUX_AIO
25
int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
41
if (s->use_linux_aio) {
26
aiocb->aio_offset, aiocb->aio_nbytes);
42
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
27
- if (ret == -EINVAL) {
43
- laio_io_unplug(bs, aio);
28
- /*
44
+ laio_io_unplug(bs, aio, s->aio_max_batch);
29
- * Allow falling back to pwrite for file systems that
30
- * do not support fallocate() for an unaligned byte range.
31
- */
32
- return -ENOTSUP;
33
- }
34
- if (ret == 0 || ret != -ENOTSUP) {
35
+ if (ret == -ENOTSUP) {
36
+ s->has_write_zeroes = false;
37
+ } else if (ret == 0 || ret != -EINVAL) {
38
return ret;
39
}
40
- s->has_write_zeroes = false;
41
+ /*
42
+ * Note: Some file systems do not like unaligned byte ranges, and
43
+ * return EINVAL in such a case, though they should not do it according
44
+ * to the man-page of fallocate(). Thus we simply ignore this return
45
+ * value and try the other fallbacks instead.
46
+ */
47
}
45
}
48
#endif
46
#endif
49
47
#ifdef CONFIG_LINUX_IO_URING
48
diff --git a/block/linux-aio.c b/block/linux-aio.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/block/linux-aio.c
51
+++ b/block/linux-aio.c
52
@@ -XXX,XX +XXX,XX @@ void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
53
s->io_q.plugged++;
54
}
55
56
-void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
57
+void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
58
+ uint64_t dev_max_batch)
59
{
60
assert(s->io_q.plugged);
61
- if (--s->io_q.plugged == 0 &&
62
- !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
63
+ if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
64
+ (--s->io_q.plugged == 0 &&
65
+ !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
66
ioq_submit(s);
67
}
68
}
50
--
69
--
51
2.30.2
70
2.31.1
52
71
53
72
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Hanna Reitz <hreitz@redhat.com>
2
2
3
We have different types of parents: block nodes, block backends and
3
Similarly to e7e588d432d31ecebc26358e47201dd108db964c, there is a
4
jobs. So, it makes sense to specify type together with name.
4
warning in block/block-backend.c that qiov->size <= INT64_MAX is always
5
true on machines where size_t is narrower than a uint64_t. In said
6
commit, we silenced this warning by casting to uint64_t.
5
7
6
While being here also use g_autofree.
8
The commit introducing this warning here
9
(a93d81c84afa717b0a1a6947524d8d1fbfd6bbf5) anticipated it and so tried
10
to address it the same way. However, it only did so in one of two
11
places where this comparison occurs, and so we still need to fix up the
12
other one.
7
13
8
iotest 307 output is updated.
14
Fixes: a93d81c84afa717b0a1a6947524d8d1fbfd6bbf5
9
15
("block-backend: convert blk_aio_ functions to int64_t bytes
10
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
16
paramter")
11
Reviewed-by: Alberto Garcia <berto@igalia.com>
17
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
12
Message-Id: <20210601075218.79249-3-vsementsov@virtuozzo.com>
18
Message-Id: <20211026090745.30800-1-hreitz@redhat.com>
19
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
20
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
---
21
---
15
block/block-backend.c | 9 ++++-----
22
block/block-backend.c | 2 +-
16
tests/qemu-iotests/307.out | 2 +-
23
1 file changed, 1 insertion(+), 1 deletion(-)
17
2 files changed, 5 insertions(+), 6 deletions(-)
18
24
19
diff --git a/block/block-backend.c b/block/block-backend.c
25
diff --git a/block/block-backend.c b/block/block-backend.c
20
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
21
--- a/block/block-backend.c
27
--- a/block/block-backend.c
22
+++ b/block/block-backend.c
28
+++ b/block/block-backend.c
23
@@ -XXX,XX +XXX,XX @@ static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
29
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
24
static char *blk_root_get_parent_desc(BdrvChild *child)
30
QEMUIOVector *qiov, BdrvRequestFlags flags,
31
BlockCompletionFunc *cb, void *opaque)
25
{
32
{
26
BlockBackend *blk = child->opaque;
33
- assert(qiov->size <= INT64_MAX);
27
- char *dev_id;
34
+ assert((uint64_t)qiov->size <= INT64_MAX);
28
+ g_autofree char *dev_id = NULL;
35
return blk_aio_prwv(blk, offset, qiov->size, qiov,
29
36
blk_aio_write_entry, flags, cb, opaque);
30
if (blk->name) {
31
- return g_strdup(blk->name);
32
+ return g_strdup_printf("block device '%s'", blk->name);
33
}
34
35
dev_id = blk_get_attached_dev_id(blk);
36
if (*dev_id) {
37
- return dev_id;
38
+ return g_strdup_printf("block device '%s'", dev_id);
39
} else {
40
/* TODO Callback into the BB owner for something more detailed */
41
- g_free(dev_id);
42
- return g_strdup("a block device");
43
+ return g_strdup("an unnamed block device");
44
}
45
}
37
}
46
47
diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out
48
index XXXXXXX..XXXXXXX 100644
49
--- a/tests/qemu-iotests/307.out
50
+++ b/tests/qemu-iotests/307.out
51
@@ -XXX,XX +XXX,XX @@ exports available: 1
52
53
=== Add a writable export ===
54
{"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}}
55
-{"error": {"class": "GenericError", "desc": "Conflicts with use by sda as 'root', which does not allow 'write' on fmt"}}
56
+{"error": {"class": "GenericError", "desc": "Conflicts with use by block device 'sda' as 'root', which does not allow 'write' on fmt"}}
57
{"execute": "device_del", "arguments": {"id": "sda"}}
58
{"return": {}}
59
{"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
60
--
38
--
61
2.30.2
39
2.31.1
62
40
63
41
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
We have different types of parents: block nodes, block backends and
4
jobs. So, it makes sense to specify type together with name.
5
6
Next, this handler us used to compose an error message about permission
7
conflict. And permission conflict occurs in a specific place of block
8
graph. We shouldn't report name of parent device (as it refers another
9
place in block graph), but exactly and only the name of the node. So,
10
use bdrv_get_node_name() directly.
11
12
iotest 283 output is updated.
13
14
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
15
Reviewed-by: Alberto Garcia <berto@igalia.com>
16
Message-Id: <20210601075218.79249-4-vsementsov@virtuozzo.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
---
19
block.c | 2 +-
20
tests/qemu-iotests/283.out | 2 +-
21
2 files changed, 2 insertions(+), 2 deletions(-)
22
23
diff --git a/block.c b/block.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/block.c
26
+++ b/block.c
27
@@ -XXX,XX +XXX,XX @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
28
static char *bdrv_child_get_parent_desc(BdrvChild *c)
29
{
30
BlockDriverState *parent = c->opaque;
31
- return g_strdup(bdrv_get_device_or_node_name(parent));
32
+ return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
33
}
34
35
static void bdrv_child_cb_drained_begin(BdrvChild *child)
36
diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tests/qemu-iotests/283.out
39
+++ b/tests/qemu-iotests/283.out
40
@@ -XXX,XX +XXX,XX @@
41
{"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}}
42
{"return": {}}
43
{"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}}
44
-{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by source as 'image', which does not allow 'write' on base"}}
45
+{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by node 'source' as 'image', which does not allow 'write' on base"}}
46
47
=== backup-top should be gone after job-finalize ===
48
49
--
50
2.30.2
51
52
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Recently we've fixed a crash by adding .get_parent_aio_context handler
4
to child_vvfat_qcow. Now we want it to support .get_parent_desc as
5
well. child_vvfat_qcow wants to implement own .inherit_options, it's
6
not bad. But omitting all other handlers is a bad idea. Let's inherit
7
the class from child_of_bds instead, similar to chain_child_class and
8
detach_by_driver_cb_class in test-bdrv-drain.c.
9
10
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Message-Id: <20210601075218.79249-5-vsementsov@virtuozzo.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
14
block/vvfat.c | 8 +++-----
15
1 file changed, 3 insertions(+), 5 deletions(-)
16
17
diff --git a/block/vvfat.c b/block/vvfat.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/vvfat.c
20
+++ b/block/vvfat.c
21
@@ -XXX,XX +XXX,XX @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format,
22
qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
23
}
24
25
-static const BdrvChildClass child_vvfat_qcow = {
26
- .parent_is_bds = true,
27
- .inherit_options = vvfat_qcow_options,
28
- .get_parent_aio_context = child_of_bds_get_parent_aio_context,
29
-};
30
+static BdrvChildClass child_vvfat_qcow;
31
32
static int enable_write_target(BlockDriverState *bs, Error **errp)
33
{
34
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vvfat = {
35
36
static void bdrv_vvfat_init(void)
37
{
38
+ child_vvfat_qcow = child_of_bds;
39
+ child_vvfat_qcow.inherit_options = vvfat_qcow_options;
40
bdrv_register(&bdrv_vvfat);
41
}
42
43
--
44
2.30.2
45
46
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
Currently we update s->use_copy_range and s->copy_size in
3
Since commit 4d324c0bf65 ("introduce QEMU_AUTO_VFREE") buffers
4
block_copy_do_copy().
4
allocated by qemu_memalign() can automatically freed when using
5
the QEMU_AUTO_VFREE macro. Use it to simplify a bit.
5
6
6
It's not very good:
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
8
Message-Id: <20211006164931.172349-2-philmd@redhat.com>
8
1. block_copy_do_copy() is intended to be a simple function, that wraps
9
bdrv_co_<io> functions for need of block copy. That's why we don't pass
10
BlockCopyTask into it. So, block_copy_do_copy() is bad place for
11
manipulation with generic state of block-copy process
12
13
2. We are going to make block-copy thread-safe. So, it's good to move
14
manipulation with state of block-copy to the places where we'll need
15
critical sections anyway, to not introduce extra synchronization
16
primitives in block_copy_do_copy().
17
18
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
19
Message-Id: <20210528141628.44287-3-vsementsov@virtuozzo.com>
20
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
21
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
22
---
11
---
23
block/block-copy.c | 72 +++++++++++++++++++++++++++++++---------------
12
block/nvme.c | 11 ++++-------
24
1 file changed, 49 insertions(+), 23 deletions(-)
13
1 file changed, 4 insertions(+), 7 deletions(-)
25
14
26
diff --git a/block/block-copy.c b/block/block-copy.c
15
diff --git a/block/nvme.c b/block/nvme.c
27
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
28
--- a/block/block-copy.c
17
--- a/block/nvme.c
29
+++ b/block/block-copy.c
18
+++ b/block/nvme.c
30
@@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyTask {
19
@@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
31
int64_t offset;
32
int64_t bytes;
33
bool zeroes;
34
+ bool copy_range;
35
QLIST_ENTRY(BlockCopyTask) list;
36
CoQueue wait_queue; /* coroutines blocked on this task */
37
} BlockCopyTask;
38
@@ -XXX,XX +XXX,XX @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
39
.call_state = call_state,
40
.offset = offset,
41
.bytes = bytes,
42
+ .copy_range = s->use_copy_range,
43
};
44
qemu_co_queue_init(&task->wait_queue);
45
QLIST_INSERT_HEAD(&s->tasks, task, list);
46
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
47
*
48
* No sync here: nor bitmap neighter intersecting requests handling, only copy.
49
*
50
+ * @copy_range is an in-out argument: if *copy_range is false, copy_range is not
51
+ * done. If *copy_range is true, copy_range is attempted. If the copy_range
52
+ * attempt fails, the function falls back to the usual read+write and
53
+ * *copy_range is set to false. *copy_range and zeroes must not be true
54
+ * simultaneously.
55
+ *
56
* Returns 0 on success.
57
*/
58
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
59
int64_t offset, int64_t bytes,
60
- bool zeroes, bool *error_is_read)
61
+ bool zeroes, bool *copy_range,
62
+ bool *error_is_read)
63
{
20
{
64
int ret;
21
BDRVNVMeState *s = bs->opaque;
65
int64_t nbytes = MIN(offset + bytes, s->len) - offset;
22
bool ret = false;
66
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
23
- union {
67
assert(offset + bytes <= s->len ||
24
+ QEMU_AUTO_VFREE union {
68
offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
25
NvmeIdCtrl ctrl;
69
assert(nbytes < INT_MAX);
26
NvmeIdNs ns;
70
+ assert(!(*copy_range && zeroes));
27
- } *id;
71
28
+ } *id = NULL;
72
if (zeroes) {
29
NvmeLBAF *lbaf;
73
ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
30
uint16_t oncs;
74
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
31
int r;
75
return ret;
32
@@ -XXX,XX +XXX,XX @@ static bool nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
76
}
33
s->blkshift = lbaf->ds;
77
34
out:
78
- if (s->use_copy_range) {
35
qemu_vfio_dma_unmap(s->vfio, id);
79
+ if (*copy_range) {
36
- qemu_vfree(id);
80
ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
37
81
0, s->write_flags);
82
if (ret < 0) {
83
trace_block_copy_copy_range_fail(s, offset, ret);
84
- s->use_copy_range = false;
85
- s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
86
+ *copy_range = false;
87
/* Fallback to read+write with allocated buffer */
88
} else {
89
- if (s->use_copy_range) {
90
- /*
91
- * Successful copy-range. Now increase copy_size. copy_range
92
- * does not respect max_transfer (it's a TODO), so we factor
93
- * that in here.
94
- *
95
- * Note: we double-check s->use_copy_range for the case when
96
- * parallel block-copy request unsets it during previous
97
- * bdrv_co_copy_range call.
98
- */
99
- s->copy_size =
100
- MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
101
- QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
102
- s->target),
103
- s->cluster_size));
104
- }
105
- goto out;
106
+ return 0;
107
}
108
}
109
110
@@ -XXX,XX +XXX,XX @@ out:
111
return ret;
38
return ret;
112
}
39
}
113
40
@@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
114
+static void block_copy_handle_copy_range_result(BlockCopyState *s,
115
+ bool is_success)
116
+{
117
+ if (!s->use_copy_range) {
118
+ /* already disabled */
119
+ return;
120
+ }
121
+
122
+ if (is_success) {
123
+ /*
124
+ * Successful copy-range. Now increase copy_size. copy_range
125
+ * does not respect max_transfer (it's a TODO), so we factor
126
+ * that in here.
127
+ */
128
+ s->copy_size =
129
+ MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
130
+ QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
131
+ s->target),
132
+ s->cluster_size));
133
+ } else {
134
+ /* Copy-range failed, disable it. */
135
+ s->use_copy_range = false;
136
+ s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
137
+ }
138
+}
139
+
140
static coroutine_fn int block_copy_task_entry(AioTask *task)
141
{
41
{
142
BlockCopyTask *t = container_of(task, BlockCopyTask, task);
42
BDRVNVMeState *s = bs->opaque;
143
bool error_is_read = false;
43
int r;
144
+ bool copy_range = t->copy_range;
44
- uint8_t *buf = NULL;
45
+ QEMU_AUTO_VFREE uint8_t *buf = NULL;
46
QEMUIOVector local_qiov;
47
size_t len = QEMU_ALIGN_UP(bytes, qemu_real_host_page_size);
48
assert(QEMU_IS_ALIGNED(offset, s->page_size));
49
@@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
50
if (!r && !is_write) {
51
qemu_iovec_from_buf(qiov, 0, buf, bytes);
52
}
53
- qemu_vfree(buf);
54
return r;
55
}
56
57
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
58
BDRVNVMeState *s = bs->opaque;
59
NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
60
NVMeRequest *req;
61
- NvmeDsmRange *buf;
62
+ QEMU_AUTO_VFREE NvmeDsmRange *buf = NULL;
63
QEMUIOVector local_qiov;
145
int ret;
64
int ret;
146
65
147
ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes,
66
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
148
- &error_is_read);
67
trace_nvme_dsm_done(s, offset, bytes, ret);
149
+ &copy_range, &error_is_read);
68
out:
150
+ if (t->copy_range) {
69
qemu_iovec_destroy(&local_qiov);
151
+ block_copy_handle_copy_range_result(t->s, copy_range);
70
- qemu_vfree(buf);
152
+ }
71
return ret;
153
if (ret < 0) {
72
154
if (!t->call_state->ret) {
73
}
155
t->call_state->ret = ret;
156
@@ -XXX,XX +XXX,XX @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
157
g_free(task);
158
continue;
159
}
160
- task->zeroes = ret & BDRV_BLOCK_ZERO;
161
+ if (ret & BDRV_BLOCK_ZERO) {
162
+ task->zeroes = true;
163
+ task->copy_range = false;
164
+ }
165
166
if (s->speed) {
167
if (!call_state->ignore_ratelimit) {
168
--
74
--
169
2.30.2
75
2.31.1
170
76
171
77
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
Document that security reports must use 'null-co,read-zeroes=on'
3
For debugging purpose it is helpful to know the CQ/SQ pointers.
4
because otherwise the memory is left uninitialized (which is an
4
We already have a trace event in nvme_free_queue_pair(), extend
5
on-purpose performance feature).
5
it to report these pointer addresses.
6
6
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Message-Id: <20210601162548.2076631-1-philmd@redhat.com>
8
Message-Id: <20211006164931.172349-3-philmd@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
11
---
12
docs/devel/secure-coding-practices.rst | 9 +++++++++
12
block/nvme.c | 2 +-
13
1 file changed, 9 insertions(+)
13
block/trace-events | 2 +-
14
2 files changed, 2 insertions(+), 2 deletions(-)
14
15
15
diff --git a/docs/devel/secure-coding-practices.rst b/docs/devel/secure-coding-practices.rst
16
diff --git a/block/nvme.c b/block/nvme.c
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/docs/devel/secure-coding-practices.rst
18
--- a/block/nvme.c
18
+++ b/docs/devel/secure-coding-practices.rst
19
+++ b/block/nvme.c
19
@@ -XXX,XX +XXX,XX @@ structures and only process the local copy. This prevents
20
@@ -XXX,XX +XXX,XX @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
20
time-of-check-to-time-of-use (TOCTOU) race conditions that could cause QEMU to
21
21
crash when a vCPU thread modifies guest RAM while device emulation is
22
static void nvme_free_queue_pair(NVMeQueuePair *q)
22
processing it.
23
{
23
+
24
- trace_nvme_free_queue_pair(q->index, q);
24
+Use of null-co block drivers
25
+ trace_nvme_free_queue_pair(q->index, q, &q->cq, &q->sq);
25
+----------------------------
26
if (q->completion_bh) {
26
+
27
qemu_bh_delete(q->completion_bh);
27
+The ``null-co`` block driver is designed for performance: its read accesses are
28
}
28
+not initialized by default. In case this driver has to be used for security
29
diff --git a/block/trace-events b/block/trace-events
29
+research, it must be used with the ``read-zeroes=on`` option which fills read
30
index XXXXXXX..XXXXXXX 100644
30
+buffers with zeroes. Security issues reported with the default
31
--- a/block/trace-events
31
+(``read-zeroes=off``) will be discarded.
32
+++ b/block/trace-events
33
@@ -XXX,XX +XXX,XX @@ nvme_dsm_done(void *s, int64_t offset, int64_t bytes, int ret) "s %p offset 0x%"
34
nvme_dma_map_flush(void *s) "s %p"
35
nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
36
nvme_create_queue_pair(unsigned q_index, void *q, size_t size, void *aio_context, int fd) "index %u q %p size %zu aioctx %p fd %d"
37
-nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p"
38
+nvme_free_queue_pair(unsigned q_index, void *q, void *cq, void *sq) "index %u q %p cq %p sq %p"
39
nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
40
nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 0x%"PRIx64
41
nvme_cmd_map_qiov_iov(void *s, int i, void *page, int pages) "s %p iov[%d] %p pages %d"
32
--
42
--
33
2.30.2
43
2.31.1
34
44
35
45
diff view generated by jsdifflib
1
From: Sergio Lopez <slp@redhat.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
Before switching between AioContexts we need to make sure that we're
3
Instead of duplicating code, extract the common helper to free
4
fully quiesced ("nb_requests == 0" for every client) when entering the
4
a single queue.
5
drained section.
6
5
7
To do this, we set "quiescing = true" for every client on
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
".drained_begin" to prevent new coroutines from being created, and
7
Message-Id: <20211006164931.172349-4-philmd@redhat.com>
9
check if "nb_requests == 0" on ".drained_poll". Finally, once we're
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
exiting the drained section, on ".drained_end" we set "quiescing =
11
false" and call "nbd_client_receive_next_request()" to resume the
12
processing of new requests.
13
14
With these changes, "blk_aio_attach()" and "blk_aio_detach()" can be
15
reverted to be as simple as they were before f148ae7d36.
16
17
RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1960137
18
Suggested-by: Kevin Wolf <kwolf@redhat.com>
19
Signed-off-by: Sergio Lopez <slp@redhat.com>
20
Message-Id: <20210602060552.17433-3-slp@redhat.com>
21
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
22
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
23
---
10
---
24
nbd/server.c | 82 ++++++++++++++++++++++++++++++++++++++--------------
11
block/nvme.c | 9 +++++++--
25
1 file changed, 61 insertions(+), 21 deletions(-)
12
1 file changed, 7 insertions(+), 2 deletions(-)
26
13
27
diff --git a/nbd/server.c b/nbd/server.c
14
diff --git a/block/nvme.c b/block/nvme.c
28
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
29
--- a/nbd/server.c
16
--- a/block/nvme.c
30
+++ b/nbd/server.c
17
+++ b/block/nvme.c
31
@@ -XXX,XX +XXX,XX @@ static void nbd_request_put(NBDRequestData *req)
18
@@ -XXX,XX +XXX,XX @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
32
g_free(req);
19
return r == 0;
33
34
client->nb_requests--;
35
+
36
+ if (client->quiescing && client->nb_requests == 0) {
37
+ aio_wait_kick();
38
+ }
39
+
40
nbd_client_receive_next_request(client);
41
42
nbd_client_put(client);
43
@@ -XXX,XX +XXX,XX @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
44
QTAILQ_FOREACH(client, &exp->clients, next) {
45
qio_channel_attach_aio_context(client->ioc, ctx);
46
47
+ assert(client->nb_requests == 0);
48
assert(client->recv_coroutine == NULL);
49
assert(client->send_coroutine == NULL);
50
-
51
- if (client->quiescing) {
52
- client->quiescing = false;
53
- nbd_client_receive_next_request(client);
54
- }
55
}
56
}
20
}
57
21
58
-static void nbd_aio_detach_bh(void *opaque)
22
+static void nvme_free_queue(NVMeQueue *q)
59
+static void blk_aio_detach(void *opaque)
23
+{
60
{
24
+ qemu_vfree(q->queue);
61
NBDExport *exp = opaque;
62
NBDClient *client;
63
64
+ trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
65
+
66
QTAILQ_FOREACH(client, &exp->clients, next) {
67
qio_channel_detach_aio_context(client->ioc);
68
+ }
69
+
70
+ exp->common.ctx = NULL;
71
+}
25
+}
72
+
26
+
73
+static void nbd_drained_begin(void *opaque)
27
static void nvme_free_queue_pair(NVMeQueuePair *q)
74
+{
28
{
75
+ NBDExport *exp = opaque;
29
trace_nvme_free_queue_pair(q->index, q, &q->cq, &q->sq);
76
+ NBDClient *client;
30
if (q->completion_bh) {
77
+
31
qemu_bh_delete(q->completion_bh);
78
+ QTAILQ_FOREACH(client, &exp->clients, next) {
79
client->quiescing = true;
80
+ }
81
+}
82
83
- if (client->recv_coroutine) {
84
- if (client->read_yielding) {
85
- qemu_aio_coroutine_enter(exp->common.ctx,
86
- client->recv_coroutine);
87
- } else {
88
- AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL);
89
- }
90
- }
91
+static void nbd_drained_end(void *opaque)
92
+{
93
+ NBDExport *exp = opaque;
94
+ NBDClient *client;
95
96
- if (client->send_coroutine) {
97
- AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL);
98
- }
99
+ QTAILQ_FOREACH(client, &exp->clients, next) {
100
+ client->quiescing = false;
101
+ nbd_client_receive_next_request(client);
102
}
32
}
33
+ nvme_free_queue(&q->sq);
34
+ nvme_free_queue(&q->cq);
35
qemu_vfree(q->prp_list_pages);
36
- qemu_vfree(q->sq.queue);
37
- qemu_vfree(q->cq.queue);
38
qemu_mutex_destroy(&q->lock);
39
g_free(q);
103
}
40
}
104
105
-static void blk_aio_detach(void *opaque)
106
+static bool nbd_drained_poll(void *opaque)
107
{
108
NBDExport *exp = opaque;
109
+ NBDClient *client;
110
111
- trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
112
+ QTAILQ_FOREACH(client, &exp->clients, next) {
113
+ if (client->nb_requests != 0) {
114
+ /*
115
+ * If there's a coroutine waiting for a request on nbd_read_eof()
116
+ * enter it here so we don't depend on the client to wake it up.
117
+ */
118
+ if (client->recv_coroutine != NULL && client->read_yielding) {
119
+ qemu_aio_coroutine_enter(exp->common.ctx,
120
+ client->recv_coroutine);
121
+ }
122
123
- aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp);
124
+ return true;
125
+ }
126
+ }
127
128
- exp->common.ctx = NULL;
129
+ return false;
130
}
131
132
static void nbd_eject_notifier(Notifier *n, void *data)
133
@@ -XXX,XX +XXX,XX @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
134
blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
135
}
136
137
+static const BlockDevOps nbd_block_ops = {
138
+ .drained_begin = nbd_drained_begin,
139
+ .drained_end = nbd_drained_end,
140
+ .drained_poll = nbd_drained_poll,
141
+};
142
+
143
static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
144
Error **errp)
145
{
146
@@ -XXX,XX +XXX,XX @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
147
148
exp->allocation_depth = arg->allocation_depth;
149
150
+ /*
151
+ * We need to inhibit request queuing in the block layer to ensure we can
152
+ * be properly quiesced when entering a drained section, as our coroutines
153
+ * servicing pending requests might enter blk_pread().
154
+ */
155
+ blk_set_disable_request_queuing(blk, true);
156
+
157
blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
158
159
+ blk_set_dev_ops(blk, &nbd_block_ops, exp);
160
+
161
QTAILQ_INSERT_TAIL(&exports, exp, next);
162
163
return 0;
164
@@ -XXX,XX +XXX,XX @@ static void nbd_export_delete(BlockExport *blk_exp)
165
}
166
blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
167
blk_aio_detach, exp);
168
+ blk_set_disable_request_queuing(exp->common.blk, false);
169
}
170
171
for (i = 0; i < exp->nr_export_bitmaps; i++) {
172
--
41
--
173
2.30.2
42
2.31.1
174
43
175
44
diff view generated by jsdifflib