1
The following changes since commit 86f4c7e05b1c44dbe1b329a51f311f10aef6ff34:
1
The following changes since commit 281f327487c9c9b1599f93c589a408bbf4a651b8:
2
2
3
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20180302' into staging (2018-03-02 14:37:10 +0000)
3
Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.12-pull-request' into staging (2017-12-22 00:11:36 +0000)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
8
8
9
for you to fetch changes up to 9d9b4b640f9e583ff4b24dc762630945f3ccc16d:
9
for you to fetch changes up to 1a63a907507fbbcfaee3f622907ec244b7eabda8:
10
10
11
Merge remote-tracking branch 'mreitz/tags/pull-block-2018-03-02' into queue-block (2018-03-02 18:45:03 +0100)
11
block: Keep nodes drained between reopen_queue/multiple (2017-12-22 15:05:32 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block layer patches
14
Block layer patches
15
15
16
----------------------------------------------------------------
16
----------------------------------------------------------------
17
Alberto Garcia (3):
17
Doug Gale (1):
18
specs/qcow2: Fix documentation of the compressed cluster descriptor
18
nvme: Add tracing
19
docs: document how to use the l2-cache-entry-size parameter
20
qcow2: Replace align_offset() with ROUND_UP()
21
19
22
Anton Nefedov (2):
20
Edgar Kaziakhmedov (1):
23
block: fix write with zero flag set and iovector provided
21
qcow2: get rid of qcow2_backing_read1 routine
24
iotest 033: add misaligned write-zeroes test via truncate
25
22
26
Eric Blake (21):
23
Fam Zheng (2):
27
block: Add .bdrv_co_block_status() callback
24
block: Open backing image in force share mode for size probe
28
nvme: Drop pointless .bdrv_co_get_block_status()
25
block: Remove unused bdrv_requests_pending
29
block: Switch passthrough drivers to .bdrv_co_block_status()
30
file-posix: Switch to .bdrv_co_block_status()
31
gluster: Switch to .bdrv_co_block_status()
32
iscsi: Switch cluster_sectors to byte-based
33
iscsi: Switch iscsi_allocmap_update() to byte-based
34
iscsi: Switch to .bdrv_co_block_status()
35
null: Switch to .bdrv_co_block_status()
36
parallels: Switch to .bdrv_co_block_status()
37
qcow: Switch to .bdrv_co_block_status()
38
qcow2: Switch to .bdrv_co_block_status()
39
qed: Switch to .bdrv_co_block_status()
40
raw: Switch to .bdrv_co_block_status()
41
sheepdog: Switch to .bdrv_co_block_status()
42
vdi: Avoid bitrot of debugging code
43
vdi: Switch to .bdrv_co_block_status()
44
vmdk: Switch to .bdrv_co_block_status()
45
vpc: Switch to .bdrv_co_block_status()
46
vvfat: Switch to .bdrv_co_block_status()
47
block: Drop unused .bdrv_co_get_block_status()
48
26
49
Kevin Wolf (2):
27
John Snow (1):
50
block: test blk_aio_flush() with blk->root == NULL
28
iotests: fix 197 for vpc
51
Merge remote-tracking branch 'mreitz/tags/pull-block-2018-03-02' into queue-block
52
29
53
Max Reitz (4):
30
Kevin Wolf (27):
54
qemu-img: Make resize error message more general
31
block: Formats don't need CONSISTENT_READ with NO_IO
55
block/ssh: Pull ssh_grow_file() from ssh_create()
32
block: Make bdrv_drain_invoke() recursive
56
block/ssh: Make ssh_grow_file() blocking
33
block: Call .drain_begin only once in bdrv_drain_all_begin()
57
block/ssh: Add basic .bdrv_truncate()
34
test-bdrv-drain: Test BlockDriver callbacks for drain
35
block: bdrv_drain_recurse(): Remove unused begin parameter
36
block: Don't wait for requests in bdrv_drain*_end()
37
block: Unify order in drain functions
38
block: Don't acquire AioContext in hmp_qemu_io()
39
block: Document that x-blockdev-change breaks quorum children list
40
block: Assert drain_all is only called from main AioContext
41
block: Make bdrv_drain() driver callbacks non-recursive
42
test-bdrv-drain: Test callback for bdrv_drain
43
test-bdrv-drain: Test bs->quiesce_counter
44
blockjob: Pause job on draining any job BDS
45
test-bdrv-drain: Test drain vs. block jobs
46
block: Don't block_job_pause_all() in bdrv_drain_all()
47
block: Nested drain_end must still call callbacks
48
test-bdrv-drain: Test nested drain sections
49
block: Don't notify parents in drain call chain
50
block: Add bdrv_subtree_drained_begin/end()
51
test-bdrv-drain: Tests for bdrv_subtree_drain
52
test-bdrv-drain: Test behaviour in coroutine context
53
test-bdrv-drain: Recursive draining with multiple parents
54
block: Allow graph changes in subtree drained section
55
test-bdrv-drain: Test graph changes in drained section
56
commit: Simplify reopen of base
57
block: Keep nodes drained between reopen_queue/multiple
58
58
59
Stefan Hajnoczi (6):
59
Thomas Huth (3):
60
aio: rename aio_context_in_iothread() to in_aio_context_home_thread()
60
block: Remove the obsolete -drive boot=on|off parameter
61
block: extract AIO_WAIT_WHILE() from BlockDriverState
61
block: Remove the deprecated -hdachs option
62
block: add BlockBackend->in_flight counter
62
block: Mention -drive cyls/heads/secs/trans/serial/addr in deprecation chapter
63
Revert "IDE: Do not flush empty CDROM drives"
64
block: rename .bdrv_create() to .bdrv_co_create_opts()
65
qcow2: make qcow2_co_create2() a coroutine_fn
66
63
67
docs/interop/qcow2.txt | 16 ++++-
64
qapi/block-core.json | 4 +
68
docs/qcow2-cache.txt | 46 ++++++++++++-
65
block/qcow2.h | 3 -
69
block/qcow2.h | 6 --
66
include/block/block.h | 15 +-
70
include/block/aio-wait.h | 116 ++++++++++++++++++++++++++++++++
67
include/block/block_int.h | 6 +-
71
include/block/aio.h | 7 +-
68
block.c | 75 ++++-
72
include/block/block.h | 54 ++++-----------
69
block/commit.c | 8 +-
73
include/block/block_int.h | 61 ++++++++++-------
70
block/io.c | 164 +++++++---
74
block.c | 11 ++-
71
block/qcow2.c | 51 +--
75
block/blkdebug.c | 20 +++---
72
block/replication.c | 6 +
76
block/block-backend.c | 60 +++++++++++++++--
73
blockdev.c | 11 -
77
block/commit.c | 2 +-
74
blockjob.c | 22 +-
78
block/crypto.c | 8 +--
75
hmp.c | 6 -
79
block/file-posix.c | 79 +++++++++++-----------
76
hw/block/nvme.c | 349 +++++++++++++++++----
80
block/file-win32.c | 5 +-
77
qemu-io-cmds.c | 3 +
81
block/gluster.c | 83 ++++++++++++-----------
78
tests/test-bdrv-drain.c | 651 +++++++++++++++++++++++++++++++++++++++
82
block/io.c | 98 +++++++++++----------------
79
vl.c | 86 +-----
83
block/iscsi.c | 164 ++++++++++++++++++++++++---------------------
80
hw/block/trace-events | 93 ++++++
84
block/mirror.c | 2 +-
81
qemu-doc.texi | 29 +-
85
block/nfs.c | 5 +-
82
qemu-options.hx | 19 +-
86
block/null.c | 23 ++++---
83
tests/Makefile.include | 2 +
87
block/nvme.c | 14 ----
84
tests/qemu-iotests/197 | 4 +
88
block/parallels.c | 28 +++++---
85
tests/qemu-iotests/common.filter | 3 +-
89
block/qcow.c | 32 +++++----
86
22 files changed, 1294 insertions(+), 316 deletions(-)
90
block/qcow2-bitmap.c | 4 +-
87
create mode 100644 tests/test-bdrv-drain.c
91
block/qcow2-cluster.c | 4 +-
92
block/qcow2-refcount.c | 4 +-
93
block/qcow2-snapshot.c | 10 +--
94
block/qcow2.c | 60 +++++++++--------
95
block/qed.c | 82 ++++++++---------------
96
block/raw-format.c | 21 +++---
97
block/rbd.c | 6 +-
98
block/sheepdog.c | 36 +++++-----
99
block/ssh.c | 66 +++++++++++++++---
100
block/throttle.c | 2 +-
101
block/vdi.c | 50 +++++++-------
102
block/vhdx.c | 5 +-
103
block/vmdk.c | 43 +++++-------
104
block/vpc.c | 50 +++++++-------
105
block/vvfat.c | 16 ++---
106
hw/ide/core.c | 10 +--
107
qemu-img.c | 2 +-
108
tests/test-block-backend.c | 82 +++++++++++++++++++++++
109
util/aio-wait.c | 40 +++++++++++
110
tests/Makefile.include | 2 +
111
tests/qemu-iotests/033 | 29 ++++++++
112
tests/qemu-iotests/033.out | 13 ++++
113
util/Makefile.objs | 2 +-
114
47 files changed, 973 insertions(+), 606 deletions(-)
115
create mode 100644 include/block/aio-wait.h
116
create mode 100644 tests/test-block-backend.c
117
create mode 100644 util/aio-wait.c
118
88
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
Commit 1f4ad7d fixed 'qemu-img info' for raw images that are currently
2
in use as a mirror target. It is not enough for image formats, though,
3
as these still unconditionally request BLK_PERM_CONSISTENT_READ.
2
4
3
BlockBackend currently relies on BlockDriverState->in_flight to track
5
As this permission is geared towards whether the guest-visible data is
4
requests for blk_drain(). There is a corner case where
6
consistent, and has no impact on whether the metadata is sane, and
5
BlockDriverState->in_flight cannot be used though: blk->root can be NULL
7
'qemu-img info' does not read guest-visible data (except for the raw
6
when there is no medium. This results in a segfault when the NULL
8
format), it makes sense to not require BLK_PERM_CONSISTENT_READ if there
7
pointer is dereferenced.
9
is not going to be any guest I/O performed, regardless of image format.
8
9
Introduce a BlockBackend->in_flight counter for aio requests so it works
10
even when blk->root == NULL.
11
12
Based on a patch by Kevin Wolf <kwolf@redhat.com>.
13
10
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
---
12
---
18
block.c | 2 +-
13
block.c | 6 +++++-
19
block/block-backend.c | 60 +++++++++++++++++++++++++++++++++++++++++++++------
14
1 file changed, 5 insertions(+), 1 deletion(-)
20
2 files changed, 54 insertions(+), 8 deletions(-)
21
15
22
diff --git a/block.c b/block.c
16
diff --git a/block.c b/block.c
23
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
24
--- a/block.c
18
--- a/block.c
25
+++ b/block.c
19
+++ b/block.c
26
@@ -XXX,XX +XXX,XX @@ out:
20
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
27
21
assert(role == &child_backing || role == &child_file);
28
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
22
29
{
23
if (!backing) {
30
- return bs->aio_context;
24
+ int flags = bdrv_reopen_get_flags(reopen_queue, bs);
31
+ return bs ? bs->aio_context : qemu_get_aio_context();
32
}
33
34
AioWait *bdrv_get_aio_wait(BlockDriverState *bs)
35
diff --git a/block/block-backend.c b/block/block-backend.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/block/block-backend.c
38
+++ b/block/block-backend.c
39
@@ -XXX,XX +XXX,XX @@ struct BlockBackend {
40
int quiesce_counter;
41
VMChangeStateEntry *vmsh;
42
bool force_allow_inactivate;
43
+
25
+
44
+ /* Number of in-flight aio requests. BlockDriverState also counts
26
/* Apart from the modifications below, the same permissions are
45
+ * in-flight requests but aio requests can exist even when blk->root is
27
* forwarded and left alone as for filters */
46
+ * NULL, so we cannot rely on its counter for that case.
28
bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
47
+ * Accessed with atomic ops.
29
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
48
+ */
30
49
+ unsigned int in_flight;
31
/* bs->file always needs to be consistent because of the metadata. We
50
+ AioWait wait;
32
* can never allow other users to resize or write to it. */
51
};
33
- perm |= BLK_PERM_CONSISTENT_READ;
52
34
+ if (!(flags & BDRV_O_NO_IO)) {
53
typedef struct BlockBackendAIOCB {
35
+ perm |= BLK_PERM_CONSISTENT_READ;
54
@@ -XXX,XX +XXX,XX @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
36
+ }
55
return bdrv_make_zero(blk->root, flags);
37
shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
56
}
38
} else {
57
39
/* We want consistent read from backing files if the parent needs it.
58
+static void blk_inc_in_flight(BlockBackend *blk)
59
+{
60
+ atomic_inc(&blk->in_flight);
61
+}
62
+
63
+static void blk_dec_in_flight(BlockBackend *blk)
64
+{
65
+ atomic_dec(&blk->in_flight);
66
+ aio_wait_kick(&blk->wait);
67
+}
68
+
69
static void error_callback_bh(void *opaque)
70
{
71
struct BlockBackendAIOCB *acb = opaque;
72
73
- bdrv_dec_in_flight(acb->common.bs);
74
+ blk_dec_in_flight(acb->blk);
75
acb->common.cb(acb->common.opaque, acb->ret);
76
qemu_aio_unref(acb);
77
}
78
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
79
{
80
struct BlockBackendAIOCB *acb;
81
82
- bdrv_inc_in_flight(blk_bs(blk));
83
+ blk_inc_in_flight(blk);
84
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
85
acb->blk = blk;
86
acb->ret = ret;
87
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
88
static void blk_aio_complete(BlkAioEmAIOCB *acb)
89
{
90
if (acb->has_returned) {
91
- bdrv_dec_in_flight(acb->common.bs);
92
+ blk_dec_in_flight(acb->rwco.blk);
93
acb->common.cb(acb->common.opaque, acb->rwco.ret);
94
qemu_aio_unref(acb);
95
}
96
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
97
BlkAioEmAIOCB *acb;
98
Coroutine *co;
99
100
- bdrv_inc_in_flight(blk_bs(blk));
101
+ blk_inc_in_flight(blk);
102
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
103
acb->rwco = (BlkRwCo) {
104
.blk = blk,
105
@@ -XXX,XX +XXX,XX @@ int blk_flush(BlockBackend *blk)
106
107
void blk_drain(BlockBackend *blk)
108
{
109
- if (blk_bs(blk)) {
110
- bdrv_drain(blk_bs(blk));
111
+ BlockDriverState *bs = blk_bs(blk);
112
+
113
+ if (bs) {
114
+ bdrv_drained_begin(bs);
115
+ }
116
+
117
+ /* We may have -ENOMEDIUM completions in flight */
118
+ AIO_WAIT_WHILE(&blk->wait,
119
+ blk_get_aio_context(blk),
120
+ atomic_mb_read(&blk->in_flight) > 0);
121
+
122
+ if (bs) {
123
+ bdrv_drained_end(bs);
124
}
125
}
126
127
void blk_drain_all(void)
128
{
129
- bdrv_drain_all();
130
+ BlockBackend *blk = NULL;
131
+
132
+ bdrv_drain_all_begin();
133
+
134
+ while ((blk = blk_all_next(blk)) != NULL) {
135
+ AioContext *ctx = blk_get_aio_context(blk);
136
+
137
+ aio_context_acquire(ctx);
138
+
139
+ /* We may have -ENOMEDIUM completions in flight */
140
+ AIO_WAIT_WHILE(&blk->wait, ctx,
141
+ atomic_mb_read(&blk->in_flight) > 0);
142
+
143
+ aio_context_release(ctx);
144
+ }
145
+
146
+ bdrv_drain_all_end();
147
}
148
149
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
150
--
40
--
151
2.13.6
41
2.13.6
152
42
153
43
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
From: John Snow <jsnow@redhat.com>
2
2
3
libssh2 does not seem to offer real truncation support, so we can only
3
VPC has some difficulty creating geometries of particular size.
4
grow files -- but that is better than nothing.
4
However, we can indeed force it to use a literal one, so let's
5
do that for the sake of test 197, which is testing some specific
6
offsets.
5
7
6
Signed-off-by: Max Reitz <mreitz@redhat.com>
8
Signed-off-by: John Snow <jsnow@redhat.com>
7
Message-id: 20180214204915.7980-4-mreitz@redhat.com
8
Reviewed-by: Eric Blake <eblake@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
9
Reviewed-by: Richard W.M. Jones <rjones@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Reviewed-by: Lukáš Doktor <ldoktor@redhat.com>
11
---
13
---
12
block/ssh.c | 24 ++++++++++++++++++++++++
14
tests/qemu-iotests/197 | 4 ++++
13
1 file changed, 24 insertions(+)
15
tests/qemu-iotests/common.filter | 3 ++-
16
2 files changed, 6 insertions(+), 1 deletion(-)
14
17
15
diff --git a/block/ssh.c b/block/ssh.c
18
diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
19
index XXXXXXX..XXXXXXX 100755
20
--- a/tests/qemu-iotests/197
21
+++ b/tests/qemu-iotests/197
22
@@ -XXX,XX +XXX,XX @@ echo '=== Copy-on-read ==='
23
echo
24
25
# Prep the images
26
+# VPC rounds image sizes to a specific geometry, force a specific size.
27
+if [ "$IMGFMT" = "vpc" ]; then
28
+ IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size")
29
+fi
30
_make_test_img 4G
31
$QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
32
IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
33
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
16
index XXXXXXX..XXXXXXX 100644
34
index XXXXXXX..XXXXXXX 100644
17
--- a/block/ssh.c
35
--- a/tests/qemu-iotests/common.filter
18
+++ b/block/ssh.c
36
+++ b/tests/qemu-iotests/common.filter
19
@@ -XXX,XX +XXX,XX @@ static int64_t ssh_getlength(BlockDriverState *bs)
37
@@ -XXX,XX +XXX,XX @@ _filter_img_create()
20
return length;
38
-e "s# log_size=[0-9]\\+##g" \
39
-e "s# refcount_bits=[0-9]\\+##g" \
40
-e "s# key-secret=[a-zA-Z0-9]\\+##g" \
41
- -e "s# iter-time=[0-9]\\+##g"
42
+ -e "s# iter-time=[0-9]\\+##g" \
43
+ -e "s# force_size=\\(on\\|off\\)##g"
21
}
44
}
22
45
23
+static int ssh_truncate(BlockDriverState *bs, int64_t offset,
46
_filter_img_info()
24
+ PreallocMode prealloc, Error **errp)
25
+{
26
+ BDRVSSHState *s = bs->opaque;
27
+
28
+ if (prealloc != PREALLOC_MODE_OFF) {
29
+ error_setg(errp, "Unsupported preallocation mode '%s'",
30
+ PreallocMode_str(prealloc));
31
+ return -ENOTSUP;
32
+ }
33
+
34
+ if (offset < s->attrs.filesize) {
35
+ error_setg(errp, "ssh driver does not support shrinking files");
36
+ return -ENOTSUP;
37
+ }
38
+
39
+ if (offset == s->attrs.filesize) {
40
+ return 0;
41
+ }
42
+
43
+ return ssh_grow_file(s, offset, errp);
44
+}
45
+
46
static BlockDriver bdrv_ssh = {
47
.format_name = "ssh",
48
.protocol_name = "ssh",
49
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_ssh = {
50
.bdrv_co_readv = ssh_co_readv,
51
.bdrv_co_writev = ssh_co_writev,
52
.bdrv_getlength = ssh_getlength,
53
+ .bdrv_truncate = ssh_truncate,
54
.bdrv_co_flush_to_disk = ssh_co_flush,
55
.create_opts = &ssh_create_opts,
56
};
57
--
47
--
58
2.13.6
48
2.13.6
59
49
60
50
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
This change separates bdrv_drain_invoke(), which calls the BlockDriver
2
drain callbacks, from bdrv_drain_recurse(). Instead, the function
3
performs its own recursion now.
2
4
3
If we ever want to offer even rudimentary truncation functionality for
5
One reason for this is that bdrv_drain_recurse() can be called multiple
4
ssh, we should put the respective code into a reusable function.
6
times by bdrv_drain_all_begin(), but the callbacks may only be called
7
once. The separation is necessary to fix this bug.
5
8
6
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
The other reason is that we intend to go to a model where we call all
7
Message-id: 20180214204915.7980-2-mreitz@redhat.com
10
driver callbacks first, and only then start polling. This is not fully
8
Reviewed-by: Eric Blake <eblake@redhat.com>
11
achieved yet with this patch, as bdrv_drain_invoke() contains a
9
Reviewed-by: Richard W.M. Jones <rjones@redhat.com>
12
BDRV_POLL_WHILE() loop for the block driver callbacks, which can still
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
13
call callbacks for any unrelated event. It's a step in this direction
14
anyway.
15
16
Cc: qemu-stable@nongnu.org
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
19
---
12
block/ssh.c | 30 ++++++++++++++++++++++--------
20
block/io.c | 14 +++++++++++---
13
1 file changed, 22 insertions(+), 8 deletions(-)
21
1 file changed, 11 insertions(+), 3 deletions(-)
14
22
15
diff --git a/block/ssh.c b/block/ssh.c
23
diff --git a/block/io.c b/block/io.c
16
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
17
--- a/block/ssh.c
25
--- a/block/io.c
18
+++ b/block/ssh.c
26
+++ b/block/io.c
19
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
27
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
20
return ret;
28
bdrv_wakeup(bs);
21
}
29
}
22
30
23
+static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
31
+/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
24
+{
32
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
25
+ ssize_t ret;
33
{
26
+ char c[1] = { '\0' };
34
+ BdrvChild *child, *tmp;
35
BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
36
37
if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
38
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
39
data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
40
bdrv_coroutine_enter(bs, data.co);
41
BDRV_POLL_WHILE(bs, !data.done);
27
+
42
+
28
+ /* offset must be strictly greater than the current size so we do
43
+ QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
29
+ * not overwrite anything */
44
+ bdrv_drain_invoke(child->bs, begin);
30
+ assert(offset > 0 && offset > s->attrs.filesize);
31
+
32
+ libssh2_sftp_seek64(s->sftp_handle, offset - 1);
33
+ ret = libssh2_sftp_write(s->sftp_handle, c, 1);
34
+ if (ret < 0) {
35
+ sftp_error_setg(errp, s, "Failed to grow file");
36
+ return -EIO;
37
+ }
45
+ }
38
+
46
}
39
+ s->attrs.filesize = offset;
47
40
+ return 0;
48
static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
41
+}
49
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
42
+
50
BdrvChild *child, *tmp;
43
static QemuOptsList ssh_create_opts = {
51
bool waited;
44
.name = "ssh-create-opts",
52
45
.head = QTAILQ_HEAD_INITIALIZER(ssh_create_opts.head),
53
- /* Ensure any pending metadata writes are submitted to bs->file. */
46
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts,
54
- bdrv_drain_invoke(bs, begin);
47
int64_t total_size = 0;
55
-
48
QDict *uri_options = NULL;
56
/* Wait for drained requests to finish */
49
BDRVSSHState s;
57
waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
50
- ssize_t r2;
58
51
- char c[1] = { '\0' };
59
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
52
60
bdrv_parent_drained_begin(bs);
53
ssh_state_init(&s);
54
55
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts,
56
}
61
}
57
62
58
if (total_size > 0) {
63
+ bdrv_drain_invoke(bs, true);
59
- libssh2_sftp_seek64(s.sftp_handle, total_size-1);
64
bdrv_drain_recurse(bs, true);
60
- r2 = libssh2_sftp_write(s.sftp_handle, c, 1);
65
}
61
- if (r2 < 0) {
66
62
- sftp_error_setg(errp, &s, "truncate failed");
67
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
63
- ret = -EINVAL;
64
+ ret = ssh_grow_file(&s, total_size, errp);
65
+ if (ret < 0) {
66
goto out;
67
}
68
- s.attrs.filesize = total_size;
69
}
68
}
70
69
71
ret = 0;
70
bdrv_parent_drained_end(bs);
71
+ bdrv_drain_invoke(bs, false);
72
bdrv_drain_recurse(bs, false);
73
aio_enable_external(bdrv_get_aio_context(bs));
74
}
75
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
76
aio_context_acquire(aio_context);
77
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
78
if (aio_context == bdrv_get_aio_context(bs)) {
79
+ /* FIXME Calling this multiple times is wrong */
80
+ bdrv_drain_invoke(bs, true);
81
waited |= bdrv_drain_recurse(bs, true);
82
}
83
}
84
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
85
aio_context_acquire(aio_context);
86
aio_enable_external(aio_context);
87
bdrv_parent_drained_end(bs);
88
+ bdrv_drain_invoke(bs, false);
89
bdrv_drain_recurse(bs, false);
90
aio_context_release(aio_context);
91
}
72
--
92
--
73
2.13.6
93
2.13.6
74
94
75
95
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
bdrv_drain_all_begin() used to call the .bdrv_co_drain_begin() driver
2
callback inside its polling loop. This means that how many times it got
3
called for each node depended on how long it had to poll the event loop.
2
4
3
At runtime (that is, during a future ssh_truncate()), the SSH session is
5
This is obviously not right and results in nodes that stay drained even
4
non-blocking. However, ssh_truncate() (or rather, bdrv_truncate() in
6
after bdrv_drain_all_end(), which calls .bdrv_co_drain_end() once per
5
general) is not a coroutine, so this resize operation needs to block.
7
node.
6
8
7
For ssh_create(), that is fine, too; the session is never set to
9
Fix bdrv_drain_all_begin() to call the callback only once, too.
8
non-blocking anyway.
9
10
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
Cc: qemu-stable@nongnu.org
11
Message-id: 20180214204915.7980-3-mreitz@redhat.com
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Reviewed-by: Eric Blake <eblake@redhat.com>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Reviewed-by: Richard W.M. Jones <rjones@redhat.com>
14
Signed-off-by: Max Reitz <mreitz@redhat.com>
15
---
14
---
16
block/ssh.c | 7 +++++++
15
block/io.c | 3 +--
17
1 file changed, 7 insertions(+)
16
1 file changed, 1 insertion(+), 2 deletions(-)
18
17
19
diff --git a/block/ssh.c b/block/ssh.c
18
diff --git a/block/io.c b/block/io.c
20
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
21
--- a/block/ssh.c
20
--- a/block/io.c
22
+++ b/block/ssh.c
21
+++ b/block/io.c
23
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
22
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
24
return ret;
23
aio_context_acquire(aio_context);
25
}
24
bdrv_parent_drained_begin(bs);
26
25
aio_disable_external(aio_context);
27
+/* Note: This is a blocking operation */
26
+ bdrv_drain_invoke(bs, true);
28
static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
27
aio_context_release(aio_context);
29
{
28
30
ssize_t ret;
29
if (!g_slist_find(aio_ctxs, aio_context)) {
31
char c[1] = { '\0' };
30
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
32
+ int was_blocking = libssh2_session_get_blocking(s->session);
31
aio_context_acquire(aio_context);
33
32
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
34
/* offset must be strictly greater than the current size so we do
33
if (aio_context == bdrv_get_aio_context(bs)) {
35
* not overwrite anything */
34
- /* FIXME Calling this multiple times is wrong */
36
assert(offset > 0 && offset > s->attrs.filesize);
35
- bdrv_drain_invoke(bs, true);
37
36
waited |= bdrv_drain_recurse(bs, true);
38
+ libssh2_session_set_blocking(s->session, 1);
37
}
39
+
38
}
40
libssh2_sftp_seek64(s->sftp_handle, offset - 1);
41
ret = libssh2_sftp_write(s->sftp_handle, c, 1);
42
+
43
+ libssh2_session_set_blocking(s->session, was_blocking);
44
+
45
if (ret < 0) {
46
sftp_error_setg(errp, s, "Failed to grow file");
47
return -EIO;
48
--
39
--
49
2.13.6
40
2.13.6
50
41
51
42
diff view generated by jsdifflib
1
This patch adds test cases for the scenario where blk_aio_flush() is
1
This adds a test case that the BlockDriver callbacks for drain are
2
called on a BlockBackend with no root. Calling drain afterwards should
2
called in bdrv_drain_all_begin/end(), and that both of them are called
3
complete the requests with -ENOMEDIUM.
3
exactly once.
4
4
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
---
8
---
10
tests/test-block-backend.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++
9
tests/test-bdrv-drain.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++
11
tests/Makefile.include | 2 ++
10
tests/Makefile.include | 2 +
12
2 files changed, 84 insertions(+)
11
2 files changed, 139 insertions(+)
13
create mode 100644 tests/test-block-backend.c
12
create mode 100644 tests/test-bdrv-drain.c
14
13
15
diff --git a/tests/test-block-backend.c b/tests/test-block-backend.c
14
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
16
new file mode 100644
15
new file mode 100644
17
index XXXXXXX..XXXXXXX
16
index XXXXXXX..XXXXXXX
18
--- /dev/null
17
--- /dev/null
19
+++ b/tests/test-block-backend.c
18
+++ b/tests/test-bdrv-drain.c
20
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@
21
+/*
20
+/*
22
+ * BlockBackend tests
21
+ * Block node draining tests
23
+ *
22
+ *
24
+ * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
23
+ * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
25
+ *
24
+ *
26
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
25
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
27
+ * of this software and associated documentation files (the "Software"), to deal
26
+ * of this software and associated documentation files (the "Software"), to deal
...
...
45
+#include "qemu/osdep.h"
44
+#include "qemu/osdep.h"
46
+#include "block/block.h"
45
+#include "block/block.h"
47
+#include "sysemu/block-backend.h"
46
+#include "sysemu/block-backend.h"
48
+#include "qapi/error.h"
47
+#include "qapi/error.h"
49
+
48
+
50
+static void test_drain_aio_error_flush_cb(void *opaque, int ret)
49
+typedef struct BDRVTestState {
50
+ int drain_count;
51
+} BDRVTestState;
52
+
53
+static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
51
+{
54
+{
52
+ bool *completed = opaque;
55
+ BDRVTestState *s = bs->opaque;
53
+
56
+ s->drain_count++;
54
+ g_assert(ret == -ENOMEDIUM);
55
+ *completed = true;
56
+}
57
+}
57
+
58
+
58
+static void test_drain_aio_error(void)
59
+static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
59
+{
60
+{
60
+ BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
61
+ BDRVTestState *s = bs->opaque;
61
+ BlockAIOCB *acb;
62
+ s->drain_count--;
62
+ bool completed = false;
63
+
64
+ acb = blk_aio_flush(blk, test_drain_aio_error_flush_cb, &completed);
65
+ g_assert(acb != NULL);
66
+ g_assert(completed == false);
67
+
68
+ blk_drain(blk);
69
+ g_assert(completed == true);
70
+
71
+ blk_unref(blk);
72
+}
63
+}
73
+
64
+
74
+static void test_drain_all_aio_error(void)
65
+static void bdrv_test_close(BlockDriverState *bs)
75
+{
66
+{
76
+ BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
67
+ BDRVTestState *s = bs->opaque;
68
+ g_assert_cmpint(s->drain_count, >, 0);
69
+}
70
+
71
+static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
72
+ uint64_t offset, uint64_t bytes,
73
+ QEMUIOVector *qiov, int flags)
74
+{
75
+ /* We want this request to stay until the polling loop in drain waits for
76
+ * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
77
+ * first and polls its result, too, but it shouldn't accidentally complete
78
+ * this request yet. */
79
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
80
+
81
+ return 0;
82
+}
83
+
84
+static BlockDriver bdrv_test = {
85
+ .format_name = "test",
86
+ .instance_size = sizeof(BDRVTestState),
87
+
88
+ .bdrv_close = bdrv_test_close,
89
+ .bdrv_co_preadv = bdrv_test_co_preadv,
90
+
91
+ .bdrv_co_drain_begin = bdrv_test_co_drain_begin,
92
+ .bdrv_co_drain_end = bdrv_test_co_drain_end,
93
+};
94
+
95
+static void aio_ret_cb(void *opaque, int ret)
96
+{
97
+ int *aio_ret = opaque;
98
+ *aio_ret = ret;
99
+}
100
+
101
+static void test_drv_cb_drain_all(void)
102
+{
103
+ BlockBackend *blk;
104
+ BlockDriverState *bs;
105
+ BDRVTestState *s;
77
+ BlockAIOCB *acb;
106
+ BlockAIOCB *acb;
78
+ bool completed = false;
107
+ int aio_ret;
79
+
108
+
80
+ acb = blk_aio_flush(blk, test_drain_aio_error_flush_cb, &completed);
109
+ QEMUIOVector qiov;
110
+ struct iovec iov = {
111
+ .iov_base = NULL,
112
+ .iov_len = 0,
113
+ };
114
+ qemu_iovec_init_external(&qiov, &iov, 1);
115
+
116
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
117
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
118
+ &error_abort);
119
+ s = bs->opaque;
120
+ blk_insert_bs(blk, bs, &error_abort);
121
+
122
+ /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
123
+ g_assert_cmpint(s->drain_count, ==, 0);
124
+ bdrv_drain_all_begin();
125
+ g_assert_cmpint(s->drain_count, ==, 1);
126
+ bdrv_drain_all_end();
127
+ g_assert_cmpint(s->drain_count, ==, 0);
128
+
129
+ /* Now do the same while a request is pending */
130
+ aio_ret = -EINPROGRESS;
131
+ acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
81
+ g_assert(acb != NULL);
132
+ g_assert(acb != NULL);
82
+ g_assert(completed == false);
133
+ g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
83
+
134
+
84
+ blk_drain_all();
135
+ g_assert_cmpint(s->drain_count, ==, 0);
85
+ g_assert(completed == true);
136
+ bdrv_drain_all_begin();
137
+ g_assert_cmpint(aio_ret, ==, 0);
138
+ g_assert_cmpint(s->drain_count, ==, 1);
139
+ bdrv_drain_all_end();
140
+ g_assert_cmpint(s->drain_count, ==, 0);
86
+
141
+
142
+ bdrv_unref(bs);
87
+ blk_unref(blk);
143
+ blk_unref(blk);
88
+}
144
+}
89
+
145
+
90
+int main(int argc, char **argv)
146
+int main(int argc, char **argv)
91
+{
147
+{
92
+ bdrv_init();
148
+ bdrv_init();
93
+ qemu_init_main_loop(&error_abort);
149
+ qemu_init_main_loop(&error_abort);
94
+
150
+
95
+ g_test_init(&argc, &argv, NULL);
151
+ g_test_init(&argc, &argv, NULL);
96
+
152
+
97
+ g_test_add_func("/block-backend/drain_aio_error", test_drain_aio_error);
153
+ g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
98
+ g_test_add_func("/block-backend/drain_all_aio_error",
99
+ test_drain_all_aio_error);
100
+
154
+
101
+ return g_test_run();
155
+ return g_test_run();
102
+}
156
+}
103
diff --git a/tests/Makefile.include b/tests/Makefile.include
157
diff --git a/tests/Makefile.include b/tests/Makefile.include
104
index XXXXXXX..XXXXXXX 100644
158
index XXXXXXX..XXXXXXX 100644
105
--- a/tests/Makefile.include
159
--- a/tests/Makefile.include
106
+++ b/tests/Makefile.include
160
+++ b/tests/Makefile.include
107
@@ -XXX,XX +XXX,XX @@ gcov-files-test-hbitmap-y = blockjob.c
161
@@ -XXX,XX +XXX,XX @@ gcov-files-test-thread-pool-y = thread-pool.c
108
check-unit-y += tests/test-bdrv-drain$(EXESUF)
162
gcov-files-test-hbitmap-y = util/hbitmap.c
163
check-unit-y += tests/test-hbitmap$(EXESUF)
164
gcov-files-test-hbitmap-y = blockjob.c
165
+check-unit-y += tests/test-bdrv-drain$(EXESUF)
109
check-unit-y += tests/test-blockjob$(EXESUF)
166
check-unit-y += tests/test-blockjob$(EXESUF)
110
check-unit-y += tests/test-blockjob-txn$(EXESUF)
167
check-unit-y += tests/test-blockjob-txn$(EXESUF)
111
+check-unit-y += tests/test-block-backend$(EXESUF)
112
check-unit-y += tests/test-x86-cpuid$(EXESUF)
168
check-unit-y += tests/test-x86-cpuid$(EXESUF)
113
# all code tested by test-x86-cpuid is inside topology.h
169
@@ -XXX,XX +XXX,XX @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
114
gcov-files-test-x86-cpuid-y =
170
tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
115
@@ -XXX,XX +XXX,XX @@ tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
171
tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y)
116
tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y)
172
tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
173
+tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y)
117
tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
174
tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
118
tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
175
tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
119
+tests/test-block-backend$(EXESUF): tests/test-block-backend.o $(test-block-obj-y) $(test-util-obj-y)
120
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
176
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
121
tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y)
122
tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y) $(test-crypto-obj-y)
123
--
177
--
124
2.13.6
178
2.13.6
125
179
126
180
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
Now that the bdrv_drain_invoke() calls are pulled up to the callers of
2
bdrv_drain_recurse(), the 'begin' parameter isn't needed any more.
2
3
3
We are gradually moving away from sector-based interfaces, towards
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
byte-based. Update the vpc driver accordingly.
5
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
block/io.c | 12 ++++++------
8
1 file changed, 6 insertions(+), 6 deletions(-)
5
9
6
Signed-off-by: Eric Blake <eblake@redhat.com>
10
diff --git a/block/io.c b/block/io.c
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Reviewed-by: Fam Zheng <famz@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
11
block/vpc.c | 45 +++++++++++++++++++++++----------------------
12
1 file changed, 23 insertions(+), 22 deletions(-)
13
14
diff --git a/block/vpc.c b/block/vpc.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/block/vpc.c
12
--- a/block/io.c
17
+++ b/block/vpc.c
13
+++ b/block/io.c
18
@@ -XXX,XX +XXX,XX @@ fail:
14
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
19
return ret;
15
}
20
}
16
}
21
17
22
-static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
18
-static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
23
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
19
+static bool bdrv_drain_recurse(BlockDriverState *bs)
24
+static int coroutine_fn vpc_co_block_status(BlockDriverState *bs,
25
+ bool want_zero,
26
+ int64_t offset, int64_t bytes,
27
+ int64_t *pnum, int64_t *map,
28
+ BlockDriverState **file)
29
{
20
{
30
BDRVVPCState *s = bs->opaque;
21
BdrvChild *child, *tmp;
31
VHDFooter *footer = (VHDFooter*) s->footer_buf;
22
bool waited;
32
- int64_t start, offset;
23
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
33
+ int64_t image_offset;
24
*/
34
bool allocated;
25
bdrv_ref(bs);
35
- int64_t ret;
26
}
36
- int n;
27
- waited |= bdrv_drain_recurse(bs, begin);
37
+ int ret;
28
+ waited |= bdrv_drain_recurse(bs);
38
+ int64_t n;
29
if (in_main_loop) {
39
30
bdrv_unref(bs);
40
if (be32_to_cpu(footer->type) == VHD_FIXED) {
31
}
41
- *pnum = nb_sectors;
32
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
42
+ *pnum = bytes;
43
+ *map = offset;
44
*file = bs->file->bs;
45
- return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
46
- (sector_num << BDRV_SECTOR_BITS);
47
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
48
}
33
}
49
34
50
qemu_co_mutex_lock(&s->lock);
35
bdrv_drain_invoke(bs, true);
51
36
- bdrv_drain_recurse(bs, true);
52
- offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false, NULL);
37
+ bdrv_drain_recurse(bs);
53
- start = offset;
38
}
54
- allocated = (offset != -1);
39
55
+ image_offset = get_image_offset(bs, offset, false, NULL);
40
void bdrv_drained_end(BlockDriverState *bs)
56
+ allocated = (image_offset != -1);
41
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
57
*pnum = 0;
42
58
ret = 0;
43
bdrv_parent_drained_end(bs);
59
44
bdrv_drain_invoke(bs, false);
60
do {
45
- bdrv_drain_recurse(bs, false);
61
/* All sectors in a block are contiguous (without using the bitmap) */
46
+ bdrv_drain_recurse(bs);
62
- n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
47
aio_enable_external(bdrv_get_aio_context(bs));
63
- - sector_num;
48
}
64
- n = MIN(n, nb_sectors);
49
65
+ n = ROUND_UP(offset + 1, s->block_size) - offset;
50
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
66
+ n = MIN(n, bytes);
51
aio_context_acquire(aio_context);
67
52
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
68
*pnum += n;
53
if (aio_context == bdrv_get_aio_context(bs)) {
69
- sector_num += n;
54
- waited |= bdrv_drain_recurse(bs, true);
70
- nb_sectors -= n;
55
+ waited |= bdrv_drain_recurse(bs);
71
+ offset += n;
56
}
72
+ bytes -= n;
57
}
73
/* *pnum can't be greater than one block for allocated
58
aio_context_release(aio_context);
74
* sectors since there is always a bitmap in between. */
59
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
75
if (allocated) {
60
aio_enable_external(aio_context);
76
*file = bs->file->bs;
61
bdrv_parent_drained_end(bs);
77
- ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
62
bdrv_drain_invoke(bs, false);
78
+ *map = image_offset;
63
- bdrv_drain_recurse(bs, false);
79
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
64
+ bdrv_drain_recurse(bs);
80
break;
65
aio_context_release(aio_context);
81
}
66
}
82
- if (nb_sectors == 0) {
83
+ if (bytes == 0) {
84
break;
85
}
86
- offset = get_image_offset(bs, sector_num << BDRV_SECTOR_BITS, false,
87
- NULL);
88
- } while (offset == -1);
89
+ image_offset = get_image_offset(bs, offset, false, NULL);
90
+ } while (image_offset == -1);
91
92
qemu_co_mutex_unlock(&s->lock);
93
return ret;
94
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vpc = {
95
96
.bdrv_co_preadv = vpc_co_preadv,
97
.bdrv_co_pwritev = vpc_co_pwritev,
98
- .bdrv_co_get_block_status = vpc_co_get_block_status,
99
+ .bdrv_co_block_status = vpc_co_block_status,
100
101
.bdrv_get_info = vpc_get_info,
102
67
103
--
68
--
104
2.13.6
69
2.13.6
105
70
106
71
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
The device is drained, so there is no point in waiting for requests at
2
the end of the drained section. Remove the bdrv_drain_recurse() calls
3
there.
2
4
3
We are gradually moving away from sector-based interfaces, towards
5
The bdrv_drain_recurse() calls were introduced in commit 481cad48e5e
4
byte-based. Update the vdi driver accordingly. Note that the
6
in order to call the .bdrv_co_drain_end() driver callback. This is now
5
TODO is already covered (the block layer guarantees bounds of its
7
done by a separate bdrv_drain_invoke() call.
6
requests), and that we can remove the now-unused s->block_sectors.
7
8
8
Signed-off-by: Eric Blake <eblake@redhat.com>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
12
---
13
block/vdi.c | 33 +++++++++++++--------------------
13
block/io.c | 2 --
14
1 file changed, 13 insertions(+), 20 deletions(-)
14
1 file changed, 2 deletions(-)
15
15
16
diff --git a/block/vdi.c b/block/vdi.c
16
diff --git a/block/io.c b/block/io.c
17
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/vdi.c
18
--- a/block/io.c
19
+++ b/block/vdi.c
19
+++ b/block/io.c
20
@@ -XXX,XX +XXX,XX @@ typedef struct {
20
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
21
uint32_t *bmap;
21
22
/* Size of block (bytes). */
22
bdrv_parent_drained_end(bs);
23
uint32_t block_size;
23
bdrv_drain_invoke(bs, false);
24
- /* Size of block (sectors). */
24
- bdrv_drain_recurse(bs);
25
- uint32_t block_sectors;
25
aio_enable_external(bdrv_get_aio_context(bs));
26
/* First sector of block map. */
27
uint32_t bmap_sector;
28
/* VDI header (converted to host endianness). */
29
@@ -XXX,XX +XXX,XX @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
30
bs->total_sectors = header.disk_size / SECTOR_SIZE;
31
32
s->block_size = header.block_size;
33
- s->block_sectors = header.block_size / SECTOR_SIZE;
34
s->bmap_sector = header.offset_bmap / SECTOR_SIZE;
35
s->header = header;
36
37
@@ -XXX,XX +XXX,XX @@ static int vdi_reopen_prepare(BDRVReopenState *state,
38
return 0;
39
}
26
}
40
27
41
-static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
28
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
42
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
29
aio_enable_external(aio_context);
43
+static int coroutine_fn vdi_co_block_status(BlockDriverState *bs,
30
bdrv_parent_drained_end(bs);
44
+ bool want_zero,
31
bdrv_drain_invoke(bs, false);
45
+ int64_t offset, int64_t bytes,
32
- bdrv_drain_recurse(bs);
46
+ int64_t *pnum, int64_t *map,
33
aio_context_release(aio_context);
47
+ BlockDriverState **file)
48
{
49
- /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
50
BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
51
- size_t bmap_index = sector_num / s->block_sectors;
52
- size_t sector_in_block = sector_num % s->block_sectors;
53
- int n_sectors = s->block_sectors - sector_in_block;
54
+ size_t bmap_index = offset / s->block_size;
55
+ size_t index_in_block = offset % s->block_size;
56
uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
57
- uint64_t offset;
58
int result;
59
60
- logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
61
- if (n_sectors > nb_sectors) {
62
- n_sectors = nb_sectors;
63
- }
64
- *pnum = n_sectors;
65
+ logout("%p, %" PRId64 ", %" PRId64 ", %p\n", bs, offset, bytes, pnum);
66
+ *pnum = MIN(s->block_size - index_in_block, bytes);
67
result = VDI_IS_ALLOCATED(bmap_entry);
68
if (!result) {
69
return 0;
70
}
34
}
71
35
72
- offset = s->header.offset_data +
73
- (uint64_t)bmap_entry * s->block_size +
74
- sector_in_block * SECTOR_SIZE;
75
+ *map = s->header.offset_data + (uint64_t)bmap_entry * s->block_size +
76
+ index_in_block;
77
*file = bs->file->bs;
78
- return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
79
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
80
}
81
82
static int coroutine_fn
83
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vdi = {
84
.bdrv_child_perm = bdrv_format_default_perms,
85
.bdrv_create = vdi_create,
86
.bdrv_has_zero_init = bdrv_has_zero_init_1,
87
- .bdrv_co_get_block_status = vdi_co_get_block_status,
88
+ .bdrv_co_block_status = vdi_co_block_status,
89
.bdrv_make_empty = vdi_make_empty,
90
91
.bdrv_co_preadv = vdi_co_preadv,
92
--
36
--
93
2.13.6
37
2.13.6
94
38
95
39
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
Drain requests are propagated to child nodes, parent nodes and directly
2
to the AioContext. The order in which this happened was different
3
between all combinations of drain/drain_all and begin/end.
2
4
3
We are gradually moving away from sector-based interfaces, towards
5
The correct order is to keep children only drained when their parents
4
byte-based. Update the iscsi driver accordingly. In this case,
6
are also drained. This means that at the start of a drained section, the
5
it is handy to teach iscsi_co_block_status() to handle a NULL map
7
AioContext needs to be drained first, the parents second and only then
6
and file parameter, even though the block layer passes non-NULL
8
the children. The correct order for the end of a drained section is the
7
values, because we also call the function directly. For now, there
9
opposite.
8
are no optimizations done based on the want_zero flag.
9
10
10
We can also make the simplification of asserting that the block
11
This patch changes the three other functions to follow the example of
11
layer passed in aligned values.
12
bdrv_drained_begin(), which is the only one that got it right.
12
13
13
Signed-off-by: Eric Blake <eblake@redhat.com>
14
Reviewed-by: Fam Zheng <famz@redhat.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
16
---
17
block/iscsi.c | 69 ++++++++++++++++++++++++++++-------------------------------
17
block/io.c | 12 ++++++++----
18
1 file changed, 33 insertions(+), 36 deletions(-)
18
1 file changed, 8 insertions(+), 4 deletions(-)
19
19
20
diff --git a/block/iscsi.c b/block/iscsi.c
20
diff --git a/block/io.c b/block/io.c
21
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
22
--- a/block/iscsi.c
22
--- a/block/io.c
23
+++ b/block/iscsi.c
23
+++ b/block/io.c
24
@@ -XXX,XX +XXX,XX @@ out_unlock:
24
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
25
25
return;
26
27
28
-static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
29
- int64_t sector_num,
30
- int nb_sectors, int *pnum,
31
- BlockDriverState **file)
32
+static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs,
33
+ bool want_zero, int64_t offset,
34
+ int64_t bytes, int64_t *pnum,
35
+ int64_t *map,
36
+ BlockDriverState **file)
37
{
38
IscsiLun *iscsilun = bs->opaque;
39
struct scsi_get_lba_status *lbas = NULL;
40
struct scsi_lba_status_descriptor *lbasd = NULL;
41
struct IscsiTask iTask;
42
uint64_t lba;
43
- int64_t ret;
44
+ int ret;
45
46
iscsi_co_init_iscsitask(iscsilun, &iTask);
47
48
- if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
49
- ret = -EINVAL;
50
- goto out;
51
- }
52
+ assert(QEMU_IS_ALIGNED(offset | bytes, iscsilun->block_size));
53
54
/* default to all sectors allocated */
55
- ret = BDRV_BLOCK_DATA;
56
- ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
57
- *pnum = nb_sectors;
58
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
59
+ if (map) {
60
+ *map = offset;
61
+ }
62
+ *pnum = bytes;
63
64
/* LUN does not support logical block provisioning */
65
if (!iscsilun->lbpme) {
66
goto out;
67
}
26
}
68
27
69
- lba = sector_qemu2lun(sector_num, iscsilun);
28
+ /* Stop things in parent-to-child order */
70
+ lba = offset / iscsilun->block_size;
29
if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
71
30
aio_disable_external(bdrv_get_aio_context(bs));
72
qemu_mutex_lock(&iscsilun->mutex);
31
bdrv_parent_drained_begin(bs);
73
retry:
32
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
74
@@ -XXX,XX +XXX,XX @@ retry:
33
return;
75
76
lbasd = &lbas->descriptors[0];
77
78
- if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
79
+ if (lba != lbasd->lba) {
80
ret = -EIO;
81
goto out_unlock;
82
}
34
}
83
35
84
- *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
36
- bdrv_parent_drained_end(bs);
85
+ *pnum = lbasd->num_blocks * iscsilun->block_size;
37
+ /* Re-enable things in child-to-parent order */
86
38
bdrv_drain_invoke(bs, false);
87
if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
39
+ bdrv_parent_drained_end(bs);
88
lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
40
aio_enable_external(bdrv_get_aio_context(bs));
89
@@ -XXX,XX +XXX,XX @@ retry:
41
}
42
43
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
44
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
45
AioContext *aio_context = bdrv_get_aio_context(bs);
46
47
+ /* Stop things in parent-to-child order */
48
aio_context_acquire(aio_context);
49
- bdrv_parent_drained_begin(bs);
50
aio_disable_external(aio_context);
51
+ bdrv_parent_drained_begin(bs);
52
bdrv_drain_invoke(bs, true);
53
aio_context_release(aio_context);
54
55
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
56
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
57
AioContext *aio_context = bdrv_get_aio_context(bs);
58
59
+ /* Re-enable things in child-to-parent order */
60
aio_context_acquire(aio_context);
61
- aio_enable_external(aio_context);
62
- bdrv_parent_drained_end(bs);
63
bdrv_drain_invoke(bs, false);
64
+ bdrv_parent_drained_end(bs);
65
+ aio_enable_external(aio_context);
66
aio_context_release(aio_context);
90
}
67
}
91
68
92
if (ret & BDRV_BLOCK_ZERO) {
93
- iscsi_allocmap_set_unallocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
94
- *pnum * BDRV_SECTOR_SIZE);
95
+ iscsi_allocmap_set_unallocated(iscsilun, offset, *pnum);
96
} else {
97
- iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
98
- *pnum * BDRV_SECTOR_SIZE);
99
+ iscsi_allocmap_set_allocated(iscsilun, offset, *pnum);
100
}
101
102
- if (*pnum > nb_sectors) {
103
- *pnum = nb_sectors;
104
+ if (*pnum > bytes) {
105
+ *pnum = bytes;
106
}
107
out_unlock:
108
qemu_mutex_unlock(&iscsilun->mutex);
109
@@ -XXX,XX +XXX,XX @@ out:
110
if (iTask.task != NULL) {
111
scsi_free_scsi_task(iTask.task);
112
}
113
- if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
114
+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {
115
*file = bs;
116
}
117
return ret;
118
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
119
nb_sectors * BDRV_SECTOR_SIZE) &&
120
!iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
121
nb_sectors * BDRV_SECTOR_SIZE)) {
122
- int pnum;
123
- BlockDriverState *file;
124
+ int64_t pnum;
125
/* check the block status from the beginning of the cluster
126
* containing the start sector */
127
- int cluster_sectors = iscsilun->cluster_size >> BDRV_SECTOR_BITS;
128
- int head;
129
- int64_t ret;
130
-
131
- assert(cluster_sectors);
132
- head = sector_num % cluster_sectors;
133
- ret = iscsi_co_get_block_status(bs, sector_num - head,
134
- BDRV_REQUEST_MAX_SECTORS, &pnum,
135
- &file);
136
+ int64_t head;
137
+ int ret;
138
+
139
+ assert(iscsilun->cluster_size);
140
+ head = (sector_num * BDRV_SECTOR_SIZE) % iscsilun->cluster_size;
141
+ ret = iscsi_co_block_status(bs, true,
142
+ sector_num * BDRV_SECTOR_SIZE - head,
143
+ BDRV_REQUEST_MAX_BYTES, &pnum, NULL, NULL);
144
if (ret < 0) {
145
return ret;
146
}
147
/* if the whole request falls into an unallocated area we can avoid
148
* reading and directly return zeroes instead */
149
- if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors + head) {
150
+ if (ret & BDRV_BLOCK_ZERO &&
151
+ pnum >= nb_sectors * BDRV_SECTOR_SIZE + head) {
152
qemu_iovec_memset(iov, 0, 0x00, iov->size);
153
return 0;
154
}
155
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iscsi = {
156
.bdrv_truncate = iscsi_truncate,
157
.bdrv_refresh_limits = iscsi_refresh_limits,
158
159
- .bdrv_co_get_block_status = iscsi_co_get_block_status,
160
+ .bdrv_co_block_status = iscsi_co_block_status,
161
.bdrv_co_pdiscard = iscsi_co_pdiscard,
162
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
163
.bdrv_co_readv = iscsi_co_readv,
164
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iser = {
165
.bdrv_truncate = iscsi_truncate,
166
.bdrv_refresh_limits = iscsi_refresh_limits,
167
168
- .bdrv_co_get_block_status = iscsi_co_get_block_status,
169
+ .bdrv_co_block_status = iscsi_co_block_status,
170
.bdrv_co_pdiscard = iscsi_co_pdiscard,
171
.bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
172
.bdrv_co_readv = iscsi_co_readv,
173
--
69
--
174
2.13.6
70
2.13.6
175
71
176
72
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
Commit 15afd94a047 added code to acquire and release the AioContext in
2
qemuio_command(). This means that the lock is taken twice now in the
3
call path from hmp_qemu_io(). This causes BDRV_POLL_WHILE() to hang for
4
any requests issued to nodes in a non-mainloop AioContext.
2
5
3
We are gradually moving away from sector-based interfaces, towards
6
Dropping the first locking from hmp_qemu_io() fixes the problem.
4
byte-based. Update the sheepdog driver accordingly.
5
7
6
Signed-off-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Reviewed-by: Fam Zheng <famz@redhat.com>
9
Reviewed-by: Jeff Cody <jcody@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
10
---
12
block/sheepdog.c | 26 +++++++++++++-------------
11
hmp.c | 6 ------
13
1 file changed, 13 insertions(+), 13 deletions(-)
12
1 file changed, 6 deletions(-)
14
13
15
diff --git a/block/sheepdog.c b/block/sheepdog.c
14
diff --git a/hmp.c b/hmp.c
16
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
17
--- a/block/sheepdog.c
16
--- a/hmp.c
18
+++ b/block/sheepdog.c
17
+++ b/hmp.c
19
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
18
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
20
return acb.ret;
21
}
22
23
-static coroutine_fn int64_t
24
-sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
25
- int *pnum, BlockDriverState **file)
26
+static coroutine_fn int
27
+sd_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
28
+ int64_t bytes, int64_t *pnum, int64_t *map,
29
+ BlockDriverState **file)
30
{
19
{
31
BDRVSheepdogState *s = bs->opaque;
20
BlockBackend *blk;
32
SheepdogInode *inode = &s->inode;
21
BlockBackend *local_blk = NULL;
33
uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
22
- AioContext *aio_context;
34
- uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
23
const char* device = qdict_get_str(qdict, "device");
35
unsigned long start = offset / object_size,
24
const char* command = qdict_get_str(qdict, "command");
36
- end = DIV_ROUND_UP((sector_num + nb_sectors) *
25
Error *err = NULL;
37
- BDRV_SECTOR_SIZE, object_size);
26
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
38
+ end = DIV_ROUND_UP(offset + bytes, object_size);
39
unsigned long idx;
40
- int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
41
+ *map = offset;
42
+ int ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
43
44
for (idx = start; idx < end; idx++) {
45
if (inode->data_vdi_id[idx] == 0) {
46
@@ -XXX,XX +XXX,XX @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
47
}
27
}
48
}
28
}
49
29
50
- *pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE;
30
- aio_context = blk_get_aio_context(blk);
51
- if (*pnum > nb_sectors) {
31
- aio_context_acquire(aio_context);
52
- *pnum = nb_sectors;
32
-
53
+ *pnum = (idx - start) * object_size;
33
/*
54
+ if (*pnum > bytes) {
34
* Notably absent: Proper permission management. This is sad, but it seems
55
+ *pnum = bytes;
35
* almost impossible to achieve without changing the semantics and thereby
56
}
36
@@ -XXX,XX +XXX,XX @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
57
if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
37
*/
58
*file = bs;
38
qemuio_command(blk, command);
59
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog = {
39
60
.bdrv_co_writev = sd_co_writev,
40
- aio_context_release(aio_context);
61
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
41
-
62
.bdrv_co_pdiscard = sd_co_pdiscard,
42
fail:
63
- .bdrv_co_get_block_status = sd_co_get_block_status,
43
blk_unref(local_blk);
64
+ .bdrv_co_block_status = sd_co_block_status,
44
hmp_handle_error(mon, &err);
65
66
.bdrv_snapshot_create = sd_snapshot_create,
67
.bdrv_snapshot_goto = sd_snapshot_goto,
68
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog_tcp = {
69
.bdrv_co_writev = sd_co_writev,
70
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
71
.bdrv_co_pdiscard = sd_co_pdiscard,
72
- .bdrv_co_get_block_status = sd_co_get_block_status,
73
+ .bdrv_co_block_status = sd_co_block_status,
74
75
.bdrv_snapshot_create = sd_snapshot_create,
76
.bdrv_snapshot_goto = sd_snapshot_goto,
77
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog_unix = {
78
.bdrv_co_writev = sd_co_writev,
79
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
80
.bdrv_co_pdiscard = sd_co_pdiscard,
81
- .bdrv_co_get_block_status = sd_co_get_block_status,
82
+ .bdrv_co_block_status = sd_co_block_status,
83
84
.bdrv_snapshot_create = sd_snapshot_create,
85
.bdrv_snapshot_goto = sd_snapshot_goto,
86
--
45
--
87
2.13.6
46
2.13.6
88
47
89
48
diff view generated by jsdifflib
1
From: Alberto Garcia <berto@igalia.com>
1
From: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>
2
2
3
The align_offset() function is equivalent to the ROUND_UP() macro so
3
Since bdrv_co_preadv does all necessary checks including
4
there's no need to use the former. The ROUND_UP() name is also a bit
4
reading after the end of the backing file, avoid duplication
5
more explicit.
5
of verification before bdrv_co_preadv call.
6
6
7
This patch uses ROUND_UP() instead of the slower QEMU_ALIGN_UP()
7
Signed-off-by: Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com>
8
because align_offset() already requires that the second parameter is a
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
power of two.
10
11
Signed-off-by: Alberto Garcia <berto@igalia.com>
12
Reviewed-by: Eric Blake <eblake@redhat.com>
9
Reviewed-by: Eric Blake <eblake@redhat.com>
13
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Message-id: 20180215131008.5153-1-berto@igalia.com
15
Signed-off-by: Max Reitz <mreitz@redhat.com>
16
---
11
---
17
block/qcow2.h | 6 ------
12
block/qcow2.h | 3 ---
18
block/qcow2-bitmap.c | 4 ++--
13
block/qcow2.c | 51 ++++++++-------------------------------------------
19
block/qcow2-cluster.c | 4 ++--
14
2 files changed, 8 insertions(+), 46 deletions(-)
20
block/qcow2-refcount.c | 4 ++--
21
block/qcow2-snapshot.c | 10 +++++-----
22
block/qcow2.c | 14 +++++++-------
23
6 files changed, 18 insertions(+), 24 deletions(-)
24
15
25
diff --git a/block/qcow2.h b/block/qcow2.h
16
diff --git a/block/qcow2.h b/block/qcow2.h
26
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
27
--- a/block/qcow2.h
18
--- a/block/qcow2.h
28
+++ b/block/qcow2.h
19
+++ b/block/qcow2.h
29
@@ -XXX,XX +XXX,XX @@ static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset)
20
@@ -XXX,XX +XXX,XX @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset)
30
return (offset >> s->cluster_bits) & (s->l2_slice_size - 1);
31
}
21
}
32
22
33
-static inline int64_t align_offset(int64_t offset, int n)
23
/* qcow2.c functions */
34
-{
24
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
35
- offset = (offset + n - 1) & ~(n - 1);
25
- int64_t sector_num, int nb_sectors);
36
- return offset;
37
-}
38
-
26
-
39
static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s)
27
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
40
{
28
int refcount_order, bool generous_increase,
41
return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
29
uint64_t *refblock_count);
42
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/block/qcow2-bitmap.c
45
+++ b/block/qcow2-bitmap.c
46
@@ -XXX,XX +XXX,XX @@ static inline void bitmap_dir_entry_to_be(Qcow2BitmapDirEntry *entry)
47
48
static inline int calc_dir_entry_size(size_t name_size, size_t extra_data_size)
49
{
50
- return align_offset(sizeof(Qcow2BitmapDirEntry) +
51
- name_size + extra_data_size, 8);
52
+ int size = sizeof(Qcow2BitmapDirEntry) + name_size + extra_data_size;
53
+ return ROUND_UP(size, 8);
54
}
55
56
static inline int dir_entry_size(Qcow2BitmapDirEntry *entry)
57
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/block/qcow2-cluster.c
60
+++ b/block/qcow2-cluster.c
61
@@ -XXX,XX +XXX,XX @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
62
63
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
64
new_l1_table = qemu_try_blockalign(bs->file->bs,
65
- align_offset(new_l1_size2, 512));
66
+ ROUND_UP(new_l1_size2, 512));
67
if (new_l1_table == NULL) {
68
return -ENOMEM;
69
}
70
- memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
71
+ memset(new_l1_table, 0, ROUND_UP(new_l1_size2, 512));
72
73
if (s->l1_size) {
74
memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
75
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
76
index XXXXXXX..XXXXXXX 100644
77
--- a/block/qcow2-refcount.c
78
+++ b/block/qcow2-refcount.c
79
@@ -XXX,XX +XXX,XX @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
80
* l1_table_offset when it is the current s->l1_table_offset! Be careful
81
* when changing this! */
82
if (l1_table_offset != s->l1_table_offset) {
83
- l1_table = g_try_malloc0(align_offset(l1_size2, 512));
84
+ l1_table = g_try_malloc0(ROUND_UP(l1_size2, 512));
85
if (l1_size2 && l1_table == NULL) {
86
ret = -ENOMEM;
87
goto fail;
88
@@ -XXX,XX +XXX,XX @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
89
}
90
91
/* align range to test to cluster boundaries */
92
- size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size);
93
+ size = ROUND_UP(offset_into_cluster(s, offset) + size, s->cluster_size);
94
offset = start_of_cluster(s, offset);
95
96
if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
97
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/block/qcow2-snapshot.c
100
+++ b/block/qcow2-snapshot.c
101
@@ -XXX,XX +XXX,XX @@ int qcow2_read_snapshots(BlockDriverState *bs)
102
103
for(i = 0; i < s->nb_snapshots; i++) {
104
/* Read statically sized part of the snapshot header */
105
- offset = align_offset(offset, 8);
106
+ offset = ROUND_UP(offset, 8);
107
ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
108
if (ret < 0) {
109
goto fail;
110
@@ -XXX,XX +XXX,XX @@ static int qcow2_write_snapshots(BlockDriverState *bs)
111
offset = 0;
112
for(i = 0; i < s->nb_snapshots; i++) {
113
sn = s->snapshots + i;
114
- offset = align_offset(offset, 8);
115
+ offset = ROUND_UP(offset, 8);
116
offset += sizeof(h);
117
offset += sizeof(extra);
118
offset += strlen(sn->id_str);
119
@@ -XXX,XX +XXX,XX @@ static int qcow2_write_snapshots(BlockDriverState *bs)
120
assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
121
h.id_str_size = cpu_to_be16(id_str_size);
122
h.name_size = cpu_to_be16(name_size);
123
- offset = align_offset(offset, 8);
124
+ offset = ROUND_UP(offset, 8);
125
126
ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
127
if (ret < 0) {
128
@@ -XXX,XX +XXX,XX @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
129
/* The VM state isn't needed any more in the active L1 table; in fact, it
130
* hurts by causing expensive COW for the next snapshot. */
131
qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
132
- align_offset(sn->vm_state_size, s->cluster_size),
133
+ ROUND_UP(sn->vm_state_size, s->cluster_size),
134
QCOW2_DISCARD_NEVER, false);
135
136
#ifdef DEBUG_ALLOC
137
@@ -XXX,XX +XXX,XX @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
138
}
139
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
140
new_l1_table = qemu_try_blockalign(bs->file->bs,
141
- align_offset(new_l1_bytes, 512));
142
+ ROUND_UP(new_l1_bytes, 512));
143
if (new_l1_table == NULL) {
144
return -ENOMEM;
145
}
146
diff --git a/block/qcow2.c b/block/qcow2.c
30
diff --git a/block/qcow2.c b/block/qcow2.c
147
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
148
--- a/block/qcow2.c
32
--- a/block/qcow2.c
149
+++ b/block/qcow2.c
33
+++ b/block/qcow2.c
150
@@ -XXX,XX +XXX,XX @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
34
@@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
151
35
return status;
152
if (s->l1_size > 0) {
36
}
153
s->l1_table = qemu_try_blockalign(bs->file->bs,
37
154
- align_offset(s->l1_size * sizeof(uint64_t), 512));
38
-/* handle reading after the end of the backing file */
155
+ ROUND_UP(s->l1_size * sizeof(uint64_t), 512));
39
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
156
if (s->l1_table == NULL) {
40
- int64_t offset, int bytes)
157
error_setg(errp, "Could not allocate L1 table");
41
-{
158
ret = -ENOMEM;
42
- uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
159
@@ -XXX,XX +XXX,XX @@ static int64_t qcow2_calc_prealloc_size(int64_t total_size,
43
- int n1;
44
-
45
- if ((offset + bytes) <= bs_size) {
46
- return bytes;
47
- }
48
-
49
- if (offset >= bs_size) {
50
- n1 = 0;
51
- } else {
52
- n1 = bs_size - offset;
53
- }
54
-
55
- qemu_iovec_memset(qiov, n1, 0, bytes - n1);
56
-
57
- return n1;
58
-}
59
-
60
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
61
uint64_t bytes, QEMUIOVector *qiov,
62
int flags)
160
{
63
{
161
int64_t meta_size = 0;
64
BDRVQcow2State *s = bs->opaque;
162
uint64_t nl1e, nl2e;
65
- int offset_in_cluster, n1;
163
- int64_t aligned_total_size = align_offset(total_size, cluster_size);
66
+ int offset_in_cluster;
164
+ int64_t aligned_total_size = ROUND_UP(total_size, cluster_size);
67
int ret;
165
68
unsigned int cur_bytes; /* number of bytes in current iteration */
166
/* header: 1 cluster */
69
uint64_t cluster_offset = 0;
167
meta_size += cluster_size;
70
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
168
71
case QCOW2_CLUSTER_UNALLOCATED:
169
/* total size of L2 tables */
72
170
nl2e = aligned_total_size / cluster_size;
73
if (bs->backing) {
171
- nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t));
74
- /* read from the base image */
172
+ nl2e = ROUND_UP(nl2e, cluster_size / sizeof(uint64_t));
75
- n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
173
meta_size += nl2e * sizeof(uint64_t);
76
- offset, cur_bytes);
174
77
- if (n1 > 0) {
175
/* total size of L1 tables */
78
- QEMUIOVector local_qiov;
176
nl1e = nl2e * sizeof(uint64_t) / cluster_size;
79
-
177
- nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t));
80
- qemu_iovec_init(&local_qiov, hd_qiov.niov);
178
+ nl1e = ROUND_UP(nl1e, cluster_size / sizeof(uint64_t));
81
- qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
179
meta_size += nl1e * sizeof(uint64_t);
82
-
180
83
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
181
/* total size of refcount table and blocks */
84
- qemu_co_mutex_unlock(&s->lock);
182
@@ -XXX,XX +XXX,XX @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
85
- ret = bdrv_co_preadv(bs->backing, offset, n1,
183
has_backing_file = !!optstr;
86
- &local_qiov, 0);
184
g_free(optstr);
87
- qemu_co_mutex_lock(&s->lock);
185
88
-
186
- virtual_size = align_offset(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
89
- qemu_iovec_destroy(&local_qiov);
187
- cluster_size);
90
-
188
+ virtual_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
91
- if (ret < 0) {
189
+ virtual_size = ROUND_UP(virtual_size, cluster_size);
92
- goto fail;
190
93
- }
191
/* Check that virtual disk size is valid */
94
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
192
l2_tables = DIV_ROUND_UP(virtual_size / cluster_size,
95
+ qemu_co_mutex_unlock(&s->lock);
193
@@ -XXX,XX +XXX,XX @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
96
+ ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
194
goto err;
97
+ &hd_qiov, 0);
195
}
98
+ qemu_co_mutex_lock(&s->lock);
196
99
+ if (ret < 0) {
197
- virtual_size = align_offset(ssize, cluster_size);
100
+ goto fail;
198
+ virtual_size = ROUND_UP(ssize, cluster_size);
101
}
199
102
} else {
200
if (has_backing_file) {
103
/* Note: in this case, no need to wait */
201
/* We don't know how much of the backing chain is shared by the input
202
--
104
--
203
2.13.6
105
2.13.6
204
106
205
107
diff view generated by jsdifflib
1
From: Max Reitz <mreitz@redhat.com>
1
Removing a quorum child node with x-blockdev-change results in a quorum
2
driver state that cannot be recreated with create options because it
3
would require a list with gaps. This causes trouble in at least
4
.bdrv_refresh_filename().
2
5
3
The issue:
6
Document this problem so that we won't accidentally mark the command
7
stable without having addressed it.
4
8
5
$ qemu-img resize -f qcow2 foo.qcow2
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
qemu-img: Expecting one image file name
10
Reviewed-by: Alberto Garcia <berto@igalia.com>
7
Try 'qemu-img --help' for more information
11
---
12
qapi/block-core.json | 4 ++++
13
1 file changed, 4 insertions(+)
8
14
9
So we gave an image file name, but we omitted the size. qemu-img
15
diff --git a/qapi/block-core.json b/qapi/block-core.json
10
thinks the last argument is always the size and removes it immediately
11
from argv (by decrementing argc), and tries to verify that it is a valid
12
size only at a later point.
13
14
So we do not actually know whether that last argument we called "size"
15
is indeed a size or whether the user instead forgot to specify that size
16
but did give a file name.
17
18
Therefore, the error message should be more general.
19
20
Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1523458
21
Signed-off-by: Max Reitz <mreitz@redhat.com>
22
Message-id: 20180205162745.23650-1-mreitz@redhat.com
23
Reviewed-by: John Snow <jsnow@redhat.com>
24
Reviewed-by: Eric Blake <eblake@redhat.com>
25
Signed-off-by: Max Reitz <mreitz@redhat.com>
26
---
27
qemu-img.c | 2 +-
28
1 file changed, 1 insertion(+), 1 deletion(-)
29
30
diff --git a/qemu-img.c b/qemu-img.c
31
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
32
--- a/qemu-img.c
17
--- a/qapi/block-core.json
33
+++ b/qemu-img.c
18
+++ b/qapi/block-core.json
34
@@ -XXX,XX +XXX,XX @@ static int img_resize(int argc, char **argv)
19
@@ -XXX,XX +XXX,XX @@
35
}
20
# does not support all kinds of operations, all kinds of children, nor
36
}
21
# all block drivers.
37
if (optind != argc - 1) {
22
#
38
- error_exit("Expecting one image file name");
23
+# FIXME Removing children from a quorum node means introducing gaps in the
39
+ error_exit("Expecting image file name and size");
24
+# child indices. This cannot be represented in the 'children' list of
40
}
25
+# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename().
41
filename = argv[optind++];
26
+#
42
27
# Warning: The data in a new quorum child MUST be consistent with that of
28
# the rest of the array.
29
#
43
--
30
--
44
2.13.6
31
2.13.6
45
32
46
33
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
From: Doug Gale <doug16k@gmail.com>
2
2
3
We are gradually converting to byte-based interfaces, as they are
3
Add trace output for commands, errors, and undefined behavior.
4
easier to reason about than sector-based. Convert all uses of
4
Add guest error log output for undefined behavior.
5
the cluster size in sectors, along with adding assertions that we
5
Report invalid undefined accesses to MMIO.
6
are not dividing by zero.
6
Annotate unlikely error checks with unlikely.
7
7
8
Improve some comment grammar while in the area.
8
Signed-off-by: Doug Gale <doug16k@gmail.com>
9
9
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Signed-off-by: Eric Blake <eblake@redhat.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
---
12
---
15
block/iscsi.c | 56 +++++++++++++++++++++++++++++++++++---------------------
13
hw/block/nvme.c | 349 ++++++++++++++++++++++++++++++++++++++++++--------
16
1 file changed, 35 insertions(+), 21 deletions(-)
14
hw/block/trace-events | 93 ++++++++++++++
15
2 files changed, 390 insertions(+), 52 deletions(-)
17
16
18
diff --git a/block/iscsi.c b/block/iscsi.c
17
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
20
--- a/block/iscsi.c
19
--- a/hw/block/nvme.c
21
+++ b/block/iscsi.c
20
+++ b/hw/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ typedef struct IscsiLun {
21
@@ -XXX,XX +XXX,XX @@
23
unsigned long *allocmap;
22
#include "qapi/visitor.h"
24
unsigned long *allocmap_valid;
23
#include "sysemu/block-backend.h"
25
long allocmap_size;
24
26
- int cluster_sectors;
25
+#include "qemu/log.h"
27
+ int cluster_size;
26
+#include "trace.h"
28
bool use_16_for_rw;
27
#include "nvme.h"
29
bool write_protected;
28
30
bool lbpme;
29
+#define NVME_GUEST_ERR(trace, fmt, ...) \
31
@@ -XXX,XX +XXX,XX @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
30
+ do { \
31
+ (trace_##trace)(__VA_ARGS__); \
32
+ qemu_log_mask(LOG_GUEST_ERROR, #trace \
33
+ " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
34
+ } while (0)
35
+
36
static void nvme_process_sq(void *opaque);
37
38
static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
39
@@ -XXX,XX +XXX,XX @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
32
{
40
{
33
iscsi_allocmap_free(iscsilun);
41
if (cq->irq_enabled) {
34
42
if (msix_enabled(&(n->parent_obj))) {
35
+ assert(iscsilun->cluster_size);
43
+ trace_nvme_irq_msix(cq->vector);
36
iscsilun->allocmap_size =
44
msix_notify(&(n->parent_obj), cq->vector);
37
- DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun),
45
} else {
38
- iscsilun->cluster_sectors);
46
+ trace_nvme_irq_pin();
39
+ DIV_ROUND_UP(iscsilun->num_blocks * iscsilun->block_size,
47
pci_irq_pulse(&n->parent_obj);
40
+ iscsilun->cluster_size);
48
}
41
49
+ } else {
42
iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
50
+ trace_nvme_irq_masked();
43
if (!iscsilun->allocmap) {
51
}
44
@@ -XXX,XX +XXX,XX @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
52
}
45
}
53
46
54
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
47
if (open_flags & BDRV_O_NOCACHE) {
55
trans_len = MIN(len, trans_len);
48
- /* in case that cache.direct = on all allocmap entries are
56
int num_prps = (len >> n->page_bits) + 1;
49
+ /* when cache.direct = on all allocmap entries are
57
50
* treated as invalid to force a relookup of the block
58
- if (!prp1) {
51
* status on every read request */
59
+ if (unlikely(!prp1)) {
52
return 0;
60
+ trace_nvme_err_invalid_prp();
53
@@ -XXX,XX +XXX,XX @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
61
return NVME_INVALID_FIELD | NVME_DNR;
54
int nb_sectors, bool allocated, bool valid)
62
} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
63
prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
64
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
65
}
66
len -= trans_len;
67
if (len) {
68
- if (!prp2) {
69
+ if (unlikely(!prp2)) {
70
+ trace_nvme_err_invalid_prp2_missing();
71
goto unmap;
72
}
73
if (len > n->page_size) {
74
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
75
uint64_t prp_ent = le64_to_cpu(prp_list[i]);
76
77
if (i == n->max_prp_ents - 1 && len > n->page_size) {
78
- if (!prp_ent || prp_ent & (n->page_size - 1)) {
79
+ if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
80
+ trace_nvme_err_invalid_prplist_ent(prp_ent);
81
goto unmap;
82
}
83
84
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
85
prp_ent = le64_to_cpu(prp_list[i]);
86
}
87
88
- if (!prp_ent || prp_ent & (n->page_size - 1)) {
89
+ if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
90
+ trace_nvme_err_invalid_prplist_ent(prp_ent);
91
goto unmap;
92
}
93
94
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
95
i++;
96
}
97
} else {
98
- if (prp2 & (n->page_size - 1)) {
99
+ if (unlikely(prp2 & (n->page_size - 1))) {
100
+ trace_nvme_err_invalid_prp2_align(prp2);
101
goto unmap;
102
}
103
if (qsg->nsg) {
104
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
105
QEMUIOVector iov;
106
uint16_t status = NVME_SUCCESS;
107
108
+ trace_nvme_dma_read(prp1, prp2);
109
+
110
if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
111
return NVME_INVALID_FIELD | NVME_DNR;
112
}
113
if (qsg.nsg > 0) {
114
- if (dma_buf_read(ptr, len, &qsg)) {
115
+ if (unlikely(dma_buf_read(ptr, len, &qsg))) {
116
+ trace_nvme_err_invalid_dma();
117
status = NVME_INVALID_FIELD | NVME_DNR;
118
}
119
qemu_sglist_destroy(&qsg);
120
} else {
121
- if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
122
+ if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
123
+ trace_nvme_err_invalid_dma();
124
status = NVME_INVALID_FIELD | NVME_DNR;
125
}
126
qemu_iovec_destroy(&iov);
127
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
128
uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
129
uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);
130
131
- if (slba + nlb > ns->id_ns.nsze) {
132
+ if (unlikely(slba + nlb > ns->id_ns.nsze)) {
133
+ trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
134
return NVME_LBA_RANGE | NVME_DNR;
135
}
136
137
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
138
int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
139
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
140
141
- if ((slba + nlb) > ns->id_ns.nsze) {
142
+ trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
143
+
144
+ if (unlikely((slba + nlb) > ns->id_ns.nsze)) {
145
block_acct_invalid(blk_get_stats(n->conf.blk), acct);
146
+ trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
147
return NVME_LBA_RANGE | NVME_DNR;
148
}
149
150
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
151
NvmeNamespace *ns;
152
uint32_t nsid = le32_to_cpu(cmd->nsid);
153
154
- if (nsid == 0 || nsid > n->num_namespaces) {
155
+ if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
156
+ trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
157
return NVME_INVALID_NSID | NVME_DNR;
158
}
159
160
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
161
case NVME_CMD_READ:
162
return nvme_rw(n, ns, cmd, req);
163
default:
164
+ trace_nvme_err_invalid_opc(cmd->opcode);
165
return NVME_INVALID_OPCODE | NVME_DNR;
166
}
167
}
168
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
169
NvmeCQueue *cq;
170
uint16_t qid = le16_to_cpu(c->qid);
171
172
- if (!qid || nvme_check_sqid(n, qid)) {
173
+ if (unlikely(!qid || nvme_check_sqid(n, qid))) {
174
+ trace_nvme_err_invalid_del_sq(qid);
175
return NVME_INVALID_QID | NVME_DNR;
176
}
177
178
+ trace_nvme_del_sq(qid);
179
+
180
sq = n->sq[qid];
181
while (!QTAILQ_EMPTY(&sq->out_req_list)) {
182
req = QTAILQ_FIRST(&sq->out_req_list);
183
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
184
uint16_t qflags = le16_to_cpu(c->sq_flags);
185
uint64_t prp1 = le64_to_cpu(c->prp1);
186
187
- if (!cqid || nvme_check_cqid(n, cqid)) {
188
+ trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags);
189
+
190
+ if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
191
+ trace_nvme_err_invalid_create_sq_cqid(cqid);
192
return NVME_INVALID_CQID | NVME_DNR;
193
}
194
- if (!sqid || !nvme_check_sqid(n, sqid)) {
195
+ if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) {
196
+ trace_nvme_err_invalid_create_sq_sqid(sqid);
197
return NVME_INVALID_QID | NVME_DNR;
198
}
199
- if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
200
+ if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
201
+ trace_nvme_err_invalid_create_sq_size(qsize);
202
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
203
}
204
- if (!prp1 || prp1 & (n->page_size - 1)) {
205
+ if (unlikely(!prp1 || prp1 & (n->page_size - 1))) {
206
+ trace_nvme_err_invalid_create_sq_addr(prp1);
207
return NVME_INVALID_FIELD | NVME_DNR;
208
}
209
- if (!(NVME_SQ_FLAGS_PC(qflags))) {
210
+ if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) {
211
+ trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags));
212
return NVME_INVALID_FIELD | NVME_DNR;
213
}
214
sq = g_malloc0(sizeof(*sq));
215
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
216
NvmeCQueue *cq;
217
uint16_t qid = le16_to_cpu(c->qid);
218
219
- if (!qid || nvme_check_cqid(n, qid)) {
220
+ if (unlikely(!qid || nvme_check_cqid(n, qid))) {
221
+ trace_nvme_err_invalid_del_cq_cqid(qid);
222
return NVME_INVALID_CQID | NVME_DNR;
223
}
224
225
cq = n->cq[qid];
226
- if (!QTAILQ_EMPTY(&cq->sq_list)) {
227
+ if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) {
228
+ trace_nvme_err_invalid_del_cq_notempty(qid);
229
return NVME_INVALID_QUEUE_DEL;
230
}
231
+ trace_nvme_del_cq(qid);
232
nvme_free_cq(cq, n);
233
return NVME_SUCCESS;
234
}
235
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
236
uint16_t qflags = le16_to_cpu(c->cq_flags);
237
uint64_t prp1 = le64_to_cpu(c->prp1);
238
239
- if (!cqid || !nvme_check_cqid(n, cqid)) {
240
+ trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
241
+ NVME_CQ_FLAGS_IEN(qflags) != 0);
242
+
243
+ if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) {
244
+ trace_nvme_err_invalid_create_cq_cqid(cqid);
245
return NVME_INVALID_CQID | NVME_DNR;
246
}
247
- if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
248
+ if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
249
+ trace_nvme_err_invalid_create_cq_size(qsize);
250
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
251
}
252
- if (!prp1) {
253
+ if (unlikely(!prp1)) {
254
+ trace_nvme_err_invalid_create_cq_addr(prp1);
255
return NVME_INVALID_FIELD | NVME_DNR;
256
}
257
- if (vector > n->num_queues) {
258
+ if (unlikely(vector > n->num_queues)) {
259
+ trace_nvme_err_invalid_create_cq_vector(vector);
260
return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
261
}
262
- if (!(NVME_CQ_FLAGS_PC(qflags))) {
263
+ if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) {
264
+ trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags));
265
return NVME_INVALID_FIELD | NVME_DNR;
266
}
267
268
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
269
uint64_t prp1 = le64_to_cpu(c->prp1);
270
uint64_t prp2 = le64_to_cpu(c->prp2);
271
272
+ trace_nvme_identify_ctrl();
273
+
274
return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
275
prp1, prp2);
276
}
277
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
278
uint64_t prp1 = le64_to_cpu(c->prp1);
279
uint64_t prp2 = le64_to_cpu(c->prp2);
280
281
- if (nsid == 0 || nsid > n->num_namespaces) {
282
+ trace_nvme_identify_ns(nsid);
283
+
284
+ if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
285
+ trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
286
return NVME_INVALID_NSID | NVME_DNR;
287
}
288
289
ns = &n->namespaces[nsid - 1];
290
+
291
return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
292
prp1, prp2);
293
}
294
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
295
uint16_t ret;
296
int i, j = 0;
297
298
+ trace_nvme_identify_nslist(min_nsid);
299
+
300
list = g_malloc0(data_len);
301
for (i = 0; i < n->num_namespaces; i++) {
302
if (i < min_nsid) {
303
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
304
case 0x02:
305
return nvme_identify_nslist(n, c);
306
default:
307
+ trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
308
return NVME_INVALID_FIELD | NVME_DNR;
309
}
310
}
311
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
312
switch (dw10) {
313
case NVME_VOLATILE_WRITE_CACHE:
314
result = blk_enable_write_cache(n->conf.blk);
315
+ trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
316
break;
317
case NVME_NUMBER_OF_QUEUES:
318
result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
319
+ trace_nvme_getfeat_numq(result);
320
break;
321
default:
322
+ trace_nvme_err_invalid_getfeat(dw10);
323
return NVME_INVALID_FIELD | NVME_DNR;
324
}
325
326
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
327
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
328
break;
329
case NVME_NUMBER_OF_QUEUES:
330
+ trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
331
+ ((dw11 >> 16) & 0xFFFF) + 1,
332
+ n->num_queues - 1, n->num_queues - 1);
333
req->cqe.result =
334
cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
335
break;
336
default:
337
+ trace_nvme_err_invalid_setfeat(dw10);
338
return NVME_INVALID_FIELD | NVME_DNR;
339
}
340
return NVME_SUCCESS;
341
@@ -XXX,XX +XXX,XX @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
342
case NVME_ADM_CMD_GET_FEATURES:
343
return nvme_get_feature(n, cmd, req);
344
default:
345
+ trace_nvme_err_invalid_admin_opc(cmd->opcode);
346
return NVME_INVALID_OPCODE | NVME_DNR;
347
}
348
}
349
@@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n)
350
uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
351
uint32_t page_size = 1 << page_bits;
352
353
- if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq ||
354
- n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) ||
355
- NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) ||
356
- NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) ||
357
- NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
358
- NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
359
- NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) ||
360
- NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) ||
361
- !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) {
362
+ if (unlikely(n->cq[0])) {
363
+ trace_nvme_err_startfail_cq();
364
+ return -1;
365
+ }
366
+ if (unlikely(n->sq[0])) {
367
+ trace_nvme_err_startfail_sq();
368
+ return -1;
369
+ }
370
+ if (unlikely(!n->bar.asq)) {
371
+ trace_nvme_err_startfail_nbarasq();
372
+ return -1;
373
+ }
374
+ if (unlikely(!n->bar.acq)) {
375
+ trace_nvme_err_startfail_nbaracq();
376
+ return -1;
377
+ }
378
+ if (unlikely(n->bar.asq & (page_size - 1))) {
379
+ trace_nvme_err_startfail_asq_misaligned(n->bar.asq);
380
+ return -1;
381
+ }
382
+ if (unlikely(n->bar.acq & (page_size - 1))) {
383
+ trace_nvme_err_startfail_acq_misaligned(n->bar.acq);
384
+ return -1;
385
+ }
386
+ if (unlikely(NVME_CC_MPS(n->bar.cc) <
387
+ NVME_CAP_MPSMIN(n->bar.cap))) {
388
+ trace_nvme_err_startfail_page_too_small(
389
+ NVME_CC_MPS(n->bar.cc),
390
+ NVME_CAP_MPSMIN(n->bar.cap));
391
+ return -1;
392
+ }
393
+ if (unlikely(NVME_CC_MPS(n->bar.cc) >
394
+ NVME_CAP_MPSMAX(n->bar.cap))) {
395
+ trace_nvme_err_startfail_page_too_large(
396
+ NVME_CC_MPS(n->bar.cc),
397
+ NVME_CAP_MPSMAX(n->bar.cap));
398
+ return -1;
399
+ }
400
+ if (unlikely(NVME_CC_IOCQES(n->bar.cc) <
401
+ NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
402
+ trace_nvme_err_startfail_cqent_too_small(
403
+ NVME_CC_IOCQES(n->bar.cc),
404
+ NVME_CTRL_CQES_MIN(n->bar.cap));
405
+ return -1;
406
+ }
407
+ if (unlikely(NVME_CC_IOCQES(n->bar.cc) >
408
+ NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
409
+ trace_nvme_err_startfail_cqent_too_large(
410
+ NVME_CC_IOCQES(n->bar.cc),
411
+ NVME_CTRL_CQES_MAX(n->bar.cap));
412
+ return -1;
413
+ }
414
+ if (unlikely(NVME_CC_IOSQES(n->bar.cc) <
415
+ NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
416
+ trace_nvme_err_startfail_sqent_too_small(
417
+ NVME_CC_IOSQES(n->bar.cc),
418
+ NVME_CTRL_SQES_MIN(n->bar.cap));
419
+ return -1;
420
+ }
421
+ if (unlikely(NVME_CC_IOSQES(n->bar.cc) >
422
+ NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
423
+ trace_nvme_err_startfail_sqent_too_large(
424
+ NVME_CC_IOSQES(n->bar.cc),
425
+ NVME_CTRL_SQES_MAX(n->bar.cap));
426
+ return -1;
427
+ }
428
+ if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) {
429
+ trace_nvme_err_startfail_asqent_sz_zero();
430
+ return -1;
431
+ }
432
+ if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) {
433
+ trace_nvme_err_startfail_acqent_sz_zero();
434
return -1;
435
}
436
437
@@ -XXX,XX +XXX,XX @@ static int nvme_start_ctrl(NvmeCtrl *n)
438
static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
439
unsigned size)
55
{
440
{
56
int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
441
+ if (unlikely(offset & (sizeof(uint32_t) - 1))) {
57
+ int cluster_sectors = iscsilun->cluster_size >> BDRV_SECTOR_BITS;
442
+ NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32,
58
443
+ "MMIO write not 32-bit aligned,"
59
if (iscsilun->allocmap == NULL) {
444
+ " offset=0x%"PRIx64"", offset);
445
+ /* should be ignored, fall through for now */
446
+ }
447
+
448
+ if (unlikely(size < sizeof(uint32_t))) {
449
+ NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall,
450
+ "MMIO write smaller than 32-bits,"
451
+ " offset=0x%"PRIx64", size=%u",
452
+ offset, size);
453
+ /* should be ignored, fall through for now */
454
+ }
455
+
456
switch (offset) {
457
- case 0xc:
458
+ case 0xc: /* INTMS */
459
+ if (unlikely(msix_enabled(&(n->parent_obj)))) {
460
+ NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
461
+ "undefined access to interrupt mask set"
462
+ " when MSI-X is enabled");
463
+ /* should be ignored, fall through for now */
464
+ }
465
n->bar.intms |= data & 0xffffffff;
466
n->bar.intmc = n->bar.intms;
467
+ trace_nvme_mmio_intm_set(data & 0xffffffff,
468
+ n->bar.intmc);
469
break;
470
- case 0x10:
471
+ case 0x10: /* INTMC */
472
+ if (unlikely(msix_enabled(&(n->parent_obj)))) {
473
+ NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
474
+ "undefined access to interrupt mask clr"
475
+ " when MSI-X is enabled");
476
+ /* should be ignored, fall through for now */
477
+ }
478
n->bar.intms &= ~(data & 0xffffffff);
479
n->bar.intmc = n->bar.intms;
480
+ trace_nvme_mmio_intm_clr(data & 0xffffffff,
481
+ n->bar.intmc);
482
break;
483
- case 0x14:
484
+ case 0x14: /* CC */
485
+ trace_nvme_mmio_cfg(data & 0xffffffff);
486
/* Windows first sends data, then sends enable bit */
487
if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
488
!NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc))
489
@@ -XXX,XX +XXX,XX @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
490
491
if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
492
n->bar.cc = data;
493
- if (nvme_start_ctrl(n)) {
494
+ if (unlikely(nvme_start_ctrl(n))) {
495
+ trace_nvme_err_startfail();
496
n->bar.csts = NVME_CSTS_FAILED;
497
} else {
498
+ trace_nvme_mmio_start_success();
499
n->bar.csts = NVME_CSTS_READY;
500
}
501
} else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
502
+ trace_nvme_mmio_stopped();
503
nvme_clear_ctrl(n);
504
n->bar.csts &= ~NVME_CSTS_READY;
505
}
506
if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
507
- nvme_clear_ctrl(n);
508
- n->bar.cc = data;
509
- n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
510
+ trace_nvme_mmio_shutdown_set();
511
+ nvme_clear_ctrl(n);
512
+ n->bar.cc = data;
513
+ n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
514
} else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
515
- n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
516
- n->bar.cc = data;
517
+ trace_nvme_mmio_shutdown_cleared();
518
+ n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
519
+ n->bar.cc = data;
520
+ }
521
+ break;
522
+ case 0x1C: /* CSTS */
523
+ if (data & (1 << 4)) {
524
+ NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported,
525
+ "attempted to W1C CSTS.NSSRO"
526
+ " but CAP.NSSRS is zero (not supported)");
527
+ } else if (data != 0) {
528
+ NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts,
529
+ "attempted to set a read only bit"
530
+ " of controller status");
531
+ }
532
+ break;
533
+ case 0x20: /* NSSR */
534
+ if (data == 0x4E564D65) {
535
+ trace_nvme_ub_mmiowr_ssreset_unsupported();
536
+ } else {
537
+ /* The spec says that writes of other values have no effect */
538
+ return;
539
}
540
break;
541
- case 0x24:
542
+ case 0x24: /* AQA */
543
n->bar.aqa = data & 0xffffffff;
544
+ trace_nvme_mmio_aqattr(data & 0xffffffff);
545
break;
546
- case 0x28:
547
+ case 0x28: /* ASQ */
548
n->bar.asq = data;
549
+ trace_nvme_mmio_asqaddr(data);
550
break;
551
- case 0x2c:
552
+ case 0x2c: /* ASQ hi */
553
n->bar.asq |= data << 32;
554
+ trace_nvme_mmio_asqaddr_hi(data, n->bar.asq);
555
break;
556
- case 0x30:
557
+ case 0x30: /* ACQ */
558
+ trace_nvme_mmio_acqaddr(data);
559
n->bar.acq = data;
560
break;
561
- case 0x34:
562
+ case 0x34: /* ACQ hi */
563
n->bar.acq |= data << 32;
564
+ trace_nvme_mmio_acqaddr_hi(data, n->bar.acq);
565
break;
566
+ case 0x38: /* CMBLOC */
567
+ NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved,
568
+ "invalid write to reserved CMBLOC"
569
+ " when CMBSZ is zero, ignored");
570
+ return;
571
+ case 0x3C: /* CMBSZ */
572
+ NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
573
+ "invalid write to read only CMBSZ, ignored");
574
+ return;
575
default:
576
+ NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
577
+ "invalid MMIO write,"
578
+ " offset=0x%"PRIx64", data=%"PRIx64"",
579
+ offset, data);
580
break;
581
}
582
}
583
@@ -XXX,XX +XXX,XX @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
584
uint8_t *ptr = (uint8_t *)&n->bar;
585
uint64_t val = 0;
586
587
+ if (unlikely(addr & (sizeof(uint32_t) - 1))) {
588
+ NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32,
589
+ "MMIO read not 32-bit aligned,"
590
+ " offset=0x%"PRIx64"", addr);
591
+ /* should RAZ, fall through for now */
592
+ } else if (unlikely(size < sizeof(uint32_t))) {
593
+ NVME_GUEST_ERR(nvme_ub_mmiord_toosmall,
594
+ "MMIO read smaller than 32-bits,"
595
+ " offset=0x%"PRIx64"", addr);
596
+ /* should RAZ, fall through for now */
597
+ }
598
+
599
if (addr < sizeof(n->bar)) {
600
memcpy(&val, ptr + addr, size);
601
+ } else {
602
+ NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
603
+ "MMIO read beyond last register,"
604
+ " offset=0x%"PRIx64", returning 0", addr);
605
}
606
+
607
return val;
608
}
609
610
@@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
611
{
612
uint32_t qid;
613
614
- if (addr & ((1 << 2) - 1)) {
615
+ if (unlikely(addr & ((1 << 2) - 1))) {
616
+ NVME_GUEST_ERR(nvme_ub_db_wr_misaligned,
617
+ "doorbell write not 32-bit aligned,"
618
+ " offset=0x%"PRIx64", ignoring", addr);
60
return;
619
return;
61
}
620
}
62
/* expand to entirely contain all affected clusters */
621
63
- cl_num_expanded = sector_num / iscsilun->cluster_sectors;
622
if (((addr - 0x1000) >> 2) & 1) {
64
+ assert(cluster_sectors);
623
+ /* Completion queue doorbell write */
65
+ cl_num_expanded = sector_num / cluster_sectors;
624
+
66
nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
625
uint16_t new_head = val & 0xffff;
67
- iscsilun->cluster_sectors) - cl_num_expanded;
626
int start_sqs;
68
+ cluster_sectors) - cl_num_expanded;
627
NvmeCQueue *cq;
69
/* shrink to touch only completely contained clusters */
628
70
- cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
629
qid = (addr - (0x1000 + (1 << 2))) >> 3;
71
- nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors
630
- if (nvme_check_cqid(n, qid)) {
72
+ cl_num_shrunk = DIV_ROUND_UP(sector_num, cluster_sectors);
631
+ if (unlikely(nvme_check_cqid(n, qid))) {
73
+ nb_cls_shrunk = (sector_num + nb_sectors) / cluster_sectors
632
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq,
74
- cl_num_shrunk;
633
+ "completion queue doorbell write"
75
if (allocated) {
634
+ " for nonexistent queue,"
76
bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
635
+ " sqid=%"PRIu32", ignoring", qid);
77
@@ -XXX,XX +XXX,XX @@ iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
636
return;
78
if (iscsilun->allocmap == NULL) {
637
}
79
return true;
638
80
}
639
cq = n->cq[qid];
81
- size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
640
- if (new_head >= cq->size) {
82
+ assert(iscsilun->cluster_size);
641
+ if (unlikely(new_head >= cq->size)) {
83
+ size = DIV_ROUND_UP(sector_num + nb_sectors,
642
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead,
84
+ iscsilun->cluster_size >> BDRV_SECTOR_BITS);
643
+ "completion queue doorbell write value"
85
return !(find_next_bit(iscsilun->allocmap, size,
644
+ " beyond queue size, cqid=%"PRIu32","
86
- sector_num / iscsilun->cluster_sectors) == size);
645
+ " new_head=%"PRIu16", ignoring",
87
+ sector_num * BDRV_SECTOR_SIZE /
646
+ qid, new_head);
88
+ iscsilun->cluster_size) == size);
647
return;
89
}
648
}
90
649
91
static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
650
@@ -XXX,XX +XXX,XX @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
92
@@ -XXX,XX +XXX,XX @@ static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
651
nvme_isr_notify(n, cq);
93
if (iscsilun->allocmap_valid == NULL) {
652
}
94
return false;
653
} else {
95
}
654
+ /* Submission queue doorbell write */
96
- size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
655
+
97
+ assert(iscsilun->cluster_size);
656
uint16_t new_tail = val & 0xffff;
98
+ size = DIV_ROUND_UP(sector_num + nb_sectors,
657
NvmeSQueue *sq;
99
+ iscsilun->cluster_size >> BDRV_SECTOR_BITS);
658
100
return (find_next_zero_bit(iscsilun->allocmap_valid, size,
659
qid = (addr - 0x1000) >> 3;
101
- sector_num / iscsilun->cluster_sectors) == size);
660
- if (nvme_check_sqid(n, qid)) {
102
+ sector_num * BDRV_SECTOR_SIZE /
661
+ if (unlikely(nvme_check_sqid(n, qid))) {
103
+ iscsilun->cluster_size) == size);
662
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq,
104
}
663
+ "submission queue doorbell write"
105
664
+ " for nonexistent queue,"
106
static int coroutine_fn
665
+ " sqid=%"PRIu32", ignoring", qid);
107
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
666
return;
108
BlockDriverState *file;
667
}
109
/* check the block status from the beginning of the cluster
668
110
* containing the start sector */
669
sq = n->sq[qid];
111
- int64_t ret = iscsi_co_get_block_status(bs,
670
- if (new_tail >= sq->size) {
112
- sector_num - sector_num % iscsilun->cluster_sectors,
671
+ if (unlikely(new_tail >= sq->size)) {
113
- BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
672
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail,
114
+ int cluster_sectors = iscsilun->cluster_size >> BDRV_SECTOR_BITS;
673
+ "submission queue doorbell write value"
115
+ int head;
674
+ " beyond queue size, sqid=%"PRIu32","
116
+ int64_t ret;
675
+ " new_tail=%"PRIu16", ignoring",
117
+
676
+ qid, new_tail);
118
+ assert(cluster_sectors);
677
return;
119
+ head = sector_num % cluster_sectors;
678
}
120
+ ret = iscsi_co_get_block_status(bs, sector_num - head,
679
121
+ BDRV_REQUEST_MAX_SECTORS, &pnum,
680
diff --git a/hw/block/trace-events b/hw/block/trace-events
122
+ &file);
681
index XXXXXXX..XXXXXXX 100644
123
if (ret < 0) {
682
--- a/hw/block/trace-events
124
return ret;
683
+++ b/hw/block/trace-events
125
}
684
@@ -XXX,XX +XXX,XX @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6
126
/* if the whole request falls into an unallocated area we can avoid
685
hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d"
127
- * to read and directly return zeroes instead */
686
hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d"
128
- if (ret & BDRV_BLOCK_ZERO &&
687
129
- pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) {
688
+# hw/block/nvme.c
130
+ * reading and directly return zeroes instead */
689
+# nvme traces for successful events
131
+ if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors + head) {
690
+nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
132
qemu_iovec_memset(iov, 0, 0x00, iov->size);
691
+nvme_irq_pin(void) "pulsing IRQ pin"
133
return 0;
692
+nvme_irq_masked(void) "IRQ is masked"
134
}
693
+nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
135
@@ -XXX,XX +XXX,XX @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
694
+nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
136
* reasonable size */
695
+nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
137
if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
696
+nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
138
iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
697
+nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
139
- iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
698
+nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16""
140
- iscsilun->block_size) >> BDRV_SECTOR_BITS;
699
+nvme_identify_ctrl(void) "identify controller"
141
+ iscsilun->cluster_size = iscsilun->bl.opt_unmap_gran *
700
+nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
142
+ iscsilun->block_size;
701
+nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
143
if (iscsilun->lbprz) {
702
+nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s"
144
ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
703
+nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
145
}
704
+nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
146
@@ -XXX,XX +XXX,XX @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
705
+nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
147
{
706
+nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
148
IscsiLun *iscsilun = bs->opaque;
707
+nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
149
bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
708
+nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64""
150
- bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
709
+nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64""
151
+ bdi->cluster_size = iscsilun->cluster_size;
710
+nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64""
152
return 0;
711
+nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
153
}
712
+nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
154
713
+nvme_mmio_start_success(void) "setting controller enable bit succeeded"
714
+nvme_mmio_stopped(void) "cleared controller enable bit"
715
+nvme_mmio_shutdown_set(void) "shutdown bit set"
716
+nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
717
+
718
+# nvme traces for error conditions
719
+nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
720
+nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
721
+nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
722
+nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred"
723
+nvme_err_invalid_field(void) "invalid field"
724
+nvme_err_invalid_prp(void) "invalid PRP"
725
+nvme_err_invalid_sgl(void) "invalid SGL"
726
+nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u"
727
+nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
728
+nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
729
+nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
730
+nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sqid=%"PRIu16""
731
+nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
732
+nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16""
733
+nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16""
734
+nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64""
735
+nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16""
736
+nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16""
737
+nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16""
738
+nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16""
739
+nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16""
740
+nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64""
741
+nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16""
742
+nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16""
743
+nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
744
+nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
745
+nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
746
+nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
747
+nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
748
+nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
749
+nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null"
750
+nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64""
751
+nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64""
752
+nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u"
753
+nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u"
754
+nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u"
755
+nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u"
756
+nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u"
757
+nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u"
758
+nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero"
759
+nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero"
760
+nvme_err_startfail(void) "setting controller enable bit failed"
761
+
762
+# Traces for undefined behavior
763
+nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""
764
+nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u"
765
+nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled"
766
+nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status"
767
+nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)"
768
+nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
769
+nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
770
+nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
771
+nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
772
+nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
773
+nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
774
+nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0"
775
+nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring"
776
+nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring"
777
+nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring"
778
+nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring"
779
+nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_tail=%"PRIu16", ignoring"
780
+
781
# hw/block/xen_disk.c
782
xen_disk_alloc(char *name) "%s"
783
xen_disk_init(char *name) "%s"
155
--
784
--
156
2.13.6
785
2.13.6
157
786
158
787
diff view generated by jsdifflib
1
From: Alberto Garcia <berto@igalia.com>
1
From: Fam Zheng <famz@redhat.com>
2
2
3
This patch fixes several mistakes in the documentation of the
3
Management tools create overlays of running guests with qemu-img:
4
compressed cluster descriptor:
5
4
6
1) the documentation claims that the cluster descriptor contains the
5
$ qemu-img create -b /image/in/use.qcow2 -f qcow2 /overlay/image.qcow2
7
number of sectors used to store the compressed data, but what it
8
actually contains is the number of sectors *minus one* or, in other
9
words, the number of additional sectors after the first one.
10
6
11
2) the width of the fields is incorrectly specified. The number of bits
7
but this doesn't work anymore due to image locking:
12
used by each field is
13
8
14
x = 62 - (cluster_bits - 8) for the offset field
9
qemu-img: /overlay/image.qcow2: Failed to get shared "write" lock
15
y = (cluster_bits - 8) for the size field
10
Is another process using the image?
16
11
Could not open backing image to determine size.
17
So the offset field's location is [0, x-1], not [0, x] as stated.
12
Use the force share option to allow this use case again.
18
19
3) the size field does not contain the size of the compressed data,
20
but rather the number of sectors where that data is stored. The
21
compressed data starts at the exact point specified in the offset
22
field and ends when there's enough data to produce a cluster of
23
decompressed data. Both points can be in the middle of a sector,
24
allowing several compressed clusters to be stored next to one
25
another, sharing sectors if necessary.
26
13
27
Cc: qemu-stable@nongnu.org
14
Cc: qemu-stable@nongnu.org
28
Signed-off-by: Alberto Garcia <berto@igalia.com>
15
Signed-off-by: Fam Zheng <famz@redhat.com>
29
Reviewed-by: Eric Blake <eblake@redhat.com>
16
Reviewed-by: Eric Blake <eblake@redhat.com>
30
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
31
---
18
---
32
docs/interop/qcow2.txt | 16 +++++++++++++---
19
block.c | 3 ++-
33
1 file changed, 13 insertions(+), 3 deletions(-)
20
1 file changed, 2 insertions(+), 1 deletion(-)
34
21
35
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
22
diff --git a/block.c b/block.c
36
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
37
--- a/docs/interop/qcow2.txt
24
--- a/block.c
38
+++ b/docs/interop/qcow2.txt
25
+++ b/block.c
39
@@ -XXX,XX +XXX,XX @@ Standard Cluster Descriptor:
26
@@ -XXX,XX +XXX,XX @@ void bdrv_img_create(const char *filename, const char *fmt,
40
27
back_flags = flags;
41
Compressed Clusters Descriptor (x = 62 - (cluster_bits - 8)):
28
back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
42
29
43
- Bit 0 - x: Host cluster offset. This is usually _not_ aligned to a
30
+ backing_options = qdict_new();
44
- cluster boundary!
31
if (backing_fmt) {
45
+ Bit 0 - x-1: Host cluster offset. This is usually _not_ aligned to a
32
- backing_options = qdict_new();
46
+ cluster or sector boundary!
33
qdict_put_str(backing_options, "driver", backing_fmt);
47
34
}
48
- x+1 - 61: Compressed size of the images in sectors of 512 bytes
35
+ qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);
49
+ x - 61: Number of additional 512-byte sectors used for the
36
50
+ compressed data, beyond the sector containing the offset
37
bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
51
+ in the previous field. Some of these sectors may reside
38
&local_err);
52
+ in the next contiguous host cluster.
53
+
54
+ Note that the compressed data does not necessarily occupy
55
+ all of the bytes in the final sector; rather, decompression
56
+ stops when it has produced a cluster of data.
57
+
58
+ Another compressed cluster may map to the tail of the final
59
+ sector used by this compressed cluster.
60
61
If a cluster is unallocated, read requests shall read the data from the backing
62
file (except if bit 0 in the Standard Cluster Descriptor is set). If there is
63
--
39
--
64
2.13.6
40
2.13.6
65
41
66
42
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
We are gradually moving away from sector-based interfaces, towards
3
It's not working anymore since QEMU v1.3.0 - time to remove it now.
4
byte-based. Update the parallels driver accordingly. Note that
5
the internal function block_status() is still sector-based, because
6
it is still in use by other sector-based functions; but that's okay
7
because request_alignment is 512 as a result of those functions.
8
For now, no optimizations are added based on the mapping hint.
9
4
10
Signed-off-by: Eric Blake <eblake@redhat.com>
5
Signed-off-by: Thomas Huth <thuth@redhat.com>
11
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Reviewed-by: John Snow <jsnow@redhat.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
7
Reviewed-by: Markus Armbruster <armbru@redhat.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
---
9
---
15
block/parallels.c | 22 +++++++++++++++-------
10
blockdev.c | 11 -----------
16
1 file changed, 15 insertions(+), 7 deletions(-)
11
qemu-doc.texi | 6 ------
12
2 files changed, 17 deletions(-)
17
13
18
diff --git a/block/parallels.c b/block/parallels.c
14
diff --git a/blockdev.c b/blockdev.c
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
20
--- a/block/parallels.c
16
--- a/blockdev.c
21
+++ b/block/parallels.c
17
+++ b/blockdev.c
22
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
18
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_legacy_drive_opts = {
23
}
19
.type = QEMU_OPT_STRING,
24
20
.help = "chs translation (auto, lba, none)",
25
21
},{
26
-static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
22
- .name = "boot",
27
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
23
- .type = QEMU_OPT_BOOL,
28
+static int coroutine_fn parallels_co_block_status(BlockDriverState *bs,
24
- .help = "(deprecated, ignored)",
29
+ bool want_zero,
25
- },{
30
+ int64_t offset,
26
.name = "addr",
31
+ int64_t bytes,
27
.type = QEMU_OPT_STRING,
32
+ int64_t *pnum,
28
.help = "pci address (virtio only)",
33
+ int64_t *map,
29
@@ -XXX,XX +XXX,XX @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
34
+ BlockDriverState **file)
30
goto fail;
35
{
36
BDRVParallelsState *s = bs->opaque;
37
- int64_t offset;
38
+ int count;
39
40
+ assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
41
qemu_co_mutex_lock(&s->lock);
42
- offset = block_status(s, sector_num, nb_sectors, pnum);
43
+ offset = block_status(s, offset >> BDRV_SECTOR_BITS,
44
+ bytes >> BDRV_SECTOR_BITS, &count);
45
qemu_co_mutex_unlock(&s->lock);
46
47
+ *pnum = count * BDRV_SECTOR_SIZE;
48
if (offset < 0) {
49
return 0;
50
}
31
}
51
32
52
+ *map = offset * BDRV_SECTOR_SIZE;
33
- /* Deprecated option boot=[on|off] */
53
*file = bs->file->bs;
34
- if (qemu_opt_get(legacy_opts, "boot") != NULL) {
54
- return (offset << BDRV_SECTOR_BITS) |
35
- fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be "
55
- BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
36
- "ignored. Future versions will reject this parameter. Please "
56
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
37
- "update your scripts.\n");
57
}
38
- }
58
39
-
59
static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
40
/* Other deprecated options */
60
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = {
41
if (!qtest_enabled()) {
61
.bdrv_open        = parallels_open,
42
for (i = 0; i < ARRAY_SIZE(deprecated); i++) {
62
.bdrv_close        = parallels_close,
43
diff --git a/qemu-doc.texi b/qemu-doc.texi
63
.bdrv_child_perm = bdrv_format_default_perms,
44
index XXXXXXX..XXXXXXX 100644
64
- .bdrv_co_get_block_status = parallels_co_get_block_status,
45
--- a/qemu-doc.texi
65
+ .bdrv_co_block_status = parallels_co_block_status,
46
+++ b/qemu-doc.texi
66
.bdrv_has_zero_init = bdrv_has_zero_init_1,
47
@@ -XXX,XX +XXX,XX @@ deprecated.
67
.bdrv_co_flush_to_os = parallels_co_flush_to_os,
48
68
.bdrv_co_readv = parallels_co_readv,
49
@section System emulator command line arguments
50
51
-@subsection -drive boot=on|off (since 1.3.0)
52
-
53
-The ``boot=on|off'' option to the ``-drive'' argument is
54
-ignored. Applications should use the ``bootindex=N'' parameter
55
-to set an absolute ordering between devices instead.
56
-
57
@subsection -tdf (since 1.3.0)
58
59
The ``-tdf'' argument is ignored. The behaviour implemented
69
--
60
--
70
2.13.6
61
2.13.6
71
62
72
63
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
qcow2_create2() calls qemu_co_mutex_lock(). Only a coroutine_fn may
3
It's been marked as deprecated since QEMU v2.10.0, and so far nobody
4
call another coroutine_fn. In fact, qcow2_create2 is always called from
4
complained that we should keep it, so let's remove this legacy option
5
coroutine context.
5
now to simplify the code quite a bit.
6
6
7
Rename the function to add the "co" moniker and add coroutine_fn.
7
Signed-off-by: Thomas Huth <thuth@redhat.com>
8
8
Reviewed-by: John Snow <jsnow@redhat.com>
9
Reported-by: Marc-André Lureau <marcandre.lureau@redhat.com>
9
Reviewed-by: Markus Armbruster <armbru@redhat.com>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Message-Id: <20170705102231.20711-3-stefanha@redhat.com>
12
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
13
Reviewed-by: Eric Blake <eblake@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
---
11
---
16
block/qcow2.c | 17 +++++++++--------
12
vl.c | 86 ++-------------------------------------------------------
17
1 file changed, 9 insertions(+), 8 deletions(-)
13
qemu-doc.texi | 8 ------
18
14
qemu-options.hx | 19 ++-----------
19
diff --git a/block/qcow2.c b/block/qcow2.c
15
3 files changed, 4 insertions(+), 109 deletions(-)
16
17
diff --git a/vl.c b/vl.c
20
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
21
--- a/block/qcow2.c
19
--- a/vl.c
22
+++ b/block/qcow2.c
20
+++ b/vl.c
23
@@ -XXX,XX +XXX,XX @@ static uint64_t qcow2_opt_get_refcount_bits_del(QemuOpts *opts, int version,
21
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
24
return refcount_bits;
22
const char *boot_order = NULL;
25
}
23
const char *boot_once = NULL;
26
24
DisplayState *ds;
27
-static int qcow2_create2(const char *filename, int64_t total_size,
25
- int cyls, heads, secs, translation;
28
- const char *backing_file, const char *backing_format,
26
QemuOpts *opts, *machine_opts;
29
- int flags, size_t cluster_size, PreallocMode prealloc,
27
- QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL;
30
- QemuOpts *opts, int version, int refcount_order,
28
+ QemuOpts *icount_opts = NULL, *accel_opts = NULL;
31
- const char *encryptfmt, Error **errp)
29
QemuOptsList *olist;
32
+static int coroutine_fn
30
int optind;
33
+qcow2_co_create2(const char *filename, int64_t total_size,
31
const char *optarg;
34
+ const char *backing_file, const char *backing_format,
32
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
35
+ int flags, size_t cluster_size, PreallocMode prealloc,
33
36
+ QemuOpts *opts, int version, int refcount_order,
34
cpu_model = NULL;
37
+ const char *encryptfmt, Error **errp)
35
snapshot = 0;
38
{
36
- cyls = heads = secs = 0;
39
QDict *options;
37
- translation = BIOS_ATA_TRANSLATION_AUTO;
40
38
41
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opt
39
nb_nics = 0;
42
40
43
refcount_order = ctz32(refcount_bits);
41
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
44
42
if (optind >= argc)
45
- ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
43
break;
46
- cluster_size, prealloc, opts, version, refcount_order,
44
if (argv[optind][0] != '-') {
47
- encryptfmt, &local_err);
45
- hda_opts = drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
48
+ ret = qcow2_co_create2(filename, size, backing_file, backing_fmt, flags,
46
+ drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
49
+ cluster_size, prealloc, opts, version, refcount_order,
47
} else {
50
+ encryptfmt, &local_err);
48
const QEMUOption *popt;
51
error_propagate(errp, local_err);
49
52
50
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
53
finish:
51
cpu_model = optarg;
52
break;
53
case QEMU_OPTION_hda:
54
- {
55
- char buf[256];
56
- if (cyls == 0)
57
- snprintf(buf, sizeof(buf), "%s", HD_OPTS);
58
- else
59
- snprintf(buf, sizeof(buf),
60
- "%s,cyls=%d,heads=%d,secs=%d%s",
61
- HD_OPTS , cyls, heads, secs,
62
- translation == BIOS_ATA_TRANSLATION_LBA ?
63
- ",trans=lba" :
64
- translation == BIOS_ATA_TRANSLATION_NONE ?
65
- ",trans=none" : "");
66
- drive_add(IF_DEFAULT, 0, optarg, buf);
67
- break;
68
- }
69
case QEMU_OPTION_hdb:
70
case QEMU_OPTION_hdc:
71
case QEMU_OPTION_hdd:
72
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
73
case QEMU_OPTION_snapshot:
74
snapshot = 1;
75
break;
76
- case QEMU_OPTION_hdachs:
77
- {
78
- const char *p;
79
- p = optarg;
80
- cyls = strtol(p, (char **)&p, 0);
81
- if (cyls < 1 || cyls > 16383)
82
- goto chs_fail;
83
- if (*p != ',')
84
- goto chs_fail;
85
- p++;
86
- heads = strtol(p, (char **)&p, 0);
87
- if (heads < 1 || heads > 16)
88
- goto chs_fail;
89
- if (*p != ',')
90
- goto chs_fail;
91
- p++;
92
- secs = strtol(p, (char **)&p, 0);
93
- if (secs < 1 || secs > 63)
94
- goto chs_fail;
95
- if (*p == ',') {
96
- p++;
97
- if (!strcmp(p, "large")) {
98
- translation = BIOS_ATA_TRANSLATION_LARGE;
99
- } else if (!strcmp(p, "rechs")) {
100
- translation = BIOS_ATA_TRANSLATION_RECHS;
101
- } else if (!strcmp(p, "none")) {
102
- translation = BIOS_ATA_TRANSLATION_NONE;
103
- } else if (!strcmp(p, "lba")) {
104
- translation = BIOS_ATA_TRANSLATION_LBA;
105
- } else if (!strcmp(p, "auto")) {
106
- translation = BIOS_ATA_TRANSLATION_AUTO;
107
- } else {
108
- goto chs_fail;
109
- }
110
- } else if (*p != '\0') {
111
- chs_fail:
112
- error_report("invalid physical CHS format");
113
- exit(1);
114
- }
115
- if (hda_opts != NULL) {
116
- qemu_opt_set_number(hda_opts, "cyls", cyls,
117
- &error_abort);
118
- qemu_opt_set_number(hda_opts, "heads", heads,
119
- &error_abort);
120
- qemu_opt_set_number(hda_opts, "secs", secs,
121
- &error_abort);
122
- if (translation == BIOS_ATA_TRANSLATION_LARGE) {
123
- qemu_opt_set(hda_opts, "trans", "large",
124
- &error_abort);
125
- } else if (translation == BIOS_ATA_TRANSLATION_RECHS) {
126
- qemu_opt_set(hda_opts, "trans", "rechs",
127
- &error_abort);
128
- } else if (translation == BIOS_ATA_TRANSLATION_LBA) {
129
- qemu_opt_set(hda_opts, "trans", "lba",
130
- &error_abort);
131
- } else if (translation == BIOS_ATA_TRANSLATION_NONE) {
132
- qemu_opt_set(hda_opts, "trans", "none",
133
- &error_abort);
134
- }
135
- }
136
- }
137
- error_report("'-hdachs' is deprecated, please use '-device"
138
- " ide-hd,cyls=c,heads=h,secs=s,...' instead");
139
- break;
140
case QEMU_OPTION_numa:
141
opts = qemu_opts_parse_noisily(qemu_find_opts("numa"),
142
optarg, true);
143
diff --git a/qemu-doc.texi b/qemu-doc.texi
144
index XXXXXXX..XXXXXXX 100644
145
--- a/qemu-doc.texi
146
+++ b/qemu-doc.texi
147
@@ -XXX,XX +XXX,XX @@ The ``--net dump'' argument is now replaced with the
148
``-object filter-dump'' argument which works in combination
149
with the modern ``-netdev`` backends instead.
150
151
-@subsection -hdachs (since 2.10.0)
152
-
153
-The ``-hdachs'' argument is now a synonym for setting
154
-the ``cyls'', ``heads'', ``secs'', and ``trans'' properties
155
-on the ``ide-hd'' device using the ``-device'' argument.
156
-The new syntax allows different settings to be provided
157
-per disk.
158
-
159
@subsection -usbdevice (since 2.10.0)
160
161
The ``-usbdevice DEV'' argument is now a synonym for setting
162
diff --git a/qemu-options.hx b/qemu-options.hx
163
index XXXXXXX..XXXXXXX 100644
164
--- a/qemu-options.hx
165
+++ b/qemu-options.hx
166
@@ -XXX,XX +XXX,XX @@ of available connectors of a given interface type.
167
@item media=@var{media}
168
This option defines the type of the media: disk or cdrom.
169
@item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}]
170
-These options have the same definition as they have in @option{-hdachs}.
171
-These parameters are deprecated, use the corresponding parameters
172
+Force disk physical geometry and the optional BIOS translation (trans=none or
173
+lba). These parameters are deprecated, use the corresponding parameters
174
of @code{-device} instead.
175
@item snapshot=@var{snapshot}
176
@var{snapshot} is "on" or "off" and controls snapshot mode for the given drive
177
@@ -XXX,XX +XXX,XX @@ the raw disk image you use is not written back. You can however force
178
the write back by pressing @key{C-a s} (@pxref{disk_images}).
179
ETEXI
180
181
-DEF("hdachs", HAS_ARG, QEMU_OPTION_hdachs, \
182
- "-hdachs c,h,s[,t]\n" \
183
- " force hard disk 0 physical geometry and the optional BIOS\n" \
184
- " translation (t=none or lba) (usually QEMU can guess them)\n",
185
- QEMU_ARCH_ALL)
186
-STEXI
187
-@item -hdachs @var{c},@var{h},@var{s},[,@var{t}]
188
-@findex -hdachs
189
-Force hard disk 0 physical geometry (1 <= @var{c} <= 16383, 1 <=
190
-@var{h} <= 16, 1 <= @var{s} <= 63) and optionally force the BIOS
191
-translation mode (@var{t}=none, lba or auto). Usually QEMU can guess
192
-all those parameters. This option is deprecated, please use
193
-@code{-device ide-hd,cyls=c,heads=h,secs=s,...} instead.
194
-ETEXI
195
-
196
DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev,
197
"-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n"
198
" [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n"
54
--
199
--
55
2.13.6
200
2.13.6
56
201
57
202
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
This reverts commit 4da97120d51a4383aa96d741a2b837f8c4bbcd0b.
3
Looks like we forgot to announce the deprecation of these options in
4
the corresponding chapter of the qemu-doc text, so let's do that now.
4
5
5
blk_aio_flush() now handles the blk->root == NULL case, so we no longer
6
Signed-off-by: Thomas Huth <thuth@redhat.com>
6
need this workaround.
7
Reviewed-by: John Snow <jsnow@redhat.com>
7
8
Reviewed-by: Markus Armbruster <armbru@redhat.com>
8
Cc: John Snow <jsnow@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Reviewed-by: Eric Blake <eblake@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
10
---
13
hw/ide/core.c | 10 +---------
11
qemu-doc.texi | 15 +++++++++++++++
14
1 file changed, 1 insertion(+), 9 deletions(-)
12
1 file changed, 15 insertions(+)
15
13
16
diff --git a/hw/ide/core.c b/hw/ide/core.c
14
diff --git a/qemu-doc.texi b/qemu-doc.texi
17
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/ide/core.c
16
--- a/qemu-doc.texi
19
+++ b/hw/ide/core.c
17
+++ b/qemu-doc.texi
20
@@ -XXX,XX +XXX,XX @@ static void ide_flush_cache(IDEState *s)
18
@@ -XXX,XX +XXX,XX @@ longer be directly supported in QEMU.
21
s->status |= BUSY_STAT;
19
The ``-drive if=scsi'' argument is replaced by the the
22
ide_set_retry(s);
20
``-device BUS-TYPE'' argument combined with ``-drive if=none''.
23
block_acct_start(blk_get_stats(s->blk), &s->acct, 0, BLOCK_ACCT_FLUSH);
21
24
-
22
+@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0)
25
- if (blk_bs(s->blk)) {
23
+
26
- s->pio_aiocb = blk_aio_flush(s->blk, ide_flush_cb, s);
24
+The drive geometry arguments are replaced by the geometry arguments
27
- } else {
25
+that can be specified with the ``-device'' parameter.
28
- /* XXX blk_aio_flush() crashes when blk_bs(blk) is NULL, remove this
26
+
29
- * temporary workaround when blk_aio_*() functions handle NULL blk_bs.
27
+@subsection -drive serial=... (since 2.10.0)
30
- */
28
+
31
- ide_flush_cb(s, 0);
29
+The drive serial argument is replaced by the serial argument
32
- }
30
+that can be specified with the ``-device'' parameter.
33
+ s->pio_aiocb = blk_aio_flush(s->blk, ide_flush_cb, s);
31
+
34
}
32
+@subsection -drive addr=... (since 2.10.0)
35
33
+
36
static void ide_cfata_metadata_inquiry(IDEState *s)
34
+The drive addr argument is replaced by the addr argument
35
+that can be specified with the ``-device'' parameter.
36
+
37
@subsection -net dump (since 2.10.0)
38
39
The ``--net dump'' argument is now replaced with the
37
--
40
--
38
2.13.6
41
2.13.6
39
42
40
43
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
From: Fam Zheng <famz@redhat.com>
2
2
3
We are gradually moving away from sector-based interfaces, towards
3
Signed-off-by: Fam Zheng <famz@redhat.com>
4
byte-based. Update the generic helpers, and all passthrough clients
5
(blkdebug, commit, mirror, throttle) accordingly.
6
7
Signed-off-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
5
---
12
include/block/block_int.h | 28 ++++++++++++++++------------
6
include/block/block_int.h | 1 -
13
block/blkdebug.c | 20 +++++++++++---------
7
block/io.c | 18 ------------------
14
block/commit.c | 2 +-
8
2 files changed, 19 deletions(-)
15
block/io.c | 36 ++++++++++++++++++++----------------
16
block/mirror.c | 2 +-
17
block/throttle.c | 2 +-
18
6 files changed, 50 insertions(+), 40 deletions(-)
19
9
20
diff --git a/include/block/block_int.h b/include/block/block_int.h
10
diff --git a/include/block/block_int.h b/include/block/block_int.h
21
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
22
--- a/include/block/block_int.h
12
--- a/include/block/block_int.h
23
+++ b/include/block/block_int.h
13
+++ b/include/block/block_int.h
24
@@ -XXX,XX +XXX,XX @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
14
@@ -XXX,XX +XXX,XX @@ bool blk_dev_is_tray_open(BlockBackend *blk);
25
uint64_t *nperm, uint64_t *nshared);
15
bool blk_dev_is_medium_locked(BlockBackend *blk);
26
16
27
/*
17
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
28
- * Default implementation for drivers to pass bdrv_co_get_block_status() to
18
-bool bdrv_requests_pending(BlockDriverState *bs);
29
+ * Default implementation for drivers to pass bdrv_co_block_status() to
19
30
* their file.
20
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
31
*/
21
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);
32
-int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
33
- int64_t sector_num,
34
- int nb_sectors,
35
- int *pnum,
36
- BlockDriverState **file);
37
+int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
38
+ bool want_zero,
39
+ int64_t offset,
40
+ int64_t bytes,
41
+ int64_t *pnum,
42
+ int64_t *map,
43
+ BlockDriverState **file);
44
/*
45
- * Default implementation for drivers to pass bdrv_co_get_block_status() to
46
+ * Default implementation for drivers to pass bdrv_co_block_status() to
47
* their backing file.
48
*/
49
-int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
50
- int64_t sector_num,
51
- int nb_sectors,
52
- int *pnum,
53
- BlockDriverState **file);
54
+int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
55
+ bool want_zero,
56
+ int64_t offset,
57
+ int64_t bytes,
58
+ int64_t *pnum,
59
+ int64_t *map,
60
+ BlockDriverState **file);
61
const char *bdrv_get_parent_name(const BlockDriverState *bs);
62
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
63
bool blk_dev_has_removable_media(BlockBackend *blk);
64
diff --git a/block/blkdebug.c b/block/blkdebug.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/block/blkdebug.c
67
+++ b/block/blkdebug.c
68
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
69
return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
70
}
71
72
-static int64_t coroutine_fn blkdebug_co_get_block_status(
73
- BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
74
- BlockDriverState **file)
75
+static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
76
+ bool want_zero,
77
+ int64_t offset,
78
+ int64_t bytes,
79
+ int64_t *pnum,
80
+ int64_t *map,
81
+ BlockDriverState **file)
82
{
83
- assert(QEMU_IS_ALIGNED(sector_num | nb_sectors,
84
- DIV_ROUND_UP(bs->bl.request_alignment,
85
- BDRV_SECTOR_SIZE)));
86
- return bdrv_co_get_block_status_from_file(bs, sector_num, nb_sectors,
87
- pnum, file);
88
+ assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
89
+ return bdrv_co_block_status_from_file(bs, want_zero, offset, bytes,
90
+ pnum, map, file);
91
}
92
93
static void blkdebug_close(BlockDriverState *bs)
94
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_blkdebug = {
95
.bdrv_co_flush_to_disk = blkdebug_co_flush,
96
.bdrv_co_pwrite_zeroes = blkdebug_co_pwrite_zeroes,
97
.bdrv_co_pdiscard = blkdebug_co_pdiscard,
98
- .bdrv_co_get_block_status = blkdebug_co_get_block_status,
99
+ .bdrv_co_block_status = blkdebug_co_block_status,
100
101
.bdrv_debug_event = blkdebug_debug_event,
102
.bdrv_debug_breakpoint = blkdebug_debug_breakpoint,
103
diff --git a/block/commit.c b/block/commit.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/block/commit.c
106
+++ b/block/commit.c
107
@@ -XXX,XX +XXX,XX @@ static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
108
static BlockDriver bdrv_commit_top = {
109
.format_name = "commit_top",
110
.bdrv_co_preadv = bdrv_commit_top_preadv,
111
- .bdrv_co_get_block_status = bdrv_co_get_block_status_from_backing,
112
+ .bdrv_co_block_status = bdrv_co_block_status_from_backing,
113
.bdrv_refresh_filename = bdrv_commit_top_refresh_filename,
114
.bdrv_close = bdrv_commit_top_close,
115
.bdrv_child_perm = bdrv_commit_top_child_perm,
116
diff --git a/block/io.c b/block/io.c
22
diff --git a/block/io.c b/block/io.c
117
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
118
--- a/block/io.c
24
--- a/block/io.c
119
+++ b/block/io.c
25
+++ b/block/io.c
120
@@ -XXX,XX +XXX,XX @@ typedef struct BdrvCoBlockStatusData {
26
@@ -XXX,XX +XXX,XX @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
121
bool done;
27
assert(old >= 1);
122
} BdrvCoBlockStatusData;
123
124
-int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
125
- int64_t sector_num,
126
- int nb_sectors,
127
- int *pnum,
128
- BlockDriverState **file)
129
+int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
130
+ bool want_zero,
131
+ int64_t offset,
132
+ int64_t bytes,
133
+ int64_t *pnum,
134
+ int64_t *map,
135
+ BlockDriverState **file)
136
{
137
assert(bs->file && bs->file->bs);
138
- *pnum = nb_sectors;
139
+ *pnum = bytes;
140
+ *map = offset;
141
*file = bs->file->bs;
142
- return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
143
- (sector_num << BDRV_SECTOR_BITS);
144
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
145
}
28
}
146
29
147
-int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
30
-/* Check if any requests are in-flight (including throttled requests) */
148
- int64_t sector_num,
31
-bool bdrv_requests_pending(BlockDriverState *bs)
149
- int nb_sectors,
32
-{
150
- int *pnum,
33
- BdrvChild *child;
151
- BlockDriverState **file)
34
-
152
+int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
35
- if (atomic_read(&bs->in_flight)) {
153
+ bool want_zero,
36
- return true;
154
+ int64_t offset,
37
- }
155
+ int64_t bytes,
38
-
156
+ int64_t *pnum,
39
- QLIST_FOREACH(child, &bs->children, next) {
157
+ int64_t *map,
40
- if (bdrv_requests_pending(child->bs)) {
158
+ BlockDriverState **file)
41
- return true;
159
{
42
- }
160
assert(bs->backing && bs->backing->bs);
43
- }
161
- *pnum = nb_sectors;
44
-
162
+ *pnum = bytes;
45
- return false;
163
+ *map = offset;
46
-}
164
*file = bs->backing->bs;
47
-
165
- return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
48
typedef struct {
166
- (sector_num << BDRV_SECTOR_BITS);
49
Coroutine *co;
167
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
50
BlockDriverState *bs;
168
}
169
170
/*
171
diff --git a/block/mirror.c b/block/mirror.c
172
index XXXXXXX..XXXXXXX 100644
173
--- a/block/mirror.c
174
+++ b/block/mirror.c
175
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_mirror_top = {
176
.bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes,
177
.bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
178
.bdrv_co_flush = bdrv_mirror_top_flush,
179
- .bdrv_co_get_block_status = bdrv_co_get_block_status_from_backing,
180
+ .bdrv_co_block_status = bdrv_co_block_status_from_backing,
181
.bdrv_refresh_filename = bdrv_mirror_top_refresh_filename,
182
.bdrv_close = bdrv_mirror_top_close,
183
.bdrv_child_perm = bdrv_mirror_top_child_perm,
184
diff --git a/block/throttle.c b/block/throttle.c
185
index XXXXXXX..XXXXXXX 100644
186
--- a/block/throttle.c
187
+++ b/block/throttle.c
188
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_throttle = {
189
.bdrv_reopen_prepare = throttle_reopen_prepare,
190
.bdrv_reopen_commit = throttle_reopen_commit,
191
.bdrv_reopen_abort = throttle_reopen_abort,
192
- .bdrv_co_get_block_status = bdrv_co_get_block_status_from_file,
193
+ .bdrv_co_block_status = bdrv_co_block_status_from_file,
194
195
.bdrv_co_drain_begin = throttle_co_drain_begin,
196
.bdrv_co_drain_end = throttle_co_drain_end,
197
--
51
--
198
2.13.6
52
2.13.6
199
53
200
54
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2
Reviewed-by: Fam Zheng <famz@redhat.com>
3
---
4
block/io.c | 6 ++++++
5
1 file changed, 6 insertions(+)
2
6
3
We are gradually moving away from sector-based interfaces, towards
4
byte-based. Now that all drivers have been updated to provide the
5
byte-based .bdrv_co_block_status(), we can delete the sector-based
6
interface.
7
8
Signed-off-by: Eric Blake <eblake@redhat.com>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
include/block/block_int.h | 3 ---
14
block/io.c | 50 ++++++++++-------------------------------------
15
2 files changed, 10 insertions(+), 43 deletions(-)
16
17
diff --git a/include/block/block_int.h b/include/block/block_int.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/block/block_int.h
20
+++ b/include/block/block_int.h
21
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
22
* as well as non-NULL pnum, map, and file; in turn, the driver
23
* must return an error or set pnum to an aligned non-zero value.
24
*/
25
- int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
26
- int64_t sector_num, int nb_sectors, int *pnum,
27
- BlockDriverState **file);
28
int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
29
bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
30
int64_t *map, BlockDriverState **file);
31
diff --git a/block/io.c b/block/io.c
7
diff --git a/block/io.c b/block/io.c
32
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
33
--- a/block/io.c
9
--- a/block/io.c
34
+++ b/block/io.c
10
+++ b/block/io.c
35
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
11
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
36
12
BdrvNextIterator it;
37
/* Must be non-NULL or bdrv_getlength() would have failed */
13
GSList *aio_ctxs = NULL, *ctx;
38
assert(bs->drv);
14
39
- if (!bs->drv->bdrv_co_get_block_status && !bs->drv->bdrv_co_block_status) {
15
+ /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
40
+ if (!bs->drv->bdrv_co_block_status) {
16
+ * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
41
*pnum = bytes;
17
+ * nodes in several different AioContexts, so make sure we're in the main
42
ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
18
+ * context. */
43
if (offset + bytes == total_size) {
19
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
44
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
20
+
45
21
block_job_pause_all();
46
/* Round out to request_alignment boundaries */
22
47
align = bs->bl.request_alignment;
23
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
48
- if (bs->drv->bdrv_co_get_block_status && align < BDRV_SECTOR_SIZE) {
49
- align = BDRV_SECTOR_SIZE;
50
- }
51
aligned_offset = QEMU_ALIGN_DOWN(offset, align);
52
aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
53
54
- if (bs->drv->bdrv_co_get_block_status) {
55
- int count; /* sectors */
56
- int64_t longret;
57
-
58
- assert(QEMU_IS_ALIGNED(aligned_offset | aligned_bytes,
59
- BDRV_SECTOR_SIZE));
60
- /*
61
- * The contract allows us to return pnum smaller than bytes, even
62
- * if the next query would see the same status; we truncate the
63
- * request to avoid overflowing the driver's 32-bit interface.
64
- */
65
- longret = bs->drv->bdrv_co_get_block_status(
66
- bs, aligned_offset >> BDRV_SECTOR_BITS,
67
- MIN(INT_MAX, aligned_bytes) >> BDRV_SECTOR_BITS, &count,
68
- &local_file);
69
- if (longret < 0) {
70
- assert(INT_MIN <= longret);
71
- ret = longret;
72
- goto out;
73
- }
74
- if (longret & BDRV_BLOCK_OFFSET_VALID) {
75
- local_map = longret & BDRV_BLOCK_OFFSET_MASK;
76
- }
77
- ret = longret & ~BDRV_BLOCK_OFFSET_MASK;
78
- *pnum = count * BDRV_SECTOR_SIZE;
79
- } else {
80
- ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
81
- aligned_bytes, pnum, &local_map,
82
- &local_file);
83
- if (ret < 0) {
84
- *pnum = 0;
85
- goto out;
86
- }
87
- assert(*pnum); /* The block driver must make progress */
88
+ ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
89
+ aligned_bytes, pnum, &local_map,
90
+ &local_file);
91
+ if (ret < 0) {
92
+ *pnum = 0;
93
+ goto out;
94
}
95
96
/*
97
- * The driver's result must be a multiple of request_alignment.
98
+ * The driver's result must be a non-zero multiple of request_alignment.
99
* Clamp pnum and adjust map to original request.
100
*/
101
- assert(QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset);
102
+ assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
103
+ align > offset - aligned_offset);
104
*pnum -= offset - aligned_offset;
105
if (*pnum > bytes) {
106
*pnum = bytes;
107
--
24
--
108
2.13.6
25
2.13.6
109
26
110
27
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
bdrv_drained_begin() doesn't increase bs->quiesce_counter recursively
2
and also doesn't notify other parent nodes of children, which both means
3
that the child nodes are not actually drained, and bdrv_drained_begin()
4
is providing useful functionality only on a single node.
2
5
3
We are gradually moving away from sector-based interfaces, towards
6
To keep things consistent, we also shouldn't call the block driver
4
byte-based. Update the vmdk driver accordingly. Drop the
7
callbacks recursively.
5
now-unused vmdk_find_index_in_cluster().
6
8
7
Also, fix a pre-existing bug: if find_extent() fails (unlikely,
9
A proper recursive drain version that provides an actually working
8
since the block layer did a bounds check), then we must return a
10
drained section for child nodes will be introduced later.
9
failure, rather than 0.
10
11
11
Signed-off-by: Eric Blake <eblake@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
13
Reviewed-by: Fam Zheng <famz@redhat.com>
13
Reviewed-by: Fam Zheng <famz@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
---
14
---
16
block/vmdk.c | 38 ++++++++++++++------------------------
15
block/io.c | 16 +++++++++-------
17
1 file changed, 14 insertions(+), 24 deletions(-)
16
1 file changed, 9 insertions(+), 7 deletions(-)
18
17
19
diff --git a/block/vmdk.c b/block/vmdk.c
18
diff --git a/block/io.c b/block/io.c
20
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
21
--- a/block/vmdk.c
20
--- a/block/io.c
22
+++ b/block/vmdk.c
21
+++ b/block/io.c
23
@@ -XXX,XX +XXX,XX @@ static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
22
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
24
return extent_relative_offset % cluster_size;
25
}
23
}
26
24
27
-static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent,
25
/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
28
- int64_t sector_num)
26
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
29
-{
27
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive)
30
- uint64_t offset;
31
- offset = vmdk_find_offset_in_cluster(extent, sector_num * BDRV_SECTOR_SIZE);
32
- return offset / BDRV_SECTOR_SIZE;
33
-}
34
-
35
-static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
36
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
37
+static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs,
38
+ bool want_zero,
39
+ int64_t offset, int64_t bytes,
40
+ int64_t *pnum, int64_t *map,
41
+ BlockDriverState **file)
42
{
28
{
43
BDRVVmdkState *s = bs->opaque;
29
BdrvChild *child, *tmp;
44
int64_t index_in_cluster, n, ret;
30
BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
45
- uint64_t offset;
31
@@ -XXX,XX +XXX,XX @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
46
+ uint64_t cluster_offset;
32
bdrv_coroutine_enter(bs, data.co);
47
VmdkExtent *extent;
33
BDRV_POLL_WHILE(bs, !data.done);
48
34
49
- extent = find_extent(s, sector_num, NULL);
35
- QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
50
+ extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL);
36
- bdrv_drain_invoke(child->bs, begin);
51
if (!extent) {
37
+ if (recursive) {
52
- return 0;
38
+ QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
53
+ return -EIO;
39
+ bdrv_drain_invoke(child->bs, begin, true);
40
+ }
54
}
41
}
55
qemu_co_mutex_lock(&s->lock);
42
}
56
- ret = get_cluster_offset(bs, extent, NULL,
43
57
- sector_num * 512, false, &offset,
44
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
58
+ ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset,
45
bdrv_parent_drained_begin(bs);
59
0, 0);
60
qemu_co_mutex_unlock(&s->lock);
61
62
- index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
63
+ index_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
64
switch (ret) {
65
case VMDK_ERROR:
66
ret = -EIO;
67
@@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
68
ret = BDRV_BLOCK_DATA;
69
if (!extent->compressed) {
70
ret |= BDRV_BLOCK_OFFSET_VALID;
71
- ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS))
72
- & BDRV_BLOCK_OFFSET_MASK;
73
+ *map = cluster_offset + index_in_cluster;
74
}
75
*file = extent->file->bs;
76
break;
77
}
46
}
78
47
79
- n = extent->cluster_sectors - index_in_cluster;
48
- bdrv_drain_invoke(bs, true);
80
- if (n > nb_sectors) {
49
+ bdrv_drain_invoke(bs, true, false);
81
- n = nb_sectors;
50
bdrv_drain_recurse(bs);
82
- }
83
- *pnum = n;
84
+ n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster;
85
+ *pnum = MIN(n, bytes);
86
return ret;
87
}
51
}
88
52
89
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vmdk = {
53
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
90
.bdrv_close = vmdk_close,
54
}
91
.bdrv_create = vmdk_create,
55
92
.bdrv_co_flush_to_disk = vmdk_co_flush,
56
/* Re-enable things in child-to-parent order */
93
- .bdrv_co_get_block_status = vmdk_co_get_block_status,
57
- bdrv_drain_invoke(bs, false);
94
+ .bdrv_co_block_status = vmdk_co_block_status,
58
+ bdrv_drain_invoke(bs, false, false);
95
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
59
bdrv_parent_drained_end(bs);
96
.bdrv_has_zero_init = vmdk_has_zero_init,
60
aio_enable_external(bdrv_get_aio_context(bs));
97
.bdrv_get_specific_info = vmdk_get_specific_info,
61
}
62
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
63
aio_context_acquire(aio_context);
64
aio_disable_external(aio_context);
65
bdrv_parent_drained_begin(bs);
66
- bdrv_drain_invoke(bs, true);
67
+ bdrv_drain_invoke(bs, true, true);
68
aio_context_release(aio_context);
69
70
if (!g_slist_find(aio_ctxs, aio_context)) {
71
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
72
73
/* Re-enable things in child-to-parent order */
74
aio_context_acquire(aio_context);
75
- bdrv_drain_invoke(bs, false);
76
+ bdrv_drain_invoke(bs, false, true);
77
bdrv_parent_drained_end(bs);
78
aio_enable_external(aio_context);
79
aio_context_release(aio_context);
98
--
80
--
99
2.13.6
81
2.13.6
100
82
101
83
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
The existing test is for bdrv_drain_all_begin/end() only. Generalise the
2
test case so that it can be run for the other variants as well. At the
3
moment this is only bdrv_drained_begin/end(), but in a while, we'll add
4
another one.
2
5
3
We are gradually moving away from sector-based interfaces, towards
6
Also, add a backing file to the test node to test whether the operations
4
byte-based. Update the vvfat driver accordingly. Note that we
7
work recursively.
5
can rely on the block driver having already clamped limits to our
6
block size, and simplify accordingly.
7
8
8
Signed-off-by: Eric Blake <eblake@redhat.com>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
10
---
13
block/vvfat.c | 16 +++++++---------
11
tests/test-bdrv-drain.c | 69 ++++++++++++++++++++++++++++++++++++++++++++-----
14
1 file changed, 7 insertions(+), 9 deletions(-)
12
1 file changed, 62 insertions(+), 7 deletions(-)
15
13
16
diff --git a/block/vvfat.c b/block/vvfat.c
14
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
17
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
18
--- a/block/vvfat.c
16
--- a/tests/test-bdrv-drain.c
19
+++ b/block/vvfat.c
17
+++ b/tests/test-bdrv-drain.c
20
@@ -XXX,XX +XXX,XX @@ vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
18
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_test = {
21
return ret;
19
20
.bdrv_co_drain_begin = bdrv_test_co_drain_begin,
21
.bdrv_co_drain_end = bdrv_test_co_drain_end,
22
+
23
+ .bdrv_child_perm = bdrv_format_default_perms,
24
};
25
26
static void aio_ret_cb(void *opaque, int ret)
27
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
28
*aio_ret = ret;
22
}
29
}
23
30
24
-static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
31
-static void test_drv_cb_drain_all(void)
25
- int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file)
32
+enum drain_type {
26
+static int coroutine_fn vvfat_co_block_status(BlockDriverState *bs,
33
+ BDRV_DRAIN_ALL,
27
+ bool want_zero, int64_t offset,
34
+ BDRV_DRAIN,
28
+ int64_t bytes, int64_t *n,
35
+};
29
+ int64_t *map,
36
+
30
+ BlockDriverState **file)
37
+static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
38
+{
39
+ switch (drain_type) {
40
+ case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
41
+ case BDRV_DRAIN: bdrv_drained_begin(bs); break;
42
+ default: g_assert_not_reached();
43
+ }
44
+}
45
+
46
+static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
47
+{
48
+ switch (drain_type) {
49
+ case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
50
+ case BDRV_DRAIN: bdrv_drained_end(bs); break;
51
+ default: g_assert_not_reached();
52
+ }
53
+}
54
+
55
+static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
31
{
56
{
32
- *n = bs->total_sectors - sector_num;
57
BlockBackend *blk;
33
- if (*n > nb_sectors) {
58
- BlockDriverState *bs;
34
- *n = nb_sectors;
59
- BDRVTestState *s;
35
- } else if (*n < 0) {
60
+ BlockDriverState *bs, *backing;
36
- return 0;
61
+ BDRVTestState *s, *backing_s;
37
- }
62
BlockAIOCB *acb;
38
+ *n = bytes;
63
int aio_ret;
39
return BDRV_BLOCK_DATA;
64
65
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void)
66
s = bs->opaque;
67
blk_insert_bs(blk, bs, &error_abort);
68
69
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
70
+ backing_s = backing->opaque;
71
+ bdrv_set_backing_hd(bs, backing, &error_abort);
72
+
73
/* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
74
g_assert_cmpint(s->drain_count, ==, 0);
75
- bdrv_drain_all_begin();
76
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
77
+
78
+ do_drain_begin(drain_type, bs);
79
+
80
g_assert_cmpint(s->drain_count, ==, 1);
81
- bdrv_drain_all_end();
82
+ g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
83
+
84
+ do_drain_end(drain_type, bs);
85
+
86
g_assert_cmpint(s->drain_count, ==, 0);
87
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
88
89
/* Now do the same while a request is pending */
90
aio_ret = -EINPROGRESS;
91
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_all(void)
92
g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
93
94
g_assert_cmpint(s->drain_count, ==, 0);
95
- bdrv_drain_all_begin();
96
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
97
+
98
+ do_drain_begin(drain_type, bs);
99
+
100
g_assert_cmpint(aio_ret, ==, 0);
101
g_assert_cmpint(s->drain_count, ==, 1);
102
- bdrv_drain_all_end();
103
+ g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
104
+
105
+ do_drain_end(drain_type, bs);
106
+
107
g_assert_cmpint(s->drain_count, ==, 0);
108
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
109
110
+ bdrv_unref(backing);
111
bdrv_unref(bs);
112
blk_unref(blk);
40
}
113
}
41
114
42
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vvfat = {
115
+static void test_drv_cb_drain_all(void)
43
116
+{
44
.bdrv_co_preadv = vvfat_co_preadv,
117
+ test_drv_cb_common(BDRV_DRAIN_ALL, true);
45
.bdrv_co_pwritev = vvfat_co_pwritev,
118
+}
46
- .bdrv_co_get_block_status = vvfat_co_get_block_status,
119
+
47
+ .bdrv_co_block_status = vvfat_co_block_status,
120
+static void test_drv_cb_drain(void)
48
};
121
+{
49
122
+ test_drv_cb_common(BDRV_DRAIN, false);
50
static void bdrv_vvfat_init(void)
123
+}
124
+
125
int main(int argc, char **argv)
126
{
127
bdrv_init();
128
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
129
g_test_init(&argc, &argv, NULL);
130
131
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
132
+ g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
133
134
return g_test_run();
135
}
51
--
136
--
52
2.13.6
137
2.13.6
53
138
54
139
diff view generated by jsdifflib
1
From: Anton Nefedov <anton.nefedov@virtuozzo.com>
1
This is currently only working correctly for bdrv_drain(), not for
2
bdrv_drain_all(). Leave a comment for the drain_all case, we'll address
3
it later.
2
4
3
This new test case only makes sense for qcow2 while iotest 033 is generic;
4
however it matches the test purpose perfectly and also 033 contains those
5
do_test() tricks to pass the alignment, which won't look nice being
6
duplicated in other tests or moved to the common code.
7
8
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
6
---
11
tests/qemu-iotests/033 | 29 +++++++++++++++++++++++++++++
7
tests/test-bdrv-drain.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
12
tests/qemu-iotests/033.out | 13 +++++++++++++
8
1 file changed, 45 insertions(+)
13
2 files changed, 42 insertions(+)
14
9
15
diff --git a/tests/qemu-iotests/033 b/tests/qemu-iotests/033
10
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
16
index XXXXXXX..XXXXXXX 100755
11
index XXXXXXX..XXXXXXX 100644
17
--- a/tests/qemu-iotests/033
12
--- a/tests/test-bdrv-drain.c
18
+++ b/tests/qemu-iotests/033
13
+++ b/tests/test-bdrv-drain.c
19
@@ -XXX,XX +XXX,XX @@ do_test()
14
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void)
20
    } | $QEMU_IO $IO_EXTRA_ARGS
15
test_drv_cb_common(BDRV_DRAIN, false);
21
}
16
}
22
17
23
+echo
18
+static void test_quiesce_common(enum drain_type drain_type, bool recursive)
24
+echo "=== Test aligned and misaligned write zeroes operations ==="
19
+{
20
+ BlockBackend *blk;
21
+ BlockDriverState *bs, *backing;
25
+
22
+
26
for write_zero_cmd in "write -z" "aio_write -z"; do
23
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
27
for align in 512 4k; do
24
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
28
    echo
25
+ &error_abort);
29
@@ -XXX,XX +XXX,XX @@ for align in 512 4k; do
26
+ blk_insert_bs(blk, bs, &error_abort);
30
done
31
done
32
33
+
27
+
34
+# Trigger truncate that would shrink qcow2 L1 table, which is done by
28
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
35
+# clearing one entry (8 bytes) with bdrv_co_pwrite_zeroes()
29
+ bdrv_set_backing_hd(bs, backing, &error_abort);
36
+
30
+
37
+echo
31
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
38
+echo "=== Test misaligned write zeroes via truncate ==="
32
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
39
+echo
40
+
33
+
41
+# any size will do, but the smaller the size the smaller the required image
34
+ do_drain_begin(drain_type, bs);
42
+CLUSTER_SIZE=$((4 * 1024))
43
+L2_COVERAGE=$(($CLUSTER_SIZE * $CLUSTER_SIZE / 8))
44
+_make_test_img $(($L2_COVERAGE * 2))
45
+
35
+
46
+do_test 512 "write -P 1 0 0x200" "$TEST_IMG" | _filter_qemu_io
36
+ g_assert_cmpint(bs->quiesce_counter, ==, 1);
47
+# next L2 table
37
+ g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
48
+do_test 512 "write -P 1 $L2_COVERAGE 0x200" "$TEST_IMG" | _filter_qemu_io
49
+
38
+
50
+# only interested in qcow2 here; also other formats might respond with
39
+ do_drain_end(drain_type, bs);
51
+# "not supported" error message
52
+if [ $IMGFMT = "qcow2" ]; then
53
+ do_test 512 "truncate $L2_COVERAGE" "$TEST_IMG" | _filter_qemu_io
54
+fi
55
+
40
+
56
+do_test 512 "read -P 1 0 0x200" "$TEST_IMG" | _filter_qemu_io
41
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
42
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
57
+
43
+
58
# success, all done
44
+ bdrv_unref(backing);
59
+echo
45
+ bdrv_unref(bs);
60
echo "*** done"
46
+ blk_unref(blk);
61
rm -f $seq.full
47
+}
62
status=0
63
diff --git a/tests/qemu-iotests/033.out b/tests/qemu-iotests/033.out
64
index XXXXXXX..XXXXXXX 100644
65
--- a/tests/qemu-iotests/033.out
66
+++ b/tests/qemu-iotests/033.out
67
@@ -XXX,XX +XXX,XX @@
68
QA output created by 033
69
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
70
71
+=== Test aligned and misaligned write zeroes operations ===
72
+
48
+
73
== preparing image ==
49
+static void test_quiesce_drain_all(void)
74
wrote 1024/1024 bytes at offset 512
50
+{
75
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
51
+ // XXX drain_all doesn't quiesce
76
@@ -XXX,XX +XXX,XX @@ read 512/512 bytes at offset 512
52
+ //test_quiesce_common(BDRV_DRAIN_ALL, true);
77
read 3072/3072 bytes at offset 1024
53
+}
78
3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
79
80
+
54
+
81
+=== Test misaligned write zeroes via truncate ===
55
+static void test_quiesce_drain(void)
56
+{
57
+ test_quiesce_common(BDRV_DRAIN, false);
58
+}
82
+
59
+
83
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
60
int main(int argc, char **argv)
84
+wrote 512/512 bytes at offset 0
61
{
85
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
62
bdrv_init();
86
+wrote 512/512 bytes at offset 2097152
63
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
87
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
64
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
88
+read 512/512 bytes at offset 0
65
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
89
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
66
67
+ g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
68
+ g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
90
+
69
+
91
*** done
70
return g_test_run();
71
}
92
--
72
--
93
2.13.6
73
2.13.6
94
74
95
75
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
Block jobs already paused themselves when their main BlockBackend
2
entered a drained section. This is not good enough: We also want to
3
pause a block job and keep it from submitting new requests if, for example, the
4
mirror target node should be drained.
2
5
3
We are gradually moving away from sector-based interfaces, towards
6
This implements .drained_begin/end callbacks in child_job in order to
4
byte-based. Update the gluster driver accordingly.
7
consider all block nodes related to the job, and removes the
8
BlockBackend callbacks which are unnecessary now because the root of the
9
job main BlockBackend is always referenced with a child_job, too.
5
10
6
In want_zero mode, we continue to report fine-grained hole
7
information (the caller wants as much mapping detail as possible);
8
but when not in that mode, the caller prefers larger *pnum and
9
merely cares about what offsets are allocated at this layer, rather
10
than where the holes live. Since holes still read as zeroes at
11
this layer (rather than deferring to a backing layer), we can take
12
the shortcut of skipping find_allocation(), and merely state that
13
all bytes are allocated.
14
15
We can also drop redundant bounds checks that are already
16
guaranteed by the block layer.
17
18
Signed-off-by: Eric Blake <eblake@redhat.com>
19
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
20
Reviewed-by: Fam Zheng <famz@redhat.com>
21
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
22
---
12
---
23
block/gluster.c | 70 ++++++++++++++++++++++++++++-----------------------------
13
blockjob.c | 22 +++++++++-------------
24
1 file changed, 34 insertions(+), 36 deletions(-)
14
1 file changed, 9 insertions(+), 13 deletions(-)
25
15
26
diff --git a/block/gluster.c b/block/gluster.c
16
diff --git a/blockjob.c b/blockjob.c
27
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
28
--- a/block/gluster.c
18
--- a/blockjob.c
29
+++ b/block/gluster.c
19
+++ b/blockjob.c
30
@@ -XXX,XX +XXX,XX @@ exit:
20
@@ -XXX,XX +XXX,XX @@ static char *child_job_get_parent_desc(BdrvChild *c)
21
job->id);
31
}
22
}
32
23
33
/*
24
-static const BdrvChildRole child_job = {
34
- * Returns the allocation status of the specified sectors.
25
- .get_parent_desc = child_job_get_parent_desc,
35
+ * Returns the allocation status of the specified offset.
26
- .stay_at_node = true,
36
*
27
-};
37
- * If 'sector_num' is beyond the end of the disk image the return value is 0
28
-
38
- * and 'pnum' is set to 0.
29
-static void block_job_drained_begin(void *opaque)
39
+ * The block layer guarantees 'offset' and 'bytes' are within bounds.
30
+static void child_job_drained_begin(BdrvChild *c)
40
*
41
- * 'pnum' is set to the number of sectors (including and immediately following
42
- * the specified sector) that are known to be in the same
43
+ * 'pnum' is set to the number of bytes (including and immediately following
44
+ * the specified offset) that are known to be in the same
45
* allocated/unallocated state.
46
*
47
- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
48
- * beyond the end of the disk image it will be clamped.
49
+ * 'bytes' is the max value 'pnum' should be set to.
50
*
51
- * (Based on raw_co_get_block_status() from file-posix.c.)
52
+ * (Based on raw_co_block_status() from file-posix.c.)
53
*/
54
-static int64_t coroutine_fn qemu_gluster_co_get_block_status(
55
- BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
56
- BlockDriverState **file)
57
+static int coroutine_fn qemu_gluster_co_block_status(BlockDriverState *bs,
58
+ bool want_zero,
59
+ int64_t offset,
60
+ int64_t bytes,
61
+ int64_t *pnum,
62
+ int64_t *map,
63
+ BlockDriverState **file)
64
{
31
{
65
BDRVGlusterState *s = bs->opaque;
32
- BlockJob *job = opaque;
66
- off_t start, data = 0, hole = 0;
33
+ BlockJob *job = c->opaque;
67
- int64_t total_size;
34
block_job_pause(job);
68
+ off_t data = 0, hole = 0;
69
int ret = -EINVAL;
70
71
if (!s->fd) {
72
return ret;
73
}
74
75
- start = sector_num * BDRV_SECTOR_SIZE;
76
- total_size = bdrv_getlength(bs);
77
- if (total_size < 0) {
78
- return total_size;
79
- } else if (start >= total_size) {
80
- *pnum = 0;
81
- return 0;
82
- } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
83
- nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
84
+ if (!want_zero) {
85
+ *pnum = bytes;
86
+ *map = offset;
87
+ *file = bs;
88
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
89
}
90
91
- ret = find_allocation(bs, start, &data, &hole);
92
+ ret = find_allocation(bs, offset, &data, &hole);
93
if (ret == -ENXIO) {
94
/* Trailing hole */
95
- *pnum = nb_sectors;
96
+ *pnum = bytes;
97
ret = BDRV_BLOCK_ZERO;
98
} else if (ret < 0) {
99
/* No info available, so pretend there are no holes */
100
- *pnum = nb_sectors;
101
+ *pnum = bytes;
102
ret = BDRV_BLOCK_DATA;
103
- } else if (data == start) {
104
- /* On a data extent, compute sectors to the end of the extent,
105
+ } else if (data == offset) {
106
+ /* On a data extent, compute bytes to the end of the extent,
107
* possibly including a partial sector at EOF. */
108
- *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
109
+ *pnum = MIN(bytes, hole - offset);
110
ret = BDRV_BLOCK_DATA;
111
} else {
112
- /* On a hole, compute sectors to the beginning of the next extent. */
113
- assert(hole == start);
114
- *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
115
+ /* On a hole, compute bytes to the beginning of the next extent. */
116
+ assert(hole == offset);
117
+ *pnum = MIN(bytes, data - offset);
118
ret = BDRV_BLOCK_ZERO;
119
}
120
121
+ *map = offset;
122
*file = bs;
123
124
- return ret | BDRV_BLOCK_OFFSET_VALID | start;
125
+ return ret | BDRV_BLOCK_OFFSET_VALID;
126
}
35
}
127
36
128
37
-static void block_job_drained_end(void *opaque)
129
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster = {
38
+static void child_job_drained_end(BdrvChild *c)
130
#ifdef CONFIG_GLUSTERFS_ZEROFILL
39
{
131
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
40
- BlockJob *job = opaque;
132
#endif
41
+ BlockJob *job = c->opaque;
133
- .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
42
block_job_resume(job);
134
+ .bdrv_co_block_status = qemu_gluster_co_block_status,
43
}
135
.create_opts = &qemu_gluster_create_opts,
44
45
-static const BlockDevOps block_job_dev_ops = {
46
- .drained_begin = block_job_drained_begin,
47
- .drained_end = block_job_drained_end,
48
+static const BdrvChildRole child_job = {
49
+ .get_parent_desc = child_job_get_parent_desc,
50
+ .drained_begin = child_job_drained_begin,
51
+ .drained_end = child_job_drained_end,
52
+ .stay_at_node = true,
136
};
53
};
137
54
138
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_tcp = {
55
void block_job_remove_all_bdrv(BlockJob *job)
139
#ifdef CONFIG_GLUSTERFS_ZEROFILL
56
@@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
140
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
57
block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
141
#endif
58
bs->job = job;
142
- .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
59
143
+ .bdrv_co_block_status = qemu_gluster_co_block_status,
60
- blk_set_dev_ops(blk, &block_job_dev_ops, job);
144
.create_opts = &qemu_gluster_create_opts,
61
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
145
};
62
146
63
QLIST_INSERT_HEAD(&block_jobs, job, job_list);
147
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_unix = {
148
#ifdef CONFIG_GLUSTERFS_ZEROFILL
149
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
150
#endif
151
- .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
152
+ .bdrv_co_block_status = qemu_gluster_co_block_status,
153
.create_opts = &qemu_gluster_create_opts,
154
};
155
156
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_rdma = {
157
#ifdef CONFIG_GLUSTERFS_ZEROFILL
158
.bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
159
#endif
160
- .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
161
+ .bdrv_co_block_status = qemu_gluster_co_block_status,
162
.create_opts = &qemu_gluster_create_opts,
163
};
164
165
--
64
--
166
2.13.6
65
2.13.6
167
66
168
67
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
Block jobs must be paused if any of the involved nodes are drained.
2
2
3
We are gradually moving away from sector-based interfaces, towards
4
byte-based. Update the qed driver accordingly, taking the opportunity
5
to inline qed_is_allocated_cb() into its lone caller (the callback
6
used to be important, until we switched qed to coroutines). There is
7
no intent to optimize based on the want_zero flag for this format.
8
9
Signed-off-by: Eric Blake <eblake@redhat.com>
10
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Reviewed-by: Fam Zheng <famz@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
4
---
14
block/qed.c | 76 +++++++++++++++++++------------------------------------------
5
tests/test-bdrv-drain.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
15
1 file changed, 24 insertions(+), 52 deletions(-)
6
1 file changed, 121 insertions(+)
16
7
17
diff --git a/block/qed.c b/block/qed.c
8
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
18
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
19
--- a/block/qed.c
10
--- a/tests/test-bdrv-drain.c
20
+++ b/block/qed.c
11
+++ b/tests/test-bdrv-drain.c
21
@@ -XXX,XX +XXX,XX @@ finish:
12
@@ -XXX,XX +XXX,XX @@
22
return ret;
13
14
#include "qemu/osdep.h"
15
#include "block/block.h"
16
+#include "block/blockjob_int.h"
17
#include "sysemu/block-backend.h"
18
#include "qapi/error.h"
19
20
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
21
test_quiesce_common(BDRV_DRAIN, false);
23
}
22
}
24
23
25
-typedef struct {
24
+
26
- BlockDriverState *bs;
25
+typedef struct TestBlockJob {
27
- Coroutine *co;
26
+ BlockJob common;
28
- uint64_t pos;
27
+ bool should_complete;
29
- int64_t status;
28
+} TestBlockJob;
30
- int *pnum;
29
+
31
- BlockDriverState **file;
30
+static void test_job_completed(BlockJob *job, void *opaque)
32
-} QEDIsAllocatedCB;
31
+{
33
-
32
+ block_job_completed(job, 0);
34
-/* Called with table_lock held. */
33
+}
35
-static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
34
+
36
+static int coroutine_fn bdrv_qed_co_block_status(BlockDriverState *bs,
35
+static void coroutine_fn test_job_start(void *opaque)
37
+ bool want_zero,
36
+{
38
+ int64_t pos, int64_t bytes,
37
+ TestBlockJob *s = opaque;
39
+ int64_t *pnum, int64_t *map,
38
+
40
+ BlockDriverState **file)
39
+ while (!s->should_complete) {
41
{
40
+ block_job_sleep_ns(&s->common, 100000);
42
- QEDIsAllocatedCB *cb = opaque;
41
+ }
43
- BDRVQEDState *s = cb->bs->opaque;
42
+
44
- *cb->pnum = len / BDRV_SECTOR_SIZE;
43
+ block_job_defer_to_main_loop(&s->common, test_job_completed, NULL);
45
+ BDRVQEDState *s = bs->opaque;
44
+}
46
+ size_t len = MIN(bytes, SIZE_MAX);
45
+
47
+ int status;
46
+static void test_job_complete(BlockJob *job, Error **errp)
48
+ QEDRequest request = { .l2_table = NULL };
47
+{
49
+ uint64_t offset;
48
+ TestBlockJob *s = container_of(job, TestBlockJob, common);
49
+ s->should_complete = true;
50
+}
51
+
52
+BlockJobDriver test_job_driver = {
53
+ .instance_size = sizeof(TestBlockJob),
54
+ .start = test_job_start,
55
+ .complete = test_job_complete,
56
+};
57
+
58
+static void test_blockjob_common(enum drain_type drain_type)
59
+{
60
+ BlockBackend *blk_src, *blk_target;
61
+ BlockDriverState *src, *target;
62
+ BlockJob *job;
50
+ int ret;
63
+ int ret;
51
+
64
+
52
+ qemu_co_mutex_lock(&s->table_lock);
65
+ src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
53
+ ret = qed_find_cluster(s, &request, pos, &len, &offset);
66
+ &error_abort);
67
+ blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
68
+ blk_insert_bs(blk_src, src, &error_abort);
54
+
69
+
55
+ *pnum = len;
70
+ target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
56
switch (ret) {
71
+ &error_abort);
57
case QED_CLUSTER_FOUND:
72
+ blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
58
- offset |= qed_offset_into_cluster(s, cb->pos);
73
+ blk_insert_bs(blk_target, target, &error_abort);
59
- cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
74
+
60
- *cb->file = cb->bs->file->bs;
75
+ job = block_job_create("job0", &test_job_driver, src, 0, BLK_PERM_ALL, 0,
61
+ *map = offset | qed_offset_into_cluster(s, pos);
76
+ 0, NULL, NULL, &error_abort);
62
+ status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
77
+ block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
63
+ *file = bs->file->bs;
78
+ block_job_start(job);
64
break;
79
+
65
case QED_CLUSTER_ZERO:
80
+ g_assert_cmpint(job->pause_count, ==, 0);
66
- cb->status = BDRV_BLOCK_ZERO;
81
+ g_assert_false(job->paused);
67
+ status = BDRV_BLOCK_ZERO;
82
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
68
break;
83
+
69
case QED_CLUSTER_L2:
84
+ do_drain_begin(drain_type, src);
70
case QED_CLUSTER_L1:
85
+
71
- cb->status = 0;
86
+ if (drain_type == BDRV_DRAIN_ALL) {
72
+ status = 0;
87
+ /* bdrv_drain_all() drains both src and target, and involves an
73
break;
88
+ * additional block_job_pause_all() */
74
default:
89
+ g_assert_cmpint(job->pause_count, ==, 3);
75
assert(ret < 0);
90
+ } else {
76
- cb->status = ret;
91
+ g_assert_cmpint(job->pause_count, ==, 1);
77
+ status = ret;
92
+ }
78
break;
93
+ /* XXX We don't wait until the job is actually paused. Is this okay? */
79
}
94
+ /* g_assert_true(job->paused); */
80
95
+ g_assert_false(job->busy); /* The job is paused */
81
- if (cb->co) {
96
+
82
- aio_co_wake(cb->co);
97
+ do_drain_end(drain_type, src);
83
- }
98
+
84
-}
99
+ g_assert_cmpint(job->pause_count, ==, 0);
85
-
100
+ g_assert_false(job->paused);
86
-static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
101
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
87
- int64_t sector_num,
102
+
88
- int nb_sectors, int *pnum,
103
+ do_drain_begin(drain_type, target);
89
- BlockDriverState **file)
104
+
90
-{
105
+ if (drain_type == BDRV_DRAIN_ALL) {
91
- BDRVQEDState *s = bs->opaque;
106
+ /* bdrv_drain_all() drains both src and target, and involves an
92
- size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
107
+ * additional block_job_pause_all() */
93
- QEDIsAllocatedCB cb = {
108
+ g_assert_cmpint(job->pause_count, ==, 3);
94
- .bs = bs,
109
+ } else {
95
- .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
110
+ g_assert_cmpint(job->pause_count, ==, 1);
96
- .status = BDRV_BLOCK_OFFSET_MASK,
111
+ }
97
- .pnum = pnum,
112
+ /* XXX We don't wait until the job is actually paused. Is this okay? */
98
- .file = file,
113
+ /* g_assert_true(job->paused); */
99
- };
114
+ g_assert_false(job->busy); /* The job is paused */
100
- QEDRequest request = { .l2_table = NULL };
115
+
101
- uint64_t offset;
116
+ do_drain_end(drain_type, target);
102
- int ret;
117
+
103
-
118
+ g_assert_cmpint(job->pause_count, ==, 0);
104
- qemu_co_mutex_lock(&s->table_lock);
119
+ g_assert_false(job->paused);
105
- ret = qed_find_cluster(s, &request, cb.pos, &len, &offset);
120
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
106
- qed_is_allocated_cb(&cb, ret, offset, len);
121
+
107
-
122
+ ret = block_job_complete_sync(job, &error_abort);
108
- /* The callback was invoked immediately */
123
+ g_assert_cmpint(ret, ==, 0);
109
- assert(cb.status != BDRV_BLOCK_OFFSET_MASK);
124
+
110
-
125
+ blk_unref(blk_src);
111
qed_unref_l2_cache_entry(request.l2_table);
126
+ blk_unref(blk_target);
112
qemu_co_mutex_unlock(&s->table_lock);
127
+ bdrv_unref(src);
113
128
+ bdrv_unref(target);
114
- return cb.status;
129
+}
115
+ return status;
130
+
131
+static void test_blockjob_drain_all(void)
132
+{
133
+ test_blockjob_common(BDRV_DRAIN_ALL);
134
+}
135
+
136
+static void test_blockjob_drain(void)
137
+{
138
+ test_blockjob_common(BDRV_DRAIN);
139
+}
140
+
141
int main(int argc, char **argv)
142
{
143
bdrv_init();
144
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
145
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
146
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
147
148
+ g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
149
+ g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
150
+
151
return g_test_run();
116
}
152
}
117
118
static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
119
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qed = {
120
.bdrv_child_perm = bdrv_format_default_perms,
121
.bdrv_create = bdrv_qed_create,
122
.bdrv_has_zero_init = bdrv_has_zero_init_1,
123
- .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
124
+ .bdrv_co_block_status = bdrv_qed_co_block_status,
125
.bdrv_co_readv = bdrv_qed_co_readv,
126
.bdrv_co_writev = bdrv_qed_co_writev,
127
.bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes,
128
--
153
--
129
2.13.6
154
2.13.6
130
155
131
156
diff view generated by jsdifflib
1
From: Anton Nefedov <anton.nefedov@virtuozzo.com>
1
Block jobs are already paused using the BdrvChildRole drain callbacks,
2
so we don't need an additional block_job_pause_all() call.
2
3
3
The normal bdrv_co_pwritev() use is either
4
- BDRV_REQ_ZERO_WRITE clear and iovector provided
5
- BDRV_REQ_ZERO_WRITE set and iovector == NULL
6
7
while
8
- the flag clear and iovector == NULL is an assertion failure
9
in bdrv_co_do_zero_pwritev()
10
- the flag set and iovector provided is in fact allowed
11
(the flag prevails and zeroes are written)
12
13
However the alignment logic does not support the latter case so the padding
14
areas get overwritten with zeroes.
15
16
Currently, general functions like bdrv_rw_co() do provide iovector
17
regardless of flags. So, keep it supported and use bdrv_co_do_zero_pwritev()
18
alignment for it which also makes the code a bit more obvious anyway.
19
20
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
21
Reviewed-by: Eric Blake <eblake@redhat.com>
22
Reviewed-by: Alberto Garcia <berto@igalia.com>
23
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
24
---
5
---
25
block/io.c | 2 +-
6
block/io.c | 4 ----
26
1 file changed, 1 insertion(+), 1 deletion(-)
7
tests/test-bdrv-drain.c | 10 ++++------
8
2 files changed, 4 insertions(+), 10 deletions(-)
27
9
28
diff --git a/block/io.c b/block/io.c
10
diff --git a/block/io.c b/block/io.c
29
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
30
--- a/block/io.c
12
--- a/block/io.c
31
+++ b/block/io.c
13
+++ b/block/io.c
32
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
14
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
33
*/
15
* context. */
34
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
16
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
35
17
36
- if (!qiov) {
18
- block_job_pause_all();
37
+ if (flags & BDRV_REQ_ZERO_WRITE) {
19
-
38
ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
20
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
39
goto out;
21
AioContext *aio_context = bdrv_get_aio_context(bs);
22
23
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
24
aio_enable_external(aio_context);
25
aio_context_release(aio_context);
26
}
27
-
28
- block_job_resume_all();
29
}
30
31
void bdrv_drain_all(void)
32
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tests/test-bdrv-drain.c
35
+++ b/tests/test-bdrv-drain.c
36
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
37
do_drain_begin(drain_type, src);
38
39
if (drain_type == BDRV_DRAIN_ALL) {
40
- /* bdrv_drain_all() drains both src and target, and involves an
41
- * additional block_job_pause_all() */
42
- g_assert_cmpint(job->pause_count, ==, 3);
43
+ /* bdrv_drain_all() drains both src and target */
44
+ g_assert_cmpint(job->pause_count, ==, 2);
45
} else {
46
g_assert_cmpint(job->pause_count, ==, 1);
47
}
48
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_common(enum drain_type drain_type)
49
do_drain_begin(drain_type, target);
50
51
if (drain_type == BDRV_DRAIN_ALL) {
52
- /* bdrv_drain_all() drains both src and target, and involves an
53
- * additional block_job_pause_all() */
54
- g_assert_cmpint(job->pause_count, ==, 3);
55
+ /* bdrv_drain_all() drains both src and target */
56
+ g_assert_cmpint(job->pause_count, ==, 2);
57
} else {
58
g_assert_cmpint(job->pause_count, ==, 1);
40
}
59
}
41
--
60
--
42
2.13.6
61
2.13.6
43
62
44
63
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
bdrv_do_drained_begin() restricts the call of parent callbacks and
2
aio_disable_external() to the outermost drain section, but the block
3
driver callbacks are always called. bdrv_do_drained_end() must match
4
this behaviour, otherwise nodes stay drained even if begin/end calls
5
were balanced.
2
6
3
We are gradually moving away from sector-based interfaces, towards
4
byte-based. Update the file protocol driver accordingly.
5
6
In want_zero mode, we continue to report fine-grained hole
7
information (the caller wants as much mapping detail as possible);
8
but when not in that mode, the caller prefers larger *pnum and
9
merely cares about what offsets are allocated at this layer, rather
10
than where the holes live. Since holes still read as zeroes at
11
this layer (rather than deferring to a backing layer), we can take
12
the shortcut of skipping lseek(), and merely state that all bytes
13
are allocated.
14
15
We can also drop redundant bounds checks that are already
16
guaranteed by the block layer.
17
18
Signed-off-by: Eric Blake <eblake@redhat.com>
19
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
20
Reviewed-by: Fam Zheng <famz@redhat.com>
21
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
22
---
8
---
23
block/file-posix.c | 64 +++++++++++++++++++++++++-----------------------------
9
block/io.c | 12 +++++++-----
24
1 file changed, 30 insertions(+), 34 deletions(-)
10
1 file changed, 7 insertions(+), 5 deletions(-)
25
11
26
diff --git a/block/file-posix.c b/block/file-posix.c
12
diff --git a/block/io.c b/block/io.c
27
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
28
--- a/block/file-posix.c
14
--- a/block/io.c
29
+++ b/block/file-posix.c
15
+++ b/block/io.c
30
@@ -XXX,XX +XXX,XX @@ static int find_allocation(BlockDriverState *bs, off_t start,
16
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs)
17
18
void bdrv_drained_end(BlockDriverState *bs)
19
{
20
+ int old_quiesce_counter;
21
+
22
if (qemu_in_coroutine()) {
23
bdrv_co_yield_to_drain(bs, false);
24
return;
25
}
26
assert(bs->quiesce_counter > 0);
27
- if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
28
- return;
29
- }
30
+ old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
31
32
/* Re-enable things in child-to-parent order */
33
bdrv_drain_invoke(bs, false, false);
34
- bdrv_parent_drained_end(bs);
35
- aio_enable_external(bdrv_get_aio_context(bs));
36
+ if (old_quiesce_counter == 1) {
37
+ bdrv_parent_drained_end(bs);
38
+ aio_enable_external(bdrv_get_aio_context(bs));
39
+ }
31
}
40
}
32
41
33
/*
42
/*
34
- * Returns the allocation status of the specified sectors.
35
+ * Returns the allocation status of the specified offset.
36
*
37
- * If 'sector_num' is beyond the end of the disk image the return value is 0
38
- * and 'pnum' is set to 0.
39
+ * The block layer guarantees 'offset' and 'bytes' are within bounds.
40
*
41
- * 'pnum' is set to the number of sectors (including and immediately following
42
- * the specified sector) that are known to be in the same
43
+ * 'pnum' is set to the number of bytes (including and immediately following
44
+ * the specified offset) that are known to be in the same
45
* allocated/unallocated state.
46
*
47
- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
48
- * beyond the end of the disk image it will be clamped.
49
+ * 'bytes' is the max value 'pnum' should be set to.
50
*/
51
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
52
- int64_t sector_num,
53
- int nb_sectors, int *pnum,
54
- BlockDriverState **file)
55
-{
56
- off_t start, data = 0, hole = 0;
57
- int64_t total_size;
58
+static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
59
+ bool want_zero,
60
+ int64_t offset,
61
+ int64_t bytes, int64_t *pnum,
62
+ int64_t *map,
63
+ BlockDriverState **file)
64
+{
65
+ off_t data = 0, hole = 0;
66
int ret;
67
68
ret = fd_open(bs);
69
@@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
70
return ret;
71
}
72
73
- start = sector_num * BDRV_SECTOR_SIZE;
74
- total_size = bdrv_getlength(bs);
75
- if (total_size < 0) {
76
- return total_size;
77
- } else if (start >= total_size) {
78
- *pnum = 0;
79
- return 0;
80
- } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
81
- nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
82
+ if (!want_zero) {
83
+ *pnum = bytes;
84
+ *map = offset;
85
+ *file = bs;
86
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
87
}
88
89
- ret = find_allocation(bs, start, &data, &hole);
90
+ ret = find_allocation(bs, offset, &data, &hole);
91
if (ret == -ENXIO) {
92
/* Trailing hole */
93
- *pnum = nb_sectors;
94
+ *pnum = bytes;
95
ret = BDRV_BLOCK_ZERO;
96
} else if (ret < 0) {
97
/* No info available, so pretend there are no holes */
98
- *pnum = nb_sectors;
99
+ *pnum = bytes;
100
ret = BDRV_BLOCK_DATA;
101
- } else if (data == start) {
102
- /* On a data extent, compute sectors to the end of the extent,
103
+ } else if (data == offset) {
104
+ /* On a data extent, compute bytes to the end of the extent,
105
* possibly including a partial sector at EOF. */
106
- *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
107
+ *pnum = MIN(bytes, hole - offset);
108
ret = BDRV_BLOCK_DATA;
109
} else {
110
- /* On a hole, compute sectors to the beginning of the next extent. */
111
- assert(hole == start);
112
- *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
113
+ /* On a hole, compute bytes to the beginning of the next extent. */
114
+ assert(hole == offset);
115
+ *pnum = MIN(bytes, data - offset);
116
ret = BDRV_BLOCK_ZERO;
117
}
118
+ *map = offset;
119
*file = bs;
120
- return ret | BDRV_BLOCK_OFFSET_VALID | start;
121
+ return ret | BDRV_BLOCK_OFFSET_VALID;
122
}
123
124
static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
125
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = {
126
.bdrv_close = raw_close,
127
.bdrv_create = raw_create,
128
.bdrv_has_zero_init = bdrv_has_zero_init_1,
129
- .bdrv_co_get_block_status = raw_co_get_block_status,
130
+ .bdrv_co_block_status = raw_co_block_status,
131
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
132
133
.bdrv_co_preadv = raw_co_preadv,
134
--
43
--
135
2.13.6
44
2.13.6
136
45
137
46
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
2
3
Commit bdd6a90 has a bug: drivers should never directly set
4
BDRV_BLOCK_ALLOCATED, but only io.c should do that (as needed).
5
Instead, a driver should report BDRV_BLOCK_DATA if it knows that
6
data comes from this BDS.
7
8
But let's look at the bigger picture: semantically, the nvme
9
driver is similar to the nbd, null, and raw drivers (no backing
10
file, all data comes from this BDS). But while two of those
11
other drivers have to supply the callback (null because it can
12
special-case BDRV_BLOCK_ZERO, raw because it can special-case
13
a different offset), in this case the block layer defaults are
14
good enough without the callback at all (similar to nbd).
15
16
So, fix the bug by deletion ;)
17
18
Signed-off-by: Eric Blake <eblake@redhat.com>
19
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
20
---
2
---
21
block/nvme.c | 14 --------------
3
tests/test-bdrv-drain.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++
22
1 file changed, 14 deletions(-)
4
1 file changed, 57 insertions(+)
23
5
24
diff --git a/block/nvme.c b/block/nvme.c
6
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
25
index XXXXXXX..XXXXXXX 100644
7
index XXXXXXX..XXXXXXX 100644
26
--- a/block/nvme.c
8
--- a/tests/test-bdrv-drain.c
27
+++ b/block/nvme.c
9
+++ b/tests/test-bdrv-drain.c
28
@@ -XXX,XX +XXX,XX @@ static int nvme_reopen_prepare(BDRVReopenState *reopen_state,
10
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
29
return 0;
11
enum drain_type {
12
BDRV_DRAIN_ALL,
13
BDRV_DRAIN,
14
+ DRAIN_TYPE_MAX,
15
};
16
17
static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
18
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
19
test_quiesce_common(BDRV_DRAIN, false);
30
}
20
}
31
21
32
-static int64_t coroutine_fn nvme_co_get_block_status(BlockDriverState *bs,
22
+static void test_nested(void)
33
- int64_t sector_num,
23
+{
34
- int nb_sectors, int *pnum,
24
+ BlockBackend *blk;
35
- BlockDriverState **file)
25
+ BlockDriverState *bs, *backing;
36
-{
26
+ BDRVTestState *s, *backing_s;
37
- *pnum = nb_sectors;
27
+ enum drain_type outer, inner;
38
- *file = bs;
28
+
39
-
29
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
40
- return BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_OFFSET_VALID |
30
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
41
- (sector_num << BDRV_SECTOR_BITS);
31
+ &error_abort);
42
-}
32
+ s = bs->opaque;
43
-
33
+ blk_insert_bs(blk, bs, &error_abort);
44
static void nvme_refresh_filename(BlockDriverState *bs, QDict *opts)
34
+
45
{
35
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
46
QINCREF(opts);
36
+ backing_s = backing->opaque;
47
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nvme = {
37
+ bdrv_set_backing_hd(bs, backing, &error_abort);
48
.bdrv_co_flush_to_disk = nvme_co_flush,
38
+
49
.bdrv_reopen_prepare = nvme_reopen_prepare,
39
+ for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
50
40
+ for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
51
- .bdrv_co_get_block_status = nvme_co_get_block_status,
41
+ /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */
52
-
42
+ int bs_quiesce = (outer != BDRV_DRAIN_ALL) +
53
.bdrv_refresh_filename = nvme_refresh_filename,
43
+ (inner != BDRV_DRAIN_ALL);
54
.bdrv_refresh_limits = nvme_refresh_limits,
44
+ int backing_quiesce = 0;
45
+ int backing_cb_cnt = (outer != BDRV_DRAIN) +
46
+ (inner != BDRV_DRAIN);
47
+
48
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
49
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
50
+ g_assert_cmpint(s->drain_count, ==, 0);
51
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
52
+
53
+ do_drain_begin(outer, bs);
54
+ do_drain_begin(inner, bs);
55
+
56
+ g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce);
57
+ g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
58
+ g_assert_cmpint(s->drain_count, ==, 2);
59
+ g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt);
60
+
61
+ do_drain_end(inner, bs);
62
+ do_drain_end(outer, bs);
63
+
64
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
65
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
66
+ g_assert_cmpint(s->drain_count, ==, 0);
67
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
68
+ }
69
+ }
70
+
71
+ bdrv_unref(backing);
72
+ bdrv_unref(bs);
73
+ blk_unref(blk);
74
+}
75
+
76
77
typedef struct TestBlockJob {
78
BlockJob common;
79
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
80
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
81
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
82
83
+ g_test_add_func("/bdrv-drain/nested", test_nested);
84
+
85
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
86
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
55
87
56
--
88
--
57
2.13.6
89
2.13.6
58
90
59
91
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
This is in preparation for subtree drains, i.e. drained sections that
2
2
affect not only a single node, but recursively all child nodes, too.
3
BlockDriverState has the BDRV_POLL_WHILE() macro to wait on event loop
3
4
activity while a condition evaluates to true. This is used to implement
4
Calling the parent callbacks for drain is pointless when we just came
5
synchronous operations where it acts as a condvar between the IOThread
5
from that parent node recursively and leads to multiple increases of
6
running the operation and the main loop waiting for the operation. It
6
bs->quiesce_counter in a single drain call. Don't do it.
7
can also be called from the thread that owns the AioContext and in that
7
8
case it's just a nested event loop.
8
In order for this to work correctly, the parent callback must be called
9
9
for every bdrv_drain_begin/end() call, not only for the outermost one:
10
BlockBackend needs this behavior but doesn't always have a
10
11
BlockDriverState it can use. This patch extracts BDRV_POLL_WHILE() into
11
If we have a node N with two parents A and B, recursive draining of A
12
the AioWait abstraction, which can be used with AioContext and isn't
12
should cause the quiesce_counter of B to increase because its child N is
13
tied to BlockDriverState anymore.
13
drained independently of B. If now B is recursively drained, too, A must
14
14
increase its quiesce_counter because N is drained independently of A
15
This feature could be built directly into AioContext but then all users
15
only now, even if N is going from quiesce_counter 1 to 2.
16
would kick the event loop even if they signal different conditions.
16
17
Imagine an AioContext with many BlockDriverStates: each time a request
18
completes any waiter would wake up and re-check their condition. It's
19
nicer to keep a separate AioWait object for each condition instead.
20
21
Please see "block/aio-wait.h" for details on the API.
22
23
The name AIO_WAIT_WHILE() avoids the confusion between AIO_POLL_WHILE()
24
and AioContext polling.
25
26
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
27
Reviewed-by: Eric Blake <eblake@redhat.com>
28
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
29
---
18
---
30
include/block/aio-wait.h | 116 ++++++++++++++++++++++++++++++++++++++++++++++
19
include/block/block.h | 4 ++--
31
include/block/block.h | 40 +++-------------
20
block.c | 13 +++++++++----
32
include/block/block_int.h | 7 ++-
21
block/io.c | 47 ++++++++++++++++++++++++++++++++++-------------
33
block.c | 5 ++
22
3 files changed, 45 insertions(+), 19 deletions(-)
34
block/io.c | 10 +---
23
35
util/aio-wait.c | 40 ++++++++++++++++
36
util/Makefile.objs | 2 +-
37
7 files changed, 174 insertions(+), 46 deletions(-)
38
create mode 100644 include/block/aio-wait.h
39
create mode 100644 util/aio-wait.c
40
41
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
42
new file mode 100644
43
index XXXXXXX..XXXXXXX
44
--- /dev/null
45
+++ b/include/block/aio-wait.h
46
@@ -XXX,XX +XXX,XX @@
47
+/*
48
+ * AioContext wait support
49
+ *
50
+ * Copyright (C) 2018 Red Hat, Inc.
51
+ *
52
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
53
+ * of this software and associated documentation files (the "Software"), to deal
54
+ * in the Software without restriction, including without limitation the rights
55
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
56
+ * copies of the Software, and to permit persons to whom the Software is
57
+ * furnished to do so, subject to the following conditions:
58
+ *
59
+ * The above copyright notice and this permission notice shall be included in
60
+ * all copies or substantial portions of the Software.
61
+ *
62
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
63
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
64
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
65
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
66
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
67
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
68
+ * THE SOFTWARE.
69
+ */
70
+
71
+#ifndef QEMU_AIO_WAIT_H
72
+#define QEMU_AIO_WAIT_H
73
+
74
+#include "block/aio.h"
75
+
76
+/**
77
+ * AioWait:
78
+ *
79
+ * An object that facilitates synchronous waiting on a condition. The main
80
+ * loop can wait on an operation running in an IOThread as follows:
81
+ *
82
+ * AioWait *wait = ...;
83
+ * AioContext *ctx = ...;
84
+ * MyWork work = { .done = false };
85
+ * schedule_my_work_in_iothread(ctx, &work);
86
+ * AIO_WAIT_WHILE(wait, ctx, !work.done);
87
+ *
88
+ * The IOThread must call aio_wait_kick() to notify the main loop when
89
+ * work.done changes:
90
+ *
91
+ * static void do_work(...)
92
+ * {
93
+ * ...
94
+ * work.done = true;
95
+ * aio_wait_kick(wait);
96
+ * }
97
+ */
98
+typedef struct {
99
+ /* Is the main loop waiting for a kick? Accessed with atomic ops. */
100
+ bool need_kick;
101
+} AioWait;
102
+
103
+/**
104
+ * AIO_WAIT_WHILE:
105
+ * @wait: the aio wait object
106
+ * @ctx: the aio context
107
+ * @cond: wait while this conditional expression is true
108
+ *
109
+ * Wait while a condition is true. Use this to implement synchronous
110
+ * operations that require event loop activity.
111
+ *
112
+ * The caller must be sure that something calls aio_wait_kick() when the value
113
+ * of @cond might have changed.
114
+ *
115
+ * The caller's thread must be the IOThread that owns @ctx or the main loop
116
+ * thread (with @ctx acquired exactly once). This function cannot be used to
117
+ * wait on conditions between two IOThreads since that could lead to deadlock;
118
+ * go via the main loop instead.
119
+ */
120
+#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \
121
+ bool waited_ = false; \
122
+ bool busy_ = true; \
123
+ AioWait *wait_ = (wait); \
124
+ AioContext *ctx_ = (ctx); \
125
+ if (in_aio_context_home_thread(ctx_)) { \
126
+ while ((cond) || busy_) { \
127
+ busy_ = aio_poll(ctx_, (cond)); \
128
+ waited_ |= !!(cond) | busy_; \
129
+ } \
130
+ } else { \
131
+ assert(qemu_get_current_aio_context() == \
132
+ qemu_get_aio_context()); \
133
+ assert(!wait_->need_kick); \
134
+ /* Set wait_->need_kick before evaluating cond. */ \
135
+ atomic_mb_set(&wait_->need_kick, true); \
136
+ while (busy_) { \
137
+ if ((cond)) { \
138
+ waited_ = busy_ = true; \
139
+ aio_context_release(ctx_); \
140
+ aio_poll(qemu_get_aio_context(), true); \
141
+ aio_context_acquire(ctx_); \
142
+ } else { \
143
+ busy_ = aio_poll(ctx_, false); \
144
+ waited_ |= busy_; \
145
+ } \
146
+ } \
147
+ atomic_set(&wait_->need_kick, false); \
148
+ } \
149
+ waited_; })
150
+
151
+/**
152
+ * aio_wait_kick:
153
+ * @wait: the aio wait object that should re-evaluate its condition
154
+ *
155
+ * Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During
156
+ * synchronous operations performed in an IOThread, the main thread lets the
157
+ * IOThread's event loop run, waiting for the operation to complete. A
158
+ * aio_wait_kick() call will wake up the main thread.
159
+ */
160
+void aio_wait_kick(AioWait *wait);
161
+
162
+#endif /* QEMU_AIO_WAIT_H */
163
diff --git a/include/block/block.h b/include/block/block.h
24
diff --git a/include/block/block.h b/include/block/block.h
164
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
165
--- a/include/block/block.h
26
--- a/include/block/block.h
166
+++ b/include/block/block.h
27
+++ b/include/block/block.h
167
@@ -XXX,XX +XXX,XX @@
28
@@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs);
168
#define BLOCK_H
29
* Begin a quiesced section of all users of @bs. This is part of
169
30
* bdrv_drained_begin.
170
#include "block/aio.h"
31
*/
171
+#include "block/aio-wait.h"
32
-void bdrv_parent_drained_begin(BlockDriverState *bs);
172
#include "qapi-types.h"
33
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
173
#include "qemu/iov.h"
34
174
#include "qemu/coroutine.h"
35
/**
175
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void);
36
* bdrv_parent_drained_end:
176
void bdrv_drain_all_end(void);
37
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin(BlockDriverState *bs);
177
void bdrv_drain_all(void);
38
* End a quiesced section of all users of @bs. This is part of
178
39
* bdrv_drained_end.
179
+/* Returns NULL when bs == NULL */
40
*/
180
+AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
41
-void bdrv_parent_drained_end(BlockDriverState *bs);
181
+
42
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
182
#define BDRV_POLL_WHILE(bs, cond) ({ \
43
183
- bool waited_ = false; \
44
/**
184
- bool busy_ = true; \
45
* bdrv_drained_begin:
185
BlockDriverState *bs_ = (bs); \
186
- AioContext *ctx_ = bdrv_get_aio_context(bs_); \
187
- if (in_aio_context_home_thread(ctx_)) { \
188
- while ((cond) || busy_) { \
189
- busy_ = aio_poll(ctx_, (cond)); \
190
- waited_ |= !!(cond) | busy_; \
191
- } \
192
- } else { \
193
- assert(qemu_get_current_aio_context() == \
194
- qemu_get_aio_context()); \
195
- /* Ask bdrv_dec_in_flight to wake up the main \
196
- * QEMU AioContext. Extra I/O threads never take \
197
- * other I/O threads' AioContexts (see for example \
198
- * block_job_defer_to_main_loop for how to do it). \
199
- */ \
200
- assert(!bs_->wakeup); \
201
- /* Set bs->wakeup before evaluating cond. */ \
202
- atomic_mb_set(&bs_->wakeup, true); \
203
- while (busy_) { \
204
- if ((cond)) { \
205
- waited_ = busy_ = true; \
206
- aio_context_release(ctx_); \
207
- aio_poll(qemu_get_aio_context(), true); \
208
- aio_context_acquire(ctx_); \
209
- } else { \
210
- busy_ = aio_poll(ctx_, false); \
211
- waited_ |= busy_; \
212
- } \
213
- } \
214
- atomic_set(&bs_->wakeup, false); \
215
- } \
216
- waited_; })
217
+ AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \
218
+ bdrv_get_aio_context(bs_), \
219
+ cond); })
220
221
int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
222
int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
223
diff --git a/include/block/block_int.h b/include/block/block_int.h
224
index XXXXXXX..XXXXXXX 100644
225
--- a/include/block/block_int.h
226
+++ b/include/block/block_int.h
227
@@ -XXX,XX +XXX,XX @@
228
229
#include "block/accounting.h"
230
#include "block/block.h"
231
+#include "block/aio-wait.h"
232
#include "qemu/queue.h"
233
#include "qemu/coroutine.h"
234
#include "qemu/stats64.h"
235
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
236
unsigned int in_flight;
237
unsigned int serialising_in_flight;
238
239
- /* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic
240
- * ops.
241
- */
242
- bool wakeup;
243
+ /* Kicked to signal main loop when a request completes. */
244
+ AioWait wait;
245
246
/* counter for nested bdrv_io_plug.
247
* Accessed with atomic ops.
248
diff --git a/block.c b/block.c
46
diff --git a/block.c b/block.c
249
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
250
--- a/block.c
48
--- a/block.c
251
+++ b/block.c
49
+++ b/block.c
252
@@ -XXX,XX +XXX,XX @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
50
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
253
return bs->aio_context;
51
BlockDriverState *new_bs)
254
}
52
{
255
53
BlockDriverState *old_bs = child->bs;
256
+AioWait *bdrv_get_aio_wait(BlockDriverState *bs)
54
+ int i;
257
+{
55
258
+ return bs ? &bs->wait : NULL;
56
if (old_bs && new_bs) {
259
+}
57
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
260
+
58
}
261
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
59
if (old_bs) {
262
{
60
if (old_bs->quiesce_counter && child->role->drained_end) {
263
aio_co_enter(bdrv_get_aio_context(bs), co);
61
- child->role->drained_end(child);
62
+ for (i = 0; i < old_bs->quiesce_counter; i++) {
63
+ child->role->drained_end(child);
64
+ }
65
}
66
if (child->role->detach) {
67
child->role->detach(child);
68
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
69
if (new_bs) {
70
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
71
if (new_bs->quiesce_counter && child->role->drained_begin) {
72
- child->role->drained_begin(child);
73
+ for (i = 0; i < new_bs->quiesce_counter; i++) {
74
+ child->role->drained_begin(child);
75
+ }
76
}
77
78
if (child->role->attach) {
79
@@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
80
AioContext *ctx = bdrv_get_aio_context(bs);
81
82
aio_disable_external(ctx);
83
- bdrv_parent_drained_begin(bs);
84
+ bdrv_parent_drained_begin(bs, NULL);
85
bdrv_drain(bs); /* ensure there are no in-flight requests */
86
87
while (aio_poll(ctx, false)) {
88
@@ -XXX,XX +XXX,XX @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
89
*/
90
aio_context_acquire(new_context);
91
bdrv_attach_aio_context(bs, new_context);
92
- bdrv_parent_drained_end(bs);
93
+ bdrv_parent_drained_end(bs, NULL);
94
aio_enable_external(ctx);
95
aio_context_release(new_context);
96
}
264
diff --git a/block/io.c b/block/io.c
97
diff --git a/block/io.c b/block/io.c
265
index XXXXXXX..XXXXXXX 100644
98
index XXXXXXX..XXXXXXX 100644
266
--- a/block/io.c
99
--- a/block/io.c
267
+++ b/block/io.c
100
+++ b/block/io.c
268
@@ -XXX,XX +XXX,XX @@
101
@@ -XXX,XX +XXX,XX @@
269
#include "qemu/osdep.h"
102
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
270
#include "trace.h"
103
int64_t offset, int bytes, BdrvRequestFlags flags);
271
#include "sysemu/block-backend.h"
104
272
+#include "block/aio-wait.h"
105
-void bdrv_parent_drained_begin(BlockDriverState *bs)
273
#include "block/blockjob.h"
106
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
274
#include "block/blockjob_int.h"
107
{
275
#include "block/block_int.h"
108
BdrvChild *c, *next;
276
@@ -XXX,XX +XXX,XX @@ void bdrv_inc_in_flight(BlockDriverState *bs)
109
277
atomic_inc(&bs->in_flight);
110
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
278
}
111
+ if (c == ignore) {
279
112
+ continue;
280
-static void dummy_bh_cb(void *opaque)
113
+ }
281
-{
114
if (c->role->drained_begin) {
282
-}
115
c->role->drained_begin(c);
283
-
116
}
284
void bdrv_wakeup(BlockDriverState *bs)
117
}
285
{
118
}
286
- /* The barrier (or an atomic op) is in the caller. */
119
287
- if (atomic_read(&bs->wakeup)) {
120
-void bdrv_parent_drained_end(BlockDriverState *bs)
288
- aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
121
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
289
- }
122
{
290
+ aio_wait_kick(bdrv_get_aio_wait(bs));
123
BdrvChild *c, *next;
291
}
124
292
125
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
293
void bdrv_dec_in_flight(BlockDriverState *bs)
126
+ if (c == ignore) {
294
diff --git a/util/aio-wait.c b/util/aio-wait.c
127
+ continue;
295
new file mode 100644
128
+ }
296
index XXXXXXX..XXXXXXX
129
if (c->role->drained_end) {
297
--- /dev/null
130
c->role->drained_end(c);
298
+++ b/util/aio-wait.c
131
}
299
@@ -XXX,XX +XXX,XX @@
132
@@ -XXX,XX +XXX,XX @@ typedef struct {
300
+/*
133
BlockDriverState *bs;
301
+ * AioContext wait support
134
bool done;
302
+ *
135
bool begin;
303
+ * Copyright (C) 2018 Red Hat, Inc.
136
+ BdrvChild *parent;
304
+ *
137
} BdrvCoDrainData;
305
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
138
306
+ * of this software and associated documentation files (the "Software"), to deal
139
static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
307
+ * in the Software without restriction, including without limitation the rights
140
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
308
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
141
return waited;
309
+ * copies of the Software, and to permit persons to whom the Software is
142
}
310
+ * furnished to do so, subject to the following conditions:
143
311
+ *
144
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent);
312
+ * The above copyright notice and this permission notice shall be included in
145
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
313
+ * all copies or substantial portions of the Software.
314
+ *
315
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
316
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
317
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
318
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
319
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
320
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
321
+ * THE SOFTWARE.
322
+ */
323
+
146
+
324
+#include "qemu/osdep.h"
147
static void bdrv_co_drain_bh_cb(void *opaque)
325
+#include "qemu/main-loop.h"
148
{
326
+#include "block/aio-wait.h"
149
BdrvCoDrainData *data = opaque;
327
+
150
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
328
+static void dummy_bh_cb(void *opaque)
151
152
bdrv_dec_in_flight(bs);
153
if (data->begin) {
154
- bdrv_drained_begin(bs);
155
+ bdrv_do_drained_begin(bs, data->parent);
156
} else {
157
- bdrv_drained_end(bs);
158
+ bdrv_do_drained_end(bs, data->parent);
159
}
160
161
data->done = true;
162
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
163
}
164
165
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
166
- bool begin)
167
+ bool begin, BdrvChild *parent)
168
{
169
BdrvCoDrainData data;
170
171
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
172
.bs = bs,
173
.done = false,
174
.begin = begin,
175
+ .parent = parent,
176
};
177
bdrv_inc_in_flight(bs);
178
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
179
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
180
assert(data.done);
181
}
182
183
-void bdrv_drained_begin(BlockDriverState *bs)
184
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
185
{
186
if (qemu_in_coroutine()) {
187
- bdrv_co_yield_to_drain(bs, true);
188
+ bdrv_co_yield_to_drain(bs, true, parent);
189
return;
190
}
191
192
/* Stop things in parent-to-child order */
193
if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
194
aio_disable_external(bdrv_get_aio_context(bs));
195
- bdrv_parent_drained_begin(bs);
196
}
197
198
+ bdrv_parent_drained_begin(bs, parent);
199
bdrv_drain_invoke(bs, true, false);
200
bdrv_drain_recurse(bs);
201
}
202
203
-void bdrv_drained_end(BlockDriverState *bs)
204
+void bdrv_drained_begin(BlockDriverState *bs)
329
+{
205
+{
330
+ /* The point is to make AIO_WAIT_WHILE()'s aio_poll() return */
206
+ bdrv_do_drained_begin(bs, NULL);
331
+}
207
+}
332
+
208
+
333
+void aio_wait_kick(AioWait *wait)
209
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
210
{
211
int old_quiesce_counter;
212
213
if (qemu_in_coroutine()) {
214
- bdrv_co_yield_to_drain(bs, false);
215
+ bdrv_co_yield_to_drain(bs, false, parent);
216
return;
217
}
218
assert(bs->quiesce_counter > 0);
219
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_end(BlockDriverState *bs)
220
221
/* Re-enable things in child-to-parent order */
222
bdrv_drain_invoke(bs, false, false);
223
+ bdrv_parent_drained_end(bs, parent);
224
if (old_quiesce_counter == 1) {
225
- bdrv_parent_drained_end(bs);
226
aio_enable_external(bdrv_get_aio_context(bs));
227
}
228
}
229
230
+void bdrv_drained_end(BlockDriverState *bs)
334
+{
231
+{
335
+ /* The barrier (or an atomic op) is in the caller. */
232
+ bdrv_do_drained_end(bs, NULL);
336
+ if (atomic_read(&wait->need_kick)) {
337
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
338
+ }
339
+}
233
+}
340
diff --git a/util/Makefile.objs b/util/Makefile.objs
234
+
341
index XXXXXXX..XXXXXXX 100644
235
/*
342
--- a/util/Makefile.objs
236
* Wait for pending requests to complete on a single BlockDriverState subtree,
343
+++ b/util/Makefile.objs
237
* and suspend block driver's internal I/O until next request arrives.
344
@@ -XXX,XX +XXX,XX @@
238
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_begin(void)
345
util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
239
/* Stop things in parent-to-child order */
346
util-obj-y += bufferiszero.o
240
aio_context_acquire(aio_context);
347
util-obj-y += lockcnt.o
241
aio_disable_external(aio_context);
348
-util-obj-y += aiocb.o async.o thread-pool.o qemu-timer.o
242
- bdrv_parent_drained_begin(bs);
349
+util-obj-y += aiocb.o async.o aio-wait.o thread-pool.o qemu-timer.o
243
+ bdrv_parent_drained_begin(bs, NULL);
350
util-obj-y += main-loop.o iohandler.o
244
bdrv_drain_invoke(bs, true, true);
351
util-obj-$(CONFIG_POSIX) += aio-posix.o
245
aio_context_release(aio_context);
352
util-obj-$(CONFIG_POSIX) += compatfd.o
246
247
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all_end(void)
248
/* Re-enable things in child-to-parent order */
249
aio_context_acquire(aio_context);
250
bdrv_drain_invoke(bs, false, true);
251
- bdrv_parent_drained_end(bs);
252
+ bdrv_parent_drained_end(bs, NULL);
253
aio_enable_external(aio_context);
254
aio_context_release(aio_context);
255
}
353
--
256
--
354
2.13.6
257
2.13.6
355
258
356
259
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
bdrv_drained_begin() waits for the completion of requests in the whole
2
subtree, but it only actually keeps its immediate bs parameter quiesced
3
until bdrv_drained_end().
2
4
3
We are gradually converting to byte-based interfaces, as they are
5
Add a version that keeps the whole subtree drained. As of this commit,
4
easier to reason about than sector-based. Convert all uses of
6
graph changes cannot be allowed during a subtree drained section, but
5
the allocmap (no semantic change). Callers that already had bytes
7
this will be fixed soon.
6
available are simpler, and callers that now scale to bytes will be
7
easier to switch to byte-based in the future.
8
8
9
Signed-off-by: Eric Blake <eblake@redhat.com>
10
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
11
Reviewed-by: Fam Zheng <famz@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
10
---
14
block/iscsi.c | 90 +++++++++++++++++++++++++++++------------------------------
11
include/block/block.h | 13 +++++++++++++
15
1 file changed, 44 insertions(+), 46 deletions(-)
12
block/io.c | 54 ++++++++++++++++++++++++++++++++++++++++-----------
13
2 files changed, 56 insertions(+), 11 deletions(-)
16
14
17
diff --git a/block/iscsi.c b/block/iscsi.c
15
diff --git a/include/block/block.h b/include/block/block.h
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/block/iscsi.c
17
--- a/include/block/block.h
20
+++ b/block/iscsi.c
18
+++ b/include/block/block.h
21
@@ -XXX,XX +XXX,XX @@ static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
19
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
20
void bdrv_drained_begin(BlockDriverState *bs);
21
22
/**
23
+ * Like bdrv_drained_begin, but recursively begins a quiesced section for
24
+ * exclusive access to all child nodes as well.
25
+ *
26
+ * Graph changes are not allowed during a subtree drain section.
27
+ */
28
+void bdrv_subtree_drained_begin(BlockDriverState *bs);
29
+
30
+/**
31
* bdrv_drained_end:
32
*
33
* End a quiescent section started by bdrv_drained_begin().
34
*/
35
void bdrv_drained_end(BlockDriverState *bs);
36
37
+/**
38
+ * End a quiescent section started by bdrv_subtree_drained_begin().
39
+ */
40
+void bdrv_subtree_drained_end(BlockDriverState *bs);
41
+
42
void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
43
Error **errp);
44
void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
45
diff --git a/block/io.c b/block/io.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/block/io.c
48
+++ b/block/io.c
49
@@ -XXX,XX +XXX,XX @@ typedef struct {
50
BlockDriverState *bs;
51
bool done;
52
bool begin;
53
+ bool recursive;
54
BdrvChild *parent;
55
} BdrvCoDrainData;
56
57
@@ -XXX,XX +XXX,XX @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
58
return waited;
22
}
59
}
23
60
24
static void
61
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent);
25
-iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
62
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
26
- int nb_sectors, bool allocated, bool valid)
63
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
27
+iscsi_allocmap_update(IscsiLun *iscsilun, int64_t offset,
64
+ BdrvChild *parent);
28
+ int64_t bytes, bool allocated, bool valid)
65
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
66
+ BdrvChild *parent);
67
68
static void bdrv_co_drain_bh_cb(void *opaque)
29
{
69
{
30
int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
70
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
31
- int cluster_sectors = iscsilun->cluster_size >> BDRV_SECTOR_BITS;
71
32
72
bdrv_dec_in_flight(bs);
33
if (iscsilun->allocmap == NULL) {
73
if (data->begin) {
74
- bdrv_do_drained_begin(bs, data->parent);
75
+ bdrv_do_drained_begin(bs, data->recursive, data->parent);
76
} else {
77
- bdrv_do_drained_end(bs, data->parent);
78
+ bdrv_do_drained_end(bs, data->recursive, data->parent);
79
}
80
81
data->done = true;
82
@@ -XXX,XX +XXX,XX @@ static void bdrv_co_drain_bh_cb(void *opaque)
83
}
84
85
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
86
- bool begin, BdrvChild *parent)
87
+ bool begin, bool recursive,
88
+ BdrvChild *parent)
89
{
90
BdrvCoDrainData data;
91
92
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
93
.bs = bs,
94
.done = false,
95
.begin = begin,
96
+ .recursive = recursive,
97
.parent = parent,
98
};
99
bdrv_inc_in_flight(bs);
100
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
101
assert(data.done);
102
}
103
104
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
105
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
106
+ BdrvChild *parent)
107
{
108
+ BdrvChild *child, *next;
109
+
110
if (qemu_in_coroutine()) {
111
- bdrv_co_yield_to_drain(bs, true, parent);
112
+ bdrv_co_yield_to_drain(bs, true, recursive, parent);
34
return;
113
return;
35
}
114
}
36
/* expand to entirely contain all affected clusters */
115
37
- assert(cluster_sectors);
116
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent)
38
- cl_num_expanded = sector_num / cluster_sectors;
117
bdrv_parent_drained_begin(bs, parent);
39
- nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
118
bdrv_drain_invoke(bs, true, false);
40
- cluster_sectors) - cl_num_expanded;
119
bdrv_drain_recurse(bs);
41
+ assert(iscsilun->cluster_size);
120
+
42
+ cl_num_expanded = offset / iscsilun->cluster_size;
121
+ if (recursive) {
43
+ nb_cls_expanded = DIV_ROUND_UP(offset + bytes,
122
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
44
+ iscsilun->cluster_size) - cl_num_expanded;
123
+ bdrv_do_drained_begin(child->bs, true, child);
45
/* shrink to touch only completely contained clusters */
124
+ }
46
- cl_num_shrunk = DIV_ROUND_UP(sector_num, cluster_sectors);
125
+ }
47
- nb_cls_shrunk = (sector_num + nb_sectors) / cluster_sectors
48
- - cl_num_shrunk;
49
+ cl_num_shrunk = DIV_ROUND_UP(offset, iscsilun->cluster_size);
50
+ nb_cls_shrunk = (offset + bytes) / iscsilun->cluster_size - cl_num_shrunk;
51
if (allocated) {
52
bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
53
} else {
54
@@ -XXX,XX +XXX,XX @@ iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
55
}
126
}
56
127
57
static void
128
void bdrv_drained_begin(BlockDriverState *bs)
58
-iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num,
59
- int nb_sectors)
60
+iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t offset,
61
+ int64_t bytes)
62
{
129
{
63
- iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true);
130
- bdrv_do_drained_begin(bs, NULL);
64
+ iscsi_allocmap_update(iscsilun, offset, bytes, true, true);
131
+ bdrv_do_drained_begin(bs, false, NULL);
132
+}
133
+
134
+void bdrv_subtree_drained_begin(BlockDriverState *bs)
135
+{
136
+ bdrv_do_drained_begin(bs, true, NULL);
65
}
137
}
66
138
67
static void
139
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
68
-iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num,
140
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
69
- int nb_sectors)
141
+ BdrvChild *parent)
70
+iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t offset,
71
+ int64_t bytes)
72
{
142
{
73
/* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
143
+ BdrvChild *child, *next;
74
* is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
144
int old_quiesce_counter;
75
*/
145
76
- iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true);
146
if (qemu_in_coroutine()) {
77
+ iscsi_allocmap_update(iscsilun, offset, bytes, false, true);
147
- bdrv_co_yield_to_drain(bs, false, parent);
148
+ bdrv_co_yield_to_drain(bs, false, recursive, parent);
149
return;
150
}
151
assert(bs->quiesce_counter > 0);
152
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
153
if (old_quiesce_counter == 1) {
154
aio_enable_external(bdrv_get_aio_context(bs));
155
}
156
+
157
+ if (recursive) {
158
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
159
+ bdrv_do_drained_end(child->bs, true, child);
160
+ }
161
+ }
78
}
162
}
79
163
80
-static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num,
164
void bdrv_drained_end(BlockDriverState *bs)
81
- int nb_sectors)
82
+static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t offset,
83
+ int64_t bytes)
84
{
165
{
85
- iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false);
166
- bdrv_do_drained_end(bs, NULL);
86
+ iscsi_allocmap_update(iscsilun, offset, bytes, false, false);
167
+ bdrv_do_drained_end(bs, false, NULL);
168
+}
169
+
170
+void bdrv_subtree_drained_end(BlockDriverState *bs)
171
+{
172
+ bdrv_do_drained_end(bs, true, NULL);
87
}
173
}
88
174
89
static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
175
/*
90
@@ -XXX,XX +XXX,XX @@ static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
91
}
92
93
static inline bool
94
-iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
95
- int nb_sectors)
96
+iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t offset,
97
+ int64_t bytes)
98
{
99
unsigned long size;
100
if (iscsilun->allocmap == NULL) {
101
return true;
102
}
103
assert(iscsilun->cluster_size);
104
- size = DIV_ROUND_UP(sector_num + nb_sectors,
105
- iscsilun->cluster_size >> BDRV_SECTOR_BITS);
106
+ size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
107
return !(find_next_bit(iscsilun->allocmap, size,
108
- sector_num * BDRV_SECTOR_SIZE /
109
- iscsilun->cluster_size) == size);
110
+ offset / iscsilun->cluster_size) == size);
111
}
112
113
static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
114
- int64_t sector_num, int nb_sectors)
115
+ int64_t offset, int64_t bytes)
116
{
117
unsigned long size;
118
if (iscsilun->allocmap_valid == NULL) {
119
return false;
120
}
121
assert(iscsilun->cluster_size);
122
- size = DIV_ROUND_UP(sector_num + nb_sectors,
123
- iscsilun->cluster_size >> BDRV_SECTOR_BITS);
124
+ size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
125
return (find_next_zero_bit(iscsilun->allocmap_valid, size,
126
- sector_num * BDRV_SECTOR_SIZE /
127
- iscsilun->cluster_size) == size);
128
+ offset / iscsilun->cluster_size) == size);
129
}
130
131
static int coroutine_fn
132
@@ -XXX,XX +XXX,XX @@ retry:
133
}
134
135
if (iTask.status != SCSI_STATUS_GOOD) {
136
- iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
137
+ iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
138
+ nb_sectors * BDRV_SECTOR_SIZE);
139
error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba,
140
iTask.err_str);
141
r = iTask.err_code;
142
goto out_unlock;
143
}
144
145
- iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
146
+ iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
147
+ nb_sectors * BDRV_SECTOR_SIZE);
148
149
out_unlock:
150
qemu_mutex_unlock(&iscsilun->mutex);
151
@@ -XXX,XX +XXX,XX @@ retry:
152
}
153
154
if (ret & BDRV_BLOCK_ZERO) {
155
- iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum);
156
+ iscsi_allocmap_set_unallocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
157
+ *pnum * BDRV_SECTOR_SIZE);
158
} else {
159
- iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum);
160
+ iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
161
+ *pnum * BDRV_SECTOR_SIZE);
162
}
163
164
if (*pnum > nb_sectors) {
165
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
166
/* if cache.direct is off and we have a valid entry in our allocation map
167
* we can skip checking the block status and directly return zeroes if
168
* the request falls within an unallocated area */
169
- if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
170
- !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
171
+ if (iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
172
+ nb_sectors * BDRV_SECTOR_SIZE) &&
173
+ !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
174
+ nb_sectors * BDRV_SECTOR_SIZE)) {
175
qemu_iovec_memset(iov, 0, 0x00, iov->size);
176
return 0;
177
}
178
179
if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
180
- !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
181
- !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
182
+ !iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
183
+ nb_sectors * BDRV_SECTOR_SIZE) &&
184
+ !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
185
+ nb_sectors * BDRV_SECTOR_SIZE)) {
186
int pnum;
187
BlockDriverState *file;
188
/* check the block status from the beginning of the cluster
189
@@ -XXX,XX +XXX,XX @@ retry:
190
goto retry;
191
}
192
193
- iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
194
- bytes >> BDRV_SECTOR_BITS);
195
+ iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
196
197
if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
198
/* the target might fail with a check condition if it
199
@@ -XXX,XX +XXX,XX @@ retry:
200
}
201
202
if (iTask.status != SCSI_STATUS_GOOD) {
203
- iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
204
- bytes >> BDRV_SECTOR_BITS);
205
+ iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
206
error_report("iSCSI WRITESAME10/16 failed at lba %" PRIu64 ": %s",
207
lba, iTask.err_str);
208
r = iTask.err_code;
209
@@ -XXX,XX +XXX,XX @@ retry:
210
}
211
212
if (flags & BDRV_REQ_MAY_UNMAP) {
213
- iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
214
- bytes >> BDRV_SECTOR_BITS);
215
+ iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
216
} else {
217
- iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
218
- bytes >> BDRV_SECTOR_BITS);
219
+ iscsi_allocmap_set_allocated(iscsilun, offset, bytes);
220
}
221
222
out_unlock:
223
--
176
--
224
2.13.6
177
2.13.6
225
178
226
179
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
Add a subtree drain version to the existing test cases.
2
2
3
We are gradually moving away from sector-based interfaces, towards
4
byte-based. Update the null driver accordingly.
5
6
Signed-off-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Reviewed-by: Fam Zheng <famz@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
4
---
11
block/null.c | 23 ++++++++++++-----------
5
tests/test-bdrv-drain.c | 27 ++++++++++++++++++++++++++-
12
1 file changed, 12 insertions(+), 11 deletions(-)
6
1 file changed, 26 insertions(+), 1 deletion(-)
13
7
14
diff --git a/block/null.c b/block/null.c
8
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
15
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
16
--- a/block/null.c
10
--- a/tests/test-bdrv-drain.c
17
+++ b/block/null.c
11
+++ b/tests/test-bdrv-drain.c
18
@@ -XXX,XX +XXX,XX @@ static int null_reopen_prepare(BDRVReopenState *reopen_state,
12
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
19
return 0;
13
enum drain_type {
14
BDRV_DRAIN_ALL,
15
BDRV_DRAIN,
16
+ BDRV_SUBTREE_DRAIN,
17
DRAIN_TYPE_MAX,
18
};
19
20
@@ -XXX,XX +XXX,XX @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
21
switch (drain_type) {
22
case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
23
case BDRV_DRAIN: bdrv_drained_begin(bs); break;
24
+ case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break;
25
default: g_assert_not_reached();
26
}
20
}
27
}
21
28
@@ -XXX,XX +XXX,XX @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
22
-static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs,
29
switch (drain_type) {
23
- int64_t sector_num,
30
case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
24
- int nb_sectors, int *pnum,
31
case BDRV_DRAIN: bdrv_drained_end(bs); break;
25
- BlockDriverState **file)
32
+ case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break;
26
+static int coroutine_fn null_co_block_status(BlockDriverState *bs,
33
default: g_assert_not_reached();
27
+ bool want_zero, int64_t offset,
34
}
28
+ int64_t bytes, int64_t *pnum,
35
}
29
+ int64_t *map,
36
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain(void)
30
+ BlockDriverState **file)
37
test_drv_cb_common(BDRV_DRAIN, false);
38
}
39
40
+static void test_drv_cb_drain_subtree(void)
41
+{
42
+ test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
43
+}
44
+
45
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
31
{
46
{
32
BDRVNullState *s = bs->opaque;
47
BlockBackend *blk;
33
- off_t start = sector_num * BDRV_SECTOR_SIZE;
48
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain(void)
34
+ int ret = BDRV_BLOCK_OFFSET_VALID;
49
test_quiesce_common(BDRV_DRAIN, false);
35
36
- *pnum = nb_sectors;
37
+ *pnum = bytes;
38
+ *map = offset;
39
*file = bs;
40
41
if (s->read_zeroes) {
42
- return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO;
43
- } else {
44
- return BDRV_BLOCK_OFFSET_VALID | start;
45
+ ret |= BDRV_BLOCK_ZERO;
46
}
47
+ return ret;
48
}
50
}
49
51
50
static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
52
+static void test_quiesce_drain_subtree(void)
51
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_null_co = {
53
+{
52
.bdrv_co_flush_to_disk = null_co_flush,
54
+ test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
53
.bdrv_reopen_prepare = null_reopen_prepare,
55
+}
54
56
+
55
- .bdrv_co_get_block_status = null_co_get_block_status,
57
static void test_nested(void)
56
+ .bdrv_co_block_status = null_co_block_status,
58
{
57
59
BlockBackend *blk;
58
.bdrv_refresh_filename = null_refresh_filename,
60
@@ -XXX,XX +XXX,XX @@ static void test_nested(void)
59
};
61
/* XXX bdrv_drain_all() doesn't increase the quiesce_counter */
60
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_null_aio = {
62
int bs_quiesce = (outer != BDRV_DRAIN_ALL) +
61
.bdrv_aio_flush = null_aio_flush,
63
(inner != BDRV_DRAIN_ALL);
62
.bdrv_reopen_prepare = null_reopen_prepare,
64
- int backing_quiesce = 0;
63
65
+ int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) +
64
- .bdrv_co_get_block_status = null_co_get_block_status,
66
+ (inner == BDRV_SUBTREE_DRAIN);
65
+ .bdrv_co_block_status = null_co_block_status,
67
int backing_cb_cnt = (outer != BDRV_DRAIN) +
66
68
(inner != BDRV_DRAIN);
67
.bdrv_refresh_filename = null_refresh_filename,
69
68
};
70
@@ -XXX,XX +XXX,XX @@ static void test_blockjob_drain(void)
71
test_blockjob_common(BDRV_DRAIN);
72
}
73
74
+static void test_blockjob_drain_subtree(void)
75
+{
76
+ test_blockjob_common(BDRV_SUBTREE_DRAIN);
77
+}
78
+
79
int main(int argc, char **argv)
80
{
81
bdrv_init();
82
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
83
84
g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
85
g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
86
+ g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
87
+ test_drv_cb_drain_subtree);
88
89
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
90
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
91
+ g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
92
+ test_quiesce_drain_subtree);
93
94
g_test_add_func("/bdrv-drain/nested", test_nested);
95
96
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
97
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
98
+ g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
99
+ test_blockjob_drain_subtree);
100
101
return g_test_run();
102
}
69
--
103
--
70
2.13.6
104
2.13.6
71
105
72
106
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
If bdrv_do_drained_begin/end() are called in coroutine context, they
2
first use a BH to get out of the coroutine context. Call some existing
3
tests again from a coroutine to cover this code path.
2
4
3
We are gradually moving away from sector-based interfaces, towards
4
byte-based. Update the raw driver accordingly.
5
6
Signed-off-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Reviewed-by: Fam Zheng <famz@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
6
---
11
block/raw-format.c | 16 ++++++++--------
7
tests/test-bdrv-drain.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
12
1 file changed, 8 insertions(+), 8 deletions(-)
8
1 file changed, 59 insertions(+)
13
9
14
diff --git a/block/raw-format.c b/block/raw-format.c
10
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/block/raw-format.c
12
--- a/tests/test-bdrv-drain.c
17
+++ b/block/raw-format.c
13
+++ b/tests/test-bdrv-drain.c
18
@@ -XXX,XX +XXX,XX @@ fail:
14
@@ -XXX,XX +XXX,XX @@ static void aio_ret_cb(void *opaque, int ret)
19
return ret;
15
*aio_ret = ret;
20
}
16
}
21
17
22
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
18
+typedef struct CallInCoroutineData {
23
- int64_t sector_num,
19
+ void (*entry)(void);
24
- int nb_sectors, int *pnum,
20
+ bool done;
25
+static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
21
+} CallInCoroutineData;
26
+ bool want_zero, int64_t offset,
22
+
27
+ int64_t bytes, int64_t *pnum,
23
+static coroutine_fn void call_in_coroutine_entry(void *opaque)
28
+ int64_t *map,
24
+{
29
BlockDriverState **file)
25
+ CallInCoroutineData *data = opaque;
26
+
27
+ data->entry();
28
+ data->done = true;
29
+}
30
+
31
+static void call_in_coroutine(void (*entry)(void))
32
+{
33
+ Coroutine *co;
34
+ CallInCoroutineData data = {
35
+ .entry = entry,
36
+ .done = false,
37
+ };
38
+
39
+ co = qemu_coroutine_create(call_in_coroutine_entry, &data);
40
+ qemu_coroutine_enter(co);
41
+ while (!data.done) {
42
+ aio_poll(qemu_get_aio_context(), true);
43
+ }
44
+}
45
+
46
enum drain_type {
47
BDRV_DRAIN_ALL,
48
BDRV_DRAIN,
49
@@ -XXX,XX +XXX,XX @@ static void test_drv_cb_drain_subtree(void)
50
test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
51
}
52
53
+static void test_drv_cb_co_drain(void)
54
+{
55
+ call_in_coroutine(test_drv_cb_drain);
56
+}
57
+
58
+static void test_drv_cb_co_drain_subtree(void)
59
+{
60
+ call_in_coroutine(test_drv_cb_drain_subtree);
61
+}
62
+
63
static void test_quiesce_common(enum drain_type drain_type, bool recursive)
30
{
64
{
31
BDRVRawState *s = bs->opaque;
65
BlockBackend *blk;
32
- *pnum = nb_sectors;
66
@@ -XXX,XX +XXX,XX @@ static void test_quiesce_drain_subtree(void)
33
+ *pnum = bytes;
67
test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
34
*file = bs->file->bs;
35
- sector_num += s->offset / BDRV_SECTOR_SIZE;
36
- return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
37
- (sector_num << BDRV_SECTOR_BITS);
38
+ *map = offset + s->offset;
39
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
40
}
68
}
41
69
42
static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
70
+static void test_quiesce_co_drain(void)
43
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_raw = {
71
+{
44
.bdrv_co_pwritev = &raw_co_pwritev,
72
+ call_in_coroutine(test_quiesce_drain);
45
.bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
73
+}
46
.bdrv_co_pdiscard = &raw_co_pdiscard,
74
+
47
- .bdrv_co_get_block_status = &raw_co_get_block_status,
75
+static void test_quiesce_co_drain_subtree(void)
48
+ .bdrv_co_block_status = &raw_co_block_status,
76
+{
49
.bdrv_truncate = &raw_truncate,
77
+ call_in_coroutine(test_quiesce_drain_subtree);
50
.bdrv_getlength = &raw_getlength,
78
+}
51
.has_variable_length = true,
79
+
80
static void test_nested(void)
81
{
82
BlockBackend *blk;
83
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
84
g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
85
test_drv_cb_drain_subtree);
86
87
+ // XXX bdrv_drain_all() doesn't work in coroutine context
88
+ g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
89
+ g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
90
+ test_drv_cb_co_drain_subtree);
91
+
92
+
93
g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
94
g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
95
g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
96
test_quiesce_drain_subtree);
97
98
+ // XXX bdrv_drain_all() doesn't work in coroutine context
99
+ g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
100
+ g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
101
+ test_quiesce_co_drain_subtree);
102
+
103
g_test_add_func("/bdrv-drain/nested", test_nested);
104
105
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
52
--
106
--
53
2.13.6
107
2.13.6
54
108
55
109
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
Test that drain sections are correctly propagated through the graph.
2
2
3
We are gradually moving away from sector-based interfaces, towards
4
byte-based. Update the qcow driver accordingly. There is no
5
intent to optimize based on the want_zero flag for this format.
6
7
Signed-off-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Reviewed-by: Fam Zheng <famz@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
4
---
12
block/qcow.c | 27 ++++++++++++++++-----------
5
tests/test-bdrv-drain.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++
13
1 file changed, 16 insertions(+), 11 deletions(-)
6
1 file changed, 74 insertions(+)
14
7
15
diff --git a/block/qcow.c b/block/qcow.c
8
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
16
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
17
--- a/block/qcow.c
10
--- a/tests/test-bdrv-drain.c
18
+++ b/block/qcow.c
11
+++ b/tests/test-bdrv-drain.c
19
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
12
@@ -XXX,XX +XXX,XX @@ static void test_nested(void)
20
return 1;
13
blk_unref(blk);
21
}
14
}
22
15
23
-static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
16
+static void test_multiparent(void)
24
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
17
+{
25
+static int coroutine_fn qcow_co_block_status(BlockDriverState *bs,
18
+ BlockBackend *blk_a, *blk_b;
26
+ bool want_zero,
19
+ BlockDriverState *bs_a, *bs_b, *backing;
27
+ int64_t offset, int64_t bytes,
20
+ BDRVTestState *a_s, *b_s, *backing_s;
28
+ int64_t *pnum, int64_t *map,
21
+
29
+ BlockDriverState **file)
22
+ blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
30
{
23
+ bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
31
BDRVQcowState *s = bs->opaque;
24
+ &error_abort);
32
- int index_in_cluster, n, ret;
25
+ a_s = bs_a->opaque;
33
+ int index_in_cluster, ret;
26
+ blk_insert_bs(blk_a, bs_a, &error_abort);
34
+ int64_t n;
27
+
35
uint64_t cluster_offset;
28
+ blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
36
29
+ bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
37
qemu_co_mutex_lock(&s->lock);
30
+ &error_abort);
38
- ret = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0, &cluster_offset);
31
+ b_s = bs_b->opaque;
39
+ ret = get_cluster_offset(bs, offset, 0, 0, 0, 0, &cluster_offset);
32
+ blk_insert_bs(blk_b, bs_b, &error_abort);
40
qemu_co_mutex_unlock(&s->lock);
33
+
41
if (ret < 0) {
34
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
42
return ret;
35
+ backing_s = backing->opaque;
43
}
36
+ bdrv_set_backing_hd(bs_a, backing, &error_abort);
44
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
37
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
45
- n = s->cluster_sectors - index_in_cluster;
38
+
46
- if (n > nb_sectors)
39
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
47
- n = nb_sectors;
40
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
48
+ index_in_cluster = offset & (s->cluster_size - 1);
41
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
49
+ n = s->cluster_size - index_in_cluster;
42
+ g_assert_cmpint(a_s->drain_count, ==, 0);
50
+ if (n > bytes) {
43
+ g_assert_cmpint(b_s->drain_count, ==, 0);
51
+ n = bytes;
44
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
52
+ }
45
+
53
*pnum = n;
46
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
54
if (!cluster_offset) {
47
+
55
return 0;
48
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
56
@@ -XXX,XX +XXX,XX @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
49
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
57
if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) {
50
+ g_assert_cmpint(backing->quiesce_counter, ==, 1);
58
return BDRV_BLOCK_DATA;
51
+ g_assert_cmpint(a_s->drain_count, ==, 1);
59
}
52
+ g_assert_cmpint(b_s->drain_count, ==, 1);
60
- cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
53
+ g_assert_cmpint(backing_s->drain_count, ==, 1);
61
+ *map = cluster_offset | index_in_cluster;
54
+
62
*file = bs->file->bs;
55
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
63
- return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
56
+
64
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
57
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
65
}
58
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
66
59
+ g_assert_cmpint(backing->quiesce_counter, ==, 2);
67
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
60
+ g_assert_cmpint(a_s->drain_count, ==, 2);
68
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qcow = {
61
+ g_assert_cmpint(b_s->drain_count, ==, 2);
69
62
+ g_assert_cmpint(backing_s->drain_count, ==, 2);
70
.bdrv_co_readv = qcow_co_readv,
63
+
71
.bdrv_co_writev = qcow_co_writev,
64
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
72
- .bdrv_co_get_block_status = qcow_co_get_block_status,
65
+
73
+ .bdrv_co_block_status = qcow_co_block_status,
66
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
74
67
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
75
.bdrv_make_empty = qcow_make_empty,
68
+ g_assert_cmpint(backing->quiesce_counter, ==, 1);
76
.bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed,
69
+ g_assert_cmpint(a_s->drain_count, ==, 1);
70
+ g_assert_cmpint(b_s->drain_count, ==, 1);
71
+ g_assert_cmpint(backing_s->drain_count, ==, 1);
72
+
73
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
74
+
75
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
76
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
77
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
78
+ g_assert_cmpint(a_s->drain_count, ==, 0);
79
+ g_assert_cmpint(b_s->drain_count, ==, 0);
80
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
81
+
82
+ bdrv_unref(backing);
83
+ bdrv_unref(bs_a);
84
+ bdrv_unref(bs_b);
85
+ blk_unref(blk_a);
86
+ blk_unref(blk_b);
87
+}
88
+
89
90
typedef struct TestBlockJob {
91
BlockJob common;
92
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
93
test_quiesce_co_drain_subtree);
94
95
g_test_add_func("/bdrv-drain/nested", test_nested);
96
+ g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
97
98
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
99
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
77
--
100
--
78
2.13.6
101
2.13.6
79
102
80
103
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
1
We need to remember how many of the drain sections in which a node is
2
2
were recursive (i.e. subtree drain rather than node drain), so that they
3
We are gradually moving away from sector-based interfaces, towards
3
can be correctly applied when children are added or removed during the
4
byte-based. Now that the block layer exposes byte-based allocation,
4
drained section.
5
it's time to tackle the drivers. Add a new callback that operates
5
6
on as small as byte boundaries. Subsequent patches will then update
6
With this change, it is safe to modify the graph even inside a
7
individual drivers, then finally remove .bdrv_co_get_block_status().
7
bdrv_subtree_drained_begin/end() section.
8
8
9
The new code also passes through the 'want_zero' hint, which will
10
allow subsequent patches to further optimize callers that only care
11
about how much of the image is allocated (want_zero is false),
12
rather than full details about runs of zeroes and which offsets the
13
allocation actually maps to (want_zero is true). As part of this
14
effort, fix another part of the documentation: the claim in commit
15
4c41cb4 that BDRV_BLOCK_ALLOCATED is short for 'DATA || ZERO' is a
16
lie at the block layer (see commit e88ae2264), even though it is
17
how the bit is computed from the driver layer. After all, there
18
are intentionally cases where we return ZERO but not ALLOCATED at
19
the block layer, when we know that a read sees zero because the
20
backing file is too short. Note that the driver interface is thus
21
slightly different than the public interface with regards to which
22
bits will be set, and what guarantees are provided on input.
23
24
We also add an assertion that any driver using the new callback will
25
make progress (the only time pnum will be 0 is if the block layer
26
already handled an out-of-bounds request, or if there is an error);
27
the old driver interface did not provide this guarantee, which
28
could lead to some inf-loops in drastic corner-case failures.
29
30
Signed-off-by: Eric Blake <eblake@redhat.com>
31
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
32
Reviewed-by: Fam Zheng <famz@redhat.com>
33
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
34
---
10
---
35
include/block/block.h | 14 +++++++-------
11
include/block/block.h | 2 --
36
include/block/block_int.h | 20 +++++++++++++++-----
12
include/block/block_int.h | 5 +++++
37
block/io.c | 28 +++++++++++++++++++---------
13
block.c | 32 +++++++++++++++++++++++++++++---
38
3 files changed, 41 insertions(+), 21 deletions(-)
14
block/io.c | 28 ++++++++++++++++++++++++----
15
4 files changed, 58 insertions(+), 9 deletions(-)
39
16
40
diff --git a/include/block/block.h b/include/block/block.h
17
diff --git a/include/block/block.h b/include/block/block.h
41
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
42
--- a/include/block/block.h
19
--- a/include/block/block.h
43
+++ b/include/block/block.h
20
+++ b/include/block/block.h
44
@@ -XXX,XX +XXX,XX @@ typedef struct HDGeometry {
21
@@ -XXX,XX +XXX,XX @@ void bdrv_drained_begin(BlockDriverState *bs);
45
* BDRV_BLOCK_ZERO: offset reads as zero
22
/**
46
* BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
23
* Like bdrv_drained_begin, but recursively begins a quiesced section for
47
* BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
24
* exclusive access to all child nodes as well.
48
- * layer (short for DATA || ZERO), set by block layer
25
- *
49
- * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer
26
- * Graph changes are not allowed during a subtree drain section.
50
+ * layer rather than any backing, set by block layer
27
*/
51
+ * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
28
void bdrv_subtree_drained_begin(BlockDriverState *bs);
52
+ * layer, set by block layer
29
53
*
54
* Internal flag:
55
* BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
56
* that the block layer recompute the answer from the returned
57
* BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
58
*
59
- * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of
60
- * the return value (old interface) or the entire map parameter (new
61
- * interface) represent the offset in the returned BDS that is allocated for
62
- * the corresponding raw data. However, whether that offset actually
63
- * contains data also depends on BDRV_BLOCK_DATA, as follows:
64
+ * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
65
+ * host offset within the returned BDS that is allocated for the
66
+ * corresponding raw guest data. However, whether that offset
67
+ * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
68
*
69
* DATA ZERO OFFSET_VALID
70
* t t t sectors read as zero, returned file is zero at offset
71
diff --git a/include/block/block_int.h b/include/block/block_int.h
30
diff --git a/include/block/block_int.h b/include/block/block_int.h
72
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
73
--- a/include/block/block_int.h
32
--- a/include/block/block_int.h
74
+++ b/include/block/block_int.h
33
+++ b/include/block/block_int.h
75
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
34
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
76
/*
35
77
* Building block for bdrv_block_status[_above] and
36
/* Accessed with atomic ops. */
78
* bdrv_is_allocated[_above]. The driver should answer only
37
int quiesce_counter;
79
- * according to the current layer, and should not set
38
+ int recursive_quiesce_counter;
80
- * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h
39
+
81
- * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. The block
40
unsigned int write_gen; /* Current data generation */
82
- * layer guarantees input aligned to request_alignment, as well as
41
83
- * non-NULL pnum and file.
42
/* Protected by reqs_lock. */
84
+ * according to the current layer, and should only need to set
43
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
85
+ * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
44
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
86
+ * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
45
BdrvRequestFlags flags);
87
+ * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
46
88
+ * block.h for the overall meaning of the bits. As a hint, the
47
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
89
+ * flag want_zero is true if the caller cares more about precise
48
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
90
+ * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
49
+
91
+ * overall allocation (favor larger *pnum, perhaps by reporting
50
int get_tmp_filename(char *filename, int size);
92
+ * _DATA instead of _ZERO). The block layer guarantees input
51
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
93
+ * clamped to bdrv_getlength() and aligned to request_alignment,
52
const char *filename);
94
+ * as well as non-NULL pnum, map, and file; in turn, the driver
53
diff --git a/block.c b/block.c
95
+ * must return an error or set pnum to an aligned non-zero value.
54
index XXXXXXX..XXXXXXX 100644
96
*/
55
--- a/block.c
97
int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
56
+++ b/block.c
98
int64_t sector_num, int nb_sectors, int *pnum,
57
@@ -XXX,XX +XXX,XX @@ static void bdrv_child_cb_drained_end(BdrvChild *child)
99
BlockDriverState **file);
58
bdrv_drained_end(bs);
100
+ int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
59
}
101
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
60
102
+ int64_t *map, BlockDriverState **file);
61
+static void bdrv_child_cb_attach(BdrvChild *child)
103
62
+{
104
/*
63
+ BlockDriverState *bs = child->opaque;
105
* Invalidate any cached meta-data.
64
+ bdrv_apply_subtree_drain(child, bs);
65
+}
66
+
67
+static void bdrv_child_cb_detach(BdrvChild *child)
68
+{
69
+ BlockDriverState *bs = child->opaque;
70
+ bdrv_unapply_subtree_drain(child, bs);
71
+}
72
+
73
static int bdrv_child_cb_inactivate(BdrvChild *child)
74
{
75
BlockDriverState *bs = child->opaque;
76
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_file = {
77
.inherit_options = bdrv_inherited_options,
78
.drained_begin = bdrv_child_cb_drained_begin,
79
.drained_end = bdrv_child_cb_drained_end,
80
+ .attach = bdrv_child_cb_attach,
81
+ .detach = bdrv_child_cb_detach,
82
.inactivate = bdrv_child_cb_inactivate,
83
};
84
85
@@ -XXX,XX +XXX,XX @@ const BdrvChildRole child_format = {
86
.inherit_options = bdrv_inherited_fmt_options,
87
.drained_begin = bdrv_child_cb_drained_begin,
88
.drained_end = bdrv_child_cb_drained_end,
89
+ .attach = bdrv_child_cb_attach,
90
+ .detach = bdrv_child_cb_detach,
91
.inactivate = bdrv_child_cb_inactivate,
92
};
93
94
@@ -XXX,XX +XXX,XX @@ static void bdrv_backing_attach(BdrvChild *c)
95
parent->backing_blocker);
96
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
97
parent->backing_blocker);
98
+
99
+ bdrv_child_cb_attach(c);
100
}
101
102
static void bdrv_backing_detach(BdrvChild *c)
103
@@ -XXX,XX +XXX,XX @@ static void bdrv_backing_detach(BdrvChild *c)
104
bdrv_op_unblock_all(c->bs, parent->backing_blocker);
105
error_free(parent->backing_blocker);
106
parent->backing_blocker = NULL;
107
+
108
+ bdrv_child_cb_detach(c);
109
}
110
111
/*
112
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
113
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
114
}
115
if (old_bs) {
116
+ /* Detach first so that the recursive drain sections coming from @child
117
+ * are already gone and we only end the drain sections that came from
118
+ * elsewhere. */
119
+ if (child->role->detach) {
120
+ child->role->detach(child);
121
+ }
122
if (old_bs->quiesce_counter && child->role->drained_end) {
123
for (i = 0; i < old_bs->quiesce_counter; i++) {
124
child->role->drained_end(child);
125
}
126
}
127
- if (child->role->detach) {
128
- child->role->detach(child);
129
- }
130
QLIST_REMOVE(child, next_parent);
131
}
132
133
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
134
}
135
}
136
137
+ /* Attach only after starting new drained sections, so that recursive
138
+ * drain sections coming from @child don't get an extra .drained_begin
139
+ * callback. */
140
if (child->role->attach) {
141
child->role->attach(child);
142
}
106
diff --git a/block/io.c b/block/io.c
143
diff --git a/block/io.c b/block/io.c
107
index XXXXXXX..XXXXXXX 100644
144
index XXXXXXX..XXXXXXX 100644
108
--- a/block/io.c
145
--- a/block/io.c
109
+++ b/block/io.c
146
+++ b/block/io.c
110
@@ -XXX,XX +XXX,XX @@ int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
147
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
111
* Drivers not implementing the functionality are assumed to not support
148
assert(data.done);
112
* backing files, hence all their sectors are reported as allocated.
149
}
113
*
150
114
- * If 'want_zero' is true, the caller is querying for mapping purposes,
151
-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
115
- * and the result should include BDRV_BLOCK_OFFSET_VALID and
152
- BdrvChild *parent)
116
- * BDRV_BLOCK_ZERO where possible; otherwise, the result may omit those
153
+void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
117
- * bits particularly if it allows for a larger value in 'pnum'.
154
+ BdrvChild *parent)
118
+ * If 'want_zero' is true, the caller is querying for mapping
155
{
119
+ * purposes, with a focus on valid BDRV_BLOCK_OFFSET_VALID, _DATA, and
156
BdrvChild *child, *next;
120
+ * _ZERO where possible; otherwise, the result favors larger 'pnum',
157
121
+ * with a focus on accurate BDRV_BLOCK_ALLOCATED.
158
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
122
*
159
bdrv_drain_recurse(bs);
123
* If 'offset' is beyond the end of the disk image the return value is
160
124
* BDRV_BLOCK_EOF and 'pnum' is set to 0.
161
if (recursive) {
125
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
162
+ bs->recursive_quiesce_counter++;
126
163
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
127
/* Must be non-NULL or bdrv_getlength() would have failed */
164
bdrv_do_drained_begin(child->bs, true, child);
128
assert(bs->drv);
165
}
129
- if (!bs->drv->bdrv_co_get_block_status) {
166
@@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_begin(BlockDriverState *bs)
130
+ if (!bs->drv->bdrv_co_get_block_status && !bs->drv->bdrv_co_block_status) {
167
bdrv_do_drained_begin(bs, true, NULL);
131
*pnum = bytes;
168
}
132
ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
169
133
if (offset + bytes == total_size) {
170
-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
134
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
171
- BdrvChild *parent)
135
bdrv_inc_in_flight(bs);
172
+void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
136
173
+ BdrvChild *parent)
137
/* Round out to request_alignment boundaries */
174
{
138
- /* TODO: until we have a byte-based driver callback, we also have to
175
BdrvChild *child, *next;
139
- * round out to sectors, even if that is bigger than request_alignment */
176
int old_quiesce_counter;
140
- align = MAX(bs->bl.request_alignment, BDRV_SECTOR_SIZE);
177
@@ -XXX,XX +XXX,XX @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
141
+ align = bs->bl.request_alignment;
178
}
142
+ if (bs->drv->bdrv_co_get_block_status && align < BDRV_SECTOR_SIZE) {
179
143
+ align = BDRV_SECTOR_SIZE;
180
if (recursive) {
181
+ bs->recursive_quiesce_counter--;
182
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
183
bdrv_do_drained_end(child->bs, true, child);
184
}
185
@@ -XXX,XX +XXX,XX @@ void bdrv_subtree_drained_end(BlockDriverState *bs)
186
bdrv_do_drained_end(bs, true, NULL);
187
}
188
189
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
190
+{
191
+ int i;
192
+
193
+ for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
194
+ bdrv_do_drained_begin(child->bs, true, child);
144
+ }
195
+ }
145
aligned_offset = QEMU_ALIGN_DOWN(offset, align);
196
+}
146
aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
197
+
147
198
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
148
- {
199
+{
149
+ if (bs->drv->bdrv_co_get_block_status) {
200
+ int i;
150
int count; /* sectors */
201
+
151
int64_t longret;
202
+ for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
152
203
+ bdrv_do_drained_end(child->bs, true, child);
153
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
204
+ }
154
}
205
+}
155
ret = longret & ~BDRV_BLOCK_OFFSET_MASK;
206
+
156
*pnum = count * BDRV_SECTOR_SIZE;
207
/*
157
+ } else {
208
* Wait for pending requests to complete on a single BlockDriverState subtree,
158
+ ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
209
* and suspend block driver's internal I/O until next request arrives.
159
+ aligned_bytes, pnum, &local_map,
160
+ &local_file);
161
+ if (ret < 0) {
162
+ *pnum = 0;
163
+ goto out;
164
+ }
165
+ assert(*pnum); /* The block driver must make progress */
166
}
167
168
/*
169
--
210
--
170
2.13.6
211
2.13.6
171
212
172
213
diff view generated by jsdifflib
1
From: Eric Blake <eblake@redhat.com>
2
3
We are gradually moving away from sector-based interfaces, towards
4
byte-based. Update the qcow2 driver accordingly.
5
6
For now, we are ignoring the 'want_zero' hint. However, it should
7
be relatively straightforward to honor the hint as a way to return
8
larger *pnum values when we have consecutive clusters with the same
9
data/zero status but which differ only in having non-consecutive
10
mappings.
11
12
Signed-off-by: Eric Blake <eblake@redhat.com>
13
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
14
Reviewed-by: Fam Zheng <famz@redhat.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
16
---
2
---
17
block/qcow2.c | 24 +++++++++++++-----------
3
tests/test-bdrv-drain.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
18
1 file changed, 13 insertions(+), 11 deletions(-)
4
1 file changed, 80 insertions(+)
19
5
20
diff --git a/block/qcow2.c b/block/qcow2.c
6
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
21
index XXXXXXX..XXXXXXX 100644
7
index XXXXXXX..XXXXXXX 100644
22
--- a/block/qcow2.c
8
--- a/tests/test-bdrv-drain.c
23
+++ b/block/qcow2.c
9
+++ b/tests/test-bdrv-drain.c
24
@@ -XXX,XX +XXX,XX @@ static void qcow2_join_options(QDict *options, QDict *old_options)
10
@@ -XXX,XX +XXX,XX @@ static void test_multiparent(void)
25
}
11
blk_unref(blk_b);
26
}
12
}
27
13
28
-static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
14
+static void test_graph_change(void)
29
- int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
15
+{
30
+static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs,
16
+ BlockBackend *blk_a, *blk_b;
31
+ bool want_zero,
17
+ BlockDriverState *bs_a, *bs_b, *backing;
32
+ int64_t offset, int64_t count,
18
+ BDRVTestState *a_s, *b_s, *backing_s;
33
+ int64_t *pnum, int64_t *map,
19
+
34
+ BlockDriverState **file)
20
+ blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
35
{
21
+ bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
36
BDRVQcow2State *s = bs->opaque;
22
+ &error_abort);
37
uint64_t cluster_offset;
23
+ a_s = bs_a->opaque;
38
int index_in_cluster, ret;
24
+ blk_insert_bs(blk_a, bs_a, &error_abort);
39
unsigned int bytes;
25
+
40
- int64_t status = 0;
26
+ blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
41
+ int status = 0;
27
+ bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
42
28
+ &error_abort);
43
- bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
29
+ b_s = bs_b->opaque;
44
+ bytes = MIN(INT_MAX, count);
30
+ blk_insert_bs(blk_b, bs_b, &error_abort);
45
qemu_co_mutex_lock(&s->lock);
31
+
46
- ret = qcow2_get_cluster_offset(bs, sector_num << BDRV_SECTOR_BITS, &bytes,
32
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
47
- &cluster_offset);
33
+ backing_s = backing->opaque;
48
+ ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
34
+ bdrv_set_backing_hd(bs_a, backing, &error_abort);
49
qemu_co_mutex_unlock(&s->lock);
35
+
50
if (ret < 0) {
36
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
51
return ret;
37
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
52
}
38
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
53
39
+ g_assert_cmpint(a_s->drain_count, ==, 0);
54
- *pnum = bytes >> BDRV_SECTOR_BITS;
40
+ g_assert_cmpint(b_s->drain_count, ==, 0);
55
+ *pnum = bytes;
41
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
56
42
+
57
if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
43
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
58
!s->crypto) {
44
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
59
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
45
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
60
- cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
46
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
61
+ index_in_cluster = offset & (s->cluster_size - 1);
47
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
62
+ *map = cluster_offset | index_in_cluster;
48
+
63
*file = bs->file->bs;
49
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
64
- status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
50
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
65
+ status |= BDRV_BLOCK_OFFSET_VALID;
51
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
66
}
52
+ g_assert_cmpint(backing->quiesce_counter, ==, 5);
67
if (ret == QCOW2_CLUSTER_ZERO_PLAIN || ret == QCOW2_CLUSTER_ZERO_ALLOC) {
53
+ g_assert_cmpint(a_s->drain_count, ==, 5);
68
status |= BDRV_BLOCK_ZERO;
54
+ g_assert_cmpint(b_s->drain_count, ==, 5);
69
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = {
55
+ g_assert_cmpint(backing_s->drain_count, ==, 5);
70
.bdrv_child_perm = bdrv_format_default_perms,
56
+
71
.bdrv_create = qcow2_create,
57
+ bdrv_set_backing_hd(bs_b, NULL, &error_abort);
72
.bdrv_has_zero_init = bdrv_has_zero_init_1,
58
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
73
- .bdrv_co_get_block_status = qcow2_co_get_block_status,
59
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
74
+ .bdrv_co_block_status = qcow2_co_block_status,
60
+ g_assert_cmpint(backing->quiesce_counter, ==, 3);
75
61
+ g_assert_cmpint(a_s->drain_count, ==, 3);
76
.bdrv_co_preadv = qcow2_co_preadv,
62
+ g_assert_cmpint(b_s->drain_count, ==, 2);
77
.bdrv_co_pwritev = qcow2_co_pwritev,
63
+ g_assert_cmpint(backing_s->drain_count, ==, 3);
64
+
65
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
66
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
67
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
68
+ g_assert_cmpint(backing->quiesce_counter, ==, 5);
69
+ g_assert_cmpint(a_s->drain_count, ==, 5);
70
+ g_assert_cmpint(b_s->drain_count, ==, 5);
71
+ g_assert_cmpint(backing_s->drain_count, ==, 5);
72
+
73
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
74
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
75
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
76
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
77
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
78
+
79
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
80
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
81
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
82
+ g_assert_cmpint(a_s->drain_count, ==, 0);
83
+ g_assert_cmpint(b_s->drain_count, ==, 0);
84
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
85
+
86
+ bdrv_unref(backing);
87
+ bdrv_unref(bs_a);
88
+ bdrv_unref(bs_b);
89
+ blk_unref(blk_a);
90
+ blk_unref(blk_b);
91
+}
92
+
93
94
typedef struct TestBlockJob {
95
BlockJob common;
96
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
97
98
g_test_add_func("/bdrv-drain/nested", test_nested);
99
g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
100
+ g_test_add_func("/bdrv-drain/graph-change", test_graph_change);
101
102
g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
103
g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
78
--
104
--
79
2.13.6
105
2.13.6
80
106
81
107
diff view generated by jsdifflib
Deleted patch
1
From: Eric Blake <eblake@redhat.com>
2
1
3
Rework the debug define so that we always get -Wformat checking,
4
even when debugging is disabled.
5
6
Signed-off-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Stefan Weil <sw@weilnetz.de>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Fam Zheng <famz@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
block/vdi.c | 12 +++++++++---
14
1 file changed, 9 insertions(+), 3 deletions(-)
15
16
diff --git a/block/vdi.c b/block/vdi.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/vdi.c
19
+++ b/block/vdi.c
20
@@ -XXX,XX +XXX,XX @@
21
#define DEFAULT_CLUSTER_SIZE (1 * MiB)
22
23
#if defined(CONFIG_VDI_DEBUG)
24
-#define logout(fmt, ...) \
25
- fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__)
26
+#define VDI_DEBUG 1
27
#else
28
-#define logout(fmt, ...) ((void)0)
29
+#define VDI_DEBUG 0
30
#endif
31
32
+#define logout(fmt, ...) \
33
+ do { \
34
+ if (VDI_DEBUG) { \
35
+ fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__); \
36
+ } \
37
+ } while (0)
38
+
39
/* Image signature. */
40
#define VDI_SIGNATURE 0xbeda107f
41
42
--
43
2.13.6
44
45
diff view generated by jsdifflib
Deleted patch
1
From: Alberto Garcia <berto@igalia.com>
2
1
3
This patch updates docs/qcow2-cache.txt explaining how to use the new
4
l2-cache-entry-size parameter.
5
6
Here's a more detailed technical description of this feature:
7
8
https://lists.gnu.org/archive/html/qemu-block/2017-09/msg00635.html
9
10
And here are some performance numbers:
11
12
https://lists.gnu.org/archive/html/qemu-block/2017-12/msg00507.html
13
14
Signed-off-by: Alberto Garcia <berto@igalia.com>
15
Reviewed-by: Eric Blake <eblake@redhat.com>
16
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
17
---
18
docs/qcow2-cache.txt | 46 +++++++++++++++++++++++++++++++++++++++++++---
19
1 file changed, 43 insertions(+), 3 deletions(-)
20
21
diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt
22
index XXXXXXX..XXXXXXX 100644
23
--- a/docs/qcow2-cache.txt
24
+++ b/docs/qcow2-cache.txt
25
@@ -XXX,XX +XXX,XX @@
26
qcow2 L2/refcount cache configuration
27
=====================================
28
-Copyright (C) 2015 Igalia, S.L.
29
+Copyright (C) 2015, 2018 Igalia, S.L.
30
Author: Alberto Garcia <berto@igalia.com>
31
32
This work is licensed under the terms of the GNU GPL, version 2 or
33
@@ -XXX,XX +XXX,XX @@ There are three options available, and all of them take bytes:
34
35
There are two things that need to be taken into account:
36
37
- - Both caches must have a size that is a multiple of the cluster
38
- size.
39
+ - Both caches must have a size that is a multiple of the cluster size
40
+ (or the cache entry size: see "Using smaller cache entries" below).
41
42
- If you only set one of the options above, QEMU will automatically
43
adjust the others so that the L2 cache is 4 times bigger than the
44
@@ -XXX,XX +XXX,XX @@ much less often than the L2 cache, so it's perfectly reasonable to
45
keep it small.
46
47
48
+Using smaller cache entries
49
+---------------------------
50
+The qcow2 L2 cache stores complete tables by default. This means that
51
+if QEMU needs an entry from an L2 table then the whole table is read
52
+from disk and is kept in the cache. If the cache is full then a
53
+complete table needs to be evicted first.
54
+
55
+This can be inefficient with large cluster sizes since it results in
56
+more disk I/O and wastes more cache memory.
57
+
58
+Since QEMU 2.12 you can change the size of the L2 cache entry and make
59
+it smaller than the cluster size. This can be configured using the
60
+"l2-cache-entry-size" parameter:
61
+
62
+ -drive file=hd.qcow2,l2-cache-size=2097152,l2-cache-entry-size=4096
63
+
64
+Some things to take into account:
65
+
66
+ - The L2 cache entry size has the same restrictions as the cluster
67
+ size (power of two, at least 512 bytes).
68
+
69
+ - Smaller entry sizes generally improve the cache efficiency and make
70
+ disk I/O faster. This is particularly true with solid state drives
71
+ so it's a good idea to reduce the entry size in those cases. With
72
+ rotating hard drives the situation is a bit more complicated so you
73
+ should test it first and stay with the default size if unsure.
74
+
75
+ - Try different entry sizes to see which one gives faster performance
76
+ in your case. The block size of the host filesystem is generally a
77
+ good default (usually 4096 bytes in the case of ext4).
78
+
79
+ - Only the L2 cache can be configured this way. The refcount cache
80
+ always uses the cluster size as the entry size.
81
+
82
+ - If the L2 cache is big enough to hold all of the image's L2 tables
83
+ (as explained in the "Choosing the right cache sizes" section
84
+ earlier in this document) then none of this is necessary and you
85
+ can omit the "l2-cache-entry-size" parameter altogether.
86
+
87
+
88
Reducing the memory usage
89
-------------------------
90
It is possible to clean unused cache entries in order to reduce the
91
--
92
2.13.6
93
94
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
Since commit bde70715, base is the only node that is reopened in
2
commit_start(). This means that the code, which still involves an
3
explicit BlockReopenQueue, can now be simplified by using bdrv_reopen().
2
4
3
The name aio_context_in_iothread() is misleading because it also returns
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
4
true when called on the main AioContext from the main loop thread, which
6
Reviewed-by: Fam Zheng <famz@redhat.com>
5
is not an IOThread.
7
---
8
block/commit.c | 8 +-------
9
1 file changed, 1 insertion(+), 7 deletions(-)
6
10
7
This patch renames it to in_aio_context_home_thread() and expands the
11
diff --git a/block/commit.c b/block/commit.c
8
doc comment to make the semantics clearer.
9
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Reviewed-by: Eric Blake <eblake@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
14
include/block/aio.h | 7 +++++--
15
include/block/block.h | 2 +-
16
2 files changed, 6 insertions(+), 3 deletions(-)
17
18
diff --git a/include/block/aio.h b/include/block/aio.h
19
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
20
--- a/include/block/aio.h
13
--- a/block/commit.c
21
+++ b/include/block/aio.h
14
+++ b/block/commit.c
22
@@ -XXX,XX +XXX,XX @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
15
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
23
AioContext *qemu_get_current_aio_context(void);
16
const char *filter_node_name, Error **errp)
24
25
/**
26
+ * in_aio_context_home_thread:
27
* @ctx: the aio context
28
*
29
- * Return whether we are running in the I/O thread that manages @ctx.
30
+ * Return whether we are running in the thread that normally runs @ctx. Note
31
+ * that acquiring/releasing ctx does not affect the outcome, each AioContext
32
+ * still only has one home thread that is responsible for running it.
33
*/
34
-static inline bool aio_context_in_iothread(AioContext *ctx)
35
+static inline bool in_aio_context_home_thread(AioContext *ctx)
36
{
17
{
37
return ctx == qemu_get_current_aio_context();
18
CommitBlockJob *s;
38
}
19
- BlockReopenQueue *reopen_queue = NULL;
39
diff --git a/include/block/block.h b/include/block/block.h
20
int orig_base_flags;
40
index XXXXXXX..XXXXXXX 100644
21
BlockDriverState *iter;
41
--- a/include/block/block.h
22
BlockDriverState *commit_top_bs = NULL;
42
+++ b/include/block/block.h
23
@@ -XXX,XX +XXX,XX @@ void commit_start(const char *job_id, BlockDriverState *bs,
43
@@ -XXX,XX +XXX,XX @@ void bdrv_drain_all(void);
24
/* convert base to r/w, if necessary */
44
bool busy_ = true; \
25
orig_base_flags = bdrv_get_flags(base);
45
BlockDriverState *bs_ = (bs); \
26
if (!(orig_base_flags & BDRV_O_RDWR)) {
46
AioContext *ctx_ = bdrv_get_aio_context(bs_); \
27
- reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
47
- if (aio_context_in_iothread(ctx_)) { \
28
- orig_base_flags | BDRV_O_RDWR);
48
+ if (in_aio_context_home_thread(ctx_)) { \
29
- }
49
while ((cond) || busy_) { \
30
-
50
busy_ = aio_poll(ctx_, (cond)); \
31
- if (reopen_queue) {
51
waited_ |= !!(cond) | busy_; \
32
- bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
33
+ bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err);
34
if (local_err != NULL) {
35
error_propagate(errp, local_err);
36
goto fail;
52
--
37
--
53
2.13.6
38
2.13.6
54
39
55
40
diff view generated by jsdifflib
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
The bdrv_reopen*() implementation doesn't like it if the graph is
2
changed between queuing nodes for reopen and actually reopening them
3
(one of the reasons is that queuing can be recursive).
2
4
3
BlockDriver->bdrv_create() has been called from coroutine context since
5
So instead of draining the device only in bdrv_reopen_multiple(),
4
commit 5b7e1542cfa41a281af9629d31cef03704d976e6 ("block: make
6
require that callers already drained all affected nodes, and assert this
5
bdrv_create adopt coroutine").
7
in bdrv_reopen_queue().
6
8
7
Make this explicit by renaming to .bdrv_co_create_opts() and add the
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
coroutine_fn annotation. This makes it obvious to block driver authors
10
Reviewed-by: Fam Zheng <famz@redhat.com>
9
that they may yield, use CoMutex, or other coroutine_fn APIs.
11
---
10
bdrv_co_create is reserved for the QAPI-based version that Kevin is
12
block.c | 23 ++++++++++++++++-------
11
working on.
13
block/replication.c | 6 ++++++
14
qemu-io-cmds.c | 3 +++
15
3 files changed, 25 insertions(+), 7 deletions(-)
12
16
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Message-Id: <20170705102231.20711-2-stefanha@redhat.com>
15
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
16
Reviewed-by: Eric Blake <eblake@redhat.com>
17
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
18
---
19
include/block/block_int.h | 3 ++-
20
block.c | 4 ++--
21
block/crypto.c | 8 ++++----
22
block/file-posix.c | 15 ++++++++-------
23
block/file-win32.c | 5 +++--
24
block/gluster.c | 13 +++++++------
25
block/iscsi.c | 7 ++++---
26
block/nfs.c | 5 +++--
27
block/parallels.c | 6 ++++--
28
block/qcow.c | 5 +++--
29
block/qcow2.c | 5 +++--
30
block/qed.c | 6 ++++--
31
block/raw-format.c | 5 +++--
32
block/rbd.c | 6 ++++--
33
block/sheepdog.c | 10 +++++-----
34
block/ssh.c | 5 +++--
35
block/vdi.c | 5 +++--
36
block/vhdx.c | 5 +++--
37
block/vmdk.c | 5 +++--
38
block/vpc.c | 5 +++--
39
20 files changed, 74 insertions(+), 54 deletions(-)
40
41
diff --git a/include/block/block_int.h b/include/block/block_int.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/include/block/block_int.h
44
+++ b/include/block/block_int.h
45
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
46
int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
47
Error **errp);
48
void (*bdrv_close)(BlockDriverState *bs);
49
- int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
50
+ int coroutine_fn (*bdrv_co_create_opts)(const char *filename, QemuOpts *opts,
51
+ Error **errp);
52
int (*bdrv_make_empty)(BlockDriverState *bs);
53
54
void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
55
diff --git a/block.c b/block.c
17
diff --git a/block.c b/block.c
56
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
57
--- a/block.c
19
--- a/block.c
58
+++ b/block.c
20
+++ b/block.c
59
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn bdrv_create_co_entry(void *opaque)
21
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
60
CreateCo *cco = opaque;
22
* returns a pointer to bs_queue, which is either the newly allocated
61
assert(cco->drv);
23
* bs_queue, or the existing bs_queue being used.
62
24
*
63
- ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
25
+ * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
64
+ ret = cco->drv->bdrv_co_create_opts(cco->filename, cco->opts, &local_err);
26
*/
65
error_propagate(&cco->err, local_err);
27
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
66
cco->ret = ret;
28
BlockDriverState *bs,
67
}
29
@@ -XXX,XX +XXX,XX @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
68
@@ -XXX,XX +XXX,XX @@ int bdrv_create(BlockDriver *drv, const char* filename,
30
BdrvChild *child;
69
.err = NULL,
31
QDict *old_options, *explicit_options;
70
};
32
71
33
+ /* Make sure that the caller remembered to use a drained section. This is
72
- if (!drv->bdrv_create) {
34
+ * important to avoid graph changes between the recursive queuing here and
73
+ if (!drv->bdrv_co_create_opts) {
35
+ * bdrv_reopen_multiple(). */
74
error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
36
+ assert(bs->quiesce_counter > 0);
75
ret = -ENOTSUP;
37
+
76
goto out;
38
if (bs_queue == NULL) {
77
diff --git a/block/crypto.c b/block/crypto.c
39
bs_queue = g_new0(BlockReopenQueue, 1);
78
index XXXXXXX..XXXXXXX 100644
40
QSIMPLEQ_INIT(bs_queue);
79
--- a/block/crypto.c
41
@@ -XXX,XX +XXX,XX @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
80
+++ b/block/crypto.c
42
* If all devices prepare successfully, then the changes are committed
81
@@ -XXX,XX +XXX,XX @@ static int block_crypto_open_luks(BlockDriverState *bs,
43
* to all devices.
82
bs, options, flags, errp);
44
*
83
}
45
+ * All affected nodes must be drained between bdrv_reopen_queue() and
84
46
+ * bdrv_reopen_multiple().
85
-static int block_crypto_create_luks(const char *filename,
47
*/
86
- QemuOpts *opts,
48
int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp)
87
- Error **errp)
88
+static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename,
89
+ QemuOpts *opts,
90
+ Error **errp)
91
{
49
{
92
return block_crypto_create_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
50
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er
93
filename, opts, errp);
51
94
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_crypto_luks = {
52
assert(bs_queue != NULL);
95
.bdrv_open = block_crypto_open_luks,
53
96
.bdrv_close = block_crypto_close,
54
- aio_context_release(ctx);
97
.bdrv_child_perm = bdrv_format_default_perms,
55
- bdrv_drain_all_begin();
98
- .bdrv_create = block_crypto_create_luks,
56
- aio_context_acquire(ctx);
99
+ .bdrv_co_create_opts = block_crypto_co_create_opts_luks,
57
-
100
.bdrv_truncate = block_crypto_truncate,
58
QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
101
.create_opts = &block_crypto_create_opts_luks,
59
+ assert(bs_entry->state.bs->quiesce_counter > 0);
102
60
if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
103
diff --git a/block/file-posix.c b/block/file-posix.c
61
error_propagate(errp, local_err);
104
index XXXXXXX..XXXXXXX 100644
62
goto cleanup;
105
--- a/block/file-posix.c
63
@@ -XXX,XX +XXX,XX @@ cleanup:
106
+++ b/block/file-posix.c
107
@@ -XXX,XX +XXX,XX @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
108
return (int64_t)st.st_blocks * 512;
109
}
110
111
-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
112
+static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts,
113
+ Error **errp)
114
{
115
int fd;
116
int result = 0;
117
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = {
118
.bdrv_reopen_commit = raw_reopen_commit,
119
.bdrv_reopen_abort = raw_reopen_abort,
120
.bdrv_close = raw_close,
121
- .bdrv_create = raw_create,
122
+ .bdrv_co_create_opts = raw_co_create_opts,
123
.bdrv_has_zero_init = bdrv_has_zero_init_1,
124
.bdrv_co_block_status = raw_co_block_status,
125
.bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
126
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
127
return -ENOTSUP;
128
}
129
130
-static int hdev_create(const char *filename, QemuOpts *opts,
131
- Error **errp)
132
+static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts,
133
+ Error **errp)
134
{
135
int fd;
136
int ret = 0;
137
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_device = {
138
.bdrv_reopen_prepare = raw_reopen_prepare,
139
.bdrv_reopen_commit = raw_reopen_commit,
140
.bdrv_reopen_abort = raw_reopen_abort,
141
- .bdrv_create = hdev_create,
142
+ .bdrv_co_create_opts = hdev_co_create_opts,
143
.create_opts = &raw_create_opts,
144
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
145
146
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = {
147
.bdrv_reopen_prepare = raw_reopen_prepare,
148
.bdrv_reopen_commit = raw_reopen_commit,
149
.bdrv_reopen_abort = raw_reopen_abort,
150
- .bdrv_create = hdev_create,
151
+ .bdrv_co_create_opts = hdev_co_create_opts,
152
.create_opts = &raw_create_opts,
153
154
155
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_cdrom = {
156
.bdrv_reopen_prepare = raw_reopen_prepare,
157
.bdrv_reopen_commit = raw_reopen_commit,
158
.bdrv_reopen_abort = raw_reopen_abort,
159
- .bdrv_create = hdev_create,
160
+ .bdrv_co_create_opts = hdev_co_create_opts,
161
.create_opts = &raw_create_opts,
162
163
.bdrv_co_preadv = raw_co_preadv,
164
diff --git a/block/file-win32.c b/block/file-win32.c
165
index XXXXXXX..XXXXXXX 100644
166
--- a/block/file-win32.c
167
+++ b/block/file-win32.c
168
@@ -XXX,XX +XXX,XX @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
169
return st.st_size;
170
}
171
172
-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
173
+static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts,
174
+ Error **errp)
175
{
176
int fd;
177
int64_t total_size = 0;
178
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_file = {
179
.bdrv_file_open = raw_open,
180
.bdrv_refresh_limits = raw_probe_alignment,
181
.bdrv_close = raw_close,
182
- .bdrv_create = raw_create,
183
+ .bdrv_co_create_opts = raw_co_create_opts,
184
.bdrv_has_zero_init = bdrv_has_zero_init_1,
185
186
.bdrv_aio_readv = raw_aio_readv,
187
diff --git a/block/gluster.c b/block/gluster.c
188
index XXXXXXX..XXXXXXX 100644
189
--- a/block/gluster.c
190
+++ b/block/gluster.c
191
@@ -XXX,XX +XXX,XX @@ static int qemu_gluster_do_truncate(struct glfs_fd *fd, int64_t offset,
192
return 0;
193
}
194
195
-static int qemu_gluster_create(const char *filename,
196
- QemuOpts *opts, Error **errp)
197
+static int coroutine_fn qemu_gluster_co_create_opts(const char *filename,
198
+ QemuOpts *opts,
199
+ Error **errp)
200
{
201
BlockdevOptionsGluster *gconf;
202
struct glfs *glfs;
203
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster = {
204
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
205
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
206
.bdrv_close = qemu_gluster_close,
207
- .bdrv_create = qemu_gluster_create,
208
+ .bdrv_co_create_opts = qemu_gluster_co_create_opts,
209
.bdrv_getlength = qemu_gluster_getlength,
210
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
211
.bdrv_truncate = qemu_gluster_truncate,
212
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_tcp = {
213
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
214
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
215
.bdrv_close = qemu_gluster_close,
216
- .bdrv_create = qemu_gluster_create,
217
+ .bdrv_co_create_opts = qemu_gluster_co_create_opts,
218
.bdrv_getlength = qemu_gluster_getlength,
219
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
220
.bdrv_truncate = qemu_gluster_truncate,
221
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_unix = {
222
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
223
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
224
.bdrv_close = qemu_gluster_close,
225
- .bdrv_create = qemu_gluster_create,
226
+ .bdrv_co_create_opts = qemu_gluster_co_create_opts,
227
.bdrv_getlength = qemu_gluster_getlength,
228
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
229
.bdrv_truncate = qemu_gluster_truncate,
230
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_gluster_rdma = {
231
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
232
.bdrv_reopen_abort = qemu_gluster_reopen_abort,
233
.bdrv_close = qemu_gluster_close,
234
- .bdrv_create = qemu_gluster_create,
235
+ .bdrv_co_create_opts = qemu_gluster_co_create_opts,
236
.bdrv_getlength = qemu_gluster_getlength,
237
.bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
238
.bdrv_truncate = qemu_gluster_truncate,
239
diff --git a/block/iscsi.c b/block/iscsi.c
240
index XXXXXXX..XXXXXXX 100644
241
--- a/block/iscsi.c
242
+++ b/block/iscsi.c
243
@@ -XXX,XX +XXX,XX @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset,
244
return 0;
245
}
246
247
-static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
248
+static int coroutine_fn iscsi_co_create_opts(const char *filename, QemuOpts *opts,
249
+ Error **errp)
250
{
251
int ret = 0;
252
int64_t total_size = 0;
253
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iscsi = {
254
.bdrv_parse_filename = iscsi_parse_filename,
255
.bdrv_file_open = iscsi_open,
256
.bdrv_close = iscsi_close,
257
- .bdrv_create = iscsi_create,
258
+ .bdrv_co_create_opts = iscsi_co_create_opts,
259
.create_opts = &iscsi_create_opts,
260
.bdrv_reopen_prepare = iscsi_reopen_prepare,
261
.bdrv_reopen_commit = iscsi_reopen_commit,
262
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_iser = {
263
.bdrv_parse_filename = iscsi_parse_filename,
264
.bdrv_file_open = iscsi_open,
265
.bdrv_close = iscsi_close,
266
- .bdrv_create = iscsi_create,
267
+ .bdrv_co_create_opts = iscsi_co_create_opts,
268
.create_opts = &iscsi_create_opts,
269
.bdrv_reopen_prepare = iscsi_reopen_prepare,
270
.bdrv_reopen_commit = iscsi_reopen_commit,
271
diff --git a/block/nfs.c b/block/nfs.c
272
index XXXXXXX..XXXXXXX 100644
273
--- a/block/nfs.c
274
+++ b/block/nfs.c
275
@@ -XXX,XX +XXX,XX @@ static QemuOptsList nfs_create_opts = {
276
}
64
}
277
};
65
g_free(bs_queue);
278
66
279
-static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
67
- bdrv_drain_all_end();
280
+static int coroutine_fn nfs_file_co_create_opts(const char *url, QemuOpts *opts,
68
-
281
+ Error **errp)
282
{
283
int64_t ret, total_size;
284
NFSClient *client = g_new0(NFSClient, 1);
285
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nfs = {
286
287
.bdrv_file_open = nfs_file_open,
288
.bdrv_close = nfs_file_close,
289
- .bdrv_create = nfs_file_create,
290
+ .bdrv_co_create_opts = nfs_file_co_create_opts,
291
.bdrv_reopen_prepare = nfs_reopen_prepare,
292
293
.bdrv_co_preadv = nfs_co_preadv,
294
diff --git a/block/parallels.c b/block/parallels.c
295
index XXXXXXX..XXXXXXX 100644
296
--- a/block/parallels.c
297
+++ b/block/parallels.c
298
@@ -XXX,XX +XXX,XX @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
299
}
300
301
302
-static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
303
+static int coroutine_fn parallels_co_create_opts(const char *filename,
304
+ QemuOpts *opts,
305
+ Error **errp)
306
{
307
int64_t total_size, cl_size;
308
uint8_t tmp[BDRV_SECTOR_SIZE];
309
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_parallels = {
310
.bdrv_co_readv = parallels_co_readv,
311
.bdrv_co_writev = parallels_co_writev,
312
.supports_backing = true,
313
- .bdrv_create = parallels_create,
314
+ .bdrv_co_create_opts = parallels_co_create_opts,
315
.bdrv_check = parallels_check,
316
.create_opts = &parallels_create_opts,
317
};
318
diff --git a/block/qcow.c b/block/qcow.c
319
index XXXXXXX..XXXXXXX 100644
320
--- a/block/qcow.c
321
+++ b/block/qcow.c
322
@@ -XXX,XX +XXX,XX @@ static void qcow_close(BlockDriverState *bs)
323
error_free(s->migration_blocker);
324
}
325
326
-static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
327
+static int coroutine_fn qcow_co_create_opts(const char *filename, QemuOpts *opts,
328
+ Error **errp)
329
{
330
int header_size, backing_filename_len, l1_size, shift, i;
331
QCowHeader header;
332
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qcow = {
333
.bdrv_close        = qcow_close,
334
.bdrv_child_perm = bdrv_format_default_perms,
335
.bdrv_reopen_prepare = qcow_reopen_prepare,
336
- .bdrv_create = qcow_create,
337
+ .bdrv_co_create_opts = qcow_co_create_opts,
338
.bdrv_has_zero_init = bdrv_has_zero_init_1,
339
.supports_backing = true,
340
341
diff --git a/block/qcow2.c b/block/qcow2.c
342
index XXXXXXX..XXXXXXX 100644
343
--- a/block/qcow2.c
344
+++ b/block/qcow2.c
345
@@ -XXX,XX +XXX,XX @@ out:
346
return ret;
69
return ret;
347
}
70
}
348
71
349
-static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
72
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
350
+static int coroutine_fn qcow2_co_create_opts(const char *filename, QemuOpts *opts,
351
+ Error **errp)
352
{
73
{
353
char *backing_file = NULL;
74
int ret = -1;
354
char *backing_fmt = NULL;
75
Error *local_err = NULL;
355
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = {
76
- BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
356
.bdrv_reopen_abort = qcow2_reopen_abort,
77
+ BlockReopenQueue *queue;
357
.bdrv_join_options = qcow2_join_options,
78
358
.bdrv_child_perm = bdrv_format_default_perms,
79
+ bdrv_subtree_drained_begin(bs);
359
- .bdrv_create = qcow2_create,
80
+
360
+ .bdrv_co_create_opts = qcow2_co_create_opts,
81
+ queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
361
.bdrv_has_zero_init = bdrv_has_zero_init_1,
82
ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
362
.bdrv_co_block_status = qcow2_co_block_status,
83
if (local_err != NULL) {
363
84
error_propagate(errp, local_err);
364
diff --git a/block/qed.c b/block/qed.c
85
}
365
index XXXXXXX..XXXXXXX 100644
86
+
366
--- a/block/qed.c
87
+ bdrv_subtree_drained_end(bs);
367
+++ b/block/qed.c
88
+
368
@@ -XXX,XX +XXX,XX @@ out:
369
return ret;
89
return ret;
370
}
90
}
371
91
372
-static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp)
92
diff --git a/block/replication.c b/block/replication.c
373
+static int coroutine_fn bdrv_qed_co_create_opts(const char *filename,
374
+ QemuOpts *opts,
375
+ Error **errp)
376
{
377
uint64_t image_size = 0;
378
uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
379
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_qed = {
380
.bdrv_close = bdrv_qed_close,
381
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
382
.bdrv_child_perm = bdrv_format_default_perms,
383
- .bdrv_create = bdrv_qed_create,
384
+ .bdrv_co_create_opts = bdrv_qed_co_create_opts,
385
.bdrv_has_zero_init = bdrv_has_zero_init_1,
386
.bdrv_co_block_status = bdrv_qed_co_block_status,
387
.bdrv_co_readv = bdrv_qed_co_readv,
388
diff --git a/block/raw-format.c b/block/raw-format.c
389
index XXXXXXX..XXXXXXX 100644
93
index XXXXXXX..XXXXXXX 100644
390
--- a/block/raw-format.c
94
--- a/block/replication.c
391
+++ b/block/raw-format.c
95
+++ b/block/replication.c
392
@@ -XXX,XX +XXX,XX @@ static int raw_has_zero_init(BlockDriverState *bs)
96
@@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
393
return bdrv_has_zero_init(bs->file->bs);
97
new_secondary_flags = s->orig_secondary_flags;
98
}
99
100
+ bdrv_subtree_drained_begin(s->hidden_disk->bs);
101
+ bdrv_subtree_drained_begin(s->secondary_disk->bs);
102
+
103
if (orig_hidden_flags != new_hidden_flags) {
104
reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL,
105
new_hidden_flags);
106
@@ -XXX,XX +XXX,XX @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
107
reopen_queue, &local_err);
108
error_propagate(errp, local_err);
109
}
110
+
111
+ bdrv_subtree_drained_end(s->hidden_disk->bs);
112
+ bdrv_subtree_drained_end(s->secondary_disk->bs);
394
}
113
}
395
114
396
-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
115
static void backup_job_cleanup(BlockDriverState *bs)
397
+static int coroutine_fn raw_co_create_opts(const char *filename, QemuOpts *opts,
116
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
398
+ Error **errp)
399
{
400
return bdrv_create_file(filename, opts, errp);
401
}
402
@@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_raw = {
403
.bdrv_open = &raw_open,
404
.bdrv_close = &raw_close,
405
.bdrv_child_perm = bdrv_filter_default_perms,
406
- .bdrv_create = &raw_create,
407
+ .bdrv_co_create_opts = &raw_co_create_opts,
408
.bdrv_co_preadv = &raw_co_preadv,
409
.bdrv_co_pwritev = &raw_co_pwritev,
410
.bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
411
diff --git a/block/rbd.c b/block/rbd.c
412
index XXXXXXX..XXXXXXX 100644
117
index XXXXXXX..XXXXXXX 100644
413
--- a/block/rbd.c
118
--- a/qemu-io-cmds.c
414
+++ b/block/rbd.c
119
+++ b/qemu-io-cmds.c
415
@@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = {
120
@@ -XXX,XX +XXX,XX @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
416
},
121
opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL;
417
};
122
qemu_opts_reset(&reopen_opts);
418
123
419
-static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
124
+ bdrv_subtree_drained_begin(bs);
420
+static int coroutine_fn qemu_rbd_co_create_opts(const char *filename,
125
brq = bdrv_reopen_queue(NULL, bs, opts, flags);
421
+ QemuOpts *opts,
126
bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err);
422
+ Error **errp)
127
+ bdrv_subtree_drained_end(bs);
423
{
128
+
424
Error *local_err = NULL;
129
if (local_err) {
425
int64_t bytes = 0;
130
error_report_err(local_err);
426
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_rbd = {
131
} else {
427
.bdrv_file_open = qemu_rbd_open,
428
.bdrv_close = qemu_rbd_close,
429
.bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
430
- .bdrv_create = qemu_rbd_create,
431
+ .bdrv_co_create_opts = qemu_rbd_co_create_opts,
432
.bdrv_has_zero_init = bdrv_has_zero_init_1,
433
.bdrv_get_info = qemu_rbd_getinfo,
434
.create_opts = &qemu_rbd_create_opts,
435
diff --git a/block/sheepdog.c b/block/sheepdog.c
436
index XXXXXXX..XXXXXXX 100644
437
--- a/block/sheepdog.c
438
+++ b/block/sheepdog.c
439
@@ -XXX,XX +XXX,XX @@ static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
440
return 0;
441
}
442
443
-static int sd_create(const char *filename, QemuOpts *opts,
444
- Error **errp)
445
+static int coroutine_fn sd_co_create_opts(const char *filename, QemuOpts *opts,
446
+ Error **errp)
447
{
448
Error *err = NULL;
449
int ret = 0;
450
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog = {
451
.bdrv_reopen_commit = sd_reopen_commit,
452
.bdrv_reopen_abort = sd_reopen_abort,
453
.bdrv_close = sd_close,
454
- .bdrv_create = sd_create,
455
+ .bdrv_co_create_opts = sd_co_create_opts,
456
.bdrv_has_zero_init = bdrv_has_zero_init_1,
457
.bdrv_getlength = sd_getlength,
458
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
459
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog_tcp = {
460
.bdrv_reopen_commit = sd_reopen_commit,
461
.bdrv_reopen_abort = sd_reopen_abort,
462
.bdrv_close = sd_close,
463
- .bdrv_create = sd_create,
464
+ .bdrv_co_create_opts = sd_co_create_opts,
465
.bdrv_has_zero_init = bdrv_has_zero_init_1,
466
.bdrv_getlength = sd_getlength,
467
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
468
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_sheepdog_unix = {
469
.bdrv_reopen_commit = sd_reopen_commit,
470
.bdrv_reopen_abort = sd_reopen_abort,
471
.bdrv_close = sd_close,
472
- .bdrv_create = sd_create,
473
+ .bdrv_co_create_opts = sd_co_create_opts,
474
.bdrv_has_zero_init = bdrv_has_zero_init_1,
475
.bdrv_getlength = sd_getlength,
476
.bdrv_get_allocated_file_size = sd_get_allocated_file_size,
477
diff --git a/block/ssh.c b/block/ssh.c
478
index XXXXXXX..XXXXXXX 100644
479
--- a/block/ssh.c
480
+++ b/block/ssh.c
481
@@ -XXX,XX +XXX,XX @@ static QemuOptsList ssh_create_opts = {
482
}
483
};
484
485
-static int ssh_create(const char *filename, QemuOpts *opts, Error **errp)
486
+static int coroutine_fn ssh_co_create_opts(const char *filename, QemuOpts *opts,
487
+ Error **errp)
488
{
489
int r, ret;
490
int64_t total_size = 0;
491
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_ssh = {
492
.instance_size = sizeof(BDRVSSHState),
493
.bdrv_parse_filename = ssh_parse_filename,
494
.bdrv_file_open = ssh_file_open,
495
- .bdrv_create = ssh_create,
496
+ .bdrv_co_create_opts = ssh_co_create_opts,
497
.bdrv_close = ssh_close,
498
.bdrv_has_zero_init = ssh_has_zero_init,
499
.bdrv_co_readv = ssh_co_readv,
500
diff --git a/block/vdi.c b/block/vdi.c
501
index XXXXXXX..XXXXXXX 100644
502
--- a/block/vdi.c
503
+++ b/block/vdi.c
504
@@ -XXX,XX +XXX,XX @@ nonallocating_write:
505
return ret;
506
}
507
508
-static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
509
+static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts,
510
+ Error **errp)
511
{
512
int ret = 0;
513
uint64_t bytes = 0;
514
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vdi = {
515
.bdrv_close = vdi_close,
516
.bdrv_reopen_prepare = vdi_reopen_prepare,
517
.bdrv_child_perm = bdrv_format_default_perms,
518
- .bdrv_create = vdi_create,
519
+ .bdrv_co_create_opts = vdi_co_create_opts,
520
.bdrv_has_zero_init = bdrv_has_zero_init_1,
521
.bdrv_co_block_status = vdi_co_block_status,
522
.bdrv_make_empty = vdi_make_empty,
523
diff --git a/block/vhdx.c b/block/vhdx.c
524
index XXXXXXX..XXXXXXX 100644
525
--- a/block/vhdx.c
526
+++ b/block/vhdx.c
527
@@ -XXX,XX +XXX,XX @@ exit:
528
* .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
529
* 1MB
530
*/
531
-static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
532
+static int coroutine_fn vhdx_co_create_opts(const char *filename, QemuOpts *opts,
533
+ Error **errp)
534
{
535
int ret = 0;
536
uint64_t image_size = (uint64_t) 2 * GiB;
537
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vhdx = {
538
.bdrv_child_perm = bdrv_format_default_perms,
539
.bdrv_co_readv = vhdx_co_readv,
540
.bdrv_co_writev = vhdx_co_writev,
541
- .bdrv_create = vhdx_create,
542
+ .bdrv_co_create_opts = vhdx_co_create_opts,
543
.bdrv_get_info = vhdx_get_info,
544
.bdrv_check = vhdx_check,
545
.bdrv_has_zero_init = bdrv_has_zero_init_1,
546
diff --git a/block/vmdk.c b/block/vmdk.c
547
index XXXXXXX..XXXXXXX 100644
548
--- a/block/vmdk.c
549
+++ b/block/vmdk.c
550
@@ -XXX,XX +XXX,XX @@ static int filename_decompose(const char *filename, char *path, char *prefix,
551
return VMDK_OK;
552
}
553
554
-static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
555
+static int coroutine_fn vmdk_co_create_opts(const char *filename, QemuOpts *opts,
556
+ Error **errp)
557
{
558
int idx = 0;
559
BlockBackend *new_blk = NULL;
560
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vmdk = {
561
.bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed,
562
.bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes,
563
.bdrv_close = vmdk_close,
564
- .bdrv_create = vmdk_create,
565
+ .bdrv_co_create_opts = vmdk_co_create_opts,
566
.bdrv_co_flush_to_disk = vmdk_co_flush,
567
.bdrv_co_block_status = vmdk_co_block_status,
568
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
569
diff --git a/block/vpc.c b/block/vpc.c
570
index XXXXXXX..XXXXXXX 100644
571
--- a/block/vpc.c
572
+++ b/block/vpc.c
573
@@ -XXX,XX +XXX,XX @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
574
return ret;
575
}
576
577
-static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
578
+static int coroutine_fn vpc_co_create_opts(const char *filename, QemuOpts *opts,
579
+ Error **errp)
580
{
581
uint8_t buf[1024];
582
VHDFooter *footer = (VHDFooter *) buf;
583
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_vpc = {
584
.bdrv_close = vpc_close,
585
.bdrv_reopen_prepare = vpc_reopen_prepare,
586
.bdrv_child_perm = bdrv_format_default_perms,
587
- .bdrv_create = vpc_create,
588
+ .bdrv_co_create_opts = vpc_co_create_opts,
589
590
.bdrv_co_preadv = vpc_co_preadv,
591
.bdrv_co_pwritev = vpc_co_pwritev,
592
--
132
--
593
2.13.6
133
2.13.6
594
134
595
135
diff view generated by jsdifflib