1
The following changes since commit 55a19ad8b2d0797e3a8fe90ab99a9bb713824059:
1
The following changes since commit 813bac3d8d70d85cb7835f7945eb9eed84c2d8d0:
2
2
3
Update version for v2.9.0-rc1 release (2017-03-21 17:13:29 +0000)
3
Merge tag '2023q3-bsd-user-pull-request' of https://gitlab.com/bsdimp/qemu into staging (2023-08-29 08:58:00 -0400)
4
4
5
are available in the git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 600ac6a0ef5c06418446ef2f37407bddcc51b21c:
9
for you to fetch changes up to 87ec6f55af38e29be5b2b65a8acf84da73e06d06:
10
10
11
blockjob: add devops to blockjob backends (2017-03-22 13:26:27 -0400)
11
aio-posix: zero out io_uring sqe user_data (2023-08-30 07:39:59 -0400)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block patches for 2.9
14
Pull request
15
16
v3:
17
- Drop UFS emulation due to CI failures
18
- Add "aio-posix: zero out io_uring sqe user_data"
19
15
----------------------------------------------------------------
20
----------------------------------------------------------------
16
21
17
John Snow (3):
22
Andrey Drobyshev (3):
18
blockjob: add block_job_start_shim
23
block: add subcluster_size field to BlockDriverInfo
19
block-backend: add drained_begin / drained_end ops
24
block/io: align requests to subcluster_size
20
blockjob: add devops to blockjob backends
25
tests/qemu-iotests/197: add testcase for CoR with subclusters
21
26
22
Paolo Bonzini (1):
27
Fabiano Rosas (1):
23
blockjob: avoid recursive AioContext locking
28
block-migration: Ensure we don't crash during migration cleanup
24
29
25
block/block-backend.c | 24 ++++++++++++++--
30
Stefan Hajnoczi (1):
26
blockjob.c | 63 ++++++++++++++++++++++++++++++++----------
31
aio-posix: zero out io_uring sqe user_data
27
include/sysemu/block-backend.h | 8 ++++++
32
28
3 files changed, 79 insertions(+), 16 deletions(-)
33
include/block/block-common.h | 5 ++++
34
include/block/block-io.h | 8 +++---
35
block.c | 7 +++++
36
block/io.c | 50 ++++++++++++++++++------------------
37
block/mirror.c | 8 +++---
38
block/qcow2.c | 1 +
39
migration/block.c | 11 ++++++--
40
util/fdmon-io_uring.c | 2 ++
41
tests/qemu-iotests/197 | 29 +++++++++++++++++++++
42
tests/qemu-iotests/197.out | 24 +++++++++++++++++
43
10 files changed, 110 insertions(+), 35 deletions(-)
29
44
30
--
45
--
31
2.9.3
46
2.41.0
32
33
diff view generated by jsdifflib
New patch
1
From: Fabiano Rosas <farosas@suse.de>
1
2
3
We can fail the blk_insert_bs() at init_blk_migration(), leaving the
4
BlkMigDevState without a dirty_bitmap and BlockDriverState. Account
5
for the possibly missing elements when doing cleanup.
6
7
Fix the following crashes:
8
9
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
10
0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
11
359 BlockDriverState *bs = bitmap->bs;
12
#0 0x0000555555ec83ef in bdrv_release_dirty_bitmap (bitmap=0x0) at ../block/dirty-bitmap.c:359
13
#1 0x0000555555bba331 in unset_dirty_tracking () at ../migration/block.c:371
14
#2 0x0000555555bbad98 in block_migration_cleanup_bmds () at ../migration/block.c:681
15
16
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
17
0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
18
7073 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
19
#0 0x0000555555e971ff in bdrv_op_unblock (bs=0x0, op=BLOCK_OP_TYPE_BACKUP_SOURCE, reason=0x0) at ../block.c:7073
20
#1 0x0000555555e9734a in bdrv_op_unblock_all (bs=0x0, reason=0x0) at ../block.c:7095
21
#2 0x0000555555bbae13 in block_migration_cleanup_bmds () at ../migration/block.c:690
22
23
Signed-off-by: Fabiano Rosas <farosas@suse.de>
24
Message-id: 20230731203338.27581-1-farosas@suse.de
25
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
26
---
27
migration/block.c | 11 +++++++++--
28
1 file changed, 9 insertions(+), 2 deletions(-)
29
30
diff --git a/migration/block.c b/migration/block.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/migration/block.c
33
+++ b/migration/block.c
34
@@ -XXX,XX +XXX,XX @@ static void unset_dirty_tracking(void)
35
BlkMigDevState *bmds;
36
37
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
38
- bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
39
+ if (bmds->dirty_bitmap) {
40
+ bdrv_release_dirty_bitmap(bmds->dirty_bitmap);
41
+ }
42
}
43
}
44
45
@@ -XXX,XX +XXX,XX @@ static int64_t get_remaining_dirty(void)
46
static void block_migration_cleanup_bmds(void)
47
{
48
BlkMigDevState *bmds;
49
+ BlockDriverState *bs;
50
AioContext *ctx;
51
52
unset_dirty_tracking();
53
54
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
55
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
56
- bdrv_op_unblock_all(blk_bs(bmds->blk), bmds->blocker);
57
+
58
+ bs = blk_bs(bmds->blk);
59
+ if (bs) {
60
+ bdrv_op_unblock_all(bs, bmds->blocker);
61
+ }
62
error_free(bmds->blocker);
63
64
/* Save ctx, because bmds->blk can disappear during blk_unref. */
65
--
66
2.41.0
diff view generated by jsdifflib
1
From: John Snow <jsnow@redhat.com>
1
From: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
2
2
3
This lets us hook into drained_begin and drained_end requests from the
3
This is going to be used in the subsequent commit for request alignment
4
backend level, which is particularly useful for making sure that all
4
(in particular, during copy-on-read). This value only makes sense for
5
jobs associated with a particular node (whether the source or the target)
5
the formats which support subclusters (currently QCOW2 only). If this
6
receive a drain request.
6
field isn't set by the driver's own bdrv_get_info() implementation, we
7
simply set it equal to the cluster size thus treating each cluster as
8
having a single subcluster.
7
9
8
Suggested-by: Kevin Wolf <kwolf@redhat.com>
10
Reviewed-by: Eric Blake <eblake@redhat.com>
9
Signed-off-by: John Snow <jsnow@redhat.com>
11
Reviewed-by: Denis V. Lunev <den@openvz.org>
10
Reviewed-by: Jeff Cody <jcody@redhat.com>
12
Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
11
Message-id: 20170316212351.13797-4-jsnow@redhat.com
13
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
12
Signed-off-by: Jeff Cody <jcody@redhat.com>
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Message-ID: <20230711172553.234055-2-andrey.drobyshev@virtuozzo.com>
13
---
16
---
14
blockjob.c | 29 ++++++++++++++++++++++++-----
17
include/block/block-common.h | 5 +++++
15
1 file changed, 24 insertions(+), 5 deletions(-)
18
block.c | 7 +++++++
19
block/qcow2.c | 1 +
20
3 files changed, 13 insertions(+)
16
21
17
diff --git a/blockjob.c b/blockjob.c
22
diff --git a/include/block/block-common.h b/include/block/block-common.h
18
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
19
--- a/blockjob.c
24
--- a/include/block/block-common.h
20
+++ b/blockjob.c
25
+++ b/include/block/block-common.h
21
@@ -XXX,XX +XXX,XX @@ static const BdrvChildRole child_job = {
26
@@ -XXX,XX +XXX,XX @@ typedef struct BlockZoneWps {
22
.stay_at_node = true,
27
typedef struct BlockDriverInfo {
23
};
28
/* in bytes, 0 if irrelevant */
24
29
int cluster_size;
25
+static void block_job_drained_begin(void *opaque)
30
+ /*
26
+{
31
+ * A fraction of cluster_size, if supported (currently QCOW2 only); if
27
+ BlockJob *job = opaque;
32
+ * disabled or unsupported, set equal to cluster_size.
28
+ block_job_pause(job);
33
+ */
29
+}
34
+ int subcluster_size;
30
+
35
/* offset at which the VM state can be saved (0 if not possible) */
31
+static void block_job_drained_end(void *opaque)
36
int64_t vm_state_offset;
32
+{
37
bool is_dirty;
33
+ BlockJob *job = opaque;
38
diff --git a/block.c b/block.c
34
+ block_job_resume(job);
39
index XXXXXXX..XXXXXXX 100644
35
+}
40
--- a/block.c
36
+
41
+++ b/block.c
37
+static const BlockDevOps block_job_dev_ops = {
42
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
38
+ .drained_begin = block_job_drained_begin,
43
}
39
+ .drained_end = block_job_drained_end,
44
memset(bdi, 0, sizeof(*bdi));
40
+};
45
ret = drv->bdrv_co_get_info(bs, bdi);
41
+
46
+ if (bdi->subcluster_size == 0) {
42
BlockJob *block_job_next(BlockJob *job)
47
+ /*
48
+ * If the driver left this unset, subclusters are not supported.
49
+ * Then it is safe to treat each cluster as having only one subcluster.
50
+ */
51
+ bdi->subcluster_size = bdi->cluster_size;
52
+ }
53
if (ret < 0) {
54
return ret;
55
}
56
diff --git a/block/qcow2.c b/block/qcow2.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/block/qcow2.c
59
+++ b/block/qcow2.c
60
@@ -XXX,XX +XXX,XX @@ qcow2_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
43
{
61
{
44
if (!job) {
62
BDRVQcow2State *s = bs->opaque;
45
@@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
63
bdi->cluster_size = s->cluster_size;
46
}
64
+ bdi->subcluster_size = s->subcluster_size;
47
65
bdi->vm_state_offset = qcow2_vm_state_offset(s);
48
job = g_malloc0(driver->instance_size);
66
bdi->is_dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY;
49
- error_setg(&job->blocker, "block device is in use by block job: %s",
67
return 0;
50
- BlockJobType_lookup[driver->job_type]);
51
- block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
52
- bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
53
-
54
job->driver = driver;
55
job->id = g_strdup(job_id);
56
job->blk = blk;
57
@@ -XXX,XX +XXX,XX @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
58
job->paused = true;
59
job->pause_count = 1;
60
job->refcnt = 1;
61
+
62
+ error_setg(&job->blocker, "block device is in use by block job: %s",
63
+ BlockJobType_lookup[driver->job_type]);
64
+ block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
65
bs->job = job;
66
67
+ blk_set_dev_ops(blk, &block_job_dev_ops, job);
68
+ bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
69
+
70
QLIST_INSERT_HEAD(&block_jobs, job, job_list);
71
72
blk_add_aio_context_notifier(blk, block_job_attached_aio_context,
73
--
68
--
74
2.9.3
69
2.41.0
75
76
diff view generated by jsdifflib
1
From: John Snow <jsnow@redhat.com>
1
From: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
2
2
3
Allow block backends to forward drain requests to their devices/users.
3
When the target image is using subclusters and we align the request during
4
The initial intended purpose for this patch is to allow BBs to forward
4
copy-on-read, it makes sense to align to subcluster_size rather than
5
requests along to BlockJobs, which will want to pause if their associated
5
cluster_size. Otherwise we end up with unnecessary allocations.
6
BB has entered a drained region.
6
7
7
This commit renames bdrv_round_to_clusters() to bdrv_round_to_subclusters()
8
Signed-off-by: John Snow <jsnow@redhat.com>
8
and utilizes the subcluster_size field of BlockDriverInfo to make the necessary
9
Reviewed-by: Jeff Cody <jcody@redhat.com>
9
alignments. It affects copy-on-read as well as the mirror job (which is
10
Message-id: 20170316212351.13797-3-jsnow@redhat.com
10
using bdrv_round_to_clusters()).
11
Signed-off-by: Jeff Cody <jcody@redhat.com>
11
12
This change also fixes the following bug with a failing assert (covered by
13
the test in the subsequent commit):
14
15
qemu-img create -f qcow2 base.qcow2 64K
16
qemu-img create -f qcow2 -o extended_l2=on,backing_file=base.qcow2,backing_fmt=qcow2 img.qcow2 64K
17
qemu-io -c "write -P 0xaa 0 2K" img.qcow2
18
qemu-io -C -c "read -P 0x00 2K 62K" img.qcow2
19
20
qemu-io: ../block/io.c:1236: bdrv_co_do_copy_on_readv: Assertion `skip_bytes < pnum' failed.
21
22
Reviewed-by: Eric Blake <eblake@redhat.com>
23
Reviewed-by: Denis V. Lunev <den@openvz.org>
24
Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
25
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
26
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
27
Message-ID: <20230711172553.234055-3-andrey.drobyshev@virtuozzo.com>
12
---
28
---
13
block/block-backend.c | 24 ++++++++++++++++++++++--
29
include/block/block-io.h | 8 +++----
14
include/sysemu/block-backend.h | 8 ++++++++
30
block/io.c | 50 ++++++++++++++++++++--------------------
15
2 files changed, 30 insertions(+), 2 deletions(-)
31
block/mirror.c | 8 +++----
16
32
3 files changed, 33 insertions(+), 33 deletions(-)
17
diff --git a/block/block-backend.c b/block/block-backend.c
33
34
diff --git a/include/block/block-io.h b/include/block/block-io.h
18
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
19
--- a/block/block-backend.c
36
--- a/include/block/block-io.h
20
+++ b/block/block-backend.c
37
+++ b/include/block/block-io.h
21
@@ -XXX,XX +XXX,XX @@ struct BlockBackend {
38
@@ -XXX,XX +XXX,XX @@ bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
22
bool allow_write_beyond_eof;
39
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
23
40
Error **errp);
24
NotifierList remove_bs_notifiers, insert_bs_notifiers;
41
BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
25
+
42
-void bdrv_round_to_clusters(BlockDriverState *bs,
26
+ int quiesce_counter;
43
- int64_t offset, int64_t bytes,
27
};
44
- int64_t *cluster_offset,
28
45
- int64_t *cluster_bytes);
29
typedef struct BlockBackendAIOCB {
46
+void bdrv_round_to_subclusters(BlockDriverState *bs,
30
@@ -XXX,XX +XXX,XX @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
47
+ int64_t offset, int64_t bytes,
31
void *opaque)
48
+ int64_t *cluster_offset,
49
+ int64_t *cluster_bytes);
50
51
void bdrv_get_backing_filename(BlockDriverState *bs,
52
char *filename, int filename_size);
53
diff --git a/block/io.c b/block/io.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/block/io.c
56
+++ b/block/io.c
57
@@ -XXX,XX +XXX,XX @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
58
}
59
60
/**
61
- * Round a region to cluster boundaries
62
+ * Round a region to subcluster (if supported) or cluster boundaries
63
*/
64
void coroutine_fn GRAPH_RDLOCK
65
-bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
66
- int64_t *cluster_offset, int64_t *cluster_bytes)
67
+bdrv_round_to_subclusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
68
+ int64_t *align_offset, int64_t *align_bytes)
32
{
69
{
33
/* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
70
BlockDriverInfo bdi;
34
- * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
71
IO_CODE();
35
- * is set. */
72
- if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
36
+ * it that way, so we can assume blk->dev, if present, is a DeviceState if
73
- *cluster_offset = offset;
37
+ * blk->dev_ops is set. Non-device users may use dev_ops without device. */
74
- *cluster_bytes = bytes;
38
assert(!blk->legacy_dev);
75
+ if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.subcluster_size == 0) {
39
76
+ *align_offset = offset;
40
blk->dev_ops = ops;
77
+ *align_bytes = bytes;
41
blk->dev_opaque = opaque;
78
} else {
42
+
79
- int64_t c = bdi.cluster_size;
43
+ /* Are we currently quiesced? Should we enforce this right now? */
80
- *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
44
+ if (blk->quiesce_counter && ops->drained_begin) {
81
- *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
45
+ ops->drained_begin(opaque);
82
+ int64_t c = bdi.subcluster_size;
46
+ }
83
+ *align_offset = QEMU_ALIGN_DOWN(offset, c);
84
+ *align_bytes = QEMU_ALIGN_UP(offset - *align_offset + bytes, c);
85
}
47
}
86
}
48
87
49
/*
88
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
50
@@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child)
89
void *bounce_buffer = NULL;
51
{
90
52
BlockBackend *blk = child->opaque;
91
BlockDriver *drv = bs->drv;
53
92
- int64_t cluster_offset;
54
+ if (++blk->quiesce_counter == 1) {
93
- int64_t cluster_bytes;
55
+ if (blk->dev_ops && blk->dev_ops->drained_begin) {
94
+ int64_t align_offset;
56
+ blk->dev_ops->drained_begin(blk->dev_opaque);
95
+ int64_t align_bytes;
57
+ }
96
int64_t skip_bytes;
58
+ }
97
int ret;
59
+
98
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
60
/* Note that blk->root may not be accessible here yet if we are just
99
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
61
* attaching to a BlockDriverState that is drained. Use child instead. */
100
* BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
62
101
* is one reason we loop rather than doing it all at once.
63
@@ -XXX,XX +XXX,XX @@ static void blk_root_drained_begin(BdrvChild *child)
102
*/
64
static void blk_root_drained_end(BdrvChild *child)
103
- bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
65
{
104
- skip_bytes = offset - cluster_offset;
66
BlockBackend *blk = child->opaque;
105
+ bdrv_round_to_subclusters(bs, offset, bytes, &align_offset, &align_bytes);
67
+ assert(blk->quiesce_counter);
106
+ skip_bytes = offset - align_offset;
68
107
69
assert(blk->public.io_limits_disabled);
108
trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
70
--blk->public.io_limits_disabled;
109
- cluster_offset, cluster_bytes);
71
+
110
+ align_offset, align_bytes);
72
+ if (--blk->quiesce_counter == 0) {
111
73
+ if (blk->dev_ops && blk->dev_ops->drained_end) {
112
- while (cluster_bytes) {
74
+ blk->dev_ops->drained_end(blk->dev_opaque);
113
+ while (align_bytes) {
75
+ }
114
int64_t pnum;
76
+ }
115
77
}
116
if (skip_write) {
78
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
117
ret = 1; /* "already allocated", so nothing will be copied */
118
- pnum = MIN(cluster_bytes, max_transfer);
119
+ pnum = MIN(align_bytes, max_transfer);
120
} else {
121
- ret = bdrv_is_allocated(bs, cluster_offset,
122
- MIN(cluster_bytes, max_transfer), &pnum);
123
+ ret = bdrv_is_allocated(bs, align_offset,
124
+ MIN(align_bytes, max_transfer), &pnum);
125
if (ret < 0) {
126
/*
127
* Safe to treat errors in querying allocation as if
128
* unallocated; we'll probably fail again soon on the
129
* read, but at least that will set a decent errno.
130
*/
131
- pnum = MIN(cluster_bytes, max_transfer);
132
+ pnum = MIN(align_bytes, max_transfer);
133
}
134
135
/* Stop at EOF if the image ends in the middle of the cluster */
136
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
137
/* Must copy-on-read; use the bounce buffer */
138
pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
139
if (!bounce_buffer) {
140
- int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
141
+ int64_t max_we_need = MAX(pnum, align_bytes - pnum);
142
int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
143
int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
144
145
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
146
}
147
qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
148
149
- ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
150
+ ret = bdrv_driver_preadv(bs, align_offset, pnum,
151
&local_qiov, 0, 0);
152
if (ret < 0) {
153
goto err;
154
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
155
/* FIXME: Should we (perhaps conditionally) be setting
156
* BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
157
* that still correctly reads as zero? */
158
- ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
159
+ ret = bdrv_co_do_pwrite_zeroes(bs, align_offset, pnum,
160
BDRV_REQ_WRITE_UNCHANGED);
161
} else {
162
/* This does not change the data on the disk, it is not
163
* necessary to flush even in cache=writethrough mode.
164
*/
165
- ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
166
+ ret = bdrv_driver_pwritev(bs, align_offset, pnum,
167
&local_qiov, 0,
168
BDRV_REQ_WRITE_UNCHANGED);
169
}
170
@@ -XXX,XX +XXX,XX @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
171
}
172
}
173
174
- cluster_offset += pnum;
175
- cluster_bytes -= pnum;
176
+ align_offset += pnum;
177
+ align_bytes -= pnum;
178
progress += pnum - skip_bytes;
179
skip_bytes = 0;
180
}
181
diff --git a/block/mirror.c b/block/mirror.c
79
index XXXXXXX..XXXXXXX 100644
182
index XXXXXXX..XXXXXXX 100644
80
--- a/include/sysemu/block-backend.h
183
--- a/block/mirror.c
81
+++ b/include/sysemu/block-backend.h
184
+++ b/block/mirror.c
82
@@ -XXX,XX +XXX,XX @@ typedef struct BlockDevOps {
185
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
83
* Runs when the size changed (e.g. monitor command block_resize)
186
need_cow |= !test_bit((*offset + *bytes - 1) / s->granularity,
84
*/
187
s->cow_bitmap);
85
void (*resize_cb)(void *opaque);
188
if (need_cow) {
86
+ /*
189
- bdrv_round_to_clusters(blk_bs(s->target), *offset, *bytes,
87
+ * Runs when the backend receives a drain request.
190
- &align_offset, &align_bytes);
88
+ */
191
+ bdrv_round_to_subclusters(blk_bs(s->target), *offset, *bytes,
89
+ void (*drained_begin)(void *opaque);
192
+ &align_offset, &align_bytes);
90
+ /*
193
}
91
+ * Runs when the backend's last drain request ends.
194
92
+ */
195
if (align_bytes > max_bytes) {
93
+ void (*drained_end)(void *opaque);
196
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
94
} BlockDevOps;
197
int64_t target_offset;
95
198
int64_t target_bytes;
96
/* This struct is embedded in (the private) BlockBackend struct and contains
199
WITH_GRAPH_RDLOCK_GUARD() {
200
- bdrv_round_to_clusters(blk_bs(s->target), offset, io_bytes,
201
- &target_offset, &target_bytes);
202
+ bdrv_round_to_subclusters(blk_bs(s->target), offset, io_bytes,
203
+ &target_offset, &target_bytes);
204
}
205
if (target_offset == offset &&
206
target_bytes == io_bytes) {
97
--
207
--
98
2.9.3
208
2.41.0
99
100
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
2
2
3
Streaming or any other block job hangs when performed on a block device
3
Add a testcase which checks that allocations during copy-on-read are
4
that has a non-default iothread. This happens because the AioContext
4
performed on a subcluster basis when subclusters are enabled in the target
5
is acquired twice by block_job_defer_to_main_loop_bh and then released
5
image.
6
only once by BDRV_POLL_WHILE. (Insert rants on recursive mutexes, which
7
unfortunately are a temporary but necessary evil for iothreads at the
8
moment).
9
6
10
Luckily, the reason for the double acquisition is simple; the function
7
This testcase also triggers the following assert with the previous commit
11
acquires the AioContext for both the job iothread and the BDS iothread,
8
not being applied, so we check that as well:
12
in case the BDS iothread was changed while the job was running. It
13
is therefore enough to skip the second acquisition when the two
14
AioContexts are one and the same.
15
9
16
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
10
qemu-io: ../block/io.c:1236: bdrv_co_do_copy_on_readv: Assertion `skip_bytes < pnum' failed.
11
17
Reviewed-by: Eric Blake <eblake@redhat.com>
12
Reviewed-by: Eric Blake <eblake@redhat.com>
18
Reviewed-by: Jeff Cody <jcody@redhat.com>
13
Reviewed-by: Denis V. Lunev <den@openvz.org>
19
Message-id: 1490118490-5597-1-git-send-email-pbonzini@redhat.com
14
Signed-off-by: Andrey Drobyshev <andrey.drobyshev@virtuozzo.com>
20
Signed-off-by: Jeff Cody <jcody@redhat.com>
15
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Message-ID: <20230711172553.234055-4-andrey.drobyshev@virtuozzo.com>
21
---
18
---
22
blockjob.c | 8 ++++++--
19
tests/qemu-iotests/197 | 29 +++++++++++++++++++++++++++++
23
1 file changed, 6 insertions(+), 2 deletions(-)
20
tests/qemu-iotests/197.out | 24 ++++++++++++++++++++++++
21
2 files changed, 53 insertions(+)
24
22
25
diff --git a/blockjob.c b/blockjob.c
23
diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
24
index XXXXXXX..XXXXXXX 100755
25
--- a/tests/qemu-iotests/197
26
+++ b/tests/qemu-iotests/197
27
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -f qcow2 -C -c 'read 0 1024' "$TEST_WRAP" | _filter_qemu_io
28
$QEMU_IO -f qcow2 -c map "$TEST_WRAP"
29
_check_test_img
30
31
+echo
32
+echo '=== Copy-on-read with subclusters ==='
33
+echo
34
+
35
+# Create base and top images 64K (1 cluster) each. Make subclusters enabled
36
+# for the top image
37
+_make_test_img 64K
38
+IMGPROTO=file IMGFMT=qcow2 TEST_IMG_FILE="$TEST_WRAP" \
39
+ _make_test_img --no-opts -o extended_l2=true -F "$IMGFMT" -b "$TEST_IMG" \
40
+ 64K | _filter_img_create
41
+
42
+$QEMU_IO -c "write -P 0xaa 0 64k" "$TEST_IMG" | _filter_qemu_io
43
+
44
+# Allocate individual subclusters in the top image, and not the whole cluster
45
+$QEMU_IO -c "write -P 0xbb 28K 2K" -c "write -P 0xcc 34K 2K" "$TEST_WRAP" \
46
+ | _filter_qemu_io
47
+
48
+# Only 2 subclusters should be allocated in the top image at this point
49
+$QEMU_IMG map "$TEST_WRAP" | _filter_qemu_img_map
50
+
51
+# Actual copy-on-read operation
52
+$QEMU_IO -C -c "read -P 0xaa 30K 4K" "$TEST_WRAP" | _filter_qemu_io
53
+
54
+# And here we should have 4 subclusters allocated right in the middle of the
55
+# top image. Make sure the whole cluster remains unallocated
56
+$QEMU_IMG map "$TEST_WRAP" | _filter_qemu_img_map
57
+
58
+_check_test_img
59
+
60
# success, all done
61
echo '*** done'
62
status=0
63
diff --git a/tests/qemu-iotests/197.out b/tests/qemu-iotests/197.out
26
index XXXXXXX..XXXXXXX 100644
64
index XXXXXXX..XXXXXXX 100644
27
--- a/blockjob.c
65
--- a/tests/qemu-iotests/197.out
28
+++ b/blockjob.c
66
+++ b/tests/qemu-iotests/197.out
29
@@ -XXX,XX +XXX,XX @@ static void block_job_defer_to_main_loop_bh(void *opaque)
67
@@ -XXX,XX +XXX,XX @@ read 1024/1024 bytes at offset 0
30
68
1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
31
/* Fetch BDS AioContext again, in case it has changed */
69
1 KiB (0x400) bytes allocated at offset 0 bytes (0x0)
32
aio_context = blk_get_aio_context(data->job->blk);
70
No errors were found on the image.
33
- aio_context_acquire(aio_context);
71
+
34
+ if (aio_context != data->aio_context) {
72
+=== Copy-on-read with subclusters ===
35
+ aio_context_acquire(aio_context);
73
+
36
+ }
74
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65536
37
75
+Formatting 'TEST_DIR/t.wrap.IMGFMT', fmt=IMGFMT size=65536 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT
38
data->job->deferred_to_main_loop = false;
76
+wrote 65536/65536 bytes at offset 0
39
data->fn(data->job, data->opaque);
77
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
40
78
+wrote 2048/2048 bytes at offset 28672
41
- aio_context_release(aio_context);
79
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
42
+ if (aio_context != data->aio_context) {
80
+wrote 2048/2048 bytes at offset 34816
43
+ aio_context_release(aio_context);
81
+2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
44
+ }
82
+Offset Length File
45
83
+0 0x7000 TEST_DIR/t.IMGFMT
46
aio_context_release(data->aio_context);
84
+0x7000 0x800 TEST_DIR/t.wrap.IMGFMT
47
85
+0x7800 0x1000 TEST_DIR/t.IMGFMT
86
+0x8800 0x800 TEST_DIR/t.wrap.IMGFMT
87
+0x9000 0x7000 TEST_DIR/t.IMGFMT
88
+read 4096/4096 bytes at offset 30720
89
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
90
+Offset Length File
91
+0 0x7000 TEST_DIR/t.IMGFMT
92
+0x7000 0x2000 TEST_DIR/t.wrap.IMGFMT
93
+0x9000 0x7000 TEST_DIR/t.IMGFMT
94
+No errors were found on the image.
95
*** done
48
--
96
--
49
2.9.3
97
2.41.0
50
51
diff view generated by jsdifflib
1
From: John Snow <jsnow@redhat.com>
1
liburing does not clear sqe->user_data. We must do it ourselves to avoid
2
undefined behavior in process_cqe() when user_data is used.
2
3
3
The purpose of this shim is to allow us to pause pre-started jobs.
4
Note that fdmon-io_uring is currently disabled, so this is a latent bug
4
The purpose of *that* is to allow us to buffer a pause request that
5
that does not affect users. Let's merge this fix now to make it easier
5
will be able to take effect before the job ever does any work, allowing
6
to enable fdmon-io_uring in the future (and I'm working on that).
6
us to create jobs during a quiescent state (under which they will be
7
automatically paused), then resuming the jobs after the critical section
8
in any order, either:
9
7
10
(1) -block_job_start
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
-block_job_resume (via e.g. drained_end)
9
Message-ID: <20230426212639.82310-1-stefanha@redhat.com>
10
---
11
util/fdmon-io_uring.c | 2 ++
12
1 file changed, 2 insertions(+)
12
13
13
(2) -block_job_resume (via e.g. drained_end)
14
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
14
-block_job_start
15
16
The problem that requires a startup wrapper is the idea that a job must
17
start in the busy=true state only its first time-- all subsequent entries
18
require busy to be false, and the toggling of this state is otherwise
19
handled during existing pause and yield points.
20
21
The wrapper simply allows us to mandate that a job can "start," set busy
22
to true, then immediately pause only if necessary. We could avoid
23
requiring a wrapper, but all jobs would need to do it, so it's been
24
factored out here.
25
26
Signed-off-by: John Snow <jsnow@redhat.com>
27
Reviewed-by: Jeff Cody <jcody@redhat.com>
28
Message-id: 20170316212351.13797-2-jsnow@redhat.com
29
Signed-off-by: Jeff Cody <jcody@redhat.com>
30
---
31
blockjob.c | 26 +++++++++++++++++++-------
32
1 file changed, 19 insertions(+), 7 deletions(-)
33
34
diff --git a/blockjob.c b/blockjob.c
35
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
36
--- a/blockjob.c
16
--- a/util/fdmon-io_uring.c
37
+++ b/blockjob.c
17
+++ b/util/fdmon-io_uring.c
38
@@ -XXX,XX +XXX,XX @@ static bool block_job_started(BlockJob *job)
18
@@ -XXX,XX +XXX,XX @@ static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node)
39
return job->co;
19
#else
20
io_uring_prep_poll_remove(sqe, node);
21
#endif
22
+ io_uring_sqe_set_data(sqe, NULL);
40
}
23
}
41
24
42
+/**
25
/* Add a timeout that self-cancels when another cqe becomes ready */
43
+ * All jobs must allow a pause point before entering their job proper. This
26
@@ -XXX,XX +XXX,XX @@ static void add_timeout_sqe(AioContext *ctx, int64_t ns)
44
+ * ensures that jobs can be paused prior to being started, then resumed later.
27
45
+ */
28
sqe = get_sqe(ctx);
46
+static void coroutine_fn block_job_co_entry(void *opaque)
29
io_uring_prep_timeout(sqe, &ts, 1, 0);
47
+{
30
+ io_uring_sqe_set_data(sqe, NULL);
48
+ BlockJob *job = opaque;
49
+
50
+ assert(job && job->driver && job->driver->start);
51
+ block_job_pause_point(job);
52
+ job->driver->start(job);
53
+}
54
+
55
void block_job_start(BlockJob *job)
56
{
57
assert(job && !block_job_started(job) && job->paused &&
58
- !job->busy && job->driver->start);
59
- job->co = qemu_coroutine_create(job->driver->start, job);
60
- if (--job->pause_count == 0) {
61
- job->paused = false;
62
- job->busy = true;
63
- qemu_coroutine_enter(job->co);
64
- }
65
+ job->driver && job->driver->start);
66
+ job->co = qemu_coroutine_create(block_job_co_entry, job);
67
+ job->pause_count--;
68
+ job->busy = true;
69
+ job->paused = false;
70
+ qemu_coroutine_enter(job->co);
71
}
31
}
72
32
73
void block_job_ref(BlockJob *job)
33
/* Add sqes from ctx->submit_list for submission */
74
--
34
--
75
2.9.3
35
2.41.0
76
77
diff view generated by jsdifflib