The following changes since commit 825b96dbcee23d134b691fc75618b59c5f53da32:

  Merge tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-03-11 09:32:07 +0800)

are available in the Git repository at:

  https://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to a93c04f3cbe690877b3297a9df4767aa811fcd97:

  virtio-scsi: only expose cmd vqs via iothread-vq-mapping (2025-03-11 15:49:22 +0100)

----------------------------------------------------------------
Block layer patches

- virtio-scsi: add iothread-vq-mapping parameter
- Improve writethrough performance
- Fix missing zero init in bdrv_snapshot_goto()
- Code cleanup and iotests fixes

----------------------------------------------------------------
Kevin Wolf (8):
      block: Remove unused blk_op_is_blocked()
      block: Zero block driver state before reopening
      file-posix: Support FUA writes
      block/io: Ignore FUA with cache.no-flush=on
      aio: Create AioPolledEvent
      aio-posix: Factor out adjust_polling_time()
      aio-posix: Separate AioPolledEvent per AioHandler
      aio-posix: Adjust polling time also for new handlers

Stefan Hajnoczi (13):
      scsi-disk: drop unused SCSIDiskState->bh field
      dma: use current AioContext for dma_blk_io()
      scsi: track per-SCSIRequest AioContext
      scsi: introduce requests_lock
      virtio-scsi: introduce event and ctrl virtqueue locks
      virtio-scsi: protect events_dropped field
      virtio-scsi: perform TMFs in appropriate AioContexts
      virtio-blk: extract cleanup_iothread_vq_mapping() function
      virtio-blk: tidy up iothread_vq_mapping functions
      virtio: extract iothread-vq-mapping.h API
      virtio-scsi: add iothread-vq-mapping parameter
      virtio-scsi: handle ctrl virtqueue in main loop
      virtio-scsi: only expose cmd vqs via iothread-vq-mapping

Thomas Huth (1):
      iotests: Limit qsd-migrate to working formats

 include/block/aio.h                         |   5 +-
 include/block/raw-aio.h                     |   8 +-
 include/hw/scsi/scsi.h                      |   8 +-
 include/hw/virtio/iothread-vq-mapping.h     |  45 +++
 include/hw/virtio/virtio-scsi.h             |  15 +-
 include/system/block-backend-global-state.h |   1 -
 include/system/dma.h                        |   3 +-
 util/aio-posix.h                            |   1 +
 block/block-backend.c                       |  12 -
 block/file-posix.c                          |  26 +-
 block/io.c                                  |   4 +
 block/io_uring.c                            |  13 +-
 block/linux-aio.c                           |  24 +-
 block/snapshot.c                            |   1 +
 hw/block/virtio-blk.c                       | 132 +-------
 hw/ide/core.c                               |   3 +-
 hw/ide/macio.c                              |   3 +-
 hw/scsi/scsi-bus.c                          | 121 +++++--
 hw/scsi/scsi-disk.c                         |  24 +-
 hw/scsi/virtio-scsi-dataplane.c             | 103 ++++--
 hw/scsi/virtio-scsi.c                       | 502 ++++++++++++++++------------
 hw/virtio/iothread-vq-mapping.c             | 131 ++++++++
 system/dma-helpers.c                        |   8 +-
 util/aio-posix.c                            | 114 ++++---
 util/async.c                                |   1 -
 hw/virtio/meson.build                       |   1 +
 meson.build                                 |   4 +
 tests/qemu-iotests/tests/qsd-migrate        |   2 +-
 28 files changed, 803 insertions(+), 512 deletions(-)
 create mode 100644 include/hw/virtio/iothread-vq-mapping.h
 create mode 100644 hw/virtio/iothread-vq-mapping.c
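
For reference, a tag like this can be fetched directly for local review; an
illustrative invocation (not part of the original mail):

    git fetch https://repo.or.cz/qemu/kevin.git tags/for-upstream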
diff view generated by jsdifflib
Commit fc4e394b28 removed the last caller of blk_op_is_blocked(). Remove
the now unused function.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250206165331.379033-1-kwolf@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/system/block-backend-global-state.h |  1 -
 block/block-backend.c                       | 12 ------------
 2 files changed, 13 deletions(-)

diff --git a/include/system/block-backend-global-state.h b/include/system/block-backend-global-state.h
index XXXXXXX..XXXXXXX 100644
--- a/include/system/block-backend-global-state.h
+++ b/include/system/block-backend-global-state.h
@@ -XXX,XX +XXX,XX @@ bool blk_supports_write_perm(BlockBackend *blk);
 bool blk_is_sg(BlockBackend *blk);
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
 int blk_get_flags(BlockBackend *blk);
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
                         Error **errp);
 void blk_add_aio_context_notifier(BlockBackend *blk,
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ void *blk_blockalign(BlockBackend *blk, size_t size)
     return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
 }

-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
-{
-    BlockDriverState *bs = blk_bs(blk);
-    GLOBAL_STATE_CODE();
-    GRAPH_RDLOCK_GUARD_MAINLOOP();
-
-    if (!bs) {
-        return false;
-    }
-
-    return bdrv_op_is_blocked(bs, op, errp);
-}
-
 /**
  * Return BB's current AioContext. Note that this context may change
--
2.48.1
diff view generated by jsdifflib
Block drivers assume in their .bdrv_open() implementation that their
state in bs->opaque has been zeroed; it is initially allocated with
g_malloc0() in bdrv_open_driver().

bdrv_snapshot_goto() needs to make sure that it is zeroed again before
calling drv->bdrv_open() to avoid that block drivers use stale values.

One symptom of this bug is VMDK running into a double free when the user
tries to apply an internal snapshot like 'qemu-img snapshot -a test
test.vmdk'. This should be a graceful error because VMDK doesn't support
internal snapshots.

==25507== Invalid free() / delete / delete[] / realloc()
==25507==    at 0x484B347: realloc (vg_replace_malloc.c:1801)
==25507==    by 0x54B592A: g_realloc (gmem.c:171)
==25507==    by 0x1B221D: vmdk_add_extent (../block/vmdk.c:570)
==25507==    by 0x1B1084: vmdk_open_sparse (../block/vmdk.c:1059)
==25507==    by 0x1AF3D8: vmdk_open (../block/vmdk.c:1371)
==25507==    by 0x1A2AE0: bdrv_snapshot_goto (../block/snapshot.c:299)
==25507==    by 0x205C77: img_snapshot (../qemu-img.c:3500)
==25507==    by 0x58FA087: (below main) (libc_start_call_main.h:58)
==25507== Address 0x832f3e0 is 0 bytes inside a block of size 272 free'd
==25507==    at 0x4846B83: free (vg_replace_malloc.c:989)
==25507==    by 0x54AEAC4: g_free (gmem.c:208)
==25507==    by 0x1AF629: vmdk_close (../block/vmdk.c:2889)
==25507==    by 0x1A2A9C: bdrv_snapshot_goto (../block/snapshot.c:290)
==25507==    by 0x205C77: img_snapshot (../qemu-img.c:3500)
==25507==    by 0x58FA087: (below main) (libc_start_call_main.h:58)

This error was discovered by fuzzing qemu-img.
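
The underlying pattern, reduced to a minimal illustration (a hypothetical
demo_* driver sketched here for clarity, not QEMU's actual VMDK code):

    /* A driver whose open assumes zeroed state, as QEMU block drivers do. */
    typedef struct DemoState {
        void *extents;          /* grown with g_realloc() during open */
    } DemoState;

    static void demo_close(DemoState *s)
    {
        g_free(s->extents);     /* frees, but leaves a dangling pointer */
    }

    static int demo_open(DemoState *s)
    {
        /* Correct only if s->extents is NULL: g_realloc(NULL, n) acts like
         * g_malloc(n). After demo_close() without re-zeroing the state,
         * this reallocs freed memory - the invalid realloc seen above. */
        s->extents = g_realloc(s->extents, 16 * sizeof(void *));
        return 0;
    }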
Cc: qemu-stable@nongnu.org
Closes: https://gitlab.com/qemu-project/qemu/-/issues/2853
Closes: https://gitlab.com/qemu-project/qemu/-/issues/2851
Reported-by: Denis Rastyogin <gerben@altlinux.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250310104858.28221-1-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/snapshot.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/snapshot.c b/block/snapshot.c
index XXXXXXX..XXXXXXX 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -XXX,XX +XXX,XX @@ int bdrv_snapshot_goto(BlockDriverState *bs,
         bdrv_graph_wrunlock();

         ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
+        memset(bs->opaque, 0, drv->instance_size);
         open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
         qobject_unref(options);
         if (open_ret < 0) {
--
2.48.1
diff view generated by jsdifflib
Until now, FUA was always emulated with a separate flush after the write
for file-posix. The overhead of processing a second request can reduce
performance significantly for a guest disk that has disabled the write
cache, especially if the host disk is already write through, too, and
the flush isn't actually doing anything.

Advertise support for REQ_FUA in write requests and implement it for
Linux AIO and io_uring using the RWF_DSYNC flag for write requests. The
thread pool still performs a separate fdatasync() call. This can be
improved later by using the pwritev2() syscall if available.

As an example, this is how fio numbers can be improved in some scenarios
with this patch (all using virtio-blk with cache=directsync on an nvme
block device for the VM, fio with ioengine=libaio,direct=1,sync=1):

                              |      old      | with FUA support
------------------------------+---------------+-------------------
bs=4k, iodepth=1, numjobs=1   |  45.6k iops   |  56.1k iops
bs=4k, iodepth=1, numjobs=16  | 183.3k iops   | 236.0k iops
bs=4k, iodepth=16, numjobs=1  | 258.4k iops   | 311.1k iops
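
For reference, a fio invocation matching the parameters quoted above might
look like this (illustrative only; job name, device path and runtime are
placeholders, not taken from the original benchmark setup):

    fio --name=fua-test --filename=/dev/vdb --rw=randwrite \
        --ioengine=libaio --direct=1 --sync=1 \
        --bs=4k --iodepth=1 --numjobs=1 --runtime=60 --time_based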
However, not all scenarios are clear wins. On another slower disk I saw
little to no improvement. In fact, in two corner case scenarios, I even
observed a regression, which I however consider acceptable:

1. On slow host disks in a write through cache mode, when the guest is
   using virtio-blk in a separate iothread so that polling can be
   enabled, and each completion is quickly followed up with a new
   request (so that polling gets it), it can happen that enabling FUA
   makes things slower - the additional very fast no-op flush we used to
   have gave the adaptive polling algorithm a success so that it kept
   polling. Without it, we only have the slow write request, which
   disables polling. This is a problem in the polling algorithm that
   will be fixed later in this series.

2. With a high queue depth, it can be beneficial to have flush requests
   for another reason: The optimisation in bdrv_co_flush() that flushes
   only once per write generation acts as a synchronisation mechanism
   that lets all requests complete at the same time. This can result in
   better batching and if the disk is very fast (I only saw this with a
   null_blk backend), this can make up for the overhead of the flush and
   improve throughput. In theory, we could optionally introduce a
   similar artificial latency in the normal completion path to achieve
   the same kind of completion batching. This is not implemented in this
   series.

Compatibility is not a concern for io_uring, it has supported RWF_DSYNC
from the start. Linux AIO started supporting it in Linux 4.13 and libaio
0.3.111. The kernel is not a problem for any supported build platform,
so it's not necessary to add runtime checks. However, openSUSE is still
stuck with an older libaio version that would break the build. We must
detect this at build time to avoid build failures.
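
At the syscall level, the mechanism is the per-write RWF_DSYNC flag; a
minimal standalone sketch (assuming Linux with pwritev2() available; error
handling elided, function name is made up for illustration):

    #define _GNU_SOURCE
    #include <sys/uio.h>

    /* One write that is both submitted and made durable, without a
     * separate fdatasync(): RWF_DSYNC gives FUA-like semantics. */
    static ssize_t write_fua(int fd, const struct iovec *iov, int iovcnt,
                             off_t offset)
    {
        return pwritev2(fd, iov, iovcnt, offset, RWF_DSYNC);
    }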
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-2-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/raw-aio.h |  8 ++++++--
 block/file-posix.c      | 26 ++++++++++++++++++--------
 block/io_uring.c        | 13 ++++++++-----
 block/linux-aio.c       | 24 +++++++++++++++++++++---
 meson.build             |  4 ++++
 5 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -XXX,XX +XXX,XX @@
 #define QEMU_RAW_AIO_H

 #include "block/aio.h"
+#include "block/block-common.h"
 #include "qemu/iov.h"

 /* AIO request types */
@@ -XXX,XX +XXX,XX @@ void laio_cleanup(LinuxAioState *s);

 /* laio_co_submit: submit I/O requests in the thread's current AioContext. */
 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
-                                int type, uint64_t dev_max_batch);
+                                int type, BdrvRequestFlags flags,
+                                uint64_t dev_max_batch);

 bool laio_has_fdsync(int);
+bool laio_has_fua(void);
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
 #endif
@@ -XXX,XX +XXX,XX @@ void luring_cleanup(LuringState *s);

 /* luring_co_submit: submit I/O requests in the thread's current AioContext. */
 int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
-                                  QEMUIOVector *qiov, int type);
+                                  QEMUIOVector *qiov, int type,
+                                  BdrvRequestFlags flags);
 void luring_detach_aio_context(LuringState *s, AioContext *old_context);
 void luring_attach_aio_context(LuringState *s, AioContext *new_context);
 #endif
diff --git a/block/file-posix.c b/block/file-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -XXX,XX +XXX,XX @@ static int fd_open(BlockDriverState *bs)
 }

 static int64_t raw_getlength(BlockDriverState *bs);
+static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs);

 typedef struct RawPosixAIOData {
     BlockDriverState *bs;
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
 #endif
     s->needs_alignment = raw_needs_alignment(bs);

+    if (!s->use_linux_aio || laio_has_fua()) {
+        bs->supported_write_flags = BDRV_REQ_FUA;
+    }
+
     bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
     if (S_ISREG(st.st_mode)) {
         /* When extending regular files, we get zeros from the OS */
@@ -XXX,XX +XXX,XX @@ static inline bool raw_check_linux_aio(BDRVRawState *s)
 #endif

 static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
-                                   uint64_t bytes, QEMUIOVector *qiov, int type)
+                                   uint64_t bytes, QEMUIOVector *qiov, int type,
+                                   int flags)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
 #ifdef CONFIG_LINUX_IO_URING
     } else if (raw_check_linux_io_uring(s)) {
         assert(qiov->size == bytes);
-        ret = luring_co_submit(bs, s->fd, offset, qiov, type);
+        ret = luring_co_submit(bs, s->fd, offset, qiov, type, flags);
         goto out;
 #endif
 #ifdef CONFIG_LINUX_AIO
     } else if (raw_check_linux_aio(s)) {
         assert(qiov->size == bytes);
-        ret = laio_co_submit(s->fd, offset, qiov, type,
+        ret = laio_co_submit(s->fd, offset, qiov, type, flags,
                              s->aio_max_batch);
         goto out;
 #endif
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,

     assert(qiov->size == bytes);
     ret = raw_thread_pool_submit(handle_aiocb_rw, &acb);
+    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+        /* TODO Use pwritev2() instead if it's available */
+        ret = raw_co_flush_to_disk(bs);
+    }
     goto out; /* Avoid the compiler err of unused label */

 out:
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
                                       int64_t bytes, QEMUIOVector *qiov,
                                       BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags);
 }

 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
                                        int64_t bytes, QEMUIOVector *qiov,
                                        BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags);
 }

 static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)

 #ifdef CONFIG_LINUX_IO_URING
     if (raw_check_linux_io_uring(s)) {
-        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
+        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
     }
 #endif
 #ifdef CONFIG_LINUX_AIO
     if (s->has_laio_fdsync && raw_check_linux_aio(s)) {
-        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
+        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0, 0);
     }
 #endif
     return raw_thread_pool_submit(handle_aiocb_flush, &acb);
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
     }

     trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
-    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND, 0);
 }
 #endif

diff --git a/block/io_uring.c b/block/io_uring.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -XXX,XX +XXX,XX @@ static void luring_deferred_fn(void *opaque)
 *
 */
 static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
-                            uint64_t offset, int type)
+                            uint64_t offset, int type, BdrvRequestFlags flags)
 {
     int ret;
     struct io_uring_sqe *sqes = &luringcb->sqeq;
+    int luring_flags;

     switch (type) {
     case QEMU_AIO_WRITE:
-        io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
-                             luringcb->qiov->niov, offset);
+        luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        io_uring_prep_writev2(sqes, fd, luringcb->qiov->iov,
+                              luringcb->qiov->niov, offset, luring_flags);
         break;
     case QEMU_AIO_ZONE_APPEND:
         io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
@@ -XXX,XX +XXX,XX @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
 }

 int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
-                                  QEMUIOVector *qiov, int type)
+                                  QEMUIOVector *qiov, int type,
+                                  BdrvRequestFlags flags)
 {
     int ret;
     AioContext *ctx = qemu_get_current_aio_context();
@@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
     };
     trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0,
                            type);
-    ret = luring_do_submit(fd, &luringcb, s, offset, type);
+    ret = luring_do_submit(fd, &luringcb, s, offset, type, flags);

     if (ret < 0) {
         return ret;
diff --git a/block/linux-aio.c b/block/linux-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -XXX,XX +XXX,XX @@ static void laio_deferred_fn(void *opaque)
 }

 static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
-                          int type, uint64_t dev_max_batch)
+                          int type, BdrvRequestFlags flags,
+                          uint64_t dev_max_batch)
 {
     LinuxAioState *s = laiocb->ctx;
     struct iocb *iocbs = &laiocb->iocb;
     QEMUIOVector *qiov = laiocb->qiov;
+    int laio_flags;

     switch (type) {
     case QEMU_AIO_WRITE:
+#ifdef HAVE_IO_PREP_PWRITEV2
+        laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
+#else
+        assert(flags == 0);
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
+#endif
         break;
     case QEMU_AIO_ZONE_APPEND:
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
 }

 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
-                                int type, uint64_t dev_max_batch)
+                                int type, BdrvRequestFlags flags,
+                                uint64_t dev_max_batch)
 {
     int ret;
     AioContext *ctx = qemu_get_current_aio_context();
@@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
         .qiov = qiov,
     };

-    ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch);
+    ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
     if (ret < 0) {
         return ret;
     }
@@ -XXX,XX +XXX,XX @@ bool laio_has_fdsync(int fd)
     io_destroy(ctx);
     return (ret == -EINVAL) ? false : true;
 }
+
+bool laio_has_fua(void)
+{
+#ifdef HAVE_IO_PREP_PWRITEV2
+    return true;
+#else
+    return false;
+#endif
+}
diff --git a/meson.build b/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/meson.build
+++ b/meson.build
@@ -XXX,XX +XXX,XX @@ config_host_data.set('HAVE_OPTRESET',
                      cc.has_header_symbol('getopt.h', 'optreset'))
 config_host_data.set('HAVE_IPPROTO_MPTCP',
                      cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP'))
+if libaio.found()
+  config_host_data.set('HAVE_IO_PREP_PWRITEV2',
+                       cc.has_header_symbol('libaio.h', 'io_prep_pwritev2'))
+endif

 # has_member
 config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
--
2.48.1
diff view generated by jsdifflib
For block drivers that don't advertise FUA support, we already call
bdrv_co_flush(), which considers BDRV_O_NO_FLUSH. However, drivers that
do support FUA still see the FUA flag with BDRV_O_NO_FLUSH and get the
associated performance penalty that cache.no-flush=on was supposed to
avoid.

Clear FUA for write requests if BDRV_O_NO_FLUSH is set.
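
For context, cache.no-flush=on is the user-visible switch in question; it is
set on the drive options, for example (an illustrative command-line fragment,
with the image name as a placeholder):

    -drive file=disk.qcow2,if=virtio,cache.no-flush=on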
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-3-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/io.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
         return -ENOMEDIUM;
     }

+    if (bs->open_flags & BDRV_O_NO_FLUSH) {
+        flags &= ~BDRV_REQ_FUA;
+    }
+
     if ((flags & BDRV_REQ_FUA) &&
         (~bs->supported_write_flags & BDRV_REQ_FUA)) {
         flags &= ~BDRV_REQ_FUA;
--
2.48.1
diff view generated by jsdifflib
As a preparation for having multiple adaptive polling states per
AioContext, move the 'ns' field into a separate struct.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-4-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/aio.h |  6 +++++-
 util/aio-posix.c    | 31 ++++++++++++++++---------------
 util/async.c        |  3 ++-
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/include/block/aio.h b/include/block/aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -XXX,XX +XXX,XX @@ struct BHListSlice {

 typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;

+typedef struct AioPolledEvent {
+    int64_t ns;        /* current polling time in nanoseconds */
+} AioPolledEvent;
+
 struct AioContext {
     GSource source;

@@ -XXX,XX +XXX,XX @@ struct AioContext {
     int poll_disable_cnt;

     /* Polling mode parameters */
-    int64_t poll_ns;        /* current polling time in nanoseconds */
+    AioPolledEvent poll;
     int64_t poll_max_ns;    /* maximum polling time in nanoseconds */
     int64_t poll_grow;      /* polling time growth factor */
     int64_t poll_shrink;    /* polling time shrink factor */
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
         return false;
     }

-    max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
+    max_ns = qemu_soonest_timeout(*timeout, ctx->poll.ns);
     if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
         /*
          * Enable poll mode. It pairs with the poll_set_started() in
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     if (ctx->poll_max_ns) {
         int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;

-        if (block_ns <= ctx->poll_ns) {
+        if (block_ns <= ctx->poll.ns) {
             /* This is the sweet spot, no adjustment needed */
         } else if (block_ns > ctx->poll_max_ns) {
             /* We'd have to poll for too long, poll less */
-            int64_t old = ctx->poll_ns;
+            int64_t old = ctx->poll.ns;

             if (ctx->poll_shrink) {
-                ctx->poll_ns /= ctx->poll_shrink;
+                ctx->poll.ns /= ctx->poll_shrink;
             } else {
-                ctx->poll_ns = 0;
+                ctx->poll.ns = 0;
             }

-            trace_poll_shrink(ctx, old, ctx->poll_ns);
-        } else if (ctx->poll_ns < ctx->poll_max_ns &&
+            trace_poll_shrink(ctx, old, ctx->poll.ns);
+        } else if (ctx->poll.ns < ctx->poll_max_ns &&
                    block_ns < ctx->poll_max_ns) {
             /* There is room to grow, poll longer */
-            int64_t old = ctx->poll_ns;
+            int64_t old = ctx->poll.ns;
             int64_t grow = ctx->poll_grow;

             if (grow == 0) {
                 grow = 2;
             }

-            if (ctx->poll_ns) {
-                ctx->poll_ns *= grow;
+            if (ctx->poll.ns) {
+                ctx->poll.ns *= grow;
             } else {
-                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
+                ctx->poll.ns = 4000; /* start polling at 4 microseconds */
             }

-            if (ctx->poll_ns > ctx->poll_max_ns) {
-                ctx->poll_ns = ctx->poll_max_ns;
+            if (ctx->poll.ns > ctx->poll_max_ns) {
+                ctx->poll.ns = ctx->poll_max_ns;
             }

-            trace_poll_grow(ctx, old, ctx->poll_ns);
+            trace_poll_grow(ctx, old, ctx->poll.ns);
         }
     }

@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
     /* No thread synchronization here, it doesn't matter if an incorrect value
      * is used once.
      */
+    ctx->poll.ns = 0;
+
     ctx->poll_max_ns = max_ns;
-    ctx->poll_ns = 0;
     ctx->poll_grow = grow;
     ctx->poll_shrink = shrink;

diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
     qemu_rec_mutex_init(&ctx->lock);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

-    ctx->poll_ns = 0;
+    ctx->poll.ns = 0;
+
     ctx->poll_max_ns = 0;
     ctx->poll_grow = 0;
     ctx->poll_shrink = 0;
--
2.48.1
diff view generated by jsdifflib
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-5-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 util/aio-posix.c | 77 ++++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 36 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
     return false;
 }

+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+                                int64_t block_ns)
+{
+    if (block_ns <= poll->ns) {
+        /* This is the sweet spot, no adjustment needed */
+    } else if (block_ns > ctx->poll_max_ns) {
+        /* We'd have to poll for too long, poll less */
+        int64_t old = poll->ns;
+
+        if (ctx->poll_shrink) {
+            poll->ns /= ctx->poll_shrink;
+        } else {
+            poll->ns = 0;
+        }
+
+        trace_poll_shrink(ctx, old, poll->ns);
+    } else if (poll->ns < ctx->poll_max_ns &&
+               block_ns < ctx->poll_max_ns) {
+        /* There is room to grow, poll longer */
+        int64_t old = poll->ns;
+        int64_t grow = ctx->poll_grow;
+
+        if (grow == 0) {
+            grow = 2;
+        }
+
+        if (poll->ns) {
+            poll->ns *= grow;
+        } else {
+            poll->ns = 4000; /* start polling at 4 microseconds */
+        }
+
+        if (poll->ns > ctx->poll_max_ns) {
+            poll->ns = ctx->poll_max_ns;
+        }
+
+        trace_poll_grow(ctx, old, poll->ns);
+    }
+}
+
 bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* Adjust polling time */
     if (ctx->poll_max_ns) {
         int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-
-        if (block_ns <= ctx->poll.ns) {
-            /* This is the sweet spot, no adjustment needed */
-        } else if (block_ns > ctx->poll_max_ns) {
-            /* We'd have to poll for too long, poll less */
-            int64_t old = ctx->poll.ns;
-
-            if (ctx->poll_shrink) {
-                ctx->poll.ns /= ctx->poll_shrink;
-            } else {
-                ctx->poll.ns = 0;
-            }
-
-            trace_poll_shrink(ctx, old, ctx->poll.ns);
-        } else if (ctx->poll.ns < ctx->poll_max_ns &&
-                   block_ns < ctx->poll_max_ns) {
-            /* There is room to grow, poll longer */
-            int64_t old = ctx->poll.ns;
-            int64_t grow = ctx->poll_grow;
-
-            if (grow == 0) {
-                grow = 2;
-            }
-
-            if (ctx->poll.ns) {
-                ctx->poll.ns *= grow;
-            } else {
-                ctx->poll.ns = 4000; /* start polling at 4 microseconds */
-            }
-
-            if (ctx->poll.ns > ctx->poll_max_ns) {
-                ctx->poll.ns = ctx->poll_max_ns;
-            }
-
-            trace_poll_grow(ctx, old, ctx->poll.ns);
-        }
+        adjust_polling_time(ctx, &ctx->poll, block_ns);
     }

     progress |= aio_bh_poll(ctx);
--
2.48.1
diff view generated by jsdifflib
From: Eric Blake <eblake@redhat.com>

The parallels driver tries to use truncation for image growth, but can
only do so when reads are guaranteed as zero. Now that we have a way
to request zero contents from truncation, we can defer the decision to
actual allocation attempts rather than up front, reducing the number
of places that still use bdrv_has_zero_init_truncate.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200428202905.770727-8-eblake@redhat.com>
Reviewed-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/parallels.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index XXXXXXX..XXXXXXX 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -XXX,XX +XXX,XX @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
 static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                  int nb_sectors, int *pnum)
 {
-    int ret;
+    int ret = 0;
     BDRVParallelsState *s = bs->opaque;
     int64_t pos, space, idx, to_allocate, i, len;

@@ -XXX,XX +XXX,XX @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
     }
     if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) {
         space += s->prealloc_size;
+        /*
+         * We require the expanded size to read back as zero. If the
+         * user permitted truncation, we try that; but if it fails, we
+         * force the safer-but-slower fallocate.
+         */
+        if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) {
+            ret = bdrv_truncate(bs->file,
+                                (s->data_end + space) << BDRV_SECTOR_BITS,
+                                false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE,
+                                NULL);
+            if (ret == -ENOTSUP) {
+                s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
+            }
+        }
         if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
             ret = bdrv_pwrite_zeroes(bs->file,
                                      s->data_end << BDRV_SECTOR_BITS,
                                      space << BDRV_SECTOR_BITS, 0);
-        } else {
-            ret = bdrv_truncate(bs->file,
-                                (s->data_end + space) << BDRV_SECTOR_BITS,
-                                false, PREALLOC_MODE_OFF, 0, NULL);
         }
         if (ret < 0) {
             return ret;
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
     s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
     buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
+    /* prealloc_mode can be downgraded later during allocate_clusters */
     s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf,
                                        PRL_PREALLOC_MODE_FALLOCATE,
                                        &local_err);
@@ -XXX,XX +XXX,XX @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail_options;
     }

-    if (!bdrv_has_zero_init_truncate(bs->file->bs)) {
-        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
-    }
-
     if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
         s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
         ret = parallels_update_header(bs);
--
2.25.3

Adaptive polling has a big problem: It doesn't consider that an event
loop can wait for many different events that may have very different
typical latencies.

For example, think of a guest that tends to send a new I/O request soon
after the previous I/O request completes, but the storage on the host is
rather slow. In this case, getting the new request from guest quickly
means that polling is enabled, but the next thing is performing the I/O
request on the backend, which is slow and disables polling again for the
next guest request. This means that in such a scenario, polling could
help for every other event, but is only ever enabled when it can't
succeed.

In order to fix this, keep a separate AioPolledEvent for each
AioHandler. We will then know that the backend file descriptor always
has a high latency and isn't worth polling for, but we also know that
the guest is always fast and we should poll for it. This solves at least
half of the problem, we can now keep polling for those cases where it
makes sense and get the improved performance from it.

Since the event loop doesn't know which event will be next, we still do
some unnecessary polling while we're waiting for the slow disk. I made
some attempts to be more clever than just randomly growing and shrinking
the polling time, and even to let callers be explicit about when they
expect a new event, but so far this hasn't resulted in improved
performance or even caused performance regressions. For now, let's just
fix the part that is easy enough to fix, we can revisit the rest later.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-6-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/aio.h |  1 -
 util/aio-posix.h    |  1 +
 util/aio-posix.c    | 26 ++++++++++++++++++++++----
 util/async.c        |  2 --
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/include/block/aio.h b/include/block/aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -XXX,XX +XXX,XX @@ struct AioContext {
     int poll_disable_cnt;

     /* Polling mode parameters */
-    AioPolledEvent poll;
     int64_t poll_max_ns;    /* maximum polling time in nanoseconds */
     int64_t poll_grow;      /* polling time growth factor */
     int64_t poll_shrink;    /* polling time shrink factor */
diff --git a/util/aio-posix.h b/util/aio-posix.h
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.h
+++ b/util/aio-posix.h
@@ -XXX,XX +XXX,XX @@ struct AioHandler {
 #endif
     int64_t poll_idle_timeout; /* when to stop userspace polling */
     bool poll_ready; /* has polling detected an event? */
+    AioPolledEvent poll;
 };

 /* Add a handler to a ready list */
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
 static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
                           int64_t *timeout)
 {
+    AioHandler *node;
     int64_t max_ns;

     if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
         return false;
     }

-    max_ns = qemu_soonest_timeout(*timeout, ctx->poll.ns);
+    max_ns = 0;
+    QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
+        max_ns = MAX(max_ns, node->poll.ns);
+    }
+    max_ns = qemu_soonest_timeout(*timeout, max_ns);
+
     if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
         /*
          * Enable poll mode. It pairs with the poll_set_started() in
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)

     /* Adjust polling time */
     if (ctx->poll_max_ns) {
+        AioHandler *node;
         int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-        adjust_polling_time(ctx, &ctx->poll, block_ns);
+
+        QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
+            if (QLIST_IS_INSERTED(node, node_ready)) {
+                adjust_polling_time(ctx, &node->poll, block_ns);
+            }
+        }
     }

     progress |= aio_bh_poll(ctx);
@@ -XXX,XX +XXX,XX @@ void aio_context_use_g_source(AioContext *ctx)
 void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                  int64_t grow, int64_t shrink, Error **errp)
 {
+    AioHandler *node;
+
+    qemu_lockcnt_inc(&ctx->list_lock);
+    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+        node->poll.ns = 0;
+    }
+    qemu_lockcnt_dec(&ctx->list_lock);
+
     /* No thread synchronization here, it doesn't matter if an incorrect value
      * is used once.
      */
-    ctx->poll.ns = 0;
-
     ctx->poll_max_ns = max_ns;
     ctx->poll_grow = grow;
     ctx->poll_shrink = shrink;
diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
     qemu_rec_mutex_init(&ctx->lock);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

-    ctx->poll.ns = 0;
-
     ctx->poll_max_ns = 0;
     ctx->poll_grow = 0;
     ctx->poll_shrink = 0;
--
2.48.1
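
As a toy model of the per-handler adaptive polling this patch introduces: each event source keeps its own busy-wait window, growing it when polling paid off and shrinking it when the event loop ended up blocking far longer than the window anyway. The policy and constants below are illustrative only, not QEMU's exact grow/shrink logic.

#include <stdint.h>

typedef struct {
    int64_t ns; /* busy-wait window for this handler, 0 = don't poll */
} PolledEvent;

enum { POLL_START_NS = 4000, POLL_MAX_NS = 32768, POLL_FACTOR = 2 };

static void adjust_polling_time(PolledEvent *poll, int64_t block_ns)
{
    if (block_ns <= poll->ns) {
        /* the event arrived inside the window: polling worked, grow it */
        poll->ns = poll->ns ? poll->ns * POLL_FACTOR : POLL_START_NS;
        if (poll->ns > POLL_MAX_NS) {
            poll->ns = POLL_MAX_NS;
        }
    } else if (block_ns > POLL_MAX_NS) {
        /* we blocked for ages: polling this source wastes CPU, shrink */
        poll->ns /= POLL_FACTOR;
    }
}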
bdrv_get_device_name() will be an empty string with modern management
tools that don't use -drive. Use bdrv_get_device_or_node_name() instead
so that the node name is used if the BlockBackend is anonymous.

While at it, start with upper case to make the message consistent with
the rest of the function.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Message-Id: <20200430142755.315494-3-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/backup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index XXXXXXX..XXXXXXX 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,

     len = bdrv_getlength(bs);
     if (len < 0) {
-        error_setg_errno(errp, -len, "unable to get length for '%s'",
-                         bdrv_get_device_name(bs));
+        error_setg_errno(errp, -len, "Unable to get length for '%s'",
+                         bdrv_get_device_or_node_name(bs));
         goto error;
     }
--
2.25.3

aio_dispatch_handler() adds handlers to ctx->poll_aio_handlers if
polling should be enabled. If we call adjust_polling_time() for all
polling handlers before this, new polling handlers are still left at
poll->ns = 0 and polling is only actually enabled after the next event.
Move the adjust_polling_time() call after aio_dispatch_handler().

This fixes test-nested-aio-poll, which expects that polling becomes
effective the first time around.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311141912.135657-1-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 util/aio-posix.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@
 /* Stop userspace polling on a handler if it isn't active for some time */
 #define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)

+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+                                int64_t block_ns);
+
 bool aio_poll_disabled(AioContext *ctx)
 {
     return qatomic_read(&ctx->poll_disable_cnt);
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
  * scanning all handlers with aio_dispatch_handlers().
  */
 static bool aio_dispatch_ready_handlers(AioContext *ctx,
-                                        AioHandlerList *ready_list)
+                                        AioHandlerList *ready_list,
+                                        int64_t block_ns)
 {
     bool progress = false;
     AioHandler *node;
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_ready_handlers(AioContext *ctx,
     while ((node = QLIST_FIRST(ready_list))) {
         QLIST_REMOVE(node, node_ready);
         progress = aio_dispatch_handler(ctx, node) || progress;
+
+        /*
+         * Adjust polling time only after aio_dispatch_handler(), which can
+         * add the handler to ctx->poll_aio_handlers.
+         */
+        if (ctx->poll_max_ns && QLIST_IS_INSERTED(node, node_poll)) {
+            adjust_polling_time(ctx, &node->poll, block_ns);
+        }
     }

     return progress;
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     bool use_notify_me;
     int64_t timeout;
     int64_t start = 0;
+    int64_t block_ns = 0;

     /*
      * There cannot be two concurrent aio_poll calls for the same AioContext (or
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)

     aio_notify_accept(ctx);

-    /* Adjust polling time */
+    /* Calculate blocked time for adaptive polling */
     if (ctx->poll_max_ns) {
-        AioHandler *node;
-        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-
-        QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
-            if (QLIST_IS_INSERTED(node, node_ready)) {
-                adjust_polling_time(ctx, &node->poll, block_ns);
-            }
-        }
+        block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
     }

     progress |= aio_bh_poll(ctx);
-    progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
+    progress |= aio_dispatch_ready_handlers(ctx, &ready_list, block_ns);

     aio_free_deleted_handlers(ctx);
--
2.48.1
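
A standalone model of the ordering fix above, with invented names: the blocked time is measured first, each ready handler is dispatched, and only afterwards is its polling window adjusted, because dispatching is what may register the handler for polling in the first place.

#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

typedef struct Handler {
    bool polled;        /* set by dispatch() when polling becomes worthwhile */
    int64_t poll_ns;    /* per-handler polling window */
    struct Handler *next;
} Handler;

static void dispatch(Handler *h)
{
    /* handling the event may enable polling for this handler */
    h->polled = true;
}

static void adjust_window(Handler *h, int64_t block_ns)
{
    /* toy policy: start at 4us, halve the window when we blocked long */
    h->poll_ns = block_ns < 4000 ? 4000 : h->poll_ns / 2;
}

static void dispatch_ready(Handler *ready, int64_t block_ns)
{
    for (Handler *h = ready; h; h = h->next) {
        dispatch(h);
        if (h->polled) {    /* adjust only after dispatch */
            adjust_window(h, block_ns);
        }
    }
}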
If a cluster is already zeroed, we don't have to call vmdk_L2update(),
which is rather slow because it flushes the image file.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20200430133007.170335-5-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/vmdk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -XXX,XX +XXX,XX @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
                 offset_in_cluster == 0 &&
                 n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
                 n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
-                if (!zero_dry_run) {
+                if (!zero_dry_run && ret != VMDK_ZEROED) {
                     /* update L2 tables */
                     if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
                         != VMDK_OK) {
--
2.25.3

From: Thomas Huth <thuth@redhat.com>

qsd-migrate is currently only working for raw, qcow2 and qed.
Other formats are failing, e.g. because they don't support migration.
Thus let's limit this test to the three usable formats now.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-ID: <20250224214058.205889-1-thuth@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/tests/qsd-migrate | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/tests/qsd-migrate b/tests/qemu-iotests/tests/qsd-migrate
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/tests/qsd-migrate
+++ b/tests/qemu-iotests/tests/qsd-migrate
@@ -XXX,XX +XXX,XX @@ import iotests

 from iotests import filter_qemu_io, filter_qtest

-iotests.script_initialize(supported_fmts=['generic'],
+iotests.script_initialize(supported_fmts=['qcow2', 'qed', 'raw'],
                           supported_protocols=['file'],
                           supported_platforms=['linux'])
--
2.48.1
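
The vmdk change above boils down to a general pattern: only rewrite (and therefore flush) on-disk metadata when the state actually changes. A minimal sketch with invented names, standing in for the vmdk L2 machinery:

#include <stdint.h>
#include <stdbool.h>

#define ENTRY_ZEROED 1u

/* returns true when the entry must still be rewritten (and flushed) */
static bool zero_write_needs_l2_update(const uint32_t *l2_table, int index)
{
    /* an already-zeroed cluster can skip the slow metadata update */
    return l2_table[index] != ENTRY_ZEROED;
}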
The test case forgot to specify the null-co size for the target node.
When adding a check to backup that both sizes match, this would fail
because of the size mismatch and not the behaviour that the test really
wanted to test.

Fixes: a541fcc27c98b96da187c7d4573f3270f3ddd283
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20200430142755.315494-2-kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/283     | 6 +++++-
 tests/qemu-iotests/283.out | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/283 b/tests/qemu-iotests/283
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/283
+++ b/tests/qemu-iotests/283
@@ -XXX,XX +XXX,XX @@ to check that crash is fixed :)
 vm = iotests.VM()
 vm.launch()

-vm.qmp_log('blockdev-add', **{'node-name': 'target', 'driver': 'null-co'})
+vm.qmp_log('blockdev-add', **{
+    'node-name': 'target',
+    'driver': 'null-co',
+    'size': size,
+})

 vm.qmp_log('blockdev-add', **{
     'node-name': 'source',
diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/283.out
+++ b/tests/qemu-iotests/283.out
@@ -XXX,XX +XXX,XX @@
-{"execute": "blockdev-add", "arguments": {"driver": "null-co", "node-name": "target"}}
+{"execute": "blockdev-add", "arguments": {"driver": "null-co", "node-name": "target", "size": 1048576}}
 {"return": {}}
 {"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": {"driver": "null-co", "node-name": "base", "size": 1048576}, "node-name": "source"}}
 {"return": {}}
--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

Commit 71544d30a6f8 ("scsi: push request restart to SCSIDevice") removed
the only user of SCSIDiskState->bh.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-2-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/scsi-disk.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -XXX,XX +XXX,XX @@ struct SCSIDiskState {
     uint64_t max_unmap_size;
     uint64_t max_io_size;
     uint32_t quirks;
-    QEMUBH *bh;
     char *version;
     char *serial;
     char *vendor;
--
2.48.1
In order to avoid bitrot in the zero cluster code in VMDK, enable
zeroed_grain=on by default for the tests.

059 now unsets the default options because zeroed_grain=on works only
with some subformats and the test case tests many different subformats,
including those for which it doesn't work.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20200430133007.170335-7-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/059   | 6 +++---
 tests/qemu-iotests/check | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/059
+++ b/tests/qemu-iotests/059
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt vmdk
 _supported_proto file
 _supported_os Linux
-_unsupported_imgopts "subformat=monolithicFlat" \
-                     "subformat=twoGbMaxExtentFlat" \
-                     "subformat=twoGbMaxExtentSparse"
+
+# We test all kinds of VMDK options here, so ignore user-specified options
+IMGOPTS=""

 capacity_offset=16
 granularity_offset=20
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -XXX,XX +XXX,XX @@ fi
 if [ "$IMGFMT" == "luks" ] && ! (echo "$IMGOPTS" | grep "iter-time=" > /dev/null); then
     IMGOPTS=$(_optstr_add "$IMGOPTS" "iter-time=10")
 fi
+if [ "$IMGFMT" == "vmdk" ] && ! (echo "$IMGOPTS" | grep "zeroed_grain=" > /dev/null); then
+    IMGOPTS=$(_optstr_add "$IMGOPTS" "zeroed_grain=on")
+fi

 if [ -z "$SAMPLE_IMG_DIR" ]; then
     SAMPLE_IMG_DIR="$source_iotests/sample_images"
--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

In the past a single AioContext was used for block I/O and it was
fetched using blk_get_aio_context(). Nowadays the block layer supports
running I/O from any AioContext and multiple AioContexts at the same
time. Remove the dma_blk_io() AioContext argument and use the current
AioContext instead.

This makes calling the function easier and enables multiple IOThreads to
use dma_blk_io() concurrently for the same block device.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-3-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/system/dma.h |  3 +--
 hw/ide/core.c        |  3 +--
 hw/ide/macio.c       |  3 +--
 hw/scsi/scsi-disk.c  |  6 ++----
 system/dma-helpers.c |  8 ++++----
 5 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/include/system/dma.h b/include/system/dma.h
index XXXXXXX..XXXXXXX 100644
--- a/include/system/dma.h
+++ b/include/system/dma.h
@@ -XXX,XX +XXX,XX @@ typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov,
                               BlockCompletionFunc *cb, void *cb_opaque,
                               void *opaque);

-BlockAIOCB *dma_blk_io(AioContext *ctx,
-                       QEMUSGList *sg, uint64_t offset, uint32_t align,
+BlockAIOCB *dma_blk_io(QEMUSGList *sg, uint64_t offset, uint32_t align,
                        DMAIOFunc *io_func, void *io_func_opaque,
                        BlockCompletionFunc *cb, void *opaque, DMADirection dir);
 BlockAIOCB *dma_blk_read(BlockBackend *blk,
diff --git a/hw/ide/core.c b/hw/ide/core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -XXX,XX +XXX,XX @@ static void ide_dma_cb(void *opaque, int ret)
                                           BDRV_SECTOR_SIZE, ide_dma_cb, s);
         break;
     case IDE_DMA_TRIM:
-        s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk),
-                                        &s->sg, offset, BDRV_SECTOR_SIZE,
+        s->bus->dma->aiocb = dma_blk_io(&s->sg, offset, BDRV_SECTOR_SIZE,
                                         ide_issue_trim, s, ide_dma_cb, s,
                                         DMA_DIRECTION_TO_DEVICE);
         break;
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -XXX,XX +XXX,XX @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
                                      pmac_ide_transfer_cb, io);
         break;
     case IDE_DMA_TRIM:
-        s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk), &s->sg,
-                                        offset, 0x1, ide_issue_trim, s,
+        s->bus->dma->aiocb = dma_blk_io(&s->sg, offset, 0x1, ide_issue_trim, s,
                                         pmac_ide_transfer_cb, io,
                                         DMA_DIRECTION_TO_DEVICE);
         break;
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -XXX,XX +XXX,XX @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
         r->req.residual -= r->req.sg->size;
-        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
-                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
+        r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
                                   BDRV_SECTOR_SIZE,
                                   sdc->dma_readv, r, scsi_dma_complete, r,
                                   DMA_DIRECTION_FROM_DEVICE);
@@ -XXX,XX +XXX,XX @@ static void scsi_write_data(SCSIRequest *req)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
         r->req.residual -= r->req.sg->size;
-        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
-                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
+        r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
                                   BDRV_SECTOR_SIZE,
                                   sdc->dma_writev, r, scsi_dma_complete, r,
                                   DMA_DIRECTION_TO_DEVICE);
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
index XXXXXXX..XXXXXXX 100644
--- a/system/dma-helpers.c
+++ b/system/dma-helpers.c
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo dma_aiocb_info = {
     .cancel_async               = dma_aio_cancel,
 };

-BlockAIOCB *dma_blk_io(AioContext *ctx,
+BlockAIOCB *dma_blk_io(
     QEMUSGList *sg, uint64_t offset, uint32_t align,
     DMAIOFunc *io_func, void *io_func_opaque,
     BlockCompletionFunc *cb,
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *dma_blk_io(

     dbs->acb = NULL;
     dbs->sg = sg;
-    dbs->ctx = ctx;
+    dbs->ctx = qemu_get_current_aio_context();
     dbs->offset = offset;
     dbs->align = align;
     dbs->sg_cur_index = 0;
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *dma_blk_read(BlockBackend *blk,
                          QEMUSGList *sg, uint64_t offset, uint32_t align,
                          void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+    return dma_blk_io(sg, offset, align,
                       dma_blk_read_io_func, blk, cb, opaque,
                       DMA_DIRECTION_FROM_DEVICE);
 }
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *dma_blk_write(BlockBackend *blk,
                           QEMUSGList *sg, uint64_t offset, uint32_t align,
                           void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+    return dma_blk_io(sg, offset, align,
                       dma_blk_write_io_func, blk, cb, opaque,
                       DMA_DIRECTION_TO_DEVICE);
 }
--
2.48.1
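
A minimal standalone model of the change above: instead of the caller naming the completion context explicitly, the context is whatever event loop the submitting thread runs in. EventLoop, current_loop and dma_submit() are invented names, not the QEMU API:

#include <stddef.h>

typedef struct EventLoop EventLoop;

/* each I/O thread registers its event loop here on startup */
static _Thread_local EventLoop *current_loop;

typedef struct {
    EventLoop *completion_loop; /* captured at submission time */
    /* ... scatter/gather list, offset, callback, ... */
} DmaRequest;

/* before: dma_submit(loop, req) — the caller had to pass the context */
static void dma_submit(DmaRequest *req)
{
    req->completion_loop = current_loop; /* implied by the calling thread */
    /* ... issue the I/O; the callback later runs in completion_loop ... */
}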
From: Eric Blake <eblake@redhat.com>

Our .bdrv_has_zero_init_truncate always returns 1 because rbd always
0-fills; we can use that same knowledge to implement
BDRV_REQ_ZERO_WRITE by ignoring it.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200428202905.770727-5-eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/rbd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/rbd.c b/block/rbd.c
index XXXXXXX..XXXXXXX 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
         }
     }

+    /* When extending regular files, we get zeros from the OS */
+    bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+
     r = 0;
     goto out;

--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

Until now, a SCSIDevice's I/O requests have run in a single AioContext.
In order to support multiple IOThreads it will be necessary to move to
the concept of a per-SCSIRequest AioContext.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-4-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/scsi/scsi.h |  1 +
 hw/scsi/scsi-bus.c     |  1 +
 hw/scsi/scsi-disk.c    | 17 ++++++-----------
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -XXX,XX +XXX,XX @@ struct SCSIRequest {
     SCSIBus *bus;
     SCSIDevice *dev;
     const SCSIReqOps *ops;
+    AioContext *ctx;
     uint32_t refcount;
     uint32_t tag;
     uint32_t lun;
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -XXX,XX +XXX,XX @@ invalid_opcode:
         }
     }

+    req->ctx = qemu_get_current_aio_context();
     req->cmd = cmd;
     req->residual = req->cmd.xfer;

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -XXX,XX +XXX,XX @@ static void scsi_aio_complete(void *opaque, int ret)
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);

-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());

     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_complete(void *opaque, int ret)

 static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;

-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());

     assert(r->req.aiocb == NULL);
     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
@@ -XXX,XX +XXX,XX @@ static void scsi_read_data(SCSIRequest *req)

 static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;

-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());

     assert (r->req.aiocb == NULL);
     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
--
2.25.3
--
2.48.1
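
A standalone model of the per-request context rule introduced above, with invented names: the submitting thread's context is captured once at request creation, and every completion path asserts it still runs there.

#include <assert.h>
#include <stddef.h>

typedef struct EventLoop EventLoop;

static _Thread_local EventLoop *current_loop;

typedef struct {
    EventLoop *ctx; /* where this request runs, fixed at creation */
} Request;

static void request_init(Request *r)
{
    r->ctx = current_loop; /* mirrors req->ctx = qemu_get_current_aio_context() */
}

static void request_complete(Request *r)
{
    /* completions must run in the request's own context */
    assert(r->ctx == current_loop);
    /* ... finish the request ... */
}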
m_data must contain valid data even for zero clusters when no cluster
was allocated in the image file. Without this, zero writes segfault with
images that have zeroed_grain=on.

For zero writes, we don't want to allocate a cluster in the image file
even in compressed files.

Fixes: 524089bce43fd1cd3daaca979872451efa2cf7c6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20200430133007.170335-3-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/vmdk.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
         extent->l2_cache_counts[min_index] = 1;
  found:
     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
+    if (m_data) {
+        m_data->l1_index = l1_index;
+        m_data->l2_index = l2_index;
+        m_data->l2_offset = l2_offset;
+        m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
+    }

     if (extent->sesparse) {
         cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
         }
         if (m_data) {
             m_data->new_allocation = true;
-            m_data->l1_index = l1_index;
-            m_data->l2_index = l2_index;
-            m_data->l2_offset = l2_offset;
-            m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
         }
     }
     *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
@@ -XXX,XX +XXX,XX @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
                     error_report("Could not write to allocated cluster"
                                  " for streamOptimized");
                     return -EIO;
-                } else {
+                } else if (!zeroed) {
                     /* allocate */
                     ret = get_cluster_offset(bs, extent, &m_data, offset,
                                              true, &cluster_offset, 0, 0);
--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

SCSIDevice keeps track of in-flight requests for device reset and Task
Management Functions (TMFs). The request list requires protection so
that multi-threaded SCSI emulation can be implemented in commits that
follow.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-5-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/scsi/scsi.h |   7 ++-
 hw/scsi/scsi-bus.c     | 120 +++++++++++++++++++++++++++++------------
 2 files changed, 88 insertions(+), 39 deletions(-)

diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -XXX,XX +XXX,XX @@ struct SCSIRequest {
     bool dma_started;
     BlockAIOCB *aiocb;
     QEMUSGList *sg;
+
+    /* Protected by SCSIDevice->requests_lock */
     QTAILQ_ENTRY(SCSIRequest) next;
 };

@@ -XXX,XX +XXX,XX @@ struct SCSIDevice
     uint8_t sense[SCSI_SENSE_BUF_SIZE];
     uint32_t sense_len;

-    /*
-     * The requests list is only accessed from the AioContext that executes
-     * requests or from the main loop when IOThread processing is stopped.
-     */
+    QemuMutex requests_lock; /* protects the requests list */
     QTAILQ_HEAD(, SCSIRequest) requests;

     uint32_t channel;
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_sync(SCSIDevice *s,
     assert(!runstate_is_running());
     assert(qemu_in_main_thread());

-    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
-        fn(req, opaque);
+    /*
+     * Locking is not necessary because the guest is stopped and no other
+     * threads can be accessing the requests list, but take the lock for
+     * consistency.
+     */
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
+            fn(req, opaque);
+        }
     }
 }

@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async_bh(void *opaque)
 {
     g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
     SCSIDevice *s = data->s;
-    AioContext *ctx;
-    SCSIRequest *req;
-    SCSIRequest *next;
+    g_autoptr(GList) reqs = NULL;

     /*
-     * The BB cannot have changed contexts between this BH being scheduled and
-     * now: BBs' AioContexts, when they have a node attached, can only be
-     * changed via bdrv_try_change_aio_context(), in a drained section. While
-     * we have the in-flight counter incremented, that drain must block.
+     * Build a list of requests in this AioContext so fn() can be invoked later
+     * outside requests_lock.
      */
-    ctx = blk_get_aio_context(s->conf.blk);
-    assert(ctx == qemu_get_current_aio_context());
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        AioContext *ctx = qemu_get_current_aio_context();
+        SCSIRequest *req;
+        SCSIRequest *next;
+
+        QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
+            if (req->ctx == ctx) {
+                scsi_req_ref(req); /* dropped after calling fn() */
+                reqs = g_list_prepend(reqs, req);
+            }
+        }
+    }

-    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
-        data->fn(req, data->fn_opaque);
+    /* Call fn() on each request */
+    for (GList *elem = g_list_first(reqs); elem; elem = g_list_next(elem)) {
+        data->fn(elem->data, data->fn_opaque);
+        scsi_req_unref(elem->data);
     }

     /* Drop the reference taken by scsi_device_for_each_req_async() */
@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async_bh(void *opaque)
     blk_dec_in_flight(s->conf.blk);
 }

+static void scsi_device_for_each_req_async_do_ctx(gpointer key, gpointer value,
+                                                  gpointer user_data)
+{
+    AioContext *ctx = key;
+    SCSIDeviceForEachReqAsyncData *params = user_data;
+    SCSIDeviceForEachReqAsyncData *data;
+
+    data = g_new(SCSIDeviceForEachReqAsyncData, 1);
+    data->s = params->s;
+    data->fn = params->fn;
+    data->fn_opaque = params->fn_opaque;
+
+    /*
+     * Hold a reference to the SCSIDevice until
+     * scsi_device_for_each_req_async_bh() finishes.
+     */
+    object_ref(OBJECT(data->s));
+
+    /* Paired with scsi_device_for_each_req_async_bh() */
+    blk_inc_in_flight(data->s->conf.blk);
+
+    aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, data);
+}
+
 /*
  * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
- * runs in the AioContext that is executing the request.
+ * must be thread-safe because it runs concurrently in each AioContext that is
+ * executing a request.
+ *
  * Keeps the BlockBackend's in-flight counter incremented until everything is
  * done, so draining it will settle all scheduled @fn() calls.
  */
@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async(SCSIDevice *s,
 {
     assert(qemu_in_main_thread());

-    SCSIDeviceForEachReqAsyncData *data =
-        g_new(SCSIDeviceForEachReqAsyncData, 1);
-
-    data->s = s;
-    data->fn = fn;
-    data->fn_opaque = opaque;
-
-    /*
-     * Hold a reference to the SCSIDevice until
-     * scsi_device_for_each_req_async_bh() finishes.
-     */
-    object_ref(OBJECT(s));
+    /* The set of AioContexts where the requests are being processed */
+    g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        SCSIRequest *req;
+        QTAILQ_FOREACH(req, &s->requests, next) {
+            g_hash_table_add(aio_contexts, req->ctx);
+        }
+    }

-    /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */
-    blk_inc_in_flight(s->conf.blk);
-    aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
-                            scsi_device_for_each_req_async_bh,
-                            data);
+    /* Schedule a BH for each AioContext */
+    SCSIDeviceForEachReqAsyncData params = {
+        .s = s,
+        .fn = fn,
+        .fn_opaque = opaque,
+    };
+    g_hash_table_foreach(
+        aio_contexts,
+        scsi_device_for_each_req_async_do_ctx,
+        &params
+    );
 }

 static void scsi_device_realize(SCSIDevice *s, Error **errp)
@@ -XXX,XX +XXX,XX @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp)
         dev->lun = lun;
     }

+    qemu_mutex_init(&dev->requests_lock);
     QTAILQ_INIT(&dev->requests);
     scsi_device_realize(dev, &local_err);
     if (local_err) {
@@ -XXX,XX +XXX,XX @@ static void scsi_qdev_unrealize(DeviceState *qdev)

     scsi_device_purge_requests(dev, SENSE_CODE(NO_SENSE));

+    qemu_mutex_destroy(&dev->requests_lock);
+
     scsi_device_unrealize(dev);

     blockdev_mark_auto_del(dev->conf.blk);
@@ -XXX,XX +XXX,XX @@ static void scsi_req_enqueue_internal(SCSIRequest *req)
         req->sg = NULL;
     }
     req->enqueued = true;
-    QTAILQ_INSERT_TAIL(&req->dev->requests, req, next);
+
+    WITH_QEMU_LOCK_GUARD(&req->dev->requests_lock) {
+        QTAILQ_INSERT_TAIL(&req->dev->requests, req, next);
+    }
 }

 int32_t scsi_req_enqueue(SCSIRequest *req)
@@ -XXX,XX +XXX,XX @@ static void scsi_req_dequeue(SCSIRequest *req)
     trace_scsi_req_dequeue(req->dev->id, req->lun, req->tag);
     req->retry = false;
     if (req->enqueued) {
-        QTAILQ_REMOVE(&req->dev->requests, req, next);
+        WITH_QEMU_LOCK_GUARD(&req->dev->requests_lock) {
+            QTAILQ_REMOVE(&req->dev->requests, req, next);
+        }
         req->enqueued = false;
         scsi_req_unref(req);
     }
@@ -XXX,XX +XXX,XX @@ static void scsi_device_class_init(ObjectClass *klass, void *data)

 static void scsi_dev_instance_init(Object *obj)
 {
-    DeviceState *dev = DEVICE(obj);
-    SCSIDevice *s = SCSI_DEVICE(dev);
+    SCSIDevice *s = SCSI_DEVICE(obj);

     device_add_bootindex_property(obj, &s->conf.bootindex,
                                   "bootindex", NULL,
--
2.48.1
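
A standalone sketch of the pattern the scsi-bus.c change uses: take the lock only to snapshot matching entries (taking a reference on each), then drop it before invoking the callback, so fn() can itself take locks or sleep. The list and refcount helpers here are invented for illustration; error handling is omitted.

#include <pthread.h>
#include <stdlib.h>

typedef struct Req {
    int refcount;
    struct Req *next;
} Req;

typedef struct {
    pthread_mutex_t lock;
    Req *head;
} Device;

static void req_ref(Req *r)   { __atomic_fetch_add(&r->refcount, 1, __ATOMIC_SEQ_CST); }
static void req_unref(Req *r) { __atomic_fetch_sub(&r->refcount, 1, __ATOMIC_SEQ_CST); }

static void for_each_req(Device *d, void (*fn)(Req *, void *), void *opaque)
{
    Req **snap = NULL;
    size_t n = 0;

    pthread_mutex_lock(&d->lock);
    for (Req *r = d->head; r; r = r->next) {
        snap = realloc(snap, (n + 1) * sizeof(*snap));
        req_ref(r); /* keep r alive after the lock is dropped */
        snap[n++] = r;
    }
    pthread_mutex_unlock(&d->lock);

    for (size_t i = 0; i < n; i++) {
        fn(snap[i], opaque);    /* runs without d->lock held */
        req_unref(snap[i]);
    }
    free(snap);
}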
From: Eric Blake <eblake@redhat.com>

Our .bdrv_has_zero_init_truncate returns 1 if we detect that the OS
always 0-fills; we can use that same knowledge to implement
BDRV_REQ_ZERO_WRITE by ignoring it when the OS gives it to us for
free.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200428202905.770727-4-eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/nfs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/nfs.c b/block/nfs.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -XXX,XX +XXX,XX @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
     }

     bs->total_sectors = ret;
+    if (client->has_zero_init) {
+        bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+    }
     return 0;
 }

--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

Virtqueues are not thread-safe. Until now this was not a major issue
since all virtqueue processing happened in the same thread. The ctrl
queue's Task Management Function (TMF) requests sometimes need the main
loop, so a BH was used to schedule the virtqueue completion back in the
thread that has virtqueue access.

When IOThread Virtqueue Mapping is introduced in later commits, event
and ctrl virtqueue accesses from other threads will become necessary.
Introduce an optional per-virtqueue lock so the event and ctrl
virtqueues can be protected in the commits that follow.

The addition of the ctrl virtqueue lock makes
virtio_scsi_complete_req_from_main_loop() and its BH unnecessary.
Instead, take the ctrl virtqueue lock from the main loop thread.

The cmd virtqueue does not have a lock because the entirety of SCSI
command processing happens in one thread. Only one thread accesses the
cmd virtqueue and a lock is unnecessary.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-6-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/virtio/virtio-scsi.h |  3 ++
 hw/scsi/virtio-scsi.c           | 84 ++++++++++++++++++---------------
 2 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {
     int resetting; /* written from main loop thread, read from any thread */
     bool events_dropped;

+    QemuMutex ctrl_lock; /* protects ctrl_vq */
+    QemuMutex event_lock; /* protects event_vq */
+
     /*
      * TMFs deferred to main loop BH. These fields are protected by
      * tmf_bh_lock.
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_free_req(VirtIOSCSIReq *req)
     g_free(req);
 }

-static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
+static void virtio_scsi_complete_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
 {
     VirtIOSCSI *s = req->dev;
     VirtQueue *vq = req->vq;
     VirtIODevice *vdev = VIRTIO_DEVICE(s);

     qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size);
+
+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }
+
     virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size);
     if (s->dataplane_started && !s->dataplane_fenced) {
         virtio_notify_irqfd(vdev, vq);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
         virtio_notify(vdev, vq);
     }

+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
+    }
+
     if (req->sreq) {
         req->sreq->hba_private = NULL;
         scsi_req_unref(req->sreq);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
     virtio_scsi_free_req(req);
 }

-static void virtio_scsi_complete_req_bh(void *opaque)
+static void virtio_scsi_bad_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
 {
-    VirtIOSCSIReq *req = opaque;
+    virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers");

-    virtio_scsi_complete_req(req);
-}
+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }

-/*
- * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop
- * thread cannot touch the virtqueue since that could race with an IOThread.
- */
-static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req)
-{
-    VirtIOSCSI *s = req->dev;
+    virtqueue_detach_element(req->vq, &req->elem, 0);

-    if (!s->ctx || s->ctx == qemu_get_aio_context()) {
-        /* No need to schedule a BH when there is no IOThread */
-        virtio_scsi_complete_req(req);
-    } else {
-        /* Run request completion in the IOThread */
-        aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req);
+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
     }
-}

-static void virtio_scsi_bad_req(VirtIOSCSIReq *req)
-{
-    virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers");
-    virtqueue_detach_element(req->vq, &req->elem, 0);
     virtio_scsi_free_req(req);
 }

@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_parse_req(VirtIOSCSIReq *req,
     return 0;
 }

-static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq)
+static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq, QemuMutex *vq_lock)
 {
     VirtIOSCSICommon *vs = (VirtIOSCSICommon *)s;
     VirtIOSCSIReq *req;

+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }
+
     req = virtqueue_pop(vq, sizeof(VirtIOSCSIReq) + vs->cdb_size);
+
+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
+    }
+
     if (!req) {
         return NULL;
     }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)

         trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(req->req.tmf.lun),
                                    req->req.tmf.tag, req->resp.tmf.response);
-        virtio_scsi_complete_req(req);
+        virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
     }
     g_free(n);
 }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)

 out:
     object_unref(OBJECT(d));
-    virtio_scsi_complete_req_from_main_loop(req);
+    virtio_scsi_complete_req(req, &s->ctrl_lock);
 }

 /* Some TMFs must be processed from the main loop thread */
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)

         /* SAM-6 6.3.2 Hard reset */
         req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
-        virtio_scsi_complete_req(req);
+        virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
     }
 }

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)

     if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0,
                    &type, sizeof(type)) < sizeof(type)) {
-        virtio_scsi_bad_req(req);
+        virtio_scsi_bad_req(req, &s->ctrl_lock);
         return;
     }

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
     if (type == VIRTIO_SCSI_T_TMF) {
         if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlTMFReq),
                     sizeof(VirtIOSCSICtrlTMFResp)) < 0) {
-            virtio_scsi_bad_req(req);
+            virtio_scsi_bad_req(req, &s->ctrl_lock);
             return;
         } else {
             r = virtio_scsi_do_tmf(s, req);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
                type == VIRTIO_SCSI_T_AN_SUBSCRIBE) {
         if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlANReq),
                                   sizeof(VirtIOSCSICtrlANResp)) < 0) {
-            virtio_scsi_bad_req(req);
+            virtio_scsi_bad_req(req, &s->ctrl_lock);
             return;
         } else {
             req->req.an.event_requested =
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
             type == VIRTIO_SCSI_T_AN_SUBSCRIBE)
             trace_virtio_scsi_an_resp(virtio_scsi_get_lun(req->req.an.lun),
                                       req->resp.an.response);
-        virtio_scsi_complete_req(req);
+        virtio_scsi_complete_req(req, &s->ctrl_lock);
     } else {
         assert(r == -EINPROGRESS);
     }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
     VirtIOSCSIReq *req;

-    while ((req = virtio_scsi_pop_req(s, vq))) {
+    while ((req = virtio_scsi_pop_req(s, vq, &s->ctrl_lock))) {
         virtio_scsi_handle_ctrl_req(s, req);
     }
 }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req)
      * in virtio_scsi_command_complete.
      */
     req->resp_size = sizeof(VirtIOSCSICmdResp);
-    virtio_scsi_complete_req(req);
+    virtio_scsi_complete_req(req, NULL);
 }

 static void virtio_scsi_command_failed(SCSIRequest *r)
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
         virtio_scsi_fail_cmd_req(req);
         return -ENOTSUP;
     } else {
-        virtio_scsi_bad_req(req);
+        virtio_scsi_bad_req(req, NULL);
         return -EINVAL;
     }
 }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
         virtio_queue_set_notification(vq, 0);
     }

-    while ((req = virtio_scsi_pop_req(s, vq))) {
+    while ((req = virtio_scsi_pop_req(s, vq, NULL))) {
         ret = virtio_scsi_handle_cmd_req_prepare(s, req);
         if (!ret) {
             QTAILQ_INSERT_TAIL(&reqs, req, next);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
         return;
     }

-    req = virtio_scsi_pop_req(s, vs->event_vq);
+    req = virtio_scsi_pop_req(s, vs->event_vq, &s->event_lock);
     if (!req) {
         s->events_dropped = true;
         return;
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
     }

     if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) {
-        virtio_scsi_bad_req(req);
+        virtio_scsi_bad_req(req, &s->event_lock);
         return;
     }

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
     }
     trace_virtio_scsi_event(virtio_scsi_get_lun(evt->lun), event, reason);

-    virtio_scsi_complete_req(req);
+    virtio_scsi_complete_req(req, &s->event_lock);
 }

 static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
     Error *err = NULL;

     QTAILQ_INIT(&s->tmf_bh_list);
+    qemu_mutex_init(&s->ctrl_lock);
+    qemu_mutex_init(&s->event_lock);
     qemu_mutex_init(&s->tmf_bh_lock);

     virtio_scsi_common_realize(dev,
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
     virtio_scsi_common_unrealize(dev);
     qemu_mutex_destroy(&s->tmf_bh_lock);
+    qemu_mutex_destroy(&s->event_lock);
+    qemu_mutex_destroy(&s->ctrl_lock);
 }

 static const Property virtio_scsi_properties[] = {
--
2.48.1
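
A standalone sketch of the optional-lock convention adopted above: paths that are the only user of a queue pass NULL and take no lock at all; shared queues pass their mutex. The queue type and helpers are invented for illustration.

#include <pthread.h>
#include <stddef.h>

typedef struct {
    void *items[16];
    int len;
} Queue;

static void queue_push_unlocked(Queue *q, void *elem)
{
    if (q->len < 16) {
        q->items[q->len++] = elem;
    }
}

static void queue_push(Queue *q, void *elem, pthread_mutex_t *lock)
{
    if (lock) {
        pthread_mutex_lock(lock);
    }

    queue_push_unlocked(q, elem); /* the actual, unsynchronized operation */

    if (lock) {
        pthread_mutex_unlock(lock);
    }
}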
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200430124713.3067-8-vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/109 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/109
+++ b/tests/qemu-iotests/109
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 _supported_fmt raw
 _supported_proto file
 _supported_os Linux
+_require_drivers qcow qcow2 qed vdi vmdk vpc

 qemu_comm_method=qmp

--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

The block layer can invoke the resize callback from any AioContext that
is processing requests. The virtqueue is already protected but the
events_dropped field also needs to be protected against races. Cover it
using the event virtqueue lock because it is closely associated with
accesses to the virtqueue.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-7-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/virtio/virtio-scsi.h |  3 ++-
 hw/scsi/virtio-scsi.c           | 29 ++++++++++++++++++++---------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {

     SCSIBus bus;
     int resetting; /* written from main loop thread, read from any thread */
+
+    QemuMutex event_lock; /* protects event_vq and events_dropped */
     bool events_dropped;

     QemuMutex ctrl_lock; /* protects ctrl_vq */
-    QemuMutex event_lock; /* protects event_vq */

     /*
      * TMFs deferred to main loop BH. These fields are protected by
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)

     vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
     vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
-    s->events_dropped = false;
+
+    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
+        s->events_dropped = false;
+    }
 }

 typedef struct {
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
     }

     req = virtio_scsi_pop_req(s, vs->event_vq, &s->event_lock);
-    if (!req) {
-        s->events_dropped = true;
-        return;
-    }
+    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
+        if (!req) {
+            s->events_dropped = true;
+            return;
+        }

-    if (s->events_dropped) {
-        event |= VIRTIO_SCSI_T_EVENTS_MISSED;
-        s->events_dropped = false;
+        if (s->events_dropped) {
+            event |= VIRTIO_SCSI_T_EVENTS_MISSED;
+            s->events_dropped = false;
+        }
     }

     if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) {
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,

 static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
-    if (s->events_dropped) {
+    bool events_dropped;
+
+    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
+        events_dropped = s->events_dropped;
+    }
+
+    if (events_dropped) {
         VirtIOSCSIEventInfo info = {
             .event = VIRTIO_SCSI_T_NO_EVENT,
         };
--
2.48.1
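
A minimal sketch of the snapshot-under-lock pattern virtio_scsi_handle_event_vq() uses above, with invented names: copy the shared flag into a local while holding its lock, then run the slow path on the local copy without the lock held.

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t event_lock = PTHREAD_MUTEX_INITIALIZER;
static bool events_dropped; /* protected by event_lock */

static void handle_event_queue(void)
{
    bool dropped;

    pthread_mutex_lock(&event_lock);
    dropped = events_dropped;
    pthread_mutex_unlock(&event_lock);

    if (dropped) {
        /* ... push an EVENTS_MISSED notification, possibly blocking ... */
    }
}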
1
From: Eric Blake <eblake@redhat.com>
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
2
3
block.c already defaults to 0 if we don't provide a callback; there's
3
With IOThread Virtqueue Mapping there will be multiple AioContexts
4
no need to write a callback that always fails.
4
processing SCSI requests. scsi_req_cancel() and other SCSI request
5
5
operations must be performed from the AioContext where the request is
6
Signed-off-by: Eric Blake <eblake@redhat.com>
6
running.
7
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
8
Reviewed-by: Alberto Garcia <berto@igalia.com>
8
Introduce a virtio_scsi_defer_tmf_to_aio_context() function and the
9
Message-Id: <20200428202905.770727-2-eblake@redhat.com>
9
necessary VirtIOSCSIReq->remaining refcount infrastructure to move the
10
TMF code into the AioContext where the request is running.
11
12
For the time being there is still just one AioContext: the main loop or
13
the IOThread. When the iothread-vq-mapping parameter is added in a later
14
patch this will be changed to per-virtqueue AioContexts.
15
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
18
Message-ID: <20250311132616.1049687-8-stefanha@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
19
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
20
---
12
block/gluster.c | 14 --------------
21
hw/scsi/virtio-scsi.c | 270 ++++++++++++++++++++++++++++++++----------
13
1 file changed, 14 deletions(-)
22
1 file changed, 206 insertions(+), 64 deletions(-)
14
23
15
diff --git a/block/gluster.c b/block/gluster.c
24
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
16
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
17
--- a/block/gluster.c
26
--- a/hw/scsi/virtio-scsi.c
18
+++ b/block/gluster.c
27
+++ b/hw/scsi/virtio-scsi.c
19
@@ -XXX,XX +XXX,XX @@ static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
28
Introduce a virtio_scsi_defer_tmf_to_aio_context() function and the
necessary VirtIOSCSIReq->remaining refcount infrastructure to move the
TMF code into the AioContext where the request is running.

For the time being there is still just one AioContext: the main loop or
the IOThread. When the iothread-vq-mapping parameter is added in a later
patch this will be changed to per-virtqueue AioContexts.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-8-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/scsi/virtio-scsi.c | 270 ++++++++++++++++++++++++++++++++----------
1 file changed, 206 insertions(+), 64 deletions(-)

diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ typedef struct VirtIOSCSIReq {
/* Used for two-stage request submission and TMFs deferred to BH */
QTAILQ_ENTRY(VirtIOSCSIReq) next;

- /* Used for cancellation of request during TMFs */
+ /* Used for cancellation of request during TMFs. Atomic. */
int remaining;

SCSIRequest *sreq;
@@ -XXX,XX +XXX,XX @@ typedef struct {
VirtIOSCSIReq *tmf_req;
} VirtIOSCSICancelNotifier;

+static void virtio_scsi_tmf_dec_remaining(VirtIOSCSIReq *tmf)
+{
+ if (qatomic_fetch_dec(&tmf->remaining) == 1) {
+ trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(tmf->req.tmf.lun),
+ tmf->req.tmf.tag, tmf->resp.tmf.response);
+
+ virtio_scsi_complete_req(tmf, &tmf->dev->ctrl_lock);
+ }
+}
+
static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
{
VirtIOSCSICancelNotifier *n = container_of(notifier,
VirtIOSCSICancelNotifier,
notifier);

- if (--n->tmf_req->remaining == 0) {
- VirtIOSCSIReq *req = n->tmf_req;
-
- trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(req->req.tmf.lun),
- req->req.tmf.tag, req->resp.tmf.response);
- virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
- }
+ virtio_scsi_tmf_dec_remaining(n->tmf_req);
g_free(n);
}

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
}
}

-static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
+static void virtio_scsi_defer_tmf_to_main_loop(VirtIOSCSIReq *req)
{
VirtIOSCSI *s = req->dev;

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
}
}

+static void virtio_scsi_tmf_cancel_req(VirtIOSCSIReq *tmf, SCSIRequest *r)
+{
+ VirtIOSCSICancelNotifier *notifier;
+
+ assert(r->ctx == qemu_get_current_aio_context());
+
+ /* Decremented in virtio_scsi_cancel_notify() */
+ qatomic_inc(&tmf->remaining);
+
+ notifier = g_new(VirtIOSCSICancelNotifier, 1);
+ notifier->notifier.notify = virtio_scsi_cancel_notify;
+ notifier->tmf_req = tmf;
+ scsi_req_cancel_async(r, &notifier->notifier);
+}
+
+/* Execute a TMF on the requests in the current AioContext */
+static void virtio_scsi_do_tmf_aio_context(void *opaque)
+{
+ AioContext *ctx = qemu_get_current_aio_context();
+ VirtIOSCSIReq *tmf = opaque;
+ VirtIOSCSI *s = tmf->dev;
+ SCSIDevice *d = virtio_scsi_device_get(s, tmf->req.tmf.lun);
+ SCSIRequest *r;
+ bool match_tag;
+
+ if (!d) {
+ tmf->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
+ virtio_scsi_tmf_dec_remaining(tmf);
+ return;
+ }
+
+ /*
+ * This function could handle other subtypes that need to be processed in
+ * the request's AioContext in the future, but for now only request
+ * cancelation subtypes are performed here.
+ */
+ switch (tmf->req.tmf.subtype) {
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK:
+ match_tag = true;
+ break;
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
+ case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
+ match_tag = false;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+ QTAILQ_FOREACH(r, &d->requests, next) {
+ VirtIOSCSIReq *cmd_req = r->hba_private;
+ assert(cmd_req); /* request has hba_private while enqueued */
+
+ if (r->ctx != ctx) {
+ continue;
+ }
+ if (match_tag && cmd_req->req.cmd.tag != tmf->req.tmf.tag) {
+ continue;
+ }
+ virtio_scsi_tmf_cancel_req(tmf, r);
+ }
+ }
+
+ /* Incremented by virtio_scsi_do_tmf() */
+ virtio_scsi_tmf_dec_remaining(tmf);
+
+ object_unref(d);
+}
+
+static void dummy_bh(void *opaque)
+{
+ /* Do nothing */
+}
+
+/*
+ * Wait for pending virtio_scsi_defer_tmf_to_aio_context() BHs.
+ */
+static void virtio_scsi_flush_defer_tmf_to_aio_context(VirtIOSCSI *s)
+{
+ GLOBAL_STATE_CODE();
+
+ assert(!s->dataplane_started);
+
+ if (s->ctx) {
+ /* Our BH only runs after previously scheduled BHs */
+ aio_wait_bh_oneshot(s->ctx, dummy_bh, NULL);
+ }
+}
+
+/*
+ * Run the TMF in a specific AioContext, handling only requests in that
+ * AioContext. This is necessary because requests can run in different
+ * AioContext and it is only possible to cancel them from the AioContext where
+ * they are running.
+ */
+static void virtio_scsi_defer_tmf_to_aio_context(VirtIOSCSIReq *tmf,
+ AioContext *ctx)
+{
+ /* Decremented in virtio_scsi_do_tmf_aio_context() */
+ qatomic_inc(&tmf->remaining);
+
+ /* See virtio_scsi_flush_defer_tmf_to_aio_context() cleanup during reset */
+ aio_bh_schedule_oneshot(ctx, virtio_scsi_do_tmf_aio_context, tmf);
+}
+
+/*
+ * Returns the AioContext for a given TMF's tag field or NULL. Note that the
+ * request identified by the tag may have completed by the time you can execute
+ * a BH in the AioContext, so don't assume the request still exists in your BH.
+ */
+static AioContext *find_aio_context_for_tmf_tag(SCSIDevice *d,
+ VirtIOSCSIReq *tmf)
+{
+ WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+ SCSIRequest *r;
+ SCSIRequest *next;
+
+ QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
+ VirtIOSCSIReq *cmd_req = r->hba_private;
+
+ /* hba_private is non-NULL while the request is enqueued */
+ assert(cmd_req);
+
+ if (cmd_req->req.cmd.tag == tmf->req.tmf.tag) {
+ return r->ctx;
+ }
+ }
+ }
+ return NULL;
+}
+
/* Return 0 if the request is ready to be completed and return to guest;
* -EINPROGRESS if the request is submitted and will be completed later, in the
* case of async cancellation. */
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
{
SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
SCSIRequest *r, *next;
+ AioContext *ctx;
int ret = 0;

virtio_scsi_ctx_check(s, d);
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
req->req.tmf.tag, req->req.tmf.subtype);

switch (req->req.tmf.subtype) {
- case VIRTIO_SCSI_T_TMF_ABORT_TASK:
- case VIRTIO_SCSI_T_TMF_QUERY_TASK:
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK: {
if (!d) {
goto fail;
}
if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
goto incorrect_lun;
}
- QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
- VirtIOSCSIReq *cmd_req = r->hba_private;
- if (cmd_req && cmd_req->req.cmd.tag == req->req.tmf.tag) {
- break;
- }
+
+ ctx = find_aio_context_for_tmf_tag(d, req);
+ if (ctx) {
+ virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+ ret = -EINPROGRESS;
}
- if (r) {
- /*
- * Assert that the request has not been completed yet, we
- * check for it in the loop above.
- */
- assert(r->hba_private);
- if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK) {
- /* "If the specified command is present in the task set, then
- * return a service response set to FUNCTION SUCCEEDED".
- */
- req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
- } else {
- VirtIOSCSICancelNotifier *notifier;
-
- req->remaining = 1;
- notifier = g_new(VirtIOSCSICancelNotifier, 1);
- notifier->tmf_req = req;
- notifier->notifier.notify = virtio_scsi_cancel_notify;
- scsi_req_cancel_async(r, &notifier->notifier);
- ret = -EINPROGRESS;
+ break;
+ }
+
+ case VIRTIO_SCSI_T_TMF_QUERY_TASK:
+ if (!d) {
+ goto fail;
+ }
+ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+ goto incorrect_lun;
+ }
+
+ WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+ QTAILQ_FOREACH(r, &d->requests, next) {
+ VirtIOSCSIReq *cmd_req = r->hba_private;
+ assert(cmd_req); /* request has hba_private while enqueued */
+
+ if (cmd_req->req.cmd.tag == req->req.tmf.tag) {
+ /*
+ * "If the specified command is present in the task set,
+ * then return a service response set to FUNCTION
+ * SUCCEEDED".
+ */
+ req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
+ }
}
}
break;

case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
- virtio_scsi_defer_tmf_to_bh(req);
+ virtio_scsi_defer_tmf_to_main_loop(req);
ret = -EINPROGRESS;
break;

case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
- case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
+ case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
+ if (!d) {
+ goto fail;
+ }
+ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+ goto incorrect_lun;
+ }
+
+ qatomic_inc(&req->remaining);
+
+ ctx = s->ctx ?: qemu_get_aio_context();
+ virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+
+ virtio_scsi_tmf_dec_remaining(req);
+ ret = -EINPROGRESS;
+ break;
+ }
+
case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
if (!d) {
goto fail;
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
goto incorrect_lun;
}

- /* Add 1 to "remaining" until virtio_scsi_do_tmf returns.
- * This way, if the bus starts calling back to the notifiers
- * even before we finish the loop, virtio_scsi_cancel_notify
- * will not complete the TMF too early.
- */
- req->remaining = 1;
- QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
- if (r->hba_private) {
- if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK_SET) {
- /* "If there is any command present in the task set, then
- * return a service response set to FUNCTION SUCCEEDED".
- */
- req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
- break;
- } else {
- VirtIOSCSICancelNotifier *notifier;
-
- req->remaining++;
- notifier = g_new(VirtIOSCSICancelNotifier, 1);
- notifier->notifier.notify = virtio_scsi_cancel_notify;
- notifier->tmf_req = req;
- scsi_req_cancel_async(r, &notifier->notifier);
- }
+ WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+ QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
+ /* Request has hba_private while enqueued */
+ assert(r->hba_private);
+
+ /*
+ * "If there is any command present in the task set, then
+ * return a service response set to FUNCTION SUCCEEDED".
+ */
+ req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
+ break;
}
}
- if (--req->remaining > 0) {
- ret = -EINPROGRESS;
- }
break;

case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)
assert(!s->dataplane_started);

virtio_scsi_reset_tmf_bh(s);
+ virtio_scsi_flush_defer_tmf_to_aio_context(s);

qatomic_inc(&s->resetting);
bus_cold_reset(BUS(&s->bus));
--
2.48.1
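Editorial note, not part of the patch: the "remaining" counter above follows a common fan-out/fan-in pattern, where the dispatcher holds one reference, each in-flight sub-operation holds one, and whoever drops the last reference completes the operation. A standalone sketch with C11 atomics and made-up names (not QEMU code):

#include <stdatomic.h>
#include <stdio.h>

typedef struct Tmf {
    atomic_int remaining;
} Tmf;

static void tmf_complete(Tmf *tmf)
{
    printf("TMF complete\n"); /* here the response would go to the guest */
}

static void tmf_unref(Tmf *tmf)
{
    /* The caller that drops the last reference completes the TMF. */
    if (atomic_fetch_sub(&tmf->remaining, 1) == 1) {
        tmf_complete(tmf);
    }
}

static void cancel_one_request(Tmf *tmf)
{
    atomic_fetch_add(&tmf->remaining, 1); /* dropped by the cancel callback */
    /* ...start the asynchronous cancellation; in this sketch we just
     * drop the reference immediately in place of the callback: */
    tmf_unref(tmf);
}

int main(void)
{
    Tmf tmf = { .remaining = 1 }; /* dispatcher's own reference */
    cancel_one_request(&tmf);
    cancel_one_request(&tmf);
    tmf_unref(&tmf); /* drop the dispatcher reference last */
    return 0;
}

Because the dispatcher keeps its own reference until all cancellations have been issued, callbacks that fire early can never complete the TMF prematurely, which is exactly what the removed comment in the old code used to explain.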
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Some tests require O_DIRECT, or want it by default. Introduce smarter
O_DIRECT handling:

- Check O_DIRECT in common.rc, if it is requested by the selected
  cache mode.

- Support a second fall-through argument in _default_cache_mode

Inspired-by: Max's 23e1d054112cec1e
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200430124713.3067-2-vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/091 | 2 +-
tests/qemu-iotests/common.rc | 37 ++++++++++++++++++++++++++++++++++--
2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/091 b/tests/qemu-iotests/091
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/091
+++ b/tests/qemu-iotests/091
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
_supported_fmt qcow2
_supported_proto file
_supported_os Linux
-_default_cache_mode none
_supported_cache_modes writethrough none writeback
+_default_cache_mode none writeback

size=1G

diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -XXX,XX +XXX,XX @@ _supported_cache_modes()
_notrun "not suitable for cache mode: $CACHEMODE"
}

+# Check whether the filesystem supports O_DIRECT
+_check_o_direct()
+{
+ $QEMU_IMG create -f raw "$TEST_IMG".test_o_direct 1M > /dev/null
+ out=$($QEMU_IO -f raw -t none -c quit "$TEST_IMG".test_o_direct 2>&1)
+ rm -f "$TEST_IMG".test_o_direct
+
+ [[ "$out" != *"O_DIRECT"* ]]
+}
+
+_require_o_direct()
+{
+ if ! _check_o_direct; then
+ _notrun "file system on $TEST_DIR does not support O_DIRECT"
+ fi
+}
+
+_check_cache_mode()
+{
+ if [ $CACHEMODE == "none" ] || [ $CACHEMODE == "directsync" ]; then
+ _require_o_direct
+ fi
+}
+
+_check_cache_mode
+
+# $1 - cache mode to use by default
+# $2 - (optional) cache mode to use by default if O_DIRECT is not supported
_default_cache_mode()
{
if $CACHEMODE_IS_DEFAULT; then
- CACHEMODE="$1"
- QEMU_IO="$QEMU_IO --cache $1"
+ if [ -z "$2" ] || _check_o_direct; then
+ CACHEMODE="$1"
+ else
+ CACHEMODE="$2"
+ fi
+ QEMU_IO="$QEMU_IO --cache $CACHEMODE"
+ _check_cache_mode
return
fi
}
--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

This is the cleanup function that must be called after
apply_iothread_vq_mapping() succeeds. virtio-scsi will need this
function too, so extract it.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-9-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/block/virtio-blk.c | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
* Fill in the AioContext for each virtqueue in the @vq_aio_context array given
* the iothread-vq-mapping parameter in @iothread_vq_mapping_list.
*
+ * cleanup_iothread_vq_mapping() must be called to free IOThread object
+ * references after this function returns success.
+ *
* Returns: %true on success, %false on failure.
**/
static bool apply_iothread_vq_mapping(
@@ -XXX,XX +XXX,XX @@ static bool apply_iothread_vq_mapping(
return true;
}

+/**
+ * cleanup_iothread_vq_mapping:
+ * @list: The mapping of virtqueues to IOThreads.
+ *
+ * Release IOThread object references that were acquired by
+ * apply_iothread_vq_mapping().
+ */
+static void cleanup_iothread_vq_mapping(IOThreadVirtQueueMappingList *list)
+{
+ IOThreadVirtQueueMappingList *node;
+
+ for (node = list; node; node = node->next) {
+ IOThread *iothread = iothread_by_id(node->value->iothread);
+ object_unref(OBJECT(iothread));
+ }
+}
+
/* Context: BQL held */
static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
{
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s)
assert(!s->ioeventfd_started);

if (conf->iothread_vq_mapping_list) {
- IOThreadVirtQueueMappingList *node;
-
- for (node = conf->iothread_vq_mapping_list; node; node = node->next) {
- IOThread *iothread = iothread_by_id(node->value->iothread);
- object_unref(OBJECT(iothread));
- }
+ cleanup_iothread_vq_mapping(conf->iothread_vq_mapping_list);
}

if (conf->iothread) {
--
2.48.1
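Editorial note, not part of the patches: the _check_o_direct shell helper above probes the same property that a program can test directly with open(2), since filesystems such as tmpfs reject O_DIRECT. A small illustrative C probe (Linux-specific, minimal error handling):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    const char *path = argc > 1 ? argv[1] : "probe.tmp";
    /* open() fails with EINVAL on filesystems without O_DIRECT support */
    int fd = open(path, O_CREAT | O_WRONLY | O_DIRECT, 0600);

    if (fd < 0) {
        perror("O_DIRECT not usable here");
        return 1;
    }
    printf("O_DIRECT supported on this filesystem\n");
    close(fd);
    unlink(path);
    return 0;
}

The iotests helper takes the same approach indirectly: it runs qemu-io with cache=none on a scratch file and checks whether the error output mentions O_DIRECT.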
Deleted patch
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

The test fails if bochs is not whitelisted, so skip it in this case.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200430124713.3067-3-vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/082 | 1 +
1 file changed, 1 insertion(+)

diff --git a/tests/qemu-iotests/082 b/tests/qemu-iotests/082
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/082
+++ b/tests/qemu-iotests/082
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15

_supported_fmt qcow2
_supported_proto file nfs
+_require_drivers bochs

run_qemu_img()
{
--
2.25.3
Deleted patch
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Skip the test case with quorum if quorum is not whitelisted.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200430124713.3067-4-vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/148 | 1 +
1 file changed, 1 insertion(+)

diff --git a/tests/qemu-iotests/148 b/tests/qemu-iotests/148
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/148
+++ b/tests/qemu-iotests/148
@@ -XXX,XX +XXX,XX @@ sector = "%d"
''' % bad_sector)
file.close()

+ @iotests.skip_if_unsupported(['quorum'])
def setUp(self):
driveopts = ['driver=quorum', 'vote-threshold=2']
driveopts.append('read-pattern=%s' % self.read_pattern)
--
2.25.3
Deleted patch
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Drop the check for no block jobs: it's obvious that there are no jobs
immediately after vm.launch().

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200430124713.3067-5-vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/041 | 8 --------
1 file changed, 8 deletions(-)

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -XXX,XX +XXX,XX @@ class TestRepairQuorum(iotests.QMPTestCase):
pass

def test_complete(self):
- self.assert_no_active_block_jobs()
-
result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
sync='full', node_name="repair0", replaces="img1",
target=quorum_repair_img, format=iotests.imgfmt)
@@ -XXX,XX +XXX,XX @@ class TestRepairQuorum(iotests.QMPTestCase):
'target image does not match source after mirroring')

def test_cancel(self):
- self.assert_no_active_block_jobs()
-
result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
sync='full', node_name="repair0", replaces="img1",
target=quorum_repair_img, format=iotests.imgfmt)
@@ -XXX,XX +XXX,XX @@ class TestRepairQuorum(iotests.QMPTestCase):
self.assert_has_block_node(None, quorum_img3)

def test_cancel_after_ready(self):
- self.assert_no_active_block_jobs()
-
result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
sync='full', node_name="repair0", replaces="img1",
target=quorum_repair_img, format=iotests.imgfmt)
@@ -XXX,XX +XXX,XX @@ class TestRepairQuorum(iotests.QMPTestCase):
'target image does not match source after mirroring')

def test_pause(self):
- self.assert_no_active_block_jobs()
-
result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0',
sync='full', node_name="repair0", replaces="img1",
target=quorum_repair_img, format=iotests.imgfmt)
--
2.25.3
Deleted patch
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Instead of looping in each test, let's refactor the vmdk target case
into a subclass.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200430124713.3067-6-vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/055 | 70 ++++++++++++++++++++------------------
tests/qemu-iotests/055.out | 4 +--
2 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055
@@ -XXX,XX +XXX,XX @@ class TestSingleTransaction(iotests.QMPTestCase):
self.assert_no_active_block_jobs()

-class TestDriveCompression(iotests.QMPTestCase):
+class TestCompressedToQcow2(iotests.QMPTestCase):
image_len = 64 * 1024 * 1024 # MB
- fmt_supports_compression = [{'type': 'qcow2', 'args': ()},
- {'type': 'vmdk', 'args': ('-o', 'subformat=streamOptimized')}]
+ target_fmt = {'type': 'qcow2', 'args': ()}

def tearDown(self):
self.vm.shutdown()
@@ -XXX,XX +XXX,XX @@ class TestDriveCompression(iotests.QMPTestCase):
except OSError:
pass

- def do_prepare_drives(self, fmt, args, attach_target):
+ def do_prepare_drives(self, attach_target):
self.vm = iotests.VM().add_drive('blkdebug::' + test_img)

- qemu_img('create', '-f', fmt, blockdev_target_img,
- str(TestDriveCompression.image_len), *args)
+ qemu_img('create', '-f', self.target_fmt['type'], blockdev_target_img,
+ str(self.image_len), *self.target_fmt['args'])
if attach_target:
self.vm.add_drive(blockdev_target_img,
- img_format=fmt, interface="none")
+ img_format=self.target_fmt['type'],
+ interface="none")

self.vm.launch()

- def do_test_compress_complete(self, cmd, format, attach_target, **args):
- self.do_prepare_drives(format['type'], format['args'], attach_target)
+ def do_test_compress_complete(self, cmd, attach_target, **args):
+ self.do_prepare_drives(attach_target)

self.assert_no_active_block_jobs()

@@ -XXX,XX +XXX,XX @@ class TestDriveCompression(iotests.QMPTestCase):

self.vm.shutdown()
self.assertTrue(iotests.compare_images(test_img, blockdev_target_img,
- iotests.imgfmt, format['type']),
+ iotests.imgfmt,
+ self.target_fmt['type']),
'target image does not match source after backup')

def test_complete_compress_drive_backup(self):
- for format in TestDriveCompression.fmt_supports_compression:
- self.do_test_compress_complete('drive-backup', format, False,
- target=blockdev_target_img, mode='existing')
+ self.do_test_compress_complete('drive-backup', False,
+ target=blockdev_target_img,
+ mode='existing')

def test_complete_compress_blockdev_backup(self):
- for format in TestDriveCompression.fmt_supports_compression:
- self.do_test_compress_complete('blockdev-backup', format, True,
- target='drive1')
+ self.do_test_compress_complete('blockdev-backup',
+ True, target='drive1')

- def do_test_compress_cancel(self, cmd, format, attach_target, **args):
- self.do_prepare_drives(format['type'], format['args'], attach_target)
+ def do_test_compress_cancel(self, cmd, attach_target, **args):
+ self.do_prepare_drives(attach_target)

self.assert_no_active_block_jobs()

@@ -XXX,XX +XXX,XX @@ class TestDriveCompression(iotests.QMPTestCase):
self.vm.shutdown()

def test_compress_cancel_drive_backup(self):
- for format in TestDriveCompression.fmt_supports_compression:
- self.do_test_compress_cancel('drive-backup', format, False,
- target=blockdev_target_img, mode='existing')
+ self.do_test_compress_cancel('drive-backup', False,
+ target=blockdev_target_img,
+ mode='existing')

def test_compress_cancel_blockdev_backup(self):
- for format in TestDriveCompression.fmt_supports_compression:
- self.do_test_compress_cancel('blockdev-backup', format, True,
- target='drive1')
+ self.do_test_compress_cancel('blockdev-backup', True,
+ target='drive1')

- def do_test_compress_pause(self, cmd, format, attach_target, **args):
- self.do_prepare_drives(format['type'], format['args'], attach_target)
+ def do_test_compress_pause(self, cmd, attach_target, **args):
+ self.do_prepare_drives(attach_target)

self.assert_no_active_block_jobs()

@@ -XXX,XX +XXX,XX @@ class TestDriveCompression(iotests.QMPTestCase):

self.vm.shutdown()
self.assertTrue(iotests.compare_images(test_img, blockdev_target_img,
- iotests.imgfmt, format['type']),
+ iotests.imgfmt,
+ self.target_fmt['type']),
'target image does not match source after backup')

def test_compress_pause_drive_backup(self):
- for format in TestDriveCompression.fmt_supports_compression:
- self.do_test_compress_pause('drive-backup', format, False,
- target=blockdev_target_img, mode='existing')
+ self.do_test_compress_pause('drive-backup', False,
+ target=blockdev_target_img,
+ mode='existing')

def test_compress_pause_blockdev_backup(self):
- for format in TestDriveCompression.fmt_supports_compression:
- self.do_test_compress_pause('blockdev-backup', format, True,
- target='drive1')
+ self.do_test_compress_pause('blockdev-backup', True,
+ target='drive1')
+
+
+class TestCompressedToVmdk(TestCompressedToQcow2):
+ target_fmt = {'type': 'vmdk', 'args': ('-o', 'subformat=streamOptimized')}
+

if __name__ == '__main__':
iotests.main(supported_fmts=['raw', 'qcow2'],
diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/055.out
+++ b/tests/qemu-iotests/055.out
@@ -XXX,XX +XXX,XX @@
-..............................
+....................................
----------------------------------------------------------------------
-Ran 30 tests
+Ran 36 tests

OK
--
2.25.3
Deleted patch
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200430124713.3067-7-vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/055 | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/055
+++ b/tests/qemu-iotests/055
@@ -XXX,XX +XXX,XX @@ class TestCompressedToQcow2(iotests.QMPTestCase):
class TestCompressedToVmdk(TestCompressedToQcow2):
target_fmt = {'type': 'vmdk', 'args': ('-o', 'subformat=streamOptimized')}

+ @iotests.skip_if_unsupported(['vmdk'])
+ def setUp(self):
+ pass
+

if __name__ == '__main__':
iotests.main(supported_fmts=['raw', 'qcow2'],
--
2.25.3
Deleted patch
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-Id: <20200430124713.3067-9-vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/113 | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/113 b/tests/qemu-iotests/113
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/113
+++ b/tests/qemu-iotests/113
@@ -XXX,XX +XXX,XX @@ trap "_cleanup; exit \$status" 0 1 2 3 15
. ./common.rc
. ./common.filter

-# Some of these test cases use bochs, but others do use raw, so this
-# is only half a lie.
+# Some of these test cases use bochs, but others do use raw
+_require_drivers bochs
_supported_fmt raw
_supported_proto file
_supported_os Linux
--
2.25.3
From: Eric Blake <eblake@redhat.com>

When using bdrv_file, .bdrv_has_zero_init_truncate always returns 1;
therefore, we can behave just like file-posix, and always implement
BDRV_REQ_ZERO_WRITE by ignoring it since the OS gives it to us for
free (note that file-posix.c had to use an 'if' because it shared code
between regular files and block devices, but in file-win32.c,
bdrv_host_device uses a separate .bdrv_file_open).

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200428202905.770727-3-eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/file-win32.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/block/file-win32.c b/block/file-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
}

+ /* When extending regular files, we get zeros from the OS */
+ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+
ret = 0;
fail:
qemu_opts_del(opts);
--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

Use noun_verb() function naming instead of verb_noun() because the
former is the most common naming style for APIs. The next commit will
move these functions into a header file so that virtio-scsi can call
them.

Shorten iothread_vq_mapping_apply()'s iothread_vq_mapping_list argument
to just "list" like in the other functions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-10-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/block/virtio-blk.c | 33 ++++++++++++++++-----------------
1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ static const BlockDevOps virtio_block_ops = {
};

static bool
-validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
- uint16_t num_queues, Error **errp)
+iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
+ num_queues, Error **errp)
{
g_autofree unsigned long *vqs = bitmap_new(num_queues);
g_autoptr(GHashTable) iothreads =
@@ -XXX,XX +XXX,XX @@ validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
}

/**
- * apply_iothread_vq_mapping:
- * @iothread_vq_mapping_list: The mapping of virtqueues to IOThreads.
+ * iothread_vq_mapping_apply:
+ * @list: The mapping of virtqueues to IOThreads.
* @vq_aio_context: The array of AioContext pointers to fill in.
* @num_queues: The length of @vq_aio_context.
* @errp: If an error occurs, a pointer to the area to store the error.
*
* Fill in the AioContext for each virtqueue in the @vq_aio_context array given
- * the iothread-vq-mapping parameter in @iothread_vq_mapping_list.
+ * the iothread-vq-mapping parameter in @list.
*
- * cleanup_iothread_vq_mapping() must be called to free IOThread object
+ * iothread_vq_mapping_cleanup() must be called to free IOThread object
* references after this function returns success.
*
* Returns: %true on success, %false on failure.
**/
-static bool apply_iothread_vq_mapping(
- IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
+static bool iothread_vq_mapping_apply(
+ IOThreadVirtQueueMappingList *list,
AioContext **vq_aio_context,
uint16_t num_queues,
Error **errp)
@@ -XXX,XX +XXX,XX @@ static bool apply_iothread_vq_mapping(
size_t num_iothreads = 0;
size_t cur_iothread = 0;

- if (!validate_iothread_vq_mapping_list(iothread_vq_mapping_list,
- num_queues, errp)) {
+ if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
return false;
}

- for (node = iothread_vq_mapping_list; node; node = node->next) {
+ for (node = list; node; node = node->next) {
num_iothreads++;
}

- for (node = iothread_vq_mapping_list; node; node = node->next) {
+ for (node = list; node; node = node->next) {
IOThread *iothread = iothread_by_id(node->value->iothread);
AioContext *ctx = iothread_get_aio_context(iothread);

@@ -XXX,XX +XXX,XX @@ static bool apply_iothread_vq_mapping(
}

/**
- * cleanup_iothread_vq_mapping:
+ * iothread_vq_mapping_cleanup:
* @list: The mapping of virtqueues to IOThreads.
*
* Release IOThread object references that were acquired by
- * apply_iothread_vq_mapping().
+ * iothread_vq_mapping_apply().
*/
-static void cleanup_iothread_vq_mapping(IOThreadVirtQueueMappingList *list)
+static void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
{
IOThreadVirtQueueMappingList *node;

@@ -XXX,XX +XXX,XX @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
s->vq_aio_context = g_new(AioContext *, conf->num_queues);

if (conf->iothread_vq_mapping_list) {
- if (!apply_iothread_vq_mapping(conf->iothread_vq_mapping_list,
+ if (!iothread_vq_mapping_apply(conf->iothread_vq_mapping_list,
s->vq_aio_context,
conf->num_queues,
errp)) {
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s)
assert(!s->ioeventfd_started);

if (conf->iothread_vq_mapping_list) {
- cleanup_iothread_vq_mapping(conf->iothread_vq_mapping_list);
+ iothread_vq_mapping_cleanup(conf->iothread_vq_mapping_list);
}

if (conf->iothread) {
--
2.48.1
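Editorial note, not part of the patches: the one-line file-win32 change works because the generic truncate path honors only the flags a driver has advertised in supported_truncate_flags. A simplified sketch of that negotiation, with hypothetical names that differ from the real block.c code:

/* Sketch of capability negotiation on truncate (illustrative only). */
enum { REQ_ZERO_WRITE = 1 << 0 };

struct BDS {
    unsigned supported_truncate_flags; /* what the driver can honor */
};

static int do_truncate(struct BDS *bs, long long new_size, unsigned flags)
{
    /* Flags the driver did not advertise cannot be passed through. */
    unsigned unsupported = flags & ~bs->supported_truncate_flags;

    if (unsupported & REQ_ZERO_WRITE) {
        /* caller must fall back to writing zeroes explicitly */
        return -1;
    }
    /* driver guarantees the grown area reads as zeroes; nothing to do */
    return 0;
}

Since Windows, like POSIX, zero-fills regular files on extension, advertising REQ_ZERO_WRITE costs the driver nothing and spares callers the explicit zeroing fallback.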
From: Alberto Garcia <berto@igalia.com>

After commit f01643fb8b47e8a70c04bbf45e0f12a9e5bc54de when an image is
extended and BDRV_REQ_ZERO_WRITE is set then the new clusters are
zeroized.

The code however does not correctly detect situations where the old and
the new end of the image are within the same cluster. The problem can
be reproduced with these steps:

 qemu-img create -f qcow2 backing.qcow2 1M
 qemu-img create -f qcow2 -F qcow2 -b backing.qcow2 top.qcow2
 qemu-img resize --shrink top.qcow2 520k
 qemu-img resize top.qcow2 567k

In the last step offset - zero_start causes an integer wraparound.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-Id: <20200504155217.10325-1-berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/qcow2.c | 12 ++++---
tests/qemu-iotests/292 | 73 ++++++++++++++++++++++++++++++++++++++
tests/qemu-iotests/292.out | 24 +++++++++++++
tests/qemu-iotests/group | 1 +
4 files changed, 105 insertions(+), 5 deletions(-)
create mode 100755 tests/qemu-iotests/292
create mode 100644 tests/qemu-iotests/292.out

diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
* requires a cluster-aligned start. The end may be unaligned if it is
* at the end of the image (which it is here).
*/
- ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "Failed to zero out new clusters");
- goto fail;
+ if (offset > zero_start) {
+ ret = qcow2_cluster_zeroize(bs, zero_start, offset - zero_start, 0);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to zero out new clusters");
+ goto fail;
+ }
}

/* Write explicit zeros for the unaligned head */
if (zero_start > old_length) {
- uint64_t len = zero_start - old_length;
+ uint64_t len = MIN(zero_start, offset) - old_length;
uint8_t *buf = qemu_blockalign0(bs, len);
QEMUIOVector qiov;
qemu_iovec_init_buf(&qiov, buf, len);
diff --git a/tests/qemu-iotests/292 b/tests/qemu-iotests/292
new file mode 100755
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/292
@@ -XXX,XX +XXX,XX @@
+#!/usr/bin/env bash
+#
+# Test resizing a qcow2 image with a backing file
+#
+# Copyright (C) 2020 Igalia, S.L.
+# Author: Alberto Garcia <berto@igalia.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=berto@igalia.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+echo '### Create the backing image'
+BACKING_IMG="$TEST_IMG.base"
+TEST_IMG="$BACKING_IMG" _make_test_img 1M
+
+echo '### Fill the backing image with data (0x11)'
+$QEMU_IO -c 'write -P 0x11 0 1M' "$BACKING_IMG" | _filter_qemu_io
+
+echo '### Create the top image'
+_make_test_img -F "$IMGFMT" -b "$BACKING_IMG"
+
+echo '### Fill the top image with data (0x22)'
+$QEMU_IO -c 'write -P 0x22 0 1M' "$TEST_IMG" | _filter_qemu_io
+
+# Both offsets are part of the same cluster.
+echo '### Shrink the image to 520k'
+$QEMU_IMG resize --shrink "$TEST_IMG" 520k
+echo '### Grow the image to 567k'
+$QEMU_IMG resize "$TEST_IMG" 567k
+
+echo '### Check that the tail of the image reads as zeroes'
+$QEMU_IO -c 'read -P 0x22 0 520k' "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c 'read -P 0x00 520k 47k' "$TEST_IMG" | _filter_qemu_io
+
+echo '### Show output of qemu-img map'
+$QEMU_IMG map "$TEST_IMG" | _filter_testdir
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/292.out b/tests/qemu-iotests/292.out
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/292.out
@@ -XXX,XX +XXX,XX @@
+QA output created by 292
+### Create the backing image
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1048576
+### Fill the backing image with data (0x11)
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+### Create the top image
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT.base backing_fmt=IMGFMT
+### Fill the top image with data (0x22)
+wrote 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+### Shrink the image to 520k
+Image resized.
+### Grow the image to 567k
+Image resized.
+### Check that the tail of the image reads as zeroes
+read 532480/532480 bytes at offset 0
+520 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 48128/48128 bytes at offset 532480
+47 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+### Show output of qemu-img map
+Offset Length Mapped to File
+0 0x8dc00 0x50000 TEST_DIR/t.qcow2
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -XXX,XX +XXX,XX @@
288 quick
289 rw quick
290 rw auto quick
+292 rw auto quick
--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

The code that builds an array of AioContext pointers indexed by the
virtqueue is not specific to virtio-blk. virtio-scsi will need to do the
same thing, so extract the functions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-11-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/hw/virtio/iothread-vq-mapping.h | 45 ++++++++
hw/block/virtio-blk.c | 142 +-----------------------
hw/virtio/iothread-vq-mapping.c | 131 ++++++++++++++++++++++
hw/virtio/meson.build | 1 +
4 files changed, 178 insertions(+), 141 deletions(-)
create mode 100644 include/hw/virtio/iothread-vq-mapping.h
create mode 100644 hw/virtio/iothread-vq-mapping.c

diff --git a/include/hw/virtio/iothread-vq-mapping.h b/include/hw/virtio/iothread-vq-mapping.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/include/hw/virtio/iothread-vq-mapping.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * IOThread Virtqueue Mapping
+ *
+ * Copyright Red Hat, Inc
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#ifndef HW_VIRTIO_IOTHREAD_VQ_MAPPING_H
+#define HW_VIRTIO_IOTHREAD_VQ_MAPPING_H
+
+#include "qapi/error.h"
+#include "qapi/qapi-types-virtio.h"
+
+/**
+ * iothread_vq_mapping_apply:
+ * @list: The mapping of virtqueues to IOThreads.
+ * @vq_aio_context: The array of AioContext pointers to fill in.
+ * @num_queues: The length of @vq_aio_context.
+ * @errp: If an error occurs, a pointer to the area to store the error.
+ *
+ * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
+ * the iothread-vq-mapping parameter in @list.
+ *
+ * iothread_vq_mapping_cleanup() must be called to free IOThread object
+ * references after this function returns success.
+ *
+ * Returns: %true on success, %false on failure.
+ **/
+bool iothread_vq_mapping_apply(
+ IOThreadVirtQueueMappingList *list,
+ AioContext **vq_aio_context,
+ uint16_t num_queues,
+ Error **errp);
+
+/**
+ * iothread_vq_mapping_cleanup:
+ * @list: The mapping of virtqueues to IOThreads.
+ *
+ * Release IOThread object references that were acquired by
+ * iothread_vq_mapping_apply().
+ */
+void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list);
+
+#endif /* HW_VIRTIO_IOTHREAD_VQ_MAPPING_H */
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@
#endif
#include "hw/virtio/virtio-bus.h"
#include "migration/qemu-file-types.h"
+#include "hw/virtio/iothread-vq-mapping.h"
#include "hw/virtio/virtio-access.h"
#include "hw/virtio/virtio-blk-common.h"
#include "qemu/coroutine.h"
@@ -XXX,XX +XXX,XX @@ static const BlockDevOps virtio_block_ops = {
.drained_end = virtio_blk_drained_end,
};

-static bool
-iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
- num_queues, Error **errp)
-{
- g_autofree unsigned long *vqs = bitmap_new(num_queues);
- g_autoptr(GHashTable) iothreads =
- g_hash_table_new(g_str_hash, g_str_equal);
-
- for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
- const char *name = node->value->iothread;
- uint16List *vq;
-
- if (!iothread_by_id(name)) {
- error_setg(errp, "IOThread \"%s\" object does not exist", name);
- return false;
- }
-
- if (!g_hash_table_add(iothreads, (gpointer)name)) {
- error_setg(errp,
- "duplicate IOThread name \"%s\" in iothread-vq-mapping",
- name);
- return false;
- }
-
- if (node != list) {
- if (!!node->value->vqs != !!list->value->vqs) {
- error_setg(errp, "either all items in iothread-vq-mapping "
- "must have vqs or none of them must have it");
- return false;
- }
- }
-
- for (vq = node->value->vqs; vq; vq = vq->next) {
- if (vq->value >= num_queues) {
- error_setg(errp, "vq index %u for IOThread \"%s\" must be "
- "less than num_queues %u in iothread-vq-mapping",
- vq->value, name, num_queues);
- return false;
- }
-
- if (test_and_set_bit(vq->value, vqs)) {
- error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
- "because it is already assigned", vq->value, name);
- return false;
- }
- }
- }
-
- if (list->value->vqs) {
- for (uint16_t i = 0; i < num_queues; i++) {
- if (!test_bit(i, vqs)) {
- error_setg(errp,
- "missing vq %u IOThread assignment in iothread-vq-mapping",
- i);
- return false;
- }
- }
- }
-
- return true;
-}
-
-/**
- * iothread_vq_mapping_apply:
- * @list: The mapping of virtqueues to IOThreads.
- * @vq_aio_context: The array of AioContext pointers to fill in.
- * @num_queues: The length of @vq_aio_context.
- * @errp: If an error occurs, a pointer to the area to store the error.
- *
- * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
- * the iothread-vq-mapping parameter in @list.
- *
- * iothread_vq_mapping_cleanup() must be called to free IOThread object
- * references after this function returns success.
- *
- * Returns: %true on success, %false on failure.
- **/
-static bool iothread_vq_mapping_apply(
- IOThreadVirtQueueMappingList *list,
- AioContext **vq_aio_context,
- uint16_t num_queues,
- Error **errp)
-{
- IOThreadVirtQueueMappingList *node;
- size_t num_iothreads = 0;
- size_t cur_iothread = 0;
-
- if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
- return false;
- }
-
- for (node = list; node; node = node->next) {
- num_iothreads++;
- }
-
- for (node = list; node; node = node->next) {
- IOThread *iothread = iothread_by_id(node->value->iothread);
- AioContext *ctx = iothread_get_aio_context(iothread);
-
- /* Released in virtio_blk_vq_aio_context_cleanup() */
- object_ref(OBJECT(iothread));
-
- if (node->value->vqs) {
- uint16List *vq;
-
- /* Explicit vq:IOThread assignment */
- for (vq = node->value->vqs; vq; vq = vq->next) {
- assert(vq->value < num_queues);
- vq_aio_context[vq->value] = ctx;
- }
- } else {
- /* Round-robin vq:IOThread assignment */
- for (unsigned i = cur_iothread; i < num_queues;
- i += num_iothreads) {
- vq_aio_context[i] = ctx;
- }
- }
-
- cur_iothread++;
- }
-
- return true;
-}
-
-/**
- * iothread_vq_mapping_cleanup:
- * @list: The mapping of virtqueues to IOThreads.
- *
- * Release IOThread object references that were acquired by
- * iothread_vq_mapping_apply().
- */
-static void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
-{
- IOThreadVirtQueueMappingList *node;
-
- for (node = list; node; node = node->next) {
- IOThread *iothread = iothread_by_id(node->value->iothread);
- object_unref(OBJECT(iothread));
- }
-}
-
/* Context: BQL held */
static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
{
diff --git a/hw/virtio/iothread-vq-mapping.c b/hw/virtio/iothread-vq-mapping.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/iothread-vq-mapping.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * IOThread Virtqueue Mapping
+ *
+ * Copyright Red Hat, Inc
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#include "qemu/osdep.h"
+#include "system/iothread.h"
+#include "hw/virtio/iothread-vq-mapping.h"
+
+static bool
+iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
+ num_queues, Error **errp)
+{
+ g_autofree unsigned long *vqs = bitmap_new(num_queues);
+ g_autoptr(GHashTable) iothreads =
+ g_hash_table_new(g_str_hash, g_str_equal);
+
+ for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
+ const char *name = node->value->iothread;
+ uint16List *vq;
+
+ if (!iothread_by_id(name)) {
+ error_setg(errp, "IOThread \"%s\" object does not exist", name);
+ return false;
+ }
+
+ if (!g_hash_table_add(iothreads, (gpointer)name)) {
+ error_setg(errp,
+ "duplicate IOThread name \"%s\" in iothread-vq-mapping",
+ name);
+ return false;
+ }
+
+ if (node != list) {
+ if (!!node->value->vqs != !!list->value->vqs) {
+ error_setg(errp, "either all items in iothread-vq-mapping "
+ "must have vqs or none of them must have it");
+ return false;
+ }
+ }
+
+ for (vq = node->value->vqs; vq; vq = vq->next) {
+ if (vq->value >= num_queues) {
+ error_setg(errp, "vq index %u for IOThread \"%s\" must be "
+ "less than num_queues %u in iothread-vq-mapping",
+ vq->value, name, num_queues);
+ return false;
+ }
+
+ if (test_and_set_bit(vq->value, vqs)) {
+ error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
+ "because it is already assigned", vq->value, name);
+ return false;
+ }
+ }
+ }
+
+ if (list->value->vqs) {
+ for (uint16_t i = 0; i < num_queues; i++) {
+ if (!test_bit(i, vqs)) {
+ error_setg(errp,
+ "missing vq %u IOThread assignment in iothread-vq-mapping",
+ i);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool iothread_vq_mapping_apply(
+ IOThreadVirtQueueMappingList *list,
+ AioContext **vq_aio_context,
+ uint16_t num_queues,
+ Error **errp)
+{
+ IOThreadVirtQueueMappingList *node;
+ size_t num_iothreads = 0;
+ size_t cur_iothread = 0;
+
+ if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
+ return false;
+ }
+
+ for (node = list; node; node = node->next) {
+ num_iothreads++;
+ }
+
+ for (node = list; node; node = node->next) {
+ IOThread *iothread = iothread_by_id(node->value->iothread);
+ AioContext *ctx = iothread_get_aio_context(iothread);
+
+ /* Released in virtio_blk_vq_aio_context_cleanup() */
+ object_ref(OBJECT(iothread));
+
+ if (node->value->vqs) {
+ uint16List *vq;
+
+ /* Explicit vq:IOThread assignment */
+ for (vq = node->value->vqs; vq; vq = vq->next) {
+ assert(vq->value < num_queues);
+ vq_aio_context[vq->value] = ctx;
+ }
+ } else {
+ /* Round-robin vq:IOThread assignment */
+ for (unsigned i = cur_iothread; i < num_queues;
+ i += num_iothreads) {
+ vq_aio_context[i] = ctx;
+ }
+ }
+
+ cur_iothread++;
+ }
+
+ return true;
+}
+
+void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
+{
+ IOThreadVirtQueueMappingList *node;
+
+ for (node = list; node; node = node->next) {
+ IOThread *iothread = iothread_by_id(node->value->iothread);
+ object_unref(OBJECT(iothread));
+ }
+}
+
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -XXX,XX +XXX,XX @@
system_virtio_ss = ss.source_set()
system_virtio_ss.add(files('virtio-bus.c'))
+system_virtio_ss.add(files('iothread-vq-mapping.c'))
system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c'))
system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c'))
system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c'))
--
2.48.1
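Editorial note, not part of the patches: the integer wraparound fixed by the qcow2 patch above is easy to reproduce in isolation. With a 64k cluster size, shrinking to 520k and growing to 567k leaves offset below zero_start (the next cluster boundary after the old length), so the unsigned subtraction wraps. A standalone demonstration with assumed values, not qcow2 code:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t cluster_size = 65536;
    uint64_t old_length = 520 * 1024;   /* old image size: 520k */
    uint64_t offset = 567 * 1024;       /* new image size: 567k */
    /* zero_start is old_length rounded up to the next cluster boundary */
    uint64_t zero_start = (old_length + cluster_size - 1) &
                          ~(cluster_size - 1);

    printf("zero_start = %" PRIu64 "\n", zero_start);   /* 589824 (576k) */
    /* offset (580608) < zero_start, so this wraps to about 1.8e19: */
    printf("offset - zero_start = %" PRIu64 "\n", offset - zero_start);
    return 0;
}

This is why the fix guards the qcow2_cluster_zeroize() call with "if (offset > zero_start)" and clamps the head-zeroing length with MIN(zero_start, offset).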
Deleted patch
m_data is used for zero clusters even though valid == 0. It really only
means that a new cluster was allocated in the image file. Rename it to
reflect this.

While at it, change it from int to bool, too.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20200430133007.170335-2-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/vmdk.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -XXX,XX +XXX,XX @@ typedef struct VmdkMetaData {
unsigned int l1_index;
unsigned int l2_index;
unsigned int l2_offset;
- int valid;
+ bool new_allocation;
uint32_t *l2_cache_entry;
} VmdkMetaData;

@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;

if (m_data) {
- m_data->valid = 0;
+ m_data->new_allocation = false;
}
if (extent->flat) {
*cluster_offset = extent->flat_start_offset;
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
return ret;
}
if (m_data) {
- m_data->valid = 1;
+ m_data->new_allocation = true;
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
m_data->l2_offset = l2_offset;
@@ -XXX,XX +XXX,XX @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
if (ret) {
return ret;
}
- if (m_data.valid) {
+ if (m_data.new_allocation) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data,
cluster_offset >> BDRV_SECTOR_BITS)
--
2.25.3
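Editorial note, not part of the patch: the renamed flag implements a common allocate-then-publish pattern, where the write path allocates a cluster, writes the data, and only then publishes it by updating the L2 table, keyed off whether an allocation actually happened. A schematic sketch with made-up names, not the vmdk code:

#include <stdbool.h>

typedef struct Meta {
    bool new_allocation; /* true iff a fresh cluster was allocated */
    unsigned l1_index, l2_index;
} Meta;

static int write_cluster(Meta *m, bool need_alloc)
{
    m->new_allocation = false;
    if (need_alloc) {
        /* ...allocate space for the cluster in the image file... */
        m->new_allocation = true;
    }
    /* ...write the guest data into the (old or new) cluster... */
    return 0;
}

static void maybe_publish(const Meta *m)
{
    if (m->new_allocation) {
        /* point the L2 table at the new cluster only after data is durable */
    }
}

The old name "valid" suggested the metadata itself might be invalid, which is misleading for zero clusters where the metadata is perfectly valid but no allocation took place.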
Deleted patch
1
When overwriting a zero cluster, we must not perform copy-on-write from
2
the backing file, but from a zeroed buffer.
3
1
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Message-Id: <20200430133007.170335-4-kwolf@redhat.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
---
 block/vmdk.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index XXXXXXX..XXXXXXX 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -XXX,XX +XXX,XX @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
  * get_whole_cluster
  *
  * Copy backing file's cluster that covers @sector_num, otherwise write zero,
- * to the cluster at @cluster_sector_num.
+ * to the cluster at @cluster_sector_num. If @zeroed is true, we're overwriting
+ * a zeroed cluster in the current layer and must not copy data from the
+ * backing file.
  *
  * If @skip_start_sector < @skip_end_sector, the relative range
  * [@skip_start_sector, @skip_end_sector) is not copied or written, and leave
@@ -XXX,XX +XXX,XX @@ static int get_whole_cluster(BlockDriverState *bs,
                              uint64_t cluster_offset,
                              uint64_t offset,
                              uint64_t skip_start_bytes,
-                             uint64_t skip_end_bytes)
+                             uint64_t skip_end_bytes,
+                             bool zeroed)
 {
     int ret = VMDK_OK;
     int64_t cluster_bytes;
     uint8_t *whole_grain;
+    bool copy_from_backing;

     /* For COW, align request sector_num to cluster start */
     cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
     offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
     whole_grain = qemu_blockalign(bs, cluster_bytes);
+    copy_from_backing = bs->backing && !zeroed;

-    if (!bs->backing) {
+    if (!copy_from_backing) {
         memset(whole_grain, 0, skip_start_bytes);
         memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
     }
@@ -XXX,XX +XXX,XX @@ static int get_whole_cluster(BlockDriverState *bs,

     /* Read backing data before skip range */
     if (skip_start_bytes > 0) {
-        if (bs->backing) {
+        if (copy_from_backing) {
             /* qcow2 emits this on bs->file instead of bs->backing */
             BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
             ret = bdrv_pread(bs->backing, offset, whole_grain,
@@ -XXX,XX +XXX,XX @@ static int get_whole_cluster(BlockDriverState *bs,
     }
     /* Read backing data after skip range */
     if (skip_end_bytes < cluster_bytes) {
-        if (bs->backing) {
+        if (copy_from_backing) {
             /* qcow2 emits this on bs->file instead of bs->backing */
             BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
             ret = bdrv_pread(bs->backing, offset + skip_end_bytes,
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
      * or inappropriate VM shutdown.
      */
     ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
-                            offset, skip_start_bytes, skip_end_bytes);
+                            offset, skip_start_bytes, skip_end_bytes,
+                            zeroed);
     if (ret) {
         return ret;
     }
--
2.25.3

From: Max Reitz <mreitz@redhat.com>

Calling bdrv_getlength() to get the pre-truncate file size will not
really work on block devices, because they always have the same length,
and trying to write beyond it will fail with a rather cryptic error
message.

Instead, we should use qcow2_get_last_cluster() and bdrv_getlength()
only as a fallback.

Before this patch:
$ truncate -s 1G test.img
$ sudo losetup -f --show test.img
/dev/loop0
$ sudo qemu-img create -f qcow2 -o preallocation=full /dev/loop0 64M
Formatting '/dev/loop0', fmt=qcow2 size=67108864 cluster_size=65536
preallocation=full lazy_refcounts=off refcount_bits=16
qemu-img: /dev/loop0: Could not resize image: Failed to resize refcount
structures: No space left on device

With this patch:
$ sudo qemu-img create -f qcow2 -o preallocation=full /dev/loop0 64M
Formatting '/dev/loop0', fmt=qcow2 size=67108864 cluster_size=65536
preallocation=full lazy_refcounts=off refcount_bits=16
qemu-img: /dev/loop0: Could not resize image: Failed to resize
underlying file: Preallocation mode 'full' unsupported for this
non-regular file

So as you can see, it still fails, but now the problem is missing
support on the block device level, so we at least get a better error
message.

Note that we cannot preallocate block devices on truncate by design,
because we do not know what area to preallocate. Their length is always
the same, the truncate operation does not change it.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200505141801.1096763-1-mreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
 {
     int64_t allocation_start, host_offset, guest_offset;
     int64_t clusters_allocated;
-    int64_t old_file_size, new_file_size;
+    int64_t old_file_size, last_cluster, new_file_size;
     uint64_t nb_new_data_clusters, nb_new_l2_tables;

     /* With a data file, preallocation means just allocating the metadata
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
             ret = old_file_size;
             goto fail;
         }
-        old_file_size = ROUND_UP(old_file_size, s->cluster_size);
+
+        last_cluster = qcow2_get_last_cluster(bs, old_file_size);
+        if (last_cluster >= 0) {
+            old_file_size = (last_cluster + 1) * s->cluster_size;
+        } else {
+            old_file_size = ROUND_UP(old_file_size, s->cluster_size);
+        }

         nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
                                             s->cluster_size);
--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

Allow virtio-scsi virtqueues to be assigned to different IOThreads. This
makes it possible to take advantage of host multi-queue block layer
scalability by assigning virtqueues that have affinity with vCPUs to
different IOThreads that have affinity with host CPUs. The same feature
was introduced for virtio-blk in the past:
https://developers.redhat.com/articles/2024/09/05/scaling-virtio-blk-disk-io-iothread-virtqueue-mapping

Here are fio randread 4k iodepth=64 results from a 4 vCPU guest with an
Intel P4800X SSD:

iothreads  IOPS
------------------------------
1          189576
2          312698
4          346744

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20250311132616.1049687-12-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
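A usage sketch, assuming the same JSON -device syntax as the existing
virtio-blk iothread-vq-mapping parameter (IOThread ids and the queue count
are illustrative):

$ qemu-system-x86_64 \
    -object iothread,id=iothread0 \
    -object iothread,id=iothread1 \
    -device '{"driver":"virtio-scsi-pci","id":"scsi0","num_queues":4,
              "iothread-vq-mapping":[{"iothread":"iothread0"},
                                     {"iothread":"iothread1"}]}'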
 include/hw/virtio/virtio-scsi.h |  5 +-
 hw/scsi/virtio-scsi-dataplane.c | 90 ++++++++++++++++++++++++---------
 hw/scsi/virtio-scsi.c           | 63 ++++++++++++++---------
 3 files changed, 107 insertions(+), 51 deletions(-)

diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/virtio.h"
 #include "hw/scsi/scsi.h"
 #include "chardev/char-fe.h"
+#include "qapi/qapi-types-virtio.h"
 #include "system/iothread.h"

 #define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common"
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSIConf {
     CharBackend chardev;
     uint32_t boot_tpgt;
     IOThread *iothread;
+    IOThreadVirtQueueMappingList *iothread_vq_mapping_list;
 };

 struct VirtIOSCSI;
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {
     QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;

     /* Fields for dataplane below */
-    AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
+    AioContext **vq_aio_context; /* per-virtqueue AioContext pointer */

     bool dataplane_started;
     bool dataplane_starting;
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_common_realize(DeviceState *dev,
 void virtio_scsi_common_unrealize(DeviceState *dev);

 void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp);
+void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s);
 int virtio_scsi_dataplane_start(VirtIODevice *s);
 void virtio_scsi_dataplane_stop(VirtIODevice *s);

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@
 #include "system/block-backend.h"
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-bus.h"

 /* Context: BQL held */
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    uint16_t num_vqs = vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED;

-    if (vs->conf.iothread) {
+    if (vs->conf.iothread && vs->conf.iothread_vq_mapping_list) {
+        error_setg(errp,
+                   "iothread and iothread-vq-mapping properties cannot be set "
+                   "at the same time");
+        return;
+    }
+
+    if (vs->conf.iothread || vs->conf.iothread_vq_mapping_list) {
         if (!k->set_guest_notifiers || !k->ioeventfd_assign) {
             error_setg(errp,
                        "device is incompatible with iothread "
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
             error_setg(errp, "ioeventfd is required for iothread");
             return;
         }
-        s->ctx = iothread_get_aio_context(vs->conf.iothread);
-    } else {
-        if (!virtio_device_ioeventfd_enabled(vdev)) {
+    }
+
+    s->vq_aio_context = g_new(AioContext *, num_vqs);
+
+    if (vs->conf.iothread_vq_mapping_list) {
+        if (!iothread_vq_mapping_apply(vs->conf.iothread_vq_mapping_list,
+                                       s->vq_aio_context, num_vqs, errp)) {
+            g_free(s->vq_aio_context);
+            s->vq_aio_context = NULL;
             return;
         }
-        s->ctx = qemu_get_aio_context();
+    } else if (vs->conf.iothread) {
+        AioContext *ctx = iothread_get_aio_context(vs->conf.iothread);
+        for (uint16_t i = 0; i < num_vqs; i++) {
+            s->vq_aio_context[i] = ctx;
+        }
+
+        /* Released in virtio_scsi_dataplane_cleanup() */
+        object_ref(OBJECT(vs->conf.iothread));
+    } else {
+        AioContext *ctx = qemu_get_aio_context();
+        for (unsigned i = 0; i < num_vqs; i++) {
+            s->vq_aio_context[i] = ctx;
+        }
+    }
+}
+
+/* Context: BQL held */
+void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s)
+{
+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+
+    if (vs->conf.iothread_vq_mapping_list) {
+        iothread_vq_mapping_cleanup(vs->conf.iothread_vq_mapping_list);
     }
+
+    if (vs->conf.iothread) {
+        object_unref(OBJECT(vs->conf.iothread));
+    }
+
+    g_free(s->vq_aio_context);
+    s->vq_aio_context = NULL;
 }

 static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
 }

 /* Context: BH in IOThread */
-static void virtio_scsi_dataplane_stop_bh(void *opaque)
+static void virtio_scsi_dataplane_stop_vq_bh(void *opaque)
 {
-    VirtIOSCSI *s = opaque;
-    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+    AioContext *ctx = qemu_get_current_aio_context();
+    VirtQueue *vq = opaque;
     EventNotifier *host_notifier;
-    int i;

-    virtio_queue_aio_detach_host_notifier(vs->ctrl_vq, s->ctx);
-    host_notifier = virtio_queue_get_host_notifier(vs->ctrl_vq);
+    virtio_queue_aio_detach_host_notifier(vq, ctx);
+    host_notifier = virtio_queue_get_host_notifier(vq);

     /*
      * Test and clear notifier after disabling event, in case poll callback
      * didn't have time to run.
      */
     virtio_queue_host_notifier_read(host_notifier);
-
-    virtio_queue_aio_detach_host_notifier(vs->event_vq, s->ctx);
-    host_notifier = virtio_queue_get_host_notifier(vs->event_vq);
-    virtio_queue_host_notifier_read(host_notifier);
-
-    for (i = 0; i < vs->conf.num_queues; i++) {
-        virtio_queue_aio_detach_host_notifier(vs->cmd_vqs[i], s->ctx);
-        host_notifier = virtio_queue_get_host_notifier(vs->cmd_vqs[i]);
-        virtio_queue_host_notifier_read(host_notifier);
-    }
 }

 /* Context: BQL held */
@@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
     smp_wmb(); /* paired with aio_notify_accept() */

     if (s->bus.drain_count == 0) {
-        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
-        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
+        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq,
+                                              s->vq_aio_context[0]);
+        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq,
+                                                      s->vq_aio_context[1]);

         for (i = 0; i < vs->conf.num_queues; i++) {
-            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
+            AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], ctx);
         }
     }
     return 0;
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
     s->dataplane_stopping = true;

     if (s->bus.drain_count == 0) {
-        aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
+        for (i = 0; i < vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED; i++) {
+            VirtQueue *vq = virtio_get_queue(&vs->parent_obj, i);
+            AioContext *ctx = s->vq_aio_context[i];
+            aio_wait_bh_oneshot(ctx, virtio_scsi_dataplane_stop_vq_bh, vq);
+        }
     }

     blk_drain_all(); /* ensure there are no in-flight requests */
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/qdev-properties.h"
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
 #include "trace.h"
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
     g_free(n);
 }

-static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d)
-{
-    if (s->dataplane_started && d && blk_is_available(d->conf.blk)) {
-        assert(blk_get_aio_context(d->conf.blk) == s->ctx);
-    }
-}
-
 static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
 {
     VirtIOSCSI *s = req->dev;
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_flush_defer_tmf_to_aio_context(VirtIOSCSI *s)

     assert(!s->dataplane_started);

-    if (s->ctx) {
+    for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) {
+        AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+
         /* Our BH only runs after previously scheduled BHs */
-        aio_wait_bh_oneshot(s->ctx, dummy_bh, NULL);
+        aio_wait_bh_oneshot(ctx, dummy_bh, NULL);
     }
 }

@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
     AioContext *ctx;
     int ret = 0;

-    virtio_scsi_ctx_check(s, d);
     /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */
     req->resp.tmf.response = VIRTIO_SCSI_S_OK;

@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)

     case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
     case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
+        g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
+
         if (!d) {
             goto fail;
         }
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)

         qatomic_inc(&req->remaining);

-        ctx = s->ctx ?: qemu_get_aio_context();
-        virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+        for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) {
+            ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+
+            if (!g_hash_table_add(aio_contexts, ctx)) {
+                continue; /* skip previously added AioContext */
+            }
+
+            virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+        }

         virtio_scsi_tmf_dec_remaining(req);
         ret = -EINPROGRESS;
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
  */
 static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s)
 {
-    if (!s->ctx || s->dataplane_started) {
+    if (s->dataplane_started) {
         return false;
     }
+    if (s->vq_aio_context[0] == qemu_get_aio_context()) {
+        return false; /* not using IOThreads */
+    }

     virtio_device_start_ioeventfd(&s->parent_obj.parent_obj);
     return !s->dataplane_fenced;
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
         virtio_scsi_complete_cmd_req(req);
         return -ENOENT;
     }
-    virtio_scsi_ctx_check(s, d);
     req->sreq = scsi_req_new(d, req->req.cmd.tag,
                              virtio_scsi_get_lun(req->req.cmd.lun),
                              req->req.cmd.cdb, vs->cdb_size, req);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
+    AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED];
     SCSIDevice *sd = SCSI_DEVICE(dev);
-    int ret;

-    if (s->ctx && !s->dataplane_fenced) {
-        ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp);
-        if (ret < 0) {
-            return;
-        }
+    if (ctx != qemu_get_aio_context() && !s->dataplane_fenced) {
+        /*
+         * Try to make the BlockBackend's AioContext match ours. Ignore failure
+         * because I/O will still work although block jobs and other users
+         * might be slower when multiple AioContexts use a BlockBackend.
+         */
+        blk_set_aio_context(sd->conf.blk, ctx, errp);
     }

     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,

     qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);

-    if (s->ctx) {
+    if (s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED] != qemu_get_aio_context()) {
         /* If other users keep the BlockBackend in the iothread, that's ok */
         blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
     }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_drained_begin(SCSIBus *bus)

     for (uint32_t i = 0; i < total_queues; i++) {
         VirtQueue *vq = virtio_get_queue(vdev, i);
-        virtio_queue_aio_detach_host_notifier(vq, s->ctx);
+        virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]);
     }
 }

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_drained_end(SCSIBus *bus)

     for (uint32_t i = 0; i < total_queues; i++) {
         VirtQueue *vq = virtio_get_queue(vdev, i);
+        AioContext *ctx = s->vq_aio_context[i];
+
         if (vq == vs->event_vq) {
-            virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier_no_poll(vq, ctx);
         } else {
-            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier(vq, ctx);
         }
     }
 }
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_common_unrealize(DeviceState *dev)
     virtio_cleanup(vdev);
 }

+/* main loop */
 static void virtio_scsi_device_unrealize(DeviceState *dev)
 {
     VirtIOSCSI *s = VIRTIO_SCSI(dev);

     virtio_scsi_reset_tmf_bh(s);
-
+    virtio_scsi_dataplane_cleanup(s);
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
     virtio_scsi_common_unrealize(dev);
     qemu_mutex_destroy(&s->tmf_bh_lock);
@@ -XXX,XX +XXX,XX @@ static const Property virtio_scsi_properties[] = {
                       VIRTIO_SCSI_F_CHANGE, true),
     DEFINE_PROP_LINK("iothread", VirtIOSCSI, parent_obj.conf.iothread,
                      TYPE_IOTHREAD, IOThread *),
+    DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOSCSI,
+                                         parent_obj.conf.iothread_vq_mapping_list),
 };

 static const VMStateDescription vmstate_virtio_scsi = {
--
2.48.1

From: Eric Blake <eblake@redhat.com>

Our .bdrv_has_zero_init_truncate can detect when the remote side
always zero fills; we can reuse that same knowledge to implement
BDRV_REQ_ZERO_WRITE by ignoring it when the server gives it to us for
free.

Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20200428202905.770727-7-eblake@redhat.com>
Reviewed-by: Richard W.M. Jones <rjones@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/ssh.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/ssh.c b/block/ssh.c
index XXXXXXX..XXXXXXX 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
     /* Go non-blocking. */
     ssh_set_blocking(s->session, 0);

+    if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
+        bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
+    }
+
     qapi_free_BlockdevOptionsSsh(opts);

     return 0;
--
2.25.3
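A quick way to exercise this path (host and path are illustrative; the
remote file must be a regular file for the flag to be set, so truncation
zero-fills for free):

$ qemu-img create -f qcow2 'ssh://user@example.com/tmp/test.qcow2' 1G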

From: Stefan Hajnoczi <stefanha@redhat.com>

Previously the ctrl virtqueue was handled in the AioContext where SCSI
requests are processed. When IOThread Virtqueue Mapping was added, things
became more complicated because SCSI requests could run in other
AioContexts.

Simplify by handling the ctrl virtqueue in the main loop, where reset
operations can be performed. Note that BHs are still used for canceling
SCSI requests in their AioContexts, but at least the main loop activity
doesn't need BHs anymore.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20250311132616.1049687-13-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/virtio/virtio-scsi.h |   8 --
 hw/scsi/virtio-scsi-dataplane.c |   6 ++
 hw/scsi/virtio-scsi.c           | 144 ++++++--------------------------
 3 files changed, 33 insertions(+), 125 deletions(-)

diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {

     QemuMutex ctrl_lock; /* protects ctrl_vq */

-    /*
-     * TMFs deferred to main loop BH. These fields are protected by
-     * tmf_bh_lock.
-     */
-    QemuMutex tmf_bh_lock;
-    QEMUBH *tmf_bh;
-    QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
-
     /* Fields for dataplane below */
     AioContext **vq_aio_context; /* per-virtqueue AioContext pointer */

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
             s->vq_aio_context[i] = ctx;
         }
     }
+
+    /*
+     * Always handle the ctrl virtqueue in the main loop thread where device
+     * resets can be performed.
+     */
+    s->vq_aio_context[0] = qemu_get_aio_context();
 }

 /* Context: BQL held */
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
     g_free(n);
 }

-static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
-{
-    VirtIOSCSI *s = req->dev;
-    SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
-    BusChild *kid;
-    int target;
-
-    switch (req->req.tmf.subtype) {
-    case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
-        if (!d) {
-            req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
-            goto out;
-        }
-        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
-            req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN;
-            goto out;
-        }
-        qatomic_inc(&s->resetting);
-        device_cold_reset(&d->qdev);
-        qatomic_dec(&s->resetting);
-        break;
-
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
-        target = req->req.tmf.lun[1];
-        qatomic_inc(&s->resetting);
-
-        rcu_read_lock();
-        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
-            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
-            if (d1->channel == 0 && d1->id == target) {
-                device_cold_reset(&d1->qdev);
-            }
-        }
-        rcu_read_unlock();
-
-        qatomic_dec(&s->resetting);
-        break;
-
-    default:
-        g_assert_not_reached();
-    }
-
-out:
-    object_unref(OBJECT(d));
-    virtio_scsi_complete_req(req, &s->ctrl_lock);
-}
-
-/* Some TMFs must be processed from the main loop thread */
-static void virtio_scsi_do_tmf_bh(void *opaque)
-{
-    VirtIOSCSI *s = opaque;
-    QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
-    VirtIOSCSIReq *req;
-    VirtIOSCSIReq *tmp;
-
-    GLOBAL_STATE_CODE();
-
-    WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
-        QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
-            QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
-            QTAILQ_INSERT_TAIL(&reqs, req, next);
-        }
-
-        qemu_bh_delete(s->tmf_bh);
-        s->tmf_bh = NULL;
-    }
-
-    QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
-        QTAILQ_REMOVE(&reqs, req, next);
-        virtio_scsi_do_one_tmf_bh(req);
-    }
-}
-
-static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
-{
-    VirtIOSCSIReq *req;
-    VirtIOSCSIReq *tmp;
-
-    GLOBAL_STATE_CODE();
-
-    /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */
-    if (s->tmf_bh) {
-        qemu_bh_delete(s->tmf_bh);
-        s->tmf_bh = NULL;
-    }
-
-    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
-        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
-
-        /* SAM-6 6.3.2 Hard reset */
-        req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
-        virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
-    }
-}
-
-static void virtio_scsi_defer_tmf_to_main_loop(VirtIOSCSIReq *req)
-{
-    VirtIOSCSI *s = req->dev;
-
-    WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
-        QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
-
-        if (!s->tmf_bh) {
-            s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
-            qemu_bh_schedule(s->tmf_bh);
-        }
-    }
-}
-
 static void virtio_scsi_tmf_cancel_req(VirtIOSCSIReq *tmf, SCSIRequest *r)
 {
     VirtIOSCSICancelNotifier *notifier;
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
         break;

     case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
-        virtio_scsi_defer_tmf_to_main_loop(req);
-        ret = -EINPROGRESS;
+        if (!d) {
+            goto fail;
+        }
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+            goto incorrect_lun;
+        }
+        qatomic_inc(&s->resetting);
+        device_cold_reset(&d->qdev);
+        qatomic_dec(&s->resetting);
         break;

+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: {
+        BusChild *kid;
+        int target = req->req.tmf.lun[1];
+        qatomic_inc(&s->resetting);
+
+        rcu_read_lock();
+        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
+            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
+            if (d1->channel == 0 && d1->id == target) {
+                device_cold_reset(&d1->qdev);
+            }
+        }
+        rcu_read_unlock();
+
+        qatomic_dec(&s->resetting);
+        break;
+    }
+
     case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
     case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
         g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)

     assert(!s->dataplane_started);

-    virtio_scsi_reset_tmf_bh(s);
     virtio_scsi_flush_defer_tmf_to_aio_context(s);

     qatomic_inc(&s->resetting);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
     Error *err = NULL;

-    QTAILQ_INIT(&s->tmf_bh_list);
     qemu_mutex_init(&s->ctrl_lock);
     qemu_mutex_init(&s->event_lock);
-    qemu_mutex_init(&s->tmf_bh_lock);

     virtio_scsi_common_realize(dev,
                                virtio_scsi_handle_ctrl,
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
 {
     VirtIOSCSI *s = VIRTIO_SCSI(dev);

-    virtio_scsi_reset_tmf_bh(s);
     virtio_scsi_dataplane_cleanup(s);
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
     virtio_scsi_common_unrealize(dev);
-    qemu_mutex_destroy(&s->tmf_bh_lock);
     qemu_mutex_destroy(&s->event_lock);
     qemu_mutex_destroy(&s->ctrl_lock);
 }
--
2.48.1

Since the introduction of a backup filter node in commit 00e30f05d, the
backup block job crashes when the target image is smaller than the
source image because it will try to write after the end of the target
node without having BLK_PERM_RESIZE. (Previously, the BlockBackend layer
would have caught this and errored out gracefully.)

We can fix this and even do better than the old behaviour: Check that
source and target have the same image size at the start of the block job
and unshare BLK_PERM_RESIZE. (This permission was already unshared
before the same commit 00e30f05d, but the BlockBackend that was used to
make the restriction was removed without a replacement.) This will
immediately error out when starting the job instead of only when writing
to a block that doesn't exist in the target.

Longer target than source would technically work because we would never
write to blocks that don't exist, but semantically these are invalid,
too, because a backup is supposed to create a copy, not just an image
that starts with a copy.

Fixes: 00e30f05de1d19586345ec373970ef4c192c6270
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1778593
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20200430142755.315494-4-kwolf@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
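A sketch of the new behaviour (image names and sizes are illustrative):

$ qemu-img create -f qcow2 src.qcow2 1G
$ qemu-img create -f qcow2 tgt.qcow2 512M

Starting a drive-backup/blockdev-backup job from src.qcow2 to tgt.qcow2 now
fails upfront with "Source and target image have different sizes" instead
of crashing once the job writes past the end of the target.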
 block/backup-top.c | 14 +++++++++-----
 block/backup.c     | 14 +++++++++++++-
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/block/backup-top.c b/block/backup-top.c
index XXXXXXX..XXXXXXX 100644
--- a/block/backup-top.c
+++ b/block/backup-top.c
@@ -XXX,XX +XXX,XX @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c,
          *
          * Share write to target (child_file), to not interfere
          * with guest writes to its disk which may be in target backing chain.
+         * Can't resize during a backup block job because we check the size
+         * only upfront.
          */
-        *nshared = BLK_PERM_ALL;
+        *nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
         *nperm = BLK_PERM_WRITE;
     } else {
         /* Source child */
@@ -XXX,XX +XXX,XX @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c,
         if (perm & BLK_PERM_WRITE) {
             *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
         }
-        *nshared &= ~BLK_PERM_WRITE;
+        *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
     }
 }

@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
 {
     Error *local_err = NULL;
     BDRVBackupTopState *state;
-    BlockDriverState *top = bdrv_new_open_driver(&bdrv_backup_top_filter,
-                                                 filter_node_name,
-                                                 BDRV_O_RDWR, errp);
+    BlockDriverState *top;
     bool appended = false;

+    assert(source->total_sectors == target->total_sectors);
+
+    top = bdrv_new_open_driver(&bdrv_backup_top_filter, filter_node_name,
+                               BDRV_O_RDWR, errp);
     if (!top) {
         return NULL;
     }
diff --git a/block/backup.c b/block/backup.c
index XXXXXXX..XXXXXXX 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                             BlockCompletionFunc *cb, void *opaque,
                             JobTxn *txn, Error **errp)
 {
-    int64_t len;
+    int64_t len, target_len;
     BackupBlockJob *job = NULL;
     int64_t cluster_size;
     BdrvRequestFlags write_flags;
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
         goto error;
     }

+    target_len = bdrv_getlength(target);
+    if (target_len < 0) {
+        error_setg_errno(errp, -target_len, "Unable to get length for '%s'",
+                         bdrv_get_device_or_node_name(bs));
+        goto error;
+    }
+
+    if (target_len != len) {
+        error_setg(errp, "Source and target image have different sizes");
+        goto error;
+    }
+
     cluster_size = backup_calculate_cluster_size(target, errp);
     if (cluster_size < 0) {
         goto error;
--
2.25.3

From: Stefan Hajnoczi <stefanha@redhat.com>

Peter Krempa and Kevin Wolf observed that iothread-vq-mapping is
confusing to use because the control and event virtqueues have a fixed
location before the command virtqueues but need to be treated
differently.

Only expose the command virtqueues via iothread-vq-mapping so that the
command-line parameter is intuitive: it controls where SCSI requests are
processed.

The control virtqueue needs to be hardcoded to the main loop thread for
technical reasons anyway. Kevin also pointed out that it's better to
place the event virtqueue in the main loop thread since its no-poll
behavior would prevent polling if it were assigned to an IOThread.

This change is its own commit to avoid squashing the previous commit.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Suggested-by: Peter Krempa <pkrempa@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20250311132616.1049687-14-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
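A usage sketch after this change (assuming the same "vqs" JSON syntax as
virtio-blk; ids and queue counts are illustrative), where mapping indices
now refer to command virtqueues only:

$ qemu-system-x86_64 \
    -object iothread,id=iothread0 \
    -object iothread,id=iothread1 \
    -device '{"driver":"virtio-scsi-pci","id":"scsi0","num_queues":4,
              "iothread-vq-mapping":[{"iothread":"iothread0","vqs":[0,1]},
                                     {"iothread":"iothread1","vqs":[2,3]}]}'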
 hw/scsi/virtio-scsi-dataplane.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
-    uint16_t num_vqs = vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED;

     if (vs->conf.iothread && vs->conf.iothread_vq_mapping_list) {
         error_setg(errp,
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
         }
     }

-    s->vq_aio_context = g_new(AioContext *, num_vqs);
+    s->vq_aio_context = g_new(AioContext *, vs->conf.num_queues +
+                                            VIRTIO_SCSI_VQ_NUM_FIXED);
+
+    /*
+     * Handle the ctrl virtqueue in the main loop thread where device resets
+     * can be performed.
+     */
+    s->vq_aio_context[0] = qemu_get_aio_context();
+
+    /*
+     * Handle the event virtqueue in the main loop thread where its no_poll
+     * behavior won't stop IOThread polling.
+     */
+    s->vq_aio_context[1] = qemu_get_aio_context();

     if (vs->conf.iothread_vq_mapping_list) {
         if (!iothread_vq_mapping_apply(vs->conf.iothread_vq_mapping_list,
-                                       s->vq_aio_context, num_vqs, errp)) {
+                                       &s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED],
+                                       vs->conf.num_queues, errp)) {
             g_free(s->vq_aio_context);
             s->vq_aio_context = NULL;
             return;
         }
     } else if (vs->conf.iothread) {
         AioContext *ctx = iothread_get_aio_context(vs->conf.iothread);
-        for (uint16_t i = 0; i < num_vqs; i++) {
-            s->vq_aio_context[i] = ctx;
+        for (uint16_t i = 0; i < vs->conf.num_queues; i++) {
+            s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = ctx;
         }

         /* Released in virtio_scsi_dataplane_cleanup() */
         object_ref(OBJECT(vs->conf.iothread));
     } else {
         AioContext *ctx = qemu_get_aio_context();
-        for (unsigned i = 0; i < num_vqs; i++) {
-            s->vq_aio_context[i] = ctx;
+        for (unsigned i = 0; i < vs->conf.num_queues; i++) {
+            s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = ctx;
         }
     }
-
-    /*
-     * Always handle the ctrl virtqueue in the main loop thread where device
-     * resets can be performed.
-     */
-    s->vq_aio_context[0] = qemu_get_aio_context();
 }

 /* Context: BQL held */
--
2.48.1