1
The following changes since commit 0db1851becbefe3e50cfc03776fb1f75817376af:
1
The following changes since commit 79fc2fb685f35a5e71e23629760ef4025d6aba31:
2
2
3
Merge remote-tracking branch 'remotes/vivier/tags/m68k-for-2.10-pull-request' into staging (2017-06-07 11:56:00 +0100)
3
Merge tag 'trivial-branch-for-7.2-pull-request' of https://gitlab.com/laurent_vivier/qemu into staging (2022-10-25 11:37:17 -0400)
4
4
5
are available in the git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/stefanha/qemu.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 11cde1c81093a33c46c7a4039bf750bb61551087:
9
for you to fetch changes up to baf422684d73c7bf38e2c18815e18d44fcf395b6:
10
10
11
configure: split c and cxx extra flags (2017-06-07 15:29:46 +0100)
11
virtio-blk: use BDRV_REQ_REGISTERED_BUF optimization hint (2022-10-26 14:56:42 -0400)
12
13
----------------------------------------------------------------
14
Pull request
12
15
13
----------------------------------------------------------------
16
----------------------------------------------------------------
14
17
15
----------------------------------------------------------------
18
Stefan Hajnoczi (13):
19
coroutine: add flag to re-queue at front of CoQueue
20
blkio: add libblkio block driver
21
numa: call ->ram_block_removed() in ram_block_notifier_remove()
22
block: pass size to bdrv_unregister_buf()
23
block: use BdrvRequestFlags type for supported flag fields
24
block: add BDRV_REQ_REGISTERED_BUF request flag
25
block: return errors from bdrv_register_buf()
26
numa: use QLIST_FOREACH_SAFE() for RAM block notifiers
27
block: add BlockRAMRegistrar
28
exec/cpu-common: add qemu_ram_get_fd()
29
stubs: add qemu_ram_block_from_host() and qemu_ram_get_fd()
30
blkio: implement BDRV_REQ_REGISTERED_BUF optimization
31
virtio-blk: use BDRV_REQ_REGISTERED_BUF optimization hint
16
32
17
Bruno Dominguez (1):
33
MAINTAINERS | 7 +
18
configure: split c and cxx extra flags
34
meson_options.txt | 2 +
19
35
qapi/block-core.json | 77 +-
20
Philippe Mathieu-Daudé (2):
36
meson.build | 9 +
21
oslib: strip trailing '\n' from error_setg() string argument
37
include/block/block-common.h | 9 +
22
coccinelle: fix typo in comment
38
include/block/block-global-state.h | 10 +-
23
39
include/block/block_int-common.h | 15 +-
24
Roman Pen (1):
40
include/exec/cpu-common.h | 1 +
25
coroutine-lock: do not touch coroutine after another one has been
41
include/hw/virtio/virtio-blk.h | 2 +
26
entered
42
include/qemu/coroutine.h | 15 +-
27
43
include/sysemu/block-backend-global-state.h | 4 +-
28
Stefan Hajnoczi (1):
44
include/sysemu/block-ram-registrar.h | 37 +
29
.gdbinit: load QEMU sub-commands when gdb starts
45
block.c | 14 +
30
46
block/blkio.c | 1008 +++++++++++++++++++
31
configure | 75 ++++++++++++++++++--------------
47
block/blkverify.c | 4 +-
32
disas/libvixl/Makefile.objs | 4 +-
48
block/block-backend.c | 8 +-
33
util/oslib-posix.c | 2 +-
49
block/block-ram-registrar.c | 58 ++
34
util/qemu-coroutine-lock.c | 19 +++++++-
50
block/crypto.c | 4 +-
35
util/qemu-coroutine.c | 5 +++
51
block/file-posix.c | 1 -
36
.gdbinit | 8 ++++
52
block/gluster.c | 1 -
37
rules.mak | 3 --
53
block/io.c | 101 +-
38
scripts/coccinelle/return_directly.cocci | 2 +-
54
block/mirror.c | 2 +
39
8 files changed, 77 insertions(+), 41 deletions(-)
55
block/nbd.c | 1 -
40
create mode 100644 .gdbinit
56
block/nvme.c | 20 +-
57
block/parallels.c | 1 -
58
block/qcow.c | 2 -
59
block/qed.c | 1 -
60
block/raw-format.c | 2 +
61
block/replication.c | 1 -
62
block/ssh.c | 1 -
63
block/vhdx.c | 1 -
64
hw/block/virtio-blk.c | 39 +-
65
hw/core/numa.c | 26 +-
66
qemu-img.c | 6 +-
67
softmmu/physmem.c | 5 +
68
stubs/physmem.c | 13 +
69
tests/qtest/modules-test.c | 3 +
70
util/qemu-coroutine-lock.c | 9 +-
71
util/vfio-helpers.c | 5 +-
72
block/meson.build | 2 +
73
scripts/meson-buildoptions.sh | 3 +
74
stubs/meson.build | 1 +
75
42 files changed, 1435 insertions(+), 96 deletions(-)
76
create mode 100644 include/sysemu/block-ram-registrar.h
77
create mode 100644 block/blkio.c
78
create mode 100644 block/block-ram-registrar.c
79
create mode 100644 stubs/physmem.c
41
80
42
--
81
--
43
2.9.4
82
2.37.3
44
45
diff view generated by jsdifflib
1
From: Roman Pen <roman.penyaev@profitbricks.com>
1
When a coroutine wakes up it may determine that it must re-queue.
2
Normally coroutines are pushed onto the back of the CoQueue, but for
3
fairness it may be necessary to push it onto the front of the CoQueue.
2
4
3
Submission of requests on linux aio is a bit tricky and can lead to
5
Add a flag to specify that the coroutine should be pushed onto the front
4
request completions on the submission path:
6
of the CoQueue. A later patch will use this to ensure fairness in the
7
bounce buffer CoQueue used by the blkio BlockDriver.
5
8
6
44713c9e8547 ("linux-aio: Handle io_submit() failure gracefully")
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
0ed93d84edab ("linux-aio: process completions from ioq_submit()")
10
Message-id: 20221013185908.1297568-2-stefanha@redhat.com
8
9
That means that any coroutine which has been yielded in order to wait
10
for completion can be resumed from the submission path and eventually be
11
terminated (freed).
12
13
The following use-after-free crash was observed when IO throttling
14
was enabled:
15
16
Program received signal SIGSEGV, Segmentation fault.
17
[Switching to Thread 0x7f5813dff700 (LWP 56417)]
18
virtqueue_unmap_sg (elem=0x7f5804009a30, len=1, vq=<optimized out>) at virtio.c:252
19
(gdb) bt
20
#0 virtqueue_unmap_sg (elem=0x7f5804009a30, len=1, vq=<optimized out>) at virtio.c:252
21
^^^^^^^^^^^^^^
22
remember the address
23
24
#1 virtqueue_fill (vq=0x5598b20d21b0, elem=0x7f5804009a30, len=1, idx=0) at virtio.c:282
25
#2 virtqueue_push (vq=0x5598b20d21b0, elem=elem@entry=0x7f5804009a30, len=<optimized out>) at virtio.c:308
26
#3 virtio_blk_req_complete (req=req@entry=0x7f5804009a30, status=status@entry=0 '\000') at virtio-blk.c:61
27
#4 virtio_blk_rw_complete (opaque=<optimized out>, ret=0) at virtio-blk.c:126
28
#5 blk_aio_complete (acb=0x7f58040068d0) at block-backend.c:923
29
#6 coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at coroutine-ucontext.c:78
30
31
(gdb) p * elem
32
$8 = {index = 77, out_num = 2, in_num = 1,
33
in_addr = 0x7f5804009ad8, out_addr = 0x7f5804009ae0,
34
in_sg = 0x0, out_sg = 0x7f5804009a50}
35
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
36
'in_sg' and 'out_sg' are invalid.
37
e.g. it is impossible that 'in_sg' is zero,
38
instead its value must be equal to:
39
40
(gdb) p/x 0x7f5804009ad8 + sizeof(elem->in_addr[0]) + 2 * sizeof(elem->out_addr[0])
41
$26 = 0x7f5804009af0
42
43
Seems 'elem' was corrupted. Meanwhile another thread raised an abort:
44
45
Thread 12 (Thread 0x7f57f2ffd700 (LWP 56426)):
46
#0 raise () from /lib/x86_64-linux-gnu/libc.so.6
47
#1 abort () from /lib/x86_64-linux-gnu/libc.so.6
48
#2 qemu_coroutine_enter (co=0x7f5804009af0) at qemu-coroutine.c:113
49
#3 qemu_co_queue_run_restart (co=0x7f5804009a30) at qemu-coroutine-lock.c:60
50
#4 qemu_coroutine_enter (co=0x7f5804009a30) at qemu-coroutine.c:119
51
^^^^^^^^^^^^^^^^^^
52
WTF?? this is equal to elem from crashed thread
53
54
#5 qemu_co_queue_run_restart (co=0x7f57e7f16ae0) at qemu-coroutine-lock.c:60
55
#6 qemu_coroutine_enter (co=0x7f57e7f16ae0) at qemu-coroutine.c:119
56
#7 qemu_co_queue_run_restart (co=0x7f5807e112a0) at qemu-coroutine-lock.c:60
57
#8 qemu_coroutine_enter (co=0x7f5807e112a0) at qemu-coroutine.c:119
58
#9 qemu_co_queue_run_restart (co=0x7f5807f17820) at qemu-coroutine-lock.c:60
59
#10 qemu_coroutine_enter (co=0x7f5807f17820) at qemu-coroutine.c:119
60
#11 qemu_co_queue_run_restart (co=0x7f57e7f18e10) at qemu-coroutine-lock.c:60
61
#12 qemu_coroutine_enter (co=0x7f57e7f18e10) at qemu-coroutine.c:119
62
#13 qemu_co_enter_next (queue=queue@entry=0x5598b1e742d0) at qemu-coroutine-lock.c:106
63
#14 timer_cb (blk=0x5598b1e74280, is_write=<optimized out>) at throttle-groups.c:419
64
65
The crash can be explained by access to the 'co' object from the loop inside
66
qemu_co_queue_run_restart():
67
68
while ((next = QSIMPLEQ_FIRST(&co->co_queue_wakeup))) {
69
QSIMPLEQ_REMOVE_HEAD(&co->co_queue_wakeup, co_queue_next);
70
^^^^^^^^^^^^^^^^^^^^
71
on each iteration 'co' is accessed,
72
but 'co' can be already freed
73
74
qemu_coroutine_enter(next);
75
}
76
77
When 'next' coroutine is resumed (entered) it can in its turn resume
78
'co', and eventually free it. That's why we see 'co' (which was freed)
79
has the same address as 'elem' from the first backtrace.
80
81
The fix is obvious: use a temporary queue and do not touch the coroutine after
82
first qemu_coroutine_enter() is invoked.
83
84
The issue is quite rare and happens every ~12 hours on very high IO
85
and CPU load (building linux kernel with -j512 inside guest) when IO
86
throttling is enabled. With the fix applied the guest has been running for ~35 hours
87
and is still alive so far.
88
89
Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
90
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
91
Message-id: 20170601160847.23720-1-roman.penyaev@profitbricks.com
92
Cc: Paolo Bonzini <pbonzini@redhat.com>
93
Cc: Fam Zheng <famz@redhat.com>
94
Cc: Stefan Hajnoczi <stefanha@redhat.com>
95
Cc: Kevin Wolf <kwolf@redhat.com>
96
Cc: qemu-devel@nongnu.org
97
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
98
---
12
---
99
util/qemu-coroutine-lock.c | 19 +++++++++++++++++--
13
include/qemu/coroutine.h | 15 +++++++++++++--
100
util/qemu-coroutine.c | 5 +++++
14
util/qemu-coroutine-lock.c | 9 +++++++--
101
2 files changed, 22 insertions(+), 2 deletions(-)
15
2 files changed, 20 insertions(+), 4 deletions(-)
102
16
17
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/qemu/coroutine.h
20
+++ b/include/qemu/coroutine.h
21
@@ -XXX,XX +XXX,XX @@ typedef struct CoQueue {
22
*/
23
void qemu_co_queue_init(CoQueue *queue);
24
25
+typedef enum {
26
+ /*
27
+ * Enqueue at front instead of back. Use this to re-queue a request when
28
+ * its wait condition is not satisfied after being woken up.
29
+ */
30
+ CO_QUEUE_WAIT_FRONT = 0x1,
31
+} CoQueueWaitFlags;
32
+
33
/**
34
* Adds the current coroutine to the CoQueue and transfers control to the
35
* caller of the coroutine. The mutex is unlocked during the wait and
36
* locked again afterwards.
37
*/
38
#define qemu_co_queue_wait(queue, lock) \
39
- qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock))
40
-void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock);
41
+ qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), 0)
42
+#define qemu_co_queue_wait_flags(queue, lock, flags) \
43
+ qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), (flags))
44
+void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock,
45
+ CoQueueWaitFlags flags);
46
47
/**
48
* Removes the next coroutine from the CoQueue, and queue it to run after
103
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
49
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
104
index XXXXXXX..XXXXXXX 100644
50
index XXXXXXX..XXXXXXX 100644
105
--- a/util/qemu-coroutine-lock.c
51
--- a/util/qemu-coroutine-lock.c
106
+++ b/util/qemu-coroutine-lock.c
52
+++ b/util/qemu-coroutine-lock.c
107
@@ -XXX,XX +XXX,XX @@ void coroutine_fn qemu_co_queue_wait(CoQueue *queue, CoMutex *mutex)
53
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_init(CoQueue *queue)
108
void qemu_co_queue_run_restart(Coroutine *co)
54
QSIMPLEQ_INIT(&queue->entries);
55
}
56
57
-void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock)
58
+void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock,
59
+ CoQueueWaitFlags flags)
109
{
60
{
110
Coroutine *next;
61
Coroutine *self = qemu_coroutine_self();
111
+ QSIMPLEQ_HEAD(, Coroutine) tmp_queue_wakeup =
62
- QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
112
+ QSIMPLEQ_HEAD_INITIALIZER(tmp_queue_wakeup);
63
+ if (flags & CO_QUEUE_WAIT_FRONT) {
113
64
+ QSIMPLEQ_INSERT_HEAD(&queue->entries, self, co_queue_next);
114
trace_qemu_co_queue_run_restart(co);
65
+ } else {
115
- while ((next = QSIMPLEQ_FIRST(&co->co_queue_wakeup))) {
66
+ QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
116
- QSIMPLEQ_REMOVE_HEAD(&co->co_queue_wakeup, co_queue_next);
67
+ }
117
+
68
118
+ /* Because "co" has yielded, any coroutine that we wakeup can resume it.
69
if (lock) {
119
+ * If this happens and "co" terminates, co->co_queue_wakeup becomes
70
qemu_lockable_unlock(lock);
120
+ * invalid memory. Therefore, use a temporary queue and do not touch
121
+ * the "co" coroutine as soon as you enter another one.
122
+ *
123
+ * In its turn resumed "co" can populate "co_queue_wakeup" queue with
124
+ * new coroutines to be woken up. The caller, who has resumed "co",
125
+ * will be responsible for traversing the same queue, which may cause
126
+ * a different wakeup order but not any missing wakeups.
127
+ */
128
+ QSIMPLEQ_CONCAT(&tmp_queue_wakeup, &co->co_queue_wakeup);
129
+
130
+ while ((next = QSIMPLEQ_FIRST(&tmp_queue_wakeup))) {
131
+ QSIMPLEQ_REMOVE_HEAD(&tmp_queue_wakeup, co_queue_next);
132
qemu_coroutine_enter(next);
133
}
134
}
135
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
136
index XXXXXXX..XXXXXXX 100644
137
--- a/util/qemu-coroutine.c
138
+++ b/util/qemu-coroutine.c
139
@@ -XXX,XX +XXX,XX @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
140
141
qemu_co_queue_run_restart(co);
142
143
+ /* Beware, if ret == COROUTINE_YIELD and qemu_co_queue_run_restart()
144
+ * has started any other coroutine, "co" might have been reentered
145
+ * and even freed by now! So be careful and do not touch it.
146
+ */
147
+
148
switch (ret) {
149
case COROUTINE_YIELD:
150
return;
151
--
71
--
152
2.9.4
72
2.37.3
153
154
diff view generated by jsdifflib
New patch
1
libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
2
high-performance disk I/O. It currently supports io_uring,
3
virtio-blk-vhost-user, and virtio-blk-vhost-vdpa with additional drivers
4
under development.
1
5
6
One of the reasons for developing libblkio is that other applications
7
besides QEMU can use it. This will be particularly useful for
8
virtio-blk-vhost-user which applications may wish to use for connecting
9
to qemu-storage-daemon.
10
11
libblkio also gives us an opportunity to develop in Rust behind a C API
12
that is easy to consume from QEMU.
13
14
This commit adds io_uring, nvme-io_uring, virtio-blk-vhost-user, and
15
virtio-blk-vhost-vdpa BlockDrivers to QEMU using libblkio. It will be
16
easy to add other libblkio drivers since they will share the majority of
17
code.
18
19
For now I/O buffers are copied through bounce buffers if the libblkio
20
driver requires it. Later commits add an optimization for
21
pre-registering guest RAM to avoid bounce buffers.
22
23
The syntax is:
24
25
--blockdev io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off
26
27
--blockdev nvme-io_uring,node-name=drive0,filename=/dev/ng0n1,readonly=on|off,cache.direct=on
28
29
--blockdev virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off,cache.direct=on
30
31
--blockdev virtio-blk-vhost-user,node-name=drive0,path=vhost-user-blk.sock,readonly=on|off,cache.direct=on
32
33
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
34
Acked-by: Markus Armbruster <armbru@redhat.com>
35
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
36
Message-id: 20221013185908.1297568-3-stefanha@redhat.com
37
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
38
---
39
MAINTAINERS | 6 +
40
meson_options.txt | 2 +
41
qapi/block-core.json | 77 +++-
42
meson.build | 9 +
43
block/blkio.c | 831 ++++++++++++++++++++++++++++++++++
44
tests/qtest/modules-test.c | 3 +
45
block/meson.build | 1 +
46
scripts/meson-buildoptions.sh | 3 +
47
8 files changed, 928 insertions(+), 4 deletions(-)
48
create mode 100644 block/blkio.c
49
50
diff --git a/MAINTAINERS b/MAINTAINERS
51
index XXXXXXX..XXXXXXX 100644
52
--- a/MAINTAINERS
53
+++ b/MAINTAINERS
54
@@ -XXX,XX +XXX,XX @@ L: qemu-block@nongnu.org
55
S: Maintained
56
F: block/vdi.c
57
58
+blkio
59
+M: Stefan Hajnoczi <stefanha@redhat.com>
60
+L: qemu-block@nongnu.org
61
+S: Maintained
62
+F: block/blkio.c
63
+
64
iSCSI
65
M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
66
M: Paolo Bonzini <pbonzini@redhat.com>
67
diff --git a/meson_options.txt b/meson_options.txt
68
index XXXXXXX..XXXXXXX 100644
69
--- a/meson_options.txt
70
+++ b/meson_options.txt
71
@@ -XXX,XX +XXX,XX @@ option('bzip2', type : 'feature', value : 'auto',
72
description: 'bzip2 support for DMG images')
73
option('cap_ng', type : 'feature', value : 'auto',
74
description: 'cap_ng support')
75
+option('blkio', type : 'feature', value : 'auto',
76
+ description: 'libblkio block device driver')
77
option('bpf', type : 'feature', value : 'auto',
78
description: 'eBPF support')
79
option('cocoa', type : 'feature', value : 'auto',
80
diff --git a/qapi/block-core.json b/qapi/block-core.json
81
index XXXXXXX..XXXXXXX 100644
82
--- a/qapi/block-core.json
83
+++ b/qapi/block-core.json
84
@@ -XXX,XX +XXX,XX @@
85
'file', 'snapshot-access', 'ftp', 'ftps', 'gluster',
86
{'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
87
{'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
88
- 'http', 'https', 'iscsi',
89
- 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
90
- 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
91
+ 'http', 'https',
92
+ { 'name': 'io_uring', 'if': 'CONFIG_BLKIO' },
93
+ 'iscsi',
94
+ 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme',
95
+ { 'name': 'nvme-io_uring', 'if': 'CONFIG_BLKIO' },
96
+ 'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
97
+ 'raw', 'rbd',
98
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
99
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
100
+ 'ssh', 'throttle', 'vdi', 'vhdx',
101
+ { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
102
+ { 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
103
+ 'vmdk', 'vpc', 'vvfat' ] }
104
105
##
106
# @BlockdevOptionsFile:
107
@@ -XXX,XX +XXX,XX @@
108
'*debug': 'int',
109
'*logfile': 'str' } }
110
111
+##
112
+# @BlockdevOptionsIoUring:
113
+#
114
+# Driver specific block device options for the io_uring backend.
115
+#
116
+# @filename: path to the image file
117
+#
118
+# Since: 7.2
119
+##
120
+{ 'struct': 'BlockdevOptionsIoUring',
121
+ 'data': { 'filename': 'str' },
122
+ 'if': 'CONFIG_BLKIO' }
123
+
124
+##
125
+# @BlockdevOptionsNvmeIoUring:
126
+#
127
+# Driver specific block device options for the nvme-io_uring backend.
128
+#
129
+# @filename: path to the image file
130
+#
131
+# Since: 7.2
132
+##
133
+{ 'struct': 'BlockdevOptionsNvmeIoUring',
134
+ 'data': { 'filename': 'str' },
135
+ 'if': 'CONFIG_BLKIO' }
136
+
137
+##
138
+# @BlockdevOptionsVirtioBlkVhostUser:
139
+#
140
+# Driver specific block device options for the virtio-blk-vhost-user backend.
141
+#
142
+# @path: path to the vhost-user UNIX domain socket.
143
+#
144
+# Since: 7.2
145
+##
146
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostUser',
147
+ 'data': { 'path': 'str' },
148
+ 'if': 'CONFIG_BLKIO' }
149
+
150
+##
151
+# @BlockdevOptionsVirtioBlkVhostVdpa:
152
+#
153
+# Driver specific block device options for the virtio-blk-vhost-vdpa backend.
154
+#
155
+# @path: path to the vhost-vdpa character device.
156
+#
157
+# Since: 7.2
158
+##
159
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
160
+ 'data': { 'path': 'str' },
161
+ 'if': 'CONFIG_BLKIO' }
162
+
163
##
164
# @IscsiTransport:
165
#
166
@@ -XXX,XX +XXX,XX @@
167
'if': 'HAVE_HOST_BLOCK_DEVICE' },
168
'http': 'BlockdevOptionsCurlHttp',
169
'https': 'BlockdevOptionsCurlHttps',
170
+ 'io_uring': { 'type': 'BlockdevOptionsIoUring',
171
+ 'if': 'CONFIG_BLKIO' },
172
'iscsi': 'BlockdevOptionsIscsi',
173
'luks': 'BlockdevOptionsLUKS',
174
'nbd': 'BlockdevOptionsNbd',
175
@@ -XXX,XX +XXX,XX @@
176
'null-aio': 'BlockdevOptionsNull',
177
'null-co': 'BlockdevOptionsNull',
178
'nvme': 'BlockdevOptionsNVMe',
179
+ 'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
180
+ 'if': 'CONFIG_BLKIO' },
181
'parallels': 'BlockdevOptionsGenericFormat',
182
'preallocate':'BlockdevOptionsPreallocate',
183
'qcow2': 'BlockdevOptionsQcow2',
184
@@ -XXX,XX +XXX,XX @@
185
'throttle': 'BlockdevOptionsThrottle',
186
'vdi': 'BlockdevOptionsGenericFormat',
187
'vhdx': 'BlockdevOptionsGenericFormat',
188
+ 'virtio-blk-vhost-user':
189
+ { 'type': 'BlockdevOptionsVirtioBlkVhostUser',
190
+ 'if': 'CONFIG_BLKIO' },
191
+ 'virtio-blk-vhost-vdpa':
192
+ { 'type': 'BlockdevOptionsVirtioBlkVhostVdpa',
193
+ 'if': 'CONFIG_BLKIO' },
194
'vmdk': 'BlockdevOptionsGenericCOWFormat',
195
'vpc': 'BlockdevOptionsGenericFormat',
196
'vvfat': 'BlockdevOptionsVVFAT'
197
diff --git a/meson.build b/meson.build
198
index XXXXXXX..XXXXXXX 100644
199
--- a/meson.build
200
+++ b/meson.build
201
@@ -XXX,XX +XXX,XX @@ if not get_option('virglrenderer').auto() or have_system or have_vhost_user_gpu
202
required: get_option('virglrenderer'),
203
kwargs: static_kwargs)
204
endif
205
+blkio = not_found
206
+if not get_option('blkio').auto() or have_block
207
+ blkio = dependency('blkio',
208
+ method: 'pkg-config',
209
+ required: get_option('blkio'),
210
+ kwargs: static_kwargs)
211
+endif
212
curl = not_found
213
if not get_option('curl').auto() or have_block
214
curl = dependency('libcurl', version: '>=7.29.0',
215
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_LIBUDEV', libudev.found())
216
config_host_data.set('CONFIG_LZO', lzo.found())
217
config_host_data.set('CONFIG_MPATH', mpathpersist.found())
218
config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api)
219
+config_host_data.set('CONFIG_BLKIO', blkio.found())
220
config_host_data.set('CONFIG_CURL', curl.found())
221
config_host_data.set('CONFIG_CURSES', curses.found())
222
config_host_data.set('CONFIG_GBM', gbm.found())
223
@@ -XXX,XX +XXX,XX @@ summary_info += {'PAM': pam}
224
summary_info += {'iconv support': iconv}
225
summary_info += {'curses support': curses}
226
summary_info += {'virgl support': virgl}
227
+summary_info += {'blkio support': blkio}
228
summary_info += {'curl support': curl}
229
summary_info += {'Multipath support': mpathpersist}
230
summary_info += {'PNG support': png}
231
diff --git a/block/blkio.c b/block/blkio.c
232
new file mode 100644
233
index XXXXXXX..XXXXXXX
234
--- /dev/null
235
+++ b/block/blkio.c
236
@@ -XXX,XX +XXX,XX @@
237
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
238
+/*
239
+ * libblkio BlockDriver
240
+ *
241
+ * Copyright Red Hat, Inc.
242
+ *
243
+ * Author:
244
+ * Stefan Hajnoczi <stefanha@redhat.com>
245
+ */
246
+
247
+#include "qemu/osdep.h"
248
+#include <blkio.h>
249
+#include "block/block_int.h"
250
+#include "qapi/error.h"
251
+#include "qapi/qmp/qdict.h"
252
+#include "qemu/module.h"
253
+
254
+/*
255
+ * Keep the QEMU BlockDriver names identical to the libblkio driver names.
256
+ * Using macros instead of typing out the string literals avoids typos.
257
+ */
258
+#define DRIVER_IO_URING "io_uring"
259
+#define DRIVER_NVME_IO_URING "nvme-io_uring"
260
+#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
261
+#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
262
+
263
+/*
264
+ * Allocated bounce buffers are kept in a list sorted by buffer address.
265
+ */
266
+typedef struct BlkioBounceBuf {
267
+ QLIST_ENTRY(BlkioBounceBuf) next;
268
+
269
+ /* The bounce buffer */
270
+ struct iovec buf;
271
+} BlkioBounceBuf;
272
+
273
+typedef struct {
274
+ /*
275
+ * libblkio is not thread-safe so this lock protects ->blkio and
276
+ * ->blkioq.
277
+ */
278
+ QemuMutex blkio_lock;
279
+ struct blkio *blkio;
280
+ struct blkioq *blkioq; /* make this multi-queue in the future... */
281
+ int completion_fd;
282
+
283
+ /*
284
+ * Polling fetches the next completion into this field.
285
+ *
286
+ * No lock is necessary since only one thread calls aio_poll() and invokes
287
+ * fd and poll handlers.
288
+ */
289
+ struct blkio_completion poll_completion;
290
+
291
+ /*
292
+ * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available.
293
+ *
294
+ * Lock ordering: ->bounce_lock before ->blkio_lock.
295
+ */
296
+ CoMutex bounce_lock;
297
+
298
+ /* Bounce buffer pool */
299
+ struct blkio_mem_region bounce_pool;
300
+
301
+ /* Sorted list of allocated bounce buffers */
302
+ QLIST_HEAD(, BlkioBounceBuf) bounce_bufs;
303
+
304
+ /* Queue for coroutines waiting for bounce buffer space */
305
+ CoQueue bounce_available;
306
+
307
+ /* The value of the "mem-region-alignment" property */
308
+ size_t mem_region_alignment;
309
+
310
+ /* Can we skip adding/deleting blkio_mem_regions? */
311
+ bool needs_mem_regions;
312
+} BDRVBlkioState;
313
+
314
+/* Called with s->bounce_lock held */
315
+static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes)
316
+{
317
+ /* There can be no allocated bounce buffers during resize */
318
+ assert(QLIST_EMPTY(&s->bounce_bufs));
319
+
320
+ /* Pad size to reduce frequency of resize calls */
321
+ bytes += 128 * 1024;
322
+
323
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
324
+ int ret;
325
+
326
+ if (s->bounce_pool.addr) {
327
+ blkio_unmap_mem_region(s->blkio, &s->bounce_pool);
328
+ blkio_free_mem_region(s->blkio, &s->bounce_pool);
329
+ memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
330
+ }
331
+
332
+ /* Automatically freed when s->blkio is destroyed */
333
+ ret = blkio_alloc_mem_region(s->blkio, &s->bounce_pool, bytes);
334
+ if (ret < 0) {
335
+ return ret;
336
+ }
337
+
338
+ ret = blkio_map_mem_region(s->blkio, &s->bounce_pool);
339
+ if (ret < 0) {
340
+ blkio_free_mem_region(s->blkio, &s->bounce_pool);
341
+ memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
342
+ return ret;
343
+ }
344
+ }
345
+
346
+ return 0;
347
+}
348
+
349
+/* Called with s->bounce_lock held */
350
+static bool
351
+blkio_do_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
352
+ int64_t bytes)
353
+{
354
+ void *addr = s->bounce_pool.addr;
355
+ BlkioBounceBuf *cur = NULL;
356
+ BlkioBounceBuf *prev = NULL;
357
+ ptrdiff_t space;
358
+
359
+ /*
360
+ * This is just a linear search over the holes between requests. An
361
+ * efficient allocator would be nice.
362
+ */
363
+ QLIST_FOREACH(cur, &s->bounce_bufs, next) {
364
+ space = cur->buf.iov_base - addr;
365
+ if (bytes <= space) {
366
+ QLIST_INSERT_BEFORE(cur, bounce, next);
367
+ bounce->buf.iov_base = addr;
368
+ bounce->buf.iov_len = bytes;
369
+ return true;
370
+ }
371
+
372
+ addr = cur->buf.iov_base + cur->buf.iov_len;
373
+ prev = cur;
374
+ }
375
+
376
+ /* Is there space after the last request? */
377
+ space = s->bounce_pool.addr + s->bounce_pool.len - addr;
378
+ if (bytes > space) {
379
+ return false;
380
+ }
381
+ if (prev) {
382
+ QLIST_INSERT_AFTER(prev, bounce, next);
383
+ } else {
384
+ QLIST_INSERT_HEAD(&s->bounce_bufs, bounce, next);
385
+ }
386
+ bounce->buf.iov_base = addr;
387
+ bounce->buf.iov_len = bytes;
388
+ return true;
389
+}
390
+
391
+static int coroutine_fn
392
+blkio_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
393
+ int64_t bytes)
394
+{
395
+ /*
396
+ * Ensure fairness: first time around we join the back of the queue,
397
+ * subsequently we join the front so we don't lose our place.
398
+ */
399
+ CoQueueWaitFlags wait_flags = 0;
400
+
401
+ QEMU_LOCK_GUARD(&s->bounce_lock);
402
+
403
+ /* Ensure fairness: don't even try if other requests are already waiting */
404
+ if (!qemu_co_queue_empty(&s->bounce_available)) {
405
+ qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
406
+ wait_flags);
407
+ wait_flags = CO_QUEUE_WAIT_FRONT;
408
+ }
409
+
410
+ while (true) {
411
+ if (blkio_do_alloc_bounce_buffer(s, bounce, bytes)) {
412
+ /* Kick the next queued request since there may be space */
413
+ qemu_co_queue_next(&s->bounce_available);
414
+ return 0;
415
+ }
416
+
417
+ /*
418
+ * If there are no in-flight requests then the pool was simply too
419
+ * small.
420
+ */
421
+ if (QLIST_EMPTY(&s->bounce_bufs)) {
422
+ bool ok;
423
+ int ret;
424
+
425
+ ret = blkio_resize_bounce_pool(s, bytes);
426
+ if (ret < 0) {
427
+ /* Kick the next queued request since that may fail too */
428
+ qemu_co_queue_next(&s->bounce_available);
429
+ return ret;
430
+ }
431
+
432
+ ok = blkio_do_alloc_bounce_buffer(s, bounce, bytes);
433
+ assert(ok); /* must have space this time */
434
+ return 0;
435
+ }
436
+
437
+ qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
438
+ wait_flags);
439
+ wait_flags = CO_QUEUE_WAIT_FRONT;
440
+ }
441
+}
442
+
443
+static void coroutine_fn blkio_free_bounce_buffer(BDRVBlkioState *s,
444
+ BlkioBounceBuf *bounce)
445
+{
446
+ QEMU_LOCK_GUARD(&s->bounce_lock);
447
+
448
+ QLIST_REMOVE(bounce, next);
449
+
450
+ /* Wake up waiting coroutines since space may now be available */
451
+ qemu_co_queue_next(&s->bounce_available);
452
+}
453
+
454
+/* For async to .bdrv_co_*() conversion */
455
+typedef struct {
456
+ Coroutine *coroutine;
457
+ int ret;
458
+} BlkioCoData;
459
+
460
+static void blkio_completion_fd_read(void *opaque)
461
+{
462
+ BlockDriverState *bs = opaque;
463
+ BDRVBlkioState *s = bs->opaque;
464
+ uint64_t val;
465
+ int ret;
466
+
467
+ /* Polling may have already fetched a completion */
468
+ if (s->poll_completion.user_data != NULL) {
469
+ BlkioCoData *cod = s->poll_completion.user_data;
470
+ cod->ret = s->poll_completion.ret;
471
+
472
+ /* Clear it in case aio_co_wake() enters a nested event loop */
473
+ s->poll_completion.user_data = NULL;
474
+
475
+ aio_co_wake(cod->coroutine);
476
+ }
477
+
478
+ /* Reset completion fd status */
479
+ ret = read(s->completion_fd, &val, sizeof(val));
480
+
481
+ /* Ignore errors, there's nothing we can do */
482
+ (void)ret;
483
+
484
+ /*
485
+ * Reading one completion at a time makes nested event loop re-entrancy
486
+ * simple. Change this loop to get multiple completions in one go if it
487
+ * becomes a performance bottleneck.
488
+ */
489
+ while (true) {
490
+ struct blkio_completion completion;
491
+
492
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
493
+ ret = blkioq_do_io(s->blkioq, &completion, 0, 1, NULL);
494
+ }
495
+ if (ret != 1) {
496
+ break;
497
+ }
498
+
499
+ BlkioCoData *cod = completion.user_data;
500
+ cod->ret = completion.ret;
501
+ aio_co_wake(cod->coroutine);
502
+ }
503
+}
504
+
505
+static bool blkio_completion_fd_poll(void *opaque)
506
+{
507
+ BlockDriverState *bs = opaque;
508
+ BDRVBlkioState *s = bs->opaque;
509
+ int ret;
510
+
511
+ /* Just in case we already fetched a completion */
512
+ if (s->poll_completion.user_data != NULL) {
513
+ return true;
514
+ }
515
+
516
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
517
+ ret = blkioq_do_io(s->blkioq, &s->poll_completion, 0, 1, NULL);
518
+ }
519
+ return ret == 1;
520
+}
521
+
522
+static void blkio_completion_fd_poll_ready(void *opaque)
523
+{
524
+ blkio_completion_fd_read(opaque);
525
+}
526
+
527
+static void blkio_attach_aio_context(BlockDriverState *bs,
528
+ AioContext *new_context)
529
+{
530
+ BDRVBlkioState *s = bs->opaque;
531
+
532
+ aio_set_fd_handler(new_context,
533
+ s->completion_fd,
534
+ false,
535
+ blkio_completion_fd_read,
536
+ NULL,
537
+ blkio_completion_fd_poll,
538
+ blkio_completion_fd_poll_ready,
539
+ bs);
540
+}
541
+
542
+static void blkio_detach_aio_context(BlockDriverState *bs)
543
+{
544
+ BDRVBlkioState *s = bs->opaque;
545
+
546
+ aio_set_fd_handler(bdrv_get_aio_context(bs),
547
+ s->completion_fd,
548
+ false, NULL, NULL, NULL, NULL, NULL);
549
+}
550
+
551
+/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */
552
+static void blkio_submit_io(BlockDriverState *bs)
553
+{
554
+ if (qatomic_read(&bs->io_plugged) == 0) {
555
+ BDRVBlkioState *s = bs->opaque;
556
+
557
+ blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
558
+ }
559
+}
560
+
561
+static int coroutine_fn
562
+blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
563
+{
564
+ BDRVBlkioState *s = bs->opaque;
565
+ BlkioCoData cod = {
566
+ .coroutine = qemu_coroutine_self(),
567
+ };
568
+
569
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
570
+ blkioq_discard(s->blkioq, offset, bytes, &cod, 0);
571
+ blkio_submit_io(bs);
572
+ }
573
+
574
+ qemu_coroutine_yield();
575
+ return cod.ret;
576
+}
577
+
578
+static int coroutine_fn
579
+blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
580
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
581
+{
582
+ BlkioCoData cod = {
583
+ .coroutine = qemu_coroutine_self(),
584
+ };
585
+ BDRVBlkioState *s = bs->opaque;
586
+ bool use_bounce_buffer = s->needs_mem_regions;
587
+ BlkioBounceBuf bounce;
588
+ struct iovec *iov = qiov->iov;
589
+ int iovcnt = qiov->niov;
590
+
591
+ if (use_bounce_buffer) {
592
+ int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
593
+ if (ret < 0) {
594
+ return ret;
595
+ }
596
+
597
+ iov = &bounce.buf;
598
+ iovcnt = 1;
599
+ }
600
+
601
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
602
+ blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
603
+ blkio_submit_io(bs);
604
+ }
605
+
606
+ qemu_coroutine_yield();
607
+
608
+ if (use_bounce_buffer) {
609
+ if (cod.ret == 0) {
610
+ qemu_iovec_from_buf(qiov, 0,
611
+ bounce.buf.iov_base,
612
+ bounce.buf.iov_len);
613
+ }
614
+
615
+ blkio_free_bounce_buffer(s, &bounce);
616
+ }
617
+
618
+ return cod.ret;
619
+}
620
+
621
+static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
622
+ int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
623
+{
624
+ uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
625
+ BlkioCoData cod = {
626
+ .coroutine = qemu_coroutine_self(),
627
+ };
628
+ BDRVBlkioState *s = bs->opaque;
629
+ bool use_bounce_buffer = s->needs_mem_regions;
630
+ BlkioBounceBuf bounce;
631
+ struct iovec *iov = qiov->iov;
632
+ int iovcnt = qiov->niov;
633
+
634
+ if (use_bounce_buffer) {
635
+ int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
636
+ if (ret < 0) {
637
+ return ret;
638
+ }
639
+
640
+ qemu_iovec_to_buf(qiov, 0, bounce.buf.iov_base, bytes);
641
+ iov = &bounce.buf;
642
+ iovcnt = 1;
643
+ }
644
+
645
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
646
+ blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags);
647
+ blkio_submit_io(bs);
648
+ }
649
+
650
+ qemu_coroutine_yield();
651
+
652
+ if (use_bounce_buffer) {
653
+ blkio_free_bounce_buffer(s, &bounce);
654
+ }
655
+
656
+ return cod.ret;
657
+}
658
+
659
+static int coroutine_fn blkio_co_flush(BlockDriverState *bs)
660
+{
661
+ BDRVBlkioState *s = bs->opaque;
662
+ BlkioCoData cod = {
663
+ .coroutine = qemu_coroutine_self(),
664
+ };
665
+
666
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
667
+ blkioq_flush(s->blkioq, &cod, 0);
668
+ blkio_submit_io(bs);
669
+ }
670
+
671
+ qemu_coroutine_yield();
672
+ return cod.ret;
673
+}
674
+
675
+static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs,
676
+ int64_t offset, int64_t bytes, BdrvRequestFlags flags)
677
+{
678
+ BDRVBlkioState *s = bs->opaque;
679
+ BlkioCoData cod = {
680
+ .coroutine = qemu_coroutine_self(),
681
+ };
682
+ uint32_t blkio_flags = 0;
683
+
684
+ if (flags & BDRV_REQ_FUA) {
685
+ blkio_flags |= BLKIO_REQ_FUA;
686
+ }
687
+ if (!(flags & BDRV_REQ_MAY_UNMAP)) {
688
+ blkio_flags |= BLKIO_REQ_NO_UNMAP;
689
+ }
690
+ if (flags & BDRV_REQ_NO_FALLBACK) {
691
+ blkio_flags |= BLKIO_REQ_NO_FALLBACK;
692
+ }
693
+
694
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
695
+ blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags);
696
+ blkio_submit_io(bs);
697
+ }
698
+
699
+ qemu_coroutine_yield();
700
+ return cod.ret;
701
+}
702
+
703
+static void blkio_io_unplug(BlockDriverState *bs)
704
+{
705
+ BDRVBlkioState *s = bs->opaque;
706
+
707
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
708
+ blkio_submit_io(bs);
709
+ }
710
+}
711
+
712
+static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
713
+ Error **errp)
714
+{
715
+ const char *filename = qdict_get_str(options, "filename");
716
+ BDRVBlkioState *s = bs->opaque;
717
+ int ret;
718
+
719
+ ret = blkio_set_str(s->blkio, "path", filename);
720
+ qdict_del(options, "filename");
721
+ if (ret < 0) {
722
+ error_setg_errno(errp, -ret, "failed to set path: %s",
723
+ blkio_get_error_msg());
724
+ return ret;
725
+ }
726
+
727
+ if (flags & BDRV_O_NOCACHE) {
728
+ ret = blkio_set_bool(s->blkio, "direct", true);
729
+ if (ret < 0) {
730
+ error_setg_errno(errp, -ret, "failed to set direct: %s",
731
+ blkio_get_error_msg());
732
+ return ret;
733
+ }
734
+ }
735
+
736
+ return 0;
737
+}
738
+
739
+static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
740
+ Error **errp)
741
+{
742
+ const char *filename = qdict_get_str(options, "filename");
743
+ BDRVBlkioState *s = bs->opaque;
744
+ int ret;
745
+
746
+ ret = blkio_set_str(s->blkio, "path", filename);
747
+ qdict_del(options, "filename");
748
+ if (ret < 0) {
749
+ error_setg_errno(errp, -ret, "failed to set path: %s",
750
+ blkio_get_error_msg());
751
+ return ret;
752
+ }
753
+
754
+ if (!(flags & BDRV_O_NOCACHE)) {
755
+ error_setg(errp, "cache.direct=off is not supported");
756
+ return -EINVAL;
757
+ }
758
+
759
+ return 0;
760
+}
761
+
762
+static int blkio_virtio_blk_common_open(BlockDriverState *bs,
763
+ QDict *options, int flags, Error **errp)
764
+{
765
+ const char *path = qdict_get_try_str(options, "path");
766
+ BDRVBlkioState *s = bs->opaque;
767
+ int ret;
768
+
769
+ if (!path) {
770
+ error_setg(errp, "missing 'path' option");
771
+ return -EINVAL;
772
+ }
773
+
774
+ ret = blkio_set_str(s->blkio, "path", path);
775
+ qdict_del(options, "path");
776
+ if (ret < 0) {
777
+ error_setg_errno(errp, -ret, "failed to set path: %s",
778
+ blkio_get_error_msg());
779
+ return ret;
780
+ }
781
+
782
+ if (!(flags & BDRV_O_NOCACHE)) {
783
+ error_setg(errp, "cache.direct=off is not supported");
784
+ return -EINVAL;
785
+ }
786
+ return 0;
787
+}
788
+
789
+static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
790
+ Error **errp)
791
+{
792
+ const char *blkio_driver = bs->drv->protocol_name;
793
+ BDRVBlkioState *s = bs->opaque;
794
+ int ret;
795
+
796
+ ret = blkio_create(blkio_driver, &s->blkio);
797
+ if (ret < 0) {
798
+ error_setg_errno(errp, -ret, "blkio_create failed: %s",
799
+ blkio_get_error_msg());
800
+ return ret;
801
+ }
802
+
803
+ if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
804
+ ret = blkio_io_uring_open(bs, options, flags, errp);
805
+ } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
806
+ ret = blkio_nvme_io_uring(bs, options, flags, errp);
807
+ } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
808
+ ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
809
+ } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
810
+ ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
811
+ } else {
812
+ g_assert_not_reached();
813
+ }
814
+ if (ret < 0) {
815
+ blkio_destroy(&s->blkio);
816
+ return ret;
817
+ }
818
+
819
+ if (!(flags & BDRV_O_RDWR)) {
820
+ ret = blkio_set_bool(s->blkio, "read-only", true);
821
+ if (ret < 0) {
822
+ error_setg_errno(errp, -ret, "failed to set read-only: %s",
823
+ blkio_get_error_msg());
824
+ blkio_destroy(&s->blkio);
825
+ return ret;
826
+ }
827
+ }
828
+
829
+ ret = blkio_connect(s->blkio);
830
+ if (ret < 0) {
831
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
832
+ blkio_get_error_msg());
833
+ blkio_destroy(&s->blkio);
834
+ return ret;
835
+ }
836
+
837
+ ret = blkio_get_bool(s->blkio,
838
+ "needs-mem-regions",
839
+ &s->needs_mem_regions);
840
+ if (ret < 0) {
841
+ error_setg_errno(errp, -ret,
842
+ "failed to get needs-mem-regions: %s",
843
+ blkio_get_error_msg());
844
+ blkio_destroy(&s->blkio);
845
+ return ret;
846
+ }
847
+
848
+ ret = blkio_get_uint64(s->blkio,
849
+ "mem-region-alignment",
850
+ &s->mem_region_alignment);
851
+ if (ret < 0) {
852
+ error_setg_errno(errp, -ret,
853
+ "failed to get mem-region-alignment: %s",
854
+ blkio_get_error_msg());
855
+ blkio_destroy(&s->blkio);
856
+ return ret;
857
+ }
858
+
859
+ ret = blkio_start(s->blkio);
860
+ if (ret < 0) {
861
+ error_setg_errno(errp, -ret, "blkio_start failed: %s",
862
+ blkio_get_error_msg());
863
+ blkio_destroy(&s->blkio);
864
+ return ret;
865
+ }
866
+
867
+ bs->supported_write_flags = BDRV_REQ_FUA;
868
+ bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP |
869
+ BDRV_REQ_NO_FALLBACK;
870
+
871
+ qemu_mutex_init(&s->blkio_lock);
872
+ qemu_co_mutex_init(&s->bounce_lock);
873
+ qemu_co_queue_init(&s->bounce_available);
874
+ QLIST_INIT(&s->bounce_bufs);
875
+ s->blkioq = blkio_get_queue(s->blkio, 0);
876
+ s->completion_fd = blkioq_get_completion_fd(s->blkioq);
877
+
878
+ blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
879
+ return 0;
880
+}
881
+
882
+static void blkio_close(BlockDriverState *bs)
883
+{
884
+ BDRVBlkioState *s = bs->opaque;
885
+
886
+ /* There is no destroy() API for s->bounce_lock */
887
+
888
+ qemu_mutex_destroy(&s->blkio_lock);
889
+ blkio_detach_aio_context(bs);
890
+ blkio_destroy(&s->blkio);
891
+}
892
+
893
+static int64_t blkio_getlength(BlockDriverState *bs)
894
+{
895
+ BDRVBlkioState *s = bs->opaque;
896
+ uint64_t capacity;
897
+ int ret;
898
+
899
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
900
+ ret = blkio_get_uint64(s->blkio, "capacity", &capacity);
901
+ }
902
+ if (ret < 0) {
903
+ return -ret;
904
+ }
905
+
906
+ return capacity;
907
+}
908
+
909
+static int blkio_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
910
+{
911
+ return 0;
912
+}
913
+
914
+static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
915
+{
916
+ BDRVBlkioState *s = bs->opaque;
917
+ QEMU_LOCK_GUARD(&s->blkio_lock);
918
+ int value;
919
+ int ret;
920
+
921
+ ret = blkio_get_int(s->blkio, "request-alignment", &value);
922
+ if (ret < 0) {
923
+ error_setg_errno(errp, -ret, "failed to get \"request-alignment\": %s",
924
+ blkio_get_error_msg());
925
+ return;
926
+ }
927
+ bs->bl.request_alignment = value;
928
+ if (bs->bl.request_alignment < 1 ||
929
+ bs->bl.request_alignment >= INT_MAX ||
930
+ !is_power_of_2(bs->bl.request_alignment)) {
931
+ error_setg(errp, "invalid \"request-alignment\" value %" PRIu32 ", "
932
+ "must be a power of 2 less than INT_MAX",
933
+ bs->bl.request_alignment);
934
+ return;
935
+ }
936
+
937
+ ret = blkio_get_int(s->blkio, "optimal-io-size", &value);
938
+ if (ret < 0) {
939
+ error_setg_errno(errp, -ret, "failed to get \"optimal-io-size\": %s",
940
+ blkio_get_error_msg());
941
+ return;
942
+ }
943
+ bs->bl.opt_transfer = value;
944
+ if (bs->bl.opt_transfer > INT_MAX ||
945
+ (bs->bl.opt_transfer % bs->bl.request_alignment)) {
946
+ error_setg(errp, "invalid \"optimal-io-size\" value %" PRIu32 ", must "
947
+ "be a multiple of %" PRIu32, bs->bl.opt_transfer,
948
+ bs->bl.request_alignment);
949
+ return;
950
+ }
951
+
952
+ ret = blkio_get_int(s->blkio, "max-transfer", &value);
953
+ if (ret < 0) {
954
+ error_setg_errno(errp, -ret, "failed to get \"max-transfer\": %s",
955
+ blkio_get_error_msg());
956
+ return;
957
+ }
958
+ bs->bl.max_transfer = value;
959
+ if ((bs->bl.max_transfer % bs->bl.request_alignment) ||
960
+ (bs->bl.opt_transfer && (bs->bl.max_transfer % bs->bl.opt_transfer))) {
961
+ error_setg(errp, "invalid \"max-transfer\" value %" PRIu32 ", must be "
962
+ "a multiple of %" PRIu32 " and %" PRIu32 " (if non-zero)",
963
+ bs->bl.max_transfer, bs->bl.request_alignment,
964
+ bs->bl.opt_transfer);
965
+ return;
966
+ }
967
+
968
+ ret = blkio_get_int(s->blkio, "buf-alignment", &value);
969
+ if (ret < 0) {
970
+ error_setg_errno(errp, -ret, "failed to get \"buf-alignment\": %s",
971
+ blkio_get_error_msg());
972
+ return;
973
+ }
974
+ if (value < 1) {
975
+ error_setg(errp, "invalid \"buf-alignment\" value %d, must be "
976
+ "positive", value);
977
+ return;
978
+ }
979
+ bs->bl.min_mem_alignment = value;
980
+
981
+ ret = blkio_get_int(s->blkio, "optimal-buf-alignment", &value);
982
+ if (ret < 0) {
983
+ error_setg_errno(errp, -ret,
984
+ "failed to get \"optimal-buf-alignment\": %s",
985
+ blkio_get_error_msg());
986
+ return;
987
+ }
988
+ if (value < 1) {
989
+ error_setg(errp, "invalid \"optimal-buf-alignment\" value %d, "
990
+ "must be positive", value);
991
+ return;
992
+ }
993
+ bs->bl.opt_mem_alignment = value;
994
+
995
+ ret = blkio_get_int(s->blkio, "max-segments", &value);
996
+ if (ret < 0) {
997
+ error_setg_errno(errp, -ret, "failed to get \"max-segments\": %s",
998
+ blkio_get_error_msg());
999
+ return;
1000
+ }
1001
+ if (value < 1) {
1002
+ error_setg(errp, "invalid \"max-segments\" value %d, must be positive",
1003
+ value);
1004
+ return;
1005
+ }
1006
+ bs->bl.max_iov = value;
1007
+}
1008
+
1009
+/*
1010
+ * TODO
1011
+ * Missing libblkio APIs:
1012
+ * - block_status
1013
+ * - co_invalidate_cache
1014
+ *
1015
+ * Out of scope?
1016
+ * - create
1017
+ * - truncate
1018
+ */
1019
+
1020
+#define BLKIO_DRIVER(name, ...) \
1021
+ { \
1022
+ .format_name = name, \
1023
+ .protocol_name = name, \
1024
+ .instance_size = sizeof(BDRVBlkioState), \
1025
+ .bdrv_file_open = blkio_file_open, \
1026
+ .bdrv_close = blkio_close, \
1027
+ .bdrv_getlength = blkio_getlength, \
1028
+ .bdrv_get_info = blkio_get_info, \
1029
+ .bdrv_attach_aio_context = blkio_attach_aio_context, \
1030
+ .bdrv_detach_aio_context = blkio_detach_aio_context, \
1031
+ .bdrv_co_pdiscard = blkio_co_pdiscard, \
1032
+ .bdrv_co_preadv = blkio_co_preadv, \
1033
+ .bdrv_co_pwritev = blkio_co_pwritev, \
1034
+ .bdrv_co_flush_to_disk = blkio_co_flush, \
1035
+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
1036
+ .bdrv_io_unplug = blkio_io_unplug, \
1037
+ .bdrv_refresh_limits = blkio_refresh_limits, \
1038
+ __VA_ARGS__ \
1039
+ }
1040
+
1041
+static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
1042
+ DRIVER_IO_URING,
1043
+ .bdrv_needs_filename = true,
1044
+);
1045
+
1046
+static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
1047
+ DRIVER_NVME_IO_URING,
1048
+ .bdrv_needs_filename = true,
1049
+);
1050
+
1051
+static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
1052
+ DRIVER_VIRTIO_BLK_VHOST_USER
1053
+);
1054
+
1055
+static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
1056
+ DRIVER_VIRTIO_BLK_VHOST_VDPA
1057
+);
1058
+
1059
+static void bdrv_blkio_init(void)
1060
+{
1061
+ bdrv_register(&bdrv_io_uring);
1062
+ bdrv_register(&bdrv_nvme_io_uring);
1063
+ bdrv_register(&bdrv_virtio_blk_vhost_user);
1064
+ bdrv_register(&bdrv_virtio_blk_vhost_vdpa);
1065
+}
1066
+
1067
+block_init(bdrv_blkio_init);
1068
diff --git a/tests/qtest/modules-test.c b/tests/qtest/modules-test.c
1069
index XXXXXXX..XXXXXXX 100644
1070
--- a/tests/qtest/modules-test.c
1071
+++ b/tests/qtest/modules-test.c
1072
@@ -XXX,XX +XXX,XX @@ static void test_modules_load(const void *data)
1073
int main(int argc, char *argv[])
1074
{
1075
const char *modules[] = {
1076
+#ifdef CONFIG_BLKIO
1077
+ "block-", "blkio",
1078
+#endif
1079
#ifdef CONFIG_CURL
1080
"block-", "curl",
1081
#endif
1082
diff --git a/block/meson.build b/block/meson.build
1083
index XXXXXXX..XXXXXXX 100644
1084
--- a/block/meson.build
1085
+++ b/block/meson.build
1086
@@ -XXX,XX +XXX,XX @@ block_modules = {}
1087
1088
modsrc = []
1089
foreach m : [
1090
+ [blkio, 'blkio', files('blkio.c')],
1091
[curl, 'curl', files('curl.c')],
1092
[glusterfs, 'gluster', files('gluster.c')],
1093
[libiscsi, 'iscsi', [files('iscsi.c'), libm]],
1094
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
1095
index XXXXXXX..XXXXXXX 100644
1096
--- a/scripts/meson-buildoptions.sh
1097
+++ b/scripts/meson-buildoptions.sh
1098
@@ -XXX,XX +XXX,XX @@ meson_options_help() {
1099
printf "%s\n" ' auth-pam PAM access control'
1100
printf "%s\n" ' avx2 AVX2 optimizations'
1101
printf "%s\n" ' avx512f AVX512F optimizations'
1102
+ printf "%s\n" ' blkio libblkio block device driver'
1103
printf "%s\n" ' bochs bochs image format support'
1104
printf "%s\n" ' bpf eBPF support'
1105
printf "%s\n" ' brlapi brlapi character device driver'
1106
@@ -XXX,XX +XXX,XX @@ _meson_option_parse() {
1107
--disable-gcov) printf "%s" -Db_coverage=false ;;
1108
--enable-lto) printf "%s" -Db_lto=true ;;
1109
--disable-lto) printf "%s" -Db_lto=false ;;
1110
+ --enable-blkio) printf "%s" -Dblkio=enabled ;;
1111
+ --disable-blkio) printf "%s" -Dblkio=disabled ;;
1112
--block-drv-ro-whitelist=*) quote_sh "-Dblock_drv_ro_whitelist=$2" ;;
1113
--block-drv-rw-whitelist=*) quote_sh "-Dblock_drv_rw_whitelist=$2" ;;
1114
--enable-block-drv-whitelist-in-tools) printf "%s" -Dblock_drv_whitelist_in_tools=true ;;
1115
--
1116
2.37.3
diff view generated by jsdifflib
New patch
1
When a RAMBlockNotifier is added, ->ram_block_added() is called with all
2
existing RAMBlocks. There is no equivalent ->ram_block_removed() call
3
when a RAMBlockNotifier is removed.
1
4
5
The util/vfio-helpers.c code (the sole user of RAMBlockNotifier) is fine
6
with this asymmetry because it does not rely on RAMBlockNotifier for
7
cleanup. It walks its internal list of DMA mappings and unmaps them by
8
itself.
9
10
Future users of RAMBlockNotifier may not have an internal data structure
11
that records added RAMBlocks so they will need ->ram_block_removed()
12
callbacks.
13
14
This patch makes ram_block_notifier_remove() symmetric with respect to
15
callbacks. Now util/vfio-helpers.c needs to unmap remaining DMA mappings
16
after ram_block_notifier_remove() has been called. This is necessary
17
since users like block/nvme.c may create additional DMA mappings that do
18
not originate from the RAMBlockNotifier.
19
20
Reviewed-by: David Hildenbrand <david@redhat.com>
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
Message-id: 20221013185908.1297568-4-stefanha@redhat.com
23
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
24
---
25
hw/core/numa.c | 17 +++++++++++++++++
26
util/vfio-helpers.c | 5 ++++-
27
2 files changed, 21 insertions(+), 1 deletion(-)
28
29
diff --git a/hw/core/numa.c b/hw/core/numa.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/hw/core/numa.c
32
+++ b/hw/core/numa.c
33
@@ -XXX,XX +XXX,XX @@ static int ram_block_notify_add_single(RAMBlock *rb, void *opaque)
34
return 0;
35
}
36
37
+static int ram_block_notify_remove_single(RAMBlock *rb, void *opaque)
38
+{
39
+ const ram_addr_t max_size = qemu_ram_get_max_length(rb);
40
+ const ram_addr_t size = qemu_ram_get_used_length(rb);
41
+ void *host = qemu_ram_get_host_addr(rb);
42
+ RAMBlockNotifier *notifier = opaque;
43
+
44
+ if (host) {
45
+ notifier->ram_block_removed(notifier, host, size, max_size);
46
+ }
47
+ return 0;
48
+}
49
+
50
void ram_block_notifier_add(RAMBlockNotifier *n)
51
{
52
QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next);
53
@@ -XXX,XX +XXX,XX @@ void ram_block_notifier_add(RAMBlockNotifier *n)
54
void ram_block_notifier_remove(RAMBlockNotifier *n)
55
{
56
QLIST_REMOVE(n, next);
57
+
58
+ if (n->ram_block_removed) {
59
+ qemu_ram_foreach_block(ram_block_notify_remove_single, n);
60
+ }
61
}
62
63
void ram_block_notify_add(void *host, size_t size, size_t max_size)
64
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/util/vfio-helpers.c
67
+++ b/util/vfio-helpers.c
68
@@ -XXX,XX +XXX,XX @@ void qemu_vfio_close(QEMUVFIOState *s)
69
if (!s) {
70
return;
71
}
72
+
73
+ ram_block_notifier_remove(&s->ram_notifier);
74
+
75
for (i = 0; i < s->nr_mappings; ++i) {
76
qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
77
}
78
- ram_block_notifier_remove(&s->ram_notifier);
79
+
80
g_free(s->usable_iova_ranges);
81
s->nb_iova_ranges = 0;
82
qemu_vfio_reset(s);
83
--
84
2.37.3
diff view generated by jsdifflib
New patch
1
The only implementor of bdrv_register_buf() is block/nvme.c, where the
2
size is not needed when unregistering a buffer. This is because
3
util/vfio-helpers.c can look up mappings by address.
1
4
5
Future block drivers that implement bdrv_register_buf() may not be able
6
to do their job given only the buffer address. Add a size argument to
7
bdrv_unregister_buf().
8
9
Also document the assumptions about
10
bdrv_register_buf()/bdrv_unregister_buf() calls. The same <host, size>
11
values that were given to bdrv_register_buf() must be given to
12
bdrv_unregister_buf().
13
14
gcc 11.2.1 emits a spurious warning that img_bench()'s buf_size local
15
variable might be uninitialized, so it's necessary to silence the
16
compiler.
17
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
19
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
20
Message-id: 20221013185908.1297568-5-stefanha@redhat.com
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
---
23
include/block/block-global-state.h | 5 ++++-
24
include/block/block_int-common.h | 2 +-
25
include/sysemu/block-backend-global-state.h | 2 +-
26
block/block-backend.c | 4 ++--
27
block/io.c | 6 +++---
28
block/nvme.c | 2 +-
29
qemu-img.c | 4 ++--
30
7 files changed, 14 insertions(+), 11 deletions(-)
31
32
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/include/block/block-global-state.h
35
+++ b/include/block/block-global-state.h
36
@@ -XXX,XX +XXX,XX @@ void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
37
* Register/unregister a buffer for I/O. For example, VFIO drivers are
38
* interested to know the memory areas that would later be used for I/O, so
39
* that they can prepare IOMMU mapping etc., to get better performance.
40
+ *
41
+ * Buffers must not overlap and they must be unregistered with the same <host,
42
+ * size> values that they were registered with.
43
*/
44
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
45
-void bdrv_unregister_buf(BlockDriverState *bs, void *host);
46
+void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size);
47
48
void bdrv_cancel_in_flight(BlockDriverState *bs);
49
50
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
51
index XXXXXXX..XXXXXXX 100644
52
--- a/include/block/block_int-common.h
53
+++ b/include/block/block_int-common.h
54
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
55
* DMA mapping for hot buffers.
56
*/
57
void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
58
- void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
59
+ void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host, size_t size);
60
61
/*
62
* This field is modified only under the BQL, and is part of
63
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
64
index XXXXXXX..XXXXXXX 100644
65
--- a/include/sysemu/block-backend-global-state.h
66
+++ b/include/sysemu/block-backend-global-state.h
67
@@ -XXX,XX +XXX,XX @@ void blk_io_limits_update_group(BlockBackend *blk, const char *group);
68
void blk_set_force_allow_inactivate(BlockBackend *blk);
69
70
void blk_register_buf(BlockBackend *blk, void *host, size_t size);
71
-void blk_unregister_buf(BlockBackend *blk, void *host);
72
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size);
73
74
const BdrvChild *blk_root(BlockBackend *blk);
75
76
diff --git a/block/block-backend.c b/block/block-backend.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/block/block-backend.c
79
+++ b/block/block-backend.c
80
@@ -XXX,XX +XXX,XX @@ void blk_register_buf(BlockBackend *blk, void *host, size_t size)
81
bdrv_register_buf(blk_bs(blk), host, size);
82
}
83
84
-void blk_unregister_buf(BlockBackend *blk, void *host)
85
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
86
{
87
GLOBAL_STATE_CODE();
88
- bdrv_unregister_buf(blk_bs(blk), host);
89
+ bdrv_unregister_buf(blk_bs(blk), host, size);
90
}
91
92
int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
93
diff --git a/block/io.c b/block/io.c
94
index XXXXXXX..XXXXXXX 100644
95
--- a/block/io.c
96
+++ b/block/io.c
97
@@ -XXX,XX +XXX,XX @@ void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
98
}
99
}
100
101
-void bdrv_unregister_buf(BlockDriverState *bs, void *host)
102
+void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
103
{
104
BdrvChild *child;
105
106
GLOBAL_STATE_CODE();
107
if (bs->drv && bs->drv->bdrv_unregister_buf) {
108
- bs->drv->bdrv_unregister_buf(bs, host);
109
+ bs->drv->bdrv_unregister_buf(bs, host, size);
110
}
111
QLIST_FOREACH(child, &bs->children, next) {
112
- bdrv_unregister_buf(child->bs, host);
113
+ bdrv_unregister_buf(child->bs, host, size);
114
}
115
}
116
117
diff --git a/block/nvme.c b/block/nvme.c
118
index XXXXXXX..XXXXXXX 100644
119
--- a/block/nvme.c
120
+++ b/block/nvme.c
121
@@ -XXX,XX +XXX,XX @@ static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
122
}
123
}
124
125
-static void nvme_unregister_buf(BlockDriverState *bs, void *host)
126
+static void nvme_unregister_buf(BlockDriverState *bs, void *host, size_t size)
127
{
128
BDRVNVMeState *s = bs->opaque;
129
130
diff --git a/qemu-img.c b/qemu-img.c
131
index XXXXXXX..XXXXXXX 100644
132
--- a/qemu-img.c
133
+++ b/qemu-img.c
134
@@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv)
135
struct timeval t1, t2;
136
int i;
137
bool force_share = false;
138
- size_t buf_size;
139
+ size_t buf_size = 0;
140
141
for (;;) {
142
static const struct option long_options[] = {
143
@@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv)
144
145
out:
146
if (data.buf) {
147
- blk_unregister_buf(blk, data.buf);
148
+ blk_unregister_buf(blk, data.buf, buf_size);
149
}
150
qemu_vfree(data.buf);
151
blk_unref(blk);
152
--
153
2.37.3
diff view generated by jsdifflib
New patch
1
Use the enum type so GDB displays the enum members instead of printing a
2
numeric constant.
1
3
4
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
6
Message-id: 20221013185908.1297568-6-stefanha@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
include/block/block_int-common.h | 8 ++++----
10
1 file changed, 4 insertions(+), 4 deletions(-)
11
12
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/block/block_int-common.h
15
+++ b/include/block/block_int-common.h
16
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
17
/*
18
* Flags honored during pread
19
*/
20
- unsigned int supported_read_flags;
21
+ BdrvRequestFlags supported_read_flags;
22
/*
23
* Flags honored during pwrite (so far: BDRV_REQ_FUA,
24
* BDRV_REQ_WRITE_UNCHANGED).
25
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
26
* flag), or they have to explicitly take the WRITE permission for
27
* their children.
28
*/
29
- unsigned int supported_write_flags;
30
+ BdrvRequestFlags supported_write_flags;
31
/*
32
* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
33
* BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED)
34
*/
35
- unsigned int supported_zero_flags;
36
+ BdrvRequestFlags supported_zero_flags;
37
/*
38
* Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
39
*
40
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
41
* that any added space reads as all zeros. If this can't be guaranteed,
42
* the operation must fail.
43
*/
44
- unsigned int supported_truncate_flags;
45
+ BdrvRequestFlags supported_truncate_flags;
46
47
/* the following member gives a name to every node on the bs graph. */
48
char node_name[32];
49
--
50
2.37.3
diff view generated by jsdifflib
New patch
1
1
Block drivers may optimize I/O requests accessing buffers previously
2
registered with bdrv_register_buf(). Checking whether all elements of a
3
request's QEMUIOVector are within previously registered buffers is
4
expensive, so we need a hint from the user to avoid costly checks.
5
6
Add a BDRV_REQ_REGISTERED_BUF request flag to indicate that all
7
QEMUIOVector elements in an I/O request are known to be within
8
previously registered buffers.
9
10
Always pass the flag through to driver read/write functions. There is
11
little harm in passing the flag to a driver that does not use it.
12
Passing the flag to drivers avoids changes across many block drivers.
13
Filter drivers would need to explicitly support the flag and pass
14
through to their children when the children support it. That's a lot of
15
code changes and it's hard to remember to do that everywhere, leading to
16
silently reduced performance when the flag is accidentally dropped.
17
18
The only problematic scenario with the approach in this patch is when a
19
driver passes the flag through to internal I/O requests that don't use
20
the same I/O buffer. In that case the hint may be set when it should
21
actually be clear. This is a rare case though so the risk is low.
22
23
Some drivers have assert(!flags), which no longer works when
24
BDRV_REQ_REGISTERED_BUF is passed in. These assertions aren't very
25
useful anyway since the functions are called almost exclusively by
26
bdrv_driver_preadv/pwritev() so if we get flags handling right there
27
then the assertions are not needed.
28
29
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
30
Message-id: 20221013185908.1297568-7-stefanha@redhat.com
31
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
32
---
33
include/block/block-common.h | 9 ++++++
34
block.c | 14 +++++++++
35
block/blkverify.c | 4 +--
36
block/crypto.c | 4 +--
37
block/file-posix.c | 1 -
38
block/gluster.c | 1 -
39
block/io.c | 61 ++++++++++++++++++++++--------------
40
block/mirror.c | 2 ++
41
block/nbd.c | 1 -
42
block/parallels.c | 1 -
43
block/qcow.c | 2 --
44
block/qed.c | 1 -
45
block/raw-format.c | 2 ++
46
block/replication.c | 1 -
47
block/ssh.c | 1 -
48
block/vhdx.c | 1 -
49
16 files changed, 69 insertions(+), 37 deletions(-)
50
51
diff --git a/include/block/block-common.h b/include/block/block-common.h
52
index XXXXXXX..XXXXXXX 100644
53
--- a/include/block/block-common.h
54
+++ b/include/block/block-common.h
55
@@ -XXX,XX +XXX,XX @@ typedef enum {
56
*/
57
BDRV_REQ_MAY_UNMAP = 0x4,
58
59
+ /*
60
+ * An optimization hint when all QEMUIOVector elements are within
61
+ * previously registered bdrv_register_buf() memory ranges.
62
+ *
63
+ * Code that replaces the user's QEMUIOVector elements with bounce buffers
64
+ * must take care to clear this flag.
65
+ */
66
+ BDRV_REQ_REGISTERED_BUF = 0x8,
67
+
68
BDRV_REQ_FUA = 0x10,
69
BDRV_REQ_WRITE_COMPRESSED = 0x20,
70
71
diff --git a/block.c b/block.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/block.c
74
+++ b/block.c
75
@@ -XXX,XX +XXX,XX @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
76
goto open_failed;
77
}
78
79
+ assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK));
80
+ assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK));
81
+
82
+ /*
83
+ * Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves
84
+ * drivers that pass read/write requests through to a child the trouble of
85
+ * declaring support explicitly.
86
+ *
87
+ * Drivers must not propagate this flag accidentally when they initiate I/O
88
+ * to a bounce buffer. That case should be rare though.
89
+ */
90
+ bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
91
+ bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;
92
+
93
ret = refresh_total_sectors(bs, bs->total_sectors);
94
if (ret < 0) {
95
error_setg_errno(errp, -ret, "Could not refresh total sector count");
96
diff --git a/block/blkverify.c b/block/blkverify.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/block/blkverify.c
99
+++ b/block/blkverify.c
100
@@ -XXX,XX +XXX,XX @@ blkverify_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
101
qemu_iovec_init(&raw_qiov, qiov->niov);
102
qemu_iovec_clone(&raw_qiov, qiov, buf);
103
104
- ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
105
- false);
106
+ ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov,
107
+ flags & ~BDRV_REQ_REGISTERED_BUF, false);
108
109
cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
110
if (cmp_offset != -1) {
111
diff --git a/block/crypto.c b/block/crypto.c
112
index XXXXXXX..XXXXXXX 100644
113
--- a/block/crypto.c
114
+++ b/block/crypto.c
115
@@ -XXX,XX +XXX,XX @@ block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
116
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
117
uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
118
119
- assert(!flags);
120
assert(payload_offset < INT64_MAX);
121
assert(QEMU_IS_ALIGNED(offset, sector_size));
122
assert(QEMU_IS_ALIGNED(bytes, sector_size));
123
@@ -XXX,XX +XXX,XX @@ block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
124
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
125
uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
126
127
- assert(!(flags & ~BDRV_REQ_FUA));
128
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
129
+
130
assert(payload_offset < INT64_MAX);
131
assert(QEMU_IS_ALIGNED(offset, sector_size));
132
assert(QEMU_IS_ALIGNED(bytes, sector_size));
133
diff --git a/block/file-posix.c b/block/file-posix.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/block/file-posix.c
136
+++ b/block/file-posix.c
137
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
138
int64_t bytes, QEMUIOVector *qiov,
139
BdrvRequestFlags flags)
140
{
141
- assert(flags == 0);
142
return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
143
}
144
145
diff --git a/block/gluster.c b/block/gluster.c
146
index XXXXXXX..XXXXXXX 100644
147
--- a/block/gluster.c
148
+++ b/block/gluster.c
149
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
150
QEMUIOVector *qiov,
151
int flags)
152
{
153
- assert(!flags);
154
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
155
}
156
157
diff --git a/block/io.c b/block/io.c
158
index XXXXXXX..XXXXXXX 100644
159
--- a/block/io.c
160
+++ b/block/io.c
161
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
162
int ret;
163
164
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
165
- assert(!(flags & ~BDRV_REQ_MASK));
166
- assert(!(flags & BDRV_REQ_NO_FALLBACK));
167
+ assert(!(flags & ~bs->supported_read_flags));
168
169
if (!drv) {
170
return -ENOMEDIUM;
171
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
172
BdrvRequestFlags flags)
173
{
174
BlockDriver *drv = bs->drv;
175
+ bool emulate_fua = false;
176
int64_t sector_num;
177
unsigned int nb_sectors;
178
QEMUIOVector local_qiov;
179
int ret;
180
181
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
182
- assert(!(flags & ~BDRV_REQ_MASK));
183
- assert(!(flags & BDRV_REQ_NO_FALLBACK));
184
185
if (!drv) {
186
return -ENOMEDIUM;
187
}
188
189
+ if ((flags & BDRV_REQ_FUA) &&
190
+ (~bs->supported_write_flags & BDRV_REQ_FUA)) {
191
+ flags &= ~BDRV_REQ_FUA;
192
+ emulate_fua = true;
193
+ }
194
+
195
+ flags &= bs->supported_write_flags;
196
+
197
if (drv->bdrv_co_pwritev_part) {
198
ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
199
- flags & bs->supported_write_flags);
200
- flags &= ~bs->supported_write_flags;
201
+ flags);
202
goto emulate_flags;
203
}
204
205
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
206
}
207
208
if (drv->bdrv_co_pwritev) {
209
- ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
210
- flags & bs->supported_write_flags);
211
- flags &= ~bs->supported_write_flags;
212
+ ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags);
213
goto emulate_flags;
214
}
215
216
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
217
.coroutine = qemu_coroutine_self(),
218
};
219
220
- acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
221
- flags & bs->supported_write_flags,
222
+ acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, flags,
223
bdrv_co_io_em_complete, &co);
224
- flags &= ~bs->supported_write_flags;
225
if (acb == NULL) {
226
ret = -EIO;
227
} else {
228
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
229
assert(bytes <= BDRV_REQUEST_MAX_BYTES);
230
231
assert(drv->bdrv_co_writev);
232
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
233
- flags & bs->supported_write_flags);
234
- flags &= ~bs->supported_write_flags;
235
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, flags);
236
237
emulate_flags:
238
- if (ret == 0 && (flags & BDRV_REQ_FUA)) {
239
+ if (ret == 0 && emulate_fua) {
240
ret = bdrv_co_flush(bs);
241
}
242
243
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
244
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
245
align);
246
247
- /* TODO: We would need a per-BDS .supported_read_flags and
248
+ /*
249
+ * TODO: We would need a per-BDS .supported_read_flags and
250
* potential fallback support, if we ever implement any read flags
251
* to pass through to drivers. For now, there aren't any
252
- * passthrough flags. */
253
- assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));
254
+ * passthrough flags except the BDRV_REQ_REGISTERED_BUF optimization hint.
255
+ */
256
+ assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH |
257
+ BDRV_REQ_REGISTERED_BUF)));
258
259
/* Handle Copy on Read and associated serialisation */
260
if (flags & BDRV_REQ_COPY_ON_READ) {
261
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
262
goto out;
263
}
264
265
- assert(!(flags & ~bs->supported_read_flags));
266
+ assert(!(flags & ~(bs->supported_read_flags | BDRV_REQ_REGISTERED_BUF)));
267
268
max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
269
if (bytes <= max_bytes && bytes <= max_transfer) {
270
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
271
static int bdrv_pad_request(BlockDriverState *bs,
272
QEMUIOVector **qiov, size_t *qiov_offset,
273
int64_t *offset, int64_t *bytes,
274
- BdrvRequestPadding *pad, bool *padded)
275
+ BdrvRequestPadding *pad, bool *padded,
276
+ BdrvRequestFlags *flags)
277
{
278
int ret;
279
280
@@ -XXX,XX +XXX,XX @@ static int bdrv_pad_request(BlockDriverState *bs,
281
if (padded) {
282
*padded = true;
283
}
284
+ if (flags) {
285
+ /* Can't use optimization hint with bounce buffer */
286
+ *flags &= ~BDRV_REQ_REGISTERED_BUF;
287
+ }
288
289
return 0;
290
}
291
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
292
}
293
294
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
295
- NULL);
296
+ NULL, &flags);
297
if (ret < 0) {
298
goto fail;
299
}
300
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
301
return -ENOTSUP;
302
}
303
304
+ /* By definition there is no user buffer so this flag doesn't make sense */
305
+ if (flags & BDRV_REQ_REGISTERED_BUF) {
306
+ return -EINVAL;
307
+ }
308
+
309
/* Invalidate the cached block-status data range if this write overlaps */
310
bdrv_bsc_invalidate_range(bs, offset, bytes);
311
312
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
313
bool padding;
314
BdrvRequestPadding pad;
315
316
+ /* This flag doesn't make sense for padding or zero writes */
317
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
318
+
319
padding = bdrv_init_padding(bs, offset, bytes, &pad);
320
if (padding) {
321
assert(!(flags & BDRV_REQ_NO_WAIT));
322
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
323
* alignment only if there is no ZERO flag.
324
*/
325
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
326
- &padded);
327
+ &padded, &flags);
328
if (ret < 0) {
329
return ret;
330
}
331
diff --git a/block/mirror.c b/block/mirror.c
332
index XXXXXXX..XXXXXXX 100644
333
--- a/block/mirror.c
334
+++ b/block/mirror.c
335
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
336
qemu_iovec_init(&bounce_qiov, 1);
337
qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
338
qiov = &bounce_qiov;
339
+
340
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
341
}
342
343
ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov,
344
diff --git a/block/nbd.c b/block/nbd.c
345
index XXXXXXX..XXXXXXX 100644
346
--- a/block/nbd.c
347
+++ b/block/nbd.c
348
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nbd_client_co_preadv(BlockDriverState *bs, int64_t offse
349
};
350
351
assert(bytes <= NBD_MAX_BUFFER_SIZE);
352
- assert(!flags);
353
354
if (!bytes) {
355
return 0;
356
diff --git a/block/parallels.c b/block/parallels.c
357
index XXXXXXX..XXXXXXX 100644
358
--- a/block/parallels.c
359
+++ b/block/parallels.c
360
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
361
QEMUIOVector hd_qiov;
362
int ret = 0;
363
364
- assert(!flags);
365
qemu_iovec_init(&hd_qiov, qiov->niov);
366
367
while (nb_sectors > 0) {
368
diff --git a/block/qcow.c b/block/qcow.c
369
index XXXXXXX..XXXXXXX 100644
370
--- a/block/qcow.c
371
+++ b/block/qcow.c
372
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset,
373
uint8_t *buf;
374
void *orig_buf;
375
376
- assert(!flags);
377
if (qiov->niov > 1) {
378
buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
379
if (buf == NULL) {
380
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, int64_t offset,
381
uint8_t *buf;
382
void *orig_buf;
383
384
- assert(!flags);
385
s->cluster_cache_offset = -1; /* disable compressed cache */
386
387
/* We must always copy the iov when encrypting, so we
388
diff --git a/block/qed.c b/block/qed.c
389
index XXXXXXX..XXXXXXX 100644
390
--- a/block/qed.c
391
+++ b/block/qed.c
392
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
393
int64_t sector_num, int nb_sectors,
394
QEMUIOVector *qiov, int flags)
395
{
396
- assert(!flags);
397
return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
398
}
399
400
diff --git a/block/raw-format.c b/block/raw-format.c
401
index XXXXXXX..XXXXXXX 100644
402
--- a/block/raw-format.c
403
+++ b/block/raw-format.c
404
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
405
qemu_iovec_add(&local_qiov, buf, 512);
406
qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
407
qiov = &local_qiov;
408
+
409
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
410
}
411
412
ret = raw_adjust_offset(bs, &offset, bytes, true);
413
diff --git a/block/replication.c b/block/replication.c
414
index XXXXXXX..XXXXXXX 100644
415
--- a/block/replication.c
416
+++ b/block/replication.c
417
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
418
int ret;
419
int64_t n;
420
421
- assert(!flags);
422
ret = replication_get_io_status(s);
423
if (ret < 0) {
424
goto out;
425
diff --git a/block/ssh.c b/block/ssh.c
426
index XXXXXXX..XXXXXXX 100644
427
--- a/block/ssh.c
428
+++ b/block/ssh.c
429
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
430
BDRVSSHState *s = bs->opaque;
431
int ret;
432
433
- assert(!flags);
434
qemu_co_mutex_lock(&s->lock);
435
ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
436
nb_sectors * BDRV_SECTOR_SIZE, qiov);
437
diff --git a/block/vhdx.c b/block/vhdx.c
438
index XXXXXXX..XXXXXXX 100644
439
--- a/block/vhdx.c
440
+++ b/block/vhdx.c
441
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
442
uint64_t bat_prior_offset = 0;
443
bool bat_update = false;
444
445
- assert(!flags);
446
qemu_iovec_init(&hd_qiov, qiov->niov);
447
448
qemu_co_mutex_lock(&s->lock);
449
--
450
2.37.3
diff view generated by jsdifflib
New patch
1
Registering an I/O buffer is only a performance optimization hint but it
2
is still necessary to return errors when it fails.
1
3
4
Later patches will need to detect errors when registering buffers but an
5
immediate advantage is that error_report() calls are no longer needed in
6
block driver .bdrv_register_buf() functions.
7
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-id: 20221013185908.1297568-8-stefanha@redhat.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
12
include/block/block-global-state.h | 5 ++-
13
include/block/block_int-common.h | 5 ++-
14
include/sysemu/block-backend-global-state.h | 2 +-
15
block/block-backend.c | 4 +--
16
block/io.c | 34 +++++++++++++++++++--
17
block/nvme.c | 18 +++++------
18
qemu-img.c | 2 +-
19
7 files changed, 52 insertions(+), 18 deletions(-)
20
21
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/include/block/block-global-state.h
24
+++ b/include/block/block-global-state.h
25
@@ -XXX,XX +XXX,XX @@ void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
26
*
27
* Buffers must not overlap and they must be unregistered with the same <host,
28
* size> values that they were registered with.
29
+ *
30
+ * Returns: true on success, false on failure
31
*/
32
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
33
+bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
34
+ Error **errp);
35
void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size);
36
37
void bdrv_cancel_in_flight(BlockDriverState *bs);
38
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
39
index XXXXXXX..XXXXXXX 100644
40
--- a/include/block/block_int-common.h
41
+++ b/include/block/block_int-common.h
42
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
43
* that it can do IOMMU mapping with VFIO etc., in order to get better
44
* performance. In the case of VFIO drivers, this callback is used to do
45
* DMA mapping for hot buffers.
46
+ *
47
+ * Returns: true on success, false on failure
48
*/
49
- void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
50
+ bool (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size,
51
+ Error **errp);
52
void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host, size_t size);
53
54
/*
55
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/include/sysemu/block-backend-global-state.h
58
+++ b/include/sysemu/block-backend-global-state.h
59
@@ -XXX,XX +XXX,XX @@ void blk_io_limits_enable(BlockBackend *blk, const char *group);
60
void blk_io_limits_update_group(BlockBackend *blk, const char *group);
61
void blk_set_force_allow_inactivate(BlockBackend *blk);
62
63
-void blk_register_buf(BlockBackend *blk, void *host, size_t size);
64
+bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp);
65
void blk_unregister_buf(BlockBackend *blk, void *host, size_t size);
66
67
const BdrvChild *blk_root(BlockBackend *blk);
68
diff --git a/block/block-backend.c b/block/block-backend.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/block/block-backend.c
71
+++ b/block/block-backend.c
72
@@ -XXX,XX +XXX,XX @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
73
}
74
}
75
76
-void blk_register_buf(BlockBackend *blk, void *host, size_t size)
77
+bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp)
78
{
79
GLOBAL_STATE_CODE();
80
- bdrv_register_buf(blk_bs(blk), host, size);
81
+ return bdrv_register_buf(blk_bs(blk), host, size, errp);
82
}
83
84
void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
85
diff --git a/block/io.c b/block/io.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/block/io.c
88
+++ b/block/io.c
89
@@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs)
90
}
91
}
92
93
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
94
+/* Helper that undoes bdrv_register_buf() when it fails partway through */
95
+static void bdrv_register_buf_rollback(BlockDriverState *bs,
96
+ void *host,
97
+ size_t size,
98
+ BdrvChild *final_child)
99
+{
100
+ BdrvChild *child;
101
+
102
+ QLIST_FOREACH(child, &bs->children, next) {
103
+ if (child == final_child) {
104
+ break;
105
+ }
106
+
107
+ bdrv_unregister_buf(child->bs, host, size);
108
+ }
109
+
110
+ if (bs->drv && bs->drv->bdrv_unregister_buf) {
111
+ bs->drv->bdrv_unregister_buf(bs, host, size);
112
+ }
113
+}
114
+
115
+bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
116
+ Error **errp)
117
{
118
BdrvChild *child;
119
120
GLOBAL_STATE_CODE();
121
if (bs->drv && bs->drv->bdrv_register_buf) {
122
- bs->drv->bdrv_register_buf(bs, host, size);
123
+ if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) {
124
+ return false;
125
+ }
126
}
127
QLIST_FOREACH(child, &bs->children, next) {
128
- bdrv_register_buf(child->bs, host, size);
129
+ if (!bdrv_register_buf(child->bs, host, size, errp)) {
130
+ bdrv_register_buf_rollback(bs, host, size, child);
131
+ return false;
132
+ }
133
}
134
+ return true;
135
}
136
137
void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
138
diff --git a/block/nvme.c b/block/nvme.c
139
index XXXXXXX..XXXXXXX 100644
140
--- a/block/nvme.c
141
+++ b/block/nvme.c
142
@@ -XXX,XX +XXX,XX @@ static void nvme_aio_unplug(BlockDriverState *bs)
143
}
144
}
145
146
-static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
147
+static bool nvme_register_buf(BlockDriverState *bs, void *host, size_t size,
148
+ Error **errp)
149
{
150
int ret;
151
- Error *local_err = NULL;
152
BDRVNVMeState *s = bs->opaque;
153
154
- ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, &local_err);
155
- if (ret) {
156
- /* FIXME: we may run out of IOVA addresses after repeated
157
- * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
158
- * doesn't reclaim addresses for fixed mappings. */
159
- error_reportf_err(local_err, "nvme_register_buf failed: ");
160
- }
161
+ /*
162
+ * FIXME: we may run out of IOVA addresses after repeated
163
+ * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
164
+ * doesn't reclaim addresses for fixed mappings.
165
+ */
166
+ ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, errp);
167
+ return ret == 0;
168
}
169
170
static void nvme_unregister_buf(BlockDriverState *bs, void *host, size_t size)
171
diff --git a/qemu-img.c b/qemu-img.c
172
index XXXXXXX..XXXXXXX 100644
173
--- a/qemu-img.c
174
+++ b/qemu-img.c
175
@@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv)
176
data.buf = blk_blockalign(blk, buf_size);
177
memset(data.buf, pattern, data.nrreq * data.bufsize);
178
179
- blk_register_buf(blk, data.buf, buf_size);
180
+ blk_register_buf(blk, data.buf, buf_size, &error_fatal);
181
182
data.qiov = g_new(QEMUIOVector, data.nrreq);
183
for (i = 0; i < data.nrreq; i++) {
184
--
185
2.37.3
diff view generated by jsdifflib
New patch
1
Make list traversal work when a callback removes a notifier
2
mid-traversal. This is a cleanup to prevent bugs in the future.
1
3
4
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Reviewed-by: David Hildenbrand <david@redhat.com>
6
Message-id: 20221013185908.1297568-9-stefanha@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
hw/core/numa.c | 9 ++++++---
10
1 file changed, 6 insertions(+), 3 deletions(-)
11
12
diff --git a/hw/core/numa.c b/hw/core/numa.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/core/numa.c
15
+++ b/hw/core/numa.c
16
@@ -XXX,XX +XXX,XX @@ void ram_block_notifier_remove(RAMBlockNotifier *n)
17
void ram_block_notify_add(void *host, size_t size, size_t max_size)
18
{
19
RAMBlockNotifier *notifier;
20
+ RAMBlockNotifier *next;
21
22
- QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
23
+ QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) {
24
if (notifier->ram_block_added) {
25
notifier->ram_block_added(notifier, host, size, max_size);
26
}
27
@@ -XXX,XX +XXX,XX @@ void ram_block_notify_add(void *host, size_t size, size_t max_size)
28
void ram_block_notify_remove(void *host, size_t size, size_t max_size)
29
{
30
RAMBlockNotifier *notifier;
31
+ RAMBlockNotifier *next;
32
33
- QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
34
+ QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) {
35
if (notifier->ram_block_removed) {
36
notifier->ram_block_removed(notifier, host, size, max_size);
37
}
38
@@ -XXX,XX +XXX,XX @@ void ram_block_notify_remove(void *host, size_t size, size_t max_size)
39
void ram_block_notify_resize(void *host, size_t old_size, size_t new_size)
40
{
41
RAMBlockNotifier *notifier;
42
+ RAMBlockNotifier *next;
43
44
- QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
45
+ QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) {
46
if (notifier->ram_block_resized) {
47
notifier->ram_block_resized(notifier, host, old_size, new_size);
48
}
49
--
50
2.37.3
diff view generated by jsdifflib
New patch
1
Emulated devices and other BlockBackend users wishing to take advantage
2
of blk_register_buf() all have the same repetitive job: register
3
RAMBlocks with the BlockBackend using RAMBlockNotifier.
1
4
5
Add a BlockRAMRegistrar API to do this. A later commit will use this
6
from hw/block/virtio-blk.c.
7
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
10
Message-id: 20221013185908.1297568-10-stefanha@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
MAINTAINERS | 1 +
14
include/sysemu/block-ram-registrar.h | 37 ++++++++++++++++++
15
block/block-ram-registrar.c | 58 ++++++++++++++++++++++++++++
16
block/meson.build | 1 +
17
4 files changed, 97 insertions(+)
18
create mode 100644 include/sysemu/block-ram-registrar.h
19
create mode 100644 block/block-ram-registrar.c
20
21
diff --git a/MAINTAINERS b/MAINTAINERS
22
index XXXXXXX..XXXXXXX 100644
23
--- a/MAINTAINERS
24
+++ b/MAINTAINERS
25
@@ -XXX,XX +XXX,XX @@ F: block*
26
F: block/
27
F: hw/block/
28
F: include/block/
29
+F: include/sysemu/block-*.h
30
F: qemu-img*
31
F: docs/tools/qemu-img.rst
32
F: qemu-io*
33
diff --git a/include/sysemu/block-ram-registrar.h b/include/sysemu/block-ram-registrar.h
34
new file mode 100644
35
index XXXXXXX..XXXXXXX
36
--- /dev/null
37
+++ b/include/sysemu/block-ram-registrar.h
38
@@ -XXX,XX +XXX,XX @@
39
+/*
40
+ * BlockBackend RAM Registrar
41
+ *
42
+ * SPDX-License-Identifier: GPL-2.0-or-later
43
+ */
44
+
45
+#ifndef BLOCK_RAM_REGISTRAR_H
46
+#define BLOCK_RAM_REGISTRAR_H
47
+
48
+#include "exec/ramlist.h"
49
+
50
+/**
51
+ * struct BlockRAMRegistrar:
52
+ *
53
+ * Keeps RAMBlock memory registered with a BlockBackend using
54
+ * blk_register_buf() including hotplugged memory.
55
+ *
56
+ * Emulated devices or other BlockBackend users initialize a BlockRAMRegistrar
57
+ * with blk_ram_registrar_init() before submitting I/O requests with the
58
+ * BDRV_REQ_REGISTERED_BUF flag set.
59
+ */
60
+typedef struct {
61
+ BlockBackend *blk;
62
+ RAMBlockNotifier notifier;
63
+ bool ok;
64
+} BlockRAMRegistrar;
65
+
66
+void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk);
67
+void blk_ram_registrar_destroy(BlockRAMRegistrar *r);
68
+
69
+/* Have all RAMBlocks been registered successfully? */
70
+static inline bool blk_ram_registrar_ok(BlockRAMRegistrar *r)
71
+{
72
+ return r->ok;
73
+}
74
+
75
+#endif /* BLOCK_RAM_REGISTRAR_H */
76
diff --git a/block/block-ram-registrar.c b/block/block-ram-registrar.c
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/block/block-ram-registrar.c
81
@@ -XXX,XX +XXX,XX @@
82
+/*
83
+ * BlockBackend RAM Registrar
84
+ *
85
+ * SPDX-License-Identifier: GPL-2.0-or-later
86
+ */
87
+
88
+#include "qemu/osdep.h"
89
+#include "sysemu/block-backend.h"
90
+#include "sysemu/block-ram-registrar.h"
91
+#include "qapi/error.h"
92
+
93
+static void ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
94
+ size_t max_size)
95
+{
96
+ BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier);
97
+ Error *err = NULL;
98
+
99
+ if (!r->ok) {
100
+ return; /* don't try again if we've already failed */
101
+ }
102
+
103
+ if (!blk_register_buf(r->blk, host, max_size, &err)) {
104
+ error_report_err(err);
105
+ ram_block_notifier_remove(&r->notifier);
106
+ r->ok = false;
107
+ }
108
+}
109
+
110
+static void ram_block_removed(RAMBlockNotifier *n, void *host, size_t size,
111
+ size_t max_size)
112
+{
113
+ BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier);
114
+ blk_unregister_buf(r->blk, host, max_size);
115
+}
116
+
117
+void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk)
118
+{
119
+ r->blk = blk;
120
+ r->notifier = (RAMBlockNotifier){
121
+ .ram_block_added = ram_block_added,
122
+ .ram_block_removed = ram_block_removed,
123
+
124
+ /*
125
+ * .ram_block_resized() is not necessary because we use the max_size
126
+ * value that does not change across resize.
127
+ */
128
+ };
129
+ r->ok = true;
130
+
131
+ ram_block_notifier_add(&r->notifier);
132
+}
133
+
134
+void blk_ram_registrar_destroy(BlockRAMRegistrar *r)
135
+{
136
+ if (r->ok) {
137
+ ram_block_notifier_remove(&r->notifier);
138
+ }
139
+}
140
diff --git a/block/meson.build b/block/meson.build
141
index XXXXXXX..XXXXXXX 100644
142
--- a/block/meson.build
143
+++ b/block/meson.build
144
@@ -XXX,XX +XXX,XX @@ block_ss.add(files(
145
), zstd, zlib, gnutls)
146
147
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
148
+softmmu_ss.add(files('block-ram-registrar.c'))
149
150
if get_option('qcow1').allowed()
151
block_ss.add(files('qcow.c'))
152
--
153
2.37.3
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Add a function to get the file descriptor for a RAMBlock. Device
2
emulation code typically uses the MemoryRegion APIs but vhost-style code
3
may use RAMBlock directly for sharing guest memory with another process.
2
4
3
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
This new API will be used by the libblkio block driver so it can share
4
Reviewed-by: Eric Blake <eblake@redhat.com>
6
guest memory via .bdrv_register_buf().
7
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-id: 20221013185908.1297568-11-stefanha@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
11
---
7
scripts/coccinelle/return_directly.cocci | 2 +-
12
include/exec/cpu-common.h | 1 +
8
1 file changed, 1 insertion(+), 1 deletion(-)
13
softmmu/physmem.c | 5 +++++
14
2 files changed, 6 insertions(+)
9
15
10
diff --git a/scripts/coccinelle/return_directly.cocci b/scripts/coccinelle/return_directly.cocci
16
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
11
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
12
--- a/scripts/coccinelle/return_directly.cocci
18
--- a/include/exec/cpu-common.h
13
+++ b/scripts/coccinelle/return_directly.cocci
19
+++ b/include/exec/cpu-common.h
14
@@ -XXX,XX +XXX,XX @@
20
@@ -XXX,XX +XXX,XX @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb);
15
-// replace 'R = X; return R;' with 'return R;'
21
bool qemu_ram_is_migratable(RAMBlock *rb);
16
+// replace 'R = X; return R;' with 'return X;'
22
void qemu_ram_set_migratable(RAMBlock *rb);
17
@@
23
void qemu_ram_unset_migratable(RAMBlock *rb);
18
identifier VAR;
24
+int qemu_ram_get_fd(RAMBlock *rb);
19
expression E;
25
26
size_t qemu_ram_pagesize(RAMBlock *block);
27
size_t qemu_ram_pagesize_largest(void);
28
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/softmmu/physmem.c
31
+++ b/softmmu/physmem.c
32
@@ -XXX,XX +XXX,XX @@ void qemu_ram_unset_migratable(RAMBlock *rb)
33
rb->flags &= ~RAM_MIGRATABLE;
34
}
35
36
+int qemu_ram_get_fd(RAMBlock *rb)
37
+{
38
+ return rb->fd;
39
+}
40
+
41
/* Called with iothread lock held. */
42
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
43
{
20
--
44
--
21
2.9.4
45
2.37.3
22
23
diff view generated by jsdifflib
1
The scripts/qemu-gdb.py file is not easily discoverable. Add a .gdbinit
1
The blkio block driver will need to look up the file descriptor for a
2
file so GDB either loads qemu-gdb.py automatically or prints a message
2
given pointer. This is possible in softmmu builds where the RAMBlock API
3
informing the user how to enable them (some systems disable ./.gdbinit
3
is available for querying guest RAM.
4
loading for security reasons).
5
4
6
Symlink .gdbinit and the scripts directory in order to make out-of-tree
5
Add stubs so tools like qemu-img that link the block layer still build
7
builds work. The scripts directory is used to find the qemu-gdb.py file
6
successfully. In this case there is no guest RAM but that is fine.
8
specified by a relative path in .gdbinit.
7
Bounce buffers and their file descriptors will be allocated with
8
libblkio's blkio_alloc_mem_region() so we won't rely on QEMU's
9
qemu_ram_get_fd() in that case.
9
10
10
Suggested-by: Eric Blake <eblake@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
12
Message-id: 20221013185908.1297568-12-stefanha@redhat.com
13
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
14
Tested-by: Eric Blake <eblake@redhat.com>
15
Message-id: 20170517124042.1430-1-stefanha@redhat.com
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
---
14
---
18
configure | 1 +
15
stubs/physmem.c | 13 +++++++++++++
19
.gdbinit | 8 ++++++++
16
stubs/meson.build | 1 +
20
2 files changed, 9 insertions(+)
17
2 files changed, 14 insertions(+)
21
create mode 100644 .gdbinit
18
create mode 100644 stubs/physmem.c
22
19
23
diff --git a/configure b/configure
20
diff --git a/stubs/physmem.c b/stubs/physmem.c
24
index XXXXXXX..XXXXXXX 100755
25
--- a/configure
26
+++ b/configure
27
@@ -XXX,XX +XXX,XX @@ FILES="$FILES pc-bios/spapr-rtas/Makefile"
28
FILES="$FILES pc-bios/s390-ccw/Makefile"
29
FILES="$FILES roms/seabios/Makefile roms/vgabios/Makefile"
30
FILES="$FILES pc-bios/qemu-icon.bmp"
31
+FILES="$FILES .gdbinit scripts" # scripts needed by relative path in .gdbinit
32
for bios_file in \
33
$source_path/pc-bios/*.bin \
34
$source_path/pc-bios/*.lid \
35
diff --git a/.gdbinit b/.gdbinit
36
new file mode 100644
21
new file mode 100644
37
index XXXXXXX..XXXXXXX
22
index XXXXXXX..XXXXXXX
38
--- /dev/null
23
--- /dev/null
39
+++ b/.gdbinit
24
+++ b/stubs/physmem.c
40
@@ -XXX,XX +XXX,XX @@
25
@@ -XXX,XX +XXX,XX @@
41
+# GDB may have ./.gdbinit loading disabled by default. In that case you can
26
+#include "qemu/osdep.h"
42
+# follow the instructions it prints. They boil down to adding the following to
27
+#include "exec/cpu-common.h"
43
+# your home directory's ~/.gdbinit file:
44
+#
45
+# add-auto-load-safe-path /path/to/qemu/.gdbinit
46
+
28
+
47
+# Load QEMU-specific sub-commands and settings
29
+RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
48
+source scripts/qemu-gdb.py
30
+ ram_addr_t *offset)
31
+{
32
+ return NULL;
33
+}
34
+
35
+int qemu_ram_get_fd(RAMBlock *rb)
36
+{
37
+ return -1;
38
+}
39
diff --git a/stubs/meson.build b/stubs/meson.build
40
index XXXXXXX..XXXXXXX 100644
41
--- a/stubs/meson.build
42
+++ b/stubs/meson.build
43
@@ -XXX,XX +XXX,XX @@ stub_ss.add(files('migr-blocker.c'))
44
stub_ss.add(files('module-opts.c'))
45
stub_ss.add(files('monitor.c'))
46
stub_ss.add(files('monitor-core.c'))
47
+stub_ss.add(files('physmem.c'))
48
stub_ss.add(files('qemu-timer-notify-cb.c'))
49
stub_ss.add(files('qmp_memory_device.c'))
50
stub_ss.add(files('qmp-command-available.c'))
49
--
51
--
50
2.9.4
52
2.37.3
51
52
diff view generated by jsdifflib
1
From: Bruno Dominguez <bru.dominguez@gmail.com>
1
Avoid bounce buffers when QEMUIOVector elements are within previously
2
2
registered bdrv_register_buf() buffers.
3
There was no possibility to add specific cxx flags using the configure
3
4
file. So A new entrance has been created to support it.
4
The idea is that emulated storage controllers will register guest RAM
5
5
using bdrv_register_buf() and set the BDRV_REQ_REGISTERED_BUF on I/O
6
Duplication of information in configure and rules.mak. Taking
6
requests. Therefore no blkio_map_mem_region() calls are necessary in the
7
QEMU_CFLAGS and add them to QEMU_CXXFLAGS, now the value of
7
performance-critical I/O code path.
8
QEMU_CXXFLAGS is stored in config-host.mak, so there is no need for
8
9
it.
9
This optimization doesn't apply if the I/O buffer is internally
10
10
allocated by QEMU (e.g. qcow2 metadata). There we still take the slow
11
The makefile for libvixl was adding flags for QEMU_CXXFLAGS in
11
path because BDRV_REQ_REGISTERED_BUF is not set.
12
QEMU_CFLAGS because of the addition in rules.mak. That was removed, so
12
13
adding them where it should be.
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
14
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
15
Signed-off-by: Bruno Dominguez <bru.dominguez@gmail.com>
15
Message-id: 20221013185908.1297568-13-stefanha@redhat.com
16
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Message-id: 1496754467-20893-1-git-send-email-bru.dominguez@gmail.com
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
19
---
17
---
20
configure | 74 +++++++++++++++++++++++++--------------------
18
block/blkio.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++++-
21
disas/libvixl/Makefile.objs | 4 +--
19
1 file changed, 180 insertions(+), 3 deletions(-)
22
rules.mak | 3 --
20
23
3 files changed, 44 insertions(+), 37 deletions(-)
21
diff --git a/block/blkio.c b/block/blkio.c
24
22
index XXXXXXX..XXXXXXX 100644
25
diff --git a/configure b/configure
23
--- a/block/blkio.c
26
index XXXXXXX..XXXXXXX 100755
24
+++ b/block/blkio.c
27
--- a/configure
25
@@ -XXX,XX +XXX,XX @@
28
+++ b/configure
26
#include "qemu/osdep.h"
29
@@ -XXX,XX +XXX,XX @@ update_cxxflags() {
27
#include <blkio.h>
30
# Set QEMU_CXXFLAGS from QEMU_CFLAGS by filtering out those
28
#include "block/block_int.h"
31
# options which some versions of GCC's C++ compiler complain about
29
+#include "exec/memory.h"
32
# because they only make sense for C programs.
30
+#include "exec/cpu-common.h" /* for qemu_ram_get_fd() */
33
- QEMU_CXXFLAGS=
31
#include "qapi/error.h"
34
+ QEMU_CXXFLAGS="$QEMU_CXXFLAGS -D__STDC_LIMIT_MACROS"
32
+#include "qemu/error-report.h"
35
+
33
#include "qapi/qmp/qdict.h"
36
for arg in $QEMU_CFLAGS; do
34
#include "qemu/module.h"
37
case $arg in
35
+#include "exec/memory.h" /* for ram_block_discard_disable() */
38
-Wstrict-prototypes|-Wmissing-prototypes|-Wnested-externs|\
36
39
@@ -XXX,XX +XXX,XX @@ for opt do
37
/*
40
--extra-cflags=*) QEMU_CFLAGS="$QEMU_CFLAGS $optarg"
38
* Keep the QEMU BlockDriver names identical to the libblkio driver names.
41
EXTRA_CFLAGS="$optarg"
39
@@ -XXX,XX +XXX,XX @@ typedef struct {
42
;;
40
43
+ --extra-cxxflags=*) QEMU_CXXFLAGS="$QEMU_CXXFLAGS $optarg"
41
/* Can we skip adding/deleting blkio_mem_regions? */
44
+ EXTRA_CXXFLAGS="$optarg"
42
bool needs_mem_regions;
45
+ ;;
43
+
46
--extra-ldflags=*) LDFLAGS="$LDFLAGS $optarg"
44
+ /* Are file descriptors necessary for blkio_mem_regions? */
47
EXTRA_LDFLAGS="$optarg"
45
+ bool needs_mem_region_fd;
48
;;
46
+
49
@@ -XXX,XX +XXX,XX @@ for opt do
47
+ /* Are madvise(MADV_DONTNEED)-style operations unavailable? */
50
;;
48
+ bool may_pin_mem_regions;
51
--extra-cflags=*)
49
} BDRVBlkioState;
52
;;
50
53
+ --extra-cxxflags=*)
51
/* Called with s->bounce_lock held */
54
+ ;;
52
@@ -XXX,XX +XXX,XX @@ blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
55
--extra-ldflags=*)
53
.coroutine = qemu_coroutine_self(),
56
;;
54
};
57
--enable-debug-info)
55
BDRVBlkioState *s = bs->opaque;
58
@@ -XXX,XX +XXX,XX @@ Advanced options (experts only):
56
- bool use_bounce_buffer = s->needs_mem_regions;
59
--cxx=CXX use C++ compiler CXX [$cxx]
57
+ bool use_bounce_buffer =
60
--objcc=OBJCC use Objective-C compiler OBJCC [$objcc]
58
+ s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
61
--extra-cflags=CFLAGS append extra C compiler flags QEMU_CFLAGS
59
BlkioBounceBuf bounce;
62
+ --extra-cxxflags=CXXFLAGS append extra C++ compiler flags QEMU_CXXFLAGS
60
struct iovec *iov = qiov->iov;
63
--extra-ldflags=LDFLAGS append extra linker flags LDFLAGS
61
int iovcnt = qiov->niov;
64
--make=MAKE use specified make [$make]
62
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
65
--install=INSTALL use specified install [$install]
63
.coroutine = qemu_coroutine_self(),
66
@@ -XXX,XX +XXX,XX @@ if test "$bogus_os" = "yes"; then
64
};
67
error_exit "Unrecognized host OS $targetos"
65
BDRVBlkioState *s = bs->opaque;
68
fi
66
- bool use_bounce_buffer = s->needs_mem_regions;
69
67
+ bool use_bounce_buffer =
70
-# Check that the C++ compiler exists and works with the C compiler
68
+ s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
71
-if has $cxx; then
69
BlkioBounceBuf bounce;
72
- cat > $TMPC <<EOF
70
struct iovec *iov = qiov->iov;
73
-int c_function(void);
71
int iovcnt = qiov->niov;
74
-int main(void) { return c_function(); }
72
@@ -XXX,XX +XXX,XX @@ static void blkio_io_unplug(BlockDriverState *bs)
75
-EOF
73
}
76
-
74
}
77
- compile_object
75
78
-
76
+typedef enum {
79
- cat > $TMPCXX <<EOF
77
+ BMRR_OK,
80
-extern "C" {
78
+ BMRR_SKIP,
81
- int c_function(void);
79
+ BMRR_FAIL,
82
-}
80
+} BlkioMemRegionResult;
83
-int c_function(void) { return 42; }
81
+
84
-EOF
82
+/*
85
-
83
+ * Produce a struct blkio_mem_region for a given address and size.
86
- update_cxxflags
84
+ *
87
-
85
+ * This function produces identical results when called multiple times with the
88
- if do_cxx $QEMU_CXXFLAGS -o $TMPE $TMPCXX $TMPO $LDFLAGS; then
86
+ * same arguments. This property is necessary because blkio_unmap_mem_region()
89
- # C++ compiler $cxx works ok with C compiler $cc
87
+ * must receive the same struct blkio_mem_region field values that were passed
90
- :
88
+ * to blkio_map_mem_region().
91
- else
89
+ */
92
- echo "C++ compiler $cxx does not work with C compiler $cc"
90
+static BlkioMemRegionResult
93
- echo "Disabling C++ specific optional code"
91
+blkio_mem_region_from_host(BlockDriverState *bs,
94
- cxx=
92
+ void *host, size_t size,
95
- fi
93
+ struct blkio_mem_region *region,
96
-else
94
+ Error **errp)
97
- echo "No C++ compiler available; disabling C++ specific optional code"
95
+{
98
- cxx=
96
+ BDRVBlkioState *s = bs->opaque;
99
-fi
97
+ int fd = -1;
100
-
98
+ ram_addr_t fd_offset = 0;
101
gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
99
+
102
gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
100
+ if (((uintptr_t)host | size) % s->mem_region_alignment) {
103
gcc_flags="-Wno-missing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
101
+ error_setg(errp, "unaligned buf %p with size %zu", host, size);
104
@@ -XXX,XX +XXX,XX @@ EOF
102
+ return BMRR_FAIL;
105
fi
103
+ }
106
fi
104
+
107
105
+ /* Attempt to find the fd for the underlying memory */
108
+# Check that the C++ compiler exists and works with the C compiler.
106
+ if (s->needs_mem_region_fd) {
109
+# All the QEMU_CXXFLAGS are based on QEMU_CFLAGS. Keep this at the end to don't miss any other that could be added.
107
+ RAMBlock *ram_block;
110
+if has $cxx; then
108
+ RAMBlock *end_block;
111
+ cat > $TMPC <<EOF
109
+ ram_addr_t offset;
112
+int c_function(void);
110
+
113
+int main(void) { return c_function(); }
111
+ /*
114
+EOF
112
+ * bdrv_register_buf() is called with the BQL held so mr lives at least
115
+
113
+ * until this function returns.
116
+ compile_object
114
+ */
117
+
115
+ ram_block = qemu_ram_block_from_host(host, false, &fd_offset);
118
+ cat > $TMPCXX <<EOF
116
+ if (ram_block) {
119
+extern "C" {
117
+ fd = qemu_ram_get_fd(ram_block);
120
+ int c_function(void);
118
+ }
119
+ if (fd == -1) {
120
+ /*
121
+ * Ideally every RAMBlock would have an fd. pc-bios and other
122
+ * things don't. Luckily they are usually not I/O buffers and we
123
+ * can just ignore them.
124
+ */
125
+ return BMRR_SKIP;
126
+ }
127
+
128
+ /* Make sure the fd covers the entire range */
129
+ end_block = qemu_ram_block_from_host(host + size - 1, false, &offset);
130
+ if (ram_block != end_block) {
131
+ error_setg(errp, "registered buffer at %p with size %zu extends "
132
+ "beyond RAMBlock", host, size);
133
+ return BMRR_FAIL;
134
+ }
135
+ }
136
+
137
+ *region = (struct blkio_mem_region){
138
+ .addr = host,
139
+ .len = size,
140
+ .fd = fd,
141
+ .fd_offset = fd_offset,
142
+ };
143
+ return BMRR_OK;
121
+}
144
+}
122
+int c_function(void) { return 42; }
145
+
123
+EOF
146
+static bool blkio_register_buf(BlockDriverState *bs, void *host, size_t size,
124
+
147
+ Error **errp)
125
+ update_cxxflags
148
+{
126
+
149
+ BDRVBlkioState *s = bs->opaque;
127
+ if do_cxx $QEMU_CXXFLAGS -o $TMPE $TMPCXX $TMPO $LDFLAGS; then
150
+ struct blkio_mem_region region;
128
+ # C++ compiler $cxx works ok with C compiler $cc
151
+ BlkioMemRegionResult region_result;
129
+ :
152
+ int ret;
130
+ else
153
+
131
+ echo "C++ compiler $cxx does not work with C compiler $cc"
154
+ /*
132
+ echo "Disabling C++ specific optional code"
155
+ * Mapping memory regions conflicts with RAM discard (virtio-mem) when
133
+ cxx=
156
+ * there is pinning, so only do it when necessary.
134
+ fi
157
+ */
135
+else
158
+ if (!s->needs_mem_regions && s->may_pin_mem_regions) {
136
+ echo "No C++ compiler available; disabling C++ specific optional code"
159
+ return true;
137
+ cxx=
160
+ }
138
+fi
161
+
139
+
162
+ region_result = blkio_mem_region_from_host(bs, host, size, &region, errp);
140
echo_version() {
163
+ if (region_result == BMRR_SKIP) {
141
if test "$1" = "yes" ; then
164
+ return true;
142
echo "($2)"
165
+ } else if (region_result != BMRR_OK) {
143
@@ -XXX,XX +XXX,XX @@ if test "$mingw32" = "no" ; then
166
+ return false;
144
fi
167
+ }
145
echo "qemu_helperdir=$libexecdir" >> $config_host_mak
168
+
146
echo "extra_cflags=$EXTRA_CFLAGS" >> $config_host_mak
169
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
147
+echo "extra_cxxflags=$EXTRA_CXXFLAGS" >> $config_host_mak
170
+ ret = blkio_map_mem_region(s->blkio, &region);
148
echo "extra_ldflags=$EXTRA_LDFLAGS" >> $config_host_mak
171
+ }
149
echo "qemu_localedir=$qemu_localedir" >> $config_host_mak
172
+
150
echo "libs_softmmu=$libs_softmmu" >> $config_host_mak
173
+ if (ret < 0) {
151
@@ -XXX,XX +XXX,XX @@ echo "WINDRES=$windres" >> $config_host_mak
174
+ error_setg(errp, "Failed to add blkio mem region %p with size %zu: %s",
152
echo "CFLAGS=$CFLAGS" >> $config_host_mak
175
+ host, size, blkio_get_error_msg());
153
echo "CFLAGS_NOPIE=$CFLAGS_NOPIE" >> $config_host_mak
176
+ return false;
154
echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak
177
+ }
155
+echo "QEMU_CXXFLAGS=$QEMU_CXXFLAGS" >> $config_host_mak
178
+ return true;
156
echo "QEMU_INCLUDES=$QEMU_INCLUDES" >> $config_host_mak
179
+}
157
if test "$sparse" = "yes" ; then
180
+
158
echo "CC := REAL_CC=\"\$(CC)\" cgcc" >> $config_host_mak
181
+static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
159
diff --git a/disas/libvixl/Makefile.objs b/disas/libvixl/Makefile.objs
182
+{
160
index XXXXXXX..XXXXXXX 100644
183
+ BDRVBlkioState *s = bs->opaque;
161
--- a/disas/libvixl/Makefile.objs
184
+ struct blkio_mem_region region;
162
+++ b/disas/libvixl/Makefile.objs
185
+
163
@@ -XXX,XX +XXX,XX @@ libvixl_OBJS = vixl/utils.o \
186
+ /* See blkio_register_buf() */
164
187
+ if (!s->needs_mem_regions && s->may_pin_mem_regions) {
165
# The -Wno-sign-compare is needed only for gcc 4.6, which complains about
188
+ return;
166
# some signed-unsigned equality comparisons which later gcc versions do not.
189
+ }
167
-$(addprefix $(obj)/,$(libvixl_OBJS)): QEMU_CFLAGS := -I$(SRC_PATH)/disas/libvixl $(QEMU_CFLAGS) -Wno-sign-compare
190
+
168
+$(addprefix $(obj)/,$(libvixl_OBJS)): QEMU_CXXFLAGS := -I$(SRC_PATH)/disas/libvixl $(QEMU_CXXFLAGS) -Wno-sign-compare
191
+ if (blkio_mem_region_from_host(bs, host, size, &region, NULL) != BMRR_OK) {
169
# Ensure that C99 macros are defined regardless of the inclusion order of
192
+ return;
170
# headers in vixl. This is required at least on NetBSD.
193
+ }
171
-$(addprefix $(obj)/,$(libvixl_OBJS)): QEMU_CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS -D__STDC_FORMAT_MACROS
194
+
172
+$(addprefix $(obj)/,$(libvixl_OBJS)): QEMU_CXXFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
195
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
173
196
+ blkio_unmap_mem_region(s->blkio, &region);
174
common-obj-$(CONFIG_ARM_A64_DIS) += $(libvixl_OBJS)
197
+ }
175
diff --git a/rules.mak b/rules.mak
198
+}
176
index XXXXXXX..XXXXXXX 100644
199
+
177
--- a/rules.mak
200
static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
178
+++ b/rules.mak
201
Error **errp)
179
@@ -XXX,XX +XXX,XX @@ MAKEFLAGS += -rR
202
{
180
%.mak:
203
@@ -XXX,XX +XXX,XX @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
181
clean-target:
204
return ret;
182
205
}
183
-# Flags for C++ compilation
206
184
-QEMU_CXXFLAGS = -D__STDC_LIMIT_MACROS $(filter-out -Wstrict-prototypes -Wmissing-prototypes -Wnested-externs -Wold-style-declaration -Wold-style-definition -Wredundant-decls, $(QEMU_CFLAGS))
207
+ ret = blkio_get_bool(s->blkio,
185
-
208
+ "needs-mem-region-fd",
186
# Flags for dependency generation
209
+ &s->needs_mem_region_fd);
187
QEMU_DGFLAGS += -MMD -MP -MT $@ -MF $(@D)/$(*F).d
210
+ if (ret < 0) {
211
+ error_setg_errno(errp, -ret,
212
+ "failed to get needs-mem-region-fd: %s",
213
+ blkio_get_error_msg());
214
+ blkio_destroy(&s->blkio);
215
+ return ret;
216
+ }
217
+
218
ret = blkio_get_uint64(s->blkio,
219
"mem-region-alignment",
220
&s->mem_region_alignment);
221
@@ -XXX,XX +XXX,XX @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
222
return ret;
223
}
224
225
+ ret = blkio_get_bool(s->blkio,
226
+ "may-pin-mem-regions",
227
+ &s->may_pin_mem_regions);
228
+ if (ret < 0) {
229
+ /* Be conservative (assume pinning) if the property is not supported */
230
+ s->may_pin_mem_regions = s->needs_mem_regions;
231
+ }
232
+
233
+ /*
234
+ * Notify if libblkio drivers pin memory and prevent features like
235
+ * virtio-mem from working.
236
+ */
237
+ if (s->may_pin_mem_regions) {
238
+ ret = ram_block_discard_disable(true);
239
+ if (ret < 0) {
240
+ error_setg_errno(errp, -ret, "ram_block_discard_disable() failed");
241
+ blkio_destroy(&s->blkio);
242
+ return ret;
243
+ }
244
+ }
245
+
246
ret = blkio_start(s->blkio);
247
if (ret < 0) {
248
error_setg_errno(errp, -ret, "blkio_start failed: %s",
249
blkio_get_error_msg());
250
blkio_destroy(&s->blkio);
251
+ if (s->may_pin_mem_regions) {
252
+ ram_block_discard_disable(false);
253
+ }
254
return ret;
255
}
256
257
- bs->supported_write_flags = BDRV_REQ_FUA;
258
+ bs->supported_write_flags = BDRV_REQ_FUA | BDRV_REQ_REGISTERED_BUF;
259
bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP |
260
BDRV_REQ_NO_FALLBACK;
261
262
@@ -XXX,XX +XXX,XX @@ static void blkio_close(BlockDriverState *bs)
263
qemu_mutex_destroy(&s->blkio_lock);
264
blkio_detach_aio_context(bs);
265
blkio_destroy(&s->blkio);
266
+
267
+ if (s->may_pin_mem_regions) {
268
+ ram_block_discard_disable(false);
269
+ }
270
}
271
272
static int64_t blkio_getlength(BlockDriverState *bs)
273
@@ -XXX,XX +XXX,XX @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
274
.bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
275
.bdrv_io_unplug = blkio_io_unplug, \
276
.bdrv_refresh_limits = blkio_refresh_limits, \
277
+ .bdrv_register_buf = blkio_register_buf, \
278
+ .bdrv_unregister_buf = blkio_unregister_buf, \
279
__VA_ARGS__ \
280
}
188
281
189
--
282
--
190
2.9.4
283
2.37.3
191
192
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
Register guest RAM using BlockRAMRegistrar and set the
2
BDRV_REQ_REGISTERED_BUF flag so block drivers can optimize memory
3
accesses in I/O requests.
2
4
3
spotted by Coccinelle script scripts/coccinelle/err-bad-newline.cocci
5
This is for vdpa-blk, vhost-user-blk, and other I/O interfaces that rely
6
on DMA mapping/unmapping.
4
7
5
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
9
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
7
Reviewed-by: Laurent Vivier <lvivier@redhat.com>
10
Message-id: 20221013185908.1297568-14-stefanha@redhat.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
12
---
10
util/oslib-posix.c | 2 +-
13
include/hw/virtio/virtio-blk.h | 2 ++
11
1 file changed, 1 insertion(+), 1 deletion(-)
14
hw/block/virtio-blk.c | 39 ++++++++++++++++++++++------------
15
2 files changed, 27 insertions(+), 14 deletions(-)
12
16
13
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
17
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
14
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
15
--- a/util/oslib-posix.c
19
--- a/include/hw/virtio/virtio-blk.h
16
+++ b/util/oslib-posix.c
20
+++ b/include/hw/virtio/virtio-blk.h
17
@@ -XXX,XX +XXX,XX @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
21
@@ -XXX,XX +XXX,XX @@
18
/* touch pages simultaneously */
22
#include "hw/block/block.h"
19
if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) {
23
#include "sysemu/iothread.h"
20
error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
24
#include "sysemu/block-backend.h"
21
- "pages available to allocate guest RAM\n");
25
+#include "sysemu/block-ram-registrar.h"
22
+ "pages available to allocate guest RAM");
26
#include "qom/object.h"
27
28
#define TYPE_VIRTIO_BLK "virtio-blk-device"
29
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlock {
30
struct VirtIOBlockDataPlane *dataplane;
31
uint64_t host_features;
32
size_t config_size;
33
+ BlockRAMRegistrar blk_ram_registrar;
34
};
35
36
typedef struct VirtIOBlockReq {
37
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/hw/block/virtio-blk.c
40
+++ b/hw/block/virtio-blk.c
41
@@ -XXX,XX +XXX,XX @@
42
#include "hw/block/block.h"
43
#include "hw/qdev-properties.h"
44
#include "sysemu/blockdev.h"
45
+#include "sysemu/block-ram-registrar.h"
46
#include "sysemu/sysemu.h"
47
#include "sysemu/runstate.h"
48
#include "hw/virtio/virtio-blk.h"
49
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
23
}
50
}
24
51
}
25
ret = sigaction(SIGBUS, &oldact, NULL);
52
53
-static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
54
+static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
55
int start, int num_reqs, int niov)
56
{
57
+ BlockBackend *blk = s->blk;
58
QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
59
int64_t sector_num = mrb->reqs[start]->sector_num;
60
bool is_write = mrb->is_write;
61
+ BdrvRequestFlags flags = 0;
62
63
if (num_reqs > 1) {
64
int i;
65
@@ -XXX,XX +XXX,XX @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
66
num_reqs - 1);
67
}
68
69
+ if (blk_ram_registrar_ok(&s->blk_ram_registrar)) {
70
+ flags |= BDRV_REQ_REGISTERED_BUF;
71
+ }
72
+
73
if (is_write) {
74
- blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
75
- virtio_blk_rw_complete, mrb->reqs[start]);
76
+ blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov,
77
+ flags, virtio_blk_rw_complete,
78
+ mrb->reqs[start]);
79
} else {
80
- blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
81
- virtio_blk_rw_complete, mrb->reqs[start]);
82
+ blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov,
83
+ flags, virtio_blk_rw_complete,
84
+ mrb->reqs[start]);
85
}
86
}
87
88
@@ -XXX,XX +XXX,XX @@ static int multireq_compare(const void *a, const void *b)
89
}
90
}
91
92
-static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
93
+static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
94
{
95
int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
96
uint32_t max_transfer;
97
int64_t sector_num = 0;
98
99
if (mrb->num_reqs == 1) {
100
- submit_requests(blk, mrb, 0, 1, -1);
101
+ submit_requests(s, mrb, 0, 1, -1);
102
mrb->num_reqs = 0;
103
return;
104
}
105
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
106
* 3. merge would exceed maximum transfer length of backend device
107
*/
108
if (sector_num + nb_sectors != req->sector_num ||
109
- niov > blk_get_max_iov(blk) - req->qiov.niov ||
110
+ niov > blk_get_max_iov(s->blk) - req->qiov.niov ||
111
req->qiov.size > max_transfer ||
112
nb_sectors > (max_transfer -
113
req->qiov.size) / BDRV_SECTOR_SIZE) {
114
- submit_requests(blk, mrb, start, num_reqs, niov);
115
+ submit_requests(s, mrb, start, num_reqs, niov);
116
num_reqs = 0;
117
}
118
}
119
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
120
num_reqs++;
121
}
122
123
- submit_requests(blk, mrb, start, num_reqs, niov);
124
+ submit_requests(s, mrb, start, num_reqs, niov);
125
mrb->num_reqs = 0;
126
}
127
128
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
129
* Make sure all outstanding writes are posted to the backing device.
130
*/
131
if (mrb->is_write && mrb->num_reqs > 0) {
132
- virtio_blk_submit_multireq(s->blk, mrb);
133
+ virtio_blk_submit_multireq(s, mrb);
134
}
135
blk_aio_flush(s->blk, virtio_blk_flush_complete, req);
136
}
137
@@ -XXX,XX +XXX,XX @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
138
if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
139
is_write != mrb->is_write ||
140
!s->conf.request_merging)) {
141
- virtio_blk_submit_multireq(s->blk, mrb);
142
+ virtio_blk_submit_multireq(s, mrb);
143
}
144
145
assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
146
@@ -XXX,XX +XXX,XX @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
147
} while (!virtio_queue_empty(vq));
148
149
if (mrb.num_reqs) {
150
- virtio_blk_submit_multireq(s->blk, &mrb);
151
+ virtio_blk_submit_multireq(s, &mrb);
152
}
153
154
blk_io_unplug(s->blk);
155
@@ -XXX,XX +XXX,XX @@ void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh)
156
}
157
158
if (mrb.num_reqs) {
159
- virtio_blk_submit_multireq(s->blk, &mrb);
160
+ virtio_blk_submit_multireq(s, &mrb);
161
}
162
if (is_bh) {
163
blk_dec_in_flight(s->conf.conf.blk);
164
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
165
}
166
167
s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
168
+ blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
169
blk_set_dev_ops(s->blk, &virtio_block_ops, s);
170
171
blk_iostatus_enable(s->blk);
172
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_unrealize(DeviceState *dev)
173
virtio_del_queue(vdev, i);
174
}
175
qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
176
+ blk_ram_registrar_destroy(&s->blk_ram_registrar);
177
qemu_del_vm_change_state_handler(s->change);
178
blockdev_mark_auto_del(s->blk);
179
virtio_cleanup(vdev);
26
--
180
--
27
2.9.4
181
2.37.3
28
29
diff view generated by jsdifflib