1 | The following changes since commit d47a851caeda96d5979bf48d4bae6a87784ad91d: | 1 | The following changes since commit 887cba855bb6ff4775256f7968409281350b568c: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170601' into staging (2017-06-02 14:07:53 +0100) | 3 | configure: Fix cross-building for RISCV host (v5) (2023-07-11 17:56:09 +0100) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to df3a429ae82c0f45becdfab105617701d75e0f05: | 9 | for you to fetch changes up to 75dcb4d790bbe5327169fd72b185960ca58e2fa6: |
10 | 10 | ||
11 | gluster: add support for PREALLOC_MODE_FALLOC (2017-06-02 10:51:47 -0400) | 11 | virtio-blk: fix host notifier issues during dataplane start/stop (2023-07-12 15:20:32 -0400) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Gluster patch(es) | 14 | Pull request |
15 | |||
15 | ---------------------------------------------------------------- | 16 | ---------------------------------------------------------------- |
16 | 17 | ||
17 | Niels de Vos (1): | 18 | Stefan Hajnoczi (1): |
18 | gluster: add support for PREALLOC_MODE_FALLOC | 19 | virtio-blk: fix host notifier issues during dataplane start/stop |
19 | 20 | ||
20 | block/gluster.c | 78 ++++++++++++++++++++++++++++++--------------------------- | 21 | hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++-------------- |
21 | configure | 6 +++++ | 22 | 1 file changed, 38 insertions(+), 29 deletions(-) |
22 | 2 files changed, 47 insertions(+), 37 deletions(-) | ||
23 | 23 | ||
24 | -- | 24 | -- |
25 | 2.9.3 | 25 | 2.40.1 |
26 | |||
27 | diff view generated by jsdifflib |
1 | From: Niels de Vos <ndevos@redhat.com> | 1 | The main loop thread can consume 100% CPU when using --device |
---|---|---|---|
2 | virtio-blk-pci,iothread=<iothread>. ppoll() constantly returns but | ||
3 | reading virtqueue host notifiers fails with EAGAIN. The file descriptors | ||
4 | are stale and remain registered with the AioContext because of bugs in | ||
5 | the virtio-blk dataplane start/stop code. | ||
2 | 6 | ||
3 | Add missing support for "preallocation=falloc" to the Gluster block | 7 | The problem is that the dataplane start/stop code involves drain |
4 | driver. This change bases its logic on that of block/file-posix.c and | 8 | operations, which call virtio_blk_drained_begin() and |
5 | removes the gluster_supports_zerofill() and qemu_gluster_zerofill() | 9 | virtio_blk_drained_end() at points where the host notifier is not |
6 | functions in favour of #ifdef checks in an easy to read | 10 | operational: |
7 | switch-statement. | 11 | - In virtio_blk_data_plane_start(), blk_set_aio_context() drains after |
12 | vblk->dataplane_started has been set to true but the host notifier has | ||
13 | not been attached yet. | ||
14 | - In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context() | ||
15 | drain after the host notifier has already been detached but with | ||
16 | vblk->dataplane_started still set to true. | ||
8 | 17 | ||
9 | Both glfs_zerofill() and glfs_fallocate() have been introduced with | 18 | I would like to simplify ->ioeventfd_start/stop() to avoid interactions |
10 | GlusterFS 3.5.0 (pkg-config glusterfs-api = 6). A #define for the | 19 | with drain entirely, but couldn't find a way to do that. Instead, this |
11 | availability of glfs_fallocate() has been added to ./configure. | 20 | patch accepts the fragile nature of the code and reorders it so that |
21 | vblk->dataplane_started is false during drain operations. This way the | ||
22 | virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't | ||
23 | touch the host notifier. The result is that | ||
24 | virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have | ||
25 | complete control over the host notifier and stale file descriptors are | ||
26 | no longer left in the AioContext. | ||
12 | 27 | ||
13 | Reported-by: Satheesaran Sundaramoorthi <sasundar@redhat.com> | 28 | This patch fixes the 100% CPU consumption in the main loop thread and |
14 | Signed-off-by: Niels de Vos <ndevos@redhat.com> | 29 | correctly moves host notifier processing to the IOThread. |
15 | Message-id: 20170528063114.28691-1-ndevos@redhat.com | 30 | |
16 | URL: https://bugzilla.redhat.com/1450759 | 31 | Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()") |
17 | Signed-off-by: Niels de Vos <ndevos@redhat.com> | 32 | Reported-by: Lukáš Doktor <ldoktor@redhat.com> |
18 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 33 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
34 | Tested-by: Lukas Doktor <ldoktor@redhat.com> | ||
35 | Message-id: 20230704151527.193586-1-stefanha@redhat.com | ||
36 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
19 | --- | 37 | --- |
20 | block/gluster.c | 78 ++++++++++++++++++++++++++++++--------------------------- | 38 | hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++-------------- |
21 | configure | 6 +++++ | 39 | 1 file changed, 38 insertions(+), 29 deletions(-) |
22 | 2 files changed, 47 insertions(+), 37 deletions(-) | ||
23 | 40 | ||
24 | diff --git a/block/gluster.c b/block/gluster.c | 41 | diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c |
25 | index XXXXXXX..XXXXXXX 100644 | 42 | index XXXXXXX..XXXXXXX 100644 |
26 | --- a/block/gluster.c | 43 | --- a/hw/block/dataplane/virtio-blk.c |
27 | +++ b/block/gluster.c | 44 | +++ b/hw/block/dataplane/virtio-blk.c |
28 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs, | 45 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) |
29 | qemu_coroutine_yield(); | 46 | |
30 | return acb.ret; | 47 | memory_region_transaction_commit(); |
48 | |||
49 | - /* | ||
50 | - * These fields are visible to the IOThread so we rely on implicit barriers | ||
51 | - * in aio_context_acquire() on the write side and aio_notify_accept() on | ||
52 | - * the read side. | ||
53 | - */ | ||
54 | - s->starting = false; | ||
55 | - vblk->dataplane_started = true; | ||
56 | trace_virtio_blk_data_plane_start(s); | ||
57 | |||
58 | old_context = blk_get_aio_context(s->conf->conf.blk); | ||
59 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) | ||
60 | event_notifier_set(virtio_queue_get_host_notifier(vq)); | ||
61 | } | ||
62 | |||
63 | + /* | ||
64 | + * These fields must be visible to the IOThread when it processes the | ||
65 | + * virtqueue, otherwise it will think dataplane has not started yet. | ||
66 | + * | ||
67 | + * Make sure ->dataplane_started is false when blk_set_aio_context() is | ||
68 | + * called above so that draining does not cause the host notifier to be | ||
69 | + * detached/attached prematurely. | ||
70 | + */ | ||
71 | + s->starting = false; | ||
72 | + vblk->dataplane_started = true; | ||
73 | + smp_wmb(); /* paired with aio_notify_accept() on the read side */ | ||
74 | + | ||
75 | /* Get this show started by hooking up our callbacks */ | ||
76 | if (!blk_in_drain(s->conf->conf.blk)) { | ||
77 | aio_context_acquire(s->ctx); | ||
78 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) | ||
79 | fail_guest_notifiers: | ||
80 | vblk->dataplane_disabled = true; | ||
81 | s->starting = false; | ||
82 | - vblk->dataplane_started = true; | ||
83 | return -ENOSYS; | ||
31 | } | 84 | } |
32 | - | 85 | |
33 | -static inline bool gluster_supports_zerofill(void) | 86 | @@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) |
34 | -{ | 87 | aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); |
35 | - return 1; | ||
36 | -} | ||
37 | - | ||
38 | -static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, | ||
39 | - int64_t size) | ||
40 | -{ | ||
41 | - return glfs_zerofill(fd, offset, size); | ||
42 | -} | ||
43 | - | ||
44 | -#else | ||
45 | -static inline bool gluster_supports_zerofill(void) | ||
46 | -{ | ||
47 | - return 0; | ||
48 | -} | ||
49 | - | ||
50 | -static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, | ||
51 | - int64_t size) | ||
52 | -{ | ||
53 | - return 0; | ||
54 | -} | ||
55 | #endif | ||
56 | |||
57 | static int qemu_gluster_create(const char *filename, | ||
58 | @@ -XXX,XX +XXX,XX @@ static int qemu_gluster_create(const char *filename, | ||
59 | struct glfs *glfs; | ||
60 | struct glfs_fd *fd; | ||
61 | int ret = 0; | ||
62 | - int prealloc = 0; | ||
63 | + PreallocMode prealloc; | ||
64 | int64_t total_size = 0; | ||
65 | char *tmp = NULL; | ||
66 | + Error *local_err = NULL; | ||
67 | |||
68 | gconf = g_new0(BlockdevOptionsGluster, 1); | ||
69 | gconf->debug = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG, | ||
70 | @@ -XXX,XX +XXX,XX @@ static int qemu_gluster_create(const char *filename, | ||
71 | BDRV_SECTOR_SIZE); | ||
72 | |||
73 | tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); | ||
74 | - if (!tmp || !strcmp(tmp, "off")) { | ||
75 | - prealloc = 0; | ||
76 | - } else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) { | ||
77 | - prealloc = 1; | ||
78 | - } else { | ||
79 | - error_setg(errp, "Invalid preallocation mode: '%s'" | ||
80 | - " or GlusterFS doesn't support zerofill API", tmp); | ||
81 | + prealloc = qapi_enum_parse(PreallocMode_lookup, tmp, | ||
82 | + PREALLOC_MODE__MAX, PREALLOC_MODE_OFF, | ||
83 | + &local_err); | ||
84 | + g_free(tmp); | ||
85 | + if (local_err) { | ||
86 | + error_propagate(errp, local_err); | ||
87 | ret = -EINVAL; | ||
88 | goto out; | ||
89 | } | 88 | } |
90 | @@ -XXX,XX +XXX,XX @@ static int qemu_gluster_create(const char *filename, | 89 | |
91 | O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); | 90 | + /* |
92 | if (!fd) { | 91 | + * Batch all the host notifiers in a single transaction to avoid |
93 | ret = -errno; | 92 | + * quadratic time complexity in address_space_update_ioeventfds(). |
94 | - } else { | 93 | + */ |
95 | + goto out; | 94 | + memory_region_transaction_begin(); |
95 | + | ||
96 | + for (i = 0; i < nvqs; i++) { | ||
97 | + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | ||
96 | + } | 98 | + } |
97 | + | 99 | + |
98 | + switch (prealloc) { | 100 | + /* |
99 | +#ifdef CONFIG_GLUSTERFS_FALLOCATE | 101 | + * The transaction expects the ioeventfds to be open when it |
100 | + case PREALLOC_MODE_FALLOC: | 102 | + * commits. Do it now, before the cleanup loop. |
101 | + if (glfs_fallocate(fd, 0, 0, total_size)) { | 103 | + */ |
102 | + error_setg(errp, "Could not preallocate data for the new file"); | 104 | + memory_region_transaction_commit(); |
103 | + ret = -errno; | 105 | + |
104 | + } | 106 | + for (i = 0; i < nvqs; i++) { |
105 | + break; | 107 | + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); |
106 | +#endif /* CONFIG_GLUSTERFS_FALLOCATE */ | ||
107 | +#ifdef CONFIG_GLUSTERFS_ZEROFILL | ||
108 | + case PREALLOC_MODE_FULL: | ||
109 | if (!glfs_ftruncate(fd, total_size)) { | ||
110 | - if (prealloc && qemu_gluster_zerofill(fd, 0, total_size)) { | ||
111 | + if (glfs_zerofill(fd, 0, total_size)) { | ||
112 | + error_setg(errp, "Could not zerofill the new file"); | ||
113 | ret = -errno; | ||
114 | } | ||
115 | } else { | ||
116 | + error_setg(errp, "Could not resize file"); | ||
117 | ret = -errno; | ||
118 | } | ||
119 | + break; | ||
120 | +#endif /* CONFIG_GLUSTERFS_ZEROFILL */ | ||
121 | + case PREALLOC_MODE_OFF: | ||
122 | + if (glfs_ftruncate(fd, total_size) != 0) { | ||
123 | + ret = -errno; | ||
124 | + error_setg(errp, "Could not resize file"); | ||
125 | + } | ||
126 | + break; | ||
127 | + default: | ||
128 | + ret = -EINVAL; | ||
129 | + error_setg(errp, "Unsupported preallocation mode: %s", | ||
130 | + PreallocMode_lookup[prealloc]); | ||
131 | + break; | ||
132 | + } | 108 | + } |
133 | |||
134 | - if (glfs_close(fd) != 0) { | ||
135 | - ret = -errno; | ||
136 | - } | ||
137 | + if (glfs_close(fd) != 0) { | ||
138 | + ret = -errno; | ||
139 | } | ||
140 | out: | ||
141 | - g_free(tmp); | ||
142 | qapi_free_BlockdevOptionsGluster(gconf); | ||
143 | glfs_clear_preopened(glfs); | ||
144 | return ret; | ||
145 | diff --git a/configure b/configure | ||
146 | index XXXXXXX..XXXXXXX 100755 | ||
147 | --- a/configure | ||
148 | +++ b/configure | ||
149 | @@ -XXX,XX +XXX,XX @@ seccomp="" | ||
150 | glusterfs="" | ||
151 | glusterfs_xlator_opt="no" | ||
152 | glusterfs_discard="no" | ||
153 | +glusterfs_fallocate="no" | ||
154 | glusterfs_zerofill="no" | ||
155 | gtk="" | ||
156 | gtkabi="" | ||
157 | @@ -XXX,XX +XXX,XX @@ if test "$glusterfs" != "no" ; then | ||
158 | glusterfs_discard="yes" | ||
159 | fi | ||
160 | if $pkg_config --atleast-version=6 glusterfs-api; then | ||
161 | + glusterfs_fallocate="yes" | ||
162 | glusterfs_zerofill="yes" | ||
163 | fi | ||
164 | else | ||
165 | @@ -XXX,XX +XXX,XX @@ if test "$glusterfs_discard" = "yes" ; then | ||
166 | echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak | ||
167 | fi | ||
168 | |||
169 | +if test "$glusterfs_fallocate" = "yes" ; then | ||
170 | + echo "CONFIG_GLUSTERFS_FALLOCATE=y" >> $config_host_mak | ||
171 | +fi | ||
172 | + | 109 | + |
173 | if test "$glusterfs_zerofill" = "yes" ; then | 110 | + /* |
174 | echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak | 111 | + * Set ->dataplane_started to false before draining so that host notifiers |
175 | fi | 112 | + * are not detached/attached anymore. |
113 | + */ | ||
114 | + vblk->dataplane_started = false; | ||
115 | + | ||
116 | aio_context_acquire(s->ctx); | ||
117 | |||
118 | /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ | ||
119 | @@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) | ||
120 | |||
121 | aio_context_release(s->ctx); | ||
122 | |||
123 | - /* | ||
124 | - * Batch all the host notifiers in a single transaction to avoid | ||
125 | - * quadratic time complexity in address_space_update_ioeventfds(). | ||
126 | - */ | ||
127 | - memory_region_transaction_begin(); | ||
128 | - | ||
129 | - for (i = 0; i < nvqs; i++) { | ||
130 | - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | ||
131 | - } | ||
132 | - | ||
133 | - /* | ||
134 | - * The transaction expects the ioeventfds to be open when it | ||
135 | - * commits. Do it now, before the cleanup loop. | ||
136 | - */ | ||
137 | - memory_region_transaction_commit(); | ||
138 | - | ||
139 | - for (i = 0; i < nvqs; i++) { | ||
140 | - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); | ||
141 | - } | ||
142 | - | ||
143 | qemu_bh_cancel(s->bh); | ||
144 | notify_guest_bh(s); /* final chance to notify guest */ | ||
145 | |||
146 | /* Clean up guest notifier (irq) */ | ||
147 | k->set_guest_notifiers(qbus->parent, nvqs, false); | ||
148 | |||
149 | - vblk->dataplane_started = false; | ||
150 | s->stopping = false; | ||
151 | } | ||
176 | -- | 152 | -- |
177 | 2.9.3 | 153 | 2.40.1 |
178 | 154 | ||
179 | 155 | diff view generated by jsdifflib |