1 | The following changes since commit 41feb5b955f0d7c9d071b2c5adbc404ae2895c7a: | 1 | The following changes since commit 887cba855bb6ff4775256f7968409281350b568c: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-3.0-pull-request' into staging (2018-06-05 10:38:33 +0100) | 3 | configure: Fix cross-building for RISCV host (v5) (2023-07-11 17:56:09 +0100) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 68acc99f143b60ec4faa2903065b187d4d3c4bf3: | 9 | for you to fetch changes up to 75dcb4d790bbe5327169fd72b185960ca58e2fa6: |
10 | 10 | ||
11 | sheepdog: remove huge BSS object (2018-06-05 10:15:12 -0400) | 11 | virtio-blk: fix host notifier issues during dataplane start/stop (2023-07-12 15:20:32 -0400) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Sheepdog patches | 14 | Pull request |
15 | |||
15 | ---------------------------------------------------------------- | 16 | ---------------------------------------------------------------- |
16 | 17 | ||
17 | Paolo Bonzini (2): | 18 | Stefan Hajnoczi (1): |
18 | sheepdog: cleanup repeated expression | 19 | virtio-blk: fix host notifier issues during dataplane start/stop |
19 | sheepdog: remove huge BSS object | ||
20 | 20 | ||
21 | block/sheepdog.c | 28 +++++++++++++++------------- | 21 | hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++-------------- |
22 | 1 file changed, 15 insertions(+), 13 deletions(-) | 22 | 1 file changed, 38 insertions(+), 29 deletions(-) |
23 | 23 | ||
24 | -- | 24 | -- |
25 | 2.13.6 | 25 | 2.40.1 |
26 | |||
27 | diff view generated by jsdifflib |
1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | The main loop thread can consume 100% CPU when using --device |
---|---|---|---|
2 | virtio-blk-pci,iothread=<iothread>. ppoll() constantly returns but | ||
3 | reading virtqueue host notifiers fails with EAGAIN. The file descriptors | ||
4 | are stale and remain registered with the AioContext because of bugs in | ||
5 | the virtio-blk dataplane start/stop code. | ||
2 | 6 | ||
3 | The expression "SD_INODE_SIZE - sizeof(inode.data_vdi_id)" already has a macro | 7 | The problem is that the dataplane start/stop code involves drain |
4 | defined for the same value (though with a nicer definition using offsetof). | 8 | operations, which call virtio_blk_drained_begin() and |
5 | Replace it. | 9 | virtio_blk_drained_end() at points where the host notifier is not |
10 | operational: | ||
11 | - In virtio_blk_data_plane_start(), blk_set_aio_context() drains after | ||
12 | vblk->dataplane_started has been set to true but the host notifier has | ||
13 | not been attached yet. | ||
14 | - In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context() | ||
15 | drain after the host notifier has already been detached but with | ||
16 | vblk->dataplane_started still set to true. | ||
6 | 17 | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 18 | I would like to simplify ->ioeventfd_start/stop() to avoid interactions |
8 | Reviewed-by: Fam Zheng <famz@redhat.com> | 19 | with drain entirely, but couldn't find a way to do that. Instead, this |
9 | Reviewed-by: Jeff Cody <jcody@redhat.com> | 20 | patch accepts the fragile nature of the code and reorders it so that |
10 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 21 | vblk->dataplane_started is false during drain operations. This way the |
11 | Message-Id: <20180523160721.14018-2-pbonzini@redhat.com> | 22 | virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't |
12 | Signed-off-by: Jeff Cody <jcody@redhat.com> | 23 | touch the host notifier. The result is that |
24 | virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have | ||
25 | complete control over the host notifier and stale file descriptors are | ||
26 | no longer left in the AioContext. | ||
27 | |||
28 | This patch fixes the 100% CPU consumption in the main loop thread and | ||
29 | correctly moves host notifier processing to the IOThread. | ||
30 | |||
31 | Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()") | ||
32 | Reported-by: Lukáš Doktor <ldoktor@redhat.com> | ||
33 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
34 | Tested-by: Lukas Doktor <ldoktor@redhat.com> | ||
35 | Message-id: 20230704151527.193586-1-stefanha@redhat.com | ||
36 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | --- | 37 | --- |
14 | block/sheepdog.c | 6 +++--- | 38 | hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++-------------- |
15 | 1 file changed, 3 insertions(+), 3 deletions(-) | 39 | 1 file changed, 38 insertions(+), 29 deletions(-) |
16 | 40 | ||
17 | diff --git a/block/sheepdog.c b/block/sheepdog.c | 41 | diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c |
18 | index XXXXXXX..XXXXXXX 100644 | 42 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/sheepdog.c | 43 | --- a/hw/block/dataplane/virtio-blk.c |
20 | +++ b/block/sheepdog.c | 44 | +++ b/hw/block/dataplane/virtio-blk.c |
21 | @@ -XXX,XX +XXX,XX @@ static int sd_truncate(BlockDriverState *bs, int64_t offset, | 45 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) |
46 | |||
47 | memory_region_transaction_commit(); | ||
48 | |||
49 | - /* | ||
50 | - * These fields are visible to the IOThread so we rely on implicit barriers | ||
51 | - * in aio_context_acquire() on the write side and aio_notify_accept() on | ||
52 | - * the read side. | ||
53 | - */ | ||
54 | - s->starting = false; | ||
55 | - vblk->dataplane_started = true; | ||
56 | trace_virtio_blk_data_plane_start(s); | ||
57 | |||
58 | old_context = blk_get_aio_context(s->conf->conf.blk); | ||
59 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) | ||
60 | event_notifier_set(virtio_queue_get_host_notifier(vq)); | ||
22 | } | 61 | } |
23 | 62 | ||
24 | /* we don't need to update entire object */ | 63 | + /* |
25 | - datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); | 64 | + * These fields must be visible to the IOThread when it processes the |
26 | + datalen = SD_INODE_HEADER_SIZE; | 65 | + * virtqueue, otherwise it will think dataplane has not started yet. |
27 | s->inode.vdi_size = offset; | 66 | + * |
28 | ret = write_object(fd, s->bs, (char *)&s->inode, | 67 | + * Make sure ->dataplane_started is false when blk_set_aio_context() is |
29 | vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, | 68 | + * called above so that draining does not cause the host notifier to be |
30 | @@ -XXX,XX +XXX,XX @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) | 69 | + * detached/attached prematurely. |
31 | */ | 70 | + */ |
32 | strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag)); | 71 | + s->starting = false; |
33 | /* we don't need to update entire object */ | 72 | + vblk->dataplane_started = true; |
34 | - datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); | 73 | + smp_wmb(); /* paired with aio_notify_accept() on the read side */ |
35 | + datalen = SD_INODE_HEADER_SIZE; | 74 | + |
36 | inode = g_malloc(datalen); | 75 | /* Get this show started by hooking up our callbacks */ |
37 | 76 | if (!blk_in_drain(s->conf->conf.blk)) { | |
38 | /* refresh inode. */ | 77 | aio_context_acquire(s->ctx); |
39 | @@ -XXX,XX +XXX,XX @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) | 78 | @@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) |
40 | /* we don't need to read entire object */ | 79 | fail_guest_notifiers: |
41 | ret = read_object(fd, s->bs, (char *)&inode, | 80 | vblk->dataplane_disabled = true; |
42 | vid_to_vdi_oid(vid), | 81 | s->starting = false; |
43 | - 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0, | 82 | - vblk->dataplane_started = true; |
44 | + 0, SD_INODE_HEADER_SIZE, 0, | 83 | return -ENOSYS; |
45 | s->cache_flags); | 84 | } |
46 | 85 | ||
47 | if (ret) { | 86 | @@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) |
87 | aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); | ||
88 | } | ||
89 | |||
90 | + /* | ||
91 | + * Batch all the host notifiers in a single transaction to avoid | ||
92 | + * quadratic time complexity in address_space_update_ioeventfds(). | ||
93 | + */ | ||
94 | + memory_region_transaction_begin(); | ||
95 | + | ||
96 | + for (i = 0; i < nvqs; i++) { | ||
97 | + virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | ||
98 | + } | ||
99 | + | ||
100 | + /* | ||
101 | + * The transaction expects the ioeventfds to be open when it | ||
102 | + * commits. Do it now, before the cleanup loop. | ||
103 | + */ | ||
104 | + memory_region_transaction_commit(); | ||
105 | + | ||
106 | + for (i = 0; i < nvqs; i++) { | ||
107 | + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); | ||
108 | + } | ||
109 | + | ||
110 | + /* | ||
111 | + * Set ->dataplane_started to false before draining so that host notifiers | ||
112 | + * are not detached/attached anymore. | ||
113 | + */ | ||
114 | + vblk->dataplane_started = false; | ||
115 | + | ||
116 | aio_context_acquire(s->ctx); | ||
117 | |||
118 | /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ | ||
119 | @@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) | ||
120 | |||
121 | aio_context_release(s->ctx); | ||
122 | |||
123 | - /* | ||
124 | - * Batch all the host notifiers in a single transaction to avoid | ||
125 | - * quadratic time complexity in address_space_update_ioeventfds(). | ||
126 | - */ | ||
127 | - memory_region_transaction_begin(); | ||
128 | - | ||
129 | - for (i = 0; i < nvqs; i++) { | ||
130 | - virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); | ||
131 | - } | ||
132 | - | ||
133 | - /* | ||
134 | - * The transaction expects the ioeventfds to be open when it | ||
135 | - * commits. Do it now, before the cleanup loop. | ||
136 | - */ | ||
137 | - memory_region_transaction_commit(); | ||
138 | - | ||
139 | - for (i = 0; i < nvqs; i++) { | ||
140 | - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); | ||
141 | - } | ||
142 | - | ||
143 | qemu_bh_cancel(s->bh); | ||
144 | notify_guest_bh(s); /* final chance to notify guest */ | ||
145 | |||
146 | /* Clean up guest notifier (irq) */ | ||
147 | k->set_guest_notifiers(qbus->parent, nvqs, false); | ||
148 | |||
149 | - vblk->dataplane_started = false; | ||
150 | s->stopping = false; | ||
151 | } | ||
48 | -- | 152 | -- |
49 | 2.13.6 | 153 | 2.40.1 |
50 | 154 | ||
51 | 155 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Paolo Bonzini <pbonzini@redhat.com> | ||
2 | 1 | ||
3 | block/sheepdog.o has a 4M static variable that is 90% of QEMU's whole .bss | ||
4 | section. Replace it with a heap-allocated block, and make it smaller too | ||
5 | since only the inode header is actually being used. | ||
6 | |||
7 | bss size goes down from 4464280 to 269976. | ||
8 | |||
9 | Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | ||
10 | Reviewed-by: Jeff Cody <jcody@redhat.com> | ||
11 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
12 | Message-Id: <20180523160721.14018-3-pbonzini@redhat.com> | ||
13 | Signed-off-by: Jeff Cody <jcody@redhat.com> | ||
14 | --- | ||
15 | block/sheepdog.c | 22 ++++++++++++---------- | ||
16 | 1 file changed, 12 insertions(+), 10 deletions(-) | ||
17 | |||
18 | diff --git a/block/sheepdog.c b/block/sheepdog.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/block/sheepdog.c | ||
21 | +++ b/block/sheepdog.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) | ||
23 | QEMUSnapshotInfo *sn_tab = NULL; | ||
24 | unsigned wlen, rlen; | ||
25 | int found = 0; | ||
26 | - static SheepdogInode inode; | ||
27 | + SheepdogInode *inode; | ||
28 | unsigned long *vdi_inuse; | ||
29 | unsigned int start_nr; | ||
30 | uint64_t hval; | ||
31 | uint32_t vid; | ||
32 | |||
33 | vdi_inuse = g_malloc(max); | ||
34 | + inode = g_malloc(SD_INODE_HEADER_SIZE); | ||
35 | |||
36 | fd = connect_to_sdog(s, &local_err); | ||
37 | if (fd < 0) { | ||
38 | @@ -XXX,XX +XXX,XX @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) | ||
39 | } | ||
40 | |||
41 | /* we don't need to read entire object */ | ||
42 | - ret = read_object(fd, s->bs, (char *)&inode, | ||
43 | + ret = read_object(fd, s->bs, (char *)inode, | ||
44 | vid_to_vdi_oid(vid), | ||
45 | 0, SD_INODE_HEADER_SIZE, 0, | ||
46 | s->cache_flags); | ||
47 | @@ -XXX,XX +XXX,XX @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) | ||
48 | continue; | ||
49 | } | ||
50 | |||
51 | - if (!strcmp(inode.name, s->name) && is_snapshot(&inode)) { | ||
52 | - sn_tab[found].date_sec = inode.snap_ctime >> 32; | ||
53 | - sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff; | ||
54 | - sn_tab[found].vm_state_size = inode.vm_state_size; | ||
55 | - sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec; | ||
56 | + if (!strcmp(inode->name, s->name) && is_snapshot(inode)) { | ||
57 | + sn_tab[found].date_sec = inode->snap_ctime >> 32; | ||
58 | + sn_tab[found].date_nsec = inode->snap_ctime & 0xffffffff; | ||
59 | + sn_tab[found].vm_state_size = inode->vm_state_size; | ||
60 | + sn_tab[found].vm_clock_nsec = inode->vm_clock_nsec; | ||
61 | |||
62 | snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), | ||
63 | - "%" PRIu32, inode.snap_id); | ||
64 | + "%" PRIu32, inode->snap_id); | ||
65 | pstrcpy(sn_tab[found].name, | ||
66 | - MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)), | ||
67 | - inode.tag); | ||
68 | + MIN(sizeof(sn_tab[found].name), sizeof(inode->tag)), | ||
69 | + inode->tag); | ||
70 | found++; | ||
71 | } | ||
72 | } | ||
73 | @@ -XXX,XX +XXX,XX @@ out: | ||
74 | *psn_tab = sn_tab; | ||
75 | |||
76 | g_free(vdi_inuse); | ||
77 | + g_free(inode); | ||
78 | |||
79 | if (ret < 0) { | ||
80 | return ret; | ||
81 | -- | ||
82 | 2.13.6 | ||
83 | |||
84 | diff view generated by jsdifflib |