1 | The following changes since commit f45cc81911adc7726e8a2801986b6998b91b816e: | 1 | The following changes since commit 352998df1c53b366413690d95b35f76d0721ebed: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20220307' into staging (2022-03-08 09:06:57 +0000) | 3 | Merge tag 'i2c-20220314' of https://github.com/philmd/qemu into staging (2022-03-14 14:39:33 +0000) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the git repository at: |
6 | 6 | ||
7 | https://github.com/jasowang/qemu.git tags/net-pull-request | 7 | https://github.com/jasowang/qemu.git tags/net-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to a10dd1e279fc56cebc7e738925e0db85d0cea089: | 9 | for you to fetch changes up to 12a195fa343aae2ead1301ce04727bd0ae25eb15: |
10 | 10 | ||
11 | vdpa: Expose VHOST_F_LOG_ALL on SVQ (2022-03-08 21:18:41 +0800) | 11 | vdpa: Expose VHOST_F_LOG_ALL on SVQ (2022-03-15 13:57:44 +0800) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | |||
15 | Changes since V2: | ||
16 | - fix 32bit build errors | ||
14 | 17 | ||
15 | ---------------------------------------------------------------- | 18 | ---------------------------------------------------------------- |
16 | Eugenio Pérez (14): | 19 | Eugenio Pérez (14): |
17 | vhost: Add VhostShadowVirtqueue | 20 | vhost: Add VhostShadowVirtqueue |
18 | vhost: Add Shadow VirtQueue kick forwarding capabilities | 21 | vhost: Add Shadow VirtQueue kick forwarding capabilities |
... | ... | ||
34 | 37 | ||
35 | hw/net/virtio-net.c | 1 + | 38 | hw/net/virtio-net.c | 1 + |
36 | hw/virtio/meson.build | 2 +- | 39 | hw/virtio/meson.build | 2 +- |
37 | hw/virtio/vhost-iova-tree.c | 110 +++++++ | 40 | hw/virtio/vhost-iova-tree.c | 110 +++++++ |
38 | hw/virtio/vhost-iova-tree.h | 27 ++ | 41 | hw/virtio/vhost-iova-tree.h | 27 ++ |
39 | hw/virtio/vhost-shadow-virtqueue.c | 637 +++++++++++++++++++++++++++++++++++++ | 42 | hw/virtio/vhost-shadow-virtqueue.c | 636 +++++++++++++++++++++++++++++++++++++ |
40 | hw/virtio/vhost-shadow-virtqueue.h | 87 +++++ | 43 | hw/virtio/vhost-shadow-virtqueue.h | 87 +++++ |
41 | hw/virtio/vhost-vdpa.c | 525 +++++++++++++++++++++++++++++- | 44 | hw/virtio/vhost-vdpa.c | 522 +++++++++++++++++++++++++++++- |
42 | include/hw/virtio/vhost-vdpa.h | 8 + | 45 | include/hw/virtio/vhost-vdpa.h | 8 + |
43 | include/qemu/iova-tree.h | 38 ++- | 46 | include/qemu/iova-tree.h | 38 ++- |
44 | util/iova-tree.c | 169 ++++++++++ | 47 | util/iova-tree.c | 170 ++++++++++ |
45 | 10 files changed, 1587 insertions(+), 17 deletions(-) | 48 | 10 files changed, 1584 insertions(+), 17 deletions(-) |
46 | create mode 100644 hw/virtio/vhost-iova-tree.c | 49 | create mode 100644 hw/virtio/vhost-iova-tree.c |
47 | create mode 100644 hw/virtio/vhost-iova-tree.h | 50 | create mode 100644 hw/virtio/vhost-iova-tree.h |
48 | create mode 100644 hw/virtio/vhost-shadow-virtqueue.c | 51 | create mode 100644 hw/virtio/vhost-shadow-virtqueue.c |
49 | create mode 100644 hw/virtio/vhost-shadow-virtqueue.h | 52 | create mode 100644 hw/virtio/vhost-shadow-virtqueue.h |
50 | 53 | ||
51 | 54 | ||
1 | Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg") | 1 | Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg") |
---|---|---|---|
2 | tries to fix the use after free of the sg by caching the virtqueue | 2 | tries to fix the use after free of the sg by caching the virtqueue |
3 | elements in an array and unmapping them at once after receiving the | 3 | elements in an array and unmapping them at once after receiving the |
4 | packets, but it forgot to unmap the cached elements on error, which | 4 | packets, but it forgot to unmap the cached elements on error, which |
5 | will lead to leaked mappings and other unexpected results. | 5 | will lead to leaked mappings and other unexpected results. |
6 | 6 | ||
7 | Fix this by detaching the cached elements on error. This addresses | 7 | Fix this by detaching the cached elements on error. This addresses |
8 | CVE-2022-26353. | 8 | CVE-2022-26353. |
9 | 9 | ||
10 | Reported-by: Victor Tom <vv474172261@gmail.com> | 10 | Reported-by: Victor Tom <vv474172261@gmail.com> |
11 | Cc: qemu-stable@nongnu.org | 11 | Cc: qemu-stable@nongnu.org |
12 | Fixes: CVE-2022-26353 | 12 | Fixes: CVE-2022-26353 |
13 | Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg") | 13 | Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg") |
14 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | 14 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> |
15 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 15 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
16 | --- | 16 | --- |
17 | hw/net/virtio-net.c | 1 + | 17 | hw/net/virtio-net.c | 1 + |
18 | 1 file changed, 1 insertion(+) | 18 | 1 file changed, 1 insertion(+) |
19 | 19 | ||
20 | diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c | 20 | diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c |
21 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/net/virtio-net.c | 22 | --- a/hw/net/virtio-net.c |
23 | +++ b/hw/net/virtio-net.c | 23 | +++ b/hw/net/virtio-net.c |
24 | @@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, | 24 | @@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, |
25 | 25 | ||
26 | err: | 26 | err: |
27 | for (j = 0; j < i; j++) { | 27 | for (j = 0; j < i; j++) { |
28 | + virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); | 28 | + virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); |
29 | g_free(elems[j]); | 29 | g_free(elems[j]); |
30 | } | 30 | } |
31 | 31 | ||
32 | -- | 32 | -- |
33 | 2.7.4 | 33 | 2.7.4 |
... | ... | ||
---|---|---|---|
7 | do. | 7 | do. |
8 | 8 | ||
9 | This commit only exposes basic SVQ allocation and freeing. Later patches in | 9 | This commit only exposes basic SVQ allocation and freeing. Later patches in |
10 | the series add functionality such as notifications and buffer forwarding. | 10 | the series add functionality such as notifications and buffer forwarding. |
11 | 11 | ||
12 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
12 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 13 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
13 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
14 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 14 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
15 | --- | 15 | --- |
16 | hw/virtio/meson.build | 2 +- | 16 | hw/virtio/meson.build | 2 +- |
17 | hw/virtio/vhost-shadow-virtqueue.c | 62 ++++++++++++++++++++++++++++++++++++++ | 17 | hw/virtio/vhost-shadow-virtqueue.c | 62 ++++++++++++++++++++++++++++++++++++++ |
18 | hw/virtio/vhost-shadow-virtqueue.h | 28 +++++++++++++++++ | 18 | hw/virtio/vhost-shadow-virtqueue.h | 28 +++++++++++++++++ |
... | ... |
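
For context, a minimal usage sketch of the allocation model this patch introduces: one VhostShadowVirtqueue per device virtqueue, kept in a GPtrArray whose destroy function releases them. Only vhost_svq_new()/vhost_svq_free() come from this patch; the wrapper below, its signature and its error handling are assumptions, not the series' code.

    /* Sketch only: create one shadow virtqueue per device vq. */
    static int example_svqs_init(struct vhost_vdpa *v, unsigned nvqs, Error **errp)
    {
        g_autoptr(GPtrArray) shadow_vqs =
            g_ptr_array_new_full(nvqs, (GDestroyNotify)vhost_svq_free);

        for (unsigned n = 0; n < nvqs; n++) {
            VhostShadowVirtqueue *svq = vhost_svq_new();

            if (!svq) {
                error_setg(errp, "Cannot create svq %u", n);
                return -1;
            }
            g_ptr_array_add(shadow_vqs, svq);
        }

        v->shadow_vqs = g_steal_pointer(&shadow_vqs);
        return 0;
    }
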
... | ... | ||
---|---|---|---|
4 | will just forward the guest's kicks to the device. | 4 | will just forward the guest's kicks to the device. |
5 | 5 | ||
6 | Host memory notifier regions are left out for simplicity, and they will | 6 | Host memory notifier regions are left out for simplicity, and they will |
7 | not be addressed in this series. | 7 | not be addressed in this series. |
8 | 8 | ||
9 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
9 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 10 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
10 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
11 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 11 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
12 | --- | 12 | --- |
13 | hw/virtio/vhost-shadow-virtqueue.c | 56 ++++++++++++++ | 13 | hw/virtio/vhost-shadow-virtqueue.c | 55 ++++++++++++++ |
14 | hw/virtio/vhost-shadow-virtqueue.h | 14 ++++ | 14 | hw/virtio/vhost-shadow-virtqueue.h | 14 ++++ |
15 | hw/virtio/vhost-vdpa.c | 145 ++++++++++++++++++++++++++++++++++++- | 15 | hw/virtio/vhost-vdpa.c | 144 ++++++++++++++++++++++++++++++++++++- |
16 | include/hw/virtio/vhost-vdpa.h | 4 + | 16 | include/hw/virtio/vhost-vdpa.h | 4 ++ |
17 | 4 files changed, 217 insertions(+), 2 deletions(-) | 17 | 4 files changed, 215 insertions(+), 2 deletions(-) |
18 | 18 | ||
19 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | 19 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c |
20 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/hw/virtio/vhost-shadow-virtqueue.c | 21 | --- a/hw/virtio/vhost-shadow-virtqueue.c |
22 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | 22 | +++ b/hw/virtio/vhost-shadow-virtqueue.c |
... | ... | ||
32 | + * | 32 | + * |
33 | + * @n: guest kick event notifier, the one that guest set to notify svq. | 33 | + * @n: guest kick event notifier, the one that guest set to notify svq. |
34 | + */ | 34 | + */ |
35 | +static void vhost_handle_guest_kick(EventNotifier *n) | 35 | +static void vhost_handle_guest_kick(EventNotifier *n) |
36 | +{ | 36 | +{ |
37 | + VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, | 37 | + VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick); |
38 | + svq_kick); | ||
39 | + event_notifier_test_and_clear(n); | 38 | + event_notifier_test_and_clear(n); |
40 | + event_notifier_set(&svq->hdev_kick); | 39 | + event_notifier_set(&svq->hdev_kick); |
41 | +} | 40 | +} |
42 | + | 41 | + |
43 | +/** | 42 | +/** |
... | ... | ||
194 | + ram_block_discard_disable(false); | 193 | + ram_block_discard_disable(false); |
195 | + return ret; | 194 | + return ret; |
196 | } | 195 | } |
197 | 196 | ||
198 | static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, | 197 | static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, |
199 | @@ -XXX,XX +XXX,XX @@ err: | 198 | @@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n) |
200 | 199 | ||
201 | static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev) | 200 | static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev) |
202 | { | 201 | { |
203 | + struct vhost_vdpa *v = dev->opaque; | 202 | + struct vhost_vdpa *v = dev->opaque; |
204 | int i; | 203 | int i; |
... | ... | ||
286 | + * @svq: The shadow virtqueue | 285 | + * @svq: The shadow virtqueue |
287 | + * @idx: The index of the virtqueue in the vhost device | 286 | + * @idx: The index of the virtqueue in the vhost device |
288 | + * @errp: Error | 287 | + * @errp: Error |
289 | + */ | 288 | + */ |
290 | +static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, | 289 | +static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, |
291 | + VhostShadowVirtqueue *svq, | 290 | + VhostShadowVirtqueue *svq, unsigned idx, |
292 | + unsigned idx, | ||
293 | + Error **errp) | 291 | + Error **errp) |
294 | +{ | 292 | +{ |
295 | + struct vhost_vring_file file = { | 293 | + struct vhost_vring_file file = { |
296 | + .index = dev->vq_index + idx, | 294 | + .index = dev->vq_index + idx, |
297 | + }; | 295 | + }; |
... | ... |
1 | From: Eugenio Pérez <eperezma@redhat.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This will make qemu aware of the buffers the device has used, allowing it | 3 | This will make qemu aware of the buffers the device has used, allowing it |
4 | to write their contents to guest memory if needed. | 4 | to write their contents to guest memory if needed. |
5 | 5 | ||
6 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
6 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 7 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
7 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 8 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
9 | --- | 9 | --- |
10 | hw/virtio/vhost-shadow-virtqueue.c | 38 ++++++++++++++++++++++++++++++++++++++ | 10 | hw/virtio/vhost-shadow-virtqueue.c | 38 ++++++++++++++++++++++++++++++++++++++ |
11 | hw/virtio/vhost-shadow-virtqueue.h | 4 ++++ | 11 | hw/virtio/vhost-shadow-virtqueue.h | 4 ++++ |
12 | hw/virtio/vhost-vdpa.c | 31 +++++++++++++++++++++++++++++-- | 12 | hw/virtio/vhost-vdpa.c | 31 +++++++++++++++++++++++++++++-- |
... | ... | ||
118 | + * | 118 | + * |
119 | + * Note that this function does not rewind kick file descriptor if cannot set | 119 | + * Note that this function does not rewind kick file descriptor if cannot set |
120 | + * call one. | 120 | + * call one. |
121 | */ | 121 | */ |
122 | static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, | 122 | static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, |
123 | VhostShadowVirtqueue *svq, | 123 | VhostShadowVirtqueue *svq, unsigned idx, |
124 | @@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, | 124 | @@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, |
125 | r = vhost_vdpa_set_vring_dev_kick(dev, &file); | 125 | r = vhost_vdpa_set_vring_dev_kick(dev, &file); |
126 | if (unlikely(r != 0)) { | 126 | if (unlikely(r != 0)) { |
127 | error_setg_errno(errp, -r, "Can't set device kick fd"); | 127 | error_setg_errno(errp, -r, "Can't set device kick fd"); |
128 | + return false; | 128 | + return false; |
... | ... |
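
The call path mirrors the kick path shown in the previous patch: the device's used-buffer notification is consumed and relayed to the guest. A minimal sketch of that mirror handler; the hdev_call/svq_call field names are assumptions inferred from the kick path, not code quoted from this patch.

    /* Sketch: relay the device's "used buffers" signal to the guest. */
    static void example_handle_device_call(EventNotifier *n)
    {
        VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
                                                 hdev_call);

        event_notifier_test_and_clear(n);   /* consume the device's signal */
        event_notifier_set(&svq->svq_call); /* forward it to the guest */
    }
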
... | ... | ||
---|---|---|---|
8 | 8 | ||
9 | Future changes can add support to offer more features to the guest, | 9 | Future changes can add support to offer more features to the guest, |
10 | since the use of VirtQueue gives this for free. This is left out at the | 10 | since the use of VirtQueue gives this for free. This is left out at the |
11 | moment for simplicity. | 11 | moment for simplicity. |
12 | 12 | ||
13 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
13 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 14 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
14 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
15 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 15 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
16 | --- | 16 | --- |
17 | hw/virtio/vhost-shadow-virtqueue.c | 44 ++++++++++++++++++++++++++++++++++++++ | 17 | hw/virtio/vhost-shadow-virtqueue.c | 44 ++++++++++++++++++++++++++++++++++++++ |
18 | hw/virtio/vhost-shadow-virtqueue.h | 2 ++ | 18 | hw/virtio/vhost-shadow-virtqueue.h | 2 ++ |
19 | hw/virtio/vhost-vdpa.c | 15 +++++++++++++ | 19 | hw/virtio/vhost-vdpa.c | 15 +++++++++++++ |
... | ... | ||
52 | + case VIRTIO_F_ACCESS_PLATFORM: | 52 | + case VIRTIO_F_ACCESS_PLATFORM: |
53 | + /* SVQ trusts the host's IOMMU to translate addresses */ | 53 | + /* SVQ trusts the host's IOMMU to translate addresses */ |
54 | + case VIRTIO_F_VERSION_1: | 54 | + case VIRTIO_F_VERSION_1: |
55 | + /* SVQ trusts that the guest vring is little endian */ | 55 | + /* SVQ trusts that the guest vring is little endian */ |
56 | + if (!(svq_features & BIT_ULL(b))) { | 56 | + if (!(svq_features & BIT_ULL(b))) { |
57 | + set_bit(b, &svq_features); | 57 | + svq_features |= BIT_ULL(b); |
58 | + ok = false; | 58 | + ok = false; |
59 | + } | 59 | + } |
60 | + continue; | 60 | + continue; |
61 | + | 61 | + |
62 | + default: | 62 | + default: |
63 | + if (svq_features & BIT_ULL(b)) { | 63 | + if (svq_features & BIT_ULL(b)) { |
64 | + clear_bit(b, &svq_features); | 64 | + svq_features &= ~BIT_ULL(b); |
65 | + ok = false; | 65 | + ok = false; |
66 | + } | 66 | + } |
67 | + } | 67 | + } |
68 | + } | 68 | + } |
69 | + | 69 | + |
... | ... |
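
A hedged usage sketch of the check above: before enabling SVQ, a backend can validate the device's feature set against what the shadow virtqueue can offer. Only vhost_svq_valid_features() comes from this series; the wrapper and its call site are hypothetical, and vhost_get_features() is the generic vhost backend op.

    /* Sketch, not the series' code: reject SVQ early if the device offers
     * features the shadow virtqueue cannot emulate. */
    static bool example_check_svq_features(struct vhost_dev *hdev, Error **errp)
    {
        uint64_t dev_features;
        int r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);

        if (r < 0) {
            error_setg_errno(errp, -r, "Cannot get device features");
            return false;
        }

        return vhost_svq_valid_features(dev_features, errp);
    }
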
... | ... | ||
---|---|---|---|
4 | 4 | ||
5 | Since this will be different from the guest's vaddr, but the device can | 5 | Since this will be different from the guest's vaddr, but the device can |
6 | access it, SVQ takes special care with its alignment and with keeping it | 6 | access it, SVQ takes special care with its alignment and with keeping it |
7 | free of garbage data. It assumes the IOMMU will work in host_page_size ranges for that. | 7 | free of garbage data. It assumes the IOMMU will work in host_page_size ranges for that. |
8 | 8 | ||
9 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
9 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 10 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
10 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
11 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 11 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
12 | --- | 12 | --- |
13 | hw/virtio/vhost-shadow-virtqueue.c | 29 +++++++++++++++++++++++++++++ | 13 | hw/virtio/vhost-shadow-virtqueue.c | 29 +++++++++++++++++++++++++++++ |
14 | hw/virtio/vhost-shadow-virtqueue.h | 9 +++++++++ | 14 | hw/virtio/vhost-shadow-virtqueue.h | 9 +++++++++ |
15 | 2 files changed, 38 insertions(+) | 15 | 2 files changed, 38 insertions(+) |
... | ... | ||
27 | + * @addr: Destination to store address | 27 | + * @addr: Destination to store address |
28 | + */ | 28 | + */ |
29 | +void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, | 29 | +void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, |
30 | + struct vhost_vring_addr *addr) | 30 | + struct vhost_vring_addr *addr) |
31 | +{ | 31 | +{ |
32 | + addr->desc_user_addr = (uint64_t)svq->vring.desc; | 32 | + addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc; |
33 | + addr->avail_user_addr = (uint64_t)svq->vring.avail; | 33 | + addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail; |
34 | + addr->used_user_addr = (uint64_t)svq->vring.used; | 34 | + addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used; |
35 | +} | 35 | +} |
36 | + | 36 | + |
37 | +size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) | 37 | +size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) |
38 | +{ | 38 | +{ |
39 | + size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; | 39 | + size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; |
... | ... |
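
The host_page_size assumption shows up in the area-size helpers: the rings are sized and then rounded up to the host page size so the mapping exposed to the device ends on a page boundary with no stray data. A sketch of that computation; the avail-ring arithmetic and the use of qemu_real_host_page_size are assumptions consistent with, but not copied from, the patch.

    /* Sketch of the driver-area sizing: descriptor table plus avail ring
     * (including the used_event slot), padded to the host page size. */
    static size_t example_driver_area_size(const VhostShadowVirtqueue *svq)
    {
        size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
        size_t avail_size = offsetof(vring_avail_t, ring) +
                            sizeof(uint16_t) * (svq->vring.num + 1);

        return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size);
    }
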
... | ... | ||
---|---|---|---|
3 | First half of the buffers forwarding part, preparing vhost-vdpa | 3 | First half of the buffers forwarding part, preparing vhost-vdpa |
4 | callbacks to SVQ to offer it. QEMU cannot enable it yet, so | 4 | callbacks to SVQ to offer it. QEMU cannot enable it yet, so |
5 | this is effectively dead code at the moment, but it helps to reduce | 5 | this is effectively dead code at the moment, but it helps to reduce |
6 | patch size. | 6 | patch size. |
7 | 7 | ||
8 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
8 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 9 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
9 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
10 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 10 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
11 | --- | 11 | --- |
12 | hw/virtio/vhost-vdpa.c | 48 +++++++++++++++++++++++++++++++++++++++++------- | 12 | hw/virtio/vhost-vdpa.c | 48 +++++++++++++++++++++++++++++++++++++++++------- |
13 | 1 file changed, 41 insertions(+), 7 deletions(-) | 13 | 1 file changed, 41 insertions(+), 7 deletions(-) |
14 | 14 | ||
... | ... |
... | ... | ||
---|---|---|---|
20 | emulated virtio devices, it may cause an unexpected SVQ queue-full state. | 20 | emulated virtio devices, it may cause an unexpected SVQ queue-full state. |
21 | This patch also solves it by checking for this condition on both guest | 21 | This patch also solves it by checking for this condition on both guest |
22 | kicks and device calls. The code may be more elegant in the future if | 22 | kicks and device calls. The code may be more elegant in the future if |
23 | SVQ code runs in its own iocontext. | 23 | SVQ code runs in its own iocontext. |
24 | 24 | ||
25 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
25 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 26 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
26 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
27 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 27 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
28 | --- | 28 | --- |
29 | hw/virtio/vhost-shadow-virtqueue.c | 353 ++++++++++++++++++++++++++++++++++++- | 29 | hw/virtio/vhost-shadow-virtqueue.c | 352 ++++++++++++++++++++++++++++++++++++- |
30 | hw/virtio/vhost-shadow-virtqueue.h | 26 +++ | 30 | hw/virtio/vhost-shadow-virtqueue.h | 26 +++ |
31 | hw/virtio/vhost-vdpa.c | 159 ++++++++++++++++- | 31 | hw/virtio/vhost-vdpa.c | 155 +++++++++++++++- |
32 | 3 files changed, 526 insertions(+), 12 deletions(-) | 32 | 3 files changed, 522 insertions(+), 11 deletions(-) |
33 | 33 | ||
34 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | 34 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c |
35 | index XXXXXXX..XXXXXXX 100644 | 35 | index XXXXXXX..XXXXXXX 100644 |
36 | --- a/hw/virtio/vhost-shadow-virtqueue.c | 36 | --- a/hw/virtio/vhost-shadow-virtqueue.c |
37 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | 37 | +++ b/hw/virtio/vhost-shadow-virtqueue.c |
38 | @@ -XXX,XX +XXX,XX @@ | 38 | @@ -XXX,XX +XXX,XX @@ |
39 | #include "qemu/error-report.h" | 39 | #include "qemu/error-report.h" |
40 | #include "qapi/error.h" | 40 | #include "qapi/error.h" |
41 | #include "qemu/main-loop.h" | 41 | #include "qemu/main-loop.h" |
42 | +#include "qemu/log.h" | 42 | +#include "qemu/log.h" |
43 | +#include "qemu/memalign.h" | ||
43 | #include "linux-headers/linux/vhost.h" | 44 | #include "linux-headers/linux/vhost.h" |
44 | 45 | ||
45 | /** | 46 | /** |
46 | @@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp) | 47 | @@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp) |
47 | } | 48 | } |
... | ... | ||
56 | +{ | 57 | +{ |
57 | + return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx); | 58 | + return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx); |
58 | +} | 59 | +} |
59 | + | 60 | + |
60 | +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, | 61 | +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, |
61 | + const struct iovec *iovec, | 62 | + const struct iovec *iovec, size_t num, |
62 | + size_t num, bool more_descs, bool write) | 63 | + bool more_descs, bool write) |
63 | +{ | 64 | +{ |
64 | + uint16_t i = svq->free_head, last = svq->free_head; | 65 | + uint16_t i = svq->free_head, last = svq->free_head; |
65 | + unsigned n; | 66 | + unsigned n; |
66 | + uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0; | 67 | + uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0; |
67 | + vring_desc_t *descs = svq->vring.desc; | 68 | + vring_desc_t *descs = svq->vring.desc; |
... | ... | ||
74 | + if (more_descs || (n + 1 < num)) { | 75 | + if (more_descs || (n + 1 < num)) { |
75 | + descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); | 76 | + descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); |
76 | + } else { | 77 | + } else { |
77 | + descs[i].flags = flags; | 78 | + descs[i].flags = flags; |
78 | + } | 79 | + } |
79 | + descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base); | 80 | + descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base); |
80 | + descs[i].len = cpu_to_le32(iovec[n].iov_len); | 81 | + descs[i].len = cpu_to_le32(iovec[n].iov_len); |
81 | + | 82 | + |
82 | + last = i; | 83 | + last = i; |
83 | + i = cpu_to_le16(descs[i].next); | 84 | + i = cpu_to_le16(descs[i].next); |
84 | + } | 85 | + } |
85 | + | 86 | + |
86 | + svq->free_head = le16_to_cpu(descs[last].next); | 87 | + svq->free_head = le16_to_cpu(descs[last].next); |
87 | +} | 88 | +} |
88 | + | 89 | + |
89 | +static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, | 90 | +static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, |
90 | + VirtQueueElement *elem, | 91 | + VirtQueueElement *elem, unsigned *head) |
91 | + unsigned *head) | ||
92 | +{ | 92 | +{ |
93 | + unsigned avail_idx; | 93 | + unsigned avail_idx; |
94 | + vring_avail_t *avail = svq->vring.avail; | 94 | + vring_avail_t *avail = svq->vring.avail; |
95 | + | 95 | + |
96 | + *head = svq->free_head; | 96 | + *head = svq->free_head; |
97 | + | 97 | + |
98 | + /* We need some descriptors here */ | 98 | + /* We need some descriptors here */ |
99 | + if (unlikely(!elem->out_num && !elem->in_num)) { | 99 | + if (unlikely(!elem->out_num && !elem->in_num)) { |
100 | + qemu_log_mask(LOG_GUEST_ERROR, | 100 | + qemu_log_mask(LOG_GUEST_ERROR, |
101 | + "Guest provided element with no descriptors"); | 101 | + "Guest provided element with no descriptors"); |
102 | + return false; | 102 | + return false; |
103 | + } | 103 | + } |
104 | + | 104 | + |
105 | + vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, | 105 | + vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0, |
106 | + elem->in_num > 0, false); | 106 | + false); |
107 | + vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true); | 107 | + vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true); |
108 | + | 108 | + |
109 | + /* | 109 | + /* |
110 | + * Put the entry in the available array (but don't update avail->idx until | 110 | + * Put the entry in the available array (but don't update avail->idx until |
111 | + * they do sync). | 111 | + * they do sync). |
... | ... | ||
181 | + | 181 | + |
182 | + if (!elem) { | 182 | + if (!elem) { |
183 | + break; | 183 | + break; |
184 | + } | 184 | + } |
185 | + | 185 | + |
186 | + if (elem->out_num + elem->in_num > | 186 | + if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { |
187 | + vhost_svq_available_slots(svq)) { | ||
188 | + /* | 187 | + /* |
189 | + * This condition is possible since a contiguous buffer in GPA | 188 | + * This condition is possible since a contiguous buffer in GPA |
190 | + * does not imply a contiguous buffer in qemu's VA | 189 | + * does not imply a contiguous buffer in qemu's VA |
191 | + * scatter-gather segments. If that happens, the buffer exposed | 190 | + * scatter-gather segments. If that happens, the buffer exposed |
192 | + * to the device needs to be a chain of descriptors at this | 191 | + * to the device needs to be a chain of descriptors at this |
... | ... | ||
218 | * @n: guest kick event notifier, the one that guest set to notify svq. | 217 | * @n: guest kick event notifier, the one that guest set to notify svq. |
219 | */ | 218 | */ |
220 | -static void vhost_handle_guest_kick(EventNotifier *n) | 219 | -static void vhost_handle_guest_kick(EventNotifier *n) |
221 | +static void vhost_handle_guest_kick_notifier(EventNotifier *n) | 220 | +static void vhost_handle_guest_kick_notifier(EventNotifier *n) |
222 | { | 221 | { |
223 | VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, | 222 | VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick); |
224 | svq_kick); | ||
225 | event_notifier_test_and_clear(n); | 223 | event_notifier_test_and_clear(n); |
226 | - event_notifier_set(&svq->hdev_kick); | 224 | - event_notifier_set(&svq->hdev_kick); |
227 | + vhost_handle_guest_kick(svq); | 225 | + vhost_handle_guest_kick(svq); |
228 | +} | 226 | +} |
229 | + | 227 | + |
... | ... | ||
485 | @@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev, | 483 | @@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev, |
486 | * Note that this function does not rewind kick file descriptor if cannot set | 484 | * Note that this function does not rewind kick file descriptor if cannot set |
487 | * call one. | 485 | * call one. |
488 | */ | 486 | */ |
489 | -static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, | 487 | -static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, |
490 | - VhostShadowVirtqueue *svq, | 488 | - VhostShadowVirtqueue *svq, unsigned idx, |
491 | - unsigned idx, | ||
492 | - Error **errp) | 489 | - Error **errp) |
493 | +static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, | 490 | +static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, |
494 | + VhostShadowVirtqueue *svq, | 491 | + VhostShadowVirtqueue *svq, unsigned idx, |
495 | + unsigned idx, | ||
496 | + Error **errp) | 492 | + Error **errp) |
497 | { | 493 | { |
498 | struct vhost_vring_file file = { | 494 | struct vhost_vring_file file = { |
499 | .index = dev->vq_index + idx, | 495 | .index = dev->vq_index + idx, |
500 | @@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, | 496 | @@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, |
... | ... | ||
565 | + | 561 | + |
566 | + ERRP_GUARD(); | 562 | + ERRP_GUARD(); |
567 | + vhost_svq_get_vring_addr(svq, addr); | 563 | + vhost_svq_get_vring_addr(svq, addr); |
568 | + | 564 | + |
569 | + r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size, | 565 | + r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size, |
570 | + (void *)addr->desc_user_addr, true); | 566 | + (void *)(uintptr_t)addr->desc_user_addr, true); |
571 | + if (unlikely(r != 0)) { | 567 | + if (unlikely(r != 0)) { |
572 | + error_setg_errno(errp, -r, "Cannot create vq driver region: "); | 568 | + error_setg_errno(errp, -r, "Cannot create vq driver region: "); |
573 | + return false; | 569 | + return false; |
574 | + } | 570 | + } |
575 | + | 571 | + |
576 | + r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size, | 572 | + r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size, |
577 | + (void *)addr->used_user_addr, false); | 573 | + (void *)(intptr_t)addr->used_user_addr, false); |
578 | + if (unlikely(r != 0)) { | 574 | + if (unlikely(r != 0)) { |
579 | + error_setg_errno(errp, -r, "Cannot create vq device region: "); | 575 | + error_setg_errno(errp, -r, "Cannot create vq device region: "); |
580 | + } | 576 | + } |
581 | + | 577 | + |
582 | + return r == 0; | 578 | + return r == 0; |
583 | +} | 579 | +} |
584 | + | 580 | + |
585 | +static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, | 581 | +static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, |
586 | + VhostShadowVirtqueue *svq, | 582 | + VhostShadowVirtqueue *svq, unsigned idx, |
587 | + unsigned idx, | ||
588 | + Error **errp) | 583 | + Error **errp) |
589 | +{ | 584 | +{ |
590 | + uint16_t vq_index = dev->vq_index + idx; | 585 | + uint16_t vq_index = dev->vq_index + idx; |
591 | + struct vhost_vring_state s = { | 586 | + struct vhost_vring_state s = { |
592 | + .index = vq_index, | 587 | + .index = vq_index, |
... | ... | ||
659 | + if (!v->shadow_vqs) { | 654 | + if (!v->shadow_vqs) { |
660 | + return true; | 655 | + return true; |
661 | + } | 656 | + } |
662 | + | 657 | + |
663 | + for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { | 658 | + for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { |
664 | + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, | 659 | + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); |
665 | + i); | ||
666 | + bool ok = vhost_vdpa_svq_unmap_rings(dev, svq); | 660 | + bool ok = vhost_vdpa_svq_unmap_rings(dev, svq); |
667 | + if (unlikely(!ok)) { | 661 | + if (unlikely(!ok)) { |
668 | return false; | 662 | return false; |
669 | } | 663 | } |
670 | } | 664 | } |
... | ... |
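
To summarize the queue-full handling described in the commit message, here is a condensed sketch of the guest-side forwarding loop. It is not the patch's exact code: the real series keeps the pending element around rather than un-popping it, and the svq->vq field and the add/kick helper below are assumptions; only vhost_svq_available_slots() and the slot check are taken from the patch.

    /* Sketch: pop guest buffers and forward them while SVQ has room; when
     * the SVQ ring is full, give the element back and retry after the
     * device returns used buffers. */
    static void example_forward_guest_avail(VhostShadowVirtqueue *svq)
    {
        for (;;) {
            VirtQueueElement *elem = virtqueue_pop(svq->vq,
                                                   sizeof(VirtQueueElement));
            if (!elem) {
                break; /* guest avail ring is empty */
            }

            if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
                /* SVQ full: undo the pop and wait for used buffers */
                virtqueue_unpop(svq->vq, elem, 0);
                g_free(elem);
                break;
            }

            example_svq_add_and_kick(svq, elem); /* hypothetical helper */
        }
    }
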
... | ... | ||
---|---|---|---|
6 | 6 | ||
7 | Its usage is mainly to allow devices to access qemu's address space, | 7 | Its usage is mainly to allow devices to access qemu's address space, |
8 | remapping the guest's one into a new iova space where qemu can add chunks of | 8 | remapping the guest's one into a new iova space where qemu can add chunks of |
9 | addresses. | 9 | addresses. |
10 | 10 | ||
11 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | ||
12 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | 11 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> |
13 | Reviewed-by: Peter Xu <peterx@redhat.com> | 12 | Reviewed-by: Peter Xu <peterx@redhat.com> |
13 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | ||
14 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 14 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
15 | --- | 15 | --- |
16 | include/qemu/iova-tree.h | 18 +++++++ | 16 | include/qemu/iova-tree.h | 18 +++++++ |
17 | util/iova-tree.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++ | 17 | util/iova-tree.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++ |
18 | 2 files changed, 153 insertions(+) | 18 | 2 files changed, 154 insertions(+) |
19 | 19 | ||
20 | diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h | 20 | diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h |
21 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/include/qemu/iova-tree.h | 22 | --- a/include/qemu/iova-tree.h |
23 | +++ b/include/qemu/iova-tree.h | 23 | +++ b/include/qemu/iova-tree.h |
... | ... | ||
87 | + * | 87 | + * |
88 | + * @args: The alloc arguments | 88 | + * @args: The alloc arguments |
89 | + * @next: The next mapping in the tree. Can be NULL to signal the last one | 89 | + * @next: The next mapping in the tree. Can be NULL to signal the last one |
90 | + */ | 90 | + */ |
91 | +static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args, | 91 | +static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args, |
92 | + const DMAMap *next) { | 92 | + const DMAMap *next) |
93 | +{ | ||
93 | + args->prev = args->this; | 94 | + args->prev = args->this; |
94 | + args->this = next; | 95 | + args->this = next; |
95 | +} | 96 | +} |
96 | + | 97 | + |
97 | static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data) | 98 | static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data) |
... | ... |
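
A hedged usage sketch of the allocator this patch describes: the caller fills a DMAMap with the qemu VA (translated_addr) and an inclusive size, and the tree picks a free iova range within the given limits. The entry point is named iova_tree_alloc_map() in current QEMU; its exact name at this point of the series, and the wrapper below, are assumptions.

    /* Sketch: allocate an iova window for a chunk of qemu VA. On success
     * the chosen iova is written back into map.iova. */
    static int example_alloc_iova(IOVATree *tree, void *vaddr, size_t size,
                                  hwaddr iova_first, hwaddr iova_last)
    {
        DMAMap map = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = size - 1,                 /* DMAMap sizes are inclusive */
            .perm = IOMMU_RW,
        };
        int r = iova_tree_alloc_map(tree, &map, iova_first, iova_last);

        return r == IOVA_OK ? 0 : -ENOSPC;
    }
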
... | ... | ||
---|---|---|---|
4 | a mapping that match a translated address so we can do the reverse. | 4 | a mapping that match a translated address so we can do the reverse. |
5 | 5 | ||
6 | This has linear complexity instead of logarithmic, but it supports | 6 | This has linear complexity instead of logarithmic, but it supports |
7 | overlapping HVA. Future developments could reduce it. | 7 | overlapping HVA. Future developments could reduce it. |
8 | 8 | ||
9 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
9 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 10 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
10 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
11 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 11 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
12 | --- | 12 | --- |
13 | include/qemu/iova-tree.h | 20 +++++++++++++++++++- | 13 | include/qemu/iova-tree.h | 20 +++++++++++++++++++- |
14 | util/iova-tree.c | 34 ++++++++++++++++++++++++++++++++++ | 14 | util/iova-tree.c | 34 ++++++++++++++++++++++++++++++++++ |
15 | 2 files changed, 53 insertions(+), 1 deletion(-) | 15 | 2 files changed, 53 insertions(+), 1 deletion(-) |
... | ... |
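
The reverse search is a plain traversal, which is what makes it linear but tolerant of overlapping HVA ranges. A conceptual sketch of the iterator; the callback shape follows GLib's g_tree_foreach(), and the concrete struct and names in the patch may differ.

    /* Sketch: find the mapping whose translated (HVA) range fully contains
     * the needle. Linear in the number of mappings. */
    typedef struct {
        const DMAMap *needle;
        const DMAMap *result;
    } FindByHVAArgs;

    static gboolean example_find_by_hva(gpointer key, gpointer value,
                                        gpointer data)
    {
        FindByHVAArgs *args = data;
        const DMAMap *map = key;

        if (args->needle->translated_addr >= map->translated_addr &&
            args->needle->translated_addr + args->needle->size <=
                map->translated_addr + map->size) {
            args->result = map;
            return TRUE;  /* stop g_tree_foreach() */
        }
        return FALSE;
    }

The caller would pass this to g_tree_foreach() with a FindByHVAArgs on the stack and read args.result afterwards.
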
... | ... | ||
---|---|---|---|
13 | 13 | ||
14 | It duplicates the tree so it can search efficiently in both directions, | 14 | It duplicates the tree so it can search efficiently in both directions, |
15 | and it will signal an overlap if the iova or the translated address is | 15 | and it will signal an overlap if the iova or the translated address is |
16 | present in either tree. | 16 | present in either tree. |
17 | 17 | ||
18 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
18 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 19 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
19 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
20 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 20 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
21 | --- | 21 | --- |
22 | hw/virtio/meson.build | 2 +- | 22 | hw/virtio/meson.build | 2 +- |
23 | hw/virtio/vhost-iova-tree.c | 110 ++++++++++++++++++++++++++++++++++++++++++++ | 23 | hw/virtio/vhost-iova-tree.c | 110 ++++++++++++++++++++++++++++++++++++++++++++ |
24 | hw/virtio/vhost-iova-tree.h | 27 +++++++++++ | 24 | hw/virtio/vhost-iova-tree.h | 27 +++++++++++ |
... | ... |
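
A hedged sketch of how the following patches use this tree: the names vhost_iova_tree_map_alloc(), vhost_iova_tree_find_iova() and vhost_iova_tree_remove() appear later in the series, and the signatures and the IOVA_OK check here are inferred from those call sites, not quoted from this patch.

    /* Sketch: allocate an iova for a qemu VA region, then translate the same
     * VA back to the iova when building descriptors for the device. */
    static bool example_map_and_translate(VhostIOVATree *tree, void *vaddr,
                                          size_t size, hwaddr *iova)
    {
        DMAMap map = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = size - 1,
            .perm = IOMMU_RW,
        };
        const DMAMap *found;

        if (vhost_iova_tree_map_alloc(tree, &map) != IOVA_OK) {
            return false; /* no free iova range of this size */
        }

        found = vhost_iova_tree_find_iova(tree, &map);
        if (!found) {
            vhost_iova_tree_remove(tree, &map);
            return false;
        }

        *iova = found->iova;
        return true;
    }
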
... | ... | ||
---|---|---|---|
5 | Only introduce usage here, not allocation and deallocation. As with | 5 | Only introduce usage here, not allocation and deallocation. As with |
6 | previous patches, we use the dead code paths of shadow_vqs_enabled to | 6 | previous patches, we use the dead code paths of shadow_vqs_enabled to |
7 | avoid committing too many changes at once. These are impossible to take | 7 | avoid committing too many changes at once. These are impossible to take |
8 | at the moment. | 8 | at the moment. |
9 | 9 | ||
10 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
10 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 11 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
11 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
12 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 12 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
13 | --- | 13 | --- |
14 | hw/virtio/vhost-shadow-virtqueue.c | 75 +++++++++++++++++++++-- | 14 | hw/virtio/vhost-shadow-virtqueue.c | 86 +++++++++++++++++++++++--- |
15 | hw/virtio/vhost-shadow-virtqueue.h | 6 +- | 15 | hw/virtio/vhost-shadow-virtqueue.h | 6 +- |
16 | hw/virtio/vhost-vdpa.c | 122 +++++++++++++++++++++++++++++++------ | 16 | hw/virtio/vhost-vdpa.c | 122 +++++++++++++++++++++++++++++++------ |
17 | include/hw/virtio/vhost-vdpa.h | 3 + | 17 | include/hw/virtio/vhost-vdpa.h | 3 + |
18 | 4 files changed, 181 insertions(+), 25 deletions(-) | 18 | 4 files changed, 187 insertions(+), 30 deletions(-) |
19 | 19 | ||
20 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | 20 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c |
21 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
22 | --- a/hw/virtio/vhost-shadow-virtqueue.c | 22 | --- a/hw/virtio/vhost-shadow-virtqueue.c |
23 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | 23 | +++ b/hw/virtio/vhost-shadow-virtqueue.c |
24 | @@ -XXX,XX +XXX,XX @@ static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) | 24 | @@ -XXX,XX +XXX,XX @@ static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) |
25 | return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx); | 25 | return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx); |
26 | } | 26 | } |
27 | 27 | ||
28 | -static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, | ||
28 | +/** | 29 | +/** |
29 | + * Translate addresses between the qemu's virtual address and the SVQ IOVA | 30 | + * Translate addresses between the qemu's virtual address and the SVQ IOVA |
30 | + * | 31 | + * |
31 | + * @svq: Shadow VirtQueue | 32 | + * @svq: Shadow VirtQueue |
32 | + * @vaddr: Translated IOVA addresses | 33 | + * @vaddr: Translated IOVA addresses |
33 | + * @iovec: Source qemu's VA addresses | 34 | + * @iovec: Source qemu's VA addresses |
34 | + * @num: Length of iovec and minimum length of vaddr | 35 | + * @num: Length of iovec and minimum length of vaddr |
35 | + */ | 36 | + */ |
36 | +static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, | 37 | +static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, |
37 | + void **addrs, const struct iovec *iovec, | 38 | + hwaddr *addrs, const struct iovec *iovec, |
38 | + size_t num) | 39 | + size_t num) |
39 | +{ | 40 | +{ |
40 | + if (num == 0) { | 41 | + if (num == 0) { |
41 | + return true; | 42 | + return true; |
42 | + } | 43 | + } |
43 | + | 44 | + |
44 | + for (size_t i = 0; i < num; ++i) { | 45 | + for (size_t i = 0; i < num; ++i) { |
45 | + DMAMap needle = { | 46 | + DMAMap needle = { |
46 | + .translated_addr = (hwaddr)iovec[i].iov_base, | 47 | + .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base, |
47 | + .size = iovec[i].iov_len, | 48 | + .size = iovec[i].iov_len, |
48 | + }; | 49 | + }; |
50 | + Int128 needle_last, map_last; | ||
49 | + size_t off; | 51 | + size_t off; |
50 | + | 52 | + |
51 | + const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle); | 53 | + const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle); |
52 | + /* | 54 | + /* |
53 | + * Map cannot be NULL since iova map contains all guest space and | 55 | + * Map cannot be NULL since iova map contains all guest space and |
... | ... | ||
59 | + needle.translated_addr); | 61 | + needle.translated_addr); |
60 | + return false; | 62 | + return false; |
61 | + } | 63 | + } |
62 | + | 64 | + |
63 | + off = needle.translated_addr - map->translated_addr; | 65 | + off = needle.translated_addr - map->translated_addr; |
64 | + addrs[i] = (void *)(map->iova + off); | 66 | + addrs[i] = map->iova + off; |
65 | + | 67 | + |
66 | + if (unlikely(int128_gt(int128_add(needle.translated_addr, | 68 | + needle_last = int128_add(int128_make64(needle.translated_addr), |
67 | + iovec[i].iov_len), | 69 | + int128_make64(iovec[i].iov_len)); |
68 | + map->translated_addr + map->size))) { | 70 | + map_last = int128_make64(map->translated_addr + map->size); |
71 | + if (unlikely(int128_gt(needle_last, map_last))) { | ||
69 | + qemu_log_mask(LOG_GUEST_ERROR, | 72 | + qemu_log_mask(LOG_GUEST_ERROR, |
70 | + "Guest buffer expands over iova range"); | 73 | + "Guest buffer expands over iova range"); |
71 | + return false; | 74 | + return false; |
72 | + } | 75 | + } |
73 | + } | 76 | + } |
74 | + | 77 | + |
75 | + return true; | 78 | + return true; |
76 | +} | 79 | +} |
77 | + | 80 | + |
78 | static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, | 81 | +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, |
79 | + void * const *sg, | 82 | const struct iovec *iovec, size_t num, |
80 | const struct iovec *iovec, | 83 | bool more_descs, bool write) |
81 | size_t num, bool more_descs, bool write) | ||
82 | { | 84 | { |
83 | @@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, | 85 | @@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, |
84 | } else { | 86 | } else { |
85 | descs[i].flags = flags; | 87 | descs[i].flags = flags; |
86 | } | 88 | } |
87 | - descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base); | 89 | - descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base); |
88 | + descs[i].addr = cpu_to_le64((hwaddr)sg[n]); | 90 | + descs[i].addr = cpu_to_le64(sg[n]); |
89 | descs[i].len = cpu_to_le32(iovec[n].iov_len); | 91 | descs[i].len = cpu_to_le32(iovec[n].iov_len); |
90 | 92 | ||
91 | last = i; | 93 | last = i; |
92 | @@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, | 94 | @@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, |
93 | { | 95 | { |
94 | unsigned avail_idx; | 96 | unsigned avail_idx; |
95 | vring_avail_t *avail = svq->vring.avail; | 97 | vring_avail_t *avail = svq->vring.avail; |
96 | + bool ok; | 98 | + bool ok; |
97 | + g_autofree void **sgs = g_new(void *, MAX(elem->out_num, elem->in_num)); | 99 | + g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num)); |
98 | 100 | ||
99 | *head = svq->free_head; | 101 | *head = svq->free_head; |
100 | 102 | ||
101 | @@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, | 103 | @@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, |
102 | return false; | 104 | return false; |
103 | } | 105 | } |
104 | 106 | ||
105 | - vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, | 107 | - vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0, |
108 | - false); | ||
109 | - vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true); | ||
106 | + ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); | 110 | + ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); |
107 | + if (unlikely(!ok)) { | 111 | + if (unlikely(!ok)) { |
108 | + return false; | 112 | + return false; |
109 | + } | 113 | + } |
110 | + vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, | 114 | + vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, |
111 | elem->in_num > 0, false); | 115 | + elem->in_num > 0, false); |
112 | - vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true); | ||
113 | + | 116 | + |
114 | + | 117 | + |
115 | + ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); | 118 | + ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); |
116 | + if (unlikely(!ok)) { | 119 | + if (unlikely(!ok)) { |
117 | + return false; | 120 | + return false; |
118 | + } | 121 | + } |
119 | + | 122 | + |
120 | + vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); | 123 | + vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); |
121 | 124 | ||
122 | /* | 125 | /* |
123 | * Put the entry in the available array (but don't update avail->idx until | 126 | * Put the entry in the available array (but don't update avail->idx until |
127 | @@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd) | ||
128 | void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, | ||
129 | struct vhost_vring_addr *addr) | ||
130 | { | ||
131 | - addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc; | ||
132 | - addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail; | ||
133 | - addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used; | ||
134 | + addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc; | ||
135 | + addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail; | ||
136 | + addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used; | ||
137 | } | ||
138 | |||
139 | size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) | ||
124 | @@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) | 140 | @@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) |
125 | * Creates vhost shadow virtqueue, and instructs the vhost device to use the | 141 | * Creates vhost shadow virtqueue, and instructs the vhost device to use the |
126 | * shadow methods and file descriptors. | 142 | * shadow methods and file descriptors. |
127 | * | 143 | * |
128 | + * @iova_tree: Tree to perform descriptors translations | 144 | + * @iova_tree: Tree to perform descriptors translations |
... | ... | ||
183 | vaddr, section->readonly); | 199 | vaddr, section->readonly); |
184 | 200 | ||
185 | llsize = int128_sub(llend, int128_make64(iova)); | 201 | llsize = int128_sub(llend, int128_make64(iova)); |
186 | + if (v->shadow_vqs_enabled) { | 202 | + if (v->shadow_vqs_enabled) { |
187 | + DMAMap mem_region = { | 203 | + DMAMap mem_region = { |
188 | + .translated_addr = (hwaddr)vaddr, | 204 | + .translated_addr = (hwaddr)(uintptr_t)vaddr, |
189 | + .size = int128_get64(llsize) - 1, | 205 | + .size = int128_get64(llsize) - 1, |
190 | + .perm = IOMMU_ACCESS_FLAG(true, section->readonly), | 206 | + .perm = IOMMU_ACCESS_FLAG(true, section->readonly), |
191 | + }; | 207 | + }; |
192 | + | 208 | + |
193 | + int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); | 209 | + int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); |
... | ... | ||
209 | + const DMAMap *result; | 225 | + const DMAMap *result; |
210 | + const void *vaddr = memory_region_get_ram_ptr(section->mr) + | 226 | + const void *vaddr = memory_region_get_ram_ptr(section->mr) + |
211 | + section->offset_within_region + | 227 | + section->offset_within_region + |
212 | + (iova - section->offset_within_address_space); | 228 | + (iova - section->offset_within_address_space); |
213 | + DMAMap mem_region = { | 229 | + DMAMap mem_region = { |
214 | + .translated_addr = (hwaddr)vaddr, | 230 | + .translated_addr = (hwaddr)(uintptr_t)vaddr, |
215 | + .size = int128_get64(llsize) - 1, | 231 | + .size = int128_get64(llsize) - 1, |
216 | + }; | 232 | + }; |
217 | + | 233 | + |
218 | + result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); | 234 | + result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); |
219 | + iova = result->iova; | 235 | + iova = result->iova; |
... | ... | ||
297 | + error_setg(errp, "Cannot allocate iova (%d)", r); | 313 | + error_setg(errp, "Cannot allocate iova (%d)", r); |
298 | + return false; | 314 | + return false; |
299 | + } | 315 | + } |
300 | + | 316 | + |
301 | + r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1, | 317 | + r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1, |
302 | + (void *)needle->translated_addr, | 318 | + (void *)(uintptr_t)needle->translated_addr, |
303 | + needle->perm == IOMMU_RO); | 319 | + needle->perm == IOMMU_RO); |
304 | + if (unlikely(r != 0)) { | 320 | + if (unlikely(r != 0)) { |
305 | + error_setg_errno(errp, -r, "Cannot map region to device"); | 321 | + error_setg_errno(errp, -r, "Cannot map region to device"); |
306 | + vhost_iova_tree_remove(v->iova_tree, needle); | 322 | + vhost_iova_tree_remove(v->iova_tree, needle); |
307 | + } | 323 | + } |
... | ... | ||
326 | ERRP_GUARD(); | 342 | ERRP_GUARD(); |
327 | - vhost_svq_get_vring_addr(svq, addr); | 343 | - vhost_svq_get_vring_addr(svq, addr); |
328 | + vhost_svq_get_vring_addr(svq, &svq_addr); | 344 | + vhost_svq_get_vring_addr(svq, &svq_addr); |
329 | 345 | ||
330 | - r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size, | 346 | - r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size, |
331 | - (void *)addr->desc_user_addr, true); | 347 | - (void *)(uintptr_t)addr->desc_user_addr, true); |
332 | - if (unlikely(r != 0)) { | 348 | - if (unlikely(r != 0)) { |
333 | - error_setg_errno(errp, -r, "Cannot create vq driver region: "); | 349 | - error_setg_errno(errp, -r, "Cannot create vq driver region: "); |
334 | + driver_region = (DMAMap) { | 350 | + driver_region = (DMAMap) { |
335 | + .translated_addr = svq_addr.desc_user_addr, | 351 | + .translated_addr = svq_addr.desc_user_addr, |
336 | + .size = driver_size - 1, | 352 | + .size = driver_size - 1, |
... | ... | ||
344 | + addr->desc_user_addr = driver_region.iova; | 360 | + addr->desc_user_addr = driver_region.iova; |
345 | + avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr; | 361 | + avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr; |
346 | + addr->avail_user_addr = driver_region.iova + avail_offset; | 362 | + addr->avail_user_addr = driver_region.iova + avail_offset; |
347 | 363 | ||
348 | - r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size, | 364 | - r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size, |
349 | - (void *)addr->used_user_addr, false); | 365 | - (void *)(intptr_t)addr->used_user_addr, false); |
350 | - if (unlikely(r != 0)) { | 366 | - if (unlikely(r != 0)) { |
351 | - error_setg_errno(errp, -r, "Cannot create vq device region: "); | 367 | - error_setg_errno(errp, -r, "Cannot create vq device region: "); |
352 | + device_region = (DMAMap) { | 368 | + device_region = (DMAMap) { |
353 | + .translated_addr = svq_addr.used_user_addr, | 369 | + .translated_addr = svq_addr.used_user_addr, |
354 | + .size = device_size - 1, | 370 | + .size = device_size - 1, |
... | ... |
... | ... | ||
---|---|---|---|
8 | processing ones. | 8 | processing ones. |
9 | 9 | ||
10 | This is ok for networking, but other kinds of devices might have | 10 | This is ok for networking, but other kinds of devices might have |
11 | problems with these retransmissions. | 11 | problems with these retransmissions. |
12 | 12 | ||
13 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
13 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 14 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
14 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
15 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 15 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
16 | --- | 16 | --- |
17 | hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++ | 17 | hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++ |
18 | 1 file changed, 17 insertions(+) | 18 | 1 file changed, 17 insertions(+) |
19 | 19 | ||
... | ... |
1 | From: Eugenio Pérez <eperezma@redhat.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Setting the log address would make the device start reporting invalid | 3 | Setting the log address would make the device start reporting invalid |
4 | dirty memory because the SVQ vrings are located in qemu's memory. | 4 | dirty memory because the SVQ vrings are located in qemu's memory. |
5 | 5 | ||
6 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
6 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 7 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
7 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 8 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
9 | --- | 9 | --- |
10 | hw/virtio/vhost-vdpa.c | 3 ++- | 10 | hw/virtio/vhost-vdpa.c | 3 ++- |
11 | 1 file changed, 2 insertions(+), 1 deletion(-) | 11 | 1 file changed, 2 insertions(+), 1 deletion(-) |
12 | 12 | ||
... | ... |
... | ... | ||
---|---|---|---|
8 | because SVQ memory is in the qemu region. | 8 | because SVQ memory is in the qemu region. |
9 | 9 | ||
10 | The log region is still allocated. Future changes might skip that, but | 10 | The log region is still allocated. Future changes might skip that, but |
11 | this series is already long enough. | 11 | this series is already long enough. |
12 | 12 | ||
13 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
13 | Acked-by: Michael S. Tsirkin <mst@redhat.com> | 14 | Acked-by: Michael S. Tsirkin <mst@redhat.com> |
14 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
15 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 15 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
16 | --- | 16 | --- |
17 | hw/virtio/vhost-vdpa.c | 39 +++++++++++++++++++++++++++++++++++---- | 17 | hw/virtio/vhost-vdpa.c | 39 +++++++++++++++++++++++++++++++++++---- |
18 | include/hw/virtio/vhost-vdpa.h | 1 + | 18 | include/hw/virtio/vhost-vdpa.h | 1 + |
19 | 2 files changed, 36 insertions(+), 4 deletions(-) | 19 | 2 files changed, 36 insertions(+), 4 deletions(-) |
... | ... |