The following changes since commit 352998df1c53b366413690d95b35f76d0721ebed:

  Merge tag 'i2c-20220314' of https://github.com/philmd/qemu into staging (2022-03-14 14:39:33 +0000)

are available in the git repository at:

  https://github.com/jasowang/qemu.git tags/net-pull-request

for you to fetch changes up to 12a195fa343aae2ead1301ce04727bd0ae25eb15:

  vdpa: Expose VHOST_F_LOG_ALL on SVQ (2022-03-15 13:57:44 +0800)

----------------------------------------------------------------
Changes since V2:
- fix 32bit build errors

----------------------------------------------------------------
Eugenio Pérez (14):
      vhost: Add VhostShadowVirtqueue
      vhost: Add Shadow VirtQueue kick forwarding capabilities
      vhost: Add Shadow VirtQueue call forwarding capabilities
      vhost: Add vhost_svq_valid_features to shadow vq
      virtio: Add vhost_svq_get_vring_addr
      vdpa: adapt vhost_ops callbacks to svq
      vhost: Shadow virtqueue buffers forwarding
      util: Add iova_tree_alloc_map
      util: add iova_tree_find_iova
      vhost: Add VhostIOVATree
      vdpa: Add custom IOTLB translations to SVQ
      vdpa: Adapt vhost_vdpa_get_vring_base to SVQ
      vdpa: Never set log_base addr if SVQ is enabled
      vdpa: Expose VHOST_F_LOG_ALL on SVQ

Jason Wang (1):
      virtio-net: fix map leaking on error during receive

 hw/net/virtio-net.c                |   1 +
 hw/virtio/meson.build              |   2 +-
 hw/virtio/vhost-iova-tree.c        | 110 +++++++
 hw/virtio/vhost-iova-tree.h        |  27 ++
 hw/virtio/vhost-shadow-virtqueue.c | 636 +++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h |  87 +++++
 hw/virtio/vhost-vdpa.c             | 522 +++++++++++++++++++++++++++++-
 include/hw/virtio/vhost-vdpa.h     |   8 +
 include/qemu/iova-tree.h           |  38 ++-
 util/iova-tree.c                   | 170 ++++++++++
 10 files changed, 1584 insertions(+), 17 deletions(-)
 create mode 100644 hw/virtio/vhost-iova-tree.c
 create mode 100644 hw/virtio/vhost-iova-tree.h
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.h
Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
tries to fix the use after free of the sg by caching the virtqueue
elements in an array and unmapping them at once after receiving the
packets, but it forgot to unmap the cached elements on error, which
leads to leaked mappings and other unexpected results.

Fix this by detaching the cached elements on error. This addresses
CVE-2022-26353.

Reported-by: Victor Tom <vv474172261@gmail.com>
Cc: qemu-stable@nongnu.org
Fixes: CVE-2022-26353
Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/virtio-net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,

 err:
     for (j = 0; j < i; j++) {
+        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
         g_free(elems[j]);
     }

--
2.7.4
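
The shape of the leak is easy to reproduce outside QEMU. Below is a
minimal standalone model of the error path (the Elem type and helpers
are invented for the example, not QEMU API): each cached element owns a
second resource that must be released before the element itself.

    #include <stdlib.h>

    /* Toy stand-ins (not QEMU API): an "element" that owns a mapping. */
    typedef struct Elem { void *map; } Elem;

    static Elem *elem_map(void)
    {
        Elem *e = calloc(1, sizeof(*e));
        e->map = malloc(64);          /* stands in for the DMA mapping */
        return e;
    }

    int main(void)
    {
        Elem *elems[4];
        int i, j;

        for (i = 0; i < 4; i++) {
            elems[i] = elem_map();
            if (i == 2) {             /* simulated mid-batch error */
                goto err;
            }
        }
        return 0;

    err:
        /*
         * The bug: freeing only the element header leaks e->map for
         * every element cached so far. The fix mirrors the patch:
         * release the mapping (virtqueue_detach_element() in QEMU),
         * then free the element.
         */
        for (j = 0; j <= i; j++) {
            free(elems[j]->map);
            free(elems[j]);
        }
        return 1;
    }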
From: Eugenio Pérez <eperezma@redhat.com>

Vhost shadow virtqueue (SVQ) is an intermediate jump for virtqueue
notifications and buffers, allowing qemu to track them. While qemu is
forwarding the buffers and virtqueue changes, it is able to commit the
memory that is being dirtied, the same way regular qemu VirtIO devices
do.

This commit only exposes basic SVQ allocation and free. Next patches of
the series add functionality like notifications and buffers forwarding.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/meson.build              |  2 +-
 hw/virtio/vhost-shadow-virtqueue.c | 62 ++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h | 28 +++++++++++++++++
 3 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.h

diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))

 virtio_ss = ss.source_set()
 virtio_ss.add(files('virtio.c'))
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c'))
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
 virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * vhost shadow virtqueue
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
+
+#include "qemu/error-report.h"
+
+/**
+ * Creates vhost shadow virtqueue, and instructs the vhost device to use the
+ * shadow methods and file descriptors.
+ *
+ * Returns the new virtqueue or NULL.
+ *
+ * In case of error, reason is reported through error_report.
+ */
+VhostShadowVirtqueue *vhost_svq_new(void)
+{
+    g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
+    int r;
+
+    r = event_notifier_init(&svq->hdev_kick, 0);
+    if (r != 0) {
+        error_report("Couldn't create kick event notifier: %s (%d)",
+                     g_strerror(errno), errno);
+        goto err_init_hdev_kick;
+    }
+
+    r = event_notifier_init(&svq->hdev_call, 0);
+    if (r != 0) {
+        error_report("Couldn't create call event notifier: %s (%d)",
+                     g_strerror(errno), errno);
+        goto err_init_hdev_call;
+    }
+
+    return g_steal_pointer(&svq);
+
+err_init_hdev_call:
+    event_notifier_cleanup(&svq->hdev_kick);
+
+err_init_hdev_kick:
+    return NULL;
+}
+
+/**
+ * Free the resources of the shadow virtqueue.
+ *
+ * @pvq: gpointer to SVQ so it can be used by autofree functions.
+ */
+void vhost_svq_free(gpointer pvq)
+{
+    VhostShadowVirtqueue *vq = pvq;
+    event_notifier_cleanup(&vq->hdev_kick);
+    event_notifier_cleanup(&vq->hdev_call);
+    g_free(vq);
+}
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * vhost shadow virtqueue
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef VHOST_SHADOW_VIRTQUEUE_H
+#define VHOST_SHADOW_VIRTQUEUE_H
+
+#include "qemu/event_notifier.h"
+
+/* Shadow virtqueue to relay notifications */
+typedef struct VhostShadowVirtqueue {
+    /* Shadow kick notifier, sent to vhost */
+    EventNotifier hdev_kick;
+    /* Shadow call notifier, sent to vhost */
+    EventNotifier hdev_call;
+} VhostShadowVirtqueue;
+
+VhostShadowVirtqueue *vhost_svq_new(void);
+
+void vhost_svq_free(gpointer vq);
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
+
+#endif
--
2.7.4
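
A sketch of how a caller is expected to hold these objects, using the
autoptr cleanup the header registers. The function name here is
invented for illustration, but the shape matches what a later patch in
this series does in vhost_vdpa_init_svq():

    /* Hypothetical caller, sketched against the API this patch adds.
     * Assumes a QEMU build environment (GLib, qemu/osdep.h). */
    #include "qemu/osdep.h"
    #include "hw/virtio/vhost-shadow-virtqueue.h"

    static bool try_alloc_svqs(GPtrArray **out, unsigned nvqs)
    {
        /* vhost_svq_free is the GDestroyNotify, so dropping the array
         * releases every remaining queue and its event notifiers. */
        g_autoptr(GPtrArray) svqs = g_ptr_array_new_full(nvqs, vhost_svq_free);

        for (unsigned i = 0; i < nvqs; i++) {
            /* g_autoptr + g_steal_pointer mirrors how vhost_svq_new
             * itself keeps its error paths leak-free. */
            g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
            if (!svq) {
                return false;   /* array and partial contents auto-freed */
            }
            g_ptr_array_add(svqs, g_steal_pointer(&svq));
        }

        *out = g_steal_pointer(&svqs);
        return true;
    }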
From: Eugenio Pérez <eperezma@redhat.com>

In this mode, no buffer forwarding is performed in SVQ mode: QEMU just
forwards the guest's kicks to the device.

Host memory notifier regions are left out for simplicity, and they will
not be addressed in this series.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c |  55 ++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h |  14 ++++
 hw/virtio/vhost-vdpa.c             | 144 ++++++++++++++++++++++++++++++++++++-
 include/hw/virtio/vhost-vdpa.h     |   4 ++
 4 files changed, 215 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"

 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "linux-headers/linux/vhost.h"
+
+/**
+ * Forward guest notifications.
+ *
+ * @n: guest kick event notifier, the one that guest set to notify svq.
+ */
+static void vhost_handle_guest_kick(EventNotifier *n)
+{
+    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
+    event_notifier_test_and_clear(n);
+    event_notifier_set(&svq->hdev_kick);
+}
+
+/**
+ * Set a new file descriptor for the guest to kick the SVQ and notify for avail
+ *
+ * @svq: The svq
+ * @svq_kick_fd: The svq kick fd
+ *
+ * Note that the SVQ will never close the old file descriptor.
+ */
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
+{
+    EventNotifier *svq_kick = &svq->svq_kick;
+    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
+    bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;
+
+    if (poll_stop) {
+        event_notifier_set_handler(svq_kick, NULL);
+    }
+
+    /*
+     * event_notifier_set_handler already checks for guest's notifications if
+     * they arrive at the new file descriptor in the switch, so there is no
+     * need to explicitly check for them.
+     */
+    if (poll_start) {
+        event_notifier_init_fd(svq_kick, svq_kick_fd);
+        event_notifier_set(svq_kick);
+        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
+    }
+}
+
+/**
+ * Stop the shadow virtqueue operation.
+ * @svq: Shadow Virtqueue
+ */
+void vhost_svq_stop(VhostShadowVirtqueue *svq)
+{
+    event_notifier_set_handler(&svq->svq_kick, NULL);
+}

 /**
  * Creates vhost shadow virtqueue, and instructs the vhost device to use the
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
         goto err_init_hdev_call;
     }

+    event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
     return g_steal_pointer(&svq);

 err_init_hdev_call:
@@ -XXX,XX +XXX,XX @@ err_init_hdev_kick:
 void vhost_svq_free(gpointer pvq)
 {
     VhostShadowVirtqueue *vq = pvq;
+    vhost_svq_stop(vq);
     event_notifier_cleanup(&vq->hdev_kick);
     event_notifier_cleanup(&vq->hdev_call);
     g_free(vq);
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
     EventNotifier hdev_kick;
     /* Shadow call notifier, sent to vhost */
     EventNotifier hdev_call;
+
+    /*
+     * Borrowed virtqueue's guest to host notifier. To borrow it in this event
+     * notifier allows to recover the VhostShadowVirtqueue from the event loop
+     * easily. If we use the VirtQueue's one, we don't have an easy way to
+     * retrieve VhostShadowVirtqueue.
+     *
+     * So shadow virtqueue must not clean it, or we would lose VirtQueue one.
+     */
+    EventNotifier svq_kick;
 } VhostShadowVirtqueue;

+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
+
+void vhost_svq_stop(VhostShadowVirtqueue *svq);
+
 VhostShadowVirtqueue *vhost_svq_new(void);

 void vhost_svq_free(gpointer vq);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-backend.h"
 #include "hw/virtio/virtio-net.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "exec/address-spaces.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
 #include "trace.h"
 #include "qemu-common.h"
+#include "qapi/error.h"

 /*
  * Return one past the end of the end of section. Be careful with uint64_t
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
     return v->index != 0;
 }

+static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
+                               Error **errp)
+{
+    g_autoptr(GPtrArray) shadow_vqs = NULL;
+
+    if (!v->shadow_vqs_enabled) {
+        return 0;
+    }
+
+    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
+    for (unsigned n = 0; n < hdev->nvqs; ++n) {
+        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
+
+        if (unlikely(!svq)) {
+            error_setg(errp, "Cannot create svq %u", n);
+            return -1;
+        }
+        g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
+    }
+
+    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
+    return 0;
+}
+
 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
 {
     struct vhost_vdpa *v;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
     dev->opaque =  opaque ;
     v->listener = vhost_vdpa_memory_listener;
     v->msg_type = VHOST_IOTLB_MSG_V2;
+    ret = vhost_vdpa_init_svq(dev, v, errp);
+    if (ret) {
+        goto err;
+    }

     vhost_vdpa_get_iova_range(v);

@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
                                VIRTIO_CONFIG_S_DRIVER);

     return 0;
+
+err:
+    ram_block_discard_disable(false);
+    return ret;
 }

 static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)

 static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int i;

+    if (v->shadow_vqs_enabled) {
+        /* FIXME SVQ is not compatible with host notifiers mr */
+        return;
+    }
+
     for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
         if (vhost_vdpa_host_notifier_init(dev, i)) {
             goto err;
@@ -XXX,XX +XXX,XX @@ err:
     return;
 }

+static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    size_t idx;
+
+    if (!v->shadow_vqs) {
+        return;
+    }
+
+    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
+        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
+    }
+    g_ptr_array_free(v->shadow_vqs, true);
+}
+
 static int vhost_vdpa_cleanup(struct vhost_dev *dev)
 {
     struct vhost_vdpa *v;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
     trace_vhost_vdpa_cleanup(dev, v);
     vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
     memory_listener_unregister(&v->listener);
+    vhost_vdpa_svq_cleanup(dev);

     dev->opaque = NULL;
     ram_block_discard_disable(false);
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
     return ret;
 }

+static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
+{
+    if (!v->shadow_vqs_enabled) {
+        return;
+    }
+
+    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        vhost_svq_stop(svq);
+    }
+}
+
 static int vhost_vdpa_reset_device(struct vhost_dev *dev)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int ret;
     uint8_t status = 0;

+    vhost_vdpa_reset_svq(v);
+
     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
     trace_vhost_vdpa_reset_device(dev, status);
     return ret;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
     return ret;
 }

+static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
+                                         struct vhost_vring_file *file)
+{
+    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+}
+
+/**
+ * Set the shadow virtqueue descriptors to the device
+ *
+ * @dev: The vhost device model
+ * @svq: The shadow virtqueue
+ * @idx: The index of the virtqueue in the vhost device
+ * @errp: Error
+ */
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
+                                 VhostShadowVirtqueue *svq, unsigned idx,
+                                 Error **errp)
+{
+    struct vhost_vring_file file = {
+        .index = dev->vq_index + idx,
+    };
+    const EventNotifier *event_notifier = &svq->hdev_kick;
+    int r;
+
+    file.fd = event_notifier_get_fd(event_notifier);
+    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Can't set device kick fd");
+    }
+
+    return r == 0;
+}
+
+static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    Error *err = NULL;
+    unsigned i;
+
+    if (!v->shadow_vqs) {
+        return true;
+    }
+
+    for (i = 0; i < v->shadow_vqs->len; ++i) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
+        if (unlikely(!ok)) {
+            error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+            return false;
+        }
+    }
+
+    return true;
+}
+
 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 {
     struct vhost_vdpa *v = dev->opaque;
+    bool ok;
     trace_vhost_vdpa_dev_start(dev, started);

     if (started) {
         vhost_vdpa_host_notifiers_init(dev);
+        ok = vhost_vdpa_svqs_start(dev);
+        if (unlikely(!ok)) {
+            return -1;
+        }
         vhost_vdpa_set_vring_ready(dev);
     } else {
         vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                      struct vhost_vring_file *file)
 {
-    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+    struct vhost_vdpa *v = dev->opaque;
+    int vdpa_idx = file->index - dev->vq_index;
+
+    if (v->shadow_vqs_enabled) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+        vhost_svq_set_svq_kick_fd(svq, file->fd);
+        return 0;
+    } else {
+        return vhost_vdpa_set_vring_dev_kick(dev, file);
+    }
 }

 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -XXX,XX +XXX,XX @@
 #ifndef HW_VIRTIO_VHOST_VDPA_H
 #define HW_VIRTIO_VHOST_VDPA_H

+#include <gmodule.h>
+
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"

@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
     bool iotlb_batch_begin_sent;
     MemoryListener listener;
     struct vhost_vdpa_iova_range iova_range;
+    bool shadow_vqs_enabled;
+    GPtrArray *shadow_vqs;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
--
2.7.4
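
The kick path above is nothing more than an eventfd-to-eventfd relay.
The following standalone Linux sketch (plain C, not QEMU code; the
variable names are invented to mirror the patch) shows the same round
trip with two eventfds standing in for svq_kick and hdev_kick:

    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/eventfd.h>

    int main(void)
    {
        int guest_kick = eventfd(0, EFD_NONBLOCK); /* like svq->svq_kick  */
        int hdev_kick  = eventfd(0, EFD_NONBLOCK); /* like svq->hdev_kick */
        uint64_t v = 1;

        /* Guest notifies: the equivalent of event_notifier_set(svq_kick) */
        write(guest_kick, &v, sizeof(v));

        /* vhost_handle_guest_kick: test-and-clear, then kick the device */
        if (read(guest_kick, &v, sizeof(v)) == sizeof(v)) {
            write(hdev_kick, &v, sizeof(v));
        }

        /* The device would now see its kick fd readable. */
        if (read(hdev_kick, &v, sizeof(v)) == sizeof(v)) {
            printf("device kicked %llu time(s)\n", (unsigned long long)v);
        }

        close(guest_kick);
        close(hdev_kick);
        return 0;
    }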
From: Eugenio Pérez <eperezma@redhat.com>

This will make qemu aware of the device used buffers, allowing it to
write the guest memory with its contents if needed.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 38 ++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h |  4 ++++
 hw/virtio/vhost-vdpa.c             | 31 +++++++++++++++++++++++++++++--
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(EventNotifier *n)
 }

 /**
+ * Forward vhost notifications
+ *
+ * @n: hdev call event notifier, the one that device set to notify svq.
+ */
+static void vhost_svq_handle_call(EventNotifier *n)
+{
+    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
+                                             hdev_call);
+    event_notifier_test_and_clear(n);
+    event_notifier_set(&svq->svq_call);
+}
+
+/**
+ * Set the call notifier for the SVQ to call the guest
+ *
+ * @svq: Shadow virtqueue
+ * @call_fd: call notifier
+ *
+ * Called on BQL context.
+ */
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
+{
+    if (call_fd == VHOST_FILE_UNBIND) {
+        /*
+         * Fail event_notifier_set if called handling device call.
+         *
+         * SVQ still needs device notifications, since it needs to keep
+         * forwarding used buffers even with the unbind.
+         */
+        memset(&svq->svq_call, 0, sizeof(svq->svq_call));
+    } else {
+        event_notifier_init_fd(&svq->svq_call, call_fd);
+    }
+}
+
+/**
  * Set a new file descriptor for the guest to kick the SVQ and notify for avail
  *
  * @svq: The svq
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
     }

     event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
+    event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
     return g_steal_pointer(&svq);

 err_init_hdev_call:
@@ -XXX,XX +XXX,XX @@ void vhost_svq_free(gpointer pvq)
     VhostShadowVirtqueue *vq = pvq;
     vhost_svq_stop(vq);
     event_notifier_cleanup(&vq->hdev_kick);
+    event_notifier_set_handler(&vq->hdev_call, NULL);
     event_notifier_cleanup(&vq->hdev_call);
     g_free(vq);
 }
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
      * So shadow virtqueue must not clean it, or we would lose VirtQueue one.
      */
     EventNotifier svq_kick;
+
+    /* Guest's call notifier, where the SVQ calls guest. */
+    EventNotifier svq_call;
 } VhostShadowVirtqueue;

 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);

 void vhost_svq_stop(VhostShadowVirtqueue *svq);

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
     return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
 }

+static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
+                                         struct vhost_vring_file *file)
+{
+    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+}
+
 /**
  * Set the shadow virtqueue descriptors to the device
  *
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
  * @svq: The shadow virtqueue
  * @idx: The index of the virtqueue in the vhost device
  * @errp: Error
+ *
+ * Note that this function does not rewind the kick file descriptor if it
+ * cannot set the call one.
  */
 static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
     r = vhost_vdpa_set_vring_dev_kick(dev, &file);
     if (unlikely(r != 0)) {
         error_setg_errno(errp, -r, "Can't set device kick fd");
+        return false;
+    }
+
+    event_notifier = &svq->hdev_call;
+    file.fd = event_notifier_get_fd(event_notifier);
+    r = vhost_vdpa_set_vring_dev_call(dev, &file);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Can't set device call fd");
     }

     return r == 0;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                      struct vhost_vring_file *file)
 {
-    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (v->shadow_vqs_enabled) {
+        int vdpa_idx = file->index - dev->vq_index;
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+
+        vhost_svq_set_svq_call_fd(svq, file->fd);
+        return 0;
+    } else {
+        return vhost_vdpa_set_vring_dev_call(dev, file);
+    }
 }

 static int vhost_vdpa_get_features(struct vhost_dev *dev,
--
2.7.4
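
The VHOST_FILE_UNBIND branch deserves a note: instead of closing the
guest's call fd, vhost_svq_set_svq_call_fd() zeroes the notifier so
that later event_notifier_set() calls fail harmlessly while SVQ keeps
consuming used buffers. A toy model of that behavior follows; it is
illustrative only (the Notifier type is invented, and treating a
zeroed notifier as "refuses to fire" is an assumption about QEMU's
EventNotifier internals based on the patch's comment):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Minimal stand-in for QEMU's EventNotifier (illustrative only). */
    typedef struct Notifier {
        bool initialized;
        int fd;
    } Notifier;

    static int notifier_set(Notifier *n)
    {
        if (!n->initialized) {
            return -1;      /* silently dropped, as the patch intends */
        }
        printf("notify fd %d\n", n->fd);
        return 0;
    }

    int main(void)
    {
        Notifier guest_call = { .initialized = true, .fd = 42 };

        notifier_set(&guest_call);      /* reaches the guest */

        /* VHOST_FILE_UNBIND: zero the notifier instead of closing it */
        memset(&guest_call, 0, sizeof(guest_call));
        notifier_set(&guest_call);      /* now a no-op */
        return 0;
    }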
From: Eugenio Pérez <eperezma@redhat.com>

This allows SVQ to negotiate features with the guest and the device. For
the device, SVQ is a driver. While this function bypasses all
non-transport features, it needs to disable the features that SVQ does
not support when forwarding buffers. This includes packed vq layout,
indirect descriptors or event idx.

Future changes can add support to offer more features to the guest,
since the use of VirtQueue gives this for free. This is left out at the
moment for simplicity.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 44 ++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h |  2 ++
 hw/virtio/vhost-vdpa.c             | 15 +++++++++++++
 3 files changed, 61 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"

 #include "qemu/error-report.h"
+#include "qapi/error.h"
 #include "qemu/main-loop.h"
 #include "linux-headers/linux/vhost.h"

 /**
+ * Validate the transport device features that both guests can use with the SVQ
+ * and SVQs can use with the device.
+ *
+ * @features: The features
+ * @errp: Error pointer
+ */
+bool vhost_svq_valid_features(uint64_t features, Error **errp)
+{
+    bool ok = true;
+    uint64_t svq_features = features;
+
+    for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
+         ++b) {
+        switch (b) {
+        case VIRTIO_F_ANY_LAYOUT:
+            continue;
+
+        case VIRTIO_F_ACCESS_PLATFORM:
+            /* SVQ trusts the host's IOMMU to translate addresses */
+        case VIRTIO_F_VERSION_1:
+            /* SVQ trusts that the guest vring is little endian */
+            if (!(svq_features & BIT_ULL(b))) {
+                svq_features |= BIT_ULL(b);
+                ok = false;
+            }
+            continue;
+
+        default:
+            if (svq_features & BIT_ULL(b)) {
+                svq_features &= ~BIT_ULL(b);
+                ok = false;
+            }
+        }
+    }
+
+    if (!ok) {
+        error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
+                         ", ok: 0x%"PRIx64, features, svq_features);
+    }
+    return ok;
+}
+
+/**
  * Forward guest notifications.
  *
  * @n: guest kick event notifier, the one that guest set to notify svq.
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
     EventNotifier svq_call;
 } VhostShadowVirtqueue;

+bool vhost_svq_valid_features(uint64_t features, Error **errp);
+
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
                                Error **errp)
 {
     g_autoptr(GPtrArray) shadow_vqs = NULL;
+    uint64_t dev_features, svq_features;
+    int r;
+    bool ok;

     if (!v->shadow_vqs_enabled) {
         return 0;
     }

+    r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
+    if (r != 0) {
+        error_setg_errno(errp, -r, "Can't get vdpa device features");
+        return r;
+    }
+
+    svq_features = dev_features;
+    ok = vhost_svq_valid_features(svq_features, errp);
+    if (unlikely(!ok)) {
+        return -1;
+    }
+
     shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
     for (unsigned n = 0; n < hdev->nvqs; ++n) {
         g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
--
2.7.4
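
As a worked example of the validation rule, the standalone program
below (not QEMU code) strips the ring-layout features SVQ cannot
forward and adds the ones its own vring accesses require. The feature
bit numbers follow the VIRTIO 1.x spec; cross-check them against
QEMU's standard headers before relying on them:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BIT_ULL(n) (1ULL << (n))

    #define VIRTIO_RING_F_INDIRECT_DESC 28
    #define VIRTIO_RING_F_EVENT_IDX     29
    #define VIRTIO_F_VERSION_1          32
    #define VIRTIO_F_ACCESS_PLATFORM    33
    #define VIRTIO_F_RING_PACKED        34

    int main(void)
    {
        /* A device offering packed rings and event idx, plus VERSION_1. */
        uint64_t offer = BIT_ULL(VIRTIO_F_VERSION_1) |
                         BIT_ULL(VIRTIO_RING_F_EVENT_IDX) |
                         BIT_ULL(VIRTIO_F_RING_PACKED);

        uint64_t svq = offer;

        /* SVQ forwards buffers with a plain split ring: strip what it
         * cannot emulate... */
        svq &= ~(BIT_ULL(VIRTIO_RING_F_INDIRECT_DESC) |
                 BIT_ULL(VIRTIO_RING_F_EVENT_IDX) |
                 BIT_ULL(VIRTIO_F_RING_PACKED));

        /* ...and require what its own vring accesses depend on. */
        svq |= BIT_ULL(VIRTIO_F_VERSION_1) |
               BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);

        printf("offer: 0x%" PRIx64 "\nok:    0x%" PRIx64 "\n", offer, svq);
        /* offer != svq here, so the real function would set ok = false
         * and report both masks in its error message. */
        return 0;
    }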
From: Eugenio Pérez <eperezma@redhat.com>

It reports the shadow virtqueue address from qemu virtual address space.

This address will be different from the guest's vaddr, but the device can
access it, so SVQ takes special care of its alignment and lack of garbage
data. It assumes that IOMMU will work in host_page_size ranges for that.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 29 +++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h |  9 +++++++++
 2 files changed, 38 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
 }

 /**
+ * Get the shadow vq vring address.
+ * @svq: Shadow virtqueue
+ * @addr: Destination to store address
+ */
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
+                              struct vhost_vring_addr *addr)
+{
+    addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
+    addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
+    addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
+}
+
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
+{
+    size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
+    size_t avail_size = offsetof(vring_avail_t, ring) +
+                        sizeof(uint16_t) * svq->vring.num;
+
+    return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size);
+}
+
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
+{
+    size_t used_size = offsetof(vring_used_t, ring) +
+                       sizeof(vring_used_elem_t) * svq->vring.num;
+    return ROUND_UP(used_size, qemu_real_host_page_size);
+}
+
+/**
  * Set a new file descriptor for the guest to kick the SVQ and notify for avail
  *
  * @svq: The svq
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@
 #define VHOST_SHADOW_VIRTQUEUE_H

 #include "qemu/event_notifier.h"
+#include "hw/virtio/virtio.h"
+#include "standard-headers/linux/vhost_types.h"

 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
+    /* Shadow vring */
+    struct vring vring;
+
     /* Shadow kick notifier, sent to vhost */
     EventNotifier hdev_kick;
     /* Shadow call notifier, sent to vhost */
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp);

 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
+                              struct vhost_vring_addr *addr);
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);

 void vhost_svq_stop(VhostShadowVirtqueue *svq);

--
2.7.4
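
The page-rounding arithmetic is easy to sanity-check by hand. A
standalone sketch (not QEMU code) for a 256-entry split ring on a
4 KiB-page host, using the layout sizes from the virtio spec
(16-byte descriptors; avail = 4 + 2*num bytes; used = 4 + 8*num bytes):

    #include <stddef.h>
    #include <stdio.h>

    #define ROUND_UP(x, a) (((x) + (a) - 1) / (a) * (a))

    int main(void)
    {
        const size_t page = 4096;
        const size_t num = 256;               /* vring size */

        size_t desc_size  = 16 * num;         /* sizeof(vring_desc)   */
        size_t avail_size = 4 + 2 * num;      /* flags, idx, ring[]   */
        size_t used_size  = 4 + 8 * num;      /* flags, idx, elem[]   */

        /* driver area: descriptors + avail ring, page aligned */
        printf("driver area: %zu\n", ROUND_UP(desc_size + avail_size, page));
        /* -> 8192: 4096 + 516 bytes rounds up to two pages */

        /* device area: used ring, page aligned */
        printf("device area: %zu\n", ROUND_UP(used_size, page));
        /* -> 4096: 2052 bytes fit in one page */

        return 0;
    }

Keeping the driver and device areas in separate page-aligned ranges is
what lets the IOMMU map the device-writable used ring independently of
the read-only descriptor and avail areas.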
1
From: Andrew Melnychenko <andrew@daynix.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
RSS program and Makefile to build it.
3
First half of the buffers forwarding part, preparing vhost-vdpa
4
The bpftool used to generate '.h' file.
4
callbacks to SVQ to offer it. QEMU cannot enable it at this moment, so
5
The data in that file may be loaded by libbpf.
5
this is effectively dead code at the moment, but it helps to reduce
6
EBPF compilation is not required for building qemu.
6
patch size.
7
You can use Makefile if you need to regenerate rss.bpf.skeleton.h.
8
7
9
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
9
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
11
---
13
tools/ebpf/Makefile.ebpf | 21 ++
12
hw/virtio/vhost-vdpa.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
14
tools/ebpf/rss.bpf.c | 571 +++++++++++++++++++++++++++++++++++++++++++++++
13
1 file changed, 41 insertions(+), 7 deletions(-)
15
2 files changed, 592 insertions(+)
16
create mode 100755 tools/ebpf/Makefile.ebpf
17
create mode 100644 tools/ebpf/rss.bpf.c
18
14
19
diff --git a/tools/ebpf/Makefile.ebpf b/tools/ebpf/Makefile.ebpf
15
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
20
new file mode 100755
16
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX
17
--- a/hw/virtio/vhost-vdpa.c
22
--- /dev/null
18
+++ b/hw/virtio/vhost-vdpa.c
23
+++ b/tools/ebpf/Makefile.ebpf
19
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
24
@@ -XXX,XX +XXX,XX @@
20
return ret;
25
+OBJS = rss.bpf.o
21
}
26
+
22
27
+LLC ?= llc
23
+static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
28
+CLANG ?= clang
24
+ struct vhost_vring_state *ring)
29
+INC_FLAGS = `$(CLANG) -print-file-name=include`
25
+{
30
+EXTRA_CFLAGS ?= -O2 -emit-llvm -fno-stack-protector
26
+ trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
31
+
27
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
32
+all: $(OBJS)
33
+
34
+.PHONY: clean
35
+
36
+clean:
37
+    rm -f $(OBJS)
38
+
39
+$(OBJS): %.o:%.c
40
+    $(CLANG) $(INC_FLAGS) \
41
+ -D__KERNEL__ -D__ASM_SYSREG_H \
42
+ -I../include $(LINUXINCLUDE) \
43
+ $(EXTRA_CFLAGS) -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
44
+    bpftool gen skeleton rss.bpf.o > rss.bpf.skeleton.h
45
+    cp rss.bpf.skeleton.h ../../ebpf/
46
diff --git a/tools/ebpf/rss.bpf.c b/tools/ebpf/rss.bpf.c
47
new file mode 100644
48
index XXXXXXX..XXXXXXX
49
--- /dev/null
50
+++ b/tools/ebpf/rss.bpf.c
51
@@ -XXX,XX +XXX,XX @@
52
+/*
53
+ * eBPF RSS program
54
+ *
55
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
56
+ *
57
+ * Authors:
58
+ * Andrew Melnychenko <andrew@daynix.com>
59
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
60
+ *
61
+ * This work is licensed under the terms of the GNU GPL, version 2. See
62
+ * the COPYING file in the top-level directory.
63
+ *
64
+ * Prepare:
65
+ * Requires llvm, clang, bpftool, linux kernel tree
66
+ *
67
+ * Build rss.bpf.skeleton.h:
68
+ * make -f Makefile.ebpf clean all
69
+ */
70
+
71
+#include <stddef.h>
72
+#include <stdbool.h>
73
+#include <linux/bpf.h>
74
+
75
+#include <linux/in.h>
76
+#include <linux/if_ether.h>
77
+#include <linux/ip.h>
78
+#include <linux/ipv6.h>
79
+
80
+#include <linux/udp.h>
81
+#include <linux/tcp.h>
82
+
83
+#include <bpf/bpf_helpers.h>
84
+#include <bpf/bpf_endian.h>
85
+#include <linux/virtio_net.h>
86
+
87
+#define INDIRECTION_TABLE_SIZE 128
88
+#define HASH_CALCULATION_BUFFER_SIZE 36
89
+
90
+struct rss_config_t {
91
+ __u8 redirect;
92
+ __u8 populate_hash;
93
+ __u32 hash_types;
94
+ __u16 indirections_len;
95
+ __u16 default_queue;
96
+} __attribute__((packed));
97
+
98
+struct toeplitz_key_data_t {
99
+ __u32 leftmost_32_bits;
100
+ __u8 next_byte[HASH_CALCULATION_BUFFER_SIZE];
101
+};
102
+
103
+struct packet_hash_info_t {
104
+ __u8 is_ipv4;
105
+ __u8 is_ipv6;
106
+ __u8 is_udp;
107
+ __u8 is_tcp;
108
+ __u8 is_ipv6_ext_src;
109
+ __u8 is_ipv6_ext_dst;
110
+ __u8 is_fragmented;
111
+
112
+ __u16 src_port;
113
+ __u16 dst_port;
114
+
115
+ union {
116
+ struct {
117
+ __be32 in_src;
118
+ __be32 in_dst;
119
+ };
120
+
121
+ struct {
122
+ struct in6_addr in6_src;
123
+ struct in6_addr in6_dst;
124
+ struct in6_addr in6_ext_src;
125
+ struct in6_addr in6_ext_dst;
126
+ };
127
+ };
128
+};
129
+
130
+struct bpf_map_def SEC("maps")
131
+tap_rss_map_configurations = {
132
+ .type = BPF_MAP_TYPE_ARRAY,
133
+ .key_size = sizeof(__u32),
134
+ .value_size = sizeof(struct rss_config_t),
135
+ .max_entries = 1,
136
+};
137
+
138
+struct bpf_map_def SEC("maps")
139
+tap_rss_map_toeplitz_key = {
140
+ .type = BPF_MAP_TYPE_ARRAY,
141
+ .key_size = sizeof(__u32),
142
+ .value_size = sizeof(struct toeplitz_key_data_t),
143
+ .max_entries = 1,
144
+};
145
+
146
+struct bpf_map_def SEC("maps")
147
+tap_rss_map_indirection_table = {
148
+ .type = BPF_MAP_TYPE_ARRAY,
149
+ .key_size = sizeof(__u32),
150
+ .value_size = sizeof(__u16),
151
+ .max_entries = INDIRECTION_TABLE_SIZE,
152
+};
153
+
154
+static inline void net_rx_rss_add_chunk(__u8 *rss_input, size_t *bytes_written,
155
+ const void *ptr, size_t size) {
156
+ __builtin_memcpy(&rss_input[*bytes_written], ptr, size);
157
+ *bytes_written += size;
158
+}
28
+}
159
+
29
+
160
+static inline
30
static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
161
+void net_toeplitz_add(__u32 *result,
31
struct vhost_vring_file *file)
162
+ __u8 *input,
32
{
163
+ __u32 len
33
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
164
+ , struct toeplitz_key_data_t *key) {
34
return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
35
}
36
37
+static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
38
+ struct vhost_vring_addr *addr)
39
+{
40
+ trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
41
+ addr->desc_user_addr, addr->used_user_addr,
42
+ addr->avail_user_addr,
43
+ addr->log_guest_addr);
165
+
44
+
166
+ __u32 accumulator = *result;
45
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
167
+ __u32 leftmost_32_bits = key->leftmost_32_bits;
168
+ __u32 byte;
169
+
46
+
170
+ for (byte = 0; byte < HASH_CALCULATION_BUFFER_SIZE; byte++) {
171
+ __u8 input_byte = input[byte];
172
+ __u8 key_byte = key->next_byte[byte];
173
+ __u8 bit;
174
+
175
+ for (bit = 0; bit < 8; bit++) {
176
+ if (input_byte & (1 << 7)) {
177
+ accumulator ^= leftmost_32_bits;
178
+ }
179
+
180
+ leftmost_32_bits =
181
+ (leftmost_32_bits << 1) | ((key_byte & (1 << 7)) >> 7);
182
+
183
+ input_byte <<= 1;
184
+ key_byte <<= 1;
185
+ }
186
+ }
187
+
188
+ *result = accumulator;
189
+}
47
+}
190
+
48
+
49
/**
50
* Set the shadow virtqueue descriptors to the device
51
*
52
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
53
static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
54
struct vhost_vring_addr *addr)
55
{
56
- trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
57
- addr->desc_user_addr, addr->used_user_addr,
58
- addr->avail_user_addr,
59
- addr->log_guest_addr);
60
- return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
61
+ struct vhost_vdpa *v = dev->opaque;
191
+
62
+
192
+static inline int ip6_extension_header_type(__u8 hdr_type)
63
+ if (v->shadow_vqs_enabled) {
193
+{
64
+ /*
194
+ switch (hdr_type) {
65
+ * Device vring addr was set at device start. SVQ base is handled by
195
+ case IPPROTO_HOPOPTS:
66
+ * VirtQueue code.
196
+ case IPPROTO_ROUTING:
67
+ */
197
+ case IPPROTO_FRAGMENT:
198
+ case IPPROTO_ICMPV6:
199
+ case IPPROTO_NONE:
200
+ case IPPROTO_DSTOPTS:
201
+ case IPPROTO_MH:
202
+ return 1;
203
+ default:
204
+ return 0;
205
+ }
206
+}
207
+/*
208
+ * According to
209
+ * https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml
210
+ * we expect that there are would be no more than 11 extensions in IPv6 header,
211
+ * also there is 27 TLV options for Destination and Hop-by-hop extensions.
212
+ * Need to choose reasonable amount of maximum extensions/options we may
213
+ * check to find ext src/dst.
214
+ */
215
+#define IP6_EXTENSIONS_COUNT 11
216
+#define IP6_OPTIONS_COUNT 30
217
+
218
+static inline int parse_ipv6_ext(struct __sk_buff *skb,
219
+ struct packet_hash_info_t *info,
220
+ __u8 *l4_protocol, size_t *l4_offset)
221
+{
222
+ int err = 0;
223
+
224
+ if (!ip6_extension_header_type(*l4_protocol)) {
225
+ return 0;
68
+ return 0;
226
+ }
69
+ }
227
+
70
+
228
+ struct ipv6_opt_hdr ext_hdr = {};
71
+ return vhost_vdpa_set_vring_dev_addr(dev, addr);
72
}
73
74
static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
75
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
76
static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
77
struct vhost_vring_state *ring)
78
{
79
- trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
80
- return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
81
+ struct vhost_vdpa *v = dev->opaque;
229
+
82
+
230
+ for (unsigned int i = 0; i < IP6_EXTENSIONS_COUNT; ++i) {
83
+ if (v->shadow_vqs_enabled) {
231
+
84
+ /*
232
+ err = bpf_skb_load_bytes_relative(skb, *l4_offset, &ext_hdr,
85
+ * Device vring base was set at device start. SVQ base is handled by
233
+ sizeof(ext_hdr), BPF_HDR_START_NET);
86
+ * VirtQueue code.
234
+ if (err) {
87
+ */
235
+ goto error;
236
+ }
237
+
238
+ if (*l4_protocol == IPPROTO_ROUTING) {
239
+ struct ipv6_rt_hdr ext_rt = {};
240
+
241
+ err = bpf_skb_load_bytes_relative(skb, *l4_offset, &ext_rt,
242
+ sizeof(ext_rt), BPF_HDR_START_NET);
243
+ if (err) {
244
+ goto error;
245
+ }
246
+
247
+ if ((ext_rt.type == IPV6_SRCRT_TYPE_2) &&
248
+ (ext_rt.hdrlen == sizeof(struct in6_addr) / 8) &&
249
+ (ext_rt.segments_left == 1)) {
250
+
251
+ err = bpf_skb_load_bytes_relative(skb,
252
+ *l4_offset + offsetof(struct rt2_hdr, addr),
253
+ &info->in6_ext_dst, sizeof(info->in6_ext_dst),
254
+ BPF_HDR_START_NET);
255
+ if (err) {
256
+ goto error;
257
+ }
258
+
259
+ info->is_ipv6_ext_dst = 1;
260
+ }
261
+
262
+ } else if (*l4_protocol == IPPROTO_DSTOPTS) {
263
+ struct ipv6_opt_t {
264
+ __u8 type;
265
+ __u8 length;
266
+ } __attribute__((packed)) opt = {};
267
+
268
+ size_t opt_offset = sizeof(ext_hdr);
269
+
270
+ for (unsigned int j = 0; j < IP6_OPTIONS_COUNT; ++j) {
271
+ err = bpf_skb_load_bytes_relative(skb, *l4_offset + opt_offset,
272
+ &opt, sizeof(opt), BPF_HDR_START_NET);
273
+ if (err) {
274
+ goto error;
275
+ }
276
+
277
+ if (opt.type == IPV6_TLV_HAO) {
278
+ err = bpf_skb_load_bytes_relative(skb,
279
+ *l4_offset + opt_offset
280
+ + offsetof(struct ipv6_destopt_hao, addr),
281
+ &info->in6_ext_src, sizeof(info->in6_ext_src),
282
+ BPF_HDR_START_NET);
283
+ if (err) {
284
+ goto error;
285
+ }
286
+
287
+ info->is_ipv6_ext_src = 1;
288
+ break;
289
+ }
290
+
291
+ opt_offset += (opt.type == IPV6_TLV_PAD1) ?
292
+ 1 : opt.length + sizeof(opt);
293
+
294
+ if (opt_offset + 1 >= ext_hdr.hdrlen * 8) {
295
+ break;
296
+ }
297
+ }
298
+ } else if (*l4_protocol == IPPROTO_FRAGMENT) {
299
+ info->is_fragmented = true;
300
+ }
301
+
302
+ *l4_protocol = ext_hdr.nexthdr;
303
+ *l4_offset += (ext_hdr.hdrlen + 1) * 8;
304
+
305
+ if (!ip6_extension_header_type(ext_hdr.nexthdr)) {
306
+ return 0;
307
+ }
308
+ }
309
+
310
+ return 0;
311
+error:
312
+ return err;
313
+}
314
+
315
+static __be16 parse_eth_type(struct __sk_buff *skb)
316
+{
317
+ unsigned int offset = 12;
318
+ __be16 ret = 0;
319
+ int err = 0;
320
+
321
+ err = bpf_skb_load_bytes_relative(skb, offset, &ret, sizeof(ret),
322
+ BPF_HDR_START_MAC);
323
+ if (err) {
324
+ return 0;
88
+ return 0;
325
+ }
89
+ }
326
+
90
+
327
+ switch (bpf_ntohs(ret)) {
91
+ return vhost_vdpa_set_dev_vring_base(dev, ring);
328
+ case ETH_P_8021AD:
92
}
329
+ offset += 4;
93
330
+ case ETH_P_8021Q:
94
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
331
+ offset += 4;
332
+ err = bpf_skb_load_bytes_relative(skb, offset, &ret, sizeof(ret),
333
+ BPF_HDR_START_MAC);
334
+ default:
335
+ break;
336
+ }
337
+
338
+ if (err) {
339
+ return 0;
340
+ }
341
+
342
+ return ret;
343
+}
344
+
345
+static inline int parse_packet(struct __sk_buff *skb,
346
+ struct packet_hash_info_t *info)
347
+{
348
+ int err = 0;
349
+
350
+ if (!info || !skb) {
351
+ return -1;
352
+ }
353
+
354
+ size_t l4_offset = 0;
355
+ __u8 l4_protocol = 0;
356
+ __u16 l3_protocol = bpf_ntohs(parse_eth_type(skb));
357
+ if (l3_protocol == 0) {
358
+ err = -1;
359
+ goto error;
360
+ }
361
+
362
+ if (l3_protocol == ETH_P_IP) {
363
+ info->is_ipv4 = 1;
364
+
365
+ struct iphdr ip = {};
366
+ err = bpf_skb_load_bytes_relative(skb, 0, &ip, sizeof(ip),
367
+ BPF_HDR_START_NET);
368
+ if (err) {
369
+ goto error;
370
+ }
371
+
372
+ info->in_src = ip.saddr;
373
+ info->in_dst = ip.daddr;
374
+ info->is_fragmented = !!ip.frag_off;
375
+
376
+ l4_protocol = ip.protocol;
377
+ l4_offset = ip.ihl * 4;
378
+ } else if (l3_protocol == ETH_P_IPV6) {
379
+ info->is_ipv6 = 1;
380
+
381
+ struct ipv6hdr ip6 = {};
382
+ err = bpf_skb_load_bytes_relative(skb, 0, &ip6, sizeof(ip6),
383
+ BPF_HDR_START_NET);
384
+ if (err) {
385
+ goto error;
386
+ }
387
+
388
+ info->in6_src = ip6.saddr;
389
+ info->in6_dst = ip6.daddr;
390
+
391
+ l4_protocol = ip6.nexthdr;
392
+ l4_offset = sizeof(ip6);
393
+
394
+ err = parse_ipv6_ext(skb, info, &l4_protocol, &l4_offset);
395
+ if (err) {
396
+ goto error;
397
+ }
398
+ }
399
+
400
+ if (l4_protocol != 0 && !info->is_fragmented) {
401
+ if (l4_protocol == IPPROTO_TCP) {
402
+ info->is_tcp = 1;
403
+
404
+ struct tcphdr tcp = {};
405
+ err = bpf_skb_load_bytes_relative(skb, l4_offset, &tcp, sizeof(tcp),
406
+ BPF_HDR_START_NET);
407
+ if (err) {
408
+ goto error;
409
+ }
410
+
411
+ info->src_port = tcp.source;
412
+ info->dst_port = tcp.dest;
413
+ } else if (l4_protocol == IPPROTO_UDP) { /* TODO: add udplite? */
414
+ info->is_udp = 1;
415
+
416
+ struct udphdr udp = {};
417
+ err = bpf_skb_load_bytes_relative(skb, l4_offset, &udp, sizeof(udp),
418
+ BPF_HDR_START_NET);
419
+ if (err) {
420
+ goto error;
421
+ }
422
+
423
+ info->src_port = udp.source;
424
+ info->dst_port = udp.dest;
425
+ }
426
+ }
427
+
428
+ return 0;
429
+
430
+error:
431
+ return err;
432
+}
433
+
+static inline __u32 calculate_rss_hash(struct __sk_buff *skb,
+ struct rss_config_t *config, struct toeplitz_key_data_t *toe)
+{
+ __u8 rss_input[HASH_CALCULATION_BUFFER_SIZE] = {};
+ size_t bytes_written = 0;
+ __u32 result = 0;
+ int err = 0;
+ struct packet_hash_info_t packet_info = {};
+
+ err = parse_packet(skb, &packet_info);
+ if (err) {
+ return 0;
+ }
+
+ if (packet_info.is_ipv4) {
+ if (packet_info.is_tcp &&
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
+
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in_src,
+ sizeof(packet_info.in_src));
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in_dst,
+ sizeof(packet_info.in_dst));
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.src_port,
+ sizeof(packet_info.src_port));
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.dst_port,
+ sizeof(packet_info.dst_port));
+ } else if (packet_info.is_udp &&
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
+
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in_src,
+ sizeof(packet_info.in_src));
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in_dst,
+ sizeof(packet_info.in_dst));
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.src_port,
+ sizeof(packet_info.src_port));
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.dst_port,
+ sizeof(packet_info.dst_port));
+ } else if (config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in_src,
+ sizeof(packet_info.in_src));
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in_dst,
+ sizeof(packet_info.in_dst));
+ }
+ } else if (packet_info.is_ipv6) {
+ if (packet_info.is_tcp &&
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
+
+ if (packet_info.is_ipv6_ext_src &&
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
+
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_ext_src,
+ sizeof(packet_info.in6_ext_src));
+ } else {
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_src,
+ sizeof(packet_info.in6_src));
+ }
+ if (packet_info.is_ipv6_ext_dst &&
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
+
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_ext_dst,
+ sizeof(packet_info.in6_ext_dst));
+ } else {
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_dst,
+ sizeof(packet_info.in6_dst));
+ }
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.src_port,
+ sizeof(packet_info.src_port));
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.dst_port,
+ sizeof(packet_info.dst_port));
+ } else if (packet_info.is_udp &&
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
+
+ if (packet_info.is_ipv6_ext_src &&
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
+
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_ext_src,
+ sizeof(packet_info.in6_ext_src));
+ } else {
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_src,
+ sizeof(packet_info.in6_src));
+ }
+ if (packet_info.is_ipv6_ext_dst &&
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
+
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_ext_dst,
+ sizeof(packet_info.in6_ext_dst));
+ } else {
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_dst,
+ sizeof(packet_info.in6_dst));
+ }
+
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.src_port,
+ sizeof(packet_info.src_port));
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.dst_port,
+ sizeof(packet_info.dst_port));
+
+ } else if (config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
+ if (packet_info.is_ipv6_ext_src &&
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
+
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_ext_src,
+ sizeof(packet_info.in6_ext_src));
+ } else {
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_src,
+ sizeof(packet_info.in6_src));
+ }
+ if (packet_info.is_ipv6_ext_dst &&
+ config->hash_types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
+
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_ext_dst,
+ sizeof(packet_info.in6_ext_dst));
+ } else {
+ net_rx_rss_add_chunk(rss_input, &bytes_written,
+ &packet_info.in6_dst,
+ sizeof(packet_info.in6_dst));
+ }
+ }
+ }
+
+ if (bytes_written) {
+ net_toeplitz_add(&result, rss_input, bytes_written, toe);
+ }
+
+ return result;
+}
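
For reference, net_toeplitz_add() above computes the standard Toeplitz hash
from the RSS specification over rss_input. A minimal standalone sketch of
that hash (toeplitz_hash() is a hypothetical helper for illustration, not
the streaming helper this file defines):

    #include <stddef.h>
    #include <stdint.h>

    static uint32_t toeplitz_hash(const uint8_t *input, size_t len,
                                  const uint8_t *key /* >= len + 4 bytes */)
    {
        /* Sliding 32-bit window over the key, advanced one bit per input bit */
        uint32_t window = ((uint32_t)key[0] << 24) | ((uint32_t)key[1] << 16) |
                          ((uint32_t)key[2] << 8) | key[3];
        uint32_t result = 0;

        for (size_t i = 0; i < len; i++) {
            for (int bit = 7; bit >= 0; bit--) {
                if (input[i] & (1 << bit)) {
                    result ^= window;
                }
                /* Shift in the next key bit, MSB first */
                window = (window << 1) | ((key[i + 4] >> bit) & 1);
            }
        }
        return result;
    }

For a TCPv4 packet, for instance, the input is the 12 bytes accumulated
above (saddr, daddr, src_port, dst_port) hashed against the RSS key the
guest supplies, and the result then indexes the indirection table as
hash % indirections_len in the steering program below.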
+
+SEC("tun_rss_steering")
+int tun_rss_steering_prog(struct __sk_buff *skb)
+{
+
+ struct rss_config_t *config;
+ struct toeplitz_key_data_t *toe;
+
+ __u32 key = 0;
+ __u32 hash = 0;
+
+ config = bpf_map_lookup_elem(&tap_rss_map_configurations, &key);
+ toe = bpf_map_lookup_elem(&tap_rss_map_toeplitz_key, &key);
+
+ if (config && toe) {
+ if (!config->redirect) {
+ return config->default_queue;
+ }
+
+ hash = calculate_rss_hash(skb, config, toe);
+ if (hash) {
+ __u32 table_idx = hash % config->indirections_len;
+ __u16 *queue = 0;
+
+ queue = bpf_map_lookup_elem(&tap_rss_map_indirection_table,
+ &table_idx);
+
+ if (queue) {
+ return *queue;
+ }
+ }
+
+ return config->default_queue;
+ }
+
+ return -1;
+}
+
+char _license[] SEC("license") = "GPL v2";
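
How the maps referenced above get populated is the subject of the loader
patch later in this series; as a rough sketch of the userspace side only
(publish_rss_config() is a hypothetical helper, assuming a libbpf
environment and the rss_config_t layout above):

    #include <bpf/bpf.h>
    #include <linux/bpf.h>

    /*
     * Publish a new RSS config into the program's single-entry
     * configuration map; ebpf_rss_set_all() in the loader patch plays
     * this role through the generated skeleton.
     */
    static int publish_rss_config(int config_map_fd,
                                  const struct rss_config_t *cfg)
    {
        __u32 key = 0;
        return bpf_map_update_elem(config_map_fd, &key, cfg, BPF_ANY);
    }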
--
2.7.4

From: Andrew Melnychenko <andrew@daynix.com>

When RSS is enabled the device tries to load the eBPF program
to select the RX virtqueue in the TUN. If the eBPF program can be loaded,
RSS will also function with vhost (works with kernel 5.8 and later).
Software RSS is used as a fallback with vhost=off, when eBPF can't be
loaded, or when hash population is requested by the guest.

Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/vhost_net.c | 3 ++
 hw/net/virtio-net.c | 116 +++++++++++++++++++++++++++++++++++++++--
 include/hw/virtio/virtio-net.h | 4 ++
 net/vhost-vdpa.c | 2 +
 4 files changed, 122 insertions(+), 3 deletions(-)

From: Eugenio Pérez <eperezma@redhat.com>

Initial version of shadow virtqueue that actually forwards buffers. There
is no iommu support at the moment, and that will be addressed in future
patches of this series. Since all vhost-vdpa devices use forced IOMMU,
this means that SVQ is not usable at this point of the series on any
device.

For simplicity it only supports modern devices, that expect a vring
in little endian, with a split ring and no event idx or indirect
descriptors. Support for them will not be added in this series.

It reuses the VirtQueue code for the device part. The driver part is
based on Linux's virtio_ring driver, but with stripped functionality
and optimizations so it's easier to review.

However, forwarding buffers has some particular pieces: one of the most
unexpected ones is that a guest's buffer can expand through more than
one descriptor in SVQ. While this is handled gracefully by qemu's
emulated virtio devices, it may cause unexpected SVQ queue full. This
patch also solves it by checking for this condition at both guest's
kicks and device's calls. The code may be more elegant in the future if
SVQ code runs in its own iocontext.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 352 ++++++++++++++++++++++++++++++++++++-
 hw/virtio/vhost-shadow-virtqueue.h | 26 +++
 hw/virtio/vhost-vdpa.c | 155 +++++++++++++++-
 3 files changed, 522 insertions(+), 11 deletions(-)

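For readers new to split virtqueues, the descriptor chaining the message
above refers to can be sketched as follows (a schematic, self-contained
sketch of the virtio 1.0 split-ring layout this series assumes; it is not
code from the patch, and chain_segments() is a hypothetical helper):

    #include <stdint.h>

    #define VRING_DESC_F_NEXT  1   /* buffer continues in desc[next] */
    #define VRING_DESC_F_WRITE 2   /* device write-only buffer */

    struct vring_desc_sketch {
        uint64_t addr;   /* address of this segment */
        uint32_t len;    /* length of this segment */
        uint16_t flags;
        uint16_t next;   /* valid iff VRING_DESC_F_NEXT is set */
    };

    /*
     * Chain n scatter-gather segments starting at free_head and return
     * the head index to publish in the avail ring. One guest element can
     * consume several descriptors this way, which is why the patch checks
     * vhost_svq_available_slots() before adding an element.
     */
    static uint16_t chain_segments(struct vring_desc_sketch *desc,
                                   uint16_t free_head,
                                   const uint64_t *addrs,
                                   const uint32_t *lens,
                                   unsigned n, int device_writes)
    {
        uint16_t i = free_head;

        for (unsigned s = 0; s < n; s++) {
            desc[i].addr = addrs[s];
            desc[i].len = lens[s];
            desc[i].flags = (device_writes ? VRING_DESC_F_WRITE : 0) |
                            (s + 1 < n ? VRING_DESC_F_NEXT : 0);
            if (s + 1 < n) {
                i = desc[i].next;   /* follow the free list */
            }
        }
        return free_head;
    }
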
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
19
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
20
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/net/vhost_net.c
36
--- a/hw/virtio/vhost-shadow-virtqueue.c
22
+++ b/hw/net/vhost_net.c
37
+++ b/hw/virtio/vhost-shadow-virtqueue.c
23
@@ -XXX,XX +XXX,XX @@ static const int kernel_feature_bits[] = {
38
@@ -XXX,XX +XXX,XX @@
24
VIRTIO_NET_F_MTU,
39
#include "qemu/error-report.h"
25
VIRTIO_F_IOMMU_PLATFORM,
40
#include "qapi/error.h"
26
VIRTIO_F_RING_PACKED,
41
#include "qemu/main-loop.h"
27
+ VIRTIO_NET_F_HASH_REPORT,
42
+#include "qemu/log.h"
28
VHOST_INVALID_FEATURE_BIT
43
+#include "qemu/memalign.h"
29
};
44
#include "linux-headers/linux/vhost.h"
30
45
31
@@ -XXX,XX +XXX,XX @@ static const int user_feature_bits[] = {
46
/**
32
VIRTIO_NET_F_MTU,
47
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
33
VIRTIO_F_IOMMU_PLATFORM,
48
}
34
VIRTIO_F_RING_PACKED,
49
35
+ VIRTIO_NET_F_RSS,
50
/**
36
+ VIRTIO_NET_F_HASH_REPORT,
51
- * Forward guest notifications.
37
52
+ * Number of descriptors that the SVQ can make available from the guest.
38
/* This bit implies RARP isn't sent by QEMU out of band */
53
+ *
39
VIRTIO_NET_F_GUEST_ANNOUNCE,
54
+ * @svq: The svq
40
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
55
+ */
41
index XXXXXXX..XXXXXXX 100644
56
+static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
42
--- a/hw/net/virtio-net.c
57
+{
43
+++ b/hw/net/virtio-net.c
58
+ return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
44
@@ -XXX,XX +XXX,XX @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
59
+}
45
return features;
60
+
46
}
61
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
47
62
+ const struct iovec *iovec, size_t num,
48
- virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
63
+ bool more_descs, bool write)
49
- virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
64
+{
50
+ if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
65
+ uint16_t i = svq->free_head, last = svq->free_head;
51
+ virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
66
+ unsigned n;
52
+ }
67
+ uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
53
features = vhost_net_get_features(get_vhost_net(nc->peer), features);
68
+ vring_desc_t *descs = svq->vring.desc;
54
vdev->backend_features = features;
69
+
55
70
+ if (num == 0) {
56
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
71
+ return;
72
+ }
73
+
74
+ for (n = 0; n < num; n++) {
75
+ if (more_descs || (n + 1 < num)) {
76
+ descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
77
+ } else {
78
+ descs[i].flags = flags;
79
+ }
80
+ descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
81
+ descs[i].len = cpu_to_le32(iovec[n].iov_len);
82
+
83
+ last = i;
84
+ i = cpu_to_le16(descs[i].next);
85
+ }
86
+
87
+ svq->free_head = le16_to_cpu(descs[last].next);
88
+}
89
+
90
+static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
91
+ VirtQueueElement *elem, unsigned *head)
92
+{
93
+ unsigned avail_idx;
94
+ vring_avail_t *avail = svq->vring.avail;
95
+
96
+ *head = svq->free_head;
97
+
98
+ /* We need some descriptors here */
99
+ if (unlikely(!elem->out_num && !elem->in_num)) {
100
+ qemu_log_mask(LOG_GUEST_ERROR,
101
+ "Guest provided element with no descriptors");
102
+ return false;
103
+ }
104
+
105
+ vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
106
+ false);
107
+ vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
108
+
109
+ /*
110
+ * Put the entry in the available array (but don't update avail->idx until
111
+ * they do sync).
112
+ */
113
+ avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
114
+ avail->ring[avail_idx] = cpu_to_le16(*head);
115
+ svq->shadow_avail_idx++;
116
+
117
+ /* Update the avail index after write the descriptor */
118
+ smp_wmb();
119
+ avail->idx = cpu_to_le16(svq->shadow_avail_idx);
120
+
121
+ return true;
122
+}
123
+
124
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
125
+{
126
+ unsigned qemu_head;
127
+ bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
128
+ if (unlikely(!ok)) {
129
+ return false;
130
+ }
131
+
132
+ svq->ring_id_maps[qemu_head] = elem;
133
+ return true;
134
+}
135
+
136
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
137
+{
138
+ /*
139
+ * We need to expose the available array entries before checking the used
140
+ * flags
141
+ */
142
+ smp_mb();
143
+ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
144
+ return;
145
+ }
146
+
147
+ event_notifier_set(&svq->hdev_kick);
148
+}
149
+
150
+/**
151
+ * Forward available buffers.
152
+ *
153
+ * @svq: Shadow VirtQueue
154
+ *
155
+ * Note that this function does not guarantee that all guest's available
156
+ * buffers are available to the device in SVQ avail ring. The guest may have
157
+ * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in
158
+ * qemu vaddr.
159
+ *
160
+ * If that happens, guest's kick notifications will be disabled until the
161
+ * device uses some buffers.
162
+ */
163
+static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
164
+{
165
+ /* Clear event notifier */
166
+ event_notifier_test_and_clear(&svq->svq_kick);
167
+
168
+ /* Forward to the device as many available buffers as possible */
169
+ do {
170
+ virtio_queue_set_notification(svq->vq, false);
171
+
172
+ while (true) {
173
+ VirtQueueElement *elem;
174
+ bool ok;
175
+
176
+ if (svq->next_guest_avail_elem) {
177
+ elem = g_steal_pointer(&svq->next_guest_avail_elem);
178
+ } else {
179
+ elem = virtqueue_pop(svq->vq, sizeof(*elem));
180
+ }
181
+
182
+ if (!elem) {
183
+ break;
184
+ }
185
+
186
+ if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
187
+ /*
188
+ * This condition is possible since a contiguous buffer in GPA
189
+ * does not imply a contiguous buffer in qemu's VA
190
+ * scatter-gather segments. If that happens, the buffer exposed
191
+ * to the device needs to be a chain of descriptors at this
192
+ * moment.
193
+ *
194
+ * SVQ cannot hold more available buffers if we are here:
195
+ * queue the current guest descriptor and ignore further kicks
196
+ * until some elements are used.
197
+ */
198
+ svq->next_guest_avail_elem = elem;
199
+ return;
200
+ }
201
+
202
+ ok = vhost_svq_add(svq, elem);
203
+ if (unlikely(!ok)) {
204
+ /* VQ is broken, just return and ignore any other kicks */
205
+ return;
206
+ }
207
+ vhost_svq_kick(svq);
208
+ }
209
+
210
+ virtio_queue_set_notification(svq->vq, true);
211
+ } while (!virtio_queue_empty(svq->vq));
212
+}
213
+
214
+/**
215
+ * Handle guest's kick.
216
*
217
* @n: guest kick event notifier, the one that guest set to notify svq.
218
*/
219
-static void vhost_handle_guest_kick(EventNotifier *n)
220
+static void vhost_handle_guest_kick_notifier(EventNotifier *n)
221
{
222
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
223
event_notifier_test_and_clear(n);
224
- event_notifier_set(&svq->hdev_kick);
225
+ vhost_handle_guest_kick(svq);
226
+}
227
+
228
+static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
229
+{
230
+ if (svq->last_used_idx != svq->shadow_used_idx) {
231
+ return true;
232
+ }
233
+
234
+ svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
235
+
236
+ return svq->last_used_idx != svq->shadow_used_idx;
237
}
238
239
/**
240
- * Forward vhost notifications
241
+ * Enable vhost device calls after disable them.
242
+ *
243
+ * @svq: The svq
244
+ *
245
+ * It returns false if there are pending used buffers from the vhost device,
246
+ * avoiding the possible races between SVQ checking for more work and enabling
247
+ * callbacks. True if SVQ used vring has no more pending buffers.
248
+ */
249
+static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
250
+{
251
+ svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
252
+ /* Make sure the flag is written before the read of used_idx */
253
+ smp_mb();
254
+ return !vhost_svq_more_used(svq);
255
+}
256
+
257
+static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
258
+{
259
+ svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
260
+}
261
+
262
+static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
263
+ uint32_t *len)
264
+{
265
+ vring_desc_t *descs = svq->vring.desc;
266
+ const vring_used_t *used = svq->vring.used;
267
+ vring_used_elem_t used_elem;
268
+ uint16_t last_used;
269
+
270
+ if (!vhost_svq_more_used(svq)) {
271
+ return NULL;
272
+ }
273
+
274
+ /* Only get used array entries after they have been exposed by dev */
275
+ smp_rmb();
276
+ last_used = svq->last_used_idx & (svq->vring.num - 1);
277
+ used_elem.id = le32_to_cpu(used->ring[last_used].id);
278
+ used_elem.len = le32_to_cpu(used->ring[last_used].len);
279
+
280
+ svq->last_used_idx++;
281
+ if (unlikely(used_elem.id >= svq->vring.num)) {
282
+ qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
283
+ svq->vdev->name, used_elem.id);
284
+ return NULL;
285
+ }
286
+
287
+ if (unlikely(!svq->ring_id_maps[used_elem.id])) {
288
+ qemu_log_mask(LOG_GUEST_ERROR,
289
+ "Device %s says index %u is used, but it was not available",
290
+ svq->vdev->name, used_elem.id);
291
+ return NULL;
292
+ }
293
+
294
+ descs[used_elem.id].next = svq->free_head;
295
+ svq->free_head = used_elem.id;
296
+
297
+ *len = used_elem.len;
298
+ return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
299
+}
300
+
301
+static void vhost_svq_flush(VhostShadowVirtqueue *svq,
302
+ bool check_for_avail_queue)
303
+{
304
+ VirtQueue *vq = svq->vq;
305
+
306
+ /* Forward as many used buffers as possible. */
307
+ do {
308
+ unsigned i = 0;
309
+
310
+ vhost_svq_disable_notification(svq);
311
+ while (true) {
312
+ uint32_t len;
313
+ g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
314
+ if (!elem) {
315
+ break;
316
+ }
317
+
318
+ if (unlikely(i >= svq->vring.num)) {
319
+ qemu_log_mask(LOG_GUEST_ERROR,
320
+ "More than %u used buffers obtained in a %u size SVQ",
321
+ i, svq->vring.num);
322
+ virtqueue_fill(vq, elem, len, i);
323
+ virtqueue_flush(vq, i);
324
+ return;
325
+ }
326
+ virtqueue_fill(vq, elem, len, i++);
327
+ }
328
+
329
+ virtqueue_flush(vq, i);
330
+ event_notifier_set(&svq->svq_call);
331
+
332
+ if (check_for_avail_queue && svq->next_guest_avail_elem) {
333
+ /*
334
+ * Avail ring was full when vhost_svq_flush was called, so it's a
335
+ * good moment to make more descriptors available if possible.
336
+ */
337
+ vhost_handle_guest_kick(svq);
338
+ }
339
+ } while (!vhost_svq_enable_notification(svq));
340
+}
341
+
342
+/**
343
+ * Forward used buffers.
344
*
345
* @n: hdev call event notifier, the one that device set to notify svq.
346
+ *
347
+ * Note that we are not making any buffers available in the loop, there is no
348
+ * way that it runs more than virtqueue size times.
349
*/
350
static void vhost_svq_handle_call(EventNotifier *n)
351
{
352
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
353
hdev_call);
354
event_notifier_test_and_clear(n);
355
- event_notifier_set(&svq->svq_call);
356
+ vhost_svq_flush(svq, true);
357
}
358
359
/**
360
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
361
if (poll_start) {
362
event_notifier_init_fd(svq_kick, svq_kick_fd);
363
event_notifier_set(svq_kick);
364
- event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
365
+ event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
366
+ }
367
+}
368
+
369
+/**
370
+ * Start the shadow virtqueue operation.
371
+ *
372
+ * @svq: Shadow Virtqueue
373
+ * @vdev: VirtIO device
374
+ * @vq: Virtqueue to shadow
375
+ */
376
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
377
+ VirtQueue *vq)
378
+{
379
+ size_t desc_size, driver_size, device_size;
380
+
381
+ svq->next_guest_avail_elem = NULL;
382
+ svq->shadow_avail_idx = 0;
383
+ svq->shadow_used_idx = 0;
384
+ svq->last_used_idx = 0;
385
+ svq->vdev = vdev;
386
+ svq->vq = vq;
387
+
388
+ svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
389
+ driver_size = vhost_svq_driver_area_size(svq);
390
+ device_size = vhost_svq_device_area_size(svq);
391
+ svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size);
392
+ desc_size = sizeof(vring_desc_t) * svq->vring.num;
393
+ svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
394
+ memset(svq->vring.desc, 0, driver_size);
395
+ svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
396
+ memset(svq->vring.used, 0, device_size);
397
+ svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
398
+ for (unsigned i = 0; i < svq->vring.num - 1; i++) {
399
+ svq->vring.desc[i].next = cpu_to_le16(i + 1);
57
}
400
}
58
}
401
}
59
402
60
+static void virtio_net_detach_epbf_rss(VirtIONet *n);
403
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
61
+
404
void vhost_svq_stop(VhostShadowVirtqueue *svq)
62
static void virtio_net_disable_rss(VirtIONet *n)
63
{
405
{
64
if (n->rss_data.enabled) {
406
event_notifier_set_handler(&svq->svq_kick, NULL);
65
trace_virtio_net_rss_disable();
407
+ g_autofree VirtQueueElement *next_avail_elem = NULL;
408
+
409
+ if (!svq->vq) {
410
+ return;
411
+ }
412
+
413
+ /* Send all pending used descriptors to guest */
414
+ vhost_svq_flush(svq, false);
415
+
416
+ for (unsigned i = 0; i < svq->vring.num; ++i) {
417
+ g_autofree VirtQueueElement *elem = NULL;
418
+ elem = g_steal_pointer(&svq->ring_id_maps[i]);
419
+ if (elem) {
420
+ virtqueue_detach_element(svq->vq, elem, 0);
421
+ }
422
+ }
423
+
424
+ next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
425
+ if (next_avail_elem) {
426
+ virtqueue_detach_element(svq->vq, next_avail_elem, 0);
427
+ }
428
+ svq->vq = NULL;
429
+ g_free(svq->ring_id_maps);
430
+ qemu_vfree(svq->vring.desc);
431
+ qemu_vfree(svq->vring.used);
432
}
433
434
/**
435
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
436
index XXXXXXX..XXXXXXX 100644
437
--- a/hw/virtio/vhost-shadow-virtqueue.h
438
+++ b/hw/virtio/vhost-shadow-virtqueue.h
439
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
440
441
/* Guest's call notifier, where the SVQ calls guest. */
442
EventNotifier svq_call;
443
+
444
+ /* Virtio queue shadowing */
445
+ VirtQueue *vq;
446
+
447
+ /* Virtio device */
448
+ VirtIODevice *vdev;
449
+
450
+ /* Map for use the guest's descriptors */
451
+ VirtQueueElement **ring_id_maps;
452
+
453
+ /* Next VirtQueue element that guest made available */
454
+ VirtQueueElement *next_guest_avail_elem;
455
+
456
+ /* Next head to expose to the device */
457
+ uint16_t shadow_avail_idx;
458
+
459
+ /* Next free descriptor */
460
+ uint16_t free_head;
461
+
462
+ /* Last seen used idx */
463
+ uint16_t shadow_used_idx;
464
+
465
+ /* Next head to consume from the device */
466
+ uint16_t last_used_idx;
467
} VhostShadowVirtqueue;
468
469
bool vhost_svq_valid_features(uint64_t features, Error **errp);
470
@@ -XXX,XX +XXX,XX @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
471
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
472
size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
473
474
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
475
+ VirtQueue *vq);
476
void vhost_svq_stop(VhostShadowVirtqueue *svq);
477
478
VhostShadowVirtqueue *vhost_svq_new(void);
479
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
480
index XXXXXXX..XXXXXXX 100644
481
--- a/hw/virtio/vhost-vdpa.c
482
+++ b/hw/virtio/vhost-vdpa.c
483
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
484
* Note that this function does not rewind kick file descriptor if cannot set
485
* call one.
486
*/
487
-static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
488
- VhostShadowVirtqueue *svq, unsigned idx,
489
- Error **errp)
490
+static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
491
+ VhostShadowVirtqueue *svq, unsigned idx,
492
+ Error **errp)
493
{
494
struct vhost_vring_file file = {
495
.index = dev->vq_index + idx,
496
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
497
r = vhost_vdpa_set_vring_dev_kick(dev, &file);
498
if (unlikely(r != 0)) {
499
error_setg_errno(errp, -r, "Can't set device kick fd");
500
- return false;
501
+ return r;
66
}
502
}
67
n->rss_data.enabled = false;
503
68
+
504
event_notifier = &svq->hdev_call;
69
+ virtio_net_detach_epbf_rss(n);
505
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
70
+}
506
error_setg_errno(errp, -r, "Can't set device call fd");
71
+
507
}
72
+static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
508
73
+{
509
+ return r;
74
+ NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
510
+}
75
+ if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
511
+
512
+/**
513
+ * Unmap a SVQ area in the device
514
+ */
515
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
516
+ hwaddr size)
517
+{
518
+ int r;
519
+
520
+ size = ROUND_UP(size, qemu_real_host_page_size);
521
+ r = vhost_vdpa_dma_unmap(v, iova, size);
522
+ return r == 0;
523
+}
524
+
525
+static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
526
+ const VhostShadowVirtqueue *svq)
527
+{
528
+ struct vhost_vdpa *v = dev->opaque;
529
+ struct vhost_vring_addr svq_addr;
530
+ size_t device_size = vhost_svq_device_area_size(svq);
531
+ size_t driver_size = vhost_svq_driver_area_size(svq);
532
+ bool ok;
533
+
534
+ vhost_svq_get_vring_addr(svq, &svq_addr);
535
+
536
+ ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
537
+ if (unlikely(!ok)) {
76
+ return false;
538
+ return false;
77
+ }
539
+ }
78
+
540
+
79
+ return nc->info->set_steering_ebpf(nc, prog_fd);
541
+ return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
80
+}
542
+}
81
+
543
+
82
+static void rss_data_to_rss_config(struct VirtioNetRssData *data,
544
+/**
83
+ struct EBPFRSSConfig *config)
545
+ * Map the shadow virtqueue rings in the device
84
+{
546
+ *
85
+ config->redirect = data->redirect;
547
+ * @dev: The vhost device
86
+ config->populate_hash = data->populate_hash;
548
+ * @svq: The shadow virtqueue
87
+ config->hash_types = data->hash_types;
549
+ * @addr: Assigned IOVA addresses
88
+ config->indirections_len = data->indirections_len;
550
+ * @errp: Error pointer
89
+ config->default_queue = data->default_queue;
551
+ */
90
+}
552
+static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
91
+
553
+ const VhostShadowVirtqueue *svq,
92
+static bool virtio_net_attach_epbf_rss(VirtIONet *n)
554
+ struct vhost_vring_addr *addr,
93
+{
555
+ Error **errp)
94
+ struct EBPFRSSConfig config = {};
556
+{
95
+
557
+ struct vhost_vdpa *v = dev->opaque;
96
+ if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
558
+ size_t device_size = vhost_svq_device_area_size(svq);
559
+ size_t driver_size = vhost_svq_driver_area_size(svq);
560
+ int r;
561
+
562
+ ERRP_GUARD();
563
+ vhost_svq_get_vring_addr(svq, addr);
564
+
565
+ r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
566
+ (void *)(uintptr_t)addr->desc_user_addr, true);
567
+ if (unlikely(r != 0)) {
568
+ error_setg_errno(errp, -r, "Cannot create vq driver region: ");
97
+ return false;
569
+ return false;
98
+ }
570
+ }
99
+
571
+
100
+ rss_data_to_rss_config(&n->rss_data, &config);
572
+ r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
101
+
573
+ (void *)(intptr_t)addr->used_user_addr, false);
102
+ if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
574
+ if (unlikely(r != 0)) {
103
+ n->rss_data.indirections_table, n->rss_data.key)) {
575
+ error_setg_errno(errp, -r, "Cannot create vq device region: ");
576
+ }
577
+
578
+ return r == 0;
579
+}
580
+
581
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
582
+ VhostShadowVirtqueue *svq, unsigned idx,
583
+ Error **errp)
584
+{
585
+ uint16_t vq_index = dev->vq_index + idx;
586
+ struct vhost_vring_state s = {
587
+ .index = vq_index,
588
+ };
589
+ int r;
590
+
591
+ r = vhost_vdpa_set_dev_vring_base(dev, &s);
592
+ if (unlikely(r)) {
593
+ error_setg_errno(errp, -r, "Cannot set vring base");
104
+ return false;
594
+ return false;
105
+ }
595
+ }
106
+
596
+
107
+ if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
597
+ r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
108
+ return false;
598
return r == 0;
599
}
600
601
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
602
}
603
604
for (i = 0; i < v->shadow_vqs->len; ++i) {
605
+ VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
606
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
607
+ struct vhost_vring_addr addr = {
608
+ .index = i,
609
+ };
610
+ int r;
611
bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
612
if (unlikely(!ok)) {
613
- error_reportf_err(err, "Cannot setup SVQ %u: ", i);
614
+ goto err;
615
+ }
616
+
617
+ vhost_svq_start(svq, dev->vdev, vq);
618
+ ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
619
+ if (unlikely(!ok)) {
620
+ goto err_map;
621
+ }
622
+
623
+ /* Override vring GPA set by vhost subsystem */
624
+ r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
625
+ if (unlikely(r != 0)) {
626
+ error_setg_errno(&err, -r, "Cannot set device address");
627
+ goto err_set_addr;
628
+ }
109
+ }
629
+ }
110
+
630
+
111
+ return true;
631
+ return true;
112
+}
632
+
113
+
633
+err_set_addr:
114
+static void virtio_net_detach_epbf_rss(VirtIONet *n)
634
+ vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));
115
+{
635
+
116
+ virtio_net_attach_ebpf_to_backend(n->nic, -1);
636
+err_map:
117
+}
637
+ vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
118
+
638
+
119
+static bool virtio_net_load_ebpf(VirtIONet *n)
639
+err:
120
+{
640
+ error_reportf_err(err, "Cannot setup SVQ %u: ", i);
121
+ if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
641
+ for (unsigned j = 0; j < i; ++j) {
122
+ /* backend does't support steering ebpf */
642
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
123
+ return false;
643
+ vhost_vdpa_svq_unmap_rings(dev, svq);
124
+ }
644
+ vhost_svq_stop(svq);
125
+
645
+ }
126
+ return ebpf_rss_load(&n->ebpf_rss);
646
+
127
+}
647
+ return false;
128
+
648
+}
129
+static void virtio_net_unload_ebpf(VirtIONet *n)
649
+
130
+{
650
+static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
131
+ virtio_net_attach_ebpf_to_backend(n->nic, -1);
651
+{
132
+ ebpf_rss_unload(&n->ebpf_rss);
652
+ struct vhost_vdpa *v = dev->opaque;
133
}
653
+
134
654
+ if (!v->shadow_vqs) {
135
static uint16_t virtio_net_handle_rss(VirtIONet *n,
655
+ return true;
136
@@ -XXX,XX +XXX,XX @@ static uint16_t virtio_net_handle_rss(VirtIONet *n,
656
+ }
137
goto error;
657
+
658
+ for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
659
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
660
+ bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
661
+ if (unlikely(!ok)) {
662
return false;
663
}
138
}
664
}
139
n->rss_data.enabled = true;
665
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
140
+
666
}
141
+ if (!n->rss_data.populate_hash) {
667
vhost_vdpa_set_vring_ready(dev);
142
+ if (!virtio_net_attach_epbf_rss(n)) {
668
} else {
143
+ /* EBPF must be loaded for vhost */
669
+ ok = vhost_vdpa_svqs_stop(dev);
144
+ if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
670
+ if (unlikely(!ok)) {
145
+ warn_report("Can't load eBPF RSS for vhost");
671
+ return -1;
146
+ goto error;
672
+ }
147
+ }
673
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
148
+ /* fallback to software RSS */
149
+ warn_report("Can't load eBPF RSS - fallback to software RSS");
150
+ n->rss_data.enabled_software_rss = true;
151
+ }
152
+ } else {
153
+ /* use software RSS for hash populating */
154
+ /* and detach eBPF if was loaded before */
155
+ virtio_net_detach_epbf_rss(n);
156
+ n->rss_data.enabled_software_rss = true;
157
+ }
158
+
159
trace_virtio_net_rss_enable(n->rss_data.hash_types,
160
n->rss_data.indirections_len,
161
temp.b);
162
@@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
163
return -1;
164
}
674
}
165
675
166
- if (!no_rss && n->rss_data.enabled) {
167
+ if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
168
int index = virtio_net_process_rss(nc, buf, size);
169
if (index >= 0) {
170
NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
171
@@ -XXX,XX +XXX,XX @@ static int virtio_net_post_load_device(void *opaque, int version_id)
172
}
173
174
if (n->rss_data.enabled) {
175
+ n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
176
+ if (!n->rss_data.populate_hash) {
177
+ if (!virtio_net_attach_epbf_rss(n)) {
178
+ if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
179
+ warn_report("Can't post-load eBPF RSS for vhost");
180
+ } else {
181
+ warn_report("Can't post-load eBPF RSS - "
182
+ "fallback to software RSS");
183
+ n->rss_data.enabled_software_rss = true;
184
+ }
185
+ }
186
+ }
187
+
188
trace_virtio_net_rss_enable(n->rss_data.hash_types,
189
n->rss_data.indirections_len,
190
sizeof(n->rss_data.key));
191
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
192
n->qdev = dev;
193
194
net_rx_pkt_init(&n->rx_pkt, false);
195
+
196
+ if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
197
+ virtio_net_load_ebpf(n);
198
+ }
199
}
200
201
static void virtio_net_device_unrealize(DeviceState *dev)
202
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_unrealize(DeviceState *dev)
203
VirtIONet *n = VIRTIO_NET(dev);
204
int i, max_queues;
205
206
+ if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
207
+ virtio_net_unload_ebpf(n);
208
+ }
209
+
210
/* This will stop vhost backend if appropriate. */
211
virtio_net_set_status(vdev, 0);
212
213
@@ -XXX,XX +XXX,XX @@ static void virtio_net_instance_init(Object *obj)
214
device_add_bootindex_property(obj, &n->nic_conf.bootindex,
215
"bootindex", "/ethernet-phy@0",
216
DEVICE(n));
217
+
218
+ ebpf_rss_init(&n->ebpf_rss);
219
}
220
221
static int virtio_net_pre_save(void *opaque)
222
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
223
index XXXXXXX..XXXXXXX 100644
224
--- a/include/hw/virtio/virtio-net.h
225
+++ b/include/hw/virtio/virtio-net.h
226
@@ -XXX,XX +XXX,XX @@
227
#include "qemu/option_int.h"
228
#include "qom/object.h"
229
230
+#include "ebpf/ebpf_rss.h"
231
+
232
#define TYPE_VIRTIO_NET "virtio-net-device"
233
OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
234
235
@@ -XXX,XX +XXX,XX @@ typedef struct VirtioNetRscChain {
236
237
typedef struct VirtioNetRssData {
238
bool enabled;
239
+ bool enabled_software_rss;
240
bool redirect;
241
bool populate_hash;
242
uint32_t hash_types;
243
@@ -XXX,XX +XXX,XX @@ struct VirtIONet {
244
Notifier migration_state;
245
VirtioNetRssData rss_data;
246
struct NetRxPkt *rx_pkt;
247
+ struct EBPFRSSContext ebpf_rss;
248
};
249
250
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
251
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
252
index XXXXXXX..XXXXXXX 100644
253
--- a/net/vhost-vdpa.c
254
+++ b/net/vhost-vdpa.c
255
@@ -XXX,XX +XXX,XX @@ const int vdpa_feature_bits[] = {
256
VIRTIO_NET_F_MTU,
257
VIRTIO_F_IOMMU_PLATFORM,
258
VIRTIO_F_RING_PACKED,
259
+ VIRTIO_NET_F_RSS,
260
+ VIRTIO_NET_F_HASH_REPORT,
261
VIRTIO_NET_F_GUEST_ANNOUNCE,
262
VIRTIO_NET_F_STATUS,
263
VHOST_INVALID_FEATURE_BIT
264
--
2.7.4

From: Eugenio Pérez <eperezma@redhat.com>

This iova tree function makes it possible to look for a hole in the
allocated regions and return a totally new translation for a given
translated address.

Its main use is to allow devices to access the qemu address space,
remapping the guest's address space into a new iova space to which qemu
can add chunks of addresses.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 include/qemu/iova-tree.h | 18 +++++++
 util/iova-tree.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)
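
A hedged usage sketch of the new allocator (types and signatures as in
this patch; the buffer and the iova bounds are invented for illustration).
The caller fills a DMAMap with the translated address and size, and
iova_tree_alloc_map() picks a free hole and stores the chosen address in
map.iova:

    #include "qemu/osdep.h"
    #include "qemu/iova-tree.h"

    static void alloc_example(void *qemu_buf, size_t size)
    {
        IOVATree *tree = iova_tree_new();
        DMAMap map = {
            .translated_addr = (hwaddr)(uintptr_t)qemu_buf,
            .size = size - 1,    /* the tree treats ranges as inclusive */
            .perm = IOMMU_RW,
        };

        if (iova_tree_alloc_map(tree, &map, 0x1000, HWADDR_MAX) == IOVA_OK) {
            /* map.iova now holds the allocated IOVA for qemu_buf */
        }
        iova_tree_destroy(tree);
    }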
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/qemu/iova-tree.h
23
+++ b/include/qemu/iova-tree.h
24
@@ -XXX,XX +XXX,XX @@
25
#define IOVA_OK (0)
26
#define IOVA_ERR_INVALID (-1) /* Invalid parameters */
27
#define IOVA_ERR_OVERLAP (-2) /* IOVA range overlapped */
28
+#define IOVA_ERR_NOMEM (-3) /* Cannot allocate */
29
30
typedef struct IOVATree IOVATree;
31
typedef struct DMAMap {
32
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova);
33
void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
34
35
/**
36
+ * iova_tree_alloc_map:
37
+ *
38
+ * @tree: the iova tree to allocate from
39
+ * @map: the new map (as translated addr & size) to allocate in the iova region
40
+ * @iova_begin: the minimum address of the allocation
41
+ * @iova_end: the maximum addressable direction of the allocation
42
+ *
43
+ * Allocates a new region of a given size, between iova_min and iova_max.
44
+ *
45
+ * Return: Same as iova_tree_insert, but cannot overlap and can return error if
46
+ * iova tree is out of free contiguous range. The caller gets the assigned iova
47
+ * in map->iova.
48
+ */
49
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
50
+ hwaddr iova_end);
51
+
52
+/**
53
* iova_tree_destroy:
54
*
55
* @tree: the iova tree to destroy
56
diff --git a/util/iova-tree.c b/util/iova-tree.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/util/iova-tree.c
59
+++ b/util/iova-tree.c
60
@@ -XXX,XX +XXX,XX @@ struct IOVATree {
61
GTree *tree;
62
};
63
64
+/* Args to pass to iova_tree_alloc foreach function. */
65
+struct IOVATreeAllocArgs {
66
+ /* Size of the desired allocation */
67
+ size_t new_size;
68
+
69
+ /* The minimum address allowed in the allocation */
70
+ hwaddr iova_begin;
71
+
72
+ /* Map at the left of the hole, can be NULL if "this" is first one */
73
+ const DMAMap *prev;
74
+
75
+ /* Map at the right of the hole, can be NULL if "prev" is the last one */
76
+ const DMAMap *this;
77
+
78
+ /* If found, we fill in the IOVA here */
79
+ hwaddr iova_result;
80
+
81
+ /* Whether have we found a valid IOVA */
82
+ bool iova_found;
83
+};
84
+
85
+/**
86
+ * Iterate args to the next hole
87
+ *
88
+ * @args: The alloc arguments
89
+ * @next: The next mapping in the tree. Can be NULL to signal the last one
90
+ */
91
+static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args,
92
+ const DMAMap *next)
93
+{
94
+ args->prev = args->this;
95
+ args->this = next;
96
+}
97
+
98
static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
99
{
100
const DMAMap *m1 = a, *m2 = b;
101
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map)
102
return IOVA_OK;
103
}
104
105
+/**
106
+ * Try to find an unallocated IOVA range between prev and this elements.
107
+ *
108
+ * @args: Arguments to allocation
109
+ *
110
+ * Cases:
111
+ *
112
+ * (1) !prev, !this: No entries allocated, always succeed
113
+ *
114
+ * (2) !prev, this: We're iterating at the 1st element.
115
+ *
116
+ * (3) prev, !this: We're iterating at the last element.
117
+ *
118
+ * (4) prev, this: this is the most common case, we'll try to find a hole
119
+ * between "prev" and "this" mapping.
120
+ *
121
+ * Note that this function assumes the last valid iova is HWADDR_MAX, but it
122
+ * searches linearly so it's easy to discard the result if it's not the case.
123
+ */
124
+static void iova_tree_alloc_map_in_hole(struct IOVATreeAllocArgs *args)
125
+{
126
+ const DMAMap *prev = args->prev, *this = args->this;
127
+ uint64_t hole_start, hole_last;
128
+
129
+ if (this && this->iova + this->size < args->iova_begin) {
130
+ return;
131
+ }
132
+
133
+ hole_start = MAX(prev ? prev->iova + prev->size + 1 : 0, args->iova_begin);
134
+ hole_last = this ? this->iova : HWADDR_MAX;
135
+
136
+ if (hole_last - hole_start > args->new_size) {
137
+ args->iova_result = hole_start;
138
+ args->iova_found = true;
139
+ }
140
+}
141
+
142
+/**
143
+ * Foreach dma node in the tree, compare if there is a hole with its previous
144
+ * node (or minimum iova address allowed) and the node.
145
+ *
146
+ * @key: Node iterating
147
+ * @value: Node iterating
148
+ * @pargs: Struct to communicate with the outside world
149
+ *
150
+ * Return: false to keep iterating, true if needs break.
151
+ */
152
+static gboolean iova_tree_alloc_traverse(gpointer key, gpointer value,
153
+ gpointer pargs)
154
+{
155
+ struct IOVATreeAllocArgs *args = pargs;
156
+ DMAMap *node = value;
157
+
158
+ assert(key == value);
159
+
160
+ iova_tree_alloc_args_iterate(args, node);
161
+ iova_tree_alloc_map_in_hole(args);
162
+ return args->iova_found;
163
+}
164
+
165
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
166
+ hwaddr iova_last)
167
+{
168
+ struct IOVATreeAllocArgs args = {
169
+ .new_size = map->size,
170
+ .iova_begin = iova_begin,
171
+ };
172
+
173
+ if (unlikely(iova_last < iova_begin)) {
174
+ return IOVA_ERR_INVALID;
175
+ }
176
+
177
+ /*
178
+ * Find a valid hole for the mapping
179
+ *
180
+ * Assuming low iova_begin, so no need to do a binary search to
181
+ * locate the first node.
182
+ *
183
+ * TODO: Replace all this with g_tree_node_first/next/last when available
184
+ * (from glib since 2.68). To do it with g_tree_foreach complicates the
185
+ * code a lot.
186
+ *
187
+ */
188
+ g_tree_foreach(tree->tree, iova_tree_alloc_traverse, &args);
189
+ if (!args.iova_found) {
190
+ /*
191
+ * Either tree is empty or the last hole is still not checked.
192
+ * g_tree_foreach does not compare (last, iova_last] range, so we check
193
+ * it here.
194
+ */
195
+ iova_tree_alloc_args_iterate(&args, NULL);
196
+ iova_tree_alloc_map_in_hole(&args);
197
+ }
198
+
199
+ if (!args.iova_found || args.iova_result + map->size > iova_last) {
200
+ return IOVA_ERR_NOMEM;
201
+ }
202
+
203
+ map->iova = args.iova_result;
204
+ return iova_tree_insert(tree, map);
205
+}
206
+
207
void iova_tree_destroy(IOVATree *tree)
208
{
209
g_tree_destroy(tree->tree);
210
--
2.7.4

From: Andrew Melnychenko <andrew@daynix.com>

For now, this method is supported only by Linux TAP.
Linux TAP uses the TUNSETSTEERINGEBPF ioctl.

Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 include/net/net.h | 2 ++
 net/tap-bsd.c | 5 +++++
 net/tap-linux.c | 13 +++++++++++++
 net/tap-solaris.c | 5 +++++
 net/tap-stub.c | 5 +++++
 net/tap.c | 9 +++++++++
 net/tap_int.h | 1 +
 7 files changed, 40 insertions(+)

From: Eugenio Pérez <eperezma@redhat.com>

This function does the reverse operation of iova_tree_find: it looks for
a mapping that matches a translated address, so we can do the reverse
lookup.

This has linear complexity instead of logarithmic, but it supports
overlapping HVA. Future developments could reduce it.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 include/qemu/iova-tree.h | 20 +++++++++++++++++++-
 util/iova-tree.c | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)
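
The attach/detach protocol the TAP patch wires up can be sketched like
this (a minimal sketch of a hypothetical caller; the ioctl itself and the
"prog_fd == -1 detaches" convention are what tap_fd_set_steering_ebpf()
relies on):

    #include <sys/ioctl.h>
    #include <linux/if_tun.h>

    /* Attach a loaded BPF steering program to a TAP fd; -1 detaches. */
    static int set_steering_prog(int tap_fd, int prog_fd)
    {
        return ioctl(tap_fd, TUNSETSTEERINGEBPF, (void *)&prog_fd);
    }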
18
diff --git a/include/net/net.h b/include/net/net.h
17
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
19
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
20
--- a/include/net/net.h
19
--- a/include/qemu/iova-tree.h
21
+++ b/include/net/net.h
20
+++ b/include/qemu/iova-tree.h
22
@@ -XXX,XX +XXX,XX @@ typedef int (SetVnetBE)(NetClientState *, bool);
21
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
23
typedef struct SocketReadState SocketReadState;
22
* @tree: the iova tree to search from
24
typedef void (SocketReadStateFinalize)(SocketReadState *rs);
23
* @map: the mapping to search
25
typedef void (NetAnnounce)(NetClientState *);
24
*
26
+typedef bool (SetSteeringEBPF)(NetClientState *, int);
25
- * Search for a mapping in the iova tree that overlaps with the
27
26
+ * Search for a mapping in the iova tree that iova overlaps with the
28
typedef struct NetClientInfo {
27
* mapping range specified. Only the first found mapping will be
29
NetClientDriver type;
28
* returned.
30
@@ -XXX,XX +XXX,XX @@ typedef struct NetClientInfo {
29
*
31
SetVnetLE *set_vnet_le;
30
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
32
SetVnetBE *set_vnet_be;
31
const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map);
33
NetAnnounce *announce;
32
34
+ SetSteeringEBPF *set_steering_ebpf;
33
/**
35
} NetClientInfo;
34
+ * iova_tree_find_iova:
36
35
+ *
37
struct NetClientState {
36
+ * @tree: the iova tree to search from
38
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
37
+ * @map: the mapping to search
38
+ *
39
+ * Search for a mapping in the iova tree that translated_addr overlaps with the
40
+ * mapping range specified. Only the first found mapping will be
41
+ * returned.
42
+ *
43
+ * Return: DMAMap pointer if found, or NULL if not found. Note that
44
+ * the returned DMAMap pointer is maintained internally. User should
45
+ * only read the content but never modify or free the content. Also,
46
+ * user is responsible to make sure the pointer is valid (say, no
47
+ * concurrent deletion in progress).
48
+ */
49
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map);
50
+
51
+/**
52
* iova_tree_find_address:
53
*
54
* @tree: the iova tree to search from
55
diff --git a/util/iova-tree.c b/util/iova-tree.c
39
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
40
--- a/net/tap-bsd.c
57
--- a/util/iova-tree.c
41
+++ b/net/tap-bsd.c
58
+++ b/util/iova-tree.c
42
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
59
@@ -XXX,XX +XXX,XX @@ struct IOVATreeAllocArgs {
43
{
60
bool iova_found;
44
return -1;
61
};
62
63
+typedef struct IOVATreeFindIOVAArgs {
64
+ const DMAMap *needle;
65
+ const DMAMap *result;
66
+} IOVATreeFindIOVAArgs;
67
+
68
/**
69
* Iterate args to the next hole
70
*
71
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map)
72
return g_tree_lookup(tree->tree, map);
45
}
73
}
74
75
+static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value,
76
+ gpointer data)
77
+{
78
+ const DMAMap *map = key;
79
+ IOVATreeFindIOVAArgs *args = data;
80
+ const DMAMap *needle;
46
+
81
+
47
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
82
+ g_assert(key == value);
48
+{
49
+ return -1;
50
+}
51
diff --git a/net/tap-linux.c b/net/tap-linux.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/net/tap-linux.c
54
+++ b/net/tap-linux.c
55
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
56
pstrcpy(ifname, sizeof(ifr.ifr_name), ifr.ifr_name);
57
return 0;
58
}
59
+
83
+
60
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
84
+ needle = args->needle;
61
+{
85
+ if (map->translated_addr + map->size < needle->translated_addr ||
62
+ if (ioctl(fd, TUNSETSTEERINGEBPF, (void *) &prog_fd) != 0) {
86
+ needle->translated_addr + needle->size < map->translated_addr) {
63
+ error_report("Issue while setting TUNSETSTEERINGEBPF:"
87
+ return false;
64
+ " %s with fd: %d, prog_fd: %d",
65
+ strerror(errno), fd, prog_fd);
66
+
67
+ return -1;
68
+ }
88
+ }
69
+
89
+
70
+ return 0;
90
+ args->result = map;
71
+}
91
+ return true;
72
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/net/tap-solaris.c
75
+++ b/net/tap-solaris.c
76
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
77
{
78
return -1;
79
}
80
+
81
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
82
+{
83
+ return -1;
84
+}
85
diff --git a/net/tap-stub.c b/net/tap-stub.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/net/tap-stub.c
88
+++ b/net/tap-stub.c
89
@@ -XXX,XX +XXX,XX @@ int tap_fd_get_ifname(int fd, char *ifname)
90
{
91
return -1;
92
}
93
+
94
+int tap_fd_set_steering_ebpf(int fd, int prog_fd)
95
+{
96
+ return -1;
97
+}
98
diff --git a/net/tap.c b/net/tap.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/net/tap.c
101
+++ b/net/tap.c
102
@@ -XXX,XX +XXX,XX @@ static void tap_poll(NetClientState *nc, bool enable)
103
tap_write_poll(s, enable);
104
}
105
106
+static bool tap_set_steering_ebpf(NetClientState *nc, int prog_fd)
107
+{
108
+ TAPState *s = DO_UPCAST(TAPState, nc, nc);
109
+ assert(nc->info->type == NET_CLIENT_DRIVER_TAP);
110
+
111
+ return tap_fd_set_steering_ebpf(s->fd, prog_fd) == 0;
112
+}
92
+}
113
+
93
+
114
int tap_get_fd(NetClientState *nc)
94
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map)
95
+{
96
+ IOVATreeFindIOVAArgs args = {
97
+ .needle = map,
98
+ };
99
+
100
+ g_tree_foreach(tree->tree, iova_tree_find_address_iterator, &args);
101
+ return args.result;
102
+}
103
+
104
const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova)
115
{
105
{
116
TAPState *s = DO_UPCAST(TAPState, nc, nc);
106
const DMAMap map = { .iova = iova, .size = 0 };
117
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_tap_info = {
118
.set_vnet_hdr_len = tap_set_vnet_hdr_len,
119
.set_vnet_le = tap_set_vnet_le,
120
.set_vnet_be = tap_set_vnet_be,
121
+ .set_steering_ebpf = tap_set_steering_ebpf,
122
};
123
124
static TAPState *net_tap_fd_init(NetClientState *peer,
125
diff --git a/net/tap_int.h b/net/tap_int.h
126
index XXXXXXX..XXXXXXX 100644
127
--- a/net/tap_int.h
128
+++ b/net/tap_int.h
129
@@ -XXX,XX +XXX,XX @@ int tap_fd_set_vnet_be(int fd, int vnet_is_be);
130
int tap_fd_enable(int fd);
131
int tap_fd_disable(int fd);
132
int tap_fd_get_ifname(int fd, char *ifname);
133
+int tap_fd_set_steering_ebpf(int fd, int prog_fd);
134
135
#endif /* NET_TAP_INT_H */
136
--
2.7.4

From: Andrew Melnychenko <andrew@daynix.com>

Added a function that loads the RSS eBPF program.
Added stub functions for the RSS eBPF loader.
Added meson and configuration options.

By default, the eBPF feature is enabled if libbpf is present in the build
system. libbpf is checked for in the configure shell script and the meson
script.

Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 configure | 8 +-
 ebpf/ebpf_rss-stub.c | 40 +++++
 ebpf/ebpf_rss.c | 165 ++++++++++++++++++
 ebpf/ebpf_rss.h | 44 +++++
 ebpf/meson.build | 1 +
 ebpf/rss.bpf.skeleton.h | 431 ++++++++++++++++++++++++++++++++++++++++++++++++
 ebpf/trace-events | 4 +
 ebpf/trace.h | 1 +
 meson.build | 23 +++
 meson_options.txt | 2 +
 10 files changed, 718 insertions(+), 1 deletion(-)
 create mode 100644 ebpf/ebpf_rss-stub.c
 create mode 100644 ebpf/ebpf_rss.c
 create mode 100644 ebpf/ebpf_rss.h
 create mode 100644 ebpf/meson.build
 create mode 100644 ebpf/rss.bpf.skeleton.h
 create mode 100644 ebpf/trace-events
 create mode 100644 ebpf/trace.h

From: Eugenio Pérez <eperezma@redhat.com>

This tree is able to look for a translated address from an IOVA address.

At first glance it is similar to util/iova-tree. However, SVQ working on
devices with limited IOVA space needs more capabilities, like allocating
IOVA chunks or performing reverse translations (qemu addresses to iova).

The allocation capability, as "assign a free IOVA address to this chunk
of memory in qemu's address space", allows the shadow virtqueue to create
a new address space that is not restricted by the guest's addressable one,
so we can allocate shadow vq vrings outside of it.

It duplicates the tree so it can search efficiently in both directions,
and it will signal overlap if the iova or the translated address is
present in either tree.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/meson.build | 2 +-
 hw/virtio/vhost-iova-tree.c | 110 ++++++++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-iova-tree.h | 27 +++++++++++
 3 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 hw/virtio/vhost-iova-tree.c
 create mode 100644 hw/virtio/vhost-iova-tree.h
33
diff --git a/configure b/configure
index XXXXXXX..XXXXXXX 100755
--- a/configure
+++ b/configure
@@ -XXX,XX +XXX,XX @@ vhost_vsock="$default_feature"
vhost_user="no"
vhost_user_blk_server="auto"
vhost_user_fs="$default_feature"
+bpf="auto"
kvm="auto"
hax="auto"
hvf="auto"
@@ -XXX,XX +XXX,XX @@ for opt do
;;
--enable-membarrier) membarrier="yes"
;;
+ --disable-bpf) bpf="disabled"
+ ;;
+ --enable-bpf) bpf="enabled"
+ ;;
--disable-blobs) blobs="false"
;;
--with-pkgversion=*) pkgversion="$optarg"
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available
vhost-user vhost-user backend support
vhost-user-blk-server vhost-user-blk server support
vhost-vdpa vhost-vdpa kernel backend support
+ bpf BPF kernel support
spice spice
spice-protocol spice-protocol
rbd rados block device (rbd)
@@ -XXX,XX +XXX,XX @@ if test "$skip_meson" = no; then
-Dattr=$attr -Ddefault_devices=$default_devices \
-Ddocs=$docs -Dsphinx_build=$sphinx_build -Dinstall_blobs=$blobs \
-Dvhost_user_blk_server=$vhost_user_blk_server -Dmultiprocess=$multiprocess \
- -Dfuse=$fuse -Dfuse_lseek=$fuse_lseek -Dguest_agent_msi=$guest_agent_msi \
+ -Dfuse=$fuse -Dfuse_lseek=$fuse_lseek -Dguest_agent_msi=$guest_agent_msi -Dbpf=$bpf\
$(if test "$default_features" = no; then echo "-Dauto_features=disabled"; fi) \
    -Dtcg_interpreter=$tcg_interpreter \
$cross_arg \
diff --git a/ebpf/ebpf_rss-stub.c b/ebpf/ebpf_rss-stub.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/ebpf/ebpf_rss-stub.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * eBPF RSS stub file
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "ebpf/ebpf_rss.h"
+
+void ebpf_rss_init(struct EBPFRSSContext *ctx)
+{
+
+}
+
+bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
+{
+ return false;
+}
+
+bool ebpf_rss_load(struct EBPFRSSContext *ctx)
+{
+ return false;
+}
+
+bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
+ uint16_t *indirections_table, uint8_t *toeplitz_key)
+{
+ return false;
+}
+
+void ebpf_rss_unload(struct EBPFRSSContext *ctx)
+{
+
+}
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))

virtio_ss = ss.source_set()
virtio_ss.add(files('virtio.c'))
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/vhost-iova-tree.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * vhost software live migration iova tree
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iova-tree.h"
+#include "vhost-iova-tree.h"
+
+#define iova_min_addr qemu_real_host_page_size
+
+/**
+ * VhostIOVATree, able to:
+ * - Translate iova address
+ * - Reverse translate iova address (from translated to iova)
+ * - Allocate IOVA regions for translated range (linear operation)
+ */
+struct VhostIOVATree {
+ /* First addressable iova address in the device */
+ uint64_t iova_first;
+
+ /* Last addressable iova address in the device */
+ uint64_t iova_last;
+
+ /* IOVA address to qemu memory maps. */
+ IOVATree *iova_taddr_map;
+};
+
+/**
+ * Create a new IOVA tree
+ *
+ * Returns the new IOVA tree
+ */
+VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
+{
+ VhostIOVATree *tree = g_new(VhostIOVATree, 1);
+
+ /* Some devices do not like 0 addresses */
+ tree->iova_first = MAX(iova_first, iova_min_addr);
+ tree->iova_last = iova_last;
+
+ tree->iova_taddr_map = iova_tree_new();
+ return tree;
+}
+
+/**
+ * Delete an iova tree
+ */
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
+{
+ iova_tree_destroy(iova_tree->iova_taddr_map);
+ g_free(iova_tree);
+}
+
+/**
+ * Find the IOVA address stored from a memory address
+ *
+ * @tree: The iova tree
+ * @map: The map with the memory address
+ *
+ * Return the stored mapping, or NULL if not found.
+ */
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
+ const DMAMap *map)
+{
+ return iova_tree_find_iova(tree->iova_taddr_map, map);
+}
+
+/**
+ * Allocate a new mapping
+ *
+ * @tree: The iova tree
+ * @map: The iova map
+ *
+ * Returns:
+ * - IOVA_OK if the map fits in the container
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
+ * - IOVA_ERR_NOMEM if tree cannot allocate more space.
+ *
+ * It returns the assigned iova in map->iova if the return value is IOVA_OK.
+ */
+int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
+{
+ /* Some vhost devices do not like addr 0. Skip first page */
+ hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size;
+
+ if (map->translated_addr + map->size < map->translated_addr ||
+ map->perm == IOMMU_NONE) {
+ return IOVA_ERR_INVALID;
+ }
+
+ /* Allocate a node in IOVA address */
+ return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
+ tree->iova_last);
+}
+
+/**
+ * Remove existing mappings from iova tree
+ *
+ * @iova_tree: The vhost iova tree
+ * @map: The map to remove
+ */
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
+{
+ iova_tree_remove(iova_tree->iova_taddr_map, map);
+}
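For orientation, the whole API above composes like this; a minimal usage
sketch, not part of the patch (buf and buf_len are hypothetical, and the
48-bit IOVA ceiling is only an example). Note that DMAMap sizes are
inclusive, hence the "- 1":

    VhostIOVATree *tree = vhost_iova_tree_new(0, (1ULL << 48) - 1);
    DMAMap map = {
        .translated_addr = (hwaddr)(uintptr_t)buf, /* qemu VA of the buffer */
        .size = buf_len - 1,                       /* inclusive size */
        .perm = IOMMU_RW,
    };

    if (vhost_iova_tree_map_alloc(tree, &map) == IOVA_OK) {
        /* map.iova now holds the device-visible address of the buffer */
        const DMAMap *found = vhost_iova_tree_find_iova(tree, &map);
        g_assert(found && found->iova == map.iova);
        vhost_iova_tree_remove(tree, &map);
    }
    vhost_iova_tree_delete(tree);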
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/vhost-iova-tree.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * vhost software live migration iova tree
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
+#define HW_VIRTIO_VHOST_IOVA_TREE_H
+
+#include "qemu/iova-tree.h"
+#include "exec/memory.h"
+
+typedef struct VhostIOVATree VhostIOVATree;
+
+VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
+
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
+ const DMAMap *map);
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
+
+#endif
diff --git a/ebpf/ebpf_rss.c b/ebpf/ebpf_rss.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/ebpf/ebpf_rss.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * eBPF RSS loader
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Andrew Melnychenko <andrew@daynix.com>
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "hw/virtio/virtio-net.h" /* VIRTIO_NET_RSS_MAX_TABLE_LEN */
+
+#include "ebpf/ebpf_rss.h"
+#include "ebpf/rss.bpf.skeleton.h"
+#include "trace.h"
+
+void ebpf_rss_init(struct EBPFRSSContext *ctx)
+{
+ if (ctx != NULL) {
+ ctx->obj = NULL;
+ }
+}
+
+bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
+{
+ return ctx != NULL && ctx->obj != NULL;
+}
+
+bool ebpf_rss_load(struct EBPFRSSContext *ctx)
+{
+ struct rss_bpf *rss_bpf_ctx;
+
+ if (ctx == NULL) {
+ return false;
+ }
+
+ rss_bpf_ctx = rss_bpf__open();
+ if (rss_bpf_ctx == NULL) {
+ trace_ebpf_error("eBPF RSS", "can not open eBPF RSS object");
+ goto error;
+ }
+
+ bpf_program__set_socket_filter(rss_bpf_ctx->progs.tun_rss_steering_prog);
+
+ if (rss_bpf__load(rss_bpf_ctx)) {
+ trace_ebpf_error("eBPF RSS", "can not load RSS program");
+ goto error;
+ }
+
+ ctx->obj = rss_bpf_ctx;
+ ctx->program_fd = bpf_program__fd(
+ rss_bpf_ctx->progs.tun_rss_steering_prog);
+ ctx->map_configuration = bpf_map__fd(
+ rss_bpf_ctx->maps.tap_rss_map_configurations);
+ ctx->map_indirections_table = bpf_map__fd(
+ rss_bpf_ctx->maps.tap_rss_map_indirection_table);
+ ctx->map_toeplitz_key = bpf_map__fd(
+ rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
+
+ return true;
+error:
+ rss_bpf__destroy(rss_bpf_ctx);
+ ctx->obj = NULL;
+
+ return false;
+}
+
+static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
+ struct EBPFRSSConfig *config)
+{
+ uint32_t map_key = 0;
+
+ if (!ebpf_rss_is_loaded(ctx)) {
+ return false;
+ }
+ if (bpf_map_update_elem(ctx->map_configuration,
+ &map_key, config, 0) < 0) {
+ return false;
+ }
+ return true;
+}
+
+static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
+ uint16_t *indirections_table,
+ size_t len)
+{
+ uint32_t i = 0;
+
+ if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
+ len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
+ return false;
+ }
+
+ for (; i < len; ++i) {
+ if (bpf_map_update_elem(ctx->map_indirections_table, &i,
+ indirections_table + i, 0) < 0) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
+ uint8_t *toeplitz_key)
+{
+ uint32_t map_key = 0;
+
+ /* prepare toeplitz key */
+ uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
+
+ if (!ebpf_rss_is_loaded(ctx) || toeplitz_key == NULL) {
+ return false;
+ }
+ memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
+ *(uint32_t *)toe = ntohl(*(uint32_t *)toe);
+
+ if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
+ 0) < 0) {
+ return false;
+ }
+ return true;
+}
+
+bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
+ uint16_t *indirections_table, uint8_t *toeplitz_key)
+{
+ if (!ebpf_rss_is_loaded(ctx) || config == NULL ||
+ indirections_table == NULL || toeplitz_key == NULL) {
+ return false;
+ }
+
+ if (!ebpf_rss_set_config(ctx, config)) {
+ return false;
+ }
+
+ if (!ebpf_rss_set_indirections_table(ctx, indirections_table,
+ config->indirections_len)) {
+ return false;
+ }
+
+ if (!ebpf_rss_set_toepliz_key(ctx, toeplitz_key)) {
+ return false;
+ }
+
+ return true;
+}
+
+void ebpf_rss_unload(struct EBPFRSSContext *ctx)
+{
+ if (!ebpf_rss_is_loaded(ctx)) {
+ return;
+ }
+
+ rss_bpf__destroy(ctx->obj);
+ ctx->obj = NULL;
+}
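Pulling the loader together, the intended call sequence is init, load,
set_all, unload. A condensed, hypothetical caller (the real consumer is
virtio-net; the table and key buffers here are placeholders):

    struct EBPFRSSContext ctx;
    struct EBPFRSSConfig config = {
        .redirect = 1,
        .populate_hash = 0,
        .hash_types = 0,         /* VIRTIO_NET_RSS_HASH_TYPE_* bits */
        .indirections_len = 128,
        .default_queue = 0,
    };
    uint16_t indirections[128] = { 0 };               /* placeholder */
    uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE] = { 0 }; /* placeholder */

    ebpf_rss_init(&ctx);
    if (ebpf_rss_load(&ctx) &&
        ebpf_rss_set_all(&ctx, &config, indirections, key)) {
        /* ctx.program_fd can now be attached to a tap device */
    }
    ebpf_rss_unload(&ctx); /* safe even if load failed */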
diff --git a/ebpf/ebpf_rss.h b/ebpf/ebpf_rss.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/ebpf/ebpf_rss.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * eBPF RSS header
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Andrew Melnychenko <andrew@daynix.com>
+ * Yuri Benditovich <yuri.benditovich@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_EBPF_RSS_H
+#define QEMU_EBPF_RSS_H
+
+struct EBPFRSSContext {
+ void *obj;
+ int program_fd;
+ int map_configuration;
+ int map_toeplitz_key;
+ int map_indirections_table;
+};
+
+struct EBPFRSSConfig {
+ uint8_t redirect;
+ uint8_t populate_hash;
+ uint32_t hash_types;
+ uint16_t indirections_len;
+ uint16_t default_queue;
+} __attribute__((packed));
+
+void ebpf_rss_init(struct EBPFRSSContext *ctx);
+
+bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx);
+
+bool ebpf_rss_load(struct EBPFRSSContext *ctx);
+
+bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
+ uint16_t *indirections_table, uint8_t *toeplitz_key);
+
+void ebpf_rss_unload(struct EBPFRSSContext *ctx);
+
+#endif /* QEMU_EBPF_RSS_H */
diff --git a/ebpf/meson.build b/ebpf/meson.build
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/ebpf/meson.build
@@ -0,0 +1 @@
+common_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: files('ebpf_rss-stub.c'))
diff --git a/ebpf/rss.bpf.skeleton.h b/ebpf/rss.bpf.skeleton.h
348
new file mode 100644
349
index XXXXXXX..XXXXXXX
350
--- /dev/null
351
+++ b/ebpf/rss.bpf.skeleton.h
352
@@ -XXX,XX +XXX,XX @@
353
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
354
+
355
+/* THIS FILE IS AUTOGENERATED! */
356
+#ifndef __RSS_BPF_SKEL_H__
357
+#define __RSS_BPF_SKEL_H__
358
+
359
+#include <stdlib.h>
360
+#include <bpf/libbpf.h>
361
+
362
+struct rss_bpf {
363
+    struct bpf_object_skeleton *skeleton;
364
+    struct bpf_object *obj;
365
+    struct {
366
+        struct bpf_map *tap_rss_map_configurations;
367
+        struct bpf_map *tap_rss_map_indirection_table;
368
+        struct bpf_map *tap_rss_map_toeplitz_key;
369
+    } maps;
370
+    struct {
371
+        struct bpf_program *tun_rss_steering_prog;
372
+    } progs;
373
+    struct {
374
+        struct bpf_link *tun_rss_steering_prog;
375
+    } links;
376
+};
377
+
378
+static void
379
+rss_bpf__destroy(struct rss_bpf *obj)
380
+{
381
+    if (!obj)
382
+        return;
383
+    if (obj->skeleton)
384
+        bpf_object__destroy_skeleton(obj->skeleton);
385
+    free(obj);
386
+}
387
+
388
+static inline int
389
+rss_bpf__create_skeleton(struct rss_bpf *obj);
390
+
391
+static inline struct rss_bpf *
392
+rss_bpf__open_opts(const struct bpf_object_open_opts *opts)
393
+{
394
+    struct rss_bpf *obj;
395
+
396
+    obj = (struct rss_bpf *)calloc(1, sizeof(*obj));
397
+    if (!obj)
398
+        return NULL;
399
+    if (rss_bpf__create_skeleton(obj))
400
+        goto err;
401
+    if (bpf_object__open_skeleton(obj->skeleton, opts))
402
+        goto err;
403
+
404
+    return obj;
405
+err:
406
+    rss_bpf__destroy(obj);
407
+    return NULL;
408
+}
409
+
410
+static inline struct rss_bpf *
411
+rss_bpf__open(void)
412
+{
413
+    return rss_bpf__open_opts(NULL);
414
+}
415
+
416
+static inline int
417
+rss_bpf__load(struct rss_bpf *obj)
418
+{
419
+    return bpf_object__load_skeleton(obj->skeleton);
420
+}
421
+
422
+static inline struct rss_bpf *
423
+rss_bpf__open_and_load(void)
424
+{
425
+    struct rss_bpf *obj;
426
+
427
+    obj = rss_bpf__open();
428
+    if (!obj)
429
+        return NULL;
430
+    if (rss_bpf__load(obj)) {
431
+        rss_bpf__destroy(obj);
432
+        return NULL;
433
+    }
434
+    return obj;
435
+}
436
+
437
+static inline int
438
+rss_bpf__attach(struct rss_bpf *obj)
439
+{
440
+    return bpf_object__attach_skeleton(obj->skeleton);
441
+}
442
+
443
+static inline void
444
+rss_bpf__detach(struct rss_bpf *obj)
445
+{
446
+    return bpf_object__detach_skeleton(obj->skeleton);
447
+}
448
+
449
+static inline int
450
+rss_bpf__create_skeleton(struct rss_bpf *obj)
451
+{
452
+    struct bpf_object_skeleton *s;
453
+
454
+    s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));
455
+    if (!s)
456
+        return -1;
457
+    obj->skeleton = s;
458
+
459
+    s->sz = sizeof(*s);
460
+    s->name = "rss_bpf";
461
+    s->obj = &obj->obj;
462
+
463
+    /* maps */
464
+    s->map_cnt = 3;
465
+    s->map_skel_sz = sizeof(*s->maps);
466
+    s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);
467
+    if (!s->maps)
468
+        goto err;
469
+
470
+    s->maps[0].name = "tap_rss_map_configurations";
471
+    s->maps[0].map = &obj->maps.tap_rss_map_configurations;
472
+
473
+    s->maps[1].name = "tap_rss_map_indirection_table";
474
+    s->maps[1].map = &obj->maps.tap_rss_map_indirection_table;
475
+
476
+    s->maps[2].name = "tap_rss_map_toeplitz_key";
477
+    s->maps[2].map = &obj->maps.tap_rss_map_toeplitz_key;
478
+
479
+    /* programs */
480
+    s->prog_cnt = 1;
481
+    s->prog_skel_sz = sizeof(*s->progs);
482
+    s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);
483
+    if (!s->progs)
484
+        goto err;
485
+
486
+    s->progs[0].name = "tun_rss_steering_prog";
487
+    s->progs[0].prog = &obj->progs.tun_rss_steering_prog;
488
+    s->progs[0].link = &obj->links.tun_rss_steering_prog;
489
+
490
+    s->data_sz = 8088;
491
+    s->data = (void *)"\
492
+\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\
493
+\0\0\0\0\0\0\0\0\0\0\0\x18\x1d\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0a\0\
494
+\x01\0\xbf\x18\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\x4c\xff\0\0\0\0\xbf\xa7\
495
+\0\0\0\0\0\0\x07\x07\0\0\x4c\xff\xff\xff\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
496
+\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x06\0\0\0\0\0\0\x18\x01\0\0\0\0\0\
497
+\0\0\0\0\0\0\0\0\0\xbf\x72\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\xbf\x07\0\0\0\0\0\0\
498
+\x18\0\0\0\xff\xff\xff\xff\0\0\0\0\0\0\0\0\x15\x06\x66\x02\0\0\0\0\xbf\x79\0\0\
499
+\0\0\0\0\x15\x09\x64\x02\0\0\0\0\x71\x61\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\
500
+\0\x5d\x02\0\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xc0\xff\0\0\0\0\x7b\x1a\xb8\xff\
501
+\0\0\0\0\x7b\x1a\xb0\xff\0\0\0\0\x7b\x1a\xa8\xff\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\
502
+\0\x63\x1a\x98\xff\0\0\0\0\x7b\x1a\x90\xff\0\0\0\0\x7b\x1a\x88\xff\0\0\0\0\x7b\
503
+\x1a\x80\xff\0\0\0\0\x7b\x1a\x78\xff\0\0\0\0\x7b\x1a\x70\xff\0\0\0\0\x7b\x1a\
504
+\x68\xff\0\0\0\0\x7b\x1a\x60\xff\0\0\0\0\x7b\x1a\x58\xff\0\0\0\0\x7b\x1a\x50\
505
+\xff\0\0\0\0\x15\x08\x4c\x02\0\0\0\0\x6b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\
506
+\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x02\0\0\x0c\0\0\0\xb7\
507
+\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\
508
+\x77\0\0\0\x20\0\0\0\x55\0\x11\0\0\0\0\0\xb7\x02\0\0\x10\0\0\0\x69\xa1\xd0\xff\
509
+\0\0\0\0\xbf\x13\0\0\0\0\0\0\xdc\x03\0\0\x10\0\0\0\x15\x03\x02\0\0\x81\0\0\x55\
510
+\x03\x0c\0\xa8\x88\0\0\xb7\x02\0\0\x14\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\
511
+\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\0\0\0\0\
512
+\x85\0\0\0\x44\0\0\0\x69\xa1\xd0\xff\0\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\
513
+\0\0\0\x15\0\x01\0\0\0\0\0\x05\0\x2f\x02\0\0\0\0\x15\x01\x2e\x02\0\0\0\0\x7b\
514
+\x9a\x30\xff\0\0\0\0\x15\x01\x57\0\x86\xdd\0\0\x55\x01\x3b\0\x08\0\0\0\x7b\x7a\
515
+\x20\xff\0\0\0\0\xb7\x07\0\0\x01\0\0\0\x73\x7a\x50\xff\0\0\0\0\xb7\x01\0\0\0\0\
516
+\0\0\x63\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\
517
+\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\xb7\x02\0\
518
+\0\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\
519
+\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x1a\x02\0\0\0\0\x69\xa1\xd6\xff\0\0\
520
+\0\0\x55\x01\x01\0\0\0\0\0\xb7\x07\0\0\0\0\0\0\x61\xa1\xdc\xff\0\0\0\0\x63\x1a\
521
+\x5c\xff\0\0\0\0\x61\xa1\xe0\xff\0\0\0\0\x63\x1a\x60\xff\0\0\0\0\x73\x7a\x56\
522
+\xff\0\0\0\0\x71\xa9\xd9\xff\0\0\0\0\x71\xa1\xd0\xff\0\0\0\0\x67\x01\0\0\x02\0\
523
+\0\0\x57\x01\0\0\x3c\0\0\0\x7b\x1a\x40\xff\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\xbf\
524
+\x91\0\0\0\0\0\0\x57\x01\0\0\xff\0\0\0\x15\x01\x19\0\0\0\0\0\x71\xa1\x56\xff\0\
525
+\0\0\0\x55\x01\x17\0\0\0\0\0\x57\x09\0\0\xff\0\0\0\x15\x09\x7a\x01\x11\0\0\0\
526
+\x55\x09\x14\0\x06\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x53\xff\0\0\0\0\xb7\x01\
527
+\0\0\0\0\0\0\x63\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\xff\0\0\0\0\x7b\x1a\xd0\xff\0\
528
+\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\
529
+\xa2\x40\xff\0\0\0\0\xb7\x04\0\0\x14\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\
530
+\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\xf4\x01\0\0\0\0\x69\xa1\
531
+\xd0\xff\0\0\0\0\x6b\x1a\x58\xff\0\0\0\0\x69\xa1\xd2\xff\0\0\0\0\x6b\x1a\x5a\
532
+\xff\0\0\0\0\x71\xa1\x50\xff\0\0\0\0\x15\x01\xd4\0\0\0\0\0\x71\x62\x03\0\0\0\0\
533
+\0\x67\x02\0\0\x08\0\0\0\x71\x61\x02\0\0\0\0\0\x4f\x12\0\0\0\0\0\0\x71\x63\x04\
534
+\0\0\0\0\0\x71\x61\x05\0\0\0\0\0\x67\x01\0\0\x08\0\0\0\x4f\x31\0\0\0\0\0\0\x67\
535
+\x01\0\0\x10\0\0\0\x4f\x21\0\0\0\0\0\0\x71\xa2\x53\xff\0\0\0\0\x79\xa0\x30\xff\
536
+\0\0\0\0\x15\x02\x06\x01\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x02\0\0\0\x15\
537
+\x02\x03\x01\0\0\0\0\x61\xa1\x5c\xff\0\0\0\0\x63\x1a\xa0\xff\0\0\0\0\x61\xa1\
538
+\x60\xff\0\0\0\0\x63\x1a\xa4\xff\0\0\0\0\x69\xa1\x58\xff\0\0\0\0\x6b\x1a\xa8\
539
+\xff\0\0\0\0\x69\xa1\x5a\xff\0\0\0\0\x6b\x1a\xaa\xff\0\0\0\0\x05\0\x65\x01\0\0\
540
+\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x51\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\
541
+\xf0\xff\0\0\0\0\x7b\x1a\xe8\xff\0\0\0\0\x7b\x1a\xe0\xff\0\0\0\0\x7b\x1a\xd8\
542
+\xff\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xd0\xff\
543
+\xff\xff\xb7\x01\0\0\x28\0\0\0\x7b\x1a\x40\xff\0\0\0\0\xbf\x81\0\0\0\0\0\0\xb7\
544
+\x02\0\0\0\0\0\0\xb7\x04\0\0\x28\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\
545
+\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x55\0\x10\x01\0\0\0\0\x79\xa1\xe0\
546
+\xff\0\0\0\0\x63\x1a\x64\xff\0\0\0\0\x77\x01\0\0\x20\0\0\0\x63\x1a\x68\xff\0\0\
547
+\0\0\x79\xa1\xd8\xff\0\0\0\0\x63\x1a\x5c\xff\0\0\0\0\x77\x01\0\0\x20\0\0\0\x63\
548
+\x1a\x60\xff\0\0\0\0\x79\xa1\xe8\xff\0\0\0\0\x63\x1a\x6c\xff\0\0\0\0\x77\x01\0\
549
+\0\x20\0\0\0\x63\x1a\x70\xff\0\0\0\0\x79\xa1\xf0\xff\0\0\0\0\x63\x1a\x74\xff\0\
550
+\0\0\0\x77\x01\0\0\x20\0\0\0\x63\x1a\x78\xff\0\0\0\0\x71\xa9\xd6\xff\0\0\0\0\
551
+\x25\x09\xff\0\x3c\0\0\0\xb7\x01\0\0\x01\0\0\0\x6f\x91\0\0\0\0\0\0\x18\x02\0\0\
552
+\x01\0\0\0\0\0\0\0\0\x18\0\x1c\x5f\x21\0\0\0\0\0\0\x55\x01\x01\0\0\0\0\0\x05\0\
553
+\xf8\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x6b\x1a\xfe\xff\0\0\0\0\xb7\x01\0\0\x28\0\0\
554
+\0\x7b\x1a\x40\xff\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x8c\xff\xff\xff\x7b\
555
+\x1a\x18\xff\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x7c\xff\xff\xff\x7b\x1a\
556
+\x10\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\x28\xff\0\0\0\0\x7b\x7a\x20\xff\0\
557
+\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xfe\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\
558
+\xa2\x40\xff\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\
559
+\0\0\0\x67\0\0\0\x20\0\0\0\x77\0\0\0\x20\0\0\0\x15\0\x01\0\0\0\0\0\x05\0\x90\
560
+\x01\0\0\0\0\xbf\x91\0\0\0\0\0\0\x15\x01\x23\0\x3c\0\0\0\x15\x01\x59\0\x2c\0\0\
561
+\0\x55\x01\x5a\0\x2b\0\0\0\xb7\x01\0\0\0\0\0\0\x63\x1a\xf8\xff\0\0\0\0\xbf\xa3\
562
+\0\0\0\0\0\0\x07\x03\0\0\xf8\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\xa2\x40\xff\0\
563
+\0\0\0\xb7\x04\0\0\x04\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\
564
+\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\x03\x01\0\0\0\
565
+\0\x71\xa1\xfa\xff\0\0\0\0\x55\x01\x4b\0\x02\0\0\0\x71\xa1\xf9\xff\0\0\0\0\x55\
566
+\x01\x49\0\x02\0\0\0\x71\xa1\xfb\xff\0\0\0\0\x55\x01\x47\0\x01\0\0\0\x79\xa2\
567
+\x40\xff\0\0\0\0\x07\x02\0\0\x08\0\0\0\xbf\x81\0\0\0\0\0\0\x79\xa3\x18\xff\0\0\
568
+\0\0\xb7\x04\0\0\x10\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\0\
569
+\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\xf2\0\0\0\0\0\
570
+\xb7\x01\0\0\x01\0\0\0\x73\x1a\x55\xff\0\0\0\0\x05\0\x39\0\0\0\0\0\xb7\x01\0\0\
571
+\0\0\0\0\x6b\x1a\xf8\xff\0\0\0\0\xb7\x09\0\0\x02\0\0\0\xb7\x07\0\0\x1e\0\0\0\
572
+\x05\0\x0e\0\0\0\0\0\x79\xa2\x38\xff\0\0\0\0\x0f\x29\0\0\0\0\0\0\xbf\x92\0\0\0\
573
+\0\0\0\x07\x02\0\0\x01\0\0\0\x71\xa3\xff\xff\0\0\0\0\x67\x03\0\0\x03\0\0\0\x2d\
574
+\x23\x02\0\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\x05\0\x2b\0\0\0\0\0\x07\x07\0\0\xff\
575
+\xff\xff\xff\xbf\x72\0\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x77\x02\0\0\x20\0\0\0\
576
+\x15\x02\xf9\xff\0\0\0\0\x7b\x9a\x38\xff\0\0\0\0\x79\xa1\x40\xff\0\0\0\0\x0f\
577
+\x19\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\xf8\xff\xff\xff\xbf\x81\0\0\0\
578
+\0\0\0\xbf\x92\0\0\0\0\0\0\xb7\x04\0\0\x02\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\
579
+\0\x44\0\0\0\xbf\x01\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\
580
+\x55\x01\x94\0\0\0\0\0\x71\xa2\xf8\xff\0\0\0\0\x55\x02\x0f\0\xc9\0\0\0\x07\x09\
581
+\0\0\x02\0\0\0\xbf\x81\0\0\0\0\0\0\xbf\x92\0\0\0\0\0\0\x79\xa3\x10\xff\0\0\0\0\
582
+\xb7\x04\0\0\x10\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\xbf\x01\0\0\0\
583
+\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\x87\0\0\0\0\0\xb7\
584
+\x01\0\0\x01\0\0\0\x73\x1a\x54\xff\0\0\0\0\x79\xa7\x20\xff\0\0\0\0\x05\0\x07\0\
585
+\0\0\0\0\xb7\x09\0\0\x01\0\0\0\x15\x02\xd1\xff\0\0\0\0\x71\xa9\xf9\xff\0\0\0\0\
586
+\x07\x09\0\0\x02\0\0\0\x05\0\xce\xff\0\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x56\
587
+\xff\0\0\0\0\x71\xa1\xff\xff\0\0\0\0\x67\x01\0\0\x03\0\0\0\x79\xa2\x40\xff\0\0\
588
+\0\0\x0f\x12\0\0\0\0\0\0\x07\x02\0\0\x08\0\0\0\x7b\x2a\x40\xff\0\0\0\0\x71\xa9\
589
+\xfe\xff\0\0\0\0\x25\x09\x0e\0\x3c\0\0\0\xb7\x01\0\0\x01\0\0\0\x6f\x91\0\0\0\0\
590
+\0\0\x18\x02\0\0\x01\0\0\0\0\0\0\0\0\x18\0\x1c\x5f\x21\0\0\0\0\0\0\x55\x01\x01\
591
+\0\0\0\0\0\x05\0\x07\0\0\0\0\0\x79\xa1\x28\xff\0\0\0\0\x07\x01\0\0\x01\0\0\0\
592
+\x7b\x1a\x28\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x55\x01\
593
+\x82\xff\x0b\0\0\0\x05\0\x10\xff\0\0\0\0\x15\x09\xf8\xff\x87\0\0\0\x05\0\xfd\
594
+\xff\0\0\0\0\x71\xa1\x51\xff\0\0\0\0\x79\xa0\x30\xff\0\0\0\0\x15\x01\x17\x01\0\
595
+\0\0\0\x71\x62\x03\0\0\0\0\0\x67\x02\0\0\x08\0\0\0\x71\x61\x02\0\0\0\0\0\x4f\
596
+\x12\0\0\0\0\0\0\x71\x63\x04\0\0\0\0\0\x71\x61\x05\0\0\0\0\0\x67\x01\0\0\x08\0\
597
+\0\0\x4f\x31\0\0\0\0\0\0\x67\x01\0\0\x10\0\0\0\x4f\x21\0\0\0\0\0\0\x71\xa2\x53\
598
+\xff\0\0\0\0\x15\x02\x3d\0\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x10\0\0\0\
599
+\x15\x02\x3a\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x5c\xff\xff\xff\x71\xa4\
600
+\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\
601
+\x07\x03\0\0\x7c\xff\xff\xff\x67\x01\0\0\x38\0\0\0\xc7\x01\0\0\x38\0\0\0\x65\
602
+\x01\x01\0\xff\xff\xff\xff\xbf\x32\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\
603
+\x6c\xff\xff\xff\x71\xa5\x55\xff\0\0\0\0\xbf\x34\0\0\0\0\0\0\x15\x05\x02\0\0\0\
604
+\0\0\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\x8c\xff\xff\xff\x65\x01\x01\0\xff\xff\xff\
605
+\xff\xbf\x43\0\0\0\0\0\0\x61\x21\x04\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\x24\0\
606
+\0\0\0\0\0\x4f\x41\0\0\0\0\0\0\x7b\x1a\xa0\xff\0\0\0\0\x61\x21\x08\0\0\0\0\0\
607
+\x61\x22\x0c\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x12\0\0\0\0\0\0\x7b\x2a\xa8\
608
+\xff\0\0\0\0\x61\x31\0\0\0\0\0\0\x61\x32\x04\0\0\0\0\0\x61\x34\x08\0\0\0\0\0\
609
+\x61\x33\x0c\0\0\0\0\0\x69\xa5\x5a\xff\0\0\0\0\x6b\x5a\xc2\xff\0\0\0\0\x69\xa5\
610
+\x58\xff\0\0\0\0\x6b\x5a\xc0\xff\0\0\0\0\x67\x03\0\0\x20\0\0\0\x4f\x43\0\0\0\0\
611
+\0\0\x7b\x3a\xb8\xff\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x12\0\0\0\0\0\0\x7b\x2a\
612
+\xb0\xff\0\0\0\0\x05\0\x6b\0\0\0\0\0\x71\xa2\x52\xff\0\0\0\0\x15\x02\x04\0\0\0\
613
+\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x04\0\0\0\x15\x02\x01\0\0\0\0\0\x05\0\xf7\
614
+\xfe\0\0\0\0\x57\x01\0\0\x01\0\0\0\x15\x01\xd3\0\0\0\0\0\x61\xa1\x5c\xff\0\0\0\
615
+\0\x63\x1a\xa0\xff\0\0\0\0\x61\xa1\x60\xff\0\0\0\0\x63\x1a\xa4\xff\0\0\0\0\x05\
616
+\0\x5e\0\0\0\0\0\x71\xa2\x52\xff\0\0\0\0\x15\x02\x1e\0\0\0\0\0\xbf\x12\0\0\0\0\
617
+\0\0\x57\x02\0\0\x20\0\0\0\x15\x02\x1b\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\
618
+\0\x5c\xff\xff\xff\x71\xa4\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\
619
+\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\x7c\xff\xff\xff\x57\x01\0\0\0\x01\0\0\
620
+\x15\x01\x01\0\0\0\0\0\xbf\x32\0\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\x03\0\0\x6c\
621
+\xff\xff\xff\x71\xa5\x55\xff\0\0\0\0\xbf\x34\0\0\0\0\0\0\x15\x05\x02\0\0\0\0\0\
622
+\xbf\xa4\0\0\0\0\0\0\x07\x04\0\0\x8c\xff\xff\xff\x15\x01\xc3\xff\0\0\0\0\x05\0\
623
+\xc1\xff\0\0\0\0\xb7\x09\0\0\x3c\0\0\0\x79\xa7\x20\xff\0\0\0\0\x67\0\0\0\x20\0\
624
+\0\0\x77\0\0\0\x20\0\0\0\x15\0\xa5\xfe\0\0\0\0\x05\0\xb0\0\0\0\0\0\x15\x09\x07\
625
+\xff\x87\0\0\0\x05\0\xa2\xfe\0\0\0\0\xbf\x12\0\0\0\0\0\0\x57\x02\0\0\x08\0\0\0\
626
+\x15\x02\xab\0\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x5c\xff\xff\xff\x71\xa4\
627
+\x54\xff\0\0\0\0\xbf\x23\0\0\0\0\0\0\x15\x04\x02\0\0\0\0\0\xbf\xa3\0\0\0\0\0\0\
628
+\x07\x03\0\0\x7c\xff\xff\xff\x57\x01\0\0\x40\0\0\0\x15\x01\x01\0\0\0\0\0\xbf\
629
+\x32\0\0\0\0\0\0\x61\x23\x04\0\0\0\0\0\x67\x03\0\0\x20\0\0\0\x61\x24\0\0\0\0\0\
630
+\0\x4f\x43\0\0\0\0\0\0\x7b\x3a\xa0\xff\0\0\0\0\x61\x23\x08\0\0\0\0\0\x61\x22\
631
+\x0c\0\0\0\0\0\x67\x02\0\0\x20\0\0\0\x4f\x32\0\0\0\0\0\0\x7b\x2a\xa8\xff\0\0\0\
632
+\0\x15\x01\x1c\0\0\0\0\0\x71\xa1\x55\xff\0\0\0\0\x15\x01\x1a\0\0\0\0\0\x61\xa1\
633
+\x98\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\x94\xff\0\0\0\0\x4f\x21\0\0\0\0\
634
+\0\0\x7b\x1a\xb8\xff\0\0\0\0\x61\xa1\x90\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\
635
+\xa2\x8c\xff\0\0\0\0\x05\0\x19\0\0\0\0\0\xb7\x01\0\0\x01\0\0\0\x73\x1a\x52\xff\
636
+\0\0\0\0\xb7\x01\0\0\0\0\0\0\x7b\x1a\xd0\xff\0\0\0\0\xbf\xa3\0\0\0\0\0\0\x07\
637
+\x03\0\0\xd0\xff\xff\xff\xbf\x81\0\0\0\0\0\0\x79\xa2\x40\xff\0\0\0\0\xb7\x04\0\
638
+\0\x08\0\0\0\xb7\x05\0\0\x01\0\0\0\x85\0\0\0\x44\0\0\0\x67\0\0\0\x20\0\0\0\x77\
639
+\0\0\0\x20\0\0\0\x55\0\x7d\0\0\0\0\0\x05\0\x88\xfe\0\0\0\0\xb7\x09\0\0\x2b\0\0\
640
+\0\x05\0\xc6\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\
641
+\x74\xff\0\0\0\0\x4f\x21\0\0\0\0\0\0\x7b\x1a\xb8\xff\0\0\0\0\x61\xa1\x70\xff\0\
642
+\0\0\0\x67\x01\0\0\x20\0\0\0\x61\xa2\x6c\xff\0\0\0\0\x4f\x21\0\0\0\0\0\0\x7b\
643
+\x1a\xb0\xff\0\0\0\0\xb7\x01\0\0\0\0\0\0\x07\x07\0\0\x04\0\0\0\x61\x03\0\0\0\0\
644
+\0\0\xb7\x05\0\0\0\0\0\0\x05\0\x4e\0\0\0\0\0\xaf\x52\0\0\0\0\0\0\xbf\x75\0\0\0\
645
+\0\0\0\x0f\x15\0\0\0\0\0\0\x71\x55\0\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\
646
+\0\0\0\0\0\x77\0\0\0\x07\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\
647
+\0\x39\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\
648
+\x50\0\0\0\0\0\0\x77\0\0\0\x06\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\
649
+\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3a\0\0\0\xc7\0\0\0\x3f\0\0\
650
+\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\0\0\
651
+\0\0\0\x77\0\0\0\x05\0\0\0\x57\0\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\
652
+\0\0\0\0\x67\0\0\0\x3b\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\
653
+\0\0\0\0\0\x67\x03\0\0\x01\0\0\0\xbf\x50\0\0\0\0\0\0\x77\0\0\0\x04\0\0\0\x57\0\
654
+\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3c\0\0\0\xc7\
655
+\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\x50\0\0\0\0\0\0\
656
+\x77\0\0\0\x03\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x03\0\0\0\0\
657
+\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x3d\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\
658
+\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\x50\0\0\0\0\0\0\x77\0\0\0\x02\0\0\0\x57\0\0\0\
659
+\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x03\0\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\
660
+\0\0\x3e\0\0\0\xc7\0\0\0\x3f\0\0\0\x5f\x30\0\0\0\0\0\0\xaf\x02\0\0\0\0\0\0\xbf\
661
+\x50\0\0\0\0\0\0\x77\0\0\0\x01\0\0\0\x57\0\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\
662
+\x4f\x03\0\0\0\0\0\0\x57\x04\0\0\x01\0\0\0\x87\x04\0\0\0\0\0\0\x5f\x34\0\0\0\0\
663
+\0\0\xaf\x42\0\0\0\0\0\0\x57\x05\0\0\x01\0\0\0\x67\x03\0\0\x01\0\0\0\x4f\x53\0\
664
+\0\0\0\0\0\x07\x01\0\0\x01\0\0\0\xbf\x25\0\0\0\0\0\0\x15\x01\x0b\0\x24\0\0\0\
665
+\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\xa0\xff\xff\xff\x0f\x12\0\0\0\0\0\0\x71\x24\0\
666
+\0\0\0\0\0\xbf\x40\0\0\0\0\0\0\x67\0\0\0\x38\0\0\0\xc7\0\0\0\x38\0\0\0\xb7\x02\
667
+\0\0\0\0\0\0\x65\0\xa9\xff\xff\xff\xff\xff\xbf\x32\0\0\0\0\0\0\x05\0\xa7\xff\0\
668
+\0\0\0\xbf\x21\0\0\0\0\0\0\x67\x01\0\0\x20\0\0\0\x77\x01\0\0\x20\0\0\0\x15\x01\
669
+\x0e\0\0\0\0\0\x71\x63\x06\0\0\0\0\0\x71\x64\x07\0\0\0\0\0\x67\x04\0\0\x08\0\0\
670
+\0\x4f\x34\0\0\0\0\0\0\x3f\x41\0\0\0\0\0\0\x2f\x41\0\0\0\0\0\0\x1f\x12\0\0\0\0\
671
+\0\0\x63\x2a\x50\xff\0\0\0\0\xbf\xa2\0\0\0\0\0\0\x07\x02\0\0\x50\xff\xff\xff\
672
+\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x85\0\0\0\x01\0\0\0\x55\0\x05\0\0\0\0\0\
673
+\x71\x61\x08\0\0\0\0\0\x71\x60\x09\0\0\0\0\0\x67\0\0\0\x08\0\0\0\x4f\x10\0\0\0\
674
+\0\0\0\x95\0\0\0\0\0\0\0\x69\0\0\0\0\0\0\0\x05\0\xfd\xff\0\0\0\0\x02\0\0\0\x04\
675
+\0\0\0\x0a\0\0\0\x01\0\0\0\0\0\0\0\x02\0\0\0\x04\0\0\0\x28\0\0\0\x01\0\0\0\0\0\
676
+\0\0\x02\0\0\0\x04\0\0\0\x02\0\0\0\x80\0\0\0\0\0\0\0\x47\x50\x4c\x20\x76\x32\0\
677
+\0\0\0\0\0\x10\0\0\0\0\0\0\0\x01\x7a\x52\0\x08\x7c\x0b\x01\x0c\0\0\0\x18\0\0\0\
678
+\x18\0\0\0\0\0\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
679
+\0\0\0\0\0\0\0\0\0\0\0\0\xa0\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
680
+\0\x60\x02\0\0\0\0\x03\0\x20\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3f\x02\0\0\0\0\
681
+\x03\0\xd0\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xed\x01\0\0\0\0\x03\0\x10\x10\0\0\0\
682
+\0\0\0\0\0\0\0\0\0\0\0\xd4\x01\0\0\0\0\x03\0\x20\x10\0\0\0\0\0\0\0\0\0\0\0\0\0\
683
+\0\xa3\x01\0\0\0\0\x03\0\xb8\x12\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x01\0\0\0\0\
684
+\x03\0\x48\x10\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2a\x01\0\0\0\0\x03\0\x10\x13\0\0\0\
685
+\0\0\0\0\0\0\0\0\0\0\0\xe1\0\0\0\0\0\x03\0\xa0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
686
+\x2e\x02\0\0\0\0\x03\0\x28\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x68\x02\0\0\0\0\x03\
687
+\0\xc0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x36\x02\0\0\0\0\x03\0\xc8\x13\0\0\0\0\0\
688
+\0\0\0\0\0\0\0\0\0\x22\x01\0\0\0\0\x03\0\xe8\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
689
+\x02\x01\0\0\0\0\x03\0\x40\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd9\0\0\0\0\0\x03\0\
690
+\xf8\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x26\x02\0\0\0\0\x03\0\x20\x0e\0\0\0\0\0\0\
691
+\0\0\0\0\0\0\0\0\xcc\x01\0\0\0\0\x03\0\x60\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9b\
692
+\x01\0\0\0\0\x03\0\xc8\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x5b\x01\0\0\0\0\x03\0\
693
+\x20\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x7c\x01\0\0\0\0\x03\0\x48\x08\0\0\0\0\0\0\
694
+\0\0\0\0\0\0\0\0\x53\x01\0\0\0\0\x03\0\xb8\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1a\
695
+\x01\0\0\0\0\x03\0\xe0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x84\x01\0\0\0\0\x03\0\
696
+\xb8\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1e\x02\0\0\0\0\x03\0\xd8\x09\0\0\0\0\0\0\0\
697
+\0\0\0\0\0\0\0\xc4\x01\0\0\0\0\x03\0\x70\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x93\
698
+\x01\0\0\0\0\x03\0\xa8\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x74\x01\0\0\0\0\x03\0\
699
+\xf0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x4b\x01\0\0\0\0\x03\0\0\x0a\0\0\0\0\0\0\0\
700
+\0\0\0\0\0\0\0\x12\x01\0\0\0\0\x03\0\x10\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xfa\0\
701
+\0\0\0\0\x03\0\xc0\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x58\x02\0\0\0\0\x03\0\x88\
702
+\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x16\x02\0\0\0\0\x03\0\xb8\x0a\0\0\0\0\0\0\0\0\
703
+\0\0\0\0\0\0\xe5\x01\0\0\0\0\x03\0\xc0\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xbc\x01\
704
+\0\0\0\0\x03\0\0\x0e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x8b\x01\0\0\0\0\x03\0\x18\x0e\
705
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xd1\0\0\0\0\0\x03\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\
706
+\0\0\x50\x02\0\0\0\0\x03\0\x20\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0e\x02\0\0\0\0\
707
+\x03\0\x48\x0f\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x6c\x01\0\0\0\0\x03\0\xb0\x04\0\0\0\
708
+\0\0\0\0\0\0\0\0\0\0\0\x43\x01\0\0\0\0\x03\0\xc8\x0c\0\0\0\0\0\0\0\0\0\0\0\0\0\
709
+\0\xc9\0\0\0\0\0\x03\0\xf8\x0c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x06\x02\0\0\0\0\x03\
710
+\0\xd0\x0a\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3b\x01\0\0\0\0\x03\0\x98\x0b\0\0\0\0\0\
711
+\0\0\0\0\0\0\0\0\0\xf2\0\0\0\0\0\x03\0\xb8\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x48\
712
+\x02\0\0\0\0\x03\0\xf0\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xfe\x01\0\0\0\0\x03\0\
713
+\xf8\x0b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xdd\x01\0\0\0\0\x03\0\0\x0c\0\0\0\0\0\0\0\
714
+\0\0\0\0\0\0\0\xb4\x01\0\0\0\0\x03\0\x30\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x0a\
715
+\x01\0\0\0\0\x03\0\x90\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xc1\0\0\0\0\0\x03\0\xa8\
716
+\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xba\0\0\0\0\0\x03\0\xd0\x01\0\0\0\0\0\0\0\0\0\
717
+\0\0\0\0\0\xf6\x01\0\0\0\0\x03\0\xe0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xac\x01\0\
718
+\0\0\0\x03\0\x30\x0e\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x33\x01\0\0\0\0\x03\0\x80\x0e\
719
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xea\0\0\0\0\0\x03\0\x98\x0e\0\0\0\0\0\0\0\0\0\0\0\
720
+\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x6b\0\0\0\x11\0\x06\
721
+\0\0\0\0\0\0\0\0\0\x07\0\0\0\0\0\0\0\x25\0\0\0\x11\0\x05\0\0\0\0\0\0\0\0\0\x14\
722
+\0\0\0\0\0\0\0\x82\0\0\0\x11\0\x05\0\x28\0\0\0\0\0\0\0\x14\0\0\0\0\0\0\0\x01\0\
723
+\0\0\x11\0\x05\0\x14\0\0\0\0\0\0\0\x14\0\0\0\0\0\0\0\x40\0\0\0\x12\0\x03\0\0\0\
724
+\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\x28\0\0\0\0\0\0\0\x01\0\0\0\x3a\0\0\0\x50\0\0\
725
+\0\0\0\0\0\x01\0\0\0\x3c\0\0\0\x80\x13\0\0\0\0\0\0\x01\0\0\0\x3b\0\0\0\x1c\0\0\
726
+\0\0\0\0\0\x01\0\0\0\x38\0\0\0\0\x74\x61\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\
727
+\x5f\x74\x6f\x65\x70\x6c\x69\x74\x7a\x5f\x6b\x65\x79\0\x2e\x74\x65\x78\x74\0\
728
+\x6d\x61\x70\x73\0\x74\x61\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\x5f\x63\x6f\x6e\
729
+\x66\x69\x67\x75\x72\x61\x74\x69\x6f\x6e\x73\0\x74\x75\x6e\x5f\x72\x73\x73\x5f\
730
+\x73\x74\x65\x65\x72\x69\x6e\x67\x5f\x70\x72\x6f\x67\0\x2e\x72\x65\x6c\x74\x75\
731
+\x6e\x5f\x72\x73\x73\x5f\x73\x74\x65\x65\x72\x69\x6e\x67\0\x5f\x6c\x69\x63\x65\
732
+\x6e\x73\x65\0\x2e\x72\x65\x6c\x2e\x65\x68\x5f\x66\x72\x61\x6d\x65\0\x74\x61\
733
+\x70\x5f\x72\x73\x73\x5f\x6d\x61\x70\x5f\x69\x6e\x64\x69\x72\x65\x63\x74\x69\
734
+\x6f\x6e\x5f\x74\x61\x62\x6c\x65\0\x72\x73\x73\x2e\x62\x70\x66\x2e\x63\0\x2e\
735
+\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x4c\x42\x42\x30\x5f\
736
+\x39\0\x4c\x42\x42\x30\x5f\x38\x39\0\x4c\x42\x42\x30\x5f\x36\x39\0\x4c\x42\x42\
737
+\x30\x5f\x35\x39\0\x4c\x42\x42\x30\x5f\x31\x39\0\x4c\x42\x42\x30\x5f\x31\x30\
738
+\x39\0\x4c\x42\x42\x30\x5f\x39\x38\0\x4c\x42\x42\x30\x5f\x37\x38\0\x4c\x42\x42\
739
+\x30\x5f\x34\x38\0\x4c\x42\x42\x30\x5f\x31\x38\0\x4c\x42\x42\x30\x5f\x38\x37\0\
740
+\x4c\x42\x42\x30\x5f\x34\x37\0\x4c\x42\x42\x30\x5f\x33\x37\0\x4c\x42\x42\x30\
741
+\x5f\x31\x37\0\x4c\x42\x42\x30\x5f\x31\x30\x37\0\x4c\x42\x42\x30\x5f\x39\x36\0\
742
+\x4c\x42\x42\x30\x5f\x37\x36\0\x4c\x42\x42\x30\x5f\x36\x36\0\x4c\x42\x42\x30\
743
+\x5f\x34\x36\0\x4c\x42\x42\x30\x5f\x33\x36\0\x4c\x42\x42\x30\x5f\x32\x36\0\x4c\
744
+\x42\x42\x30\x5f\x31\x30\x36\0\x4c\x42\x42\x30\x5f\x36\x35\0\x4c\x42\x42\x30\
745
+\x5f\x34\x35\0\x4c\x42\x42\x30\x5f\x33\x35\0\x4c\x42\x42\x30\x5f\x34\0\x4c\x42\
746
+\x42\x30\x5f\x35\x34\0\x4c\x42\x42\x30\x5f\x34\x34\0\x4c\x42\x42\x30\x5f\x32\
747
+\x34\0\x4c\x42\x42\x30\x5f\x31\x30\x34\0\x4c\x42\x42\x30\x5f\x39\x33\0\x4c\x42\
748
+\x42\x30\x5f\x38\x33\0\x4c\x42\x42\x30\x5f\x35\x33\0\x4c\x42\x42\x30\x5f\x34\
749
+\x33\0\x4c\x42\x42\x30\x5f\x32\x33\0\x4c\x42\x42\x30\x5f\x31\x30\x33\0\x4c\x42\
750
+\x42\x30\x5f\x38\x32\0\x4c\x42\x42\x30\x5f\x35\x32\0\x4c\x42\x42\x30\x5f\x31\
751
+\x30\x32\0\x4c\x42\x42\x30\x5f\x39\x31\0\x4c\x42\x42\x30\x5f\x38\x31\0\x4c\x42\
752
+\x42\x30\x5f\x37\x31\0\x4c\x42\x42\x30\x5f\x36\x31\0\x4c\x42\x42\x30\x5f\x35\
753
+\x31\0\x4c\x42\x42\x30\x5f\x34\x31\0\x4c\x42\x42\x30\x5f\x32\x31\0\x4c\x42\x42\
754
+\x30\x5f\x31\x31\0\x4c\x42\x42\x30\x5f\x31\x31\x31\0\x4c\x42\x42\x30\x5f\x31\
755
+\x30\x31\0\x4c\x42\x42\x30\x5f\x38\x30\0\x4c\x42\x42\x30\x5f\x36\x30\0\x4c\x42\
756
+\x42\x30\x5f\x35\x30\0\x4c\x42\x42\x30\x5f\x31\x30\0\x4c\x42\x42\x30\x5f\x31\
757
+\x31\x30\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
758
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xaa\
759
+\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa0\x1a\0\0\0\0\0\0\x71\x02\0\
760
+\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x1a\0\0\0\x01\0\0\
761
+\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
762
+\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x5a\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\
763
+\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\xd8\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\
764
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x56\0\0\0\x09\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
765
+\0\x60\x1a\0\0\0\0\0\0\x30\0\0\0\0\0\0\0\x09\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\
766
+\x10\0\0\0\0\0\0\0\x20\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x18\
767
+\x14\0\0\0\0\0\0\x3c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\
768
+\0\0\0\x6c\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x54\x14\0\0\0\0\0\
769
+\0\x07\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x78\0\0\
770
+\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x60\x14\0\0\0\0\0\0\x30\0\0\0\0\
771
+\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x74\0\0\0\x09\0\0\0\0\
772
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x90\x1a\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x09\0\0\0\
773
+\x07\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\xb2\0\0\0\x02\0\0\0\0\0\0\0\0\0\
774
+\0\0\0\0\0\0\0\0\0\0\x90\x14\0\0\0\0\0\0\xd0\x05\0\0\0\0\0\0\x01\0\0\0\x39\0\0\
775
+\0\x08\0\0\0\0\0\0\0\x18\0\0\0\0\0\0\0";
776
+
777
+    return 0;
778
+err:
779
+    bpf_object__destroy_skeleton(s);
780
+    return -1;
781
+}
782
+
783
+#endif /* __RSS_BPF_SKEL_H__ */
784
diff --git a/ebpf/trace-events b/ebpf/trace-events
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/ebpf/trace-events
@@ -XXX,XX +XXX,XX @@
+# See docs/devel/tracing.txt for syntax documentation.
+
+# ebpf-rss.c
+ebpf_error(const char *s1, const char *s2) "error in %s: %s"
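The single event above is all the loader needs; tracetool turns the
declaration into a trace_ebpf_error() helper, which ebpf_rss.c calls as,
for example:

    trace_ebpf_error("eBPF RSS", "can not load RSS program");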
diff --git a/ebpf/trace.h b/ebpf/trace.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/ebpf/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-ebpf.h"
diff --git a/meson.build b/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/meson.build
+++ b/meson.build
@@ -XXX,XX +XXX,XX @@ if not get_option('fuse_lseek').disabled()
endif
endif

+# libbpf
+libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
+if libbpf.found() and not cc.links('''
+ #include <bpf/libbpf.h>
+ int main(void)
+ {
+ bpf_object__destroy_skeleton(NULL);
+ return 0;
+ }''', dependencies: libbpf)
+ libbpf = not_found
+ if get_option('bpf').enabled()
+ error('libbpf skeleton test failed')
+ else
+ warning('libbpf skeleton test failed, disabling')
+ endif
+endif
+
if get_option('cfi')
cfi_flags=[]
# Check for dependency on LTO
@@ -XXX,XX +XXX,XX @@ endif
config_host_data.set('CONFIG_GTK', gtk.found())
config_host_data.set('CONFIG_LIBATTR', have_old_libattr)
config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found())
+config_host_data.set('CONFIG_EBPF', libbpf.found())
config_host_data.set('CONFIG_LIBISCSI', libiscsi.found())
config_host_data.set('CONFIG_LIBNFS', libnfs.found())
config_host_data.set('CONFIG_RBD', rbd.found())
@@ -XXX,XX +XXX,XX @@ if have_system
'backends',
'backends/tpm',
'chardev',
+ 'ebpf',
'hw/9pfs',
'hw/acpi',
'hw/adc',
@@ -XXX,XX +XXX,XX @@ subdir('accel')
subdir('plugins')
subdir('bsd-user')
subdir('linux-user')
+subdir('ebpf')
+
+common_ss.add(libbpf)

bsd_user_ss.add(files('gdbstub.c'))
specific_ss.add_all(when: 'CONFIG_BSD_USER', if_true: bsd_user_ss)
@@ -XXX,XX +XXX,XX @@ summary_info += {'RDMA support': config_host.has_key('CONFIG_RDMA')}
summary_info += {'PVRDMA support': config_host.has_key('CONFIG_PVRDMA')}
summary_info += {'fdt support': fdt_opt == 'disabled' ? false : fdt_opt}
summary_info += {'libcap-ng support': libcap_ng.found()}
+summary_info += {'bpf support': libbpf.found()}
# TODO: add back protocol and server version
summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')}
summary_info += {'rbd support': rbd.found()}
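Two details worth spelling out in the detection above: pkg-config finding
libbpf is not enough on its own, so the cc.links() probe verifies that the
installed library really provides the skeleton API
(bpf_object__destroy_skeleton()), which older libbpf releases lack; and the
bpf feature option decides whether a probe failure is fatal (--enable-bpf)
or merely falls back to the stub (the default auto, or --disable-bpf).
C code can then guard on the generated macro; a minimal sketch:

    #include "qemu/osdep.h" /* pulls in the generated CONFIG_* macros */

    #ifdef CONFIG_EBPF
    /* the real loader from ebpf/ebpf_rss.c is linked in */
    #else
    /* the ebpf_rss-stub.c fallbacks are linked instead */
    #endif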
diff --git a/meson_options.txt b/meson_options.txt
index XXXXXXX..XXXXXXX 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -XXX,XX +XXX,XX @@ option('bzip2', type : 'feature', value : 'auto',
description: 'bzip2 support for DMG images')
option('cap_ng', type : 'feature', value : 'auto',
description: 'cap_ng support')
+option('bpf', type : 'feature', value : 'auto',
+ description: 'eBPF support')
option('cocoa', type : 'feature', value : 'auto',
description: 'Cocoa user interface (macOS only)')
option('curl', type : 'feature', value : 'auto',
--
2.7.4

From: Eugenio Pérez <eperezma@redhat.com>

Use translations added in VhostIOVATree in SVQ.

Only introduce usage here, not allocation and deallocation. As with
previous patches, we use the dead code paths of shadow_vqs_enabled to
avoid committing too many changes at once. These paths are impossible
to take at the moment.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
hw/virtio/vhost-shadow-virtqueue.c | 86 +++++++++++++++++++++++---
hw/virtio/vhost-shadow-virtqueue.h | 6 +-
hw/virtio/vhost-vdpa.c | 122 +++++++++++++++++++++++++++++++------
include/hw/virtio/vhost-vdpa.h | 3 +
4 files changed, 187 insertions(+), 30 deletions(-)
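A note before the diff: the heart of vhost_svq_translate_addr() below is
interval arithmetic, find the mapping that contains the buffer's qemu VA,
then rebase that VA into the IOVA range the tree assigned. A stripped-down,
hypothetical helper showing only the rebasing step (the real code also
checks that the buffer does not run past the end of the mapping):

    static hwaddr svq_iova_from_vaddr(const VhostIOVATree *iova_tree,
                                      hwaddr vaddr)
    {
        const DMAMap needle = { .translated_addr = vaddr };
        const DMAMap *map = vhost_iova_tree_find_iova(iova_tree, &needle);

        /* map covers [translated_addr, translated_addr + size] */
        return map->iova + (vaddr - map->translated_addr);
    }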
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/virtio/vhost-shadow-virtqueue.c
23
+++ b/hw/virtio/vhost-shadow-virtqueue.c
24
@@ -XXX,XX +XXX,XX @@ static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
25
return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
26
}
27
28
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
29
+/**
30
+ * Translate addresses between qemu's virtual address and the SVQ IOVA
31
+ *
32
+ * @svq: Shadow VirtQueue
33
+ * @addrs: Translated IOVA addresses
34
+ * @iovec: Source qemu's VA addresses
35
+ * @num: Length of iovec and minimum length of vaddr
36
+ */
37
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
38
+ hwaddr *addrs, const struct iovec *iovec,
39
+ size_t num)
40
+{
41
+ if (num == 0) {
42
+ return true;
43
+ }
44
+
45
+ for (size_t i = 0; i < num; ++i) {
46
+ DMAMap needle = {
47
+ .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
48
+ .size = iovec[i].iov_len,
49
+ };
50
+ Int128 needle_last, map_last;
51
+ size_t off;
52
+
53
+ const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
54
+ /*
55
+ * Map cannot be NULL since iova map contains all guest space and
56
+ * qemu already has a physical address mapped
57
+ */
58
+ if (unlikely(!map)) {
59
+ qemu_log_mask(LOG_GUEST_ERROR,
60
+ "Invalid address 0x%"HWADDR_PRIx" given by guest",
61
+ needle.translated_addr);
62
+ return false;
63
+ }
64
+
65
+ off = needle.translated_addr - map->translated_addr;
66
+ addrs[i] = map->iova + off;
67
+
68
+ needle_last = int128_add(int128_make64(needle.translated_addr),
69
+ int128_make64(iovec[i].iov_len));
70
+ map_last = int128_make64(map->translated_addr + map->size);
71
+ if (unlikely(int128_gt(needle_last, map_last))) {
72
+ qemu_log_mask(LOG_GUEST_ERROR,
73
+ "Guest buffer expands over iova range");
74
+ return false;
75
+ }
76
+ }
77
+
78
+ return true;
79
+}
80
+
81
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
82
const struct iovec *iovec, size_t num,
83
bool more_descs, bool write)
84
{
85
@@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
86
} else {
87
descs[i].flags = flags;
88
}
89
- descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
90
+ descs[i].addr = cpu_to_le64(sg[n]);
91
descs[i].len = cpu_to_le32(iovec[n].iov_len);
92
93
last = i;
94
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
95
{
96
unsigned avail_idx;
97
vring_avail_t *avail = svq->vring.avail;
98
+ bool ok;
99
+ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
100
101
*head = svq->free_head;
102
103
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
104
return false;
105
}
106
107
- vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
108
- false);
109
- vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
110
+ ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
111
+ if (unlikely(!ok)) {
112
+ return false;
113
+ }
114
+ vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
115
+ elem->in_num > 0, false);
116
+
117
+
118
+ ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
119
+ if (unlikely(!ok)) {
120
+ return false;
121
+ }
122
+
123
+ vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
124
125
/*
126
* Put the entry in the available array (but don't update avail->idx until
127
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
128
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
129
struct vhost_vring_addr *addr)
130
{
131
- addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
132
- addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
133
- addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
134
+ addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
135
+ addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
136
+ addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
137
}
138
139
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
140
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
141
* Creates vhost shadow virtqueue, and instructs the vhost device to use the
142
* shadow methods and file descriptors.
143
*
144
+ * @iova_tree: Tree to perform descriptors translations
145
+ *
146
* Returns the new virtqueue or NULL.
147
*
148
* In case of error, reason is reported through error_report.
149
*/
150
-VhostShadowVirtqueue *vhost_svq_new(void)
151
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
152
{
153
g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
154
int r;
155
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
156
157
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
158
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
159
+ svq->iova_tree = iova_tree;
160
return g_steal_pointer(&svq);
161
162
err_init_hdev_call:
163
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
164
index XXXXXXX..XXXXXXX 100644
165
--- a/hw/virtio/vhost-shadow-virtqueue.h
166
+++ b/hw/virtio/vhost-shadow-virtqueue.h
167
@@ -XXX,XX +XXX,XX @@
168
#include "qemu/event_notifier.h"
169
#include "hw/virtio/virtio.h"
170
#include "standard-headers/linux/vhost_types.h"
171
+#include "hw/virtio/vhost-iova-tree.h"
172
173
/* Shadow virtqueue to relay notifications */
174
typedef struct VhostShadowVirtqueue {
175
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
176
/* Virtio device */
177
VirtIODevice *vdev;
178
179
+ /* IOVA mapping */
180
+ VhostIOVATree *iova_tree;
181
+
182
/* Map for use the guest's descriptors */
183
VirtQueueElement **ring_id_maps;
184
185
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
186
VirtQueue *vq);
187
void vhost_svq_stop(VhostShadowVirtqueue *svq);
188
189
-VhostShadowVirtqueue *vhost_svq_new(void);
190
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
191
192
void vhost_svq_free(gpointer vq);
193
G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
194
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
195
index XXXXXXX..XXXXXXX 100644
196
--- a/hw/virtio/vhost-vdpa.c
197
+++ b/hw/virtio/vhost-vdpa.c
198
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
199
vaddr, section->readonly);
200
201
llsize = int128_sub(llend, int128_make64(iova));
202
+ if (v->shadow_vqs_enabled) {
203
+ DMAMap mem_region = {
204
+ .translated_addr = (hwaddr)(uintptr_t)vaddr,
205
+ .size = int128_get64(llsize) - 1,
206
+ .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
207
+ };
208
+
209
+ int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
210
+ if (unlikely(r != IOVA_OK)) {
211
+ error_report("Can't allocate a mapping (%d)", r);
212
+ goto fail;
213
+ }
214
+
215
+ iova = mem_region.iova;
216
+ }
217
218
vhost_vdpa_iotlb_batch_begin_once(v);
219
ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
220
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
221
222
llsize = int128_sub(llend, int128_make64(iova));
223
224
+ if (v->shadow_vqs_enabled) {
225
+ const DMAMap *result;
226
+ const void *vaddr = memory_region_get_ram_ptr(section->mr) +
227
+ section->offset_within_region +
228
+ (iova - section->offset_within_address_space);
229
+ DMAMap mem_region = {
230
+ .translated_addr = (hwaddr)(uintptr_t)vaddr,
231
+ .size = int128_get64(llsize) - 1,
232
+ };
233
+
234
+ result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
235
+ iova = result->iova;
236
+ vhost_iova_tree_remove(v->iova_tree, &mem_region);
237
+ }
238
vhost_vdpa_iotlb_batch_begin_once(v);
239
ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
240
if (ret) {
241
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
242
243
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
244
for (unsigned n = 0; n < hdev->nvqs; ++n) {
245
- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
246
+ g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
247
248
if (unlikely(!svq)) {
249
error_setg(errp, "Cannot create svq %u", n);
250
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
251
/**
252
* Unmap a SVQ area in the device
253
*/
254
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
255
- hwaddr size)
256
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
257
+ const DMAMap *needle)
258
{
259
+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
260
+ hwaddr size;
261
int r;
262
263
- size = ROUND_UP(size, qemu_real_host_page_size);
264
- r = vhost_vdpa_dma_unmap(v, iova, size);
265
+ if (unlikely(!result)) {
266
+ error_report("Unable to find SVQ address to unmap");
267
+ return false;
268
+ }
269
+
270
+ size = ROUND_UP(result->size, qemu_real_host_page_size);
271
+ r = vhost_vdpa_dma_unmap(v, result->iova, size);
272
return r == 0;
273
}
274
275
static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
276
const VhostShadowVirtqueue *svq)
277
{
278
+ DMAMap needle = {};
279
struct vhost_vdpa *v = dev->opaque;
280
struct vhost_vring_addr svq_addr;
281
- size_t device_size = vhost_svq_device_area_size(svq);
282
- size_t driver_size = vhost_svq_driver_area_size(svq);
283
bool ok;
284
285
vhost_svq_get_vring_addr(svq, &svq_addr);
286
287
- ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
288
+ needle.translated_addr = svq_addr.desc_user_addr;
289
+ ok = vhost_vdpa_svq_unmap_ring(v, &needle);
290
if (unlikely(!ok)) {
291
return false;
292
}
293
294
- return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
295
+ needle.translated_addr = svq_addr.used_user_addr;
296
+ return vhost_vdpa_svq_unmap_ring(v, &needle);
297
+}
298
+
299
+/**
300
+ * Map the SVQ area in the device
301
+ *
302
+ * @v: Vhost-vdpa device
303
+ * @needle: The area to search iova
304
+ * @errp: Error pointer
305
+ */
306
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
307
+ Error **errp)
308
+{
309
+ int r;
310
+
311
+ r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
312
+ if (unlikely(r != IOVA_OK)) {
313
+ error_setg(errp, "Cannot allocate iova (%d)", r);
314
+ return false;
315
+ }
316
+
317
+ r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
318
+ (void *)(uintptr_t)needle->translated_addr,
319
+ needle->perm == IOMMU_RO);
320
+ if (unlikely(r != 0)) {
321
+ error_setg_errno(errp, -r, "Cannot map region to device");
322
+ vhost_iova_tree_remove(v->iova_tree, needle);
323
+ }
324
+
325
+ return r == 0;
326
}
327
328
/**
329
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
330
struct vhost_vring_addr *addr,
331
Error **errp)
332
{
333
+ DMAMap device_region, driver_region;
334
+ struct vhost_vring_addr svq_addr;
335
struct vhost_vdpa *v = dev->opaque;
336
size_t device_size = vhost_svq_device_area_size(svq);
337
size_t driver_size = vhost_svq_driver_area_size(svq);
338
- int r;
339
+ size_t avail_offset;
340
+ bool ok;
341
342
ERRP_GUARD();
343
- vhost_svq_get_vring_addr(svq, addr);
344
+ vhost_svq_get_vring_addr(svq, &svq_addr);
345
346
- r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
347
- (void *)(uintptr_t)addr->desc_user_addr, true);
348
- if (unlikely(r != 0)) {
349
- error_setg_errno(errp, -r, "Cannot create vq driver region: ");
350
+ driver_region = (DMAMap) {
351
+ .translated_addr = svq_addr.desc_user_addr,
352
+ .size = driver_size - 1,
353
+ .perm = IOMMU_RO,
354
+ };
355
+ ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
356
+ if (unlikely(!ok)) {
357
+ error_prepend(errp, "Cannot create vq driver region: ");
358
return false;
359
}
360
+ addr->desc_user_addr = driver_region.iova;
361
+ avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
362
+ addr->avail_user_addr = driver_region.iova + avail_offset;
363
364
- r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
365
- (void *)(intptr_t)addr->used_user_addr, false);
366
- if (unlikely(r != 0)) {
367
- error_setg_errno(errp, -r, "Cannot create vq device region: ");
368
+ device_region = (DMAMap) {
369
+ .translated_addr = svq_addr.used_user_addr,
370
+ .size = device_size - 1,
371
+ .perm = IOMMU_RW,
372
+ };
373
+ ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
374
+ if (unlikely(!ok)) {
375
+ error_prepend(errp, "Cannot create vq device region: ");
376
+ vhost_vdpa_svq_unmap_ring(v, &driver_region);
377
}
378
+ addr->used_user_addr = device_region.iova;
379
380
- return r == 0;
381
+ return ok;
382
}
383
384
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
385
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
386
index XXXXXXX..XXXXXXX 100644
387
--- a/include/hw/virtio/vhost-vdpa.h
388
+++ b/include/hw/virtio/vhost-vdpa.h
389
@@ -XXX,XX +XXX,XX @@
390
391
#include <gmodule.h>
392
393
+#include "hw/virtio/vhost-iova-tree.h"
394
#include "hw/virtio/virtio.h"
395
#include "standard-headers/linux/vhost_types.h"
396
397
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
398
MemoryListener listener;
399
struct vhost_vdpa_iova_range iova_range;
400
bool shadow_vqs_enabled;
401
+ /* IOVA mapping used by the Shadow Virtqueue */
402
+ VhostIOVATree *iova_tree;
403
GPtrArray *shadow_vqs;
404
struct vhost_dev *dev;
405
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
406
--
407
2.7.4
408
409
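
A side note on the DMAMap arithmetic above, since it is easy to misread:
the IOVA tree stores inclusive ranges, so a len-byte ring is recorded
with .size = len - 1, and the map call re-adds the byte by passing
needle->size + 1. The following standalone sketch shows that convention;
the Map struct is a simplified stand-in for QEMU's DMAMap and every
address in it is invented, not taken from the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-in for QEMU's DMAMap: describes the inclusive
     * range [iova, iova + size], so a len-byte region has size = len - 1. */
    typedef struct {
        uint64_t iova;            /* device-visible address */
        uint64_t translated_addr; /* backing address in qemu memory */
        uint64_t size;            /* inclusive size */
    } Map;

    int main(void)
    {
        uint64_t driver_len = 4096;               /* invented ring length */
        Map m = {
            .iova = 0x1000,                       /* invented iova */
            .translated_addr = 0x7f0000000000ULL, /* invented qemu VA */
            .size = driver_len - 1,               /* inclusive, as above */
        };

        /* The map call re-adds the 1, mirroring needle->size + 1 above. */
        printf("map %llu bytes at iova 0x%llx\n",
               (unsigned long long)(m.size + 1),
               (unsigned long long)m.iova);
        return 0;
    }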
From: Eugenio Pérez <eperezma@redhat.com>

This is needed to achieve migration, so that the destination can
restore its index.

The base is set to the last used idx, so the destination will see as
available all the entries that the device did not use, including the
in-flight ones.

This is ok for networking, but other kinds of devices might have
problems with these retransmissions.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                      struct vhost_vring_state *ring)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int ret;
 
+    if (v->shadow_vqs_enabled) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
+                                                      ring->index);
+
+        /*
+         * Setting base as last used idx, so destination will see as
+         * available all the entries that the device did not use, including
+         * the in-flight processing ones.
+         *
+         * TODO: This is ok for networking, but other kinds of devices might
+         * have problems with these retransmissions.
+         */
+        ring->num = svq->last_used_idx;
+        return 0;
+    }
+
     ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
     trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
     return ret;
--
2.7.4
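
As a rough illustration of why resuming from the last used index is safe
for a NIC, consider the toy ring state below. This is illustrative C,
not QEMU code, and all the index values are invented:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Toy state: the guest made 10 buffers available and the device
         * had completed only 7 of them when the VM was migrated. */
        uint16_t avail_idx = 10;
        uint16_t last_used_idx = 7;

        /* Starting the destination ring at base == last_used_idx exposes
         * buffers 7..9 as available again, so the in-flight ones are
         * simply retransmitted, which a network device tolerates. */
        printf("base=%u, buffers resubmitted=%u\n",
               last_used_idx, (unsigned)(avail_idx - last_used_idx));
        return 0;
    }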
From: Eugenio Pérez <eperezma@redhat.com>

Setting the log address would make the device start reporting invalid
dirty memory because the SVQ vrings are located in qemu's memory.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-vdpa.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                    struct vhost_log *log)
 {
-    if (vhost_vdpa_one_time_request(dev)) {
+    struct vhost_vdpa *v = dev->opaque;
+    if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
         return 0;
     }
 
--
2.7.4
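
To make the commit message concrete: the dirty log is indexed by guest
page number, while with SVQ the device writes into qemu-owned rings, so
a page number derived from those writes points at no guest page at all.
A standalone sketch of the mismatch, with every value invented:

    #include <stdint.h>
    #include <stdio.h>

    #define VHOST_LOG_PAGE 0x1000ULL  /* log granularity, one bit per page */

    int main(void)
    {
        /* With SVQ the device sees qemu's vring buffers ... */
        uint64_t svq_ring_addr = 0x7f2a00000000ULL;  /* invented qemu VA */

        /* ... but a bitmap position is computed as address / page size,
         * which only makes sense for guest physical addresses. */
        uint64_t page = svq_ring_addr / VHOST_LOG_PAGE;

        printf("device would mark page %llu dirty -- not a guest page\n",
               (unsigned long long)page);
        return 0;
    }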
From: Eugenio Pérez <eperezma@redhat.com>

SVQ is able to log the dirty bits by itself, so let's use it to not
block migration.

Also, ignore set and clear of VHOST_F_LOG_ALL on set_features if SVQ is
enabled. Even if the device supports it, the reports would be nonsense
because SVQ memory is in the qemu region.

The log region is still allocated. Future changes might skip that, but
this series is already long enough.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-vdpa.c         | 39 +++++++++++++++++++++++++++++++++++----
 include/hw/virtio/vhost-vdpa.h |  1 +
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
     return v->index != 0;
 }
 
+static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
+                                       uint64_t *features)
+{
+    int ret;
+
+    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
+    trace_vhost_vdpa_get_features(dev, *features);
+    return ret;
+}
+
 static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
                                Error **errp)
 {
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
         return 0;
     }
 
-    r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
+    r = vhost_vdpa_get_dev_features(hdev, &dev_features);
     if (r != 0) {
         error_setg_errno(errp, -r, "Can't get vdpa device features");
         return r;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
 static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                    uint64_t features)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int ret;
 
     if (vhost_vdpa_one_time_request(dev)) {
         return 0;
     }
 
+    if (v->shadow_vqs_enabled) {
+        if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
+            /*
+             * QEMU is just trying to enable or disable logging. SVQ handles
+             * this separately, so no need to forward this.
+             */
+            v->acked_features = features;
+            return 0;
+        }
+
+        v->acked_features = features;
+
+        /* We must not ack _F_LOG if SVQ is enabled */
+        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
+    }
+
     trace_vhost_vdpa_set_features(dev, features);
     ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
     if (ret) {
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
 static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                    uint64_t *features)
 {
-    int ret;
+    struct vhost_vdpa *v = dev->opaque;
+    int ret = vhost_vdpa_get_dev_features(dev, features);
+
+    if (ret == 0 && v->shadow_vqs_enabled) {
+        /* Add SVQ logging capabilities */
+        *features |= BIT_ULL(VHOST_F_LOG_ALL);
+    }
 
-    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
-    trace_vhost_vdpa_get_features(dev, *features);
     return ret;
 }
 
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
     bool iotlb_batch_begin_sent;
     MemoryListener listener;
     struct vhost_vdpa_iova_range iova_range;
+    uint64_t acked_features;
     bool shadow_vqs_enabled;
     /* IOVA mapping used by the Shadow Virtqueue */
     VhostIOVATree *iova_tree;
--
2.7.4
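
The set_features path above hinges on an XOR trick:
(acked ^ features) == BIT_ULL(VHOST_F_LOG_ALL) is true exactly when the
only bit that changed between the previously acked set and the new one
is the logging bit. A minimal sketch exercising that check; the helper
name and the base feature set are invented, while VHOST_F_LOG_ALL is 26
as in the vhost headers:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BIT_ULL(n)      (1ULL << (n))
    #define VHOST_F_LOG_ALL 26

    /* True when old and new feature sets differ only in VHOST_F_LOG_ALL,
     * i.e. the caller is just toggling dirty logging on or off. */
    static bool only_log_all_toggled(uint64_t acked, uint64_t features)
    {
        return (acked ^ features) == BIT_ULL(VHOST_F_LOG_ALL);
    }

    int main(void)
    {
        uint64_t acked = BIT_ULL(32) | BIT_ULL(33);  /* invented base set */

        /* Only the log bit flips: detected as a pure logging toggle (1). */
        printf("%d\n", only_log_all_toggled(acked,
                           acked | BIT_ULL(VHOST_F_LOG_ALL)));
        /* Another bit changes too: must be forwarded to the device (0). */
        printf("%d\n", only_log_all_toggled(acked,
                           acked | BIT_ULL(VHOST_F_LOG_ALL) | BIT_ULL(34)));
        return 0;
    }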