1
The following changes since commit 344744e148e6e865f5a57e745b02a87e5ea534ad:
1
The following changes since commit 352998df1c53b366413690d95b35f76d0721ebed:
2
2
3
Merge tag 'dump-pull-request' of https://gitlab.com/marcandre.lureau/qemu into staging (2022-10-26 10:53:49 -0400)
3
Merge tag 'i2c-20220314' of https://github.com/philmd/qemu into staging (2022-03-14 14:39:33 +0000)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to e506fee8b1e092f6ac6f9459bf6a35b807644ad2:
9
for you to fetch changes up to 12a195fa343aae2ead1301ce04727bd0ae25eb15:
10
10
11
net: stream: add QAPI events to report connection state (2022-10-28 13:28:52 +0800)
11
vdpa: Expose VHOST_F_LOG_ALL on SVQ (2022-03-15 13:57:44 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
Changes since V2:
16
- fix 32bit build errors
17
15
----------------------------------------------------------------
18
----------------------------------------------------------------
16
Daniel P. Berrangé (1):
19
Eugenio Pérez (14):
17
net: improve error message for missing netdev backend
20
vhost: Add VhostShadowVirtqueue
21
vhost: Add Shadow VirtQueue kick forwarding capabilities
22
vhost: Add Shadow VirtQueue call forwarding capabilities
23
vhost: Add vhost_svq_valid_features to shadow vq
24
virtio: Add vhost_svq_get_vring_addr
25
vdpa: adapt vhost_ops callbacks to svq
26
vhost: Shadow virtqueue buffers forwarding
27
util: Add iova_tree_alloc_map
28
util: add iova_tree_find_iova
29
vhost: Add VhostIOVATree
30
vdpa: Add custom IOTLB translations to SVQ
31
vdpa: Adapt vhost_vdpa_get_vring_base to SVQ
32
vdpa: Never set log_base addr if SVQ is enabled
33
vdpa: Expose VHOST_F_LOG_ALL on SVQ
18
34
19
Eugenio Pérez (6):
35
Jason Wang (1):
20
vdpa: Delete duplicated vdpa_feature_bits entry
36
virtio-net: fix map leaking on error during receive
21
vdpa: Remove shadow CVQ command check
22
vhost: allocate event_idx fields on vring
23
vhost: toggle device callbacks using used event idx
24
vhost: use avail event idx on vhost_svq_kick
25
vhost: Accept event idx flag
26
37
27
Laurent Vivier (16):
38
hw/net/virtio-net.c | 1 +
28
virtio-net: fix bottom-half packet TX on asynchronous completion
39
hw/virtio/meson.build | 2 +-
29
virtio-net: fix TX timer with tx_burst
40
hw/virtio/vhost-iova-tree.c | 110 +++++++
30
net: introduce convert_host_port()
41
hw/virtio/vhost-iova-tree.h | 27 ++
31
net: remove the @errp argument of net_client_inits()
42
hw/virtio/vhost-shadow-virtqueue.c | 636 +++++++++++++++++++++++++++++++++++++
32
net: simplify net_client_parse() error management
43
hw/virtio/vhost-shadow-virtqueue.h | 87 +++++
33
qapi: net: introduce a way to bypass qemu_opts_parse_noisily()
44
hw/virtio/vhost-vdpa.c | 522 +++++++++++++++++++++++++++++-
34
net: introduce qemu_set_info_str() function
45
include/hw/virtio/vhost-vdpa.h | 8 +
35
qapi: net: add stream and dgram netdevs
46
include/qemu/iova-tree.h | 38 ++-
36
net: stream: add unix socket
47
util/iova-tree.c | 170 ++++++++++
37
net: dgram: make dgram_dst generic
48
10 files changed, 1584 insertions(+), 17 deletions(-)
38
net: dgram: move mcast specific code from net_socket_fd_init_dgram()
49
create mode 100644 hw/virtio/vhost-iova-tree.c
39
net: dgram: add unix socket
50
create mode 100644 hw/virtio/vhost-iova-tree.h
40
qemu-sockets: move and rename SocketAddress_to_str()
51
create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
41
qemu-sockets: update socket_uri() and socket_parse() to be consistent
52
create mode 100644 hw/virtio/vhost-shadow-virtqueue.h
42
net: stream: move to QIO to enable additional parameters
43
net: stream: add QAPI events to report connection state
44
45
Si-Wei Liu (1):
46
vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
47
48
Stefano Brivio (2):
49
net: socket: Don't ignore EINVAL on netdev socket connection
50
net: stream: Don't ignore EINVAL on netdev socket connection
51
52
hmp-commands.hx | 2 +-
53
hw/net/virtio-net.c | 59 +++-
54
hw/net/xen_nic.c | 5 +-
55
hw/virtio/vhost-shadow-virtqueue.c | 39 ++-
56
include/net/net.h | 7 +-
57
include/qemu/sockets.h | 4 +-
58
monitor/hmp-cmds.c | 23 +-
59
net/clients.h | 6 +
60
net/dgram.c | 623 +++++++++++++++++++++++++++++++++++++
61
net/hub.c | 2 +
62
net/l2tpv3.c | 3 +-
63
net/meson.build | 2 +
64
net/net.c | 204 ++++++++----
65
net/slirp.c | 5 +-
66
net/socket.c | 36 +--
67
net/stream.c | 386 +++++++++++++++++++++++
68
net/tap-win32.c | 3 +-
69
net/tap.c | 13 +-
70
net/vde.c | 3 +-
71
net/vhost-user.c | 3 +-
72
net/vhost-vdpa.c | 76 ++---
73
qapi/net.json | 118 ++++++-
74
qemu-options.hx | 20 +-
75
softmmu/vl.c | 16 +-
76
util/qemu-sockets.c | 25 ++
77
25 files changed, 1473 insertions(+), 210 deletions(-)
78
create mode 100644 net/dgram.c
79
create mode 100644 net/stream.c
80
53
81
54
55
diff view generated by jsdifflib
Deleted patch
1
From: Laurent Vivier <lvivier@redhat.com>
2
1
3
When virtio-net is used with the socket netdev backend, the backend
4
can be busy and not able to collect new packets.
5
6
In this case, net_socket_receive() returns 0 and registers a poll function
7
to detect when the socket is ready again.
8
9
In virtio_net_tx_bh(), virtio_net_flush_tx() forwards the 0, the virtio
10
notifications are disabled and the function is not re-scheduled, waiting
11
for the backend to be ready.
12
13
When the socket netdev backend is again able to send packets, the poll
14
function re-starts to flush remaining packets. This is done by
15
calling virtio_net_tx_complete(). It re-enables notifications and calls
16
again virtio_net_flush_tx().
17
18
But it seems if virtio_net_flush_tx() reaches the tx_burst value all
19
the queue is not flushed and no new notification is sent to re-schedule
20
virtio_net_tx_bh(). Nothing restarts flushing the queue and the remaining
21
packets are stuck in the queue.
22
23
To fix that, detect in virtio_net_tx_complete() if virtio_net_flush_tx()
24
has been stopped by tx_burst and if yes re-schedule the bottom half
25
function virtio_net_tx_bh() to flush the remaining packets.
26
27
This is what is done in virtio_net_tx_bh() when the virtio_net_flush_tx()
28
is synchronous, and completely bypassed when the operation needs to be
29
asynchronous.
30
31
Fixes: a697a334b3c4 ("virtio-net: Introduce a new bottom half packet TX")
32
Cc: alex.williamson@redhat.com
33
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
34
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
35
Acked-by: Michael S. Tsirkin <mst@redhat.com>
36
Signed-off-by: Jason Wang <jasowang@redhat.com>
37
---
38
hw/net/virtio-net.c | 13 ++++++++++++-
39
1 file changed, 12 insertions(+), 1 deletion(-)
40
41
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/hw/net/virtio-net.c
44
+++ b/hw/net/virtio-net.c
45
@@ -XXX,XX +XXX,XX @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
46
VirtIONet *n = qemu_get_nic_opaque(nc);
47
VirtIONetQueue *q = virtio_net_get_subqueue(nc);
48
VirtIODevice *vdev = VIRTIO_DEVICE(n);
49
+ int ret;
50
51
virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
52
virtio_notify(vdev, q->tx_vq);
53
@@ -XXX,XX +XXX,XX @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
54
q->async_tx.elem = NULL;
55
56
virtio_queue_set_notification(q->tx_vq, 1);
57
- virtio_net_flush_tx(q);
58
+ ret = virtio_net_flush_tx(q);
59
+ if (q->tx_bh && ret >= n->tx_burst) {
60
+ /*
61
+ * the flush has been stopped by tx_burst
62
+ * we will not receive notification for the
63
+ * remainining part, so re-schedule
64
+ */
65
+ virtio_queue_set_notification(q->tx_vq, 0);
66
+ qemu_bh_schedule(q->tx_bh);
67
+ q->tx_waiting = 1;
68
+ }
69
}
70
71
/* TX */
72
--
73
2.7.4
diff view generated by jsdifflib
1
From: Laurent Vivier <lvivier@redhat.com>
1
Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
2
tries to fix the use after free of the sg by caching the virtqueue
3
elements in an array and unmap them at once after receiving the
4
packets. But it forgot to unmap the cached elements on error, which
5
will lead to leaking of mapping and other unexpected results.
2
6
3
When virtio_net_flush_tx() reaches the tx_burst value all
7
Fix this by detaching the cached elements on error. This addresses
4
the queue is not flushed and nothing restarts the timer.
8
CVE-2022-26353.
5
9
6
Fix that by doing for TX timer as we do for bottom half TX:
10
Reported-by: Victor Tom <vv474172261@gmail.com>
7
rearming the timer if we find any packet to send during the
11
Cc: qemu-stable@nongnu.org
8
virtio_net_flush_tx() call.
12
Fixes: CVE-2022-26353
9
13
Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
10
Fixes: e3f30488e5f8 ("virtio-net: Limit number of packets sent per TX flush")
11
Cc: alex.williamson@redhat.com
12
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
13
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
14
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
---
16
hw/net/virtio-net.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
17
hw/net/virtio-net.c | 1 +
17
1 file changed, 41 insertions(+), 9 deletions(-)
18
1 file changed, 1 insertion(+)
18
19
19
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
20
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
20
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/net/virtio-net.c
22
--- a/hw/net/virtio-net.c
22
+++ b/hw/net/virtio-net.c
23
+++ b/hw/net/virtio-net.c
23
@@ -XXX,XX +XXX,XX @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
24
@@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
24
25
25
virtio_queue_set_notification(q->tx_vq, 1);
26
err:
26
ret = virtio_net_flush_tx(q);
27
for (j = 0; j < i; j++) {
27
- if (q->tx_bh && ret >= n->tx_burst) {
28
+ virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
28
+ if (ret >= n->tx_burst) {
29
g_free(elems[j]);
29
/*
30
* the flush has been stopped by tx_burst
31
* we will not receive notification for the
32
* remainining part, so re-schedule
33
*/
34
virtio_queue_set_notification(q->tx_vq, 0);
35
- qemu_bh_schedule(q->tx_bh);
36
+ if (q->tx_bh) {
37
+ qemu_bh_schedule(q->tx_bh);
38
+ } else {
39
+ timer_mod(q->tx_timer,
40
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
41
+ }
42
q->tx_waiting = 1;
43
}
30
}
44
}
31
45
@@ -XXX,XX +XXX,XX @@ drop:
46
return num_packets;
47
}
48
49
+static void virtio_net_tx_timer(void *opaque);
50
+
51
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
52
{
53
VirtIONet *n = VIRTIO_NET(vdev);
54
@@ -XXX,XX +XXX,XX @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
55
}
56
57
if (q->tx_waiting) {
58
- virtio_queue_set_notification(vq, 1);
59
+ /* We already have queued packets, immediately flush */
60
timer_del(q->tx_timer);
61
- q->tx_waiting = 0;
62
- if (virtio_net_flush_tx(q) == -EINVAL) {
63
- return;
64
- }
65
+ virtio_net_tx_timer(q);
66
} else {
67
+ /* re-arm timer to flush it (and more) on next tick */
68
timer_mod(q->tx_timer,
69
- qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
70
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
71
q->tx_waiting = 1;
72
virtio_queue_set_notification(vq, 0);
73
}
74
@@ -XXX,XX +XXX,XX @@ static void virtio_net_tx_timer(void *opaque)
75
VirtIONetQueue *q = opaque;
76
VirtIONet *n = q->n;
77
VirtIODevice *vdev = VIRTIO_DEVICE(n);
78
+ int ret;
79
+
80
/* This happens when device was stopped but BH wasn't. */
81
if (!vdev->vm_running) {
82
/* Make sure tx waiting is set, so we'll run when restarted. */
83
@@ -XXX,XX +XXX,XX @@ static void virtio_net_tx_timer(void *opaque)
84
return;
85
}
86
87
+ ret = virtio_net_flush_tx(q);
88
+ if (ret == -EBUSY || ret == -EINVAL) {
89
+ return;
90
+ }
91
+ /*
92
+ * If we flush a full burst of packets, assume there are
93
+ * more coming and immediately rearm
94
+ */
95
+ if (ret >= n->tx_burst) {
96
+ q->tx_waiting = 1;
97
+ timer_mod(q->tx_timer,
98
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
99
+ return;
100
+ }
101
+ /*
102
+ * If less than a full burst, re-enable notification and flush
103
+ * anything that may have come in while we weren't looking. If
104
+ * we find something, assume the guest is still active and rearm
105
+ */
106
virtio_queue_set_notification(q->tx_vq, 1);
107
- virtio_net_flush_tx(q);
108
+ ret = virtio_net_flush_tx(q);
109
+ if (ret > 0) {
110
+ virtio_queue_set_notification(q->tx_vq, 0);
111
+ q->tx_waiting = 1;
112
+ timer_mod(q->tx_timer,
113
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
114
+ }
115
}
116
117
static void virtio_net_tx_bh(void *opaque)
118
--
32
--
119
2.7.4
33
2.7.4
diff view generated by jsdifflib
1
From: Stefano Brivio <sbrivio@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Other errors are treated as failure by net_stream_client_init(),
3
Vhost shadow virtqueue (SVQ) is an intermediate jump for virtqueue
4
but if connect() returns EINVAL, we'll fail silently. Remove the
4
notifications and buffers, allowing qemu to track them. While qemu is
5
related exception.
5
forwarding the buffers and virtqueue changes, it is able to commit the
6
memory that is being dirtied, the same way regular qemu's VirtIO devices
7
do.
6
8
7
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
9
This commit only exposes basic SVQ allocation and free. Next patches of
8
[lvivier: applied to net/stream.c]
10
the series add functionality like notifications and buffers forwarding.
9
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
11
10
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
12
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
11
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
12
Acked-by: Michael S. Tsirkin <mst@redhat.com>
13
Acked-by: Michael S. Tsirkin <mst@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
---
15
net/stream.c | 3 +--
16
hw/virtio/meson.build | 2 +-
16
1 file changed, 1 insertion(+), 2 deletions(-)
17
hw/virtio/vhost-shadow-virtqueue.c | 62 ++++++++++++++++++++++++++++++++++++++
18
hw/virtio/vhost-shadow-virtqueue.h | 28 +++++++++++++++++
19
3 files changed, 91 insertions(+), 1 deletion(-)
20
create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
21
create mode 100644 hw/virtio/vhost-shadow-virtqueue.h
17
22
18
diff --git a/net/stream.c b/net/stream.c
23
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
19
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
20
--- a/net/stream.c
25
--- a/hw/virtio/meson.build
21
+++ b/net/stream.c
26
+++ b/hw/virtio/meson.build
22
@@ -XXX,XX +XXX,XX @@ static int net_stream_client_init(NetClientState *peer,
27
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
23
if (errno == EINTR || errno == EWOULDBLOCK) {
28
24
/* continue */
29
virtio_ss = ss.source_set()
25
} else if (errno == EINPROGRESS ||
30
virtio_ss.add(files('virtio.c'))
26
- errno == EALREADY ||
31
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c'))
27
- errno == EINVAL) {
32
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
28
+ errno == EALREADY) {
33
virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
29
break;
34
virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
30
} else {
35
virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
31
error_setg_errno(errp, errno, "can't connect socket");
36
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
37
new file mode 100644
38
index XXXXXXX..XXXXXXX
39
--- /dev/null
40
+++ b/hw/virtio/vhost-shadow-virtqueue.c
41
@@ -XXX,XX +XXX,XX @@
42
+/*
43
+ * vhost shadow virtqueue
44
+ *
45
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
46
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
47
+ *
48
+ * SPDX-License-Identifier: GPL-2.0-or-later
49
+ */
50
+
51
+#include "qemu/osdep.h"
52
+#include "hw/virtio/vhost-shadow-virtqueue.h"
53
+
54
+#include "qemu/error-report.h"
55
+
56
+/**
57
+ * Creates vhost shadow virtqueue, and instructs the vhost device to use the
58
+ * shadow methods and file descriptors.
59
+ *
60
+ * Returns the new virtqueue or NULL.
61
+ *
62
+ * In case of error, reason is reported through error_report.
63
+ */
64
+VhostShadowVirtqueue *vhost_svq_new(void)
65
+{
66
+ g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
67
+ int r;
68
+
69
+ r = event_notifier_init(&svq->hdev_kick, 0);
70
+ if (r != 0) {
71
+ error_report("Couldn't create kick event notifier: %s (%d)",
72
+ g_strerror(errno), errno);
73
+ goto err_init_hdev_kick;
74
+ }
75
+
76
+ r = event_notifier_init(&svq->hdev_call, 0);
77
+ if (r != 0) {
78
+ error_report("Couldn't create call event notifier: %s (%d)",
79
+ g_strerror(errno), errno);
80
+ goto err_init_hdev_call;
81
+ }
82
+
83
+ return g_steal_pointer(&svq);
84
+
85
+err_init_hdev_call:
86
+ event_notifier_cleanup(&svq->hdev_kick);
87
+
88
+err_init_hdev_kick:
89
+ return NULL;
90
+}
91
+
92
+/**
93
+ * Free the resources of the shadow virtqueue.
94
+ *
95
+ * @pvq: gpointer to SVQ so it can be used by autofree functions.
96
+ */
97
+void vhost_svq_free(gpointer pvq)
98
+{
99
+ VhostShadowVirtqueue *vq = pvq;
100
+ event_notifier_cleanup(&vq->hdev_kick);
101
+ event_notifier_cleanup(&vq->hdev_call);
102
+ g_free(vq);
103
+}
104
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
105
new file mode 100644
106
index XXXXXXX..XXXXXXX
107
--- /dev/null
108
+++ b/hw/virtio/vhost-shadow-virtqueue.h
109
@@ -XXX,XX +XXX,XX @@
110
+/*
111
+ * vhost shadow virtqueue
112
+ *
113
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
114
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
115
+ *
116
+ * SPDX-License-Identifier: GPL-2.0-or-later
117
+ */
118
+
119
+#ifndef VHOST_SHADOW_VIRTQUEUE_H
120
+#define VHOST_SHADOW_VIRTQUEUE_H
121
+
122
+#include "qemu/event_notifier.h"
123
+
124
+/* Shadow virtqueue to relay notifications */
125
+typedef struct VhostShadowVirtqueue {
126
+ /* Shadow kick notifier, sent to vhost */
127
+ EventNotifier hdev_kick;
128
+ /* Shadow call notifier, sent to vhost */
129
+ EventNotifier hdev_call;
130
+} VhostShadowVirtqueue;
131
+
132
+VhostShadowVirtqueue *vhost_svq_new(void);
133
+
134
+void vhost_svq_free(gpointer vq);
135
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
136
+
137
+#endif
32
--
138
--
33
2.7.4
139
2.7.4
34
140
35
141
diff view generated by jsdifflib
1
From: Eugenio Pérez <eperezma@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Enable all the code paths created before.
3
At this point no buffer forwarding will be performed in SVQ mode: Qemu
4
4
will just forward the guest's kicks to the device.
5
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
5
6
Host memory notifier regions are left out for simplicity, and they will
7
not be addressed in this series.
8
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Acked-by: Michael S. Tsirkin <mst@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
12
---
9
hw/virtio/vhost-shadow-virtqueue.c | 1 +
13
hw/virtio/vhost-shadow-virtqueue.c | 55 ++++++++++++++
10
1 file changed, 1 insertion(+)
14
hw/virtio/vhost-shadow-virtqueue.h | 14 ++++
15
hw/virtio/vhost-vdpa.c | 144 ++++++++++++++++++++++++++++++++++++-
16
include/hw/virtio/vhost-vdpa.h | 4 ++
17
4 files changed, 215 insertions(+), 2 deletions(-)
11
18
12
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
19
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
13
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/virtio/vhost-shadow-virtqueue.c
21
--- a/hw/virtio/vhost-shadow-virtqueue.c
15
+++ b/hw/virtio/vhost-shadow-virtqueue.c
22
+++ b/hw/virtio/vhost-shadow-virtqueue.c
16
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
23
@@ -XXX,XX +XXX,XX @@
17
++b) {
24
#include "hw/virtio/vhost-shadow-virtqueue.h"
18
switch (b) {
25
19
case VIRTIO_F_ANY_LAYOUT:
26
#include "qemu/error-report.h"
20
+ case VIRTIO_RING_F_EVENT_IDX:
27
+#include "qemu/main-loop.h"
21
continue;
28
+#include "linux-headers/linux/vhost.h"
22
29
+
23
case VIRTIO_F_ACCESS_PLATFORM:
30
+/**
31
+ * Forward guest notifications.
32
+ *
33
+ * @n: guest kick event notifier, the one that guest set to notify svq.
34
+ */
35
+static void vhost_handle_guest_kick(EventNotifier *n)
36
+{
37
+ VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
38
+ event_notifier_test_and_clear(n);
39
+ event_notifier_set(&svq->hdev_kick);
40
+}
41
+
42
+/**
43
+ * Set a new file descriptor for the guest to kick the SVQ and notify for avail
44
+ *
45
+ * @svq: The svq
46
+ * @svq_kick_fd: The svq kick fd
47
+ *
48
+ * Note that the SVQ will never close the old file descriptor.
49
+ */
50
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
51
+{
52
+ EventNotifier *svq_kick = &svq->svq_kick;
53
+ bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
54
+ bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;
55
+
56
+ if (poll_stop) {
57
+ event_notifier_set_handler(svq_kick, NULL);
58
+ }
59
+
60
+ /*
61
+ * event_notifier_set_handler already checks for guest's notifications if
62
+ * they arrive at the new file descriptor in the switch, so there is no
63
+ * need to explicitly check for them.
64
+ */
65
+ if (poll_start) {
66
+ event_notifier_init_fd(svq_kick, svq_kick_fd);
67
+ event_notifier_set(svq_kick);
68
+ event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
69
+ }
70
+}
71
+
72
+/**
73
+ * Stop the shadow virtqueue operation.
74
+ * @svq: Shadow Virtqueue
75
+ */
76
+void vhost_svq_stop(VhostShadowVirtqueue *svq)
77
+{
78
+ event_notifier_set_handler(&svq->svq_kick, NULL);
79
+}
80
81
/**
82
* Creates vhost shadow virtqueue, and instructs the vhost device to use the
83
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
84
goto err_init_hdev_call;
85
}
86
87
+ event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
88
return g_steal_pointer(&svq);
89
90
err_init_hdev_call:
91
@@ -XXX,XX +XXX,XX @@ err_init_hdev_kick:
92
void vhost_svq_free(gpointer pvq)
93
{
94
VhostShadowVirtqueue *vq = pvq;
95
+ vhost_svq_stop(vq);
96
event_notifier_cleanup(&vq->hdev_kick);
97
event_notifier_cleanup(&vq->hdev_call);
98
g_free(vq);
99
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
100
index XXXXXXX..XXXXXXX 100644
101
--- a/hw/virtio/vhost-shadow-virtqueue.h
102
+++ b/hw/virtio/vhost-shadow-virtqueue.h
103
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
104
EventNotifier hdev_kick;
105
/* Shadow call notifier, sent to vhost */
106
EventNotifier hdev_call;
107
+
108
+ /*
109
+ * Borrowed virtqueue's guest to host notifier. To borrow it in this event
110
+ * notifier allows to recover the VhostShadowVirtqueue from the event loop
111
+ * easily. If we use the VirtQueue's one, we don't have an easy way to
112
+ * retrieve VhostShadowVirtqueue.
113
+ *
114
+ * So shadow virtqueue must not clean it, or we would lose VirtQueue one.
115
+ */
116
+ EventNotifier svq_kick;
117
} VhostShadowVirtqueue;
118
119
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
120
+
121
+void vhost_svq_stop(VhostShadowVirtqueue *svq);
122
+
123
VhostShadowVirtqueue *vhost_svq_new(void);
124
125
void vhost_svq_free(gpointer vq);
126
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
127
index XXXXXXX..XXXXXXX 100644
128
--- a/hw/virtio/vhost-vdpa.c
129
+++ b/hw/virtio/vhost-vdpa.c
130
@@ -XXX,XX +XXX,XX @@
131
#include "hw/virtio/vhost.h"
132
#include "hw/virtio/vhost-backend.h"
133
#include "hw/virtio/virtio-net.h"
134
+#include "hw/virtio/vhost-shadow-virtqueue.h"
135
#include "hw/virtio/vhost-vdpa.h"
136
#include "exec/address-spaces.h"
137
#include "qemu/main-loop.h"
138
#include "cpu.h"
139
#include "trace.h"
140
#include "qemu-common.h"
141
+#include "qapi/error.h"
142
143
/*
144
* Return one past the end of the end of section. Be careful with uint64_t
145
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
146
return v->index != 0;
147
}
148
149
+static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
150
+ Error **errp)
151
+{
152
+ g_autoptr(GPtrArray) shadow_vqs = NULL;
153
+
154
+ if (!v->shadow_vqs_enabled) {
155
+ return 0;
156
+ }
157
+
158
+ shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
159
+ for (unsigned n = 0; n < hdev->nvqs; ++n) {
160
+ g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
161
+
162
+ if (unlikely(!svq)) {
163
+ error_setg(errp, "Cannot create svq %u", n);
164
+ return -1;
165
+ }
166
+ g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
167
+ }
168
+
169
+ v->shadow_vqs = g_steal_pointer(&shadow_vqs);
170
+ return 0;
171
+}
172
+
173
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
174
{
175
struct vhost_vdpa *v;
176
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
177
dev->opaque = opaque ;
178
v->listener = vhost_vdpa_memory_listener;
179
v->msg_type = VHOST_IOTLB_MSG_V2;
180
+ ret = vhost_vdpa_init_svq(dev, v, errp);
181
+ if (ret) {
182
+ goto err;
183
+ }
184
185
vhost_vdpa_get_iova_range(v);
186
187
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
188
VIRTIO_CONFIG_S_DRIVER);
189
190
return 0;
191
+
192
+err:
193
+ ram_block_discard_disable(false);
194
+ return ret;
195
}
196
197
static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
198
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
199
200
static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
201
{
202
+ struct vhost_vdpa *v = dev->opaque;
203
int i;
204
205
+ if (v->shadow_vqs_enabled) {
206
+ /* FIXME SVQ is not compatible with host notifiers mr */
207
+ return;
208
+ }
209
+
210
for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
211
if (vhost_vdpa_host_notifier_init(dev, i)) {
212
goto err;
213
@@ -XXX,XX +XXX,XX @@ err:
214
return;
215
}
216
217
+static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
218
+{
219
+ struct vhost_vdpa *v = dev->opaque;
220
+ size_t idx;
221
+
222
+ if (!v->shadow_vqs) {
223
+ return;
224
+ }
225
+
226
+ for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
227
+ vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
228
+ }
229
+ g_ptr_array_free(v->shadow_vqs, true);
230
+}
231
+
232
static int vhost_vdpa_cleanup(struct vhost_dev *dev)
233
{
234
struct vhost_vdpa *v;
235
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
236
trace_vhost_vdpa_cleanup(dev, v);
237
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
238
memory_listener_unregister(&v->listener);
239
+ vhost_vdpa_svq_cleanup(dev);
240
241
dev->opaque = NULL;
242
ram_block_discard_disable(false);
243
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
244
return ret;
245
}
246
247
+static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
248
+{
249
+ if (!v->shadow_vqs_enabled) {
250
+ return;
251
+ }
252
+
253
+ for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
254
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
255
+ vhost_svq_stop(svq);
256
+ }
257
+}
258
+
259
static int vhost_vdpa_reset_device(struct vhost_dev *dev)
260
{
261
+ struct vhost_vdpa *v = dev->opaque;
262
int ret;
263
uint8_t status = 0;
264
265
+ vhost_vdpa_reset_svq(v);
266
+
267
ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
268
trace_vhost_vdpa_reset_device(dev, status);
269
return ret;
270
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
271
return ret;
272
}
273
274
+static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
275
+ struct vhost_vring_file *file)
276
+{
277
+ trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
278
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
279
+}
280
+
281
+/**
282
+ * Set the shadow virtqueue descriptors to the device
283
+ *
284
+ * @dev: The vhost device model
285
+ * @svq: The shadow virtqueue
286
+ * @idx: The index of the virtqueue in the vhost device
287
+ * @errp: Error
288
+ */
289
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
290
+ VhostShadowVirtqueue *svq, unsigned idx,
291
+ Error **errp)
292
+{
293
+ struct vhost_vring_file file = {
294
+ .index = dev->vq_index + idx,
295
+ };
296
+ const EventNotifier *event_notifier = &svq->hdev_kick;
297
+ int r;
298
+
299
+ file.fd = event_notifier_get_fd(event_notifier);
300
+ r = vhost_vdpa_set_vring_dev_kick(dev, &file);
301
+ if (unlikely(r != 0)) {
302
+ error_setg_errno(errp, -r, "Can't set device kick fd");
303
+ }
304
+
305
+ return r == 0;
306
+}
307
+
308
+static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
309
+{
310
+ struct vhost_vdpa *v = dev->opaque;
311
+ Error *err = NULL;
312
+ unsigned i;
313
+
314
+ if (!v->shadow_vqs) {
315
+ return true;
316
+ }
317
+
318
+ for (i = 0; i < v->shadow_vqs->len; ++i) {
319
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
320
+ bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
321
+ if (unlikely(!ok)) {
322
+ error_reportf_err(err, "Cannot setup SVQ %u: ", i);
323
+ return false;
324
+ }
325
+ }
326
+
327
+ return true;
328
+}
329
+
330
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
331
{
332
struct vhost_vdpa *v = dev->opaque;
333
+ bool ok;
334
trace_vhost_vdpa_dev_start(dev, started);
335
336
if (started) {
337
vhost_vdpa_host_notifiers_init(dev);
338
+ ok = vhost_vdpa_svqs_start(dev);
339
+ if (unlikely(!ok)) {
340
+ return -1;
341
+ }
342
vhost_vdpa_set_vring_ready(dev);
343
} else {
344
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
345
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
346
static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
347
struct vhost_vring_file *file)
348
{
349
- trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
350
- return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
351
+ struct vhost_vdpa *v = dev->opaque;
352
+ int vdpa_idx = file->index - dev->vq_index;
353
+
354
+ if (v->shadow_vqs_enabled) {
355
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
356
+ vhost_svq_set_svq_kick_fd(svq, file->fd);
357
+ return 0;
358
+ } else {
359
+ return vhost_vdpa_set_vring_dev_kick(dev, file);
360
+ }
361
}
362
363
static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
364
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
365
index XXXXXXX..XXXXXXX 100644
366
--- a/include/hw/virtio/vhost-vdpa.h
367
+++ b/include/hw/virtio/vhost-vdpa.h
368
@@ -XXX,XX +XXX,XX @@
369
#ifndef HW_VIRTIO_VHOST_VDPA_H
370
#define HW_VIRTIO_VHOST_VDPA_H
371
372
+#include <gmodule.h>
373
+
374
#include "hw/virtio/virtio.h"
375
#include "standard-headers/linux/vhost_types.h"
376
377
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
378
bool iotlb_batch_begin_sent;
379
MemoryListener listener;
380
struct vhost_vdpa_iova_range iova_range;
381
+ bool shadow_vqs_enabled;
382
+ GPtrArray *shadow_vqs;
383
struct vhost_dev *dev;
384
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
385
} VhostVDPA;
24
--
386
--
25
2.7.4
387
2.7.4
26
388
27
389
diff view generated by jsdifflib
1
From: Eugenio Pérez <eperezma@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Actually use the new field of the used ring and tell the device if SVQ
3
This will make qemu aware of the device used buffers, allowing it to
4
wants to be notified.
4
write the guest memory with its contents if needed.
5
5
6
The code is not reachable at the moment.
7
8
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Acked-by: Michael S. Tsirkin <mst@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
9
---
12
hw/virtio/vhost-shadow-virtqueue.c | 18 +++++++++++++++---
10
hw/virtio/vhost-shadow-virtqueue.c | 38 ++++++++++++++++++++++++++++++++++++++
13
1 file changed, 15 insertions(+), 3 deletions(-)
11
hw/virtio/vhost-shadow-virtqueue.h | 4 ++++
12
hw/virtio/vhost-vdpa.c | 31 +++++++++++++++++++++++++++++--
13
3 files changed, 71 insertions(+), 2 deletions(-)
14
14
15
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
--- a/hw/virtio/vhost-shadow-virtqueue.c
18
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
+++ b/hw/virtio/vhost-shadow-virtqueue.c
19
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
19
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(EventNotifier *n)
20
}
21
22
/**
23
+ * Forward vhost notifications
24
+ *
25
+ * @n: hdev call event notifier, the one that device set to notify svq.
26
+ */
27
+static void vhost_svq_handle_call(EventNotifier *n)
28
+{
29
+ VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
30
+ hdev_call);
31
+ event_notifier_test_and_clear(n);
32
+ event_notifier_set(&svq->svq_call);
33
+}
34
+
35
+/**
36
+ * Set the call notifier for the SVQ to call the guest
37
+ *
38
+ * @svq: Shadow virtqueue
39
+ * @call_fd: call notifier
40
+ *
41
+ * Called on BQL context.
42
+ */
43
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
44
+{
45
+ if (call_fd == VHOST_FILE_UNBIND) {
46
+ /*
47
+ * Fail event_notifier_set if called handling device call.
48
+ *
49
+ * SVQ still needs device notifications, since it needs to keep
50
+ * forwarding used buffers even with the unbind.
51
+ */
52
+ memset(&svq->svq_call, 0, sizeof(svq->svq_call));
53
+ } else {
54
+ event_notifier_init_fd(&svq->svq_call, call_fd);
55
+ }
56
+}
57
+
58
+/**
59
* Set a new file descriptor for the guest to kick the SVQ and notify for avail
60
*
61
* @svq: The svq
62
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
63
}
64
65
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
66
+ event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
67
return g_steal_pointer(&svq);
68
69
err_init_hdev_call:
70
@@ -XXX,XX +XXX,XX @@ void vhost_svq_free(gpointer pvq)
71
VhostShadowVirtqueue *vq = pvq;
72
vhost_svq_stop(vq);
73
event_notifier_cleanup(&vq->hdev_kick);
74
+ event_notifier_set_handler(&vq->hdev_call, NULL);
75
event_notifier_cleanup(&vq->hdev_call);
76
g_free(vq);
77
}
78
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
79
index XXXXXXX..XXXXXXX 100644
80
--- a/hw/virtio/vhost-shadow-virtqueue.h
81
+++ b/hw/virtio/vhost-shadow-virtqueue.h
82
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
83
* So shadow virtqueue must not clean it, or we would lose VirtQueue one.
84
*/
85
EventNotifier svq_kick;
86
+
87
+ /* Guest's call notifier, where the SVQ calls guest. */
88
+ EventNotifier svq_call;
89
} VhostShadowVirtqueue;
90
91
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
92
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
93
94
void vhost_svq_stop(VhostShadowVirtqueue *svq);
95
96
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/hw/virtio/vhost-vdpa.c
99
+++ b/hw/virtio/vhost-vdpa.c
100
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
101
return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
102
}
103
104
+static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
105
+ struct vhost_vring_file *file)
106
+{
107
+ trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
108
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
109
+}
110
+
111
/**
112
* Set the shadow virtqueue descriptors to the device
113
*
114
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
115
* @svq: The shadow virtqueue
116
* @idx: The index of the virtqueue in the vhost device
117
* @errp: Error
118
+ *
119
+ * Note that this function does not rewind kick file descriptor if cannot set
120
+ * call one.
20
*/
121
*/
21
static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
122
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
22
{
123
VhostShadowVirtqueue *svq, unsigned idx,
23
- svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
124
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
24
- /* Make sure the flag is written before the read of used_idx */
125
r = vhost_vdpa_set_vring_dev_kick(dev, &file);
25
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
126
if (unlikely(r != 0)) {
26
+ uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num];
127
error_setg_errno(errp, -r, "Can't set device kick fd");
27
+ *used_event = svq->shadow_used_idx;
128
+ return false;
28
+ } else {
29
+ svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
30
+ }
129
+ }
31
+
130
+
32
+ /* Make sure the event is enabled before the read of used_idx */
131
+ event_notifier = &svq->hdev_call;
33
smp_mb();
132
+ file.fd = event_notifier_get_fd(event_notifier);
34
return !vhost_svq_more_used(svq);
133
+ r = vhost_vdpa_set_vring_dev_call(dev, &file);
35
}
134
+ if (unlikely(r != 0)) {
36
135
+ error_setg_errno(errp, -r, "Can't set device call fd");
37
static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
136
}
137
138
return r == 0;
139
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
140
static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
141
struct vhost_vring_file *file)
38
{
142
{
39
- svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
143
- trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
40
+ /*
144
- return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
41
+ * No need to disable notification in the event idx case, since used event
145
+ struct vhost_vdpa *v = dev->opaque;
42
+ * index is already an index too far away.
146
+
43
+ */
147
+ if (v->shadow_vqs_enabled) {
44
+ if (!virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
148
+ int vdpa_idx = file->index - dev->vq_index;
45
+ svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
149
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
150
+
151
+ vhost_svq_set_svq_call_fd(svq, file->fd);
152
+ return 0;
153
+ } else {
154
+ return vhost_vdpa_set_vring_dev_call(dev, file);
46
+ }
155
+ }
47
}
156
}
48
157
49
static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq,
158
static int vhost_vdpa_get_features(struct vhost_dev *dev,
50
--
159
--
51
2.7.4
160
2.7.4
52
161
53
162
diff view generated by jsdifflib
1
From: Eugenio Pérez <eperezma@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
So SVQ code knows if an event is needed.
3
This allows SVQ to negotiate features with the guest and the device. For
4
the device, SVQ is a driver. While this function bypasses all
5
non-transport features, it needs to disable the features that SVQ does
6
not support when forwarding buffers. This includes packed vq layout,
7
indirect descriptors or event idx.
4
8
5
The code is not reachable at the moment.
9
Future changes can add support to offer more features to the guest,
10
since the use of VirtQueue gives this for free. This is left out at the
11
moment for simplicity.
6
12
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Acked-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
16
---
11
hw/virtio/vhost-shadow-virtqueue.c | 12 +++++++++++-
17
hw/virtio/vhost-shadow-virtqueue.c | 44 ++++++++++++++++++++++++++++++++++++++
12
1 file changed, 11 insertions(+), 1 deletion(-)
18
hw/virtio/vhost-shadow-virtqueue.h | 2 ++
19
hw/virtio/vhost-vdpa.c | 15 +++++++++++++
20
3 files changed, 61 insertions(+)
13
21
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
22
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
24
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
25
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
26
@@ -XXX,XX +XXX,XX @@
19
27
#include "hw/virtio/vhost-shadow-virtqueue.h"
20
static void vhost_svq_kick(VhostShadowVirtqueue *svq)
28
21
{
29
#include "qemu/error-report.h"
22
+ bool needs_kick;
30
+#include "qapi/error.h"
31
#include "qemu/main-loop.h"
32
#include "linux-headers/linux/vhost.h"
33
34
/**
35
+ * Validate the transport device features that both guests can use with the SVQ
36
+ * and SVQs can use with the device.
37
+ *
38
+ * @dev_features: The features
39
+ * @errp: Error pointer
40
+ */
41
+bool vhost_svq_valid_features(uint64_t features, Error **errp)
42
+{
43
+ bool ok = true;
44
+ uint64_t svq_features = features;
23
+
45
+
24
/*
46
+ for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
25
* We need to expose the available array entries before checking the used
47
+ ++b) {
26
* flags
48
+ switch (b) {
27
*/
49
+ case VIRTIO_F_ANY_LAYOUT:
28
smp_mb();
50
+ continue;
29
- if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
30
+
51
+
31
+ if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
52
+ case VIRTIO_F_ACCESS_PLATFORM:
32
+ uint16_t avail_event = *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]);
53
+ /* SVQ trust in the host's IOMMU to translate addresses */
33
+ needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1);
54
+ case VIRTIO_F_VERSION_1:
34
+ } else {
55
+ /* SVQ trust that the guest vring is little endian */
35
+ needs_kick = !(svq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
56
+ if (!(svq_features & BIT_ULL(b))) {
57
+ svq_features |= BIT_ULL(b);
58
+ ok = false;
59
+ }
60
+ continue;
61
+
62
+ default:
63
+ if (svq_features & BIT_ULL(b)) {
64
+ svq_features &= ~BIT_ULL(b);
65
+ ok = false;
66
+ }
67
+ }
36
+ }
68
+ }
37
+
69
+
38
+ if (!needs_kick) {
70
+ if (!ok) {
39
return;
71
+ error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
72
+ ", ok: 0x%"PRIx64, features, svq_features);
73
+ }
74
+ return ok;
75
+}
76
+
77
+/**
78
* Forward guest notifications.
79
*
80
* @n: guest kick event notifier, the one that guest set to notify svq.
81
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
82
index XXXXXXX..XXXXXXX 100644
83
--- a/hw/virtio/vhost-shadow-virtqueue.h
84
+++ b/hw/virtio/vhost-shadow-virtqueue.h
85
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
86
EventNotifier svq_call;
87
} VhostShadowVirtqueue;
88
89
+bool vhost_svq_valid_features(uint64_t features, Error **errp);
90
+
91
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
92
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
93
94
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/hw/virtio/vhost-vdpa.c
97
+++ b/hw/virtio/vhost-vdpa.c
98
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
99
Error **errp)
100
{
101
g_autoptr(GPtrArray) shadow_vqs = NULL;
102
+ uint64_t dev_features, svq_features;
103
+ int r;
104
+ bool ok;
105
106
if (!v->shadow_vqs_enabled) {
107
return 0;
40
}
108
}
41
109
110
+ r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
111
+ if (r != 0) {
112
+ error_setg_errno(errp, -r, "Can't get vdpa device features");
113
+ return r;
114
+ }
115
+
116
+ svq_features = dev_features;
117
+ ok = vhost_svq_valid_features(svq_features, errp);
118
+ if (unlikely(!ok)) {
119
+ return -1;
120
+ }
121
+
122
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
123
for (unsigned n = 0; n < hdev->nvqs; ++n) {
124
g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
42
--
125
--
43
2.7.4
126
2.7.4
44
127
45
128
diff view generated by jsdifflib
1
From: Eugenio Pérez <eperezma@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
There was not enough room to accommodate them.
3
It reports the shadow virtqueue address from qemu virtual address space.
4
4
5
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
5
Since this will be different from the guest's vaddr, but the device can
6
access it, SVQ takes special care about its alignment & lack of garbage
7
data. It assumes that IOMMU will work in host_page_size ranges for that.
8
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Acked-by: Michael S. Tsirkin <mst@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
12
---
9
hw/virtio/vhost-shadow-virtqueue.c | 8 ++++----
13
hw/virtio/vhost-shadow-virtqueue.c | 29 +++++++++++++++++++++++++++++
10
1 file changed, 4 insertions(+), 4 deletions(-)
14
hw/virtio/vhost-shadow-virtqueue.h | 9 +++++++++
15
2 files changed, 38 insertions(+)
11
16
12
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
17
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
13
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/virtio/vhost-shadow-virtqueue.c
19
--- a/hw/virtio/vhost-shadow-virtqueue.c
15
+++ b/hw/virtio/vhost-shadow-virtqueue.c
20
+++ b/hw/virtio/vhost-shadow-virtqueue.c
16
@@ -XXX,XX +XXX,XX @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
21
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
17
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
18
{
19
size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
20
- size_t avail_size = offsetof(vring_avail_t, ring) +
21
- sizeof(uint16_t) * svq->vring.num;
22
+ size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) +
23
+ sizeof(uint16_t);
24
25
return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size());
26
}
22
}
27
23
28
size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
24
/**
29
{
25
+ * Get the shadow vq vring address.
30
- size_t used_size = offsetof(vring_used_t, ring) +
26
+ * @svq: Shadow virtqueue
31
- sizeof(vring_used_elem_t) * svq->vring.num;
27
+ * @addr: Destination to store address
32
+ size_t used_size = offsetof(vring_used_t, ring[svq->vring.num]) +
28
+ */
33
+ sizeof(uint16_t);
29
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
34
return ROUND_UP(used_size, qemu_real_host_page_size());
30
+ struct vhost_vring_addr *addr)
35
}
31
+{
32
+ addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
33
+ addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
34
+ addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
35
+}
36
+
37
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
38
+{
39
+ size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
40
+ size_t avail_size = offsetof(vring_avail_t, ring) +
41
+ sizeof(uint16_t) * svq->vring.num;
42
+
43
+ return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size);
44
+}
45
+
46
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
47
+{
48
+ size_t used_size = offsetof(vring_used_t, ring) +
49
+ sizeof(vring_used_elem_t) * svq->vring.num;
50
+ return ROUND_UP(used_size, qemu_real_host_page_size);
51
+}
52
+
53
+/**
54
* Set a new file descriptor for the guest to kick the SVQ and notify for avail
55
*
56
* @svq: The svq
57
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
58
index XXXXXXX..XXXXXXX 100644
59
--- a/hw/virtio/vhost-shadow-virtqueue.h
60
+++ b/hw/virtio/vhost-shadow-virtqueue.h
61
@@ -XXX,XX +XXX,XX @@
62
#define VHOST_SHADOW_VIRTQUEUE_H
63
64
#include "qemu/event_notifier.h"
65
+#include "hw/virtio/virtio.h"
66
+#include "standard-headers/linux/vhost_types.h"
67
68
/* Shadow virtqueue to relay notifications */
69
typedef struct VhostShadowVirtqueue {
70
+ /* Shadow vring */
71
+ struct vring vring;
72
+
73
/* Shadow kick notifier, sent to vhost */
74
EventNotifier hdev_kick;
75
/* Shadow call notifier, sent to vhost */
76
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp);
77
78
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
79
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
80
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
81
+ struct vhost_vring_addr *addr);
82
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
83
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
84
85
void vhost_svq_stop(VhostShadowVirtqueue *svq);
36
86
37
--
87
--
38
2.7.4
88
2.7.4
39
89
40
90
diff view generated by jsdifflib
1
From: Laurent Vivier <lvivier@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Embed the setting of info_str in a function.
3
First half of the buffers forwarding part, preparing vhost-vdpa
4
callbacks to SVQ to offer it. QEMU cannot enable it at this moment, so
5
this is effectively dead code at the moment, but it helps to reduce
6
patch size.
4
7
5
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
6
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
7
Acked-by: Michael S. Tsirkin <mst@redhat.com>
9
Acked-by: Michael S. Tsirkin <mst@redhat.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
---
11
hw/net/xen_nic.c | 5 ++---
12
hw/virtio/vhost-vdpa.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
12
include/net/net.h | 1 +
13
1 file changed, 41 insertions(+), 7 deletions(-)
13
net/l2tpv3.c | 3 +--
14
net/net.c | 17 ++++++++++++-----
15
net/slirp.c | 5 ++---
16
net/socket.c | 33 ++++++++++++++-------------------
17
net/tap-win32.c | 3 +--
18
net/tap.c | 13 +++++--------
19
net/vde.c | 3 +--
20
net/vhost-user.c | 3 +--
21
net/vhost-vdpa.c | 2 +-
22
11 files changed, 41 insertions(+), 47 deletions(-)
23
14
24
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
15
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
25
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
26
--- a/hw/net/xen_nic.c
17
--- a/hw/virtio/vhost-vdpa.c
27
+++ b/hw/net/xen_nic.c
18
+++ b/hw/virtio/vhost-vdpa.c
28
@@ -XXX,XX +XXX,XX @@ static int net_init(struct XenLegacyDevice *xendev)
19
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
29
netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf,
20
return ret;
30
"xen", NULL, netdev);
21
}
31
22
32
- snprintf(qemu_get_queue(netdev->nic)->info_str,
23
+static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
33
- sizeof(qemu_get_queue(netdev->nic)->info_str),
24
+ struct vhost_vring_state *ring)
34
- "nic: xenbus vif macaddr=%s", netdev->mac);
35
+ qemu_set_info_str(qemu_get_queue(netdev->nic),
36
+ "nic: xenbus vif macaddr=%s", netdev->mac);
37
38
/* fill info */
39
xenstore_write_be_int(&netdev->xendev, "feature-rx-copy", 1);
40
diff --git a/include/net/net.h b/include/net/net.h
41
index XXXXXXX..XXXXXXX 100644
42
--- a/include/net/net.h
43
+++ b/include/net/net.h
44
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_send_packet_async(NetClientState *nc, const uint8_t *buf,
45
void qemu_purge_queued_packets(NetClientState *nc);
46
void qemu_flush_queued_packets(NetClientState *nc);
47
void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge);
48
+void qemu_set_info_str(NetClientState *nc, const char *fmt, ...);
49
void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]);
50
bool qemu_has_ufo(NetClientState *nc);
51
bool qemu_has_vnet_hdr(NetClientState *nc);
52
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/net/l2tpv3.c
55
+++ b/net/l2tpv3.c
56
@@ -XXX,XX +XXX,XX @@ int net_init_l2tpv3(const Netdev *netdev,
57
58
l2tpv3_read_poll(s, true);
59
60
- snprintf(s->nc.info_str, sizeof(s->nc.info_str),
61
- "l2tpv3: connected");
62
+ qemu_set_info_str(&s->nc, "l2tpv3: connected");
63
return 0;
64
outerr:
65
qemu_del_net_client(nc);
66
diff --git a/net/net.c b/net/net.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/net/net.c
69
+++ b/net/net.c
70
@@ -XXX,XX +XXX,XX @@ char *qemu_mac_strdup_printf(const uint8_t *macaddr)
71
macaddr[3], macaddr[4], macaddr[5]);
72
}
73
74
+void qemu_set_info_str(NetClientState *nc, const char *fmt, ...)
75
+{
25
+{
76
+ va_list ap;
26
+ trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
77
+
27
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
78
+ va_start(ap, fmt);
79
+ vsnprintf(nc->info_str, sizeof(nc->info_str), fmt, ap);
80
+ va_end(ap);
81
+}
28
+}
82
+
29
+
83
void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6])
30
static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
31
struct vhost_vring_file *file)
84
{
32
{
85
- snprintf(nc->info_str, sizeof(nc->info_str),
33
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
86
- "model=%s,macaddr=%02x:%02x:%02x:%02x:%02x:%02x",
34
return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
87
- nc->model,
88
- macaddr[0], macaddr[1], macaddr[2],
89
- macaddr[3], macaddr[4], macaddr[5]);
90
+ qemu_set_info_str(nc, "model=%s,macaddr=%02x:%02x:%02x:%02x:%02x:%02x",
91
+ nc->model, macaddr[0], macaddr[1], macaddr[2],
92
+ macaddr[3], macaddr[4], macaddr[5]);
93
}
35
}
94
36
95
static int mac_table[256] = {0};
37
+static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
96
diff --git a/net/slirp.c b/net/slirp.c
38
+ struct vhost_vring_addr *addr)
97
index XXXXXXX..XXXXXXX 100644
39
+{
98
--- a/net/slirp.c
40
+ trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
99
+++ b/net/slirp.c
41
+ addr->desc_user_addr, addr->used_user_addr,
100
@@ -XXX,XX +XXX,XX @@ static int net_slirp_init(NetClientState *peer, const char *model,
42
+ addr->avail_user_addr,
101
43
+ addr->log_guest_addr);
102
nc = qemu_new_net_client(&net_slirp_info, peer, model, name);
44
+
103
45
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
104
- snprintf(nc->info_str, sizeof(nc->info_str),
46
+
105
- "net=%s,restrict=%s", inet_ntoa(net),
47
+}
106
- restricted ? "on" : "off");
48
+
107
+ qemu_set_info_str(nc, "net=%s,restrict=%s", inet_ntoa(net),
49
/**
108
+ restricted ? "on" : "off");
50
* Set the shadow virtqueue descriptors to the device
109
51
*
110
s = DO_UPCAST(SlirpState, nc, nc);
52
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
111
53
static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
112
diff --git a/net/socket.c b/net/socket.c
54
struct vhost_vring_addr *addr)
113
index XXXXXXX..XXXXXXX 100644
55
{
114
--- a/net/socket.c
56
- trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
115
+++ b/net/socket.c
57
- addr->desc_user_addr, addr->used_user_addr,
116
@@ -XXX,XX +XXX,XX @@ static void net_socket_send(void *opaque)
58
- addr->avail_user_addr,
117
s->fd = -1;
59
- addr->log_guest_addr);
118
net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
60
- return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
119
s->nc.link_down = true;
61
+ struct vhost_vdpa *v = dev->opaque;
120
- memset(s->nc.info_str, 0, sizeof(s->nc.info_str));
62
+
121
+ qemu_set_info_str(&s->nc, "");
63
+ if (v->shadow_vqs_enabled) {
122
64
+ /*
123
return;
65
+ * Device vring addr was set at device start. SVQ base is handled by
124
}
66
+ * VirtQueue code.
125
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer,
67
+ */
126
/* mcast: save bound address as dst */
68
+ return 0;
127
if (is_connected && mcast != NULL) {
69
+ }
128
s->dgram_dst = saddr;
70
+
129
- snprintf(nc->info_str, sizeof(nc->info_str),
71
+ return vhost_vdpa_set_vring_dev_addr(dev, addr);
130
- "socket: fd=%d (cloned mcast=%s:%d)",
131
- fd, inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
132
+ qemu_set_info_str(nc, "socket: fd=%d (cloned mcast=%s:%d)", fd,
133
+ inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
134
} else {
135
if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) {
136
s->dgram_dst.sin_family = AF_UNIX;
137
}
138
139
- snprintf(nc->info_str, sizeof(nc->info_str),
140
- "socket: fd=%d %s", fd, SocketAddressType_str(sa_type));
141
+ qemu_set_info_str(nc, "socket: fd=%d %s", fd,
142
+ SocketAddressType_str(sa_type));
143
}
144
145
return s;
146
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer,
147
148
nc = qemu_new_net_client(&net_socket_info, peer, model, name);
149
150
- snprintf(nc->info_str, sizeof(nc->info_str), "socket: fd=%d", fd);
151
+ qemu_set_info_str(nc, "socket: fd=%d", fd);
152
153
s = DO_UPCAST(NetSocketState, nc, nc);
154
155
@@ -XXX,XX +XXX,XX @@ static void net_socket_accept(void *opaque)
156
s->fd = fd;
157
s->nc.link_down = false;
158
net_socket_connect(s);
159
- snprintf(s->nc.info_str, sizeof(s->nc.info_str),
160
- "socket: connection from %s:%d",
161
- inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
162
+ qemu_set_info_str(&s->nc, "socket: connection from %s:%d",
163
+ inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
164
}
72
}
165
73
166
static int net_socket_listen_init(NetClientState *peer,
74
static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
167
@@ -XXX,XX +XXX,XX @@ static int net_socket_connect_init(NetClientState *peer,
75
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
168
return -1;
76
static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
169
}
77
struct vhost_vring_state *ring)
170
78
{
171
- snprintf(s->nc.info_str, sizeof(s->nc.info_str),
79
- trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
172
- "socket: connect to %s:%d",
80
- return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
173
- inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
81
+ struct vhost_vdpa *v = dev->opaque;
174
+ qemu_set_info_str(&s->nc, "socket: connect to %s:%d",
82
+
175
+ inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
83
+ if (v->shadow_vqs_enabled) {
176
return 0;
84
+ /*
85
+ * Device vring base was set at device start. SVQ base is handled by
86
+ * VirtQueue code.
87
+ */
88
+ return 0;
89
+ }
90
+
91
+ return vhost_vdpa_set_dev_vring_base(dev, ring);
177
}
92
}
178
93
179
@@ -XXX,XX +XXX,XX @@ static int net_socket_mcast_init(NetClientState *peer,
94
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
180
181
s->dgram_dst = saddr;
182
183
- snprintf(s->nc.info_str, sizeof(s->nc.info_str),
184
- "socket: mcast=%s:%d",
185
- inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
186
+ qemu_set_info_str(&s->nc, "socket: mcast=%s:%d",
187
+ inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
188
return 0;
189
190
}
191
@@ -XXX,XX +XXX,XX @@ static int net_socket_udp_init(NetClientState *peer,
192
193
s->dgram_dst = raddr;
194
195
- snprintf(s->nc.info_str, sizeof(s->nc.info_str),
196
- "socket: udp=%s:%d",
197
- inet_ntoa(raddr.sin_addr), ntohs(raddr.sin_port));
198
+ qemu_set_info_str(&s->nc, "socket: udp=%s:%d", inet_ntoa(raddr.sin_addr),
199
+ ntohs(raddr.sin_port));
200
return 0;
201
}
202
203
diff --git a/net/tap-win32.c b/net/tap-win32.c
204
index XXXXXXX..XXXXXXX 100644
205
--- a/net/tap-win32.c
206
+++ b/net/tap-win32.c
207
@@ -XXX,XX +XXX,XX @@ static int tap_win32_init(NetClientState *peer, const char *model,
208
209
s = DO_UPCAST(TAPState, nc, nc);
210
211
- snprintf(s->nc.info_str, sizeof(s->nc.info_str),
212
- "tap: ifname=%s", ifname);
213
+ qemu_set_info_str(&s->nc, "tap: ifname=%s", ifname);
214
215
s->handle = handle;
216
217
diff --git a/net/tap.c b/net/tap.c
218
index XXXXXXX..XXXXXXX 100644
219
--- a/net/tap.c
220
+++ b/net/tap.c
221
@@ -XXX,XX +XXX,XX @@ int net_init_bridge(const Netdev *netdev, const char *name,
222
}
223
s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
224
225
- snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
226
- br);
227
+ qemu_set_info_str(&s->nc, "helper=%s,br=%s", helper, br);
228
229
return 0;
230
}
231
@@ -XXX,XX +XXX,XX @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
232
}
233
234
if (tap->has_fd || tap->has_fds) {
235
- snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
236
+ qemu_set_info_str(&s->nc, "fd=%d", fd);
237
} else if (tap->has_helper) {
238
- snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
239
- tap->helper);
240
+ qemu_set_info_str(&s->nc, "helper=%s", tap->helper);
241
} else {
242
- snprintf(s->nc.info_str, sizeof(s->nc.info_str),
243
- "ifname=%s,script=%s,downscript=%s", ifname, script,
244
- downscript);
245
+ qemu_set_info_str(&s->nc, "ifname=%s,script=%s,downscript=%s", ifname,
246
+ script, downscript);
247
248
if (strcmp(downscript, "no") != 0) {
249
snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
250
diff --git a/net/vde.c b/net/vde.c
251
index XXXXXXX..XXXXXXX 100644
252
--- a/net/vde.c
253
+++ b/net/vde.c
254
@@ -XXX,XX +XXX,XX @@ static int net_vde_init(NetClientState *peer, const char *model,
255
256
nc = qemu_new_net_client(&net_vde_info, peer, model, name);
257
258
- snprintf(nc->info_str, sizeof(nc->info_str), "sock=%s,fd=%d",
259
- sock, vde_datafd(vde));
260
+ qemu_set_info_str(nc, "sock=%s,fd=%d", sock, vde_datafd(vde));
261
262
s = DO_UPCAST(VDEState, nc, nc);
263
264
diff --git a/net/vhost-user.c b/net/vhost-user.c
265
index XXXXXXX..XXXXXXX 100644
266
--- a/net/vhost-user.c
267
+++ b/net/vhost-user.c
268
@@ -XXX,XX +XXX,XX @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
269
user = g_new0(struct VhostUserState, 1);
270
for (i = 0; i < queues; i++) {
271
nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
272
- snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
273
- i, chr->label);
274
+ qemu_set_info_str(nc, "vhost-user%d to %s", i, chr->label);
275
nc->queue_index = i;
276
if (!nc0) {
277
nc0 = nc;
278
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
279
index XXXXXXX..XXXXXXX 100644
280
--- a/net/vhost-vdpa.c
281
+++ b/net/vhost-vdpa.c
282
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
283
nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
284
device, name);
285
}
286
- snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA);
287
+ qemu_set_info_str(nc, TYPE_VHOST_VDPA);
288
s = DO_UPCAST(VhostVDPAState, nc, nc);
289
290
s->vhost_vdpa.device_fd = vdpa_device_fd;
291
--
95
--
292
2.7.4
96
2.7.4
293
97
294
98
diff view generated by jsdifflib
1
From: Laurent Vivier <lvivier@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Use QIOChannel, QIOChannelSocket and QIONetListener.
3
Initial version of shadow virtqueue that actually forwards buffers. There
4
This allows net/stream to use all the available parameters provided by
4
is no iommu support at the moment, and that will be addressed in future
5
SocketAddress.
5
patches of this series. Since all vhost-vdpa devices use forced IOMMU,
6
6
this means that SVQ is not usable at this point of the series on any
7
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
7
device.
8
9
For simplicity it only supports modern devices, that expect vring
10
in little endian, with split ring and no event idx or indirect
11
descriptors. Support for them will not be added in this series.
12
13
It reuses the VirtQueue code for the device part. The driver part is
14
based on Linux's virtio_ring driver, but with stripped functionality
15
and optimizations so it's easier to review.
16
17
However, forwarding buffers has some particularities: one of the most
18
unexpected ones is that a guest's buffer can span more than
19
one descriptor in SVQ. While this is handled gracefully by qemu's
20
emulated virtio devices, it may unexpectedly fill the SVQ queue. This
21
patch also solves it by checking for this condition at both guest's
22
kicks and device's calls. The code may be more elegant in the future if
23
SVQ code runs in its own iocontext.
24
25
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Acked-by: Michael S. Tsirkin <mst@redhat.com>
26
Acked-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
27
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
28
---
11
net/stream.c | 492 ++++++++++++++++++++------------------------------------
29
hw/virtio/vhost-shadow-virtqueue.c | 352 ++++++++++++++++++++++++++++++++++++-
12
qemu-options.hx | 4 +-
30
hw/virtio/vhost-shadow-virtqueue.h | 26 +++
13
2 files changed, 178 insertions(+), 318 deletions(-)
31
hw/virtio/vhost-vdpa.c | 155 +++++++++++++++-
14
32
3 files changed, 522 insertions(+), 11 deletions(-)
15
diff --git a/net/stream.c b/net/stream.c
33
34
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
16
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
17
--- a/net/stream.c
36
--- a/hw/virtio/vhost-shadow-virtqueue.c
18
+++ b/net/stream.c
37
+++ b/hw/virtio/vhost-shadow-virtqueue.c
19
@@ -XXX,XX +XXX,XX @@
38
@@ -XXX,XX +XXX,XX @@
20
#include "qemu/iov.h"
39
#include "qemu/error-report.h"
40
#include "qapi/error.h"
21
#include "qemu/main-loop.h"
41
#include "qemu/main-loop.h"
22
#include "qemu/cutils.h"
42
+#include "qemu/log.h"
23
+#include "io/channel.h"
43
+#include "qemu/memalign.h"
24
+#include "io/channel-socket.h"
44
#include "linux-headers/linux/vhost.h"
25
+#include "io/net-listener.h"
45
26
46
/**
27
typedef struct NetStreamState {
47
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
28
NetClientState nc;
48
}
29
- int listen_fd;
49
30
- int fd;
50
/**
31
+ QIOChannel *listen_ioc;
51
- * Forward guest notifications.
32
+ QIONetListener *listener;
52
+ * Number of descriptors that the SVQ can make available from the guest.
33
+ QIOChannel *ioc;
53
+ *
34
+ guint ioc_read_tag;
54
+ * @svq: The svq
35
+ guint ioc_write_tag;
55
+ */
36
SocketReadState rs;
56
+static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
37
unsigned int send_index; /* number of bytes sent*/
57
+{
38
- bool read_poll; /* waiting to receive data? */
58
+ return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
39
- bool write_poll; /* waiting to transmit data? */
59
+}
40
} NetStreamState;
60
+
41
61
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
42
-static void net_stream_send(void *opaque);
62
+ const struct iovec *iovec, size_t num,
43
-static void net_stream_accept(void *opaque);
63
+ bool more_descs, bool write)
44
-static void net_stream_writable(void *opaque);
64
+{
45
+static void net_stream_listen(QIONetListener *listener,
65
+ uint16_t i = svq->free_head, last = svq->free_head;
46
+ QIOChannelSocket *cioc,
66
+ unsigned n;
47
+ void *opaque);
67
+ uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
48
68
+ vring_desc_t *descs = svq->vring.desc;
49
-static void net_stream_update_fd_handler(NetStreamState *s)
69
+
50
+static gboolean net_stream_writable(QIOChannel *ioc,
70
+ if (num == 0) {
51
+ GIOCondition condition,
71
+ return;
52
+ gpointer data)
72
+ }
73
+
74
+ for (n = 0; n < num; n++) {
75
+ if (more_descs || (n + 1 < num)) {
76
+ descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
77
+ } else {
78
+ descs[i].flags = flags;
79
+ }
80
+ descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
81
+ descs[i].len = cpu_to_le32(iovec[n].iov_len);
82
+
83
+ last = i;
84
+ i = cpu_to_le16(descs[i].next);
85
+ }
86
+
87
+ svq->free_head = le16_to_cpu(descs[last].next);
88
+}
89
+
90
+static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
91
+ VirtQueueElement *elem, unsigned *head)
92
+{
93
+ unsigned avail_idx;
94
+ vring_avail_t *avail = svq->vring.avail;
95
+
96
+ *head = svq->free_head;
97
+
98
+ /* We need some descriptors here */
99
+ if (unlikely(!elem->out_num && !elem->in_num)) {
100
+ qemu_log_mask(LOG_GUEST_ERROR,
101
+ "Guest provided element with no descriptors");
102
+ return false;
103
+ }
104
+
105
+ vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
106
+ false);
107
+ vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
108
+
109
+ /*
110
+ * Put the entry in the available array (but don't update avail->idx until
111
+ * they do sync).
112
+ */
113
+ avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
114
+ avail->ring[avail_idx] = cpu_to_le16(*head);
115
+ svq->shadow_avail_idx++;
116
+
117
+ /* Update the avail index after writing the descriptor */
118
+ smp_wmb();
119
+ avail->idx = cpu_to_le16(svq->shadow_avail_idx);
120
+
121
+ return true;
122
+}
123
+
124
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
125
+{
126
+ unsigned qemu_head;
127
+ bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
128
+ if (unlikely(!ok)) {
129
+ return false;
130
+ }
131
+
132
+ svq->ring_id_maps[qemu_head] = elem;
133
+ return true;
134
+}
135
+
136
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
137
+{
138
+ /*
139
+ * We need to expose the available array entries before checking the used
140
+ * flags
141
+ */
142
+ smp_mb();
143
+ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
144
+ return;
145
+ }
146
+
147
+ event_notifier_set(&svq->hdev_kick);
148
+}
149
+
150
+/**
151
+ * Forward available buffers.
152
+ *
153
+ * @svq: Shadow VirtQueue
154
+ *
155
+ * Note that this function does not guarantee that all guest's available
156
+ * buffers are available to the device in SVQ avail ring. The guest may have
157
+ * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in
158
+ * qemu vaddr.
159
+ *
160
+ * If that happens, guest's kick notifications will be disabled until the
161
+ * device uses some buffers.
162
+ */
163
+static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
164
+{
165
+ /* Clear event notifier */
166
+ event_notifier_test_and_clear(&svq->svq_kick);
167
+
168
+ /* Forward to the device as many available buffers as possible */
169
+ do {
170
+ virtio_queue_set_notification(svq->vq, false);
171
+
172
+ while (true) {
173
+ VirtQueueElement *elem;
174
+ bool ok;
175
+
176
+ if (svq->next_guest_avail_elem) {
177
+ elem = g_steal_pointer(&svq->next_guest_avail_elem);
178
+ } else {
179
+ elem = virtqueue_pop(svq->vq, sizeof(*elem));
180
+ }
181
+
182
+ if (!elem) {
183
+ break;
184
+ }
185
+
186
+ if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
187
+ /*
188
+ * This condition is possible since a contiguous buffer in GPA
189
+ * does not imply a contiguous buffer in qemu's VA
190
+ * scatter-gather segments. If that happens, the buffer exposed
191
+ * to the device needs to be a chain of descriptors at this
192
+ * moment.
193
+ *
194
+ * SVQ cannot hold more available buffers if we are here:
195
+ * queue the current guest descriptor and ignore further kicks
196
+ * until some elements are used.
197
+ */
198
+ svq->next_guest_avail_elem = elem;
199
+ return;
200
+ }
201
+
202
+ ok = vhost_svq_add(svq, elem);
203
+ if (unlikely(!ok)) {
204
+ /* VQ is broken, just return and ignore any other kicks */
205
+ return;
206
+ }
207
+ vhost_svq_kick(svq);
208
+ }
209
+
210
+ virtio_queue_set_notification(svq->vq, true);
211
+ } while (!virtio_queue_empty(svq->vq));
212
+}
213
+
214
+/**
215
+ * Handle guest's kick.
216
*
217
* @n: guest kick event notifier, the one that guest set to notify svq.
218
*/
219
-static void vhost_handle_guest_kick(EventNotifier *n)
220
+static void vhost_handle_guest_kick_notifier(EventNotifier *n)
53
{
221
{
54
- qemu_set_fd_handler(s->fd,
222
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
55
- s->read_poll ? net_stream_send : NULL,
223
event_notifier_test_and_clear(n);
56
- s->write_poll ? net_stream_writable : NULL,
224
- event_notifier_set(&svq->hdev_kick);
57
- s);
225
+ vhost_handle_guest_kick(svq);
58
-}
226
+}
59
-
227
+
60
-static void net_stream_read_poll(NetStreamState *s, bool enable)
228
+static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
61
-{
229
+{
62
- s->read_poll = enable;
230
+ if (svq->last_used_idx != svq->shadow_used_idx) {
63
- net_stream_update_fd_handler(s);
231
+ return true;
64
-}
232
+ }
65
-
233
+
66
-static void net_stream_write_poll(NetStreamState *s, bool enable)
234
+ svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
67
-{
235
+
68
- s->write_poll = enable;
236
+ return svq->last_used_idx != svq->shadow_used_idx;
69
- net_stream_update_fd_handler(s);
70
-}
71
-
72
-static void net_stream_writable(void *opaque)
73
-{
74
- NetStreamState *s = opaque;
75
+ NetStreamState *s = data;
76
77
- net_stream_write_poll(s, false);
78
+ s->ioc_write_tag = 0;
79
80
qemu_flush_queued_packets(&s->nc);
81
+
82
+ return G_SOURCE_REMOVE;
83
}
237
}
84
238
85
static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf,
239
/**
86
@@ -XXX,XX +XXX,XX @@ static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf,
240
- * Forward vhost notifications
87
.iov_len = size,
241
+ * Enable vhost device calls after disabling them.
88
},
242
+ *
89
};
243
+ * @svq: The svq
90
+ struct iovec local_iov[2];
244
+ *
91
+ unsigned int nlocal_iov;
245
+ * It returns false if there are pending used buffers from the vhost device,
92
size_t remaining;
246
+ * avoiding the possible races between SVQ checking for more work and enabling
93
ssize_t ret;
247
+ * callbacks. True if SVQ used vring has no more pending buffers.
94
248
+ */
95
remaining = iov_size(iov, 2) - s->send_index;
249
+static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
96
- ret = iov_send(s->fd, iov, 2, s->send_index, remaining);
250
+{
97
-
251
+ svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
98
- if (ret == -1 && errno == EAGAIN) {
252
+ /* Make sure the flag is written before the read of used_idx */
99
+ nlocal_iov = iov_copy(local_iov, 2, iov, 2, s->send_index, remaining);
253
+ smp_mb();
100
+ ret = qio_channel_writev(s->ioc, local_iov, nlocal_iov, NULL);
254
+ return !vhost_svq_more_used(svq);
101
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
255
+}
102
ret = 0; /* handled further down */
256
+
103
}
257
+static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
104
if (ret == -1) {
258
+{
105
@@ -XXX,XX +XXX,XX @@ static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf,
259
+ svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
106
}
260
+}
107
if (ret < (ssize_t)remaining) {
261
+
108
s->send_index += ret;
262
+static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
109
- net_stream_write_poll(s, true);
263
+ uint32_t *len)
110
+ s->ioc_write_tag = qio_channel_add_watch(s->ioc, G_IO_OUT,
264
+{
111
+ net_stream_writable, s, NULL);
265
+ vring_desc_t *descs = svq->vring.desc;
112
return 0;
266
+ const vring_used_t *used = svq->vring.used;
113
}
267
+ vring_used_elem_t used_elem;
114
s->send_index = 0;
268
+ uint16_t last_used;
115
return size;
269
+
270
+ if (!vhost_svq_more_used(svq)) {
271
+ return NULL;
272
+ }
273
+
274
+ /* Only get used array entries after they have been exposed by dev */
275
+ smp_rmb();
276
+ last_used = svq->last_used_idx & (svq->vring.num - 1);
277
+ used_elem.id = le32_to_cpu(used->ring[last_used].id);
278
+ used_elem.len = le32_to_cpu(used->ring[last_used].len);
279
+
280
+ svq->last_used_idx++;
281
+ if (unlikely(used_elem.id >= svq->vring.num)) {
282
+ qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
283
+ svq->vdev->name, used_elem.id);
284
+ return NULL;
285
+ }
286
+
287
+ if (unlikely(!svq->ring_id_maps[used_elem.id])) {
288
+ qemu_log_mask(LOG_GUEST_ERROR,
289
+ "Device %s says index %u is used, but it was not available",
290
+ svq->vdev->name, used_elem.id);
291
+ return NULL;
292
+ }
293
+
294
+ descs[used_elem.id].next = svq->free_head;
295
+ svq->free_head = used_elem.id;
296
+
297
+ *len = used_elem.len;
298
+ return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
299
+}
300
+
301
+static void vhost_svq_flush(VhostShadowVirtqueue *svq,
302
+ bool check_for_avail_queue)
303
+{
304
+ VirtQueue *vq = svq->vq;
305
+
306
+ /* Forward as many used buffers as possible. */
307
+ do {
308
+ unsigned i = 0;
309
+
310
+ vhost_svq_disable_notification(svq);
311
+ while (true) {
312
+ uint32_t len;
313
+ g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
314
+ if (!elem) {
315
+ break;
316
+ }
317
+
318
+ if (unlikely(i >= svq->vring.num)) {
319
+ qemu_log_mask(LOG_GUEST_ERROR,
320
+ "More than %u used buffers obtained in a %u size SVQ",
321
+ i, svq->vring.num);
322
+ virtqueue_fill(vq, elem, len, i);
323
+ virtqueue_flush(vq, i);
324
+ return;
325
+ }
326
+ virtqueue_fill(vq, elem, len, i++);
327
+ }
328
+
329
+ virtqueue_flush(vq, i);
330
+ event_notifier_set(&svq->svq_call);
331
+
332
+ if (check_for_avail_queue && svq->next_guest_avail_elem) {
333
+ /*
334
+ * Avail ring was full when vhost_svq_flush was called, so it's a
335
+ * good moment to make more descriptors available if possible.
336
+ */
337
+ vhost_handle_guest_kick(svq);
338
+ }
339
+ } while (!vhost_svq_enable_notification(svq));
340
+}
341
+
342
+/**
343
+ * Forward used buffers.
344
*
345
* @n: hdev call event notifier, the one that device set to notify svq.
346
+ *
347
+ * Note that we are not making any buffers available in the loop, there is no
348
+ * way that it runs more than virtqueue size times.
349
*/
350
static void vhost_svq_handle_call(EventNotifier *n)
351
{
352
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
353
hdev_call);
354
event_notifier_test_and_clear(n);
355
- event_notifier_set(&svq->svq_call);
356
+ vhost_svq_flush(svq, true);
116
}
357
}
117
358
118
+static gboolean net_stream_send(QIOChannel *ioc,
359
/**
119
+ GIOCondition condition,
360
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
120
+ gpointer data);
361
if (poll_start) {
121
+
362
event_notifier_init_fd(svq_kick, svq_kick_fd);
122
static void net_stream_send_completed(NetClientState *nc, ssize_t len)
363
event_notifier_set(svq_kick);
123
{
364
- event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
124
NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc);
365
+ event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
125
366
+ }
126
- if (!s->read_poll) {
367
+}
127
- net_stream_read_poll(s, true);
368
+
128
+ if (!s->ioc_read_tag) {
369
+/**
129
+ s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN,
370
+ * Start the shadow virtqueue operation.
130
+ net_stream_send, s, NULL);
371
+ *
372
+ * @svq: Shadow Virtqueue
373
+ * @vdev: VirtIO device
374
+ * @vq: Virtqueue to shadow
375
+ */
376
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
377
+ VirtQueue *vq)
378
+{
379
+ size_t desc_size, driver_size, device_size;
380
+
381
+ svq->next_guest_avail_elem = NULL;
382
+ svq->shadow_avail_idx = 0;
383
+ svq->shadow_used_idx = 0;
384
+ svq->last_used_idx = 0;
385
+ svq->vdev = vdev;
386
+ svq->vq = vq;
387
+
388
+ svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
389
+ driver_size = vhost_svq_driver_area_size(svq);
390
+ device_size = vhost_svq_device_area_size(svq);
391
+ svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size);
392
+ desc_size = sizeof(vring_desc_t) * svq->vring.num;
393
+ svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
394
+ memset(svq->vring.desc, 0, driver_size);
395
+ svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
396
+ memset(svq->vring.used, 0, device_size);
397
+ svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
398
+ for (unsigned i = 0; i < svq->vring.num - 1; i++) {
399
+ svq->vring.desc[i].next = cpu_to_le16(i + 1);
131
}
400
}
132
}
401
}
133
402
134
@@ -XXX,XX +XXX,XX @@ static void net_stream_rs_finalize(SocketReadState *rs)
403
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
135
if (qemu_send_packet_async(&s->nc, rs->buf,
404
void vhost_svq_stop(VhostShadowVirtqueue *svq)
136
rs->packet_len,
405
{
137
net_stream_send_completed) == 0) {
406
event_notifier_set_handler(&svq->svq_kick, NULL);
138
- net_stream_read_poll(s, false);
407
+ g_autofree VirtQueueElement *next_avail_elem = NULL;
139
+ if (s->ioc_read_tag) {
408
+
140
+ g_source_remove(s->ioc_read_tag);
409
+ if (!svq->vq) {
141
+ s->ioc_read_tag = 0;
410
+ return;
142
+ }
411
+ }
412
+
413
+ /* Send all pending used descriptors to guest */
414
+ vhost_svq_flush(svq, false);
415
+
416
+ for (unsigned i = 0; i < svq->vring.num; ++i) {
417
+ g_autofree VirtQueueElement *elem = NULL;
418
+ elem = g_steal_pointer(&svq->ring_id_maps[i]);
419
+ if (elem) {
420
+ virtqueue_detach_element(svq->vq, elem, 0);
421
+ }
422
+ }
423
+
424
+ next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
425
+ if (next_avail_elem) {
426
+ virtqueue_detach_element(svq->vq, next_avail_elem, 0);
427
+ }
428
+ svq->vq = NULL;
429
+ g_free(svq->ring_id_maps);
430
+ qemu_vfree(svq->vring.desc);
431
+ qemu_vfree(svq->vring.used);
432
}
433
434
/**
435
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
436
index XXXXXXX..XXXXXXX 100644
437
--- a/hw/virtio/vhost-shadow-virtqueue.h
438
+++ b/hw/virtio/vhost-shadow-virtqueue.h
439
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
440
441
/* Guest's call notifier, where the SVQ calls guest. */
442
EventNotifier svq_call;
443
+
444
+ /* Virtio queue shadowing */
445
+ VirtQueue *vq;
446
+
447
+ /* Virtio device */
448
+ VirtIODevice *vdev;
449
+
450
+ /* Map for using the guest's descriptors */
451
+ VirtQueueElement **ring_id_maps;
452
+
453
+ /* Next VirtQueue element that guest made available */
454
+ VirtQueueElement *next_guest_avail_elem;
455
+
456
+ /* Next head to expose to the device */
457
+ uint16_t shadow_avail_idx;
458
+
459
+ /* Next free descriptor */
460
+ uint16_t free_head;
461
+
462
+ /* Last seen used idx */
463
+ uint16_t shadow_used_idx;
464
+
465
+ /* Next head to consume from the device */
466
+ uint16_t last_used_idx;
467
} VhostShadowVirtqueue;
468
469
bool vhost_svq_valid_features(uint64_t features, Error **errp);
470
@@ -XXX,XX +XXX,XX @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
471
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
472
size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
473
474
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
475
+ VirtQueue *vq);
476
void vhost_svq_stop(VhostShadowVirtqueue *svq);
477
478
VhostShadowVirtqueue *vhost_svq_new(void);
479
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
480
index XXXXXXX..XXXXXXX 100644
481
--- a/hw/virtio/vhost-vdpa.c
482
+++ b/hw/virtio/vhost-vdpa.c
483
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
484
* Note that this function does not rewind kick file descriptor if cannot set
485
* call one.
486
*/
487
-static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
488
- VhostShadowVirtqueue *svq, unsigned idx,
489
- Error **errp)
490
+static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
491
+ VhostShadowVirtqueue *svq, unsigned idx,
492
+ Error **errp)
493
{
494
struct vhost_vring_file file = {
495
.index = dev->vq_index + idx,
496
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
497
r = vhost_vdpa_set_vring_dev_kick(dev, &file);
498
if (unlikely(r != 0)) {
499
error_setg_errno(errp, -r, "Can't set device kick fd");
500
- return false;
501
+ return r;
143
}
502
}
503
504
event_notifier = &svq->hdev_call;
505
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
506
error_setg_errno(errp, -r, "Can't set device call fd");
507
}
508
509
+ return r;
510
+}
511
+
512
+/**
513
+ * Unmap a SVQ area in the device
514
+ */
515
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
516
+ hwaddr size)
517
+{
518
+ int r;
519
+
520
+ size = ROUND_UP(size, qemu_real_host_page_size);
521
+ r = vhost_vdpa_dma_unmap(v, iova, size);
522
+ return r == 0;
523
+}
524
+
525
+static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
526
+ const VhostShadowVirtqueue *svq)
527
+{
528
+ struct vhost_vdpa *v = dev->opaque;
529
+ struct vhost_vring_addr svq_addr;
530
+ size_t device_size = vhost_svq_device_area_size(svq);
531
+ size_t driver_size = vhost_svq_driver_area_size(svq);
532
+ bool ok;
533
+
534
+ vhost_svq_get_vring_addr(svq, &svq_addr);
535
+
536
+ ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
537
+ if (unlikely(!ok)) {
538
+ return false;
539
+ }
540
+
541
+ return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
542
+}
543
+
544
+/**
545
+ * Map the shadow virtqueue rings in the device
546
+ *
547
+ * @dev: The vhost device
548
+ * @svq: The shadow virtqueue
549
+ * @addr: Assigned IOVA addresses
550
+ * @errp: Error pointer
551
+ */
552
+static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
553
+ const VhostShadowVirtqueue *svq,
554
+ struct vhost_vring_addr *addr,
555
+ Error **errp)
556
+{
557
+ struct vhost_vdpa *v = dev->opaque;
558
+ size_t device_size = vhost_svq_device_area_size(svq);
559
+ size_t driver_size = vhost_svq_driver_area_size(svq);
560
+ int r;
561
+
562
+ ERRP_GUARD();
563
+ vhost_svq_get_vring_addr(svq, addr);
564
+
565
+ r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
566
+ (void *)(uintptr_t)addr->desc_user_addr, true);
567
+ if (unlikely(r != 0)) {
568
+ error_setg_errno(errp, -r, "Cannot create vq driver region: ");
569
+ return false;
570
+ }
571
+
572
+ r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
573
+ (void *)(intptr_t)addr->used_user_addr, false);
574
+ if (unlikely(r != 0)) {
575
+ error_setg_errno(errp, -r, "Cannot create vq device region: ");
576
+ }
577
+
578
+ return r == 0;
579
+}
580
+
581
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
582
+ VhostShadowVirtqueue *svq, unsigned idx,
583
+ Error **errp)
584
+{
585
+ uint16_t vq_index = dev->vq_index + idx;
586
+ struct vhost_vring_state s = {
587
+ .index = vq_index,
588
+ };
589
+ int r;
590
+
591
+ r = vhost_vdpa_set_dev_vring_base(dev, &s);
592
+ if (unlikely(r)) {
593
+ error_setg_errno(errp, -r, "Cannot set vring base");
594
+ return false;
595
+ }
596
+
597
+ r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
598
return r == 0;
144
}
599
}
145
600
146
-static void net_stream_send(void *opaque)
601
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
147
+static gboolean net_stream_send(QIOChannel *ioc,
602
}
148
+ GIOCondition condition,
603
149
+ gpointer data)
604
for (i = 0; i < v->shadow_vqs->len; ++i) {
150
{
605
+ VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
151
- NetStreamState *s = opaque;
606
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
152
+ NetStreamState *s = data;
607
+ struct vhost_vring_addr addr = {
153
int size;
608
+ .index = i,
154
int ret;
609
+ };
155
- uint8_t buf1[NET_BUFSIZE];
610
+ int r;
156
- const uint8_t *buf;
611
bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
157
+ char buf1[NET_BUFSIZE];
612
if (unlikely(!ok)) {
158
+ const char *buf;
613
- error_reportf_err(err, "Cannot setup SVQ %u: ", i);
159
614
+ goto err;
160
- size = recv(s->fd, buf1, sizeof(buf1), 0);
615
+ }
161
+ size = qio_channel_read(s->ioc, buf1, sizeof(buf1), NULL);
616
+
162
if (size < 0) {
617
+ vhost_svq_start(svq, dev->vdev, vq);
163
if (errno != EWOULDBLOCK) {
618
+ ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
164
goto eoc;
619
+ if (unlikely(!ok)) {
165
@@ -XXX,XX +XXX,XX @@ static void net_stream_send(void *opaque)
620
+ goto err_map;
166
} else if (size == 0) {
621
+ }
167
/* end of connection */
622
+
168
eoc:
623
+ /* Override vring GPA set by vhost subsystem */
169
- net_stream_read_poll(s, false);
624
+ r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
170
- net_stream_write_poll(s, false);
625
+ if (unlikely(r != 0)) {
171
- if (s->listen_fd != -1) {
626
+ error_setg_errno(&err, -r, "Cannot set device address");
172
- qemu_set_fd_handler(s->listen_fd, net_stream_accept, NULL, s);
627
+ goto err_set_addr;
173
+ s->ioc_read_tag = 0;
628
+ }
174
+ if (s->ioc_write_tag) {
629
+ }
175
+ g_source_remove(s->ioc_write_tag);
630
+
176
+ s->ioc_write_tag = 0;
631
+ return true;
632
+
633
+err_set_addr:
634
+ vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));
635
+
636
+err_map:
637
+ vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
638
+
639
+err:
640
+ error_reportf_err(err, "Cannot setup SVQ %u: ", i);
641
+ for (unsigned j = 0; j < i; ++j) {
642
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
643
+ vhost_vdpa_svq_unmap_rings(dev, svq);
644
+ vhost_svq_stop(svq);
645
+ }
646
+
647
+ return false;
648
+}
649
+
650
+static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
651
+{
652
+ struct vhost_vdpa *v = dev->opaque;
653
+
654
+ if (!v->shadow_vqs) {
655
+ return true;
656
+ }
657
+
658
+ for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
659
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
660
+ bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
661
+ if (unlikely(!ok)) {
662
return false;
177
}
663
}
178
- closesocket(s->fd);
179
+ if (s->listener) {
180
+ qio_net_listener_set_client_func(s->listener, net_stream_listen,
181
+ s, NULL);
182
+ }
183
+ object_unref(OBJECT(s->ioc));
184
+ s->ioc = NULL;
185
186
- s->fd = -1;
187
net_socket_rs_init(&s->rs, net_stream_rs_finalize, false);
188
s->nc.link_down = true;
189
qemu_set_info_str(&s->nc, "");
190
191
- return;
192
+ return G_SOURCE_REMOVE;
193
}
664
}
194
buf = buf1;
665
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
195
666
}
196
- ret = net_fill_rstate(&s->rs, buf, size);
667
vhost_vdpa_set_vring_ready(dev);
197
+ ret = net_fill_rstate(&s->rs, (const uint8_t *)buf, size);
668
} else {
198
669
+ ok = vhost_vdpa_svqs_stop(dev);
199
if (ret == -1) {
670
+ if (unlikely(!ok)) {
200
goto eoc;
671
+ return -1;
672
+ }
673
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
201
}
674
}
202
+
675
203
+ return G_SOURCE_CONTINUE;
204
}
205
206
static void net_stream_cleanup(NetClientState *nc)
207
{
208
NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc);
209
- if (s->fd != -1) {
210
- net_stream_read_poll(s, false);
211
- net_stream_write_poll(s, false);
212
- close(s->fd);
213
- s->fd = -1;
214
+ if (s->ioc) {
215
+ if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) {
216
+ if (s->ioc_read_tag) {
217
+ g_source_remove(s->ioc_read_tag);
218
+ s->ioc_read_tag = 0;
219
+ }
220
+ if (s->ioc_write_tag) {
221
+ g_source_remove(s->ioc_write_tag);
222
+ s->ioc_write_tag = 0;
223
+ }
224
+ }
225
+ object_unref(OBJECT(s->ioc));
226
+ s->ioc = NULL;
227
}
228
- if (s->listen_fd != -1) {
229
- qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL);
230
- closesocket(s->listen_fd);
231
- s->listen_fd = -1;
232
+ if (s->listen_ioc) {
233
+ if (s->listener) {
234
+ qio_net_listener_disconnect(s->listener);
235
+ object_unref(OBJECT(s->listener));
236
+ s->listener = NULL;
237
+ }
238
+ object_unref(OBJECT(s->listen_ioc));
239
+ s->listen_ioc = NULL;
240
}
241
}
242
243
-static void net_stream_connect(void *opaque)
244
-{
245
- NetStreamState *s = opaque;
246
- net_stream_read_poll(s, true);
247
-}
248
-
249
static NetClientInfo net_stream_info = {
250
.type = NET_CLIENT_DRIVER_STREAM,
251
.size = sizeof(NetStreamState),
252
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_stream_info = {
253
.cleanup = net_stream_cleanup,
254
};
255
256
-static NetStreamState *net_stream_fd_init(NetClientState *peer,
257
- const char *model,
258
- const char *name,
259
- int fd, int is_connected)
260
+static void net_stream_listen(QIONetListener *listener,
261
+ QIOChannelSocket *cioc,
262
+ void *opaque)
263
{
264
- NetClientState *nc;
265
- NetStreamState *s;
266
-
267
- nc = qemu_new_net_client(&net_stream_info, peer, model, name);
268
+ NetStreamState *s = opaque;
269
+ SocketAddress *addr;
270
+ char *uri;
271
272
- qemu_set_info_str(nc, "fd=%d", fd);
273
+ object_ref(OBJECT(cioc));
274
275
- s = DO_UPCAST(NetStreamState, nc, nc);
276
+ qio_net_listener_set_client_func(s->listener, NULL, s, NULL);
277
278
- s->fd = fd;
279
- s->listen_fd = -1;
280
- net_socket_rs_init(&s->rs, net_stream_rs_finalize, false);
281
+ s->ioc = QIO_CHANNEL(cioc);
282
+ qio_channel_set_name(s->ioc, "stream-server");
283
+ s->nc.link_down = false;
284
285
- /* Disable Nagle algorithm on TCP sockets to reduce latency */
286
- socket_set_nodelay(fd);
287
+ s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send,
288
+ s, NULL);
289
290
- if (is_connected) {
291
- net_stream_connect(s);
292
+ if (cioc->localAddr.ss_family == AF_UNIX) {
293
+ addr = qio_channel_socket_get_local_address(cioc, NULL);
294
} else {
295
- qemu_set_fd_handler(s->fd, NULL, net_stream_connect, s);
296
+ addr = qio_channel_socket_get_remote_address(cioc, NULL);
297
}
298
- return s;
299
+ g_assert(addr != NULL);
300
+ uri = socket_uri(addr);
301
+ qemu_set_info_str(&s->nc, uri);
302
+ g_free(uri);
303
+ qapi_free_SocketAddress(addr);
304
}
305
306
-static void net_stream_accept(void *opaque)
307
+static void net_stream_server_listening(QIOTask *task, gpointer opaque)
308
{
309
NetStreamState *s = opaque;
310
- struct sockaddr_storage saddr;
311
- socklen_t len;
312
- int fd;
313
-
314
- for (;;) {
315
- len = sizeof(saddr);
316
- fd = qemu_accept(s->listen_fd, (struct sockaddr *)&saddr, &len);
317
- if (fd < 0 && errno != EINTR) {
318
- return;
319
- } else if (fd >= 0) {
320
- qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL);
321
- break;
322
- }
323
- }
324
+ QIOChannelSocket *listen_sioc = QIO_CHANNEL_SOCKET(s->listen_ioc);
325
+ SocketAddress *addr;
326
+ int ret;
327
328
- s->fd = fd;
329
- s->nc.link_down = false;
330
- net_stream_connect(s);
331
- switch (saddr.ss_family) {
332
- case AF_INET: {
333
- struct sockaddr_in *saddr_in = (struct sockaddr_in *)&saddr;
334
-
335
- qemu_set_info_str(&s->nc, "connection from %s:%d",
336
- inet_ntoa(saddr_in->sin_addr),
337
- ntohs(saddr_in->sin_port));
338
- break;
339
+ if (listen_sioc->fd < 0) {
340
+ qemu_set_info_str(&s->nc, "connection error");
341
+ return;
342
}
343
- case AF_UNIX: {
344
- struct sockaddr_un saddr_un;
345
346
- len = sizeof(saddr_un);
347
- getsockname(s->listen_fd, (struct sockaddr *)&saddr_un, &len);
348
- qemu_set_info_str(&s->nc, "connect from %s", saddr_un.sun_path);
349
- break;
350
- }
351
- default:
352
- g_assert_not_reached();
353
+ addr = qio_channel_socket_get_local_address(listen_sioc, NULL);
354
+ g_assert(addr != NULL);
355
+ ret = qemu_socket_try_set_nonblock(listen_sioc->fd);
356
+ if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) {
357
+ qemu_set_info_str(&s->nc, "can't use file descriptor %s (errno %d)",
358
+ addr->u.fd.str, -ret);
359
+ return;
360
}
361
+ g_assert(ret == 0);
362
+ qapi_free_SocketAddress(addr);
363
+
364
+ s->nc.link_down = true;
365
+ s->listener = qio_net_listener_new();
366
+
367
+ net_socket_rs_init(&s->rs, net_stream_rs_finalize, false);
368
+ qio_net_listener_set_client_func(s->listener, net_stream_listen, s, NULL);
369
+ qio_net_listener_add(s->listener, listen_sioc);
370
}
371
372
static int net_stream_server_init(NetClientState *peer,
373
@@ -XXX,XX +XXX,XX @@ static int net_stream_server_init(NetClientState *peer,
374
{
375
NetClientState *nc;
376
NetStreamState *s;
377
- int fd, ret;
378
+ QIOChannelSocket *listen_sioc = qio_channel_socket_new();
379
380
- switch (addr->type) {
381
- case SOCKET_ADDRESS_TYPE_INET: {
382
- struct sockaddr_in saddr_in;
383
-
384
- if (convert_host_port(&saddr_in, addr->u.inet.host, addr->u.inet.port,
385
- errp) < 0) {
386
- return -1;
387
- }
388
-
389
- fd = qemu_socket(PF_INET, SOCK_STREAM, 0);
390
- if (fd < 0) {
391
- error_setg_errno(errp, errno, "can't create stream socket");
392
- return -1;
393
- }
394
- qemu_socket_set_nonblock(fd);
395
+ nc = qemu_new_net_client(&net_stream_info, peer, model, name);
396
+ s = DO_UPCAST(NetStreamState, nc, nc);
397
398
- socket_set_fast_reuse(fd);
399
+ s->listen_ioc = QIO_CHANNEL(listen_sioc);
400
+ qio_channel_socket_listen_async(listen_sioc, addr, 0,
401
+ net_stream_server_listening, s,
402
+ NULL, NULL);
403
404
- ret = bind(fd, (struct sockaddr *)&saddr_in, sizeof(saddr_in));
405
- if (ret < 0) {
406
- error_setg_errno(errp, errno, "can't bind ip=%s to socket",
407
- inet_ntoa(saddr_in.sin_addr));
408
- closesocket(fd);
409
- return -1;
410
- }
411
- break;
412
- }
413
- case SOCKET_ADDRESS_TYPE_UNIX: {
414
- struct sockaddr_un saddr_un;
415
-
416
- ret = unlink(addr->u.q_unix.path);
417
- if (ret < 0 && errno != ENOENT) {
418
- error_setg_errno(errp, errno, "failed to unlink socket %s",
419
- addr->u.q_unix.path);
420
- return -1;
421
- }
422
+ return 0;
423
+}
424
425
- saddr_un.sun_family = PF_UNIX;
426
- ret = snprintf(saddr_un.sun_path, sizeof(saddr_un.sun_path), "%s",
427
- addr->u.q_unix.path);
428
- if (ret < 0 || ret >= sizeof(saddr_un.sun_path)) {
429
- error_setg(errp, "UNIX socket path '%s' is too long",
430
- addr->u.q_unix.path);
431
- error_append_hint(errp, "Path must be less than %zu bytes\n",
432
- sizeof(saddr_un.sun_path));
433
- return -1;
434
- }
435
+static void net_stream_client_connected(QIOTask *task, gpointer opaque)
436
+{
437
+ NetStreamState *s = opaque;
438
+ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(s->ioc);
439
+ SocketAddress *addr;
440
+ gchar *uri;
441
+ int ret;
442
443
- fd = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
444
- if (fd < 0) {
445
- error_setg_errno(errp, errno, "can't create stream socket");
446
- return -1;
447
- }
448
- qemu_socket_set_nonblock(fd);
449
-
450
- ret = bind(fd, (struct sockaddr *)&saddr_un, sizeof(saddr_un));
451
- if (ret < 0) {
452
- error_setg_errno(errp, errno, "can't create socket with path: %s",
453
- saddr_un.sun_path);
454
- closesocket(fd);
455
- return -1;
456
- }
457
- break;
458
- }
459
- case SOCKET_ADDRESS_TYPE_FD:
460
- fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp);
461
- if (fd == -1) {
462
- return -1;
463
- }
464
- ret = qemu_socket_try_set_nonblock(fd);
465
- if (ret < 0) {
466
- error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
467
- name, fd);
468
- return -1;
469
- }
470
- break;
471
- default:
472
- error_setg(errp, "only support inet or fd type");
473
- return -1;
474
+ if (sioc->fd < 0) {
475
+ qemu_set_info_str(&s->nc, "connection error");
476
+ goto error;
477
}
478
479
- ret = listen(fd, 0);
480
- if (ret < 0) {
481
- error_setg_errno(errp, errno, "can't listen on socket");
482
- closesocket(fd);
483
- return -1;
484
+ addr = qio_channel_socket_get_remote_address(sioc, NULL);
485
+ g_assert(addr != NULL);
486
+ uri = socket_uri(addr);
487
+ qemu_set_info_str(&s->nc, uri);
488
+ g_free(uri);
489
+
490
+ ret = qemu_socket_try_set_nonblock(sioc->fd);
491
+ if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) {
492
+ qemu_set_info_str(&s->nc, "can't use file descriptor %s (errno %d)",
493
+ addr->u.fd.str, -ret);
494
+ qapi_free_SocketAddress(addr);
495
+ goto error;
496
}
497
+ g_assert(ret == 0);
498
499
- nc = qemu_new_net_client(&net_stream_info, peer, model, name);
500
- s = DO_UPCAST(NetStreamState, nc, nc);
501
- s->fd = -1;
502
- s->listen_fd = fd;
503
- s->nc.link_down = true;
504
net_socket_rs_init(&s->rs, net_stream_rs_finalize, false);
505
506
- qemu_set_fd_handler(s->listen_fd, net_stream_accept, NULL, s);
507
- return 0;
508
+ /* Disable Nagle algorithm on TCP sockets to reduce latency */
509
+ qio_channel_set_delay(s->ioc, false);
510
+
511
+ s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send,
512
+ s, NULL);
513
+ s->nc.link_down = false;
514
+ qapi_free_SocketAddress(addr);
515
+
516
+ return;
517
+error:
518
+ object_unref(OBJECT(s->ioc));
519
+ s->ioc = NULL;
520
}
521
522
static int net_stream_client_init(NetClientState *peer,
523
@@ -XXX,XX +XXX,XX @@ static int net_stream_client_init(NetClientState *peer,
524
Error **errp)
525
{
526
NetStreamState *s;
527
- struct sockaddr_in saddr_in;
528
- struct sockaddr_un saddr_un;
529
- int fd, connected, ret;
530
-
531
- switch (addr->type) {
532
- case SOCKET_ADDRESS_TYPE_INET:
533
- if (convert_host_port(&saddr_in, addr->u.inet.host, addr->u.inet.port,
534
- errp) < 0) {
535
- return -1;
536
- }
537
+ NetClientState *nc;
538
+ QIOChannelSocket *sioc = qio_channel_socket_new();
539
540
- fd = qemu_socket(PF_INET, SOCK_STREAM, 0);
541
- if (fd < 0) {
542
- error_setg_errno(errp, errno, "can't create stream socket");
543
- return -1;
544
- }
545
- qemu_socket_set_nonblock(fd);
546
-
547
- connected = 0;
548
- for (;;) {
549
- ret = connect(fd, (struct sockaddr *)&saddr_in, sizeof(saddr_in));
550
- if (ret < 0) {
551
- if (errno == EINTR || errno == EWOULDBLOCK) {
552
- /* continue */
553
- } else if (errno == EINPROGRESS ||
554
- errno == EALREADY) {
555
- break;
556
- } else {
557
- error_setg_errno(errp, errno, "can't connect socket");
558
- closesocket(fd);
559
- return -1;
560
- }
561
- } else {
562
- connected = 1;
563
- break;
564
- }
565
- }
566
- break;
567
- case SOCKET_ADDRESS_TYPE_UNIX:
568
- saddr_un.sun_family = PF_UNIX;
569
- ret = snprintf(saddr_un.sun_path, sizeof(saddr_un.sun_path), "%s",
570
- addr->u.q_unix.path);
571
- if (ret < 0 || ret >= sizeof(saddr_un.sun_path)) {
572
- error_setg(errp, "UNIX socket path '%s' is too long",
573
- addr->u.q_unix.path);
574
- error_append_hint(errp, "Path must be less than %zu bytes\n",
575
- sizeof(saddr_un.sun_path));
576
- return -1;
577
- }
578
+ nc = qemu_new_net_client(&net_stream_info, peer, model, name);
579
+ s = DO_UPCAST(NetStreamState, nc, nc);
580
581
- fd = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
582
- if (fd < 0) {
583
- error_setg_errno(errp, errno, "can't create stream socket");
584
- return -1;
585
- }
586
- qemu_socket_set_nonblock(fd);
587
-
588
- connected = 0;
589
- for (;;) {
590
- ret = connect(fd, (struct sockaddr *)&saddr_un, sizeof(saddr_un));
591
- if (ret < 0) {
592
- if (errno == EINTR || errno == EWOULDBLOCK) {
593
- /* continue */
594
- } else if (errno == EAGAIN ||
595
- errno == EALREADY) {
596
- break;
597
- } else {
598
- error_setg_errno(errp, errno, "can't connect socket");
599
- closesocket(fd);
600
- return -1;
601
- }
602
- } else {
603
- connected = 1;
604
- break;
605
- }
606
- }
607
- break;
608
- case SOCKET_ADDRESS_TYPE_FD:
609
- fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp);
610
- if (fd == -1) {
611
- return -1;
612
- }
613
- ret = qemu_socket_try_set_nonblock(fd);
614
- if (ret < 0) {
615
- error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
616
- name, fd);
617
- return -1;
618
- }
619
- connected = 1;
620
- break;
621
- default:
622
- error_setg(errp, "only support inet, unix or fd type");
623
- return -1;
624
- }
625
+ s->ioc = QIO_CHANNEL(sioc);
626
+ s->nc.link_down = true;
627
+
628
+ qio_channel_socket_connect_async(sioc, addr,
629
+ net_stream_client_connected, s,
630
+ NULL, NULL);
631
632
- s = net_stream_fd_init(peer, model, name, fd, connected);
633
-
634
- switch (addr->type) {
635
- case SOCKET_ADDRESS_TYPE_INET:
636
- qemu_set_info_str(&s->nc, "connect to %s:%d",
637
- inet_ntoa(saddr_in.sin_addr),
638
- ntohs(saddr_in.sin_port));
639
- break;
640
- case SOCKET_ADDRESS_TYPE_UNIX:
641
- qemu_set_info_str(&s->nc, " connect to %s", saddr_un.sun_path);
642
- break;
643
- case SOCKET_ADDRESS_TYPE_FD:
644
- qemu_set_info_str(&s->nc, "connect to fd %d", fd);
645
- break;
646
- default:
647
- g_assert_not_reached();
648
- }
649
return 0;
650
}
651
652
diff --git a/qemu-options.hx b/qemu-options.hx
653
index XXXXXXX..XXXXXXX 100644
654
--- a/qemu-options.hx
655
+++ b/qemu-options.hx
656
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
657
"-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n"
658
" configure a network backend to connect to another network\n"
659
" using an UDP tunnel\n"
660
- "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port\n"
661
- "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path\n"
662
+ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n"
663
+ "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n"
664
"-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n"
665
" configure a network backend to connect to another network\n"
666
" using a socket connection in stream mode.\n"
667
--
676
--
668
2.7.4
677
2.7.4
678
679
diff view generated by jsdifflib
1
From: Laurent Vivier <lvivier@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
dgram_dst is a sockaddr_in structure. To be able to use it with
3
This iova tree function allows it to look for a hole in allocated
4
unix socket, use a pointer to a generic sockaddr structure.
4
regions and return a totally new translation for a given translated
5
5
address.
6
Rename it dest_addr, and store socket length in dest_len.
6
7
7
Its usage is mainly to allow devices to access qemu address space,
8
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
8
remapping guest's one into a new iova space where qemu can add chunks of
9
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
9
addresses.
10
11
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
12
Reviewed-by: Peter Xu <peterx@redhat.com>
10
Acked-by: Michael S. Tsirkin <mst@redhat.com>
13
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
15
---
14
net/dgram.c | 82 +++++++++++++++++++++++++++++++++++++++----------------------
16
include/qemu/iova-tree.h | 18 +++++++
15
1 file changed, 53 insertions(+), 29 deletions(-)
17
util/iova-tree.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++
16
18
2 files changed, 154 insertions(+)
17
diff --git a/net/dgram.c b/net/dgram.c
19
20
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
18
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
19
--- a/net/dgram.c
22
--- a/include/qemu/iova-tree.h
20
+++ b/net/dgram.c
23
+++ b/include/qemu/iova-tree.h
21
@@ -XXX,XX +XXX,XX @@ typedef struct NetDgramState {
24
@@ -XXX,XX +XXX,XX @@
22
NetClientState nc;
25
#define IOVA_OK (0)
23
int fd;
26
#define IOVA_ERR_INVALID (-1) /* Invalid parameters */
24
SocketReadState rs;
27
#define IOVA_ERR_OVERLAP (-2) /* IOVA range overlapped */
25
- struct sockaddr_in dgram_dst; /* contains destination iff connectionless */
28
+#define IOVA_ERR_NOMEM (-3) /* Cannot allocate */
26
bool read_poll; /* waiting to receive data? */
29
27
bool write_poll; /* waiting to transmit data? */
30
typedef struct IOVATree IOVATree;
28
+ /* contains destination iff connectionless */
31
typedef struct DMAMap {
29
+ struct sockaddr *dest_addr;
32
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova);
30
+ socklen_t dest_len;
33
void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
31
} NetDgramState;
34
32
35
/**
33
static void net_dgram_send(void *opaque);
36
+ * iova_tree_alloc_map:
34
@@ -XXX,XX +XXX,XX @@ static ssize_t net_dgram_receive(NetClientState *nc,
37
+ *
35
ssize_t ret;
38
+ * @tree: the iova tree to allocate from
36
39
+ * @map: the new map (as translated addr & size) to allocate in the iova region
37
do {
40
+ * @iova_begin: the minimum address of the allocation
38
- if (s->dgram_dst.sin_family != AF_UNIX) {
41
+ * @iova_end: the maximum address of the allocation
39
- ret = sendto(s->fd, buf, size, 0,
42
+ *
40
- (struct sockaddr *)&s->dgram_dst,
43
+ * Allocates a new region of a given size, between iova_begin and iova_end.
41
- sizeof(s->dgram_dst));
44
+ *
42
+ if (s->dest_addr) {
45
+ * Return: Same as iova_tree_insert, but cannot overlap and can return error if
43
+ ret = sendto(s->fd, buf, size, 0, s->dest_addr, s->dest_len);
46
+ * iova tree is out of free contiguous range. The caller gets the assigned iova
44
} else {
47
+ * in map->iova.
45
ret = send(s->fd, buf, size, 0);
48
+ */
46
}
49
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
47
@@ -XXX,XX +XXX,XX @@ static void net_dgram_cleanup(NetClientState *nc)
50
+ hwaddr iova_end);
48
close(s->fd);
51
+
49
s->fd = -1;
52
+/**
50
}
53
* iova_tree_destroy:
51
+ g_free(s->dest_addr);
54
*
52
+ s->dest_addr = NULL;
55
* @tree: the iova tree to destroy
53
+ s->dest_len = 0;
56
diff --git a/util/iova-tree.c b/util/iova-tree.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/util/iova-tree.c
59
+++ b/util/iova-tree.c
60
@@ -XXX,XX +XXX,XX @@ struct IOVATree {
61
GTree *tree;
62
};
63
64
+/* Args to pass to iova_tree_alloc foreach function. */
65
+struct IOVATreeAllocArgs {
66
+ /* Size of the desired allocation */
67
+ size_t new_size;
68
+
69
+ /* The minimum address allowed in the allocation */
70
+ hwaddr iova_begin;
71
+
72
+ /* Map at the left of the hole, can be NULL if "this" is first one */
73
+ const DMAMap *prev;
74
+
75
+ /* Map at the right of the hole, can be NULL if "prev" is the last one */
76
+ const DMAMap *this;
77
+
78
+ /* If found, we fill in the IOVA here */
79
+ hwaddr iova_result;
80
+
81
+ /* Whether we have found a valid IOVA */
82
+ bool iova_found;
83
+};
84
+
85
+/**
86
+ * Iterate args to the next hole
87
+ *
88
+ * @args: The alloc arguments
89
+ * @next: The next mapping in the tree. Can be NULL to signal the last one
90
+ */
91
+static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args,
92
+ const DMAMap *next)
93
+{
94
+ args->prev = args->this;
95
+ args->this = next;
96
+}
97
+
98
static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
99
{
100
const DMAMap *m1 = a, *m2 = b;
101
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map)
102
return IOVA_OK;
54
}
103
}
55
104
56
static NetClientInfo net_dgram_socket_info = {
105
+/**
57
@@ -XXX,XX +XXX,XX @@ static NetDgramState *net_dgram_fd_init(NetClientState *peer,
106
+ * Try to find an unallocated IOVA range between prev and this elements.
58
SocketAddress *mcast,
107
+ *
59
Error **errp)
108
+ * @args: Arguments to allocation
109
+ *
110
+ * Cases:
111
+ *
112
+ * (1) !prev, !this: No entries allocated, always succeed
113
+ *
114
+ * (2) !prev, this: We're iterating at the 1st element.
115
+ *
116
+ * (3) prev, !this: We're iterating at the last element.
117
+ *
118
+ * (4) prev, this: this is the most common case, we'll try to find a hole
119
+ * between "prev" and "this" mapping.
120
+ *
121
+ * Note that this function assumes the last valid iova is HWADDR_MAX, but it
122
+ * searches linearly so it's easy to discard the result if it's not the case.
123
+ */
124
+static void iova_tree_alloc_map_in_hole(struct IOVATreeAllocArgs *args)
125
+{
126
+ const DMAMap *prev = args->prev, *this = args->this;
127
+ uint64_t hole_start, hole_last;
128
+
129
+ if (this && this->iova + this->size < args->iova_begin) {
130
+ return;
131
+ }
132
+
133
+ hole_start = MAX(prev ? prev->iova + prev->size + 1 : 0, args->iova_begin);
134
+ hole_last = this ? this->iova : HWADDR_MAX;
135
+
136
+ if (hole_last - hole_start > args->new_size) {
137
+ args->iova_result = hole_start;
138
+ args->iova_found = true;
139
+ }
140
+}
141
+
142
+/**
143
+ * Foreach dma node in the tree, compare if there is a hole with its previous
144
+ * node (or minimum iova address allowed) and the node.
145
+ *
146
+ * @key: Node iterating
147
+ * @value: Node iterating
148
+ * @pargs: Struct to communicate with the outside world
149
+ *
150
+ * Return: false to keep iterating, true if needs break.
151
+ */
152
+static gboolean iova_tree_alloc_traverse(gpointer key, gpointer value,
153
+ gpointer pargs)
154
+{
155
+ struct IOVATreeAllocArgs *args = pargs;
156
+ DMAMap *node = value;
157
+
158
+ assert(key == value);
159
+
160
+ iova_tree_alloc_args_iterate(args, node);
161
+ iova_tree_alloc_map_in_hole(args);
162
+ return args->iova_found;
163
+}
164
+
165
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
166
+ hwaddr iova_last)
167
+{
168
+ struct IOVATreeAllocArgs args = {
169
+ .new_size = map->size,
170
+ .iova_begin = iova_begin,
171
+ };
172
+
173
+ if (unlikely(iova_last < iova_begin)) {
174
+ return IOVA_ERR_INVALID;
175
+ }
176
+
177
+ /*
178
+ * Find a valid hole for the mapping
179
+ *
180
+ * Assuming low iova_begin, so no need to do a binary search to
181
+ * locate the first node.
182
+ *
183
+ * TODO: Replace all this with g_tree_node_first/next/last when available
184
+ * (from glib since 2.68). To do it with g_tree_foreach complicates the
185
+ * code a lot.
186
+ *
187
+ */
188
+ g_tree_foreach(tree->tree, iova_tree_alloc_traverse, &args);
189
+ if (!args.iova_found) {
190
+ /*
191
+ * Either tree is empty or the last hole is still not checked.
192
+ * g_tree_foreach does not compare (last, iova_last] range, so we check
193
+ * it here.
194
+ */
195
+ iova_tree_alloc_args_iterate(&args, NULL);
196
+ iova_tree_alloc_map_in_hole(&args);
197
+ }
198
+
199
+ if (!args.iova_found || args.iova_result + map->size > iova_last) {
200
+ return IOVA_ERR_NOMEM;
201
+ }
202
+
203
+ map->iova = args.iova_result;
204
+ return iova_tree_insert(tree, map);
205
+}
206
+
207
void iova_tree_destroy(IOVATree *tree)
60
{
208
{
61
- struct sockaddr_in saddr;
209
g_tree_destroy(tree->tree);
62
+ struct sockaddr_in *saddr = NULL;
63
int newfd;
64
NetClientState *nc;
65
NetDgramState *s;
66
@@ -XXX,XX +XXX,XX @@ static NetDgramState *net_dgram_fd_init(NetClientState *peer,
67
qapi_free_SocketAddress(sa);
68
69
/*
70
- * fd passed: multicast: "learn" dgram_dst address from bound address and
71
+ * fd passed: multicast: "learn" dest_addr address from bound address and
72
* save it. Because this may be "shared" socket from a "master" process,
73
* datagrams would be recv() by ONLY ONE process: we must "clone" this
74
* dgram socket --jjo
75
*/
76
77
if (is_fd && mcast != NULL) {
78
- if (convert_host_port(&saddr, mcast->u.inet.host,
79
- mcast->u.inet.port, errp) < 0) {
80
+ saddr = g_new(struct sockaddr_in, 1);
81
+
82
+ if (convert_host_port(saddr, mcast->u.inet.host, mcast->u.inet.port,
83
+ errp) < 0) {
84
goto err;
85
}
86
/* must be bound */
87
- if (saddr.sin_addr.s_addr == 0) {
88
+ if (saddr->sin_addr.s_addr == 0) {
89
error_setg(errp, "can't setup multicast destination address");
90
goto err;
91
}
92
/* clone dgram socket */
93
- newfd = net_dgram_mcast_create(&saddr, NULL, errp);
94
+ newfd = net_dgram_mcast_create(saddr, NULL, errp);
95
if (newfd < 0) {
96
goto err;
97
}
98
/* clone newfd to fd, close newfd */
99
dup2(newfd, fd);
100
close(newfd);
101
-
102
}
103
104
nc = qemu_new_net_client(&net_dgram_socket_info, peer, model, name);
105
@@ -XXX,XX +XXX,XX @@ static NetDgramState *net_dgram_fd_init(NetClientState *peer,
106
net_dgram_read_poll(s, true);
107
108
/* mcast: save bound address as dst */
109
- if (is_fd && mcast != NULL) {
110
- s->dgram_dst = saddr;
111
+ if (saddr) {
112
+ g_assert(s->dest_addr == NULL);
113
+ s->dest_addr = (struct sockaddr *)saddr;
114
+ s->dest_len = sizeof(*saddr);
115
qemu_set_info_str(nc, "fd=%d (cloned mcast=%s:%d)", fd,
116
- inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
117
+ inet_ntoa(saddr->sin_addr), ntohs(saddr->sin_port));
118
} else {
119
- if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) {
120
- s->dgram_dst.sin_family = AF_UNIX;
121
- }
122
-
123
qemu_set_info_str(nc, "fd=%d %s", fd, SocketAddressType_str(sa_type));
124
}
125
126
return s;
127
128
err:
129
+ g_free(saddr);
130
closesocket(fd);
131
return NULL;
132
}
133
@@ -XXX,XX +XXX,XX @@ static int net_dgram_mcast_init(NetClientState *peer,
134
{
135
NetDgramState *s;
136
int fd, ret;
137
- struct sockaddr_in saddr;
138
+ struct sockaddr_in *saddr;
139
140
if (remote->type != SOCKET_ADDRESS_TYPE_INET) {
141
error_setg(errp, "multicast only support inet type");
142
return -1;
143
}
144
145
- if (convert_host_port(&saddr, remote->u.inet.host, remote->u.inet.port,
146
+ saddr = g_new(struct sockaddr_in, 1);
147
+ if (convert_host_port(saddr, remote->u.inet.host, remote->u.inet.port,
148
errp) < 0) {
149
+ g_free(saddr);
150
return -1;
151
}
152
153
if (!local) {
154
- fd = net_dgram_mcast_create(&saddr, NULL, errp);
155
+ fd = net_dgram_mcast_create(saddr, NULL, errp);
156
if (fd < 0) {
157
+ g_free(saddr);
158
return -1;
159
}
160
} else {
161
@@ -XXX,XX +XXX,XX @@ static int net_dgram_mcast_init(NetClientState *peer,
162
struct in_addr localaddr;
163
164
if (inet_aton(local->u.inet.host, &localaddr) == 0) {
165
+ g_free(saddr);
166
error_setg(errp, "localaddr '%s' is not a valid IPv4 address",
167
local->u.inet.host);
168
return -1;
169
}
170
171
- fd = net_dgram_mcast_create(&saddr, &localaddr, errp);
172
+ fd = net_dgram_mcast_create(saddr, &localaddr, errp);
173
if (fd < 0) {
174
+ g_free(saddr);
175
return -1;
176
}
177
break;
178
@@ -XXX,XX +XXX,XX @@ static int net_dgram_mcast_init(NetClientState *peer,
179
case SOCKET_ADDRESS_TYPE_FD:
180
fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp);
181
if (fd == -1) {
182
+ g_free(saddr);
183
return -1;
184
}
185
ret = qemu_socket_try_set_nonblock(fd);
186
if (ret < 0) {
187
+ g_free(saddr);
188
error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
189
name, fd);
190
return -1;
191
}
192
break;
193
default:
194
+ g_free(saddr);
195
error_setg(errp, "only support inet or fd type for local");
196
return -1;
197
}
198
@@ -XXX,XX +XXX,XX @@ static int net_dgram_mcast_init(NetClientState *peer,
199
local->type == SOCKET_ADDRESS_TYPE_FD,
200
remote, errp);
201
if (!s) {
202
+ g_free(saddr);
203
return -1;
204
}
205
206
- s->dgram_dst = saddr;
207
+ g_assert(s->dest_addr == NULL);
208
+ s->dest_addr = (struct sockaddr *)saddr;
209
+ s->dest_len = sizeof(*saddr);
210
+
211
+ qemu_set_info_str(&s->nc, "mcast=%s:%d", inet_ntoa(saddr->sin_addr),
212
+ ntohs(saddr->sin_port));
213
214
- qemu_set_info_str(&s->nc, "mcast=%s:%d", inet_ntoa(saddr.sin_addr),
215
- ntohs(saddr.sin_port));
216
return 0;
217
218
}
219
@@ -XXX,XX +XXX,XX @@ int net_init_dgram(const Netdev *netdev, const char *name,
220
{
221
NetDgramState *s;
222
int fd, ret;
223
- struct sockaddr_in raddr_in;
224
- struct sockaddr_in laddr_in;
225
SocketAddress *remote, *local;
226
+ struct sockaddr *dest_addr;
227
+ struct sockaddr_in laddr_in, raddr_in;
228
+ socklen_t dest_len;
229
230
assert(netdev->type == NET_CLIENT_DRIVER_DGRAM);
231
232
@@ -XXX,XX +XXX,XX @@ int net_init_dgram(const Netdev *netdev, const char *name,
233
return -1;
234
}
235
qemu_socket_set_nonblock(fd);
236
+
237
+ dest_len = sizeof(raddr_in);
238
+ dest_addr = g_malloc(dest_len);
239
+ memcpy(dest_addr, &raddr_in, dest_len);
240
break;
241
case SOCKET_ADDRESS_TYPE_FD:
242
fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp);
243
@@ -XXX,XX +XXX,XX @@ int net_init_dgram(const Netdev *netdev, const char *name,
244
name, fd);
245
return -1;
246
}
247
+ dest_addr = NULL;
248
+ dest_len = 0;
249
break;
250
default:
251
error_setg(errp, "only support inet or fd type for local");
252
@@ -XXX,XX +XXX,XX @@ int net_init_dgram(const Netdev *netdev, const char *name,
253
}
254
255
if (remote) {
256
- s->dgram_dst = raddr_in;
257
+ g_assert(s->dest_addr == NULL);
258
+ s->dest_addr = dest_addr;
259
+ s->dest_len = dest_len;
260
}
261
262
switch (local->type) {
263
--
210
--
264
2.7.4
211
2.7.4
265
212
266
213
diff view generated by jsdifflib
1
From: Eugenio Pérez <eperezma@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
The guest will see undefined behavior if it issues non-negotiated
3
This function does the reverse operation of iova_tree_find: To look for
4
commands, but it is expected somehow.
4
a mapping that matches a translated address so we can do the reverse.
5
5
6
Simplify code deleting this check.
6
This has linear complexity instead of logarithmic, but it supports
7
overlapping HVA. Future developments could reduce it.
7
8
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
Acked-by: Jason Wang <jasowang@redhat.com>
10
Acked-by: Michael S. Tsirkin <mst@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
---
12
net/vhost-vdpa.c | 48 ------------------------------------------------
13
include/qemu/iova-tree.h | 20 +++++++++++++++++++-
13
1 file changed, 48 deletions(-)
14
util/iova-tree.c | 34 ++++++++++++++++++++++++++++++++++
15
2 files changed, 53 insertions(+), 1 deletion(-)
14
16
15
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
17
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/net/vhost-vdpa.c
19
--- a/include/qemu/iova-tree.h
18
+++ b/net/vhost-vdpa.c
20
+++ b/include/qemu/iova-tree.h
19
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_vhost_vdpa_cvq_info = {
21
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
22
* @tree: the iova tree to search from
23
* @map: the mapping to search
24
*
25
- * Search for a mapping in the iova tree that overlaps with the
26
+ * Search for a mapping in the iova tree whose iova overlaps with the
27
* mapping range specified. Only the first found mapping will be
28
* returned.
29
*
30
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
31
const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map);
32
33
/**
34
+ * iova_tree_find_iova:
35
+ *
36
+ * @tree: the iova tree to search from
37
+ * @map: the mapping to search
38
+ *
39
+ * Search for a mapping in the iova tree whose translated_addr overlaps with the
40
+ * mapping range specified. Only the first found mapping will be
41
+ * returned.
42
+ *
43
+ * Return: DMAMap pointer if found, or NULL if not found. Note that
44
+ * the returned DMAMap pointer is maintained internally. User should
45
+ * only read the content but never modify or free the content. Also,
46
+ * user is responsible to make sure the pointer is valid (say, no
47
+ * concurrent deletion in progress).
48
+ */
49
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map);
50
+
51
+/**
52
* iova_tree_find_address:
53
*
54
* @tree: the iova tree to search from
55
diff --git a/util/iova-tree.c b/util/iova-tree.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/util/iova-tree.c
58
+++ b/util/iova-tree.c
59
@@ -XXX,XX +XXX,XX @@ struct IOVATreeAllocArgs {
60
bool iova_found;
20
};
61
};
21
62
63
+typedef struct IOVATreeFindIOVAArgs {
64
+ const DMAMap *needle;
65
+ const DMAMap *result;
66
+} IOVATreeFindIOVAArgs;
67
+
22
/**
68
/**
23
- * Do not forward commands not supported by SVQ. Otherwise, the device could
69
* Iterate args to the next hole
24
- * accept it and qemu would not know how to update the device model.
25
- */
26
-static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len)
27
-{
28
- struct virtio_net_ctrl_hdr ctrl;
29
-
30
- if (unlikely(len < sizeof(ctrl))) {
31
- qemu_log_mask(LOG_GUEST_ERROR,
32
- "%s: invalid legnth of out buffer %zu\n", __func__, len);
33
- return false;
34
- }
35
-
36
- memcpy(&ctrl, out_buf, sizeof(ctrl));
37
- switch (ctrl.class) {
38
- case VIRTIO_NET_CTRL_MAC:
39
- switch (ctrl.cmd) {
40
- case VIRTIO_NET_CTRL_MAC_ADDR_SET:
41
- return true;
42
- default:
43
- qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
44
- __func__, ctrl.cmd);
45
- };
46
- break;
47
- case VIRTIO_NET_CTRL_MQ:
48
- switch (ctrl.cmd) {
49
- case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
50
- return true;
51
- default:
52
- qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mq cmd %u\n",
53
- __func__, ctrl.cmd);
54
- };
55
- break;
56
- default:
57
- qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
58
- __func__, ctrl.class);
59
- };
60
-
61
- return false;
62
-}
63
-
64
-/**
65
* Validate and copy control virtqueue commands.
66
*
70
*
67
* Following QEMU guidelines, we offer a copy of the buffers to the device to
71
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map)
68
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
72
return g_tree_lookup(tree->tree, map);
69
.iov_len = sizeof(status),
73
}
70
};
74
71
ssize_t dev_written = -EINVAL;
75
+static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value,
72
- bool ok;
76
+ gpointer data)
73
77
+{
74
out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
78
+ const DMAMap *map = key;
75
s->cvq_cmd_out_buffer,
79
+ IOVATreeFindIOVAArgs *args = data;
76
vhost_vdpa_net_cvq_cmd_len());
80
+ const DMAMap *needle;
77
- ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len);
81
+
78
- if (unlikely(!ok)) {
82
+ g_assert(key == value);
79
- goto out;
83
+
80
- }
84
+ needle = args->needle;
81
-
85
+ if (map->translated_addr + map->size < needle->translated_addr ||
82
dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
86
+ needle->translated_addr + needle->size < map->translated_addr) {
83
if (unlikely(dev_written < 0)) {
87
+ return false;
84
goto out;
88
+ }
89
+
90
+ args->result = map;
91
+ return true;
92
+}
93
+
94
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map)
95
+{
96
+ IOVATreeFindIOVAArgs args = {
97
+ .needle = map,
98
+ };
99
+
100
+ g_tree_foreach(tree->tree, iova_tree_find_address_iterator, &args);
101
+ return args.result;
102
+}
103
+
104
const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova)
105
{
106
const DMAMap map = { .iova = iova, .size = 0 };
85
--
107
--
86
2.7.4
108
2.7.4
87
109
88
110
diff view generated by jsdifflib
1
From: Laurent Vivier <lvivier@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Copied from socket netdev file and modified to use SocketAddress
3
This tree is able to look for a translated address from an IOVA address.
4
to be able to introduce new features like unix socket.
5
4
6
"udp" and "mcast" are squashed into dgram netdev, multicast is detected
5
At first glance it is similar to util/iova-tree. However, SVQ working on
7
according to the IP address type.
6
devices with limited IOVA space need more capabilities, like allocating
8
"listen" and "connect" modes are managed by stream netdev. An optional
7
IOVA chunks or performing reverse translations (qemu addresses to iova).
9
parameter "server" defines the mode (off by default)
10
8
11
The two new types need to be parsed the modern way with -netdev, because
9
The allocation capability, as "assign a free IOVA address to this chunk
12
with the traditional way, the "type" field of netdev structure collides with
10
of memory in qemu's address space" allows shadow virtqueue to create a
13
the "type" field of SocketAddress and prevents the correct evaluation of the
11
new address space that is not restricted by guest's addressable one, so
14
command line option. Moreover the traditional way doesn't allow using
12
we can allocate shadow vqs vrings outside of it.
15
the same type (SocketAddress) several times with the -netdev option
16
(needed to specify "local" and "remote" addresses).
17
13
18
The previous commit paved the way for parsing the modern way, but
14
It duplicates the tree so it can search efficiently in both directions,
19
omitted one detail: how to pick modern vs. traditional, in
15
and it will signal overlap if iova or the translated address is present
20
netdev_is_modern().
16
in any tree.
21
17
22
We want to pick based on the value of parameter "type". But how to
18
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
23
extract it from the option argument?
24
25
Parsing the option argument, either the modern or the traditional way,
26
extracts it for us, but only if parsing succeeds.
27
28
If parsing fails, there is no good option. No matter which parser we
29
pick, it'll be the wrong one for some arguments, and the error
30
reporting will be confusing.
31
32
Fortunately, the traditional parser accepts *anything* when called in
33
a certain way. This maximizes our chance to extract the value of
34
"type", and in turn minimizes the risk of confusing error reporting.
35
36
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
37
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
38
Acked-by: Markus Armbruster <armbru@redhat.com>
39
Acked-by: Michael S. Tsirkin <mst@redhat.com>
19
Acked-by: Michael S. Tsirkin <mst@redhat.com>
40
Signed-off-by: Jason Wang <jasowang@redhat.com>
20
Signed-off-by: Jason Wang <jasowang@redhat.com>
41
---
21
---
42
hmp-commands.hx | 2 +-
22
hw/virtio/meson.build | 2 +-
43
net/clients.h | 6 +
23
hw/virtio/vhost-iova-tree.c | 110 ++++++++++++++++++++++++++++++++++++++++++++
44
net/dgram.c | 537 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
24
hw/virtio/vhost-iova-tree.h | 27 +++++++++++
45
net/hub.c | 2 +
25
3 files changed, 138 insertions(+), 1 deletion(-)
46
net/meson.build | 2 +
26
create mode 100644 hw/virtio/vhost-iova-tree.c
47
net/net.c | 30 +++-
27
create mode 100644 hw/virtio/vhost-iova-tree.h
48
net/stream.c | 425 ++++++++++++++++++++++++++++++++++++++++++++
49
qapi/net.json | 66 ++++++-
50
qemu-options.hx | 12 ++
51
9 files changed, 1078 insertions(+), 4 deletions(-)
52
create mode 100644 net/dgram.c
53
create mode 100644 net/stream.c
54
28
55
diff --git a/hmp-commands.hx b/hmp-commands.hx
29
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
56
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
57
--- a/hmp-commands.hx
31
--- a/hw/virtio/meson.build
58
+++ b/hmp-commands.hx
32
+++ b/hw/virtio/meson.build
59
@@ -XXX,XX +XXX,XX @@ ERST
33
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
60
{
34
61
.name = "netdev_add",
35
virtio_ss = ss.source_set()
62
.args_type = "netdev:O",
36
virtio_ss.add(files('virtio.c'))
63
- .params = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user"
37
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
64
+ .params = "[user|tap|socket|stream|dgram|vde|bridge|hubport|netmap|vhost-user"
38
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
65
#ifdef CONFIG_VMNET
39
virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
66
"|vmnet-host|vmnet-shared|vmnet-bridged"
40
virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
67
#endif
41
virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
68
diff --git a/net/clients.h b/net/clients.h
42
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/net/clients.h
71
+++ b/net/clients.h
72
@@ -XXX,XX +XXX,XX @@ int net_init_hubport(const Netdev *netdev, const char *name,
73
int net_init_socket(const Netdev *netdev, const char *name,
74
NetClientState *peer, Error **errp);
75
76
+int net_init_stream(const Netdev *netdev, const char *name,
77
+ NetClientState *peer, Error **errp);
78
+
79
+int net_init_dgram(const Netdev *netdev, const char *name,
80
+ NetClientState *peer, Error **errp);
81
+
82
int net_init_tap(const Netdev *netdev, const char *name,
83
NetClientState *peer, Error **errp);
84
85
diff --git a/net/dgram.c b/net/dgram.c
86
new file mode 100644
43
new file mode 100644
87
index XXXXXXX..XXXXXXX
44
index XXXXXXX..XXXXXXX
88
--- /dev/null
45
--- /dev/null
89
+++ b/net/dgram.c
46
+++ b/hw/virtio/vhost-iova-tree.c
90
@@ -XXX,XX +XXX,XX @@
47
@@ -XXX,XX +XXX,XX @@
91
+/*
48
+/*
92
+ * QEMU System Emulator
49
+ * vhost software live migration iova tree
93
+ *
50
+ *
94
+ * Copyright (c) 2003-2008 Fabrice Bellard
51
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
95
+ * Copyright (c) 2022 Red Hat, Inc.
52
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
96
+ *
53
+ *
97
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
54
+ * SPDX-License-Identifier: GPL-2.0-or-later
98
+ * of this software and associated documentation files (the "Software"), to deal
99
+ * in the Software without restriction, including without limitation the rights
100
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
101
+ * copies of the Software, and to permit persons to whom the Software is
102
+ * furnished to do so, subject to the following conditions:
103
+ *
104
+ * The above copyright notice and this permission notice shall be included in
105
+ * all copies or substantial portions of the Software.
106
+ *
107
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
108
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
109
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
110
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
111
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
112
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
113
+ * THE SOFTWARE.
114
+ */
55
+ */
115
+
56
+
116
+#include "qemu/osdep.h"
57
+#include "qemu/osdep.h"
58
+#include "qemu/iova-tree.h"
59
+#include "vhost-iova-tree.h"
117
+
60
+
118
+#include "net/net.h"
61
+#define iova_min_addr qemu_real_host_page_size
119
+#include "clients.h"
120
+#include "monitor/monitor.h"
121
+#include "qapi/error.h"
122
+#include "qemu/error-report.h"
123
+#include "qemu/option.h"
124
+#include "qemu/sockets.h"
125
+#include "qemu/iov.h"
126
+#include "qemu/main-loop.h"
127
+#include "qemu/cutils.h"
128
+
62
+
129
+typedef struct NetDgramState {
63
+/**
130
+ NetClientState nc;
64
+ * VhostIOVATree, able to:
131
+ int fd;
65
+ * - Translate iova address
132
+ SocketReadState rs;
66
+ * - Reverse translate iova address (from translated to iova)
133
+ struct sockaddr_in dgram_dst; /* contains destination iff connectionless */
67
+ * - Allocate IOVA regions for translated range (linear operation)
134
+ bool read_poll; /* waiting to receive data? */
68
+ */
135
+ bool write_poll; /* waiting to transmit data? */
69
+struct VhostIOVATree {
136
+} NetDgramState;
70
+ /* First addressable iova address in the device */
71
+ uint64_t iova_first;
137
+
72
+
138
+static void net_dgram_send(void *opaque);
73
+ /* Last addressable iova address in the device */
139
+static void net_dgram_writable(void *opaque);
74
+ uint64_t iova_last;
140
+
75
+
141
+static void net_dgram_update_fd_handler(NetDgramState *s)
76
+ /* IOVA address to qemu memory maps. */
77
+ IOVATree *iova_taddr_map;
78
+};
79
+
80
+/**
81
+ * Create a new IOVA tree
82
+ *
83
+ * Returns the new IOVA tree
84
+ */
85
+VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
142
+{
86
+{
143
+ qemu_set_fd_handler(s->fd,
87
+ VhostIOVATree *tree = g_new(VhostIOVATree, 1);
144
+ s->read_poll ? net_dgram_send : NULL,
88
+
145
+ s->write_poll ? net_dgram_writable : NULL,
89
+ /* Some devices do not like 0 addresses */
146
+ s);
90
+ tree->iova_first = MAX(iova_first, iova_min_addr);
91
+ tree->iova_last = iova_last;
92
+
93
+ tree->iova_taddr_map = iova_tree_new();
94
+ return tree;
147
+}
95
+}
148
+
96
+
149
+static void net_dgram_read_poll(NetDgramState *s, bool enable)
97
+/**
98
+ * Delete an iova tree
99
+ */
100
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
150
+{
101
+{
151
+ s->read_poll = enable;
102
+ iova_tree_destroy(iova_tree->iova_taddr_map);
152
+ net_dgram_update_fd_handler(s);
103
+ g_free(iova_tree);
153
+}
104
+}
154
+
105
+
155
+static void net_dgram_write_poll(NetDgramState *s, bool enable)
106
+/**
107
+ * Find the IOVA address stored from a memory address
108
+ *
109
+ * @tree: The iova tree
110
+ * @map: The map with the memory address
111
+ *
112
+ * Return the stored mapping, or NULL if not found.
113
+ */
114
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
115
+ const DMAMap *map)
156
+{
116
+{
157
+ s->write_poll = enable;
117
+ return iova_tree_find_iova(tree->iova_taddr_map, map);
158
+ net_dgram_update_fd_handler(s);
159
+}
118
+}
160
+
119
+
161
+static void net_dgram_writable(void *opaque)
120
+/**
121
+ * Allocate a new mapping
122
+ *
123
+ * @tree: The iova tree
124
+ * @map: The iova map
125
+ *
126
+ * Returns:
127
+ * - IOVA_OK if the map fits in the container
128
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
129
+ * - IOVA_ERR_NOMEM if tree cannot allocate more space.
130
+ *
131
+ * It returns the assigned iova in map->iova if the return value is IOVA_OK.
132
+ */
133
+int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
162
+{
134
+{
163
+ NetDgramState *s = opaque;
135
+ /* Some vhost devices do not like addr 0. Skip first page */
136
+ hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size;
164
+
137
+
165
+ net_dgram_write_poll(s, false);
138
+ if (map->translated_addr + map->size < map->translated_addr ||
139
+ map->perm == IOMMU_NONE) {
140
+ return IOVA_ERR_INVALID;
141
+ }
166
+
142
+
167
+ qemu_flush_queued_packets(&s->nc);
143
+ /* Allocate a node in IOVA address */
144
+ return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
145
+ tree->iova_last);
168
+}
146
+}
169
+
147
+
170
+static ssize_t net_dgram_receive(NetClientState *nc,
148
+/**
171
+ const uint8_t *buf, size_t size)
149
+ * Remove existing mappings from iova tree
150
+ *
151
+ * @iova_tree: The vhost iova tree
152
+ * @map: The map to remove
153
+ */
154
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
172
+{
155
+{
173
+ NetDgramState *s = DO_UPCAST(NetDgramState, nc, nc);
156
+ iova_tree_remove(iova_tree->iova_taddr_map, map);
174
+ ssize_t ret;
175
+
176
+ do {
177
+ if (s->dgram_dst.sin_family != AF_UNIX) {
178
+ ret = sendto(s->fd, buf, size, 0,
179
+ (struct sockaddr *)&s->dgram_dst,
180
+ sizeof(s->dgram_dst));
181
+ } else {
182
+ ret = send(s->fd, buf, size, 0);
183
+ }
184
+ } while (ret == -1 && errno == EINTR);
185
+
186
+ if (ret == -1 && errno == EAGAIN) {
187
+ net_dgram_write_poll(s, true);
188
+ return 0;
189
+ }
190
+ return ret;
191
+}
157
+}
192
+
158
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
193
+static void net_dgram_send_completed(NetClientState *nc, ssize_t len)
194
+{
195
+ NetDgramState *s = DO_UPCAST(NetDgramState, nc, nc);
196
+
197
+ if (!s->read_poll) {
198
+ net_dgram_read_poll(s, true);
199
+ }
200
+}
201
+
202
+static void net_dgram_rs_finalize(SocketReadState *rs)
203
+{
204
+ NetDgramState *s = container_of(rs, NetDgramState, rs);
205
+
206
+ if (qemu_send_packet_async(&s->nc, rs->buf,
207
+ rs->packet_len,
208
+ net_dgram_send_completed) == 0) {
209
+ net_dgram_read_poll(s, false);
210
+ }
211
+}
212
+
213
+static void net_dgram_send(void *opaque)
214
+{
215
+ NetDgramState *s = opaque;
216
+ int size;
217
+
218
+ size = recv(s->fd, s->rs.buf, sizeof(s->rs.buf), 0);
219
+ if (size < 0) {
220
+ return;
221
+ }
222
+ if (size == 0) {
223
+ /* end of connection */
224
+ net_dgram_read_poll(s, false);
225
+ net_dgram_write_poll(s, false);
226
+ return;
227
+ }
228
+ if (qemu_send_packet_async(&s->nc, s->rs.buf, size,
229
+ net_dgram_send_completed) == 0) {
230
+ net_dgram_read_poll(s, false);
231
+ }
232
+}
233
+
234
+static int net_dgram_mcast_create(struct sockaddr_in *mcastaddr,
235
+ struct in_addr *localaddr,
236
+ Error **errp)
237
+{
238
+ struct ip_mreq imr;
239
+ int fd;
240
+ int val, ret;
241
+#ifdef __OpenBSD__
242
+ unsigned char loop;
243
+#else
244
+ int loop;
245
+#endif
246
+
247
+ if (!IN_MULTICAST(ntohl(mcastaddr->sin_addr.s_addr))) {
248
+ error_setg(errp, "specified mcastaddr %s (0x%08x) "
249
+ "does not contain a multicast address",
250
+ inet_ntoa(mcastaddr->sin_addr),
251
+ (int)ntohl(mcastaddr->sin_addr.s_addr));
252
+ return -1;
253
+ }
254
+
255
+ fd = qemu_socket(PF_INET, SOCK_DGRAM, 0);
256
+ if (fd < 0) {
257
+ error_setg_errno(errp, errno, "can't create datagram socket");
258
+ return -1;
259
+ }
260
+
261
+ /*
262
+ * Allow multiple sockets to bind the same multicast ip and port by setting
263
+ * SO_REUSEADDR. This is the only situation where SO_REUSEADDR should be set
264
+ * on windows. Use socket_set_fast_reuse otherwise as it sets SO_REUSEADDR
265
+ * only on posix systems.
266
+ */
267
+ val = 1;
268
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
269
+ if (ret < 0) {
270
+ error_setg_errno(errp, errno, "can't set socket option SO_REUSEADDR");
271
+ goto fail;
272
+ }
273
+
274
+ ret = bind(fd, (struct sockaddr *)mcastaddr, sizeof(*mcastaddr));
275
+ if (ret < 0) {
276
+ error_setg_errno(errp, errno, "can't bind ip=%s to socket",
277
+ inet_ntoa(mcastaddr->sin_addr));
278
+ goto fail;
279
+ }
280
+
281
+ /* Add host to multicast group */
282
+ imr.imr_multiaddr = mcastaddr->sin_addr;
283
+ if (localaddr) {
284
+ imr.imr_interface = *localaddr;
285
+ } else {
286
+ imr.imr_interface.s_addr = htonl(INADDR_ANY);
287
+ }
288
+
289
+ ret = setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
290
+ &imr, sizeof(struct ip_mreq));
291
+ if (ret < 0) {
292
+ error_setg_errno(errp, errno,
293
+ "can't add socket to multicast group %s",
294
+ inet_ntoa(imr.imr_multiaddr));
295
+ goto fail;
296
+ }
297
+
298
+ /* Force mcast msgs to loopback (e.g. several QEMUs on the same host) */
299
+ loop = 1;
300
+ ret = setsockopt(fd, IPPROTO_IP, IP_MULTICAST_LOOP,
301
+ &loop, sizeof(loop));
302
+ if (ret < 0) {
303
+ error_setg_errno(errp, errno,
304
+ "can't force multicast message to loopback");
305
+ goto fail;
306
+ }
307
+
308
+ /* If a bind address is given, only send packets from that address */
309
+ if (localaddr != NULL) {
310
+ ret = setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF,
311
+ localaddr, sizeof(*localaddr));
312
+ if (ret < 0) {
313
+ error_setg_errno(errp, errno,
314
+ "can't set the default network send interface");
315
+ goto fail;
316
+ }
317
+ }
318
+
319
+ qemu_socket_set_nonblock(fd);
320
+ return fd;
321
+fail:
322
+ if (fd >= 0) {
323
+ closesocket(fd);
324
+ }
325
+ return -1;
326
+}
327
+
328
+static void net_dgram_cleanup(NetClientState *nc)
329
+{
330
+ NetDgramState *s = DO_UPCAST(NetDgramState, nc, nc);
331
+ if (s->fd != -1) {
332
+ net_dgram_read_poll(s, false);
333
+ net_dgram_write_poll(s, false);
334
+ close(s->fd);
335
+ s->fd = -1;
336
+ }
337
+}
338
+
339
+static NetClientInfo net_dgram_socket_info = {
340
+ .type = NET_CLIENT_DRIVER_DGRAM,
341
+ .size = sizeof(NetDgramState),
342
+ .receive = net_dgram_receive,
343
+ .cleanup = net_dgram_cleanup,
344
+};
345
+
346
+static NetDgramState *net_dgram_fd_init(NetClientState *peer,
347
+ const char *model,
348
+ const char *name,
349
+ int fd, int is_fd,
350
+ SocketAddress *mcast,
351
+ Error **errp)
352
+{
353
+ struct sockaddr_in saddr;
354
+ int newfd;
355
+ NetClientState *nc;
356
+ NetDgramState *s;
357
+ SocketAddress *sa;
358
+ SocketAddressType sa_type;
359
+
360
+ sa = socket_local_address(fd, errp);
361
+ if (!sa) {
362
+ return NULL;
363
+ }
364
+ sa_type = sa->type;
365
+ qapi_free_SocketAddress(sa);
366
+
367
+ /*
368
+ * fd passed: multicast: "learn" dgram_dst address from bound address and
369
+ * save it. Because this may be "shared" socket from a "master" process,
370
+ * datagrams would be recv() by ONLY ONE process: we must "clone" this
371
+ * dgram socket --jjo
372
+ */
373
+
374
+ if (is_fd && mcast != NULL) {
375
+ if (convert_host_port(&saddr, mcast->u.inet.host,
376
+ mcast->u.inet.port, errp) < 0) {
377
+ goto err;
378
+ }
379
+ /* must be bound */
380
+ if (saddr.sin_addr.s_addr == 0) {
381
+ error_setg(errp, "can't setup multicast destination address");
382
+ goto err;
383
+ }
384
+ /* clone dgram socket */
385
+ newfd = net_dgram_mcast_create(&saddr, NULL, errp);
386
+ if (newfd < 0) {
387
+ goto err;
388
+ }
389
+ /* clone newfd to fd, close newfd */
390
+ dup2(newfd, fd);
391
+ close(newfd);
392
+
393
+ }
394
+
395
+ nc = qemu_new_net_client(&net_dgram_socket_info, peer, model, name);
396
+
397
+ s = DO_UPCAST(NetDgramState, nc, nc);
398
+
399
+ s->fd = fd;
400
+ net_socket_rs_init(&s->rs, net_dgram_rs_finalize, false);
401
+ net_dgram_read_poll(s, true);
402
+
403
+ /* mcast: save bound address as dst */
404
+ if (is_fd && mcast != NULL) {
405
+ s->dgram_dst = saddr;
406
+ qemu_set_info_str(nc, "fd=%d (cloned mcast=%s:%d)", fd,
407
+ inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
408
+ } else {
409
+ if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) {
410
+ s->dgram_dst.sin_family = AF_UNIX;
411
+ }
412
+
413
+ qemu_set_info_str(nc, "fd=%d %s", fd, SocketAddressType_str(sa_type));
414
+ }
415
+
416
+ return s;
417
+
418
+err:
419
+ closesocket(fd);
420
+ return NULL;
421
+}
422
+
423
+static int net_dgram_mcast_init(NetClientState *peer,
424
+ const char *model,
425
+ const char *name,
426
+ SocketAddress *remote,
427
+ SocketAddress *local,
428
+ Error **errp)
429
+{
430
+ NetDgramState *s;
431
+ int fd, ret;
432
+ struct sockaddr_in saddr;
433
+
434
+ if (remote->type != SOCKET_ADDRESS_TYPE_INET) {
435
+ error_setg(errp, "multicast only support inet type");
436
+ return -1;
437
+ }
438
+
439
+ if (convert_host_port(&saddr, remote->u.inet.host, remote->u.inet.port,
440
+ errp) < 0) {
441
+ return -1;
442
+ }
443
+
444
+ if (!local) {
445
+ fd = net_dgram_mcast_create(&saddr, NULL, errp);
446
+ if (fd < 0) {
447
+ return -1;
448
+ }
449
+ } else {
450
+ switch (local->type) {
451
+ case SOCKET_ADDRESS_TYPE_INET: {
452
+ struct in_addr localaddr;
453
+
454
+ if (inet_aton(local->u.inet.host, &localaddr) == 0) {
455
+ error_setg(errp, "localaddr '%s' is not a valid IPv4 address",
456
+ local->u.inet.host);
457
+ return -1;
458
+ }
459
+
460
+ fd = net_dgram_mcast_create(&saddr, &localaddr, errp);
461
+ if (fd < 0) {
462
+ return -1;
463
+ }
464
+ break;
465
+ }
466
+ case SOCKET_ADDRESS_TYPE_FD:
467
+ fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp);
468
+ if (fd == -1) {
469
+ return -1;
470
+ }
471
+ ret = qemu_socket_try_set_nonblock(fd);
472
+ if (ret < 0) {
473
+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
474
+ name, fd);
475
+ return -1;
476
+ }
477
+ break;
478
+ default:
479
+ error_setg(errp, "only support inet or fd type for local");
480
+ return -1;
481
+ }
482
+ }
483
+
484
+ s = net_dgram_fd_init(peer, model, name, fd,
485
+ local->type == SOCKET_ADDRESS_TYPE_FD,
486
+ remote, errp);
487
+ if (!s) {
488
+ return -1;
489
+ }
490
+
491
+ s->dgram_dst = saddr;
492
+
493
+ qemu_set_info_str(&s->nc, "mcast=%s:%d", inet_ntoa(saddr.sin_addr),
494
+ ntohs(saddr.sin_port));
495
+ return 0;
496
+
497
+}
498
+
499
+
500
+int net_init_dgram(const Netdev *netdev, const char *name,
501
+ NetClientState *peer, Error **errp)
502
+{
503
+ NetDgramState *s;
504
+ int fd, ret;
505
+ struct sockaddr_in raddr_in;
506
+ struct sockaddr_in laddr_in;
507
+ SocketAddress *remote, *local;
508
+
509
+ assert(netdev->type == NET_CLIENT_DRIVER_DGRAM);
510
+
511
+ remote = netdev->u.dgram.remote;
512
+ local = netdev->u.dgram.local;
513
+
514
+ /* detect multicast address */
515
+ if (remote && remote->type == SOCKET_ADDRESS_TYPE_INET) {
516
+ struct sockaddr_in mcastaddr;
517
+
518
+ if (convert_host_port(&mcastaddr, remote->u.inet.host,
519
+ remote->u.inet.port, errp) < 0) {
520
+ return -1;
521
+ }
522
+
523
+ if (IN_MULTICAST(ntohl(mcastaddr.sin_addr.s_addr))) {
524
+ return net_dgram_mcast_init(peer, "dram", name, remote, local,
525
+ errp);
526
+ }
527
+ }
528
+
529
+ /* unicast address */
530
+ if (!local) {
531
+ error_setg(errp, "dgram requires local= parameter");
532
+ return -1;
533
+ }
534
+
535
+ if (remote) {
536
+ if (local->type == SOCKET_ADDRESS_TYPE_FD) {
537
+ error_setg(errp, "don't set remote with local.fd");
538
+ return -1;
539
+ }
540
+ if (remote->type != local->type) {
541
+ error_setg(errp, "remote and local types must be the same");
542
+ return -1;
543
+ }
544
+ } else {
545
+ if (local->type != SOCKET_ADDRESS_TYPE_FD) {
546
+ error_setg(errp, "type=inet requires remote parameter");
547
+ return -1;
548
+ }
549
+ }
550
+
551
+ switch (local->type) {
552
+ case SOCKET_ADDRESS_TYPE_INET:
553
+ if (convert_host_port(&laddr_in, local->u.inet.host, local->u.inet.port,
554
+ errp) < 0) {
555
+ return -1;
556
+ }
557
+
558
+ if (convert_host_port(&raddr_in, remote->u.inet.host,
559
+ remote->u.inet.port, errp) < 0) {
560
+ return -1;
561
+ }
562
+
563
+ fd = qemu_socket(PF_INET, SOCK_DGRAM, 0);
564
+ if (fd < 0) {
565
+ error_setg_errno(errp, errno, "can't create datagram socket");
566
+ return -1;
567
+ }
568
+
569
+ ret = socket_set_fast_reuse(fd);
570
+ if (ret < 0) {
571
+ error_setg_errno(errp, errno,
572
+ "can't set socket option SO_REUSEADDR");
573
+ closesocket(fd);
574
+ return -1;
575
+ }
576
+ ret = bind(fd, (struct sockaddr *)&laddr_in, sizeof(laddr_in));
577
+ if (ret < 0) {
578
+ error_setg_errno(errp, errno, "can't bind ip=%s to socket",
579
+ inet_ntoa(laddr_in.sin_addr));
580
+ closesocket(fd);
581
+ return -1;
582
+ }
583
+ qemu_socket_set_nonblock(fd);
584
+ break;
585
+ case SOCKET_ADDRESS_TYPE_FD:
586
+ fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp);
587
+ if (fd == -1) {
588
+ return -1;
589
+ }
590
+ ret = qemu_socket_try_set_nonblock(fd);
591
+ if (ret < 0) {
592
+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
593
+ name, fd);
594
+ return -1;
595
+ }
596
+ break;
597
+ default:
598
+ error_setg(errp, "only support inet or fd type for local");
599
+ return -1;
600
+ }
601
+
602
+ s = net_dgram_fd_init(peer, "dgram", name, fd, 0, NULL, errp);
603
+ if (!s) {
604
+ return -1;
605
+ }
606
+
607
+ if (remote) {
608
+ s->dgram_dst = raddr_in;
609
+ }
610
+
611
+ switch (local->type) {
612
+ case SOCKET_ADDRESS_TYPE_INET:
613
+ qemu_set_info_str(&s->nc, "udp=%s:%d/%s:%d",
614
+ inet_ntoa(laddr_in.sin_addr),
615
+ ntohs(laddr_in.sin_port),
616
+ inet_ntoa(raddr_in.sin_addr),
617
+ ntohs(raddr_in.sin_port));
618
+ break;
619
+ case SOCKET_ADDRESS_TYPE_FD:
620
+ qemu_set_info_str(&s->nc, "fd=%d", fd);
621
+ break;
622
+ default:
623
+ g_assert_not_reached();
624
+ }
625
+
626
+ return 0;
627
+}
628
diff --git a/net/hub.c b/net/hub.c
629
index XXXXXXX..XXXXXXX 100644
630
--- a/net/hub.c
631
+++ b/net/hub.c
632
@@ -XXX,XX +XXX,XX @@ void net_hub_check_clients(void)
633
case NET_CLIENT_DRIVER_USER:
634
case NET_CLIENT_DRIVER_TAP:
635
case NET_CLIENT_DRIVER_SOCKET:
636
+ case NET_CLIENT_DRIVER_STREAM:
637
+ case NET_CLIENT_DRIVER_DGRAM:
638
case NET_CLIENT_DRIVER_VDE:
639
case NET_CLIENT_DRIVER_VHOST_USER:
640
has_host_dev = 1;
641
diff --git a/net/meson.build b/net/meson.build
642
index XXXXXXX..XXXXXXX 100644
643
--- a/net/meson.build
644
+++ b/net/meson.build
645
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(files(
646
'net.c',
647
'queue.c',
648
'socket.c',
649
+ 'stream.c',
650
+ 'dgram.c',
651
'util.c',
652
))
653
654
diff --git a/net/net.c b/net/net.c
655
index XXXXXXX..XXXXXXX 100644
656
--- a/net/net.c
657
+++ b/net/net.c
658
@@ -XXX,XX +XXX,XX @@
659
#include "qemu/qemu-print.h"
660
#include "qemu/main-loop.h"
661
#include "qemu/option.h"
662
+#include "qemu/keyval.h"
663
#include "qapi/error.h"
664
#include "qapi/opts-visitor.h"
665
#include "sysemu/runstate.h"
666
@@ -XXX,XX +XXX,XX @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
667
#endif
668
[NET_CLIENT_DRIVER_TAP] = net_init_tap,
669
[NET_CLIENT_DRIVER_SOCKET] = net_init_socket,
670
+ [NET_CLIENT_DRIVER_STREAM] = net_init_stream,
671
+ [NET_CLIENT_DRIVER_DGRAM] = net_init_dgram,
672
#ifdef CONFIG_VDE
673
[NET_CLIENT_DRIVER_VDE] = net_init_vde,
674
#endif
675
@@ -XXX,XX +XXX,XX @@ void show_netdevs(void)
676
int idx;
677
const char *available_netdevs[] = {
678
"socket",
679
+ "stream",
680
+ "dgram",
681
"hubport",
682
"tap",
683
#ifdef CONFIG_SLIRP
684
@@ -XXX,XX +XXX,XX @@ void net_init_clients(void)
685
*/
686
bool netdev_is_modern(const char *optarg)
687
{
688
- return false;
689
+ QemuOpts *opts;
690
+ bool is_modern;
691
+ const char *type;
692
+ static QemuOptsList dummy_opts = {
693
+ .name = "netdev",
694
+ .implied_opt_name = "type",
695
+ .head = QTAILQ_HEAD_INITIALIZER(dummy_opts.head),
696
+ .desc = { { } },
697
+ };
698
+
699
+ if (optarg[0] == '{') {
700
+ /* This is JSON, which means it's modern syntax */
701
+ return true;
702
+ }
703
+
704
+ opts = qemu_opts_create(&dummy_opts, NULL, false, &error_abort);
705
+ qemu_opts_do_parse(opts, optarg, dummy_opts.implied_opt_name,
706
+ &error_abort);
707
+ type = qemu_opt_get(opts, "type");
708
+ is_modern = !g_strcmp0(type, "stream") || !g_strcmp0(type, "dgram");
709
+
710
+ qemu_opts_reset(&dummy_opts);
711
+
712
+ return is_modern;
713
}
714
715
/*
716
diff --git a/net/stream.c b/net/stream.c
717
new file mode 100644
159
new file mode 100644
718
index XXXXXXX..XXXXXXX
160
index XXXXXXX..XXXXXXX
719
--- /dev/null
161
--- /dev/null
720
+++ b/net/stream.c
162
+++ b/hw/virtio/vhost-iova-tree.h
721
@@ -XXX,XX +XXX,XX @@
163
@@ -XXX,XX +XXX,XX @@
722
+/*
164
+/*
723
+ * QEMU System Emulator
165
+ * vhost software live migration iova tree
724
+ *
166
+ *
725
+ * Copyright (c) 2003-2008 Fabrice Bellard
167
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
726
+ * Copyright (c) 2022 Red Hat, Inc.
168
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
727
+ *
169
+ *
728
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
170
+ * SPDX-License-Identifier: GPL-2.0-or-later
729
+ * of this software and associated documentation files (the "Software"), to deal
730
+ * in the Software without restriction, including without limitation the rights
731
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
732
+ * copies of the Software, and to permit persons to whom the Software is
733
+ * furnished to do so, subject to the following conditions:
734
+ *
735
+ * The above copyright notice and this permission notice shall be included in
736
+ * all copies or substantial portions of the Software.
737
+ *
738
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
739
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
740
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
741
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
742
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
743
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
744
+ * THE SOFTWARE.
745
+ */
171
+ */
746
+
172
+
747
+#include "qemu/osdep.h"
173
+#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
174
+#define HW_VIRTIO_VHOST_IOVA_TREE_H
748
+
175
+
749
+#include "net/net.h"
176
+#include "qemu/iova-tree.h"
750
+#include "clients.h"
177
+#include "exec/memory.h"
751
+#include "monitor/monitor.h"
752
+#include "qapi/error.h"
753
+#include "qemu/error-report.h"
754
+#include "qemu/option.h"
755
+#include "qemu/sockets.h"
756
+#include "qemu/iov.h"
757
+#include "qemu/main-loop.h"
758
+#include "qemu/cutils.h"
759
+
178
+
760
+typedef struct NetStreamState {
179
+typedef struct VhostIOVATree VhostIOVATree;
761
+ NetClientState nc;
762
+ int listen_fd;
763
+ int fd;
764
+ SocketReadState rs;
765
+ unsigned int send_index; /* number of bytes sent*/
766
+ bool read_poll; /* waiting to receive data? */
767
+ bool write_poll; /* waiting to transmit data? */
768
+} NetStreamState;
769
+
180
+
770
+static void net_stream_send(void *opaque);
181
+VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
771
+static void net_stream_accept(void *opaque);
182
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
772
+static void net_stream_writable(void *opaque);
183
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
773
+
184
+
774
+static void net_stream_update_fd_handler(NetStreamState *s)
185
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
775
+{
186
+ const DMAMap *map);
776
+ qemu_set_fd_handler(s->fd,
187
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
777
+ s->read_poll ? net_stream_send : NULL,
188
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
778
+ s->write_poll ? net_stream_writable : NULL,
779
+ s);
780
+}
781
+
189
+
782
+static void net_stream_read_poll(NetStreamState *s, bool enable)
190
+#endif
783
+{
784
+ s->read_poll = enable;
785
+ net_stream_update_fd_handler(s);
786
+}
787
+
788
+static void net_stream_write_poll(NetStreamState *s, bool enable)
789
+{
790
+ s->write_poll = enable;
791
+ net_stream_update_fd_handler(s);
792
+}
793
+
794
+static void net_stream_writable(void *opaque)
795
+{
796
+ NetStreamState *s = opaque;
797
+
798
+ net_stream_write_poll(s, false);
799
+
800
+ qemu_flush_queued_packets(&s->nc);
801
+}
802
+
803
+static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf,
804
+ size_t size)
805
+{
806
+ NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc);
807
+ uint32_t len = htonl(size);
808
+ struct iovec iov[] = {
809
+ {
810
+ .iov_base = &len,
811
+ .iov_len = sizeof(len),
812
+ }, {
813
+ .iov_base = (void *)buf,
814
+ .iov_len = size,
815
+ },
816
+ };
817
+ size_t remaining;
818
+ ssize_t ret;
819
+
820
+ remaining = iov_size(iov, 2) - s->send_index;
821
+ ret = iov_send(s->fd, iov, 2, s->send_index, remaining);
822
+
823
+ if (ret == -1 && errno == EAGAIN) {
824
+ ret = 0; /* handled further down */
825
+ }
826
+ if (ret == -1) {
827
+ s->send_index = 0;
828
+ return -errno;
829
+ }
830
+ if (ret < (ssize_t)remaining) {
831
+ s->send_index += ret;
832
+ net_stream_write_poll(s, true);
833
+ return 0;
834
+ }
835
+ s->send_index = 0;
836
+ return size;
837
+}
838
+
839
+static void net_stream_send_completed(NetClientState *nc, ssize_t len)
840
+{
841
+ NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc);
842
+
843
+ if (!s->read_poll) {
844
+ net_stream_read_poll(s, true);
845
+ }
846
+}
847
+
848
+static void net_stream_rs_finalize(SocketReadState *rs)
849
+{
850
+ NetStreamState *s = container_of(rs, NetStreamState, rs);
851
+
852
+ if (qemu_send_packet_async(&s->nc, rs->buf,
853
+ rs->packet_len,
854
+ net_stream_send_completed) == 0) {
855
+ net_stream_read_poll(s, false);
856
+ }
857
+}
858
+
859
+static void net_stream_send(void *opaque)
860
+{
861
+ NetStreamState *s = opaque;
862
+ int size;
863
+ int ret;
864
+ uint8_t buf1[NET_BUFSIZE];
865
+ const uint8_t *buf;
866
+
867
+ size = recv(s->fd, buf1, sizeof(buf1), 0);
868
+ if (size < 0) {
869
+ if (errno != EWOULDBLOCK) {
870
+ goto eoc;
871
+ }
872
+ } else if (size == 0) {
873
+ /* end of connection */
874
+ eoc:
875
+ net_stream_read_poll(s, false);
876
+ net_stream_write_poll(s, false);
877
+ if (s->listen_fd != -1) {
878
+ qemu_set_fd_handler(s->listen_fd, net_stream_accept, NULL, s);
879
+ }
880
+ closesocket(s->fd);
881
+
882
+ s->fd = -1;
883
+ net_socket_rs_init(&s->rs, net_stream_rs_finalize, false);
884
+ s->nc.link_down = true;
885
+ qemu_set_info_str(&s->nc, "");
886
+
887
+ return;
888
+ }
889
+ buf = buf1;
890
+
891
+ ret = net_fill_rstate(&s->rs, buf, size);
892
+
893
+ if (ret == -1) {
894
+ goto eoc;
895
+ }
896
+}
897
+
898
+static void net_stream_cleanup(NetClientState *nc)
899
+{
900
+ NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc);
901
+ if (s->fd != -1) {
902
+ net_stream_read_poll(s, false);
903
+ net_stream_write_poll(s, false);
904
+ close(s->fd);
905
+ s->fd = -1;
906
+ }
907
+ if (s->listen_fd != -1) {
908
+ qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL);
909
+ closesocket(s->listen_fd);
910
+ s->listen_fd = -1;
911
+ }
912
+}
913
+
914
+static void net_stream_connect(void *opaque)
915
+{
916
+ NetStreamState *s = opaque;
917
+ net_stream_read_poll(s, true);
918
+}
919
+
920
+static NetClientInfo net_stream_info = {
921
+ .type = NET_CLIENT_DRIVER_STREAM,
922
+ .size = sizeof(NetStreamState),
923
+ .receive = net_stream_receive,
924
+ .cleanup = net_stream_cleanup,
925
+};
926
+
927
+static NetStreamState *net_stream_fd_init(NetClientState *peer,
928
+ const char *model,
929
+ const char *name,
930
+ int fd, int is_connected)
931
+{
932
+ NetClientState *nc;
933
+ NetStreamState *s;
934
+
935
+ nc = qemu_new_net_client(&net_stream_info, peer, model, name);
936
+
937
+ qemu_set_info_str(nc, "fd=%d", fd);
938
+
939
+ s = DO_UPCAST(NetStreamState, nc, nc);
940
+
941
+ s->fd = fd;
942
+ s->listen_fd = -1;
943
+ net_socket_rs_init(&s->rs, net_stream_rs_finalize, false);
944
+
945
+ /* Disable Nagle algorithm on TCP sockets to reduce latency */
946
+ socket_set_nodelay(fd);
947
+
948
+ if (is_connected) {
949
+ net_stream_connect(s);
950
+ } else {
951
+ qemu_set_fd_handler(s->fd, NULL, net_stream_connect, s);
952
+ }
953
+ return s;
954
+}
955
+
956
+static void net_stream_accept(void *opaque)
957
+{
958
+ NetStreamState *s = opaque;
959
+ struct sockaddr_in saddr;
960
+ socklen_t len;
961
+ int fd;
962
+
963
+ for (;;) {
964
+ len = sizeof(saddr);
965
+ fd = qemu_accept(s->listen_fd, (struct sockaddr *)&saddr, &len);
966
+ if (fd < 0 && errno != EINTR) {
967
+ return;
968
+ } else if (fd >= 0) {
969
+ qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL);
970
+ break;
971
+ }
972
+ }
973
+
974
+ s->fd = fd;
975
+ s->nc.link_down = false;
976
+ net_stream_connect(s);
977
+ qemu_set_info_str(&s->nc, "connection from %s:%d",
978
+ inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
979
+}
980
+
981
+static int net_stream_server_init(NetClientState *peer,
982
+ const char *model,
983
+ const char *name,
984
+ SocketAddress *addr,
985
+ Error **errp)
986
+{
987
+ NetClientState *nc;
988
+ NetStreamState *s;
989
+ int fd, ret;
990
+
991
+ switch (addr->type) {
992
+ case SOCKET_ADDRESS_TYPE_INET: {
993
+ struct sockaddr_in saddr_in;
994
+
995
+ if (convert_host_port(&saddr_in, addr->u.inet.host, addr->u.inet.port,
996
+ errp) < 0) {
997
+ return -1;
998
+ }
999
+
1000
+ fd = qemu_socket(PF_INET, SOCK_STREAM, 0);
1001
+ if (fd < 0) {
1002
+ error_setg_errno(errp, errno, "can't create stream socket");
1003
+ return -1;
1004
+ }
1005
+ qemu_socket_set_nonblock(fd);
1006
+
1007
+ socket_set_fast_reuse(fd);
1008
+
1009
+ ret = bind(fd, (struct sockaddr *)&saddr_in, sizeof(saddr_in));
1010
+ if (ret < 0) {
1011
+ error_setg_errno(errp, errno, "can't bind ip=%s to socket",
1012
+ inet_ntoa(saddr_in.sin_addr));
1013
+ closesocket(fd);
1014
+ return -1;
1015
+ }
1016
+ break;
1017
+ }
1018
+ case SOCKET_ADDRESS_TYPE_FD:
1019
+ fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp);
1020
+ if (fd == -1) {
1021
+ return -1;
1022
+ }
1023
+ ret = qemu_socket_try_set_nonblock(fd);
1024
+ if (ret < 0) {
1025
+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
1026
+ name, fd);
1027
+ return -1;
1028
+ }
1029
+ break;
1030
+ default:
1031
+ error_setg(errp, "only support inet or fd type");
1032
+ return -1;
1033
+ }
1034
+
1035
+ ret = listen(fd, 0);
1036
+ if (ret < 0) {
1037
+ error_setg_errno(errp, errno, "can't listen on socket");
1038
+ closesocket(fd);
1039
+ return -1;
1040
+ }
1041
+
1042
+ nc = qemu_new_net_client(&net_stream_info, peer, model, name);
1043
+ s = DO_UPCAST(NetStreamState, nc, nc);
1044
+ s->fd = -1;
1045
+ s->listen_fd = fd;
1046
+ s->nc.link_down = true;
1047
+ net_socket_rs_init(&s->rs, net_stream_rs_finalize, false);
1048
+
1049
+ qemu_set_fd_handler(s->listen_fd, net_stream_accept, NULL, s);
1050
+ return 0;
1051
+}
1052
+
1053
+static int net_stream_client_init(NetClientState *peer,
1054
+ const char *model,
1055
+ const char *name,
1056
+ SocketAddress *addr,
1057
+ Error **errp)
1058
+{
1059
+ NetStreamState *s;
1060
+ struct sockaddr_in saddr_in;
1061
+ int fd, connected, ret;
1062
+
1063
+ switch (addr->type) {
1064
+ case SOCKET_ADDRESS_TYPE_INET:
1065
+ if (convert_host_port(&saddr_in, addr->u.inet.host, addr->u.inet.port,
1066
+ errp) < 0) {
1067
+ return -1;
1068
+ }
1069
+
1070
+ fd = qemu_socket(PF_INET, SOCK_STREAM, 0);
1071
+ if (fd < 0) {
1072
+ error_setg_errno(errp, errno, "can't create stream socket");
1073
+ return -1;
1074
+ }
1075
+ qemu_socket_set_nonblock(fd);
1076
+
1077
+ connected = 0;
1078
+ for (;;) {
1079
+ ret = connect(fd, (struct sockaddr *)&saddr_in, sizeof(saddr_in));
1080
+ if (ret < 0) {
1081
+ if (errno == EINTR || errno == EWOULDBLOCK) {
1082
+ /* continue */
1083
+ } else if (errno == EINPROGRESS ||
1084
+ errno == EALREADY ||
1085
+ errno == EINVAL) {
1086
+ break;
1087
+ } else {
1088
+ error_setg_errno(errp, errno, "can't connect socket");
1089
+ closesocket(fd);
1090
+ return -1;
1091
+ }
1092
+ } else {
1093
+ connected = 1;
1094
+ break;
1095
+ }
1096
+ }
1097
+ break;
1098
+ case SOCKET_ADDRESS_TYPE_FD:
1099
+ fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp);
1100
+ if (fd == -1) {
1101
+ return -1;
1102
+ }
1103
+ ret = qemu_socket_try_set_nonblock(fd);
1104
+ if (ret < 0) {
1105
+ error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
1106
+ name, fd);
1107
+ return -1;
1108
+ }
1109
+ connected = 1;
1110
+ break;
1111
+ default:
1112
+ error_setg(errp, "only support inet or fd type");
1113
+ return -1;
1114
+ }
1115
+
1116
+ s = net_stream_fd_init(peer, model, name, fd, connected);
1117
+
1118
+ switch (addr->type) {
1119
+ case SOCKET_ADDRESS_TYPE_INET:
1120
+ qemu_set_info_str(&s->nc, "connect to %s:%d",
1121
+ inet_ntoa(saddr_in.sin_addr),
1122
+ ntohs(saddr_in.sin_port));
1123
+ break;
1124
+ case SOCKET_ADDRESS_TYPE_FD:
1125
+ qemu_set_info_str(&s->nc, "connect to fd %d", fd);
1126
+ break;
1127
+ default:
1128
+ g_assert_not_reached();
1129
+ }
1130
+
1131
+ return 0;
1132
+}
1133
+
1134
+int net_init_stream(const Netdev *netdev, const char *name,
1135
+ NetClientState *peer, Error **errp)
1136
+{
1137
+ const NetdevStreamOptions *sock;
1138
+
1139
+ assert(netdev->type == NET_CLIENT_DRIVER_STREAM);
1140
+ sock = &netdev->u.stream;
1141
+
1142
+ if (!sock->has_server || !sock->server) {
1143
+ return net_stream_client_init(peer, "stream", name, sock->addr, errp);
1144
+ }
1145
+ return net_stream_server_init(peer, "stream", name, sock->addr, errp);
1146
+}
1147
diff --git a/qapi/net.json b/qapi/net.json
1148
index XXXXXXX..XXXXXXX 100644
1149
--- a/qapi/net.json
1150
+++ b/qapi/net.json
1151
@@ -XXX,XX +XXX,XX @@
1152
##
1153
1154
{ 'include': 'common.json' }
1155
+{ 'include': 'sockets.json' }
1156
1157
##
1158
# @set_link:
1159
@@ -XXX,XX +XXX,XX @@
1160
'if': 'CONFIG_VMNET' }
1161
1162
##
1163
+# @NetdevStreamOptions:
1164
+#
1165
+# Configuration info for stream socket netdev
1166
+#
1167
+# @addr: socket address to listen on (server=true)
1168
+# or connect to (server=false)
1169
+# @server: create server socket (default: false)
1170
+#
1171
+# Only SocketAddress types 'inet' and 'fd' are supported.
1172
+#
1173
+# Since: 7.2
1174
+##
1175
+{ 'struct': 'NetdevStreamOptions',
1176
+ 'data': {
1177
+ 'addr': 'SocketAddress',
1178
+ '*server': 'bool' } }
1179
+
1180
+##
1181
+# @NetdevDgramOptions:
1182
+#
1183
+# Configuration info for datagram socket netdev.
1184
+#
1185
+# @remote: remote address
1186
+# @local: local address
1187
+#
1188
+# Only SocketAddress types 'inet' and 'fd' are supported.
1189
+#
1190
+# If remote address is present and it's a multicast address, local address
1191
+# is optional. Otherwise local address is required and remote address is
1192
+# optional.
1193
+#
1194
+# .. table:: Valid parameters combination table
1195
+# :widths: auto
1196
+#
1197
+# ============= ======== =====
1198
+# remote local okay?
1199
+# ============= ======== =====
1200
+# absent absent no
1201
+# absent not fd no
1202
+# absent fd yes
1203
+# multicast absent yes
1204
+# multicast present yes
1205
+# not multicast absent no
1206
+# not multicast present yes
1207
+# ============= ======== =====
1208
+#
1209
+# Since: 7.2
1210
+##
1211
+{ 'struct': 'NetdevDgramOptions',
1212
+ 'data': {
1213
+ '*local': 'SocketAddress',
1214
+ '*remote': 'SocketAddress' } }
1215
+
1216
+##
1217
# @NetClientDriver:
1218
#
1219
# Available netdev drivers.
1220
@@ -XXX,XX +XXX,XX @@
1221
# @vmnet-host since 7.1
1222
# @vmnet-shared since 7.1
1223
# @vmnet-bridged since 7.1
1224
+# @stream since 7.2
1225
+# @dgram since 7.2
1226
##
1227
{ 'enum': 'NetClientDriver',
1228
- 'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
1229
- 'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa',
1230
+ 'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'stream',
1231
+ 'dgram', 'vde', 'bridge', 'hubport', 'netmap', 'vhost-user',
1232
+ 'vhost-vdpa',
1233
{ 'name': 'vmnet-host', 'if': 'CONFIG_VMNET' },
1234
{ 'name': 'vmnet-shared', 'if': 'CONFIG_VMNET' },
1235
{ 'name': 'vmnet-bridged', 'if': 'CONFIG_VMNET' }] }
1236
@@ -XXX,XX +XXX,XX @@
1237
# 'vmnet-host' - since 7.1
1238
# 'vmnet-shared' - since 7.1
1239
# 'vmnet-bridged' - since 7.1
1240
+# 'stream' since 7.2
1241
+# 'dgram' since 7.2
1242
##
1243
{ 'union': 'Netdev',
1244
'base': { 'id': 'str', 'type': 'NetClientDriver' },
1245
@@ -XXX,XX +XXX,XX @@
1246
'tap': 'NetdevTapOptions',
1247
'l2tpv3': 'NetdevL2TPv3Options',
1248
'socket': 'NetdevSocketOptions',
1249
+ 'stream': 'NetdevStreamOptions',
1250
+ 'dgram': 'NetdevDgramOptions',
1251
'vde': 'NetdevVdeOptions',
1252
'bridge': 'NetdevBridgeOptions',
1253
'hubport': 'NetdevHubPortOptions',
1254
diff --git a/qemu-options.hx b/qemu-options.hx
1255
index XXXXXXX..XXXXXXX 100644
1256
--- a/qemu-options.hx
1257
+++ b/qemu-options.hx
1258
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
1259
"-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n"
1260
" configure a network backend to connect to another network\n"
1261
" using an UDP tunnel\n"
1262
+ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port\n"
1263
+ "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n"
1264
+ " configure a network backend to connect to another network\n"
1265
+ " using a socket connection in stream mode.\n"
1266
+ "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n"
1267
+ "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=fd,local.str=file-descriptor]\n"
1268
+ " configure a network backend to connect to a multicast maddr and port\n"
1269
+ " use ``local.host=addr`` to specify the host address to send packets from\n"
1270
+ "-netdev dgram,id=str,local.type=inet,local.host=addr,local.port=port[,remote.type=inet,remote.host=addr,remote.port=port]\n"
1271
+ "-netdev dgram,id=str,local.type=fd,local.str=file-descriptor\n"
1272
+ " configure a network backend to connect to another network\n"
1273
+ " using an UDP tunnel\n"
1274
#ifdef CONFIG_VDE
1275
"-netdev vde,id=str[,sock=socketpath][,port=n][,group=groupname][,mode=octalmode]\n"
1276
" configure a network backend to connect to port 'n' of a vde switch\n"
1277
--
191
--
1278
2.7.4
192
2.7.4
193
194
diff view generated by jsdifflib
1
From: Laurent Vivier <lvivier@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
3
Use translations added in VhostIOVATree in SVQ.
4
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
4
5
Only introduce usage here, not allocation and deallocation. As with
6
previous patches, we use the dead code paths of shadow_vqs_enabled to
7
avoid committing too many changes at once. These are impossible to take
8
at the moment.
9
10
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
5
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Acked-by: Michael S. Tsirkin <mst@redhat.com>
6
Acked-by: Markus Armbruster <armbru@redhat.com> (QAPI schema)
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
13
---
10
net/stream.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
14
hw/virtio/vhost-shadow-virtqueue.c | 86 +++++++++++++++++++++++---
11
qapi/net.json | 2 +-
15
hw/virtio/vhost-shadow-virtqueue.h | 6 +-
12
qemu-options.hx | 1 +
16
hw/virtio/vhost-vdpa.c | 122 +++++++++++++++++++++++++++++++------
13
3 files changed, 104 insertions(+), 6 deletions(-)
17
include/hw/virtio/vhost-vdpa.h | 3 +
14
18
4 files changed, 187 insertions(+), 30 deletions(-)
15
diff --git a/net/stream.c b/net/stream.c
19
20
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
16
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
17
--- a/net/stream.c
22
--- a/hw/virtio/vhost-shadow-virtqueue.c
18
+++ b/net/stream.c
23
+++ b/hw/virtio/vhost-shadow-virtqueue.c
19
@@ -XXX,XX +XXX,XX @@ static NetStreamState *net_stream_fd_init(NetClientState *peer,
24
@@ -XXX,XX +XXX,XX @@ static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
20
static void net_stream_accept(void *opaque)
25
return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
21
{
22
NetStreamState *s = opaque;
23
- struct sockaddr_in saddr;
24
+ struct sockaddr_storage saddr;
25
socklen_t len;
26
int fd;
27
28
@@ -XXX,XX +XXX,XX @@ static void net_stream_accept(void *opaque)
29
s->fd = fd;
30
s->nc.link_down = false;
31
net_stream_connect(s);
32
- qemu_set_info_str(&s->nc, "connection from %s:%d",
33
- inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
34
+ switch (saddr.ss_family) {
35
+ case AF_INET: {
36
+ struct sockaddr_in *saddr_in = (struct sockaddr_in *)&saddr;
37
+
38
+ qemu_set_info_str(&s->nc, "connection from %s:%d",
39
+ inet_ntoa(saddr_in->sin_addr),
40
+ ntohs(saddr_in->sin_port));
41
+ break;
42
+ }
43
+ case AF_UNIX: {
44
+ struct sockaddr_un saddr_un;
45
+
46
+ len = sizeof(saddr_un);
47
+ getsockname(s->listen_fd, (struct sockaddr *)&saddr_un, &len);
48
+ qemu_set_info_str(&s->nc, "connect from %s", saddr_un.sun_path);
49
+ break;
50
+ }
51
+ default:
52
+ g_assert_not_reached();
53
+ }
54
}
26
}
55
27
56
static int net_stream_server_init(NetClientState *peer,
28
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
57
@@ -XXX,XX +XXX,XX @@ static int net_stream_server_init(NetClientState *peer,
29
+/**
30
+ * Translate addresses between the qemu's virtual address and the SVQ IOVA
31
+ *
32
+ * @svq: Shadow VirtQueue
33
+ * @addrs: Translated IOVA addresses
34
+ * @iovec: Source qemu's VA addresses
35
+ * @num: Length of iovec and minimum length of addrs
36
+ */
37
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
38
+ hwaddr *addrs, const struct iovec *iovec,
39
+ size_t num)
40
+{
41
+ if (num == 0) {
42
+ return true;
43
+ }
44
+
45
+ for (size_t i = 0; i < num; ++i) {
46
+ DMAMap needle = {
47
+ .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
48
+ .size = iovec[i].iov_len,
49
+ };
50
+ Int128 needle_last, map_last;
51
+ size_t off;
52
+
53
+ const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
54
+ /*
55
+ * Map cannot be NULL since iova map contains all guest space and
56
+ * qemu already has a physical address mapped
57
+ */
58
+ if (unlikely(!map)) {
59
+ qemu_log_mask(LOG_GUEST_ERROR,
60
+ "Invalid address 0x%"HWADDR_PRIx" given by guest",
61
+ needle.translated_addr);
62
+ return false;
63
+ }
64
+
65
+ off = needle.translated_addr - map->translated_addr;
66
+ addrs[i] = map->iova + off;
67
+
68
+ needle_last = int128_add(int128_make64(needle.translated_addr),
69
+ int128_make64(iovec[i].iov_len));
70
+ map_last = int128_make64(map->translated_addr + map->size);
71
+ if (unlikely(int128_gt(needle_last, map_last))) {
72
+ qemu_log_mask(LOG_GUEST_ERROR,
73
+ "Guest buffer expands over iova range");
74
+ return false;
75
+ }
76
+ }
77
+
78
+ return true;
79
+}
80
+
81
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
82
const struct iovec *iovec, size_t num,
83
bool more_descs, bool write)
84
{
85
@@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
86
} else {
87
descs[i].flags = flags;
58
}
88
}
59
break;
89
- descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
90
+ descs[i].addr = cpu_to_le64(sg[n]);
91
descs[i].len = cpu_to_le32(iovec[n].iov_len);
92
93
last = i;
94
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
95
{
96
unsigned avail_idx;
97
vring_avail_t *avail = svq->vring.avail;
98
+ bool ok;
99
+ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
100
101
*head = svq->free_head;
102
103
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
104
return false;
60
}
105
}
61
+ case SOCKET_ADDRESS_TYPE_UNIX: {
106
62
+ struct sockaddr_un saddr_un;
107
- vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
63
+
108
- false);
64
+ ret = unlink(addr->u.q_unix.path);
109
- vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
65
+ if (ret < 0 && errno != ENOENT) {
110
+ ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
66
+ error_setg_errno(errp, errno, "failed to unlink socket %s",
111
+ if (unlikely(!ok)) {
67
+ addr->u.q_unix.path);
112
+ return false;
68
+ return -1;
113
+ }
114
+ vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
115
+ elem->in_num > 0, false);
116
+
117
+
118
+ ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
119
+ if (unlikely(!ok)) {
120
+ return false;
121
+ }
122
+
123
+ vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
124
125
/*
126
* Put the entry in the available array (but don't update avail->idx until
127
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
128
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
129
struct vhost_vring_addr *addr)
130
{
131
- addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
132
- addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
133
- addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
134
+ addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
135
+ addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
136
+ addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
137
}
138
139
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
140
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
141
* Creates vhost shadow virtqueue, and instructs the vhost device to use the
142
* shadow methods and file descriptors.
143
*
144
+ * @iova_tree: Tree to perform descriptors translations
145
+ *
146
* Returns the new virtqueue or NULL.
147
*
148
* In case of error, reason is reported through error_report.
149
*/
150
-VhostShadowVirtqueue *vhost_svq_new(void)
151
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
152
{
153
g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
154
int r;
155
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
156
157
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
158
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
159
+ svq->iova_tree = iova_tree;
160
return g_steal_pointer(&svq);
161
162
err_init_hdev_call:
163
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
164
index XXXXXXX..XXXXXXX 100644
165
--- a/hw/virtio/vhost-shadow-virtqueue.h
166
+++ b/hw/virtio/vhost-shadow-virtqueue.h
167
@@ -XXX,XX +XXX,XX @@
168
#include "qemu/event_notifier.h"
169
#include "hw/virtio/virtio.h"
170
#include "standard-headers/linux/vhost_types.h"
171
+#include "hw/virtio/vhost-iova-tree.h"
172
173
/* Shadow virtqueue to relay notifications */
174
typedef struct VhostShadowVirtqueue {
175
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
176
/* Virtio device */
177
VirtIODevice *vdev;
178
179
+ /* IOVA mapping */
180
+ VhostIOVATree *iova_tree;
181
+
182
/* Map for use the guest's descriptors */
183
VirtQueueElement **ring_id_maps;
184
185
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
186
VirtQueue *vq);
187
void vhost_svq_stop(VhostShadowVirtqueue *svq);
188
189
-VhostShadowVirtqueue *vhost_svq_new(void);
190
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
191
192
void vhost_svq_free(gpointer vq);
193
G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
194
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
195
index XXXXXXX..XXXXXXX 100644
196
--- a/hw/virtio/vhost-vdpa.c
197
+++ b/hw/virtio/vhost-vdpa.c
198
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
199
vaddr, section->readonly);
200
201
llsize = int128_sub(llend, int128_make64(iova));
202
+ if (v->shadow_vqs_enabled) {
203
+ DMAMap mem_region = {
204
+ .translated_addr = (hwaddr)(uintptr_t)vaddr,
205
+ .size = int128_get64(llsize) - 1,
206
+ .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
207
+ };
208
+
209
+ int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
210
+ if (unlikely(r != IOVA_OK)) {
211
+ error_report("Can't allocate a mapping (%d)", r);
212
+ goto fail;
69
+ }
213
+ }
70
+
214
+
71
+ saddr_un.sun_family = PF_UNIX;
215
+ iova = mem_region.iova;
72
+ ret = snprintf(saddr_un.sun_path, sizeof(saddr_un.sun_path), "%s",
216
+ }
73
+ addr->u.q_unix.path);
217
74
+ if (ret < 0 || ret >= sizeof(saddr_un.sun_path)) {
218
vhost_vdpa_iotlb_batch_begin_once(v);
75
+ error_setg(errp, "UNIX socket path '%s' is too long",
219
ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
76
+ addr->u.q_unix.path);
220
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
77
+ error_append_hint(errp, "Path must be less than %zu bytes\n",
221
78
+ sizeof(saddr_un.sun_path));
222
llsize = int128_sub(llend, int128_make64(iova));
79
+ return -1;
223
80
+ }
224
+ if (v->shadow_vqs_enabled) {
81
+
225
+ const DMAMap *result;
82
+ fd = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
226
+ const void *vaddr = memory_region_get_ram_ptr(section->mr) +
83
+ if (fd < 0) {
227
+ section->offset_within_region +
84
+ error_setg_errno(errp, errno, "can't create stream socket");
228
+ (iova - section->offset_within_address_space);
85
+ return -1;
229
+ DMAMap mem_region = {
86
+ }
230
+ .translated_addr = (hwaddr)(uintptr_t)vaddr,
87
+ qemu_socket_set_nonblock(fd);
231
+ .size = int128_get64(llsize) - 1,
88
+
232
+ };
89
+ ret = bind(fd, (struct sockaddr *)&saddr_un, sizeof(saddr_un));
233
+
90
+ if (ret < 0) {
234
+ result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
91
+ error_setg_errno(errp, errno, "can't create socket with path: %s",
235
+ iova = result->iova;
92
+ saddr_un.sun_path);
236
+ vhost_iova_tree_remove(v->iova_tree, &mem_region);
93
+ closesocket(fd);
237
+ }
94
+ return -1;
238
vhost_vdpa_iotlb_batch_begin_once(v);
95
+ }
239
ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
96
+ break;
240
if (ret) {
97
+ }
241
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
98
case SOCKET_ADDRESS_TYPE_FD:
242
99
fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp);
243
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
100
if (fd == -1) {
244
for (unsigned n = 0; n < hdev->nvqs; ++n) {
101
@@ -XXX,XX +XXX,XX @@ static int net_stream_client_init(NetClientState *peer,
245
- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
102
{
246
+ g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
103
NetStreamState *s;
247
104
struct sockaddr_in saddr_in;
248
if (unlikely(!svq)) {
105
+ struct sockaddr_un saddr_un;
249
error_setg(errp, "Cannot create svq %u", n);
106
int fd, connected, ret;
250
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
107
251
/**
108
switch (addr->type) {
252
* Unmap a SVQ area in the device
109
@@ -XXX,XX +XXX,XX @@ static int net_stream_client_init(NetClientState *peer,
253
*/
110
}
254
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
111
}
255
- hwaddr size)
112
break;
256
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
113
+ case SOCKET_ADDRESS_TYPE_UNIX:
257
+ const DMAMap *needle)
114
+ saddr_un.sun_family = PF_UNIX;
258
{
115
+ ret = snprintf(saddr_un.sun_path, sizeof(saddr_un.sun_path), "%s",
259
+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
116
+ addr->u.q_unix.path);
260
+ hwaddr size;
117
+ if (ret < 0 || ret >= sizeof(saddr_un.sun_path)) {
261
int r;
118
+ error_setg(errp, "UNIX socket path '%s' is too long",
262
119
+ addr->u.q_unix.path);
263
- size = ROUND_UP(size, qemu_real_host_page_size);
120
+ error_append_hint(errp, "Path must be less than %zu bytes\n",
264
- r = vhost_vdpa_dma_unmap(v, iova, size);
121
+ sizeof(saddr_un.sun_path));
265
+ if (unlikely(!result)) {
122
+ return -1;
266
+ error_report("Unable to find SVQ address to unmap");
123
+ }
267
+ return false;
124
+
268
+ }
125
+ fd = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
269
+
126
+ if (fd < 0) {
270
+ size = ROUND_UP(result->size, qemu_real_host_page_size);
127
+ error_setg_errno(errp, errno, "can't create stream socket");
271
+ r = vhost_vdpa_dma_unmap(v, result->iova, size);
128
+ return -1;
272
return r == 0;
129
+ }
273
}
130
+ qemu_socket_set_nonblock(fd);
274
131
+
275
static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
132
+ connected = 0;
276
const VhostShadowVirtqueue *svq)
133
+ for (;;) {
277
{
134
+ ret = connect(fd, (struct sockaddr *)&saddr_un, sizeof(saddr_un));
278
+ DMAMap needle = {};
135
+ if (ret < 0) {
279
struct vhost_vdpa *v = dev->opaque;
136
+ if (errno == EINTR || errno == EWOULDBLOCK) {
280
struct vhost_vring_addr svq_addr;
137
+ /* continue */
281
- size_t device_size = vhost_svq_device_area_size(svq);
138
+ } else if (errno == EAGAIN ||
282
- size_t driver_size = vhost_svq_driver_area_size(svq);
139
+ errno == EALREADY) {
283
bool ok;
140
+ break;
284
141
+ } else {
285
vhost_svq_get_vring_addr(svq, &svq_addr);
142
+ error_setg_errno(errp, errno, "can't connect socket");
286
143
+ closesocket(fd);
287
- ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
144
+ return -1;
288
+ needle.translated_addr = svq_addr.desc_user_addr;
145
+ }
289
+ ok = vhost_vdpa_svq_unmap_ring(v, &needle);
146
+ } else {
290
if (unlikely(!ok)) {
147
+ connected = 1;
291
return false;
148
+ break;
149
+ }
150
+ }
151
+ break;
152
case SOCKET_ADDRESS_TYPE_FD:
153
fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp);
154
if (fd == -1) {
155
@@ -XXX,XX +XXX,XX @@ static int net_stream_client_init(NetClientState *peer,
156
connected = 1;
157
break;
158
default:
159
- error_setg(errp, "only support inet or fd type");
160
+ error_setg(errp, "only support inet, unix or fd type");
161
return -1;
162
}
292
}
163
293
164
@@ -XXX,XX +XXX,XX @@ static int net_stream_client_init(NetClientState *peer,
294
- return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
165
inet_ntoa(saddr_in.sin_addr),
295
+ needle.translated_addr = svq_addr.used_user_addr;
166
ntohs(saddr_in.sin_port));
296
+ return vhost_vdpa_svq_unmap_ring(v, &needle);
167
break;
297
+}
168
+ case SOCKET_ADDRESS_TYPE_UNIX:
298
+
169
+ qemu_set_info_str(&s->nc, " connect to %s", saddr_un.sun_path);
299
+/**
170
+ break;
300
+ * Map the SVQ area in the device
171
case SOCKET_ADDRESS_TYPE_FD:
301
+ *
172
qemu_set_info_str(&s->nc, "connect to fd %d", fd);
302
+ * @v: Vhost-vdpa device
173
break;
303
+ * @needle: The area to search iova
174
default:
304
+ * @errp: Error pointer
175
g_assert_not_reached();
305
+ */
306
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
307
+ Error **errp)
308
+{
309
+ int r;
310
+
311
+ r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
312
+ if (unlikely(r != IOVA_OK)) {
313
+ error_setg(errp, "Cannot allocate iova (%d)", r);
314
+ return false;
315
+ }
316
+
317
+ r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
318
+ (void *)(uintptr_t)needle->translated_addr,
319
+ needle->perm == IOMMU_RO);
320
+ if (unlikely(r != 0)) {
321
+ error_setg_errno(errp, -r, "Cannot map region to device");
322
+ vhost_iova_tree_remove(v->iova_tree, needle);
323
+ }
324
+
325
+ return r == 0;
326
}
327
328
/**
329
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
330
struct vhost_vring_addr *addr,
331
Error **errp)
332
{
333
+ DMAMap device_region, driver_region;
334
+ struct vhost_vring_addr svq_addr;
335
struct vhost_vdpa *v = dev->opaque;
336
size_t device_size = vhost_svq_device_area_size(svq);
337
size_t driver_size = vhost_svq_driver_area_size(svq);
338
- int r;
339
+ size_t avail_offset;
340
+ bool ok;
341
342
ERRP_GUARD();
343
- vhost_svq_get_vring_addr(svq, addr);
344
+ vhost_svq_get_vring_addr(svq, &svq_addr);
345
346
- r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
347
- (void *)(uintptr_t)addr->desc_user_addr, true);
348
- if (unlikely(r != 0)) {
349
- error_setg_errno(errp, -r, "Cannot create vq driver region: ");
350
+ driver_region = (DMAMap) {
351
+ .translated_addr = svq_addr.desc_user_addr,
352
+ .size = driver_size - 1,
353
+ .perm = IOMMU_RO,
354
+ };
355
+ ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
356
+ if (unlikely(!ok)) {
357
+ error_prepend(errp, "Cannot create vq driver region: ");
358
return false;
176
}
359
}
177
-
360
+ addr->desc_user_addr = driver_region.iova;
178
return 0;
361
+ avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
362
+ addr->avail_user_addr = driver_region.iova + avail_offset;
363
364
- r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
365
- (void *)(intptr_t)addr->used_user_addr, false);
366
- if (unlikely(r != 0)) {
367
- error_setg_errno(errp, -r, "Cannot create vq device region: ");
368
+ device_region = (DMAMap) {
369
+ .translated_addr = svq_addr.used_user_addr,
370
+ .size = device_size - 1,
371
+ .perm = IOMMU_RW,
372
+ };
373
+ ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
374
+ if (unlikely(!ok)) {
375
+ error_prepend(errp, "Cannot create vq device region: ");
376
+ vhost_vdpa_svq_unmap_ring(v, &driver_region);
377
}
378
+ addr->used_user_addr = device_region.iova;
379
380
- return r == 0;
381
+ return ok;
179
}
382
}
180
383
181
diff --git a/qapi/net.json b/qapi/net.json
384
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
385
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
182
index XXXXXXX..XXXXXXX 100644
386
index XXXXXXX..XXXXXXX 100644
183
--- a/qapi/net.json
387
--- a/include/hw/virtio/vhost-vdpa.h
184
+++ b/qapi/net.json
388
+++ b/include/hw/virtio/vhost-vdpa.h
185
@@ -XXX,XX +XXX,XX @@
389
@@ -XXX,XX +XXX,XX @@
186
# or connect to (server=false)
390
187
# @server: create server socket (default: false)
391
#include <gmodule.h>
188
#
392
189
-# Only SocketAddress types 'inet' and 'fd' are supported.
393
+#include "hw/virtio/vhost-iova-tree.h"
190
+# Only SocketAddress types 'unix', 'inet' and 'fd' are supported.
394
#include "hw/virtio/virtio.h"
191
#
395
#include "standard-headers/linux/vhost_types.h"
192
# Since: 7.2
396
193
##
397
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
194
diff --git a/qemu-options.hx b/qemu-options.hx
398
MemoryListener listener;
195
index XXXXXXX..XXXXXXX 100644
399
struct vhost_vdpa_iova_range iova_range;
196
--- a/qemu-options.hx
400
bool shadow_vqs_enabled;
197
+++ b/qemu-options.hx
401
+ /* IOVA mapping used by the Shadow Virtqueue */
198
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
402
+ VhostIOVATree *iova_tree;
199
" configure a network backend to connect to another network\n"
403
GPtrArray *shadow_vqs;
200
" using an UDP tunnel\n"
404
struct vhost_dev *dev;
201
"-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port\n"
405
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
202
+ "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path\n"
203
"-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n"
204
" configure a network backend to connect to another network\n"
205
" using a socket connection in stream mode.\n"
206
--
406
--
207
2.7.4
407
2.7.4
208
408
209
409
diff view generated by jsdifflib
1
From: Eugenio Pérez <eperezma@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
This entry was duplicated on referenced commit. Removing it.
3
This is needed to achieve migration, so the destination can restore its
4
index.
4
5
5
Fixes: 402378407dbd ("vhost-vdpa: multiqueue support")
6
Setting base as last used idx, so destination will see as available all
7
the entries that the device did not use, including the in-flight
8
processing ones.
9
10
This is ok for networking, but other kinds of devices might have
11
problems with these retransmissions.
12
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Acked-by: Jason Wang <jasowang@redhat.com>
14
Acked-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
16
---
10
net/vhost-vdpa.c | 1 -
17
hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
11
1 file changed, 1 deletion(-)
18
1 file changed, 17 insertions(+)
12
19
13
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
20
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
14
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
15
--- a/net/vhost-vdpa.c
22
--- a/hw/virtio/vhost-vdpa.c
16
+++ b/net/vhost-vdpa.c
23
+++ b/hw/virtio/vhost-vdpa.c
17
@@ -XXX,XX +XXX,XX @@ const int vdpa_feature_bits[] = {
24
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
18
VIRTIO_NET_F_CTRL_RX,
25
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
19
VIRTIO_NET_F_CTRL_RX_EXTRA,
26
struct vhost_vring_state *ring)
20
VIRTIO_NET_F_CTRL_VLAN,
27
{
21
- VIRTIO_NET_F_GUEST_ANNOUNCE,
28
+ struct vhost_vdpa *v = dev->opaque;
22
VIRTIO_NET_F_CTRL_MAC_ADDR,
29
int ret;
23
VIRTIO_NET_F_RSS,
30
24
VIRTIO_NET_F_MQ,
31
+ if (v->shadow_vqs_enabled) {
32
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
33
+ ring->index);
34
+
35
+ /*
36
+ * Setting base as last used idx, so destination will see as available
37
+ * all the entries that the device did not use, including the in-flight
38
+ * processing ones.
39
+ *
40
+ * TODO: This is ok for networking, but other kinds of devices might
41
+ * have problems with these retransmissions.
42
+ */
43
+ ring->num = svq->last_used_idx;
44
+ return 0;
45
+ }
46
+
47
ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
48
trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
49
return ret;
25
--
50
--
26
2.7.4
51
2.7.4
27
52
28
53
diff view generated by jsdifflib
1
From: Daniel P. Berrangé <berrange@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
The current message when using '-net user...' with SLIRP disabled at
3
Setting the log address would make the device start reporting invalid
4
compile time is:
4
dirty memory because the SVQ vrings are located in qemu's memory.
5
5
6
qemu-system-x86_64: -net user: Parameter 'type' expects a net backend type (maybe it is not compiled into this binary)
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
7
Acked-by: Michael S. Tsirkin <mst@redhat.com>
8
An observation is that we're using the 'netdev->type' field here which
9
is an enum value, produced after QAPI has converted from its string
10
form.
11
12
IOW, at this point in the code, we know that the user's specified
13
type name was a valid network backend. The only possible scenario that
14
can make the backend init function be NULL, is if support for that
15
backend was disabled at build time. Given this, we don't need to caveat
16
our error message with a 'maybe' hint, we can be totally explicit.
17
18
The use of QERR_INVALID_PARAMETER_VALUE doesn't really lend itself to
19
user friendly error message text. Since this is not used to set a
20
specific QAPI error class, we can simply stop using this pre-formatted
21
error text and provide something better.
22
23
Thus the new message is:
24
25
qemu-system-x86_64: -net user: network backend 'user' is not compiled into this binary
26
27
The case of passing 'hubport' for -net is also given a message reminding
28
people they should have used -netdev/-nic instead, as this backend type
29
is only valid for the modern syntax.
30
31
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
32
Reviewed-by: Thomas Huth <thuth@redhat.com>
33
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
34
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
35
---
9
---
36
net/net.c | 18 +++++++++++-------
10
hw/virtio/vhost-vdpa.c | 3 ++-
37
1 file changed, 11 insertions(+), 7 deletions(-)
11
1 file changed, 2 insertions(+), 1 deletion(-)
38
12
39
diff --git a/net/net.c b/net/net.c
13
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
40
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
41
--- a/net/net.c
15
--- a/hw/virtio/vhost-vdpa.c
42
+++ b/net/net.c
16
+++ b/hw/virtio/vhost-vdpa.c
43
@@ -XXX,XX +XXX,XX @@ static int net_client_init1(const Netdev *netdev, bool is_netdev, Error **errp)
17
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
44
if (is_netdev) {
18
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
45
if (netdev->type == NET_CLIENT_DRIVER_NIC ||
19
struct vhost_log *log)
46
!net_client_init_fun[netdev->type]) {
20
{
47
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type",
21
- if (vhost_vdpa_one_time_request(dev)) {
48
- "a netdev backend type");
22
+ struct vhost_vdpa *v = dev->opaque;
49
+ error_setg(errp, "network backend '%s' is not compiled into this binary",
23
+ if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
50
+ NetClientDriver_str(netdev->type));
24
return 0;
51
return -1;
25
}
52
}
53
} else {
54
if (netdev->type == NET_CLIENT_DRIVER_NONE) {
55
return 0; /* nothing to do */
56
}
57
- if (netdev->type == NET_CLIENT_DRIVER_HUBPORT ||
58
- !net_client_init_fun[netdev->type]) {
59
- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type",
60
- "a net backend type (maybe it is not compiled "
61
- "into this binary)");
62
+ if (netdev->type == NET_CLIENT_DRIVER_HUBPORT) {
63
+ error_setg(errp, "network backend '%s' is only supported with -netdev/-nic",
64
+ NetClientDriver_str(netdev->type));
65
+ return -1;
66
+ }
67
+
68
+ if (!net_client_init_fun[netdev->type]) {
69
+ error_setg(errp, "network backend '%s' is not compiled into this binary",
70
+ NetClientDriver_str(netdev->type));
71
return -1;
72
}
73
26
74
--
27
--
75
2.7.4
28
2.7.4
76
29
77
30
diff view generated by jsdifflib
1
From: Si-Wei Liu <si-wei.liu@oracle.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
3
SVQ is able to log the dirty bits by itself, so let's use it to not
4
backend as another parameter to instantiate vhost-vdpa net client.
4
block migration.
5
This would benefit the use case where only open file descriptors, as
6
opposed to raw vhost-vdpa device paths, are accessible from the QEMU
7
process.
8
5
9
(qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1
6
Also, ignore set and clear of VHOST_F_LOG_ALL on set_features if SVQ is
7
enabled. Even if the device supports it, the reports would be nonsense
8
because SVQ memory is in the qemu region.
10
9
11
Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
10
The log region is still allocated. Future changes might skip that, but
12
Acked-by: Eugenio Pérez <eperezma@redhat.com>
11
this series is already long enough.
12
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Acked-by: Michael S. Tsirkin <mst@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
16
---
15
net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
17
hw/virtio/vhost-vdpa.c | 39 +++++++++++++++++++++++++++++++++++----
16
qapi/net.json | 3 +++
18
include/hw/virtio/vhost-vdpa.h | 1 +
17
qemu-options.hx | 6 ++++--
19
2 files changed, 36 insertions(+), 4 deletions(-)
18
3 files changed, 27 insertions(+), 7 deletions(-)
19
20
20
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
21
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
21
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
22
--- a/net/vhost-vdpa.c
23
--- a/hw/virtio/vhost-vdpa.c
23
+++ b/net/vhost-vdpa.c
24
+++ b/hw/virtio/vhost-vdpa.c
24
@@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
25
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
25
26
return v->index != 0;
26
assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
27
}
27
opts = &netdev->u.vhost_vdpa;
28
28
- if (!opts->vhostdev) {
29
+static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
29
- error_setg(errp, "vdpa character device not specified with vhostdev");
30
+ uint64_t *features)
30
+ if (!opts->has_vhostdev && !opts->has_vhostfd) {
31
+{
31
+ error_setg(errp,
32
+ int ret;
32
+ "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
33
+
33
return -1;
34
+ ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
35
+ trace_vhost_vdpa_get_features(dev, *features);
36
+ return ret;
37
+}
38
+
39
static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
40
Error **errp)
41
{
42
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
43
return 0;
34
}
44
}
35
45
36
- vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
46
- r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
37
- if (vdpa_device_fd == -1) {
47
+ r = vhost_vdpa_get_dev_features(hdev, &dev_features);
38
- return -errno;
48
if (r != 0) {
39
+ if (opts->has_vhostdev && opts->has_vhostfd) {
49
error_setg_errno(errp, -r, "Can't get vdpa device features");
40
+ error_setg(errp,
50
return r;
41
+ "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
51
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
42
+ return -1;
52
static int vhost_vdpa_set_features(struct vhost_dev *dev,
53
uint64_t features)
54
{
55
+ struct vhost_vdpa *v = dev->opaque;
56
int ret;
57
58
if (vhost_vdpa_one_time_request(dev)) {
59
return 0;
60
}
61
62
+ if (v->shadow_vqs_enabled) {
63
+ if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
64
+ /*
65
+ * QEMU is just trying to enable or disable logging. SVQ handles
66
+ * this separately, so no need to forward this.
67
+ */
68
+ v->acked_features = features;
69
+ return 0;
70
+ }
71
+
72
+ v->acked_features = features;
73
+
74
+ /* We must not ack _F_LOG if SVQ is enabled */
75
+ features &= ~BIT_ULL(VHOST_F_LOG_ALL);
43
+ }
76
+ }
44
+
77
+
45
+ if (opts->has_vhostdev) {
78
trace_vhost_vdpa_set_features(dev, features);
46
+ vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
79
ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
47
+ if (vdpa_device_fd == -1) {
80
if (ret) {
48
+ return -errno;
81
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
49
+ }
82
static int vhost_vdpa_get_features(struct vhost_dev *dev,
50
+ } else if (opts->has_vhostfd) {
83
uint64_t *features)
51
+ vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
84
{
52
+ if (vdpa_device_fd == -1) {
85
- int ret;
53
+ error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
86
+ struct vhost_vdpa *v = dev->opaque;
54
+ return -1;
87
+ int ret = vhost_vdpa_get_dev_features(dev, features);
55
+ }
88
+
56
}
89
+ if (ret == 0 && v->shadow_vqs_enabled) {
57
90
+ /* Add SVQ logging capabilities */
58
r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
91
+ *features |= BIT_ULL(VHOST_F_LOG_ALL);
59
diff --git a/qapi/net.json b/qapi/net.json
92
+ }
93
94
- ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
95
- trace_vhost_vdpa_get_features(dev, *features);
96
return ret;
97
}
98
99
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
60
index XXXXXXX..XXXXXXX 100644
100
index XXXXXXX..XXXXXXX 100644
61
--- a/qapi/net.json
101
--- a/include/hw/virtio/vhost-vdpa.h
62
+++ b/qapi/net.json
102
+++ b/include/hw/virtio/vhost-vdpa.h
63
@@ -XXX,XX +XXX,XX @@
103
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
64
# @vhostdev: path of vhost-vdpa device
104
bool iotlb_batch_begin_sent;
65
# (default:'/dev/vhost-vdpa-0')
105
MemoryListener listener;
66
#
106
struct vhost_vdpa_iova_range iova_range;
67
+# @vhostfd: file descriptor of an already opened vhost vdpa device
107
+ uint64_t acked_features;
68
+#
108
bool shadow_vqs_enabled;
69
# @queues: number of queues to be created for multiqueue vhost-vdpa
109
/* IOVA mapping used by the Shadow Virtqueue */
70
# (default: 1)
110
VhostIOVATree *iova_tree;
71
#
72
@@ -XXX,XX +XXX,XX @@
73
{ 'struct': 'NetdevVhostVDPAOptions',
74
'data': {
75
'*vhostdev': 'str',
76
+ '*vhostfd': 'str',
77
'*queues': 'int',
78
'*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } }
79
80
diff --git a/qemu-options.hx b/qemu-options.hx
81
index XXXXXXX..XXXXXXX 100644
82
--- a/qemu-options.hx
83
+++ b/qemu-options.hx
84
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
85
" configure a vhost-user network, backed by a chardev 'dev'\n"
86
#endif
87
#ifdef __linux__
88
- "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
89
+ "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
90
" configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n"
91
+ " use 'vhostdev=/path/to/dev' to open a vhost vdpa device\n"
92
+ " use 'vhostfd=h' to connect to an already opened vhost vdpa device\n"
93
#endif
94
#ifdef CONFIG_VMNET
95
"-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
96
@@ -XXX,XX +XXX,XX @@ SRST
97
-netdev type=vhost-user,id=net0,chardev=chr0 \
98
-device virtio-net-pci,netdev=net0
99
100
-``-netdev vhost-vdpa,vhostdev=/path/to/dev``
101
+``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
102
Establish a vhost-vdpa netdev.
103
104
vDPA device is a device that uses a datapath which complies with
105
--
111
--
106
2.7.4
112
2.7.4
107
113
108
114
diff view generated by jsdifflib
Deleted patch
1
From: Laurent Vivier <lvivier@redhat.com>
2
1
3
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
4
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
5
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
6
Acked-by: Michael S. Tsirkin <mst@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
include/qemu/sockets.h | 2 ++
10
net/net.c | 62 ++++++++++++++++++++++++++------------------------
11
2 files changed, 34 insertions(+), 30 deletions(-)
12
13
diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/qemu/sockets.h
16
+++ b/include/qemu/sockets.h
17
@@ -XXX,XX +XXX,XX @@ void socket_listen_cleanup(int fd, Error **errp);
18
int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp);
19
20
/* Old, ipv4 only bits. Don't use for new code. */
21
+int convert_host_port(struct sockaddr_in *saddr, const char *host,
22
+ const char *port, Error **errp);
23
int parse_host_port(struct sockaddr_in *saddr, const char *str,
24
Error **errp);
25
int socket_init(void);
26
diff --git a/net/net.c b/net/net.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/net/net.c
29
+++ b/net/net.c
30
@@ -XXX,XX +XXX,XX @@ static QTAILQ_HEAD(, NetClientState) net_clients;
31
/***********************************************************/
32
/* network device redirectors */
33
34
-int parse_host_port(struct sockaddr_in *saddr, const char *str,
35
- Error **errp)
36
+int convert_host_port(struct sockaddr_in *saddr, const char *host,
37
+ const char *port, Error **errp)
38
{
39
- gchar **substrings;
40
struct hostent *he;
41
- const char *addr, *p, *r;
42
- int port, ret = 0;
43
+ const char *r;
44
+ long p;
45
46
memset(saddr, 0, sizeof(*saddr));
47
48
- substrings = g_strsplit(str, ":", 2);
49
- if (!substrings || !substrings[0] || !substrings[1]) {
50
- error_setg(errp, "host address '%s' doesn't contain ':' "
51
- "separating host from port", str);
52
- ret = -1;
53
- goto out;
54
- }
55
-
56
- addr = substrings[0];
57
- p = substrings[1];
58
-
59
saddr->sin_family = AF_INET;
60
- if (addr[0] == '\0') {
61
+ if (host[0] == '\0') {
62
saddr->sin_addr.s_addr = 0;
63
} else {
64
- if (qemu_isdigit(addr[0])) {
65
- if (!inet_aton(addr, &saddr->sin_addr)) {
66
+ if (qemu_isdigit(host[0])) {
67
+ if (!inet_aton(host, &saddr->sin_addr)) {
68
error_setg(errp, "host address '%s' is not a valid "
69
- "IPv4 address", addr);
70
- ret = -1;
71
- goto out;
72
+ "IPv4 address", host);
73
+ return -1;
74
}
75
} else {
76
- he = gethostbyname(addr);
77
+ he = gethostbyname(host);
78
if (he == NULL) {
79
- error_setg(errp, "can't resolve host address '%s'", addr);
80
- ret = -1;
81
- goto out;
82
+ error_setg(errp, "can't resolve host address '%s'", host);
83
+ return -1;
84
}
85
saddr->sin_addr = *(struct in_addr *)he->h_addr;
86
}
87
}
88
- port = strtol(p, (char **)&r, 0);
89
- if (r == p) {
90
- error_setg(errp, "port number '%s' is invalid", p);
91
+ if (qemu_strtol(port, &r, 0, &p) != 0) {
92
+ error_setg(errp, "port number '%s' is invalid", port);
93
+ return -1;
94
+ }
95
+ saddr->sin_port = htons(p);
96
+ return 0;
97
+}
98
+
99
+int parse_host_port(struct sockaddr_in *saddr, const char *str,
100
+ Error **errp)
101
+{
102
+ gchar **substrings;
103
+ int ret;
104
+
105
+ substrings = g_strsplit(str, ":", 2);
106
+ if (!substrings || !substrings[0] || !substrings[1]) {
107
+ error_setg(errp, "host address '%s' doesn't contain ':' "
108
+ "separating host from port", str);
109
ret = -1;
110
goto out;
111
}
112
- saddr->sin_port = htons(port);
113
+
114
+ ret = convert_host_port(saddr, substrings[0], substrings[1], errp);
115
116
out:
117
g_strfreev(substrings);
118
--
119
2.7.4
diff view generated by jsdifflib
Deleted patch
1
From: Laurent Vivier <lvivier@redhat.com>
2
1
3
The only caller passes &error_fatal, so use this directly in the function.
4
5
It's what we do for -blockdev, -device, and -object.
6
7
Suggested-by: Markus Armbruster <armbru@redhat.com>
8
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
9
Reviewed-by: Markus Armbruster <armbru@redhat.com>
10
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
11
Acked-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
include/net/net.h | 2 +-
15
net/net.c | 20 +++++++-------------
16
softmmu/vl.c | 2 +-
17
3 files changed, 9 insertions(+), 15 deletions(-)
18
19
diff --git a/include/net/net.h b/include/net/net.h
20
index XXXXXXX..XXXXXXX 100644
21
--- a/include/net/net.h
22
+++ b/include/net/net.h
23
@@ -XXX,XX +XXX,XX @@ extern const char *host_net_devices[];
24
/* from net.c */
25
int net_client_parse(QemuOptsList *opts_list, const char *str);
26
void show_netdevs(void);
27
-int net_init_clients(Error **errp);
28
+void net_init_clients(void);
29
void net_check_clients(void);
30
void net_cleanup(void);
31
void hmp_host_net_add(Monitor *mon, const QDict *qdict);
32
diff --git a/net/net.c b/net/net.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/net/net.c
35
+++ b/net/net.c
36
@@ -XXX,XX +XXX,XX @@ out:
37
return ret;
38
}
39
40
-int net_init_clients(Error **errp)
41
+void net_init_clients(void)
42
{
43
net_change_state_entry =
44
qemu_add_vm_change_state_handler(net_vm_change_state_handler, NULL);
45
46
QTAILQ_INIT(&net_clients);
47
48
- if (qemu_opts_foreach(qemu_find_opts("netdev"),
49
- net_init_netdev, NULL, errp)) {
50
- return -1;
51
- }
52
-
53
- if (qemu_opts_foreach(qemu_find_opts("nic"), net_param_nic, NULL, errp)) {
54
- return -1;
55
- }
56
+ qemu_opts_foreach(qemu_find_opts("netdev"), net_init_netdev, NULL,
57
+ &error_fatal);
58
59
- if (qemu_opts_foreach(qemu_find_opts("net"), net_init_client, NULL, errp)) {
60
- return -1;
61
- }
62
+ qemu_opts_foreach(qemu_find_opts("nic"), net_param_nic, NULL,
63
+ &error_fatal);
64
65
- return 0;
66
+ qemu_opts_foreach(qemu_find_opts("net"), net_init_client, NULL,
67
+ &error_fatal);
68
}
69
70
int net_client_parse(QemuOptsList *opts_list, const char *optarg)
71
diff --git a/softmmu/vl.c b/softmmu/vl.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/softmmu/vl.c
74
+++ b/softmmu/vl.c
75
@@ -XXX,XX +XXX,XX @@ static void qemu_create_late_backends(void)
76
qtest_server_init(qtest_chrdev, qtest_log, &error_fatal);
77
}
78
79
- net_init_clients(&error_fatal);
80
+ net_init_clients();
81
82
object_option_foreach_add(object_create_late);
83
84
--
85
2.7.4
diff view generated by jsdifflib
Deleted patch
1
From: Laurent Vivier <lvivier@redhat.com>
2
1
3
All net_client_parse() callers exit in case of error.
4
5
Move exit(1) to net_client_parse() and remove error checking from
6
the callers.
7
8
Suggested-by: Markus Armbruster <armbru@redhat.com>
9
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
10
Reviewed-by: Markus Armbruster <armbru@redhat.com>
11
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
12
Acked-by: Michael S. Tsirkin <mst@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
include/net/net.h | 2 +-
16
net/net.c | 6 ++----
17
softmmu/vl.c | 12 +++---------
18
3 files changed, 6 insertions(+), 14 deletions(-)
19
20
diff --git a/include/net/net.h b/include/net/net.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/net/net.h
23
+++ b/include/net/net.h
24
@@ -XXX,XX +XXX,XX @@ extern NICInfo nd_table[MAX_NICS];
25
extern const char *host_net_devices[];
26
27
/* from net.c */
28
-int net_client_parse(QemuOptsList *opts_list, const char *str);
29
+void net_client_parse(QemuOptsList *opts_list, const char *str);
30
void show_netdevs(void);
31
void net_init_clients(void);
32
void net_check_clients(void);
33
diff --git a/net/net.c b/net/net.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/net/net.c
36
+++ b/net/net.c
37
@@ -XXX,XX +XXX,XX @@ void net_init_clients(void)
38
&error_fatal);
39
}
40
41
-int net_client_parse(QemuOptsList *opts_list, const char *optarg)
42
+void net_client_parse(QemuOptsList *opts_list, const char *optarg)
43
{
44
if (!qemu_opts_parse_noisily(opts_list, optarg, true)) {
45
- return -1;
46
+ exit(1);
47
}
48
-
49
- return 0;
50
}
51
52
/* From FreeBSD */
53
diff --git a/softmmu/vl.c b/softmmu/vl.c
54
index XXXXXXX..XXXXXXX 100644
55
--- a/softmmu/vl.c
56
+++ b/softmmu/vl.c
57
@@ -XXX,XX +XXX,XX @@ void qemu_init(int argc, char **argv)
58
break;
59
case QEMU_OPTION_netdev:
60
default_net = 0;
61
- if (net_client_parse(qemu_find_opts("netdev"), optarg) == -1) {
62
- exit(1);
63
- }
64
+ net_client_parse(qemu_find_opts("netdev"), optarg);
65
break;
66
case QEMU_OPTION_nic:
67
default_net = 0;
68
- if (net_client_parse(qemu_find_opts("nic"), optarg) == -1) {
69
- exit(1);
70
- }
71
+ net_client_parse(qemu_find_opts("nic"), optarg);
72
break;
73
case QEMU_OPTION_net:
74
default_net = 0;
75
- if (net_client_parse(qemu_find_opts("net"), optarg) == -1) {
76
- exit(1);
77
- }
78
+ net_client_parse(qemu_find_opts("net"), optarg);
79
break;
80
#ifdef CONFIG_LIBISCSI
81
case QEMU_OPTION_iscsi:
82
--
83
2.7.4
diff view generated by jsdifflib
Deleted patch
1
From: Laurent Vivier <lvivier@redhat.com>
2
1
3
As qemu_opts_parse_noisily() flattens the QAPI structures ("type" field
4
of Netdev structure can collide with "type" field of SocketAddress),
5
we introduce a way to bypass qemu_opts_parse_noisily() and use directly
6
visit_type_Netdev() to parse the backend parameters.
7
8
More details from Markus:
9
10
qemu_init() passes the argument of -netdev, -nic, and -net to
11
net_client_parse().
12
13
net_client_parse() parses with qemu_opts_parse_noisily(), passing
14
QemuOptsList qemu_netdev_opts for -netdev, qemu_nic_opts for -nic, and
15
qemu_net_opts for -net. Their desc[] are all empty, which means any
16
keys are accepted. The result of the parse (a QemuOpts) is stored in
17
the QemuOptsList.
18
19
Note that QemuOpts is flat by design. In some places, we layer non-flat
20
on top using dotted keys convention, but not here.
21
22
net_init_clients() iterates over the stored QemuOpts, and passes them to
23
net_init_netdev(), net_param_nic(), or net_init_client(), respectively.
24
25
These functions pass the QemuOpts to net_client_init(). They also do
26
other things with the QemuOpts, which we can ignore here.
27
28
net_client_init() uses the opts visitor to convert the (flat) QemuOpts to
29
a (non-flat) QAPI object Netdev. Netdev is also the argument of QMP
30
command netdev_add.
31
32
The opts visitor was an early attempt to support QAPI in
33
(QemuOpts-based) CLI. It restricts QAPI types to a certain shape; see
34
commit eb7ee2cbeb "qapi: introduce OptsVisitor".
35
36
A more modern way to support QAPI is qobject_input_visitor_new_str().
37
It uses keyval_parse() instead of QemuOpts for KEY=VALUE,... syntax, and
38
it also supports JSON syntax. The former isn't quite as expressive as
39
JSON, but it's a lot closer than QemuOpts + opts visitor.
40
41
This commit paves the way to use of the modern way instead.
42
43
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
44
Reviewed-by: Markus Armbruster <armbru@redhat.com>
45
Acked-by: Michael S. Tsirkin <mst@redhat.com>
46
Signed-off-by: Jason Wang <jasowang@redhat.com>
47
---
48
include/net/net.h | 2 ++
49
net/net.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
50
softmmu/vl.c | 6 +++++-
51
3 files changed, 64 insertions(+), 1 deletion(-)
52
53
diff --git a/include/net/net.h b/include/net/net.h
54
index XXXXXXX..XXXXXXX 100644
55
--- a/include/net/net.h
56
+++ b/include/net/net.h
57
@@ -XXX,XX +XXX,XX @@ extern NICInfo nd_table[MAX_NICS];
58
extern const char *host_net_devices[];
59
60
/* from net.c */
61
+bool netdev_is_modern(const char *optarg);
62
+void netdev_parse_modern(const char *optarg);
63
void net_client_parse(QemuOptsList *opts_list, const char *str);
64
void show_netdevs(void);
65
void net_init_clients(void);
66
diff --git a/net/net.c b/net/net.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/net/net.c
69
+++ b/net/net.c
70
@@ -XXX,XX +XXX,XX @@
71
#include "net/colo-compare.h"
72
#include "net/filter.h"
73
#include "qapi/string-output-visitor.h"
74
+#include "qapi/qobject-input-visitor.h"
75
76
/* Net bridge is currently not supported for W32. */
77
#if !defined(_WIN32)
78
@@ -XXX,XX +XXX,XX @@
79
static VMChangeStateEntry *net_change_state_entry;
80
static QTAILQ_HEAD(, NetClientState) net_clients;
81
82
+typedef struct NetdevQueueEntry {
83
+ Netdev *nd;
84
+ Location loc;
85
+ QSIMPLEQ_ENTRY(NetdevQueueEntry) entry;
86
+} NetdevQueueEntry;
87
+
88
+typedef QSIMPLEQ_HEAD(, NetdevQueueEntry) NetdevQueue;
89
+
90
+static NetdevQueue nd_queue = QSIMPLEQ_HEAD_INITIALIZER(nd_queue);
91
+
92
/***********************************************************/
93
/* network device redirectors */
94
95
@@ -XXX,XX +XXX,XX @@ out:
96
return ret;
97
}
98
99
+static void netdev_init_modern(void)
100
+{
101
+ while (!QSIMPLEQ_EMPTY(&nd_queue)) {
102
+ NetdevQueueEntry *nd = QSIMPLEQ_FIRST(&nd_queue);
103
+
104
+ QSIMPLEQ_REMOVE_HEAD(&nd_queue, entry);
105
+ loc_push_restore(&nd->loc);
106
+ net_client_init1(nd->nd, true, &error_fatal);
107
+ loc_pop(&nd->loc);
108
+ qapi_free_Netdev(nd->nd);
109
+ g_free(nd);
110
+ }
111
+}
112
+
113
void net_init_clients(void)
114
{
115
net_change_state_entry =
116
@@ -XXX,XX +XXX,XX @@ void net_init_clients(void)
117
118
QTAILQ_INIT(&net_clients);
119
120
+ netdev_init_modern();
121
+
122
qemu_opts_foreach(qemu_find_opts("netdev"), net_init_netdev, NULL,
123
&error_fatal);
124
125
@@ -XXX,XX +XXX,XX @@ void net_init_clients(void)
126
&error_fatal);
127
}
128
129
+/*
130
+ * Does this -netdev argument use modern rather than traditional syntax?
131
+ * Modern syntax is to be parsed with netdev_parse_modern().
132
+ * Traditional syntax is to be parsed with net_client_parse().
133
+ */
134
+bool netdev_is_modern(const char *optarg)
135
+{
136
+ return false;
137
+}
138
+
139
+/*
140
+ * netdev_parse_modern() uses modern, more expressive syntax than
141
+ * net_client_parse(), but supports only the -netdev option.
142
+ * netdev_parse_modern() appends to @nd_queue, whereas net_client_parse()
143
+ * appends to @qemu_netdev_opts.
144
+ */
145
+void netdev_parse_modern(const char *optarg)
146
+{
147
+ Visitor *v;
148
+ NetdevQueueEntry *nd;
149
+
150
+ v = qobject_input_visitor_new_str(optarg, "type", &error_fatal);
151
+ nd = g_new(NetdevQueueEntry, 1);
152
+ visit_type_Netdev(v, NULL, &nd->nd, &error_fatal);
153
+ visit_free(v);
154
+ loc_save(&nd->loc);
155
+
156
+ QSIMPLEQ_INSERT_TAIL(&nd_queue, nd, entry);
157
+}
158
+
159
void net_client_parse(QemuOptsList *opts_list, const char *optarg)
160
{
161
if (!qemu_opts_parse_noisily(opts_list, optarg, true)) {
162
diff --git a/softmmu/vl.c b/softmmu/vl.c
163
index XXXXXXX..XXXXXXX 100644
164
--- a/softmmu/vl.c
165
+++ b/softmmu/vl.c
166
@@ -XXX,XX +XXX,XX @@ void qemu_init(int argc, char **argv)
167
break;
168
case QEMU_OPTION_netdev:
169
default_net = 0;
170
- net_client_parse(qemu_find_opts("netdev"), optarg);
171
+ if (netdev_is_modern(optarg)) {
172
+ netdev_parse_modern(optarg);
173
+ } else {
174
+ net_client_parse(qemu_find_opts("netdev"), optarg);
175
+ }
176
break;
177
case QEMU_OPTION_nic:
178
default_net = 0;
179
--
180
2.7.4
diff view generated by jsdifflib
Deleted patch
1
From: Stefano Brivio <sbrivio@redhat.com>
2
1
3
Other errors are treated as failure by net_socket_connect_init(),
4
but if connect() returns EINVAL, we'll fail silently. Remove the
5
related exception.
6
7
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
8
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
9
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
10
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
net/socket.c | 3 +--
14
1 file changed, 1 insertion(+), 2 deletions(-)
15
16
diff --git a/net/socket.c b/net/socket.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/net/socket.c
19
+++ b/net/socket.c
20
@@ -XXX,XX +XXX,XX @@ static int net_socket_connect_init(NetClientState *peer,
21
if (errno == EINTR || errno == EWOULDBLOCK) {
22
/* continue */
23
} else if (errno == EINPROGRESS ||
24
- errno == EALREADY ||
25
- errno == EINVAL) {
26
+ errno == EALREADY) {
27
break;
28
} else {
29
error_setg_errno(errp, errno, "can't connect socket");
30
--
31
2.7.4
diff view generated by jsdifflib
Deleted patch
1
From: Laurent Vivier <lvivier@redhat.com>
2
1
3
It is less complex to manage special cases directly in
4
net_dgram_mcast_init() and net_dgram_udp_init().
5
6
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
7
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
8
Acked-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
net/dgram.c | 143 ++++++++++++++++++++++++++++++++----------------------------
12
1 file changed, 76 insertions(+), 67 deletions(-)
13
14
diff --git a/net/dgram.c b/net/dgram.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/net/dgram.c
17
+++ b/net/dgram.c
18
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_dgram_socket_info = {
19
static NetDgramState *net_dgram_fd_init(NetClientState *peer,
20
const char *model,
21
const char *name,
22
- int fd, int is_fd,
23
- SocketAddress *mcast,
24
+ int fd,
25
Error **errp)
26
{
27
- struct sockaddr_in *saddr = NULL;
28
- int newfd;
29
NetClientState *nc;
30
NetDgramState *s;
31
- SocketAddress *sa;
32
- SocketAddressType sa_type;
33
-
34
- sa = socket_local_address(fd, errp);
35
- if (!sa) {
36
- return NULL;
37
- }
38
- sa_type = sa->type;
39
- qapi_free_SocketAddress(sa);
40
-
41
- /*
42
- * fd passed: multicast: "learn" dest_addr address from bound address and
43
- * save it. Because this may be "shared" socket from a "master" process,
44
- * datagrams would be recv() by ONLY ONE process: we must "clone" this
45
- * dgram socket --jjo
46
- */
47
-
48
- if (is_fd && mcast != NULL) {
49
- saddr = g_new(struct sockaddr_in, 1);
50
-
51
- if (convert_host_port(saddr, mcast->u.inet.host, mcast->u.inet.port,
52
- errp) < 0) {
53
- goto err;
54
- }
55
- /* must be bound */
56
- if (saddr->sin_addr.s_addr == 0) {
57
- error_setg(errp, "can't setup multicast destination address");
58
- goto err;
59
- }
60
- /* clone dgram socket */
61
- newfd = net_dgram_mcast_create(saddr, NULL, errp);
62
- if (newfd < 0) {
63
- goto err;
64
- }
65
- /* clone newfd to fd, close newfd */
66
- dup2(newfd, fd);
67
- close(newfd);
68
- }
69
70
nc = qemu_new_net_client(&net_dgram_socket_info, peer, model, name);
71
72
@@ -XXX,XX +XXX,XX @@ static NetDgramState *net_dgram_fd_init(NetClientState *peer,
73
net_socket_rs_init(&s->rs, net_dgram_rs_finalize, false);
74
net_dgram_read_poll(s, true);
75
76
- /* mcast: save bound address as dst */
77
- if (saddr) {
78
- g_assert(s->dest_addr == NULL);
79
- s->dest_addr = (struct sockaddr *)saddr;
80
- s->dest_len = sizeof(*saddr);
81
- qemu_set_info_str(nc, "fd=%d (cloned mcast=%s:%d)", fd,
82
- inet_ntoa(saddr->sin_addr), ntohs(saddr->sin_port));
83
- } else {
84
- qemu_set_info_str(nc, "fd=%d %s", fd, SocketAddressType_str(sa_type));
85
- }
86
-
87
return s;
88
-
89
-err:
90
- g_free(saddr);
91
- closesocket(fd);
92
- return NULL;
93
}
94
95
static int net_dgram_mcast_init(NetClientState *peer,
96
@@ -XXX,XX +XXX,XX @@ static int net_dgram_mcast_init(NetClientState *peer,
97
}
98
break;
99
}
100
- case SOCKET_ADDRESS_TYPE_FD:
101
+ case SOCKET_ADDRESS_TYPE_FD: {
102
+ int newfd;
103
+
104
fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp);
105
if (fd == -1) {
106
g_free(saddr);
107
@@ -XXX,XX +XXX,XX @@ static int net_dgram_mcast_init(NetClientState *peer,
108
name, fd);
109
return -1;
110
}
111
+
112
+ /*
113
+ * fd passed: multicast: "learn" dest_addr address from bound
114
+ * address and save it. Because this may be "shared" socket from a
115
+ * "master" process, datagrams would be recv() by ONLY ONE process:
116
+ * we must "clone" this dgram socket --jjo
117
+ */
118
+
119
+ saddr = g_new(struct sockaddr_in, 1);
120
+
121
+ if (convert_host_port(saddr, local->u.inet.host, local->u.inet.port,
122
+ errp) < 0) {
123
+ g_free(saddr);
124
+ closesocket(fd);
125
+ return -1;
126
+ }
127
+
128
+ /* must be bound */
129
+ if (saddr->sin_addr.s_addr == 0) {
130
+ error_setg(errp, "can't setup multicast destination address");
131
+ g_free(saddr);
132
+ closesocket(fd);
133
+ return -1;
134
+ }
135
+ /* clone dgram socket */
136
+ newfd = net_dgram_mcast_create(saddr, NULL, errp);
137
+ if (newfd < 0) {
138
+ g_free(saddr);
139
+ closesocket(fd);
140
+ return -1;
141
+ }
142
+ /* clone newfd to fd, close newfd */
143
+ dup2(newfd, fd);
144
+ close(newfd);
145
break;
146
+ }
147
default:
148
g_free(saddr);
149
error_setg(errp, "only support inet or fd type for local");
150
@@ -XXX,XX +XXX,XX @@ static int net_dgram_mcast_init(NetClientState *peer,
151
}
152
}
153
154
- s = net_dgram_fd_init(peer, model, name, fd,
155
- local->type == SOCKET_ADDRESS_TYPE_FD,
156
- remote, errp);
157
+ s = net_dgram_fd_init(peer, model, name, fd, errp);
158
if (!s) {
159
g_free(saddr);
160
return -1;
161
@@ -XXX,XX +XXX,XX @@ static int net_dgram_mcast_init(NetClientState *peer,
162
s->dest_addr = (struct sockaddr *)saddr;
163
s->dest_len = sizeof(*saddr);
164
165
- qemu_set_info_str(&s->nc, "mcast=%s:%d", inet_ntoa(saddr->sin_addr),
166
- ntohs(saddr->sin_port));
167
+ if (!local) {
168
+ qemu_set_info_str(&s->nc, "mcast=%s:%d",
169
+ inet_ntoa(saddr->sin_addr),
170
+ ntohs(saddr->sin_port));
171
+ } else {
172
+ switch (local->type) {
173
+ case SOCKET_ADDRESS_TYPE_INET:
174
+ qemu_set_info_str(&s->nc, "mcast=%s:%d",
175
+ inet_ntoa(saddr->sin_addr),
176
+ ntohs(saddr->sin_port));
177
+ break;
178
+ case SOCKET_ADDRESS_TYPE_FD:
179
+ qemu_set_info_str(&s->nc, "fd=%d (cloned mcast=%s:%d)",
180
+ fd, inet_ntoa(saddr->sin_addr),
181
+ ntohs(saddr->sin_port));
182
+ break;
183
+ default:
184
+ g_assert_not_reached();
185
+ }
186
+ }
187
188
return 0;
189
190
@@ -XXX,XX +XXX,XX @@ int net_init_dgram(const Netdev *netdev, const char *name,
191
return -1;
192
}
193
194
- s = net_dgram_fd_init(peer, "dgram", name, fd, 0, NULL, errp);
195
+ s = net_dgram_fd_init(peer, "dgram", name, fd, errp);
196
if (!s) {
197
return -1;
198
}
199
@@ -XXX,XX +XXX,XX @@ int net_init_dgram(const Netdev *netdev, const char *name,
200
inet_ntoa(raddr_in.sin_addr),
201
ntohs(raddr_in.sin_port));
202
break;
203
- case SOCKET_ADDRESS_TYPE_FD:
204
- qemu_set_info_str(&s->nc, "fd=%d", fd);
205
+ case SOCKET_ADDRESS_TYPE_FD: {
206
+ SocketAddress *sa;
207
+ SocketAddressType sa_type;
208
+
209
+ sa = socket_local_address(fd, errp);
210
+ if (sa) {
211
+ sa_type = sa->type;
212
+ qapi_free_SocketAddress(sa);
213
+
214
+ qemu_set_info_str(&s->nc, "fd=%d %s", fd,
215
+ SocketAddressType_str(sa_type));
216
+ } else {
217
+ qemu_set_info_str(&s->nc, "fd=%d", fd);
218
+ }
219
break;
220
+ }
221
default:
222
g_assert_not_reached();
223
}
224
--
225
2.7.4
diff view generated by jsdifflib
Deleted patch
1
From: Laurent Vivier <lvivier@redhat.com>
2
1
3
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
4
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
5
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
6
Acked-by: Michael S. Tsirkin <mst@redhat.com>
7
Acked-by: Markus Armbruster <armbru@redhat.com> (QAPI schema)
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
net/dgram.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
11
qapi/net.json | 2 +-
12
qemu-options.hx | 1 +
13
3 files changed, 56 insertions(+), 2 deletions(-)
14
15
diff --git a/net/dgram.c b/net/dgram.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/net/dgram.c
18
+++ b/net/dgram.c
19
@@ -XXX,XX +XXX,XX @@ int net_init_dgram(const Netdev *netdev, const char *name,
20
SocketAddress *remote, *local;
21
struct sockaddr *dest_addr;
22
struct sockaddr_in laddr_in, raddr_in;
23
+ struct sockaddr_un laddr_un, raddr_un;
24
socklen_t dest_len;
25
26
assert(netdev->type == NET_CLIENT_DRIVER_DGRAM);
27
@@ -XXX,XX +XXX,XX @@ int net_init_dgram(const Netdev *netdev, const char *name,
28
}
29
} else {
30
if (local->type != SOCKET_ADDRESS_TYPE_FD) {
31
- error_setg(errp, "type=inet requires remote parameter");
32
+ error_setg(errp,
33
+ "type=inet or type=unix requires remote parameter");
34
return -1;
35
}
36
}
37
@@ -XXX,XX +XXX,XX @@ int net_init_dgram(const Netdev *netdev, const char *name,
38
dest_addr = g_malloc(dest_len);
39
memcpy(dest_addr, &raddr_in, dest_len);
40
break;
41
+ case SOCKET_ADDRESS_TYPE_UNIX:
42
+ ret = unlink(local->u.q_unix.path);
43
+ if (ret < 0 && errno != ENOENT) {
44
+ error_setg_errno(errp, errno, "failed to unlink socket %s",
45
+ local->u.q_unix.path);
46
+ return -1;
47
+ }
48
+
49
+ laddr_un.sun_family = PF_UNIX;
50
+ ret = snprintf(laddr_un.sun_path, sizeof(laddr_un.sun_path), "%s",
51
+ local->u.q_unix.path);
52
+ if (ret < 0 || ret >= sizeof(laddr_un.sun_path)) {
53
+ error_setg(errp, "UNIX socket path '%s' is too long",
54
+ local->u.q_unix.path);
55
+ error_append_hint(errp, "Path must be less than %zu bytes\n",
56
+ sizeof(laddr_un.sun_path));
57
+ }
58
+
59
+ raddr_un.sun_family = PF_UNIX;
60
+ ret = snprintf(raddr_un.sun_path, sizeof(raddr_un.sun_path), "%s",
61
+ remote->u.q_unix.path);
62
+ if (ret < 0 || ret >= sizeof(raddr_un.sun_path)) {
63
+ error_setg(errp, "UNIX socket path '%s' is too long",
64
+ remote->u.q_unix.path);
65
+ error_append_hint(errp, "Path must be less than %zu bytes\n",
66
+ sizeof(raddr_un.sun_path));
67
+ }
68
+
69
+ fd = qemu_socket(PF_UNIX, SOCK_DGRAM, 0);
70
+ if (fd < 0) {
71
+ error_setg_errno(errp, errno, "can't create datagram socket");
72
+ return -1;
73
+ }
74
+
75
+ ret = bind(fd, (struct sockaddr *)&laddr_un, sizeof(laddr_un));
76
+ if (ret < 0) {
77
+ error_setg_errno(errp, errno, "can't bind unix=%s to socket",
78
+ laddr_un.sun_path);
79
+ closesocket(fd);
80
+ return -1;
81
+ }
82
+ qemu_socket_set_nonblock(fd);
83
+
84
+ dest_len = sizeof(raddr_un);
85
+ dest_addr = g_malloc(dest_len);
86
+ memcpy(dest_addr, &raddr_un, dest_len);
87
+ break;
88
case SOCKET_ADDRESS_TYPE_FD:
89
fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp);
90
if (fd == -1) {
91
@@ -XXX,XX +XXX,XX @@ int net_init_dgram(const Netdev *netdev, const char *name,
92
inet_ntoa(raddr_in.sin_addr),
93
ntohs(raddr_in.sin_port));
94
break;
95
+ case SOCKET_ADDRESS_TYPE_UNIX:
96
+ qemu_set_info_str(&s->nc, "udp=%s:%s",
97
+ laddr_un.sun_path, raddr_un.sun_path);
98
+ break;
99
case SOCKET_ADDRESS_TYPE_FD: {
100
SocketAddress *sa;
101
SocketAddressType sa_type;
102
diff --git a/qapi/net.json b/qapi/net.json
103
index XXXXXXX..XXXXXXX 100644
104
--- a/qapi/net.json
105
+++ b/qapi/net.json
106
@@ -XXX,XX +XXX,XX @@
107
# @remote: remote address
108
# @local: local address
109
#
110
-# Only SocketAddress types 'inet' and 'fd' are supported.
111
+# Only SocketAddress types 'unix', 'inet' and 'fd' are supported.
112
#
113
# If remote address is present and it's a multicast address, local address
114
# is optional. Otherwise local address is required and remote address is
115
diff --git a/qemu-options.hx b/qemu-options.hx
116
index XXXXXXX..XXXXXXX 100644
117
--- a/qemu-options.hx
118
+++ b/qemu-options.hx
119
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
120
" configure a network backend to connect to a multicast maddr and port\n"
121
" use ``local.host=addr`` to specify the host address to send packets from\n"
122
"-netdev dgram,id=str,local.type=inet,local.host=addr,local.port=port[,remote.type=inet,remote.host=addr,remote.port=port]\n"
123
+ "-netdev dgram,id=str,local.type=unix,local.path=path[,remote.type=unix,remote.path=path]\n"
124
"-netdev dgram,id=str,local.type=fd,local.str=file-descriptor\n"
125
" configure a network backend to connect to another network\n"
126
" using an UDP tunnel\n"
127
--
128
2.7.4
diff view generated by jsdifflib
Deleted patch
1
From: Laurent Vivier <lvivier@redhat.com>
2
1
3
Rename SocketAddress_to_str() to socket_uri() and move it to
4
util/qemu-sockets.c close to socket_parse().
5
6
socket_uri() generates a string from a SocketAddress while
7
socket_parse() generates a SocketAddress from a string.
8
9
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
10
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
11
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
12
Acked-by: Michael S. Tsirkin <mst@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
include/qemu/sockets.h | 2 +-
16
monitor/hmp-cmds.c | 23 +----------------------
17
util/qemu-sockets.c | 20 ++++++++++++++++++++
18
3 files changed, 22 insertions(+), 23 deletions(-)
19
20
diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/qemu/sockets.h
23
+++ b/include/qemu/sockets.h
24
@@ -XXX,XX +XXX,XX @@ NetworkAddressFamily inet_netfamily(int family);
25
int unix_listen(const char *path, Error **errp);
26
int unix_connect(const char *path, Error **errp);
27
28
+char *socket_uri(SocketAddress *addr);
29
SocketAddress *socket_parse(const char *str, Error **errp);
30
int socket_connect(SocketAddress *addr, Error **errp);
31
int socket_listen(SocketAddress *addr, int num, Error **errp);
32
@@ -XXX,XX +XXX,XX @@ SocketAddress *socket_address_flatten(SocketAddressLegacy *addr);
33
* Return 0 on success.
34
*/
35
int socket_address_parse_named_fd(SocketAddress *addr, Error **errp);
36
-
37
#endif /* QEMU_SOCKETS_H */
38
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/monitor/hmp-cmds.c
41
+++ b/monitor/hmp-cmds.c
42
@@ -XXX,XX +XXX,XX @@ void hmp_info_mice(Monitor *mon, const QDict *qdict)
43
qapi_free_MouseInfoList(mice_list);
44
}
45
46
-static char *SocketAddress_to_str(SocketAddress *addr)
47
-{
48
- switch (addr->type) {
49
- case SOCKET_ADDRESS_TYPE_INET:
50
- return g_strdup_printf("tcp:%s:%s",
51
- addr->u.inet.host,
52
- addr->u.inet.port);
53
- case SOCKET_ADDRESS_TYPE_UNIX:
54
- return g_strdup_printf("unix:%s",
55
- addr->u.q_unix.path);
56
- case SOCKET_ADDRESS_TYPE_FD:
57
- return g_strdup_printf("fd:%s", addr->u.fd.str);
58
- case SOCKET_ADDRESS_TYPE_VSOCK:
59
- return g_strdup_printf("tcp:%s:%s",
60
- addr->u.vsock.cid,
61
- addr->u.vsock.port);
62
- default:
63
- return g_strdup("unknown address type");
64
- }
65
-}
66
-
67
void hmp_info_migrate(Monitor *mon, const QDict *qdict)
68
{
69
MigrationInfo *info;
70
@@ -XXX,XX +XXX,XX @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
71
monitor_printf(mon, "socket address: [\n");
72
73
for (addr = info->socket_address; addr; addr = addr->next) {
74
- char *s = SocketAddress_to_str(addr->value);
75
+ char *s = socket_uri(addr->value);
76
monitor_printf(mon, "\t%s\n", s);
77
g_free(s);
78
}
79
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/util/qemu-sockets.c
82
+++ b/util/qemu-sockets.c
83
@@ -XXX,XX +XXX,XX @@ int unix_connect(const char *path, Error **errp)
84
return sock;
85
}
86
87
+char *socket_uri(SocketAddress *addr)
88
+{
89
+ switch (addr->type) {
90
+ case SOCKET_ADDRESS_TYPE_INET:
91
+ return g_strdup_printf("tcp:%s:%s",
92
+ addr->u.inet.host,
93
+ addr->u.inet.port);
94
+ case SOCKET_ADDRESS_TYPE_UNIX:
95
+ return g_strdup_printf("unix:%s",
96
+ addr->u.q_unix.path);
97
+ case SOCKET_ADDRESS_TYPE_FD:
98
+ return g_strdup_printf("fd:%s", addr->u.fd.str);
99
+ case SOCKET_ADDRESS_TYPE_VSOCK:
100
+ return g_strdup_printf("tcp:%s:%s",
101
+ addr->u.vsock.cid,
102
+ addr->u.vsock.port);
103
+ default:
104
+ return g_strdup("unknown address type");
105
+ }
106
+}
107
108
SocketAddress *socket_parse(const char *str, Error **errp)
109
{
110
--
111
2.7.4
diff view generated by jsdifflib
Deleted patch
1
From: Laurent Vivier <lvivier@redhat.com>
2
1
3
To be consistent with socket_uri(), add 'tcp:' prefix for inet type in
4
socket_parse(). By default, socket_parse() uses tcp when no prefix is
5
provided (format is host:port).
6
7
In socket_uri(), use 'vsock:' prefix for vsock type rather than 'tcp:'
8
because it makes a vsock address look like an inet address with CID
9
misinterpreted as host.
10
This goes back to commit 9aca82ba31 "migration: Create socket-address parameter".
11
12
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
13
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
14
Reviewed-by: Markus Armbruster <armbru@redhat.com>
15
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
16
Acked-by: Michael S. Tsirkin <mst@redhat.com>
17
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
---
19
util/qemu-sockets.c | 7 ++++++-
20
1 file changed, 6 insertions(+), 1 deletion(-)
21
22
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/util/qemu-sockets.c
25
+++ b/util/qemu-sockets.c
26
@@ -XXX,XX +XXX,XX @@ char *socket_uri(SocketAddress *addr)
27
case SOCKET_ADDRESS_TYPE_FD:
28
return g_strdup_printf("fd:%s", addr->u.fd.str);
29
case SOCKET_ADDRESS_TYPE_VSOCK:
30
- return g_strdup_printf("tcp:%s:%s",
31
+ return g_strdup_printf("vsock:%s:%s",
32
addr->u.vsock.cid,
33
addr->u.vsock.port);
34
default:
35
@@ -XXX,XX +XXX,XX @@ SocketAddress *socket_parse(const char *str, Error **errp)
36
if (vsock_parse(&addr->u.vsock, str + strlen("vsock:"), errp)) {
37
goto fail;
38
}
39
+ } else if (strstart(str, "tcp:", NULL)) {
40
+ addr->type = SOCKET_ADDRESS_TYPE_INET;
41
+ if (inet_parse(&addr->u.inet, str + strlen("tcp:"), errp)) {
42
+ goto fail;
43
+ }
44
} else {
45
addr->type = SOCKET_ADDRESS_TYPE_INET;
46
if (inet_parse(&addr->u.inet, str, errp)) {
47
--
48
2.7.4
diff view generated by jsdifflib
Deleted patch
1
From: Laurent Vivier <lvivier@redhat.com>
2
1
3
The netdev reports a NETDEV_STREAM_CONNECTED event when the backend
4
is connected, and NETDEV_STREAM_DISCONNECTED when it is disconnected.
5
6
The NETDEV_STREAM_CONNECTED event includes the destination address.
7
8
This allows a system manager like libvirt to detect when the server
9
fails.
10
11
For instance with passt:
12
13
{ 'execute': 'qmp_capabilities' }
14
{ "return": { } }
15
{ "timestamp": { "seconds": 1666341395, "microseconds": 505347 },
16
"event": "NETDEV_STREAM_CONNECTED",
17
"data": { "netdev-id": "netdev0",
18
"addr": { "path": "/tmp/passt_1.socket", "type": "unix" } } }
19
20
[killing passt here]
21
22
{ "timestamp": { "seconds": 1666341430, "microseconds": 968694 },
23
"event": "NETDEV_STREAM_DISCONNECTED",
24
"data": { "netdev-id": "netdev0" } }
25
26
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
27
Acked-by: Michael S. Tsirkin <mst@redhat.com>
28
Signed-off-by: Jason Wang <jasowang@redhat.com>
29
---
30
net/stream.c | 5 +++++
31
qapi/net.json | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
32
2 files changed, 54 insertions(+)
33
34
diff --git a/net/stream.c b/net/stream.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/net/stream.c
37
+++ b/net/stream.c
38
@@ -XXX,XX +XXX,XX @@
39
#include "io/channel.h"
40
#include "io/channel-socket.h"
41
#include "io/net-listener.h"
42
+#include "qapi/qapi-events-net.h"
43
44
typedef struct NetStreamState {
45
NetClientState nc;
46
@@ -XXX,XX +XXX,XX @@ static gboolean net_stream_send(QIOChannel *ioc,
47
s->nc.link_down = true;
48
qemu_set_info_str(&s->nc, "");
49
50
+ qapi_event_send_netdev_stream_disconnected(s->nc.name);
51
+
52
return G_SOURCE_REMOVE;
53
}
54
buf = buf1;
55
@@ -XXX,XX +XXX,XX @@ static void net_stream_listen(QIONetListener *listener,
56
uri = socket_uri(addr);
57
qemu_set_info_str(&s->nc, uri);
58
g_free(uri);
59
+ qapi_event_send_netdev_stream_connected(s->nc.name, addr);
60
qapi_free_SocketAddress(addr);
61
}
62
63
@@ -XXX,XX +XXX,XX @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque)
64
s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send,
65
s, NULL);
66
s->nc.link_down = false;
67
+ qapi_event_send_netdev_stream_connected(s->nc.name, addr);
68
qapi_free_SocketAddress(addr);
69
70
return;
71
diff --git a/qapi/net.json b/qapi/net.json
72
index XXXXXXX..XXXXXXX 100644
73
--- a/qapi/net.json
74
+++ b/qapi/net.json
75
@@ -XXX,XX +XXX,XX @@
76
##
77
{ 'event': 'FAILOVER_NEGOTIATED',
78
'data': {'device-id': 'str'} }
79
+
80
+##
81
+# @NETDEV_STREAM_CONNECTED:
82
+#
83
+# Emitted when the netdev stream backend is connected
84
+#
85
+# @netdev-id: QEMU netdev id that is connected
86
+# @addr: The destination address
87
+#
88
+# Since: 7.2
89
+#
90
+# Example:
91
+#
92
+# <- { "event": "NETDEV_STREAM_CONNECTED",
93
+# "data": { "netdev-id": "netdev0",
94
+# "addr": { "port": "47666", "ipv6": true,
95
+# "host": "::1", "type": "inet" } },
96
+# "timestamp": { "seconds": 1666269863, "microseconds": 311222 } }
97
+#
98
+# or
99
+#
100
+# <- { "event": "NETDEV_STREAM_CONNECTED",
101
+# "data": { "netdev-id": "netdev0",
102
+# "addr": { "path": "/tmp/qemu0", "type": "unix" } },
103
+# "timestamp": { "seconds": 1666269706, "microseconds": 413651 } }
104
+#
105
+##
106
+{ 'event': 'NETDEV_STREAM_CONNECTED',
107
+ 'data': { 'netdev-id': 'str',
108
+ 'addr': 'SocketAddress' } }
109
+
110
+##
111
+# @NETDEV_STREAM_DISCONNECTED:
112
+#
113
+# Emitted when the netdev stream backend is disconnected
114
+#
115
+# @netdev-id: QEMU netdev id that is disconnected
116
+#
117
+# Since: 7.2
118
+#
119
+# Example:
120
+#
121
+# <- { 'event': 'NETDEV_STREAM_DISCONNECTED',
122
+# 'data': {'netdev-id': 'netdev0'},
123
+# 'timestamp': {'seconds': 1663330937, 'microseconds': 526695} }
124
+#
125
+##
126
+{ 'event': 'NETDEV_STREAM_DISCONNECTED',
127
+ 'data': { 'netdev-id': 'str' } }
128
--
129
2.7.4
diff view generated by jsdifflib