... | ... | ||
---|---|---|---|
16 | This means we now have 2 virtio-vsock backends that both have the pushback | 16 | This means we now have 2 virtio-vsock backends that both have the pushback |
17 | logic. If the parent's TX queue runs full at the same time as the | 17 | logic. If the parent's TX queue runs full at the same time as the |
18 | Enclave's, both virtio-vsock drivers fall into the pushback path and | 18 | Enclave's, both virtio-vsock drivers fall into the pushback path and |
19 | no longer accept RX traffic. However, that RX traffic is TX traffic on | 19 | no longer accept RX traffic. However, that RX traffic is TX traffic on |
20 | the other side which blocks that driver from making any forward | 20 | the other side which blocks that driver from making any forward |
21 | progress. We're not in a deadlock. | 21 | progress. We're now in a deadlock. |
22 | 22 | ||
23 | To resolve this, let's remove that pushback logic altogether and rely on | 23 | To resolve this, let's remove that pushback logic altogether and rely on |
24 | higher levels (like credits) to ensure we do not consume unbounded | 24 | higher levels (like credits) to ensure we do not consume unbounded |
25 | memory. | 25 | memory. |
26 | 26 | ||
27 | RX and TX queues share the same work queue. To prevent starvation of TX | ||
28 | by an RX flood and vice versa now that the pushback logic is gone, let's | ||
29 | deliberately reschedule RX and TX work after a fixed threshold (256) of | ||
30 | packets to process. | ||
31 | |||
27 | Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko") | 32 | Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko") |
28 | Signed-off-by: Alexander Graf <graf@amazon.com> | 33 | Signed-off-by: Alexander Graf <graf@amazon.com> |
29 | --- | 34 | --- |
30 | net/vmw_vsock/virtio_transport.c | 51 ++------------------------------ | 35 | net/vmw_vsock/virtio_transport.c | 70 +++++++++----------------------- |
31 | 1 file changed, 2 insertions(+), 49 deletions(-) | 36 | 1 file changed, 19 insertions(+), 51 deletions(-) |
32 | 37 | ||
33 | diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c | 38 | diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c |
34 | index XXXXXXX..XXXXXXX 100644 | 39 | index XXXXXXX..XXXXXXX 100644 |
35 | --- a/net/vmw_vsock/virtio_transport.c | 40 | --- a/net/vmw_vsock/virtio_transport.c |
36 | +++ b/net/vmw_vsock/virtio_transport.c | 41 | +++ b/net/vmw_vsock/virtio_transport.c |
42 | @@ -XXX,XX +XXX,XX @@ static struct virtio_vsock __rcu *the_virtio_vsock; | ||
43 | static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ | ||
44 | static struct virtio_transport virtio_transport; /* forward declaration */ | ||
45 | |||
46 | +/* | ||
47 | + * Max number of RX packets transferred before requeueing so we do | ||
48 | + * not starve TX traffic because they share the same work queue. | ||
49 | + */ | ||
50 | +#define VSOCK_MAX_PKTS_PER_WORK 256 | ||
51 | + | ||
52 | struct virtio_vsock { | ||
53 | struct virtio_device *vdev; | ||
54 | struct virtqueue *vqs[VSOCK_VQ_MAX]; | ||
37 | @@ -XXX,XX +XXX,XX @@ struct virtio_vsock { | 55 | @@ -XXX,XX +XXX,XX @@ struct virtio_vsock { |
38 | struct work_struct send_pkt_work; | 56 | struct work_struct send_pkt_work; |
39 | struct sk_buff_head send_pkt_queue; | 57 | struct sk_buff_head send_pkt_queue; |
40 | 58 | ||
41 | - atomic_t queued_replies; | 59 | - atomic_t queued_replies; |
42 | - | 60 | - |
43 | /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX] | 61 | /* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX] |
44 | * must be accessed with rx_lock held. | 62 | * must be accessed with rx_lock held. |
45 | */ | 63 | */ |
46 | @@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt_work(struct work_struct *work) | 64 | @@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt_work(struct work_struct *work) |
47 | 65 | container_of(work, struct virtio_vsock, send_pkt_work); | |
48 | virtio_transport_deliver_tap_pkt(skb); | 66 | struct virtqueue *vq; |
67 | bool added = false; | ||
68 | - bool restart_rx = false; | ||
69 | + int pkts = 0; | ||
70 | |||
71 | mutex_lock(&vsock->tx_lock); | ||
72 | |||
73 | @@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt_work(struct work_struct *work) | ||
74 | bool reply; | ||
75 | int ret; | ||
76 | |||
77 | + if (++pkts > VSOCK_MAX_PKTS_PER_WORK) { | ||
78 | + /* Allow other works on the same queue to run */ | ||
79 | + queue_work(virtio_vsock_workqueue, work); | ||
80 | + break; | ||
81 | + } | ||
82 | + | ||
83 | skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue); | ||
84 | if (!skb) | ||
85 | break; | ||
86 | @@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt_work(struct work_struct *work) | ||
87 | break; | ||
88 | } | ||
49 | 89 | ||
50 | - if (reply) { | 90 | - if (reply) { |
51 | - struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; | 91 | - struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; |
52 | - int val; | 92 | - int val; |
53 | - | 93 | - |
... | ... | ||
59 | - } | 99 | - } |
60 | - | 100 | - |
61 | added = true; | 101 | added = true; |
62 | } | 102 | } |
63 | 103 | ||
104 | @@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt_work(struct work_struct *work) | ||
105 | |||
106 | out: | ||
107 | mutex_unlock(&vsock->tx_lock); | ||
108 | - | ||
109 | - if (restart_rx) | ||
110 | - queue_work(virtio_vsock_workqueue, &vsock->rx_work); | ||
111 | } | ||
112 | |||
113 | /* Caller need to hold RCU for vsock. | ||
64 | @@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt(struct sk_buff *skb) | 114 | @@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt(struct sk_buff *skb) |
65 | goto out_rcu; | 115 | */ |
116 | if (!skb_queue_empty_lockless(&vsock->send_pkt_queue) || | ||
117 | virtio_transport_send_skb_fast_path(vsock, skb)) { | ||
118 | - if (virtio_vsock_skb_reply(skb)) | ||
119 | - atomic_inc(&vsock->queued_replies); | ||
120 | - | ||
121 | virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); | ||
122 | queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); | ||
66 | } | 123 | } |
67 | |||
68 | - if (virtio_vsock_skb_reply(skb)) | ||
69 | - atomic_inc(&vsock->queued_replies); | ||
70 | - | ||
71 | virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb); | ||
72 | queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work); | ||
73 | |||
74 | @@ -XXX,XX +XXX,XX @@ static int | 124 | @@ -XXX,XX +XXX,XX @@ static int |
75 | virtio_transport_cancel_pkt(struct vsock_sock *vsk) | 125 | virtio_transport_cancel_pkt(struct vsock_sock *vsk) |
76 | { | 126 | { |
77 | struct virtio_vsock *vsock; | 127 | struct virtio_vsock *vsock; |
78 | - int cnt = 0, ret; | 128 | - int cnt = 0, ret; |
... | ... | ||
117 | - | 167 | - |
118 | /* event_lock must be held */ | 168 | /* event_lock must be held */ |
119 | static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock, | 169 | static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock, |
120 | struct virtio_vsock_event *event) | 170 | struct virtio_vsock_event *event) |
121 | @@ -XXX,XX +XXX,XX @@ static void virtio_transport_rx_work(struct work_struct *work) | 171 | @@ -XXX,XX +XXX,XX @@ static void virtio_transport_rx_work(struct work_struct *work) |
172 | struct virtio_vsock *vsock = | ||
173 | container_of(work, struct virtio_vsock, rx_work); | ||
174 | struct virtqueue *vq; | ||
175 | + int pkts = 0; | ||
176 | |||
177 | vq = vsock->vqs[VSOCK_VQ_RX]; | ||
178 | |||
179 | @@ -XXX,XX +XXX,XX @@ static void virtio_transport_rx_work(struct work_struct *work) | ||
122 | struct sk_buff *skb; | 180 | struct sk_buff *skb; |
123 | unsigned int len; | 181 | unsigned int len; |
124 | 182 | ||
125 | - if (!virtio_transport_more_replies(vsock)) { | 183 | - if (!virtio_transport_more_replies(vsock)) { |
126 | - /* Stop rx until the device processes already | 184 | - /* Stop rx until the device processes already |
127 | - * pending replies. Leave rx virtqueue | 185 | - * pending replies. Leave rx virtqueue |
128 | - * callbacks disabled. | 186 | - * callbacks disabled. |
129 | - */ | 187 | - */ |
130 | - goto out; | 188 | + if (++pkts > VSOCK_MAX_PKTS_PER_WORK) { |
131 | - } | 189 | + /* Allow other works on the same queue to run */ |
132 | - | 190 | + queue_work(virtio_vsock_workqueue, work); |
133 | skb = virtqueue_get_buf(vq, &len); | 191 | goto out; |
134 | if (!skb) | 192 | } |
135 | break; | 193 | |
136 | @@ -XXX,XX +XXX,XX @@ static int virtio_vsock_probe(struct virtio_device *vdev) | 194 | @@ -XXX,XX +XXX,XX @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) |
137 | |||
138 | vsock->rx_buf_nr = 0; | ||
139 | vsock->rx_buf_max_nr = 0; | 195 | vsock->rx_buf_max_nr = 0; |
196 | mutex_unlock(&vsock->rx_lock); | ||
197 | |||
140 | - atomic_set(&vsock->queued_replies, 0); | 198 | - atomic_set(&vsock->queued_replies, 0); |
141 | 199 | - | |
142 | mutex_init(&vsock->tx_lock); | 200 | ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL); |
143 | mutex_init(&vsock->rx_lock); | 201 | if (ret < 0) |
202 | return ret; | ||
144 | -- | 203 | -- |
145 | 2.40.1 | 204 | 2.47.1 |
146 | |||
147 | |||
148 | |||
149 | |||
150 | Amazon Web Services Development Center Germany GmbH | ||
151 | Krausenstr. 38 | ||
152 | 10117 Berlin | ||
153 | Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss | ||
154 | Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B | ||
155 | Sitz: Berlin | ||
156 | Ust-ID: DE 365 538 597 | diff view generated by jsdifflib |