...

This means we now have 2 virtio-vsock backends that both have the pushback
logic. If the parent's TX queue runs full at the same time as the
Enclave's, both virtio-vsock drivers fall into the pushback path and
no longer accept RX traffic. However, that RX traffic is TX traffic on
the other side which blocks that driver from making any forward
progress. We're now in a deadlock.

To resolve this, let's remove that pushback logic altogether and rely on
higher levels (like credits) to ensure we do not consume unbounded
memory.
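
(Illustration only, not part of this patch.) The credit mechanism referenced
above bounds in-flight data per socket: a sender may only have the peer's
advertised buffer space, minus what the peer has not yet consumed,
outstanding at any time. A minimal userspace sketch of that accounting,
with hypothetical names that loosely mirror the vsock counters:

  #include <stdint.h>
  #include <stdio.h>

  /* Hypothetical per-socket counters mirroring the credit idea: the peer
   * advertises peer_buf_alloc (its receive buffer size) and peer_fwd_cnt
   * (bytes it has consumed so far); we track tx_cnt (bytes we have sent).
   */
  struct credit_state {
  	uint32_t peer_buf_alloc;
  	uint32_t peer_fwd_cnt;
  	uint32_t tx_cnt;
  };

  /* Bytes we may still send without overrunning the peer's buffer. */
  static uint32_t tx_credit(const struct credit_state *s)
  {
  	return s->peer_buf_alloc - (s->tx_cnt - s->peer_fwd_cnt);
  }

  int main(void)
  {
  	struct credit_state s = { .peer_buf_alloc = 262144 };

  	s.tx_cnt += 200000;       /* we sent 200000 bytes      */
  	s.peer_fwd_cnt += 150000; /* peer consumed 150000 bytes */
  	printf("credit left: %u bytes\n", tx_credit(&s));
  	return 0;
  }

Since a well-behaved sender stops once its credit reaches zero, the amount
of data queued per socket stays bounded by the peer's advertised buffer.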

RX and TX queues share the same work queue. To prevent starvation of TX
by an RX flood and vice versa now that the pushback logic is gone, let's
deliberately reschedule RX and TX work after a fixed threshold (256) of
packets to process.
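
(Again, illustration only, not part of the patch; the real change is in the
diff below.) The rescheduling amounts to: handle at most a fixed number of
packets per work invocation, then requeue yourself so other work items
sharing the workqueue get to run. A small userspace sketch of that
bounded-batch pattern, names made up:

  #include <stdbool.h>
  #include <stdio.h>

  #define MAX_PKTS_PER_WORK 256	/* same fixed-threshold idea as the patch */

  /* Drain up to MAX_PKTS_PER_WORK items; return true if we stopped early
   * and therefore need to be queued again (kernel work would call
   * queue_work() on itself at that point).
   */
  static bool process_batch(int *pending)
  {
  	int done = 0;

  	while (*pending > 0) {
  		if (++done > MAX_PKTS_PER_WORK)
  			return true;	/* yield to other work on the queue */
  		(*pending)--;		/* "handle" one packet */
  	}
  	return false;
  }

  int main(void)
  {
  	int pending = 600;
  	int runs = 0;

  	/* Simulate the workqueue re-invoking the handler until done. */
  	while (process_batch(&pending))
  		runs++;
  	runs++;

  	printf("drained in %d work invocations\n", runs);
  	return 0;
  }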

Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko")
Signed-off-by: Alexander Graf <graf@amazon.com>

---

v1 -> v2:

- Rework to use fixed threshold

v2 -> v3:

- Remove superfluous reply variable
---
 net/vmw_vsock/virtio_transport.c | 73 +++++++++----------------------
 1 file changed, 19 insertions(+), 54 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index XXXXXXX..XXXXXXX 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -XXX,XX +XXX,XX @@ static struct virtio_vsock __rcu *the_virtio_vsock;
 static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
 static struct virtio_transport virtio_transport; /* forward declaration */
 
+/*
+ * Max number of RX packets transferred before requeueing so we do
+ * not starve TX traffic because they share the same work queue.
+ */
+#define VSOCK_MAX_PKTS_PER_WORK 256
+
 struct virtio_vsock {
 	struct virtio_device *vdev;
 	struct virtqueue *vqs[VSOCK_VQ_MAX];
@@ -XXX,XX +XXX,XX @@ struct virtio_vsock {
 	struct work_struct send_pkt_work;
 	struct sk_buff_head send_pkt_queue;
 
-	atomic_t queued_replies;
-
 	/* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
 	 * must be accessed with rx_lock held.
 	 */
@@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt_work(struct work_struct *work)
 		container_of(work, struct virtio_vsock, send_pkt_work);
 	struct virtqueue *vq;
 	bool added = false;
-	bool restart_rx = false;
+	int pkts = 0;
 
 	mutex_lock(&vsock->tx_lock);
 
@@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt_work(struct work_struct *work)
 
 	for (;;) {
 		struct sk_buff *skb;
-		bool reply;
 		int ret;
 
+		if (++pkts > VSOCK_MAX_PKTS_PER_WORK) {
+			/* Allow other works on the same queue to run */
+			queue_work(virtio_vsock_workqueue, work);
+			break;
+		}
+
 		skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
 		if (!skb)
 			break;
 
-		reply = virtio_vsock_skb_reply(skb);
-
 		ret = virtio_transport_send_skb(skb, vq, vsock, GFP_KERNEL);
 		if (ret < 0) {
 			virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
 			break;
 		}
 
-		if (reply) {
-			struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
-			int val;
-
...
-		}
-
 		added = true;
 	}
 
@@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt_work(struct work_struct *work)
 
 out:
 	mutex_unlock(&vsock->tx_lock);
-
-	if (restart_rx)
-		queue_work(virtio_vsock_workqueue, &vsock->rx_work);
 }
 
 /* Caller need to hold RCU for vsock.
@@ -XXX,XX +XXX,XX @@ virtio_transport_send_pkt(struct sk_buff *skb)
 	 */
 	if (!skb_queue_empty_lockless(&vsock->send_pkt_queue) ||
 	    virtio_transport_send_skb_fast_path(vsock, skb)) {
-		if (virtio_vsock_skb_reply(skb))
-			atomic_inc(&vsock->queued_replies);
-
 		virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
 		queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
 	}
@@ -XXX,XX +XXX,XX @@ static int
 virtio_transport_cancel_pkt(struct vsock_sock *vsk)
 {
 	struct virtio_vsock *vsock;
-	int cnt = 0, ret;
...
-
 /* event_lock must be held */
 static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
 				       struct virtio_vsock_event *event)
@@ -XXX,XX +XXX,XX @@ static void virtio_transport_rx_work(struct work_struct *work)
 	struct virtio_vsock *vsock =
 		container_of(work, struct virtio_vsock, rx_work);
 	struct virtqueue *vq;
+	int pkts = 0;
 
 	vq = vsock->vqs[VSOCK_VQ_RX];
 
@@ -XXX,XX +XXX,XX @@ static void virtio_transport_rx_work(struct work_struct *work)
 		struct sk_buff *skb;
 		unsigned int len;
 
-		if (!virtio_transport_more_replies(vsock)) {
-			/* Stop rx until the device processes already
-			 * pending replies. Leave rx virtqueue
-			 * callbacks disabled.
-			 */
+		if (++pkts > VSOCK_MAX_PKTS_PER_WORK) {
+			/* Allow other works on the same queue to run */
+			queue_work(virtio_vsock_workqueue, work);
 			goto out;
 		}
 
@@ -XXX,XX +XXX,XX @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
 	vsock->rx_buf_max_nr = 0;
 	mutex_unlock(&vsock->rx_lock);
 
-	atomic_set(&vsock->queued_replies, 0);
-
 	ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL);
 	if (ret < 0)
 		return ret;
-- 
2.47.1