When pausing rx (e.g. setting up xdp, an xsk pool, or an rx resize), we
call napi_disable() on the receive queue's napi. The delayed
refill_work also calls napi_disable() on the receive queue's napi. This
can lead to a deadlock when napi_disable() is called on an already
disabled napi. This scenario can be reproduced by binding an XDP socket
to a virtio-net interface without setting up the fill ring. As a
result, try_fill_recv will fail until the fill ring is set up and
refill_work is scheduled.
This commit adds virtnet_rx_(pause/resume)_all helpers and fixes up
virtnet_rx_pause/resume to disable any future delayed refill_work and
cancel all in-flight instances before calling napi_disable() to pause
the rx.
Fixes: 6a4763e26803 ("virtio_net: support rx queue resize")
Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set")
Fixes: 09d2b3182c8e ("virtio_net: xsk: bind/unbind xsk for rx")
Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
drivers/net/virtio_net.c | 60 ++++++++++++++++++++++++++++++++++------
1 file changed, 51 insertions(+), 9 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7e4617216a4b..4361b91ccc64 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3342,10 +3342,53 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
+static void virtnet_rx_pause_all(struct virtnet_info *vi)
+{
+ bool running = netif_running(vi->dev);
+
+ /*
+ * Make sure refill_work does not run concurrently to
+ * avoid napi_disable race which leads to deadlock.
+ */
+ disable_delayed_refill(vi);
+ cancel_delayed_work_sync(&vi->refill);
+ if (running) {
+ int i;
+
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ virtnet_napi_disable(&vi->rq[i]);
+ virtnet_cancel_dim(vi, &vi->rq[i].dim);
+ }
+ }
+}
+
+static void virtnet_rx_resume_all(struct virtnet_info *vi)
+{
+ bool running = netif_running(vi->dev);
+ int i;
+
+ enable_delayed_refill(vi);
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (i < vi->curr_queue_pairs) {
+ if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
+ schedule_delayed_work(&vi->refill, 0);
+ }
+
+ if (running)
+ virtnet_napi_enable(&vi->rq[i]);
+ }
+}
+
static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
{
bool running = netif_running(vi->dev);
+ /*
+ * Make sure refill_work does not run concurrently to
+ * avoid napi_disable race which leads to deadlock.
+ */
+ disable_delayed_refill(vi);
+ cancel_delayed_work_sync(&vi->refill);
if (running) {
virtnet_napi_disable(rq);
virtnet_cancel_dim(vi, &rq->dim);
@@ -3356,6 +3399,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
{
bool running = netif_running(vi->dev);
+ enable_delayed_refill(vi);
if (!try_fill_recv(vi, rq, GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
@@ -5959,12 +6003,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (prog)
bpf_prog_add(prog, vi->max_queue_pairs - 1);
+ virtnet_rx_pause_all(vi);
+
/* Make sure NAPI is not using any XDP TX queues for RX. */
if (netif_running(dev)) {
- for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_disable(&vi->rq[i]);
+ for (i = 0; i < vi->max_queue_pairs; i++)
virtnet_napi_tx_disable(&vi->sq[i]);
- }
}
if (!prog) {
@@ -5996,13 +6040,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
vi->xdp_enabled = false;
}
+ virtnet_rx_resume_all(vi);
for (i = 0; i < vi->max_queue_pairs; i++) {
if (old_prog)
bpf_prog_put(old_prog);
- if (netif_running(dev)) {
- virtnet_napi_enable(&vi->rq[i]);
+ if (netif_running(dev))
virtnet_napi_tx_enable(&vi->sq[i]);
- }
}
return 0;
@@ -6014,11 +6057,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
}
+ virtnet_rx_resume_all(vi);
if (netif_running(dev)) {
- for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_enable(&vi->rq[i]);
+ for (i = 0; i < vi->max_queue_pairs; i++)
virtnet_napi_tx_enable(&vi->sq[i]);
- }
}
if (prog)
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
--
2.43.0
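For reference, the disable_delayed_refill()/enable_delayed_refill() helpers
the patch builds on already exist in drivers/net/virtio_net.c; a paraphrased
sketch of what they do (not verbatim source):

/* Paraphrased sketch of the existing virtio_net.c helpers, not verbatim
 * source. refill_enabled is checked under refill_lock before refill_work
 * is (re)scheduled, so clearing it plus cancel_delayed_work_sync() ensures
 * no refill_work can run concurrently with the rx pause.
 */
static void disable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = false;
	spin_unlock_bh(&vi->refill_lock);
}

static void enable_delayed_refill(struct virtnet_info *vi)
{
	spin_lock_bh(&vi->refill_lock);
	vi->refill_enabled = true;
	spin_unlock_bh(&vi->refill_lock);
}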
On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
> napi_disable() on the receive queue's napi. In delayed refill_work, it
> also calls napi_disable() on the receive queue's napi. This can leads to
> deadlock when napi_disable() is called on an already disabled napi. This
> scenario can be reproducible by binding a XDP socket to virtio-net
> interface without setting up the fill ring. As a result, try_fill_recv
> will fail until the fill ring is set up and refill_work is scheduled.
So, what is the problem? That refill_work is waiting? As far as I know,
that thread will sleep for some time, so the CPU can do other work.
Thanks.
>
> This commit adds virtnet_rx_(pause/resume)_all helpers and fixes up the
> virtnet_rx_resume to disable future and cancel all inflights delayed
> refill_work before calling napi_disable() to pause the rx.
>
> Fixes: 6a4763e26803 ("virtio_net: support rx queue resize")
> Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set")
> Fixes: 09d2b3182c8e ("virtio_net: xsk: bind/unbind xsk for rx")
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> ---
> drivers/net/virtio_net.c | 60 ++++++++++++++++++++++++++++++++++------
> 1 file changed, 51 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7e4617216a4b..4361b91ccc64 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -3342,10 +3342,53 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> return NETDEV_TX_OK;
> }
>
> +static void virtnet_rx_pause_all(struct virtnet_info *vi)
> +{
> + bool running = netif_running(vi->dev);
> +
> + /*
> + * Make sure refill_work does not run concurrently to
> + * avoid napi_disable race which leads to deadlock.
> + */
> + disable_delayed_refill(vi);
> + cancel_delayed_work_sync(&vi->refill);
> + if (running) {
> + int i;
> +
> + for (i = 0; i < vi->max_queue_pairs; i++) {
> + virtnet_napi_disable(&vi->rq[i]);
> + virtnet_cancel_dim(vi, &vi->rq[i].dim);
> + }
> + }
> +}
> +
> +static void virtnet_rx_resume_all(struct virtnet_info *vi)
> +{
> + bool running = netif_running(vi->dev);
> + int i;
> +
> + enable_delayed_refill(vi);
> + for (i = 0; i < vi->max_queue_pairs; i++) {
> + if (i < vi->curr_queue_pairs) {
> + if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
> + schedule_delayed_work(&vi->refill, 0);
> + }
> +
> + if (running)
> + virtnet_napi_enable(&vi->rq[i]);
> + }
> +}
> +
> static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
> {
> bool running = netif_running(vi->dev);
>
> + /*
> + * Make sure refill_work does not run concurrently to
> + * avoid napi_disable race which leads to deadlock.
> + */
> + disable_delayed_refill(vi);
> + cancel_delayed_work_sync(&vi->refill);
> if (running) {
> virtnet_napi_disable(rq);
> virtnet_cancel_dim(vi, &rq->dim);
> @@ -3356,6 +3399,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
> {
> bool running = netif_running(vi->dev);
>
> + enable_delayed_refill(vi);
> if (!try_fill_recv(vi, rq, GFP_KERNEL))
> schedule_delayed_work(&vi->refill, 0);
>
> @@ -5959,12 +6003,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> if (prog)
> bpf_prog_add(prog, vi->max_queue_pairs - 1);
>
> + virtnet_rx_pause_all(vi);
> +
> /* Make sure NAPI is not using any XDP TX queues for RX. */
> if (netif_running(dev)) {
> - for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_disable(&vi->rq[i]);
> + for (i = 0; i < vi->max_queue_pairs; i++)
> virtnet_napi_tx_disable(&vi->sq[i]);
> - }
> }
>
> if (!prog) {
> @@ -5996,13 +6040,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> vi->xdp_enabled = false;
> }
>
> + virtnet_rx_resume_all(vi);
> for (i = 0; i < vi->max_queue_pairs; i++) {
> if (old_prog)
> bpf_prog_put(old_prog);
> - if (netif_running(dev)) {
> - virtnet_napi_enable(&vi->rq[i]);
> + if (netif_running(dev))
> virtnet_napi_tx_enable(&vi->sq[i]);
> - }
> }
>
> return 0;
> @@ -6014,11 +6057,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
> rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog);
> }
>
> + virtnet_rx_resume_all(vi);
> if (netif_running(dev)) {
> - for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_enable(&vi->rq[i]);
> + for (i = 0; i < vi->max_queue_pairs; i++)
> virtnet_napi_tx_enable(&vi->sq[i]);
> - }
> }
> if (prog)
> bpf_prog_sub(prog, vi->max_queue_pairs - 1);
> --
> 2.43.0
>
On 4/7/25 08:03, Xuan Zhuo wrote:
> On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
>> napi_disable() on the receive queue's napi. In delayed refill_work, it
>> also calls napi_disable() on the receive queue's napi. This can leads to
>> deadlock when napi_disable() is called on an already disabled napi. This
>> scenario can be reproducible by binding a XDP socket to virtio-net
>> interface without setting up the fill ring. As a result, try_fill_recv
>> will fail until the fill ring is set up and refill_work is scheduled.
>
> So, what is the problem? The refill_work is waiting? As I know, that thread
> will sleep some time, so the cpu can do other work.

When napi_disable is called on an already disabled napi, it will sleep
in napi_disable_locked while still holding the netdev_lock. As a result,
the later napi_enable gets stuck too as it cannot acquire the netdev_lock.
This leads to refill_work and the pause-then-resume tx being stuck
altogether.

Thanks,
Quang Minh.
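For context, the hang can be seen from the core NAPI helpers themselves.
A simplified paraphrase (not verbatim net/core/dev.c, details trimmed) of
the behaviour after commit 413f0271f396:

/* Simplified paraphrase of the core helpers, not verbatim source.
 * napi_disable() leaves NAPI_STATE_SCHED set, so disabling an already
 * disabled napi never gets past the loop below -- and it sleeps there
 * while holding the per-device instance lock, so a concurrent
 * napi_enable() (which also takes netdev_lock) blocks forever.
 */
void napi_disable_locked(struct napi_struct *n)
{
	set_bit(NAPI_STATE_DISABLE, &n->state);

	/* Take ownership by setting SCHED; a disabled napi already has
	 * SCHED set, so a second disable spins here indefinitely.
	 */
	while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
		usleep_range(20, 200);

	clear_bit(NAPI_STATE_DISABLE, &n->state);
}

void napi_disable(struct napi_struct *n)
{
	netdev_lock(n->dev);	/* held for the whole wait above */
	napi_disable_locked(n);
	netdev_unlock(n->dev);
}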
On Mon, Apr 7, 2025 at 10:27 AM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>
> On 4/7/25 08:03, Xuan Zhuo wrote:
> > On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
> >> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
> >> napi_disable() on the receive queue's napi. In delayed refill_work, it
> >> also calls napi_disable() on the receive queue's napi. This can leads to
> >> deadlock when napi_disable() is called on an already disabled napi. This
> >> scenario can be reproducible by binding a XDP socket to virtio-net
> >> interface without setting up the fill ring. As a result, try_fill_recv
> >> will fail until the fill ring is set up and refill_work is scheduled.
> >
> > So, what is the problem? The refill_work is waiting? As I know, that thread
> > will sleep some time, so the cpu can do other work.
>
> When napi_disable is called on an already disabled napi, it will sleep
> in napi_disable_locked while still holding the netdev_lock. As a result,
> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
> This leads to refill_work and the pause-then-resume tx are stuck altogether.
This needs to be added to the changelog. And it looks like this is a fix for
commit 413f0271f3966e0c73d4937963f19335af19e628
Author: Jakub Kicinski <kuba@kernel.org>
Date: Tue Jan 14 19:53:14 2025 -0800
net: protect NAPI enablement with netdev_lock()
?
I wonder if it's simpler to just hold the netdev lock in resize or xsk
binding instead of this.
Thanks
>
> Thanks,
> Quang Minh.
>
On 4/8/25 14:34, Jason Wang wrote:
> On Mon, Apr 7, 2025 at 10:27 AM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>> On 4/7/25 08:03, Xuan Zhuo wrote:
>>> On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>>>> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
>>>> napi_disable() on the receive queue's napi. In delayed refill_work, it
>>>> also calls napi_disable() on the receive queue's napi. This can leads to
>>>> deadlock when napi_disable() is called on an already disabled napi. This
>>>> scenario can be reproducible by binding a XDP socket to virtio-net
>>>> interface without setting up the fill ring. As a result, try_fill_recv
>>>> will fail until the fill ring is set up and refill_work is scheduled.
>>>
>>> So, what is the problem? The refill_work is waiting? As I know, that thread
>>> will sleep some time, so the cpu can do other work.
>>
>> When napi_disable is called on an already disabled napi, it will sleep
>> in napi_disable_locked while still holding the netdev_lock. As a result,
>> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
>> This leads to refill_work and the pause-then-resume tx are stuck altogether.
>
> This needs to be added to the chagelog. And it looks like this is a fix for
>
> commit 413f0271f3966e0c73d4937963f19335af19e628
> Author: Jakub Kicinski <kuba@kernel.org>
> Date: Tue Jan 14 19:53:14 2025 -0800
>
> net: protect NAPI enablement with netdev_lock()
>
> ?

I'm not aware of this, will update the fix tags in the next patch.

> I wonder if it's simpler to just hold the netdev lock in resize or xsk
> binding instead of this.

That looks cleaner, let me try that approach.

Thanks,
Quang Minh
On 4/9/25 13:44, Bui Quang Minh wrote:
> On 4/8/25 14:34, Jason Wang wrote:
>> On Mon, Apr 7, 2025 at 10:27 AM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>>> On 4/7/25 08:03, Xuan Zhuo wrote:
>>>> On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>>>>> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
>>>>> napi_disable() on the receive queue's napi. In delayed refill_work, it
>>>>> also calls napi_disable() on the receive queue's napi. This can leads to
>>>>> deadlock when napi_disable() is called on an already disabled napi. This
>>>>> scenario can be reproducible by binding a XDP socket to virtio-net
>>>>> interface without setting up the fill ring. As a result, try_fill_recv
>>>>> will fail until the fill ring is set up and refill_work is scheduled.
>>>> So, what is the problem? The refill_work is waiting? As I know, that thread
>>>> will sleep some time, so the cpu can do other work.
>>> When napi_disable is called on an already disabled napi, it will sleep
>>> in napi_disable_locked while still holding the netdev_lock. As a result,
>>> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
>>> This leads to refill_work and the pause-then-resume tx are stuck altogether.
>> This needs to be added to the chagelog. And it looks like this is a fix for
>>
>> commit 413f0271f3966e0c73d4937963f19335af19e628
>> Author: Jakub Kicinski <kuba@kernel.org>
>> Date: Tue Jan 14 19:53:14 2025 -0800
>>
>> net: protect NAPI enablement with netdev_lock()
>>
>> ?
>
> I'm not aware of this, will update the fix tags in the next patch.
>
>> I wonder if it's simpler to just hold the netdev lock in resize or xsk
>> binding instead of this.
>
> That looks cleaner, let me try that approach.

We need to hold the netdev_lock in the refill work too. Otherwise, we
get this race:

refill_work                        xdp bind
                                   napi_disable()
netdev_lock(dev)
napi_disable_locked() <- hang
                                   napi_enable() <- cannot acquire the lock

I don't like the idea of holding netdev_lock in refill_work. I will
reply with the patch for you to review.

Thanks,
Quang Minh.
When pausing rx (e.g. setting up xdp, an xsk pool, or an rx resize), we
call napi_disable() on the receive queue's napi. The delayed
refill_work also calls napi_disable() on the receive queue's napi. When
napi_disable() is called on an already disabled napi, it will sleep in
napi_disable_locked while still holding the netdev_lock. As a result,
the later napi_enable gets stuck too as it cannot acquire the
netdev_lock. This leads to refill_work and the pause-then-resume tx
being stuck altogether.
This scenario can be reproduced by binding an XDP socket to a
virtio-net interface without setting up the fill ring. As a result,
try_fill_recv will fail until the fill ring is set up and refill_work
is scheduled.
This commit makes the rx pause path hold the netdev_lock until resume,
preventing any napi_disable() from being called on a temporarily
disabled napi.
Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
1 file changed, 47 insertions(+), 27 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7e4617216a4b..74bd1065c586 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
}
static void virtnet_napi_do_enable(struct virtqueue *vq,
- struct napi_struct *napi)
+ struct napi_struct *napi,
+ bool netdev_locked)
{
- napi_enable(napi);
+ if (netdev_locked)
+ napi_enable_locked(napi);
+ else
+ napi_enable(napi);
/* If all buffers were filled by other side before we napi_enabled, we
* won't get another interrupt, so process any outstanding packets now.
@@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue *vq,
local_bh_enable();
}
-static void virtnet_napi_enable(struct receive_queue *rq)
+static void virtnet_napi_enable(struct receive_queue *rq, bool netdev_locked)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
int qidx = vq2rxq(rq->vq);
- virtnet_napi_do_enable(rq->vq, &rq->napi);
+ virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
}
-static void virtnet_napi_tx_enable(struct send_queue *sq)
+static void virtnet_napi_tx_enable(struct send_queue *sq, bool netdev_locked)
{
struct virtnet_info *vi = sq->vq->vdev->priv;
struct napi_struct *napi = &sq->napi;
@@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct send_queue *sq)
return;
}
- virtnet_napi_do_enable(sq->vq, napi);
+ virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
}
-static void virtnet_napi_tx_disable(struct send_queue *sq)
+static void virtnet_napi_tx_disable(struct send_queue *sq, bool netdev_locked)
{
struct virtnet_info *vi = sq->vq->vdev->priv;
struct napi_struct *napi = &sq->napi;
@@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct send_queue *sq)
if (napi->weight) {
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
- napi_disable(napi);
+ if (netdev_locked)
+ napi_disable_locked(napi);
+ else
+ napi_disable(napi);
}
}
-static void virtnet_napi_disable(struct receive_queue *rq)
+static void virtnet_napi_disable(struct receive_queue *rq, bool netdev_locked)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
struct napi_struct *napi = &rq->napi;
int qidx = vq2rxq(rq->vq);
netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
- napi_disable(napi);
+ if (netdev_locked)
+ napi_disable_locked(napi);
+ else
+ napi_disable(napi);
}
static void refill_work(struct work_struct *work)
@@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
* instance lock)
* - check netif_running() and return early to avoid a race
*/
- napi_disable(&rq->napi);
+ netdev_lock(vi->dev);
+ napi_disable_locked(&rq->napi);
still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
- virtnet_napi_do_enable(rq->vq, &rq->napi);
+ virtnet_napi_do_enable(rq->vq, &rq->napi, true);
+ netdev_unlock(vi->dev);
/* In theory, this can happen: if we don't get any buffers in
* we will *never* try to fill again.
@@ -3074,8 +3086,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
{
- virtnet_napi_tx_disable(&vi->sq[qp_index]);
- virtnet_napi_disable(&vi->rq[qp_index]);
+ virtnet_napi_tx_disable(&vi->sq[qp_index], false);
+ virtnet_napi_disable(&vi->rq[qp_index], false);
xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
}
@@ -3094,8 +3106,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
if (err < 0)
goto err_xdp_reg_mem_model;
- virtnet_napi_enable(&vi->rq[qp_index]);
- virtnet_napi_tx_enable(&vi->sq[qp_index]);
+ virtnet_napi_enable(&vi->rq[qp_index], false);
+ virtnet_napi_tx_enable(&vi->sq[qp_index], false);
return 0;
@@ -3347,7 +3359,8 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
bool running = netif_running(vi->dev);
if (running) {
- virtnet_napi_disable(rq);
+ netdev_lock(vi->dev);
+ virtnet_napi_disable(rq, true);
virtnet_cancel_dim(vi, &rq->dim);
}
}
@@ -3359,8 +3372,10 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
if (!try_fill_recv(vi, rq, GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
- if (running)
- virtnet_napi_enable(rq);
+ if (running) {
+ virtnet_napi_enable(rq, true);
+ netdev_unlock(vi->dev);
+ }
}
static int virtnet_rx_resize(struct virtnet_info *vi,
@@ -3389,7 +3404,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
qindex = sq - vi->sq;
if (running)
- virtnet_napi_tx_disable(sq);
+ virtnet_napi_tx_disable(sq, false);
txq = netdev_get_tx_queue(vi->dev, qindex);
@@ -3423,7 +3438,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
__netif_tx_unlock_bh(txq);
if (running)
- virtnet_napi_tx_enable(sq);
+ virtnet_napi_tx_enable(sq, false);
}
static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
@@ -5961,9 +5976,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
/* Make sure NAPI is not using any XDP TX queues for RX. */
if (netif_running(dev)) {
+ netdev_lock(dev);
for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_disable(&vi->rq[i]);
- virtnet_napi_tx_disable(&vi->sq[i]);
+ virtnet_napi_disable(&vi->rq[i], true);
+ virtnet_napi_tx_disable(&vi->sq[i], true);
}
}
@@ -6000,11 +6016,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (old_prog)
bpf_prog_put(old_prog);
if (netif_running(dev)) {
- virtnet_napi_enable(&vi->rq[i]);
- virtnet_napi_tx_enable(&vi->sq[i]);
+ virtnet_napi_enable(&vi->rq[i], true);
+ virtnet_napi_tx_enable(&vi->sq[i], true);
}
}
+ if (netif_running(dev))
+ netdev_unlock(dev);
+
return 0;
err:
@@ -6016,9 +6035,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (netif_running(dev)) {
for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_enable(&vi->rq[i]);
- virtnet_napi_tx_enable(&vi->sq[i]);
+ virtnet_napi_enable(&vi->rq[i], true);
+ virtnet_napi_tx_enable(&vi->sq[i], true);
}
+ netdev_unlock(dev);
}
if (prog)
bpf_prog_sub(prog, vi->max_queue_pairs - 1);
--
2.43.0
On Thu, Apr 10, 2025 at 02:05:57PM +0700, Bui Quang Minh wrote:
> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
> napi_disable() on the receive queue's napi. In delayed refill_work, it
> also calls napi_disable() on the receive queue's napi. When
> napi_disable() is called on an already disabled napi, it will sleep in
> napi_disable_locked while still holding the netdev_lock. As a result,
> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
> This leads to refill_work and the pause-then-resume tx are stuck
> altogether.
>
> This scenario can be reproducible by binding a XDP socket to virtio-net
> interface without setting up the fill ring. As a result, try_fill_recv
> will fail until the fill ring is set up and refill_work is scheduled.
>
> This commit makes the pausing rx path hold the netdev_lock until
> resuming, prevent any napi_disable() to be called on a temporarily
> disabled napi.
>
> Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> ---
> drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
> 1 file changed, 47 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7e4617216a4b..74bd1065c586 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
> }
>
> static void virtnet_napi_do_enable(struct virtqueue *vq,
> - struct napi_struct *napi)
> + struct napi_struct *napi,
> + bool netdev_locked)
> {
> - napi_enable(napi);
> + if (netdev_locked)
> + napi_enable_locked(napi);
> + else
> + napi_enable(napi);
>
> /* If all buffers were filled by other side before we napi_enabled, we
> * won't get another interrupt, so process any outstanding packets now.
> @@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue
> *vq,
Your patch is line-wrapped, unfortunately. Here and elsewhere.
> local_bh_enable();
> }
>
> -static void virtnet_napi_enable(struct receive_queue *rq)
> +static void virtnet_napi_enable(struct receive_queue *rq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = rq->vq->vdev->priv;
> int qidx = vq2rxq(rq->vq);
>
> - virtnet_napi_do_enable(rq->vq, &rq->napi);
> + virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
> }
>
> -static void virtnet_napi_tx_enable(struct send_queue *sq)
> +static void virtnet_napi_tx_enable(struct send_queue *sq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = sq->vq->vdev->priv;
> struct napi_struct *napi = &sq->napi;
> @@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct send_queue
> *sq)
> return;
> }
>
> - virtnet_napi_do_enable(sq->vq, napi);
> + virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
> }
>
> -static void virtnet_napi_tx_disable(struct send_queue *sq)
> +static void virtnet_napi_tx_disable(struct send_queue *sq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = sq->vq->vdev->priv;
> struct napi_struct *napi = &sq->napi;
> @@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct
> send_queue *sq)
>
> if (napi->weight) {
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
> - napi_disable(napi);
> + if (netdev_locked)
> + napi_disable_locked(napi);
> + else
> + napi_disable(napi);
> }
> }
>
> -static void virtnet_napi_disable(struct receive_queue *rq)
> +static void virtnet_napi_disable(struct receive_queue *rq, bool
> netdev_locked)
> {
> struct virtnet_info *vi = rq->vq->vdev->priv;
> struct napi_struct *napi = &rq->napi;
> int qidx = vq2rxq(rq->vq);
>
> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
> - napi_disable(napi);
> + if (netdev_locked)
> + napi_disable_locked(napi);
> + else
> + napi_disable(napi);
> }
>
> static void refill_work(struct work_struct *work)
> @@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
> * instance lock)
> * - check netif_running() and return early to avoid a race
> */
> - napi_disable(&rq->napi);
> + netdev_lock(vi->dev);
> + napi_disable_locked(&rq->napi);
> still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
This does mean netdev_lock is held potentially for a long while,
while try_fill_recv and processing inside virtnet_napi_do_enable
finish. Better ideas?
> - virtnet_napi_do_enable(rq->vq, &rq->napi);
> + virtnet_napi_do_enable(rq->vq, &rq->napi, true);
> + netdev_unlock(vi->dev);
>
> /* In theory, this can happen: if we don't get any buffers in
> * we will *never* try to fill again.
> @@ -3074,8 +3086,8 @@ static int virtnet_poll(struct napi_struct *napi, int
> budget)
>
> static void virtnet_disable_queue_pair(struct virtnet_info *vi, int
> qp_index)
> {
> - virtnet_napi_tx_disable(&vi->sq[qp_index]);
> - virtnet_napi_disable(&vi->rq[qp_index]);
> + virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> + virtnet_napi_disable(&vi->rq[qp_index], false);
> xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
> }
>
> @@ -3094,8 +3106,8 @@ static int virtnet_enable_queue_pair(struct
> virtnet_info *vi, int qp_index)
> if (err < 0)
> goto err_xdp_reg_mem_model;
>
> - virtnet_napi_enable(&vi->rq[qp_index]);
> - virtnet_napi_tx_enable(&vi->sq[qp_index]);
> + virtnet_napi_enable(&vi->rq[qp_index], false);
> + virtnet_napi_tx_enable(&vi->sq[qp_index], false);
>
> return 0;
>
> @@ -3347,7 +3359,8 @@ static void virtnet_rx_pause(struct virtnet_info *vi,
> struct receive_queue *rq)
> bool running = netif_running(vi->dev);
>
> if (running) {
> - virtnet_napi_disable(rq);
> + netdev_lock(vi->dev);
> + virtnet_napi_disable(rq, true);
> virtnet_cancel_dim(vi, &rq->dim);
> }
> }
> @@ -3359,8 +3372,10 @@ static void virtnet_rx_resume(struct virtnet_info
> *vi, struct receive_queue *rq)
> if (!try_fill_recv(vi, rq, GFP_KERNEL))
> schedule_delayed_work(&vi->refill, 0);
>
> - if (running)
> - virtnet_napi_enable(rq);
> + if (running) {
> + virtnet_napi_enable(rq, true);
> + netdev_unlock(vi->dev);
> + }
> }
>
> static int virtnet_rx_resize(struct virtnet_info *vi,
> @@ -3389,7 +3404,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi,
> struct send_queue *sq)
> qindex = sq - vi->sq;
>
> if (running)
> - virtnet_napi_tx_disable(sq);
> + virtnet_napi_tx_disable(sq, false);
>
> txq = netdev_get_tx_queue(vi->dev, qindex);
>
> @@ -3423,7 +3438,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi,
> struct send_queue *sq)
> __netif_tx_unlock_bh(txq);
>
> if (running)
> - virtnet_napi_tx_enable(sq);
> + virtnet_napi_tx_enable(sq, false);
> }
>
> static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue
> *sq,
> @@ -5961,9 +5976,10 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
>
> /* Make sure NAPI is not using any XDP TX queues for RX. */
> if (netif_running(dev)) {
> + netdev_lock(dev);
> for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_disable(&vi->rq[i]);
> - virtnet_napi_tx_disable(&vi->sq[i]);
> + virtnet_napi_disable(&vi->rq[i], true);
> + virtnet_napi_tx_disable(&vi->sq[i], true);
> }
> }
>
> @@ -6000,11 +6016,14 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
> if (old_prog)
> bpf_prog_put(old_prog);
> if (netif_running(dev)) {
> - virtnet_napi_enable(&vi->rq[i]);
> - virtnet_napi_tx_enable(&vi->sq[i]);
> + virtnet_napi_enable(&vi->rq[i], true);
> + virtnet_napi_tx_enable(&vi->sq[i], true);
> }
> }
>
> + if (netif_running(dev))
> + netdev_unlock(dev);
> +
> return 0;
>
> err:
> @@ -6016,9 +6035,10 @@ static int virtnet_xdp_set(struct net_device *dev,
> struct bpf_prog *prog,
>
> if (netif_running(dev)) {
> for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_napi_enable(&vi->rq[i]);
> - virtnet_napi_tx_enable(&vi->sq[i]);
> + virtnet_napi_enable(&vi->rq[i], true);
> + virtnet_napi_tx_enable(&vi->sq[i], true);
> }
> + netdev_unlock(dev);
> }
> if (prog)
> bpf_prog_sub(prog, vi->max_queue_pairs - 1);
> --
> 2.43.0
>
On 4/10/25 14:58, Michael S. Tsirkin wrote:
> On Thu, Apr 10, 2025 at 02:05:57PM +0700, Bui Quang Minh wrote:
>> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
>> napi_disable() on the receive queue's napi. In delayed refill_work, it
>> also calls napi_disable() on the receive queue's napi. When
>> napi_disable() is called on an already disabled napi, it will sleep in
>> napi_disable_locked while still holding the netdev_lock. As a result,
>> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
>> This leads to refill_work and the pause-then-resume tx are stuck
>> altogether.
>>
>> This scenario can be reproducible by binding a XDP socket to virtio-net
>> interface without setting up the fill ring. As a result, try_fill_recv
>> will fail until the fill ring is set up and refill_work is scheduled.
>>
>> This commit makes the pausing rx path hold the netdev_lock until
>> resuming, prevent any napi_disable() to be called on a temporarily
>> disabled napi.
>>
>> Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
>> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
>> ---
>> drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
>> 1 file changed, 47 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 7e4617216a4b..74bd1065c586 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
>> }
>>
>> static void virtnet_napi_do_enable(struct virtqueue *vq,
>> - struct napi_struct *napi)
>> + struct napi_struct *napi,
>> + bool netdev_locked)
>> {
>> - napi_enable(napi);
>> + if (netdev_locked)
>> + napi_enable_locked(napi);
>> + else
>> + napi_enable(napi);
>>
>> /* If all buffers were filled by other side before we napi_enabled, we
>> * won't get another interrupt, so process any outstanding packets now.
>> @@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue
>> *vq,
>
>
>
> Your patch is line-wrapped, unfortunately. Here and elsewhere.
>
>
>
>
>> local_bh_enable();
>> }
>>
>> -static void virtnet_napi_enable(struct receive_queue *rq)
>> +static void virtnet_napi_enable(struct receive_queue *rq, bool
>> netdev_locked)
>> {
>> struct virtnet_info *vi = rq->vq->vdev->priv;
>> int qidx = vq2rxq(rq->vq);
>>
>> - virtnet_napi_do_enable(rq->vq, &rq->napi);
>> + virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
>> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
>> }
>>
>> -static void virtnet_napi_tx_enable(struct send_queue *sq)
>> +static void virtnet_napi_tx_enable(struct send_queue *sq, bool
>> netdev_locked)
>> {
>> struct virtnet_info *vi = sq->vq->vdev->priv;
>> struct napi_struct *napi = &sq->napi;
>> @@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct send_queue
>> *sq)
>> return;
>> }
>>
>> - virtnet_napi_do_enable(sq->vq, napi);
>> + virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
>> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
>> }
>>
>> -static void virtnet_napi_tx_disable(struct send_queue *sq)
>> +static void virtnet_napi_tx_disable(struct send_queue *sq, bool
>> netdev_locked)
>> {
>> struct virtnet_info *vi = sq->vq->vdev->priv;
>> struct napi_struct *napi = &sq->napi;
>> @@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct
>> send_queue *sq)
>>
>> if (napi->weight) {
>> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
>> - napi_disable(napi);
>> + if (netdev_locked)
>> + napi_disable_locked(napi);
>> + else
>> + napi_disable(napi);
>> }
>> }
>>
>> -static void virtnet_napi_disable(struct receive_queue *rq)
>> +static void virtnet_napi_disable(struct receive_queue *rq, bool
>> netdev_locked)
>> {
>> struct virtnet_info *vi = rq->vq->vdev->priv;
>> struct napi_struct *napi = &rq->napi;
>> int qidx = vq2rxq(rq->vq);
>>
>> netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
>> - napi_disable(napi);
>> + if (netdev_locked)
>> + napi_disable_locked(napi);
>> + else
>> + napi_disable(napi);
>> }
>>
>> static void refill_work(struct work_struct *work)
>> @@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
>> * instance lock)
>> * - check netif_running() and return early to avoid a race
>> */
>> - napi_disable(&rq->napi);
>> + netdev_lock(vi->dev);
>> + napi_disable_locked(&rq->napi);
>> still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
>
> This does mean netdev_lock is held potentially for a long while,
> while try_fill_recv and processing inside virtnet_napi_do_enable
> finish. Better ideas?
I prefer the first patch in this thread where we disable delayed refill
and cancel all inflight refill_work before pausing rx.
Thanks,
Quang Minh.
On 4/8/25 9:34 AM, Jason Wang wrote:
> On Mon, Apr 7, 2025 at 10:27 AM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>> On 4/7/25 08:03, Xuan Zhuo wrote:
>>> On Fri, 4 Apr 2025 16:39:03 +0700, Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>>>> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
>>>> napi_disable() on the receive queue's napi. In delayed refill_work, it
>>>> also calls napi_disable() on the receive queue's napi. This can leads to
>>>> deadlock when napi_disable() is called on an already disabled napi. This
>>>> scenario can be reproducible by binding a XDP socket to virtio-net
>>>> interface without setting up the fill ring. As a result, try_fill_recv
>>>> will fail until the fill ring is set up and refill_work is scheduled.
>>>
>>> So, what is the problem? The refill_work is waiting? As I know, that thread
>>> will sleep some time, so the cpu can do other work.
>>
>> When napi_disable is called on an already disabled napi, it will sleep
>> in napi_disable_locked while still holding the netdev_lock. As a result,
>> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
>> This leads to refill_work and the pause-then-resume tx are stuck altogether.
>
> This needs to be added to the chagelog. And it looks like this is a fix for
>
> commit 413f0271f3966e0c73d4937963f19335af19e628
> Author: Jakub Kicinski <kuba@kernel.org>
> Date: Tue Jan 14 19:53:14 2025 -0800
>
> net: protect NAPI enablement with netdev_lock()
>
> ?
>
> I wonder if it's simpler to just hold the netdev lock in resize or xsk
> binding instead of this.

Setting:

	dev->request_ops_lock = true;

in virtnet_probe() before calling register_netdevice() should achieve
the above. Could you please have a try?

Thanks,

Paolo
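For illustration only, an untested sketch of what that suggestion would look
like in the driver (placement within virtnet_probe() is approximate):

/* Untested sketch of Paolo's suggestion, not a submitted patch.
 * Opting into the ops-locked path asks the core to take netdev_lock
 * around ndo/queue/NAPI operations for this device, so the driver no
 * longer has to choose between napi_disable() and napi_disable_locked().
 */
static int virtnet_probe(struct virtio_device *vdev)
{
	struct net_device *dev;
	int err;

	/* ... existing allocation and feature negotiation ... */

	dev->request_ops_lock = true;	/* must be set before registration */

	rtnl_lock();
	err = register_netdevice(dev);
	rtnl_unlock();

	/* ... */
	return err;
}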
On Tue, 8 Apr 2025 11:28:54 +0200 Paolo Abeni wrote:
> >> When napi_disable is called on an already disabled napi, it will sleep
> >> in napi_disable_locked while still holding the netdev_lock. As a result,
> >> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
> >> This leads to refill_work and the pause-then-resume tx are stuck altogether.
> >
> > This needs to be added to the chagelog. And it looks like this is a fix for
> >
> > commit 413f0271f3966e0c73d4937963f19335af19e628
> > Author: Jakub Kicinski <kuba@kernel.org>
> > Date: Tue Jan 14 19:53:14 2025 -0800
> >
> > net: protect NAPI enablement with netdev_lock()
> >
> > ?
> >
> > I wonder if it's simpler to just hold the netdev lock in resize or xsk
> > binding instead of this.
>
> Setting:
>
> dev->request_ops_lock = true;
>
> in virtnet_probe() before calling register_netdevice() should achieve
> the above. Could you please have a try?

Can we do that or do we need a more tailored fix? request_ops_lock only
appeared in 6.15 and the bug AFAIU dates back to 6.14. We don't
normally worry about that given the stream of fixes - request_ops_lock
is a bit risky. Jason's suggestion AFAIU is just to wrap the
disable/enable pairs in the lock. We can try request_ops_lock in -next?

Bui Quang Minh, could you add a selftest for this problem?
See tools/testing/selftests/drivers/net/virtio_net/
You can re-use / extend the XSK helper from
tools/testing/selftests/drivers/net/xdp_helper.c ?
(move it to tools/testing/selftests/net/lib for easier access)
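As a starting point for such a selftest, the trigger is an AF_XDP socket
bound in zero-copy mode to an rx queue whose fill ring never receives any
buffers. A rough, untested userspace sketch of that core follows (error
handling omitted, sizes arbitrary; the in-tree xdp_helper.c mentioned above
already does most of this setup):

/* Rough, untested reproducer sketch: bind an AF_XDP socket to rx queue 0
 * of the given interface in zero-copy mode but never post buffers to the
 * fill ring. try_fill_recv() then keeps failing, refill_work stays
 * pending, and a later unbind (socket close) can race with it.
 */
#include <linux/if_xdp.h>
#include <net/if.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int fd, ring_entries = 64;
	size_t umem_len = 64 * 4096;
	void *umem;
	struct xdp_umem_reg mr = { .len = umem_len, .chunk_size = 4096 };
	struct sockaddr_xdp sxdp = {
		.sxdp_family = AF_XDP,
		.sxdp_flags = XDP_ZEROCOPY,	/* exercise the driver's pause/resume path */
		.sxdp_queue_id = 0,
	};

	if (argc < 2)
		return 1;
	sxdp.sxdp_ifindex = if_nametoindex(argv[1]);

	fd = socket(AF_XDP, SOCK_RAW, 0);
	umem = mmap(NULL, umem_len, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	mr.addr = (unsigned long long)umem;

	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
	setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &ring_entries, sizeof(ring_entries));
	setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &ring_entries, sizeof(ring_entries));
	setsockopt(fd, SOL_XDP, XDP_RX_RING, &ring_entries, sizeof(ring_entries));

	/* The fill ring exists but is deliberately left empty. */
	bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));

	pause();	/* keep the socket bound; killing the process unbinds it */
	close(fd);
	return 0;
}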
On 4/8/25 21:37, Jakub Kicinski wrote:
> On Tue, 8 Apr 2025 11:28:54 +0200 Paolo Abeni wrote:
>>>> When napi_disable is called on an already disabled napi, it will sleep
>>>> in napi_disable_locked while still holding the netdev_lock. As a result,
>>>> later napi_enable gets stuck too as it cannot acquire the netdev_lock.
>>>> This leads to refill_work and the pause-then-resume tx are stuck altogether.
>>> This needs to be added to the chagelog. And it looks like this is a fix for
>>>
>>> commit 413f0271f3966e0c73d4937963f19335af19e628
>>> Author: Jakub Kicinski <kuba@kernel.org>
>>> Date: Tue Jan 14 19:53:14 2025 -0800
>>>
>>> net: protect NAPI enablement with netdev_lock()
>>>
>>> ?
>>>
>>> I wonder if it's simpler to just hold the netdev lock in resize or xsk
>>> binding instead of this.
>> Setting:
>>
>> dev->request_ops_lock = true;
>>
>> in virtnet_probe() before calling register_netdevice() should achieve
>> the above. Could you please have a try?
> Can we do that or do we need a more tailored fix? request_ops_lock only
> appeared in 6.15 and the bug AFAIU dates back to 6.14. We don't normally
> worry about given the stream of fixes - request_ops_lock is a bit risky.
> Jason's suggestion AFAIU is just to wrap the disable/enable pairs in
> the lock. We can try request_ops_lock in -next ?
>
> Bui Quang Minh, could you add a selftest for this problem?
> See tools/testing/selftests/drivers/net/virtio_net/
> You can re-use / extend the XSK helper from
> tools/testing/selftests/drivers/net/xdp_helper.c ?
> (move it to tools/testing/selftests/net/lib for easier access)
I've just tried the current selftests and found that queues.py can
trigger the deadlock.
Running this a few times (the probability is quite high) on a guest
with a virtio-net interface:
~ NETIF=enp0s2 ./ksft-net-drv/run_kselftest.sh -t drivers/net:queues.py
The test hangs.
I've got the following hung task warning:
[ 363.841549] #0: ffff88800015c758 ((wq_completion)events){+.+.}-{0:0}, at: process_scheduled_works+0x1a9/0x3c9
[ 363.841560] #1: ffffc90000043e28 ((work_completion)(&pool->work)){+.+.}-{0:0}, at: process_scheduled_works+0x1c2/0x3c9
[ 363.841570] #2: ffffffff81f5a150 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock+0x1b/0x1d
[ 363.841579] #3: ffff8880035e49f0 (&dev->lock){+.+.}-{4:4}, at: netdev_lock+0x12/0x14
[ 363.841587] 3 locks held by kworker/2:0/26:
[ 363.841589] #0: ffff88800015c758 ((wq_completion)events){+.+.}-{0:0}, at: process_scheduled_works+0x1a9/0x3c9
[ 363.841598] #1: ffffc900000f7e28 ((work_completion)(&(&vi->refill)->work)){+.+.}-{0:0}, at: process_scheduled_works+0x1c2/0x3c9
[ 363.841608] #2: ffff8880035e49f0 (&dev->lock){+.+.}-{4:4}, at: netdev_lock+0x12/0x14
~ cat /proc/7/stack
[<0>] netdev_lock+0x12/0x14
[<0>] napi_enable+0x1a/0x35
[<0>] virtnet_napi_do_enable+0x1a/0x40
[<0>] virtnet_napi_enable+0x32/0x4a
[<0>] virtnet_rx_resume+0x4f/0x56
[<0>] virtnet_rq_bind_xsk_pool+0xd8/0xfd
[<0>] virtnet_xdp+0x6d3/0x72d
[<0>] xp_disable_drv_zc+0x5a/0x63
[<0>] xp_clear_dev+0x1d/0x4a
[<0>] xp_release_deferred+0x24/0x75
[<0>] process_scheduled_works+0x23e/0x3c9
[<0>] worker_thread+0x13f/0x1ca
[<0>] kthread+0x1b2/0x1ba
[<0>] ret_from_fork+0x2b/0x40
[<0>] ret_from_fork_asm+0x11/0x20
~ cat /proc/26/stack
[<0>] napi_disable_locked+0x41/0xaf
[<0>] napi_disable+0x22/0x35
[<0>] refill_work+0x4b/0x8f
[<0>] process_scheduled_works+0x23e/0x3c9
[<0>] worker_thread+0x13f/0x1ca
[<0>] kthread+0x1b2/0x1ba
[<0>] ret_from_fork+0x2b/0x40
[<0>] ret_from_fork_asm+0x11/0x20
The deadlock was caused by a race between xsk unbind and refill_work.
Thanks,
Quang Minh.
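Putting the two stacks above side by side, the timeline looks roughly like
this (reconstructed for clarity, not taken from the original report):

xsk unbind (pid 7)                          refill_work (kworker/2:0, pid 26)
virtnet_rq_bind_xsk_pool()
  virtnet_rx_pause()
    napi_disable()      /* rq napi now disabled */
                                            napi_disable()
                                              netdev_lock(dev)  /* takes dev->lock */
                                              napi_disable_locked() <- hangs, napi already disabled
  virtnet_rx_resume()
    napi_enable()        <- blocks on dev->lock held by refill_work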