Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
can be accessed by user apps, taking care to hold RTNL as needed.
$ ethtool -i ens4 | grep driver
driver: virtio_net
$ sudo ethtool -L ens4 combined 4
$ ./tools/net/ynl/pyynl/cli.py \
--spec Documentation/netlink/specs/netdev.yaml \
--dump queue-get --json='{"ifindex": 2}'
[{'id': 0, 'ifindex': 2, 'napi-id': 8289, 'type': 'rx'},
{'id': 1, 'ifindex': 2, 'napi-id': 8290, 'type': 'rx'},
{'id': 2, 'ifindex': 2, 'napi-id': 8291, 'type': 'rx'},
{'id': 3, 'ifindex': 2, 'napi-id': 8292, 'type': 'rx'},
{'id': 0, 'ifindex': 2, 'type': 'tx'},
{'id': 1, 'ifindex': 2, 'type': 'tx'},
{'id': 2, 'ifindex': 2, 'type': 'tx'},
{'id': 3, 'ifindex': 2, 'type': 'tx'}]
Note that virtio_net has TX-only NAPIs which do not have NAPI IDs, so
the lack of 'napi-id' in the above output is expected.
Signed-off-by: Joe Damato <jdamato@fastly.com>
---
drivers/net/virtio_net.c | 73 ++++++++++++++++++++++++++++------------
1 file changed, 52 insertions(+), 21 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e578885c1093..13bb4a563073 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2807,6 +2807,20 @@ static void skb_recv_done(struct virtqueue *rvq)
virtqueue_napi_schedule(&rq->napi, rvq);
}
+static void virtnet_queue_set_napi(struct net_device *dev,
+ struct napi_struct *napi,
+ enum netdev_queue_type q_type, int qidx,
+ bool need_rtnl)
+{
+ if (need_rtnl)
+ rtnl_lock();
+
+ netif_queue_set_napi(dev, qidx, q_type, napi);
+
+ if (need_rtnl)
+ rtnl_unlock();
+}
+
static void virtnet_napi_do_enable(struct virtqueue *vq,
struct napi_struct *napi)
{
@@ -2821,15 +2835,21 @@ static void virtnet_napi_do_enable(struct virtqueue *vq,
local_bh_enable();
}
-static void virtnet_napi_enable(struct receive_queue *rq)
+static void virtnet_napi_enable(struct receive_queue *rq, bool need_rtnl)
{
+ struct virtnet_info *vi = rq->vq->vdev->priv;
+ int qidx = vq2rxq(rq->vq);
+
virtnet_napi_do_enable(rq->vq, &rq->napi);
+ virtnet_queue_set_napi(vi->dev, &rq->napi,
+ NETDEV_QUEUE_TYPE_RX, qidx, need_rtnl);
}
-static void virtnet_napi_tx_enable(struct send_queue *sq)
+static void virtnet_napi_tx_enable(struct send_queue *sq, bool need_rtnl)
{
struct virtnet_info *vi = sq->vq->vdev->priv;
struct napi_struct *napi = &sq->napi;
+ int qidx = vq2txq(sq->vq);
if (!napi->weight)
return;
@@ -2843,20 +2863,31 @@ static void virtnet_napi_tx_enable(struct send_queue *sq)
}
virtnet_napi_do_enable(sq->vq, napi);
+ virtnet_queue_set_napi(vi->dev, napi, NETDEV_QUEUE_TYPE_TX, qidx,
+ need_rtnl);
}
-static void virtnet_napi_tx_disable(struct send_queue *sq)
+static void virtnet_napi_tx_disable(struct send_queue *sq, bool need_rtnl)
{
+ struct virtnet_info *vi = sq->vq->vdev->priv;
struct napi_struct *napi = &sq->napi;
+ int qidx = vq2txq(sq->vq);
- if (napi->weight)
+ if (napi->weight) {
+ virtnet_queue_set_napi(vi->dev, NULL, NETDEV_QUEUE_TYPE_TX,
+ qidx, need_rtnl);
napi_disable(napi);
+ }
}
-static void virtnet_napi_disable(struct receive_queue *rq)
+static void virtnet_napi_disable(struct receive_queue *rq, bool need_rtnl)
{
+ struct virtnet_info *vi = rq->vq->vdev->priv;
struct napi_struct *napi = &rq->napi;
+ int qidx = vq2rxq(rq->vq);
+ virtnet_queue_set_napi(vi->dev, NULL, NETDEV_QUEUE_TYPE_TX, qidx,
+ need_rtnl);
napi_disable(napi);
}
@@ -2870,9 +2901,9 @@ static void refill_work(struct work_struct *work)
for (i = 0; i < vi->curr_queue_pairs; i++) {
struct receive_queue *rq = &vi->rq[i];
- virtnet_napi_disable(rq);
+ virtnet_napi_disable(rq, true);
still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
- virtnet_napi_enable(rq);
+ virtnet_napi_enable(rq, true);
/* In theory, this can happen: if we don't get any buffers in
* we will *never* try to fill again.
@@ -3069,8 +3100,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
{
- virtnet_napi_tx_disable(&vi->sq[qp_index]);
- virtnet_napi_disable(&vi->rq[qp_index]);
+ virtnet_napi_tx_disable(&vi->sq[qp_index], false);
+ virtnet_napi_disable(&vi->rq[qp_index], false);
xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
}
@@ -3089,8 +3120,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
if (err < 0)
goto err_xdp_reg_mem_model;
- virtnet_napi_enable(&vi->rq[qp_index]);
- virtnet_napi_tx_enable(&vi->sq[qp_index]);
+ virtnet_napi_enable(&vi->rq[qp_index], false);
+ virtnet_napi_tx_enable(&vi->sq[qp_index], false);
return 0;
@@ -3342,7 +3373,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
bool running = netif_running(vi->dev);
if (running) {
- virtnet_napi_disable(rq);
+ virtnet_napi_disable(rq, true);
virtnet_cancel_dim(vi, &rq->dim);
}
}
@@ -3355,7 +3386,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
schedule_delayed_work(&vi->refill, 0);
if (running)
- virtnet_napi_enable(rq);
+ virtnet_napi_enable(rq, true);
}
static int virtnet_rx_resize(struct virtnet_info *vi,
@@ -3384,7 +3415,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
qindex = sq - vi->sq;
if (running)
- virtnet_napi_tx_disable(sq);
+ virtnet_napi_tx_disable(sq, true);
txq = netdev_get_tx_queue(vi->dev, qindex);
@@ -3418,7 +3449,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
__netif_tx_unlock_bh(txq);
if (running)
- virtnet_napi_tx_enable(sq);
+ virtnet_napi_tx_enable(sq, true);
}
static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
@@ -5961,8 +5992,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
/* Make sure NAPI is not using any XDP TX queues for RX. */
if (netif_running(dev)) {
for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_disable(&vi->rq[i]);
- virtnet_napi_tx_disable(&vi->sq[i]);
+ virtnet_napi_disable(&vi->rq[i], false);
+ virtnet_napi_tx_disable(&vi->sq[i], false);
}
}
@@ -5999,8 +6030,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (old_prog)
bpf_prog_put(old_prog);
if (netif_running(dev)) {
- virtnet_napi_enable(&vi->rq[i]);
- virtnet_napi_tx_enable(&vi->sq[i]);
+ virtnet_napi_enable(&vi->rq[i], false);
+ virtnet_napi_tx_enable(&vi->sq[i], false);
}
}
@@ -6015,8 +6046,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
if (netif_running(dev)) {
for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_enable(&vi->rq[i]);
- virtnet_napi_tx_enable(&vi->sq[i]);
+ virtnet_napi_enable(&vi->rq[i], false);
+ virtnet_napi_tx_enable(&vi->sq[i], false);
}
}
if (prog)
--
2.45.2
On Tue, Feb 25, 2025 at 10:05 AM Joe Damato <jdamato@fastly.com> wrote:
>
> Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
> can be accessed by user apps, taking care to hold RTNL as needed.
I may miss something but I wonder whether letting the caller hold the
lock is better.
More below.
>
> $ ethtool -i ens4 | grep driver
> driver: virtio_net
>
> $ sudo ethtool -L ens4 combined 4
>
> $ ./tools/net/ynl/pyynl/cli.py \
> --spec Documentation/netlink/specs/netdev.yaml \
> --dump queue-get --json='{"ifindex": 2}'
> [{'id': 0, 'ifindex': 2, 'napi-id': 8289, 'type': 'rx'},
> {'id': 1, 'ifindex': 2, 'napi-id': 8290, 'type': 'rx'},
> {'id': 2, 'ifindex': 2, 'napi-id': 8291, 'type': 'rx'},
> {'id': 3, 'ifindex': 2, 'napi-id': 8292, 'type': 'rx'},
> {'id': 0, 'ifindex': 2, 'type': 'tx'},
> {'id': 1, 'ifindex': 2, 'type': 'tx'},
> {'id': 2, 'ifindex': 2, 'type': 'tx'},
> {'id': 3, 'ifindex': 2, 'type': 'tx'}]
>
> Note that virtio_net has TX-only NAPIs which do not have NAPI IDs, so
> the lack of 'napi-id' in the above output is expected.
>
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> ---
> drivers/net/virtio_net.c | 73 ++++++++++++++++++++++++++++------------
> 1 file changed, 52 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index e578885c1093..13bb4a563073 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2807,6 +2807,20 @@ static void skb_recv_done(struct virtqueue *rvq)
> virtqueue_napi_schedule(&rq->napi, rvq);
> }
>
> +static void virtnet_queue_set_napi(struct net_device *dev,
> + struct napi_struct *napi,
> + enum netdev_queue_type q_type, int qidx,
> + bool need_rtnl)
> +{
> + if (need_rtnl)
> + rtnl_lock();
> +
> + netif_queue_set_napi(dev, qidx, q_type, napi);
> +
> + if (need_rtnl)
> + rtnl_unlock();
> +}
> +
> static void virtnet_napi_do_enable(struct virtqueue *vq,
> struct napi_struct *napi)
> {
> @@ -2821,15 +2835,21 @@ static void virtnet_napi_do_enable(struct virtqueue *vq,
> local_bh_enable();
> }
>
> -static void virtnet_napi_enable(struct receive_queue *rq)
> +static void virtnet_napi_enable(struct receive_queue *rq, bool need_rtnl)
> {
> + struct virtnet_info *vi = rq->vq->vdev->priv;
> + int qidx = vq2rxq(rq->vq);
> +
> virtnet_napi_do_enable(rq->vq, &rq->napi);
> + virtnet_queue_set_napi(vi->dev, &rq->napi,
> + NETDEV_QUEUE_TYPE_RX, qidx, need_rtnl);
> }
>
> -static void virtnet_napi_tx_enable(struct send_queue *sq)
> +static void virtnet_napi_tx_enable(struct send_queue *sq, bool need_rtnl)
> {
> struct virtnet_info *vi = sq->vq->vdev->priv;
> struct napi_struct *napi = &sq->napi;
> + int qidx = vq2txq(sq->vq);
>
> if (!napi->weight)
> return;
> @@ -2843,20 +2863,31 @@ static void virtnet_napi_tx_enable(struct send_queue *sq)
> }
>
> virtnet_napi_do_enable(sq->vq, napi);
> + virtnet_queue_set_napi(vi->dev, napi, NETDEV_QUEUE_TYPE_TX, qidx,
> + need_rtnl);
> }
>
> -static void virtnet_napi_tx_disable(struct send_queue *sq)
> +static void virtnet_napi_tx_disable(struct send_queue *sq, bool need_rtnl)
> {
> + struct virtnet_info *vi = sq->vq->vdev->priv;
> struct napi_struct *napi = &sq->napi;
> + int qidx = vq2txq(sq->vq);
>
> - if (napi->weight)
> + if (napi->weight) {
> + virtnet_queue_set_napi(vi->dev, NULL, NETDEV_QUEUE_TYPE_TX,
> + qidx, need_rtnl);
> napi_disable(napi);
> + }
> }
>
> -static void virtnet_napi_disable(struct receive_queue *rq)
> +static void virtnet_napi_disable(struct receive_queue *rq, bool need_rtnl)
> {
> + struct virtnet_info *vi = rq->vq->vdev->priv;
> struct napi_struct *napi = &rq->napi;
> + int qidx = vq2rxq(rq->vq);
>
> + virtnet_queue_set_napi(vi->dev, NULL, NETDEV_QUEUE_TYPE_TX, qidx,
> + need_rtnl);
> napi_disable(napi);
> }
>
> @@ -2870,9 +2901,9 @@ static void refill_work(struct work_struct *work)
> for (i = 0; i < vi->curr_queue_pairs; i++) {
> struct receive_queue *rq = &vi->rq[i];
>
> - virtnet_napi_disable(rq);
> + virtnet_napi_disable(rq, true);
> still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> - virtnet_napi_enable(rq);
> + virtnet_napi_enable(rq, true);
>
> /* In theory, this can happen: if we don't get any buffers in
> * we will *never* try to fill again.
> @@ -3069,8 +3100,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
>
> static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
> {
> - virtnet_napi_tx_disable(&vi->sq[qp_index]);
> - virtnet_napi_disable(&vi->rq[qp_index]);
> + virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> + virtnet_napi_disable(&vi->rq[qp_index], false);
> xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
> }
>
> @@ -3089,8 +3120,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> if (err < 0)
> goto err_xdp_reg_mem_model;
>
> - virtnet_napi_enable(&vi->rq[qp_index]);
> - virtnet_napi_tx_enable(&vi->sq[qp_index]);
> + virtnet_napi_enable(&vi->rq[qp_index], false);
> + virtnet_napi_tx_enable(&vi->sq[qp_index], false);
>
> return 0;
>
> @@ -3342,7 +3373,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
> bool running = netif_running(vi->dev);
>
> if (running) {
> - virtnet_napi_disable(rq);
> + virtnet_napi_disable(rq, true);
During the resize, the rtnl lock has been held on the ethtool path
rtnl_lock();
rc = __dev_ethtool(net, ifr, useraddr, ethcmd, state);
rtnl_unlock();
virtnet_rx_resize()
virtnet_rx_pause()
and in the case of XSK binding, I see ASSERT_RTNL in xp_assign_dev() at least.
Thanks
On Wed, Feb 26, 2025 at 01:48:50PM +0800, Jason Wang wrote:
> On Tue, Feb 25, 2025 at 10:05 AM Joe Damato <jdamato@fastly.com> wrote:
> >
> > Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
> > can be accessed by user apps, taking care to hold RTNL as needed.
>
> I may miss something but I wonder whether letting the caller hold the
> lock is better.
Hmm...
Double checking all the paths over again, here's what I see:
- refill_work, delayed work that needs RTNL so this change seems
right?
- virtnet_disable_queue_pair, called from virtnet_open and
virtnet_close. When called via NDO these are safe and hold RTNL,
but they can be called from power management and need RTNL.
- virtnet_enable_queue_pair called from virtnet_open, safe when
used via NDO but needs RTNL when used via power management.
- virtnet_rx_pause called in both paths as you mentioned, one
which needs RTNL and one which doesn't.
I think there are a couple ways to fix this:
1. Edit this patch to remove the virtnet_queue_set_napi helper,
and call netif_queue_set_napi from the napi_enable and
napi_disable helpers directly. Modify code calling into these
paths to hold rtnl (or not) as described above.
2. Modify virtnet_enable_queue_pair, virtnet_disable_queue_pair,
and virtnet_rx_pause to take a "bool need_rtnl" as an a
function argument and pass that through.
I'm not sure which is cleaner and I do not have a preference.
Can you let me know which you prefer? I am happy to implement either
one for the next revision.
[...]
> > ---
> > drivers/net/virtio_net.c | 73 ++++++++++++++++++++++++++++------------
> > 1 file changed, 52 insertions(+), 21 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index e578885c1093..13bb4a563073 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -2807,6 +2807,20 @@ static void skb_recv_done(struct virtqueue *rvq)
> > virtqueue_napi_schedule(&rq->napi, rvq);
> > }
> >
> > +static void virtnet_queue_set_napi(struct net_device *dev,
> > + struct napi_struct *napi,
> > + enum netdev_queue_type q_type, int qidx,
> > + bool need_rtnl)
> > +{
> > + if (need_rtnl)
> > + rtnl_lock();
> > +
> > + netif_queue_set_napi(dev, qidx, q_type, napi);
> > +
> > + if (need_rtnl)
> > + rtnl_unlock();
> > +}
> > +
> > static void virtnet_napi_do_enable(struct virtqueue *vq,
> > struct napi_struct *napi)
> > {
> > @@ -2821,15 +2835,21 @@ static void virtnet_napi_do_enable(struct virtqueue *vq,
> > local_bh_enable();
> > }
> >
> > -static void virtnet_napi_enable(struct receive_queue *rq)
> > +static void virtnet_napi_enable(struct receive_queue *rq, bool need_rtnl)
> > {
> > + struct virtnet_info *vi = rq->vq->vdev->priv;
> > + int qidx = vq2rxq(rq->vq);
> > +
> > virtnet_napi_do_enable(rq->vq, &rq->napi);
> > + virtnet_queue_set_napi(vi->dev, &rq->napi,
> > + NETDEV_QUEUE_TYPE_RX, qidx, need_rtnl);
> > }
> >
> > -static void virtnet_napi_tx_enable(struct send_queue *sq)
> > +static void virtnet_napi_tx_enable(struct send_queue *sq, bool need_rtnl)
> > {
> > struct virtnet_info *vi = sq->vq->vdev->priv;
> > struct napi_struct *napi = &sq->napi;
> > + int qidx = vq2txq(sq->vq);
> >
> > if (!napi->weight)
> > return;
> > @@ -2843,20 +2863,31 @@ static void virtnet_napi_tx_enable(struct send_queue *sq)
> > }
> >
> > virtnet_napi_do_enable(sq->vq, napi);
> > + virtnet_queue_set_napi(vi->dev, napi, NETDEV_QUEUE_TYPE_TX, qidx,
> > + need_rtnl);
> > }
> >
> > -static void virtnet_napi_tx_disable(struct send_queue *sq)
> > +static void virtnet_napi_tx_disable(struct send_queue *sq, bool need_rtnl)
> > {
> > + struct virtnet_info *vi = sq->vq->vdev->priv;
> > struct napi_struct *napi = &sq->napi;
> > + int qidx = vq2txq(sq->vq);
> >
> > - if (napi->weight)
> > + if (napi->weight) {
> > + virtnet_queue_set_napi(vi->dev, NULL, NETDEV_QUEUE_TYPE_TX,
> > + qidx, need_rtnl);
> > napi_disable(napi);
> > + }
> > }
> >
> > -static void virtnet_napi_disable(struct receive_queue *rq)
> > +static void virtnet_napi_disable(struct receive_queue *rq, bool need_rtnl)
> > {
> > + struct virtnet_info *vi = rq->vq->vdev->priv;
> > struct napi_struct *napi = &rq->napi;
> > + int qidx = vq2rxq(rq->vq);
> >
> > + virtnet_queue_set_napi(vi->dev, NULL, NETDEV_QUEUE_TYPE_TX, qidx,
> > + need_rtnl);
> > napi_disable(napi);
> > }
> >
> > @@ -2870,9 +2901,9 @@ static void refill_work(struct work_struct *work)
> > for (i = 0; i < vi->curr_queue_pairs; i++) {
> > struct receive_queue *rq = &vi->rq[i];
> >
> > - virtnet_napi_disable(rq);
> > + virtnet_napi_disable(rq, true);
> > still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > - virtnet_napi_enable(rq);
> > + virtnet_napi_enable(rq, true);
> >
> > /* In theory, this can happen: if we don't get any buffers in
> > * we will *never* try to fill again.
> > @@ -3069,8 +3100,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
> >
> > static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
> > {
> > - virtnet_napi_tx_disable(&vi->sq[qp_index]);
> > - virtnet_napi_disable(&vi->rq[qp_index]);
> > + virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> > + virtnet_napi_disable(&vi->rq[qp_index], false);
> > xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
> > }
> >
> > @@ -3089,8 +3120,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> > if (err < 0)
> > goto err_xdp_reg_mem_model;
> >
> > - virtnet_napi_enable(&vi->rq[qp_index]);
> > - virtnet_napi_tx_enable(&vi->sq[qp_index]);
> > + virtnet_napi_enable(&vi->rq[qp_index], false);
> > + virtnet_napi_tx_enable(&vi->sq[qp_index], false);
> >
> > return 0;
> >
> > @@ -3342,7 +3373,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
> > bool running = netif_running(vi->dev);
> >
> > if (running) {
> > - virtnet_napi_disable(rq);
> > + virtnet_napi_disable(rq, true);
>
> During the resize, the rtnl lock has been held on the ethtool path
>
> rtnl_lock();
> rc = __dev_ethtool(net, ifr, useraddr, ethcmd, state);
> rtnl_unlock();
>
> virtnet_rx_resize()
> virtnet_rx_pause()
>
> and in the case of XSK binding, I see ASSERT_RTNL in xp_assign_dev() at least.
Thanks for catching this. I re-read all the paths and I think I've
outlined a few other issues above.
Please let me know which of the proposed methods above you'd like me
to implement to get this merged.
Thanks.
---
pw-bot: cr
On Wed, Feb 26, 2025 at 01:03:09PM -0500, Joe Damato wrote:
> On Wed, Feb 26, 2025 at 01:48:50PM +0800, Jason Wang wrote:
> > On Tue, Feb 25, 2025 at 10:05 AM Joe Damato <jdamato@fastly.com> wrote:
> > >
> > > Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
> > > can be accessed by user apps, taking care to hold RTNL as needed.
> >
> > I may miss something but I wonder whether letting the caller hold the
> > lock is better.
>
> Hmm...
>
> Double checking all the paths over again, here's what I see:
> - refill_work, delayed work that needs RTNL so this change seems
> right?
>
> - virtnet_disable_queue_pair, called from virtnet_open and
> virtnet_close. When called via NDO these are safe and hold RTNL,
> but they can be called from power management and need RTNL.
>
> - virtnet_enable_queue_pair called from virtnet_open, safe when
> used via NDO but needs RTNL when used via power management.
>
> - virtnet_rx_pause called in both paths as you mentioned, one
> which needs RTNL and one which doesn't.
>
> I think there are a couple ways to fix this:
>
> 1. Edit this patch to remove the virtnet_queue_set_napi helper,
> and call netif_queue_set_napi from the napi_enable and
> napi_disable helpers directly. Modify code calling into these
> paths to hold rtnl (or not) as described above.
>
> 2. Modify virtnet_enable_queue_pair, virtnet_disable_queue_pair,
> and virtnet_rx_pause to take a "bool need_rtnl" as an a
> function argument and pass that through.
>
> I'm not sure which is cleaner and I do not have a preference.
>
> Can you let me know which you prefer? I am happy to implement either
> one for the next revision.
1 seems cleaner.
taking locks depending on paths is confusing
> [...]
>
> > > ---
> > > drivers/net/virtio_net.c | 73 ++++++++++++++++++++++++++++------------
> > > 1 file changed, 52 insertions(+), 21 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index e578885c1093..13bb4a563073 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -2807,6 +2807,20 @@ static void skb_recv_done(struct virtqueue *rvq)
> > > virtqueue_napi_schedule(&rq->napi, rvq);
> > > }
> > >
> > > +static void virtnet_queue_set_napi(struct net_device *dev,
> > > + struct napi_struct *napi,
> > > + enum netdev_queue_type q_type, int qidx,
> > > + bool need_rtnl)
> > > +{
> > > + if (need_rtnl)
> > > + rtnl_lock();
> > > +
> > > + netif_queue_set_napi(dev, qidx, q_type, napi);
> > > +
> > > + if (need_rtnl)
> > > + rtnl_unlock();
> > > +}
> > > +
> > > static void virtnet_napi_do_enable(struct virtqueue *vq,
> > > struct napi_struct *napi)
> > > {
> > > @@ -2821,15 +2835,21 @@ static void virtnet_napi_do_enable(struct virtqueue *vq,
> > > local_bh_enable();
> > > }
> > >
> > > -static void virtnet_napi_enable(struct receive_queue *rq)
> > > +static void virtnet_napi_enable(struct receive_queue *rq, bool need_rtnl)
> > > {
> > > + struct virtnet_info *vi = rq->vq->vdev->priv;
> > > + int qidx = vq2rxq(rq->vq);
> > > +
> > > virtnet_napi_do_enable(rq->vq, &rq->napi);
> > > + virtnet_queue_set_napi(vi->dev, &rq->napi,
> > > + NETDEV_QUEUE_TYPE_RX, qidx, need_rtnl);
> > > }
> > >
> > > -static void virtnet_napi_tx_enable(struct send_queue *sq)
> > > +static void virtnet_napi_tx_enable(struct send_queue *sq, bool need_rtnl)
> > > {
> > > struct virtnet_info *vi = sq->vq->vdev->priv;
> > > struct napi_struct *napi = &sq->napi;
> > > + int qidx = vq2txq(sq->vq);
> > >
> > > if (!napi->weight)
> > > return;
> > > @@ -2843,20 +2863,31 @@ static void virtnet_napi_tx_enable(struct send_queue *sq)
> > > }
> > >
> > > virtnet_napi_do_enable(sq->vq, napi);
> > > + virtnet_queue_set_napi(vi->dev, napi, NETDEV_QUEUE_TYPE_TX, qidx,
> > > + need_rtnl);
> > > }
> > >
> > > -static void virtnet_napi_tx_disable(struct send_queue *sq)
> > > +static void virtnet_napi_tx_disable(struct send_queue *sq, bool need_rtnl)
> > > {
> > > + struct virtnet_info *vi = sq->vq->vdev->priv;
> > > struct napi_struct *napi = &sq->napi;
> > > + int qidx = vq2txq(sq->vq);
> > >
> > > - if (napi->weight)
> > > + if (napi->weight) {
> > > + virtnet_queue_set_napi(vi->dev, NULL, NETDEV_QUEUE_TYPE_TX,
> > > + qidx, need_rtnl);
> > > napi_disable(napi);
> > > + }
> > > }
> > >
> > > -static void virtnet_napi_disable(struct receive_queue *rq)
> > > +static void virtnet_napi_disable(struct receive_queue *rq, bool need_rtnl)
> > > {
> > > + struct virtnet_info *vi = rq->vq->vdev->priv;
> > > struct napi_struct *napi = &rq->napi;
> > > + int qidx = vq2rxq(rq->vq);
> > >
> > > + virtnet_queue_set_napi(vi->dev, NULL, NETDEV_QUEUE_TYPE_TX, qidx,
> > > + need_rtnl);
> > > napi_disable(napi);
> > > }
> > >
> > > @@ -2870,9 +2901,9 @@ static void refill_work(struct work_struct *work)
> > > for (i = 0; i < vi->curr_queue_pairs; i++) {
> > > struct receive_queue *rq = &vi->rq[i];
> > >
> > > - virtnet_napi_disable(rq);
> > > + virtnet_napi_disable(rq, true);
> > > still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > > - virtnet_napi_enable(rq);
> > > + virtnet_napi_enable(rq, true);
> > >
> > > /* In theory, this can happen: if we don't get any buffers in
> > > * we will *never* try to fill again.
> > > @@ -3069,8 +3100,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
> > >
> > > static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
> > > {
> > > - virtnet_napi_tx_disable(&vi->sq[qp_index]);
> > > - virtnet_napi_disable(&vi->rq[qp_index]);
> > > + virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> > > + virtnet_napi_disable(&vi->rq[qp_index], false);
> > > xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
> > > }
> > >
> > > @@ -3089,8 +3120,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> > > if (err < 0)
> > > goto err_xdp_reg_mem_model;
> > >
> > > - virtnet_napi_enable(&vi->rq[qp_index]);
> > > - virtnet_napi_tx_enable(&vi->sq[qp_index]);
> > > + virtnet_napi_enable(&vi->rq[qp_index], false);
> > > + virtnet_napi_tx_enable(&vi->sq[qp_index], false);
> > >
> > > return 0;
> > >
> > > @@ -3342,7 +3373,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
> > > bool running = netif_running(vi->dev);
> > >
> > > if (running) {
> > > - virtnet_napi_disable(rq);
> > > + virtnet_napi_disable(rq, true);
> >
> > During the resize, the rtnl lock has been held on the ethtool path
> >
> > rtnl_lock();
> > rc = __dev_ethtool(net, ifr, useraddr, ethcmd, state);
> > rtnl_unlock();
> >
> > virtnet_rx_resize()
> > virtnet_rx_pause()
> >
> > and in the case of XSK binding, I see ASSERT_RTNL in xp_assign_dev() at least.
>
> Thanks for catching this. I re-read all the paths and I think I've
> outlined a few other issues above.
>
> Please let me know which of the proposed methods above you'd like me
> to implement to get this merged.
>
> Thanks.
>
> ---
> pw-bot: cr
On Wed, Feb 26, 2025 at 01:03:09PM -0500, Joe Damato wrote:
> On Wed, Feb 26, 2025 at 01:48:50PM +0800, Jason Wang wrote:
> > On Tue, Feb 25, 2025 at 10:05 AM Joe Damato <jdamato@fastly.com> wrote:
> > >
> > > Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
> > > can be accessed by user apps, taking care to hold RTNL as needed.
> >
> > I may miss something but I wonder whether letting the caller hold the
> > lock is better.
>
> Hmm...
>
> Double checking all the paths over again, here's what I see:
> - refill_work, delayed work that needs RTNL so this change seems
> right?
>
> - virtnet_disable_queue_pair, called from virtnet_open and
> virtnet_close. When called via NDO these are safe and hold RTNL,
> but they can be called from power management and need RTNL.
>
> - virtnet_enable_queue_pair called from virtnet_open, safe when
> used via NDO but needs RTNL when used via power management.
>
> - virtnet_rx_pause called in both paths as you mentioned, one
> which needs RTNL and one which doesn't.
Sorry, I missed more paths:
- virtnet_rx_resume
- virtnet_tx_pause and virtnet_tx_resume
which are similar to path you mentioned (virtnet_rx_pause) and need
rtnl in one of two different paths.
Let me know if I missed any paths and what your preferred way to fix
this would be?
I think both options below are possible and I have no strong
preference.
>
> I think there are a couple ways to fix this:
>
> 1. Edit this patch to remove the virtnet_queue_set_napi helper,
> and call netif_queue_set_napi from the napi_enable and
> napi_disable helpers directly. Modify code calling into these
> paths to hold rtnl (or not) as described above.
>
> 2. Modify virtnet_enable_queue_pair, virtnet_disable_queue_pair,
> and virtnet_rx_pause to take a "bool need_rtnl" as an a
> function argument and pass that through.
>
> I'm not sure which is cleaner and I do not have a preference.
>
> Can you let me know which you prefer? I am happy to implement either
> one for the next revision.
>
> [...]
>
> > > ---
> > > drivers/net/virtio_net.c | 73 ++++++++++++++++++++++++++++------------
> > > 1 file changed, 52 insertions(+), 21 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index e578885c1093..13bb4a563073 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -2807,6 +2807,20 @@ static void skb_recv_done(struct virtqueue *rvq)
> > > virtqueue_napi_schedule(&rq->napi, rvq);
> > > }
> > >
> > > +static void virtnet_queue_set_napi(struct net_device *dev,
> > > + struct napi_struct *napi,
> > > + enum netdev_queue_type q_type, int qidx,
> > > + bool need_rtnl)
> > > +{
> > > + if (need_rtnl)
> > > + rtnl_lock();
> > > +
> > > + netif_queue_set_napi(dev, qidx, q_type, napi);
> > > +
> > > + if (need_rtnl)
> > > + rtnl_unlock();
> > > +}
> > > +
> > > static void virtnet_napi_do_enable(struct virtqueue *vq,
> > > struct napi_struct *napi)
> > > {
> > > @@ -2821,15 +2835,21 @@ static void virtnet_napi_do_enable(struct virtqueue *vq,
> > > local_bh_enable();
> > > }
> > >
> > > -static void virtnet_napi_enable(struct receive_queue *rq)
> > > +static void virtnet_napi_enable(struct receive_queue *rq, bool need_rtnl)
> > > {
> > > + struct virtnet_info *vi = rq->vq->vdev->priv;
> > > + int qidx = vq2rxq(rq->vq);
> > > +
> > > virtnet_napi_do_enable(rq->vq, &rq->napi);
> > > + virtnet_queue_set_napi(vi->dev, &rq->napi,
> > > + NETDEV_QUEUE_TYPE_RX, qidx, need_rtnl);
> > > }
> > >
> > > -static void virtnet_napi_tx_enable(struct send_queue *sq)
> > > +static void virtnet_napi_tx_enable(struct send_queue *sq, bool need_rtnl)
> > > {
> > > struct virtnet_info *vi = sq->vq->vdev->priv;
> > > struct napi_struct *napi = &sq->napi;
> > > + int qidx = vq2txq(sq->vq);
> > >
> > > if (!napi->weight)
> > > return;
> > > @@ -2843,20 +2863,31 @@ static void virtnet_napi_tx_enable(struct send_queue *sq)
> > > }
> > >
> > > virtnet_napi_do_enable(sq->vq, napi);
> > > + virtnet_queue_set_napi(vi->dev, napi, NETDEV_QUEUE_TYPE_TX, qidx,
> > > + need_rtnl);
> > > }
> > >
> > > -static void virtnet_napi_tx_disable(struct send_queue *sq)
> > > +static void virtnet_napi_tx_disable(struct send_queue *sq, bool need_rtnl)
> > > {
> > > + struct virtnet_info *vi = sq->vq->vdev->priv;
> > > struct napi_struct *napi = &sq->napi;
> > > + int qidx = vq2txq(sq->vq);
> > >
> > > - if (napi->weight)
> > > + if (napi->weight) {
> > > + virtnet_queue_set_napi(vi->dev, NULL, NETDEV_QUEUE_TYPE_TX,
> > > + qidx, need_rtnl);
> > > napi_disable(napi);
> > > + }
> > > }
> > >
> > > -static void virtnet_napi_disable(struct receive_queue *rq)
> > > +static void virtnet_napi_disable(struct receive_queue *rq, bool need_rtnl)
> > > {
> > > + struct virtnet_info *vi = rq->vq->vdev->priv;
> > > struct napi_struct *napi = &rq->napi;
> > > + int qidx = vq2rxq(rq->vq);
> > >
> > > + virtnet_queue_set_napi(vi->dev, NULL, NETDEV_QUEUE_TYPE_TX, qidx,
> > > + need_rtnl);
> > > napi_disable(napi);
> > > }
> > >
> > > @@ -2870,9 +2901,9 @@ static void refill_work(struct work_struct *work)
> > > for (i = 0; i < vi->curr_queue_pairs; i++) {
> > > struct receive_queue *rq = &vi->rq[i];
> > >
> > > - virtnet_napi_disable(rq);
> > > + virtnet_napi_disable(rq, true);
> > > still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > > - virtnet_napi_enable(rq);
> > > + virtnet_napi_enable(rq, true);
> > >
> > > /* In theory, this can happen: if we don't get any buffers in
> > > * we will *never* try to fill again.
> > > @@ -3069,8 +3100,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
> > >
> > > static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
> > > {
> > > - virtnet_napi_tx_disable(&vi->sq[qp_index]);
> > > - virtnet_napi_disable(&vi->rq[qp_index]);
> > > + virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> > > + virtnet_napi_disable(&vi->rq[qp_index], false);
> > > xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
> > > }
> > >
> > > @@ -3089,8 +3120,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> > > if (err < 0)
> > > goto err_xdp_reg_mem_model;
> > >
> > > - virtnet_napi_enable(&vi->rq[qp_index]);
> > > - virtnet_napi_tx_enable(&vi->sq[qp_index]);
> > > + virtnet_napi_enable(&vi->rq[qp_index], false);
> > > + virtnet_napi_tx_enable(&vi->sq[qp_index], false);
> > >
> > > return 0;
> > >
> > > @@ -3342,7 +3373,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
> > > bool running = netif_running(vi->dev);
> > >
> > > if (running) {
> > > - virtnet_napi_disable(rq);
> > > + virtnet_napi_disable(rq, true);
> >
> > During the resize, the rtnl lock has been held on the ethtool path
> >
> > rtnl_lock();
> > rc = __dev_ethtool(net, ifr, useraddr, ethcmd, state);
> > rtnl_unlock();
> >
> > virtnet_rx_resize()
> > virtnet_rx_pause()
> >
> > and in the case of XSK binding, I see ASSERT_RTNL in xp_assign_dev() at least.
>
> Thanks for catching this. I re-read all the paths and I think I've
> outlined a few other issues above.
>
> Please let me know which of the proposed methods above you'd like me
> to implement to get this merged.
>
> Thanks.
>
> ---
> pw-bot: cr
On Wed, Feb 26, 2025 at 01:08:49PM -0500, Joe Damato wrote:
> On Wed, Feb 26, 2025 at 01:03:09PM -0500, Joe Damato wrote:
> > On Wed, Feb 26, 2025 at 01:48:50PM +0800, Jason Wang wrote:
> > > On Tue, Feb 25, 2025 at 10:05 AM Joe Damato <jdamato@fastly.com> wrote:
> > > >
> > > > Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
> > > > can be accessed by user apps, taking care to hold RTNL as needed.
> > >
> > > I may miss something but I wonder whether letting the caller hold the
> > > lock is better.
> >
> > Hmm...
> >
> > Double checking all the paths over again, here's what I see:
> > - refill_work, delayed work that needs RTNL so this change seems
> > right?
> >
> > - virtnet_disable_queue_pair, called from virtnet_open and
> > virtnet_close. When called via NDO these are safe and hold RTNL,
> > but they can be called from power management and need RTNL.
> >
> > - virtnet_enable_queue_pair called from virtnet_open, safe when
> > used via NDO but needs RTNL when used via power management.
> >
> > - virtnet_rx_pause called in both paths as you mentioned, one
> > which needs RTNL and one which doesn't.
>
> Sorry, I missed more paths:
>
> - virtnet_rx_resume
> - virtnet_tx_pause and virtnet_tx_resume
>
> which are similar to path you mentioned (virtnet_rx_pause) and need
> rtnl in one of two different paths.
>
> Let me know if I missed any paths and what your preferred way to fix
> this would be?
>
> I think both options below are possible and I have no strong
> preference.
OK, my apologies. I read your message and the code wrong. Sorry for
the back-to-back emails from me.
Please ignore my message above... I think after re-reading the code,
here's where I've arrived:
- refill_work needs to hold RTNL (as in the existing patch)
- virtnet_rx_pause, virtnet_rx_resume, virtnet_tx_pause,
virtnet_tx_resume -- all do NOT need to hold RTNL because it is
already held in the ethtool resize path and the XSK path, as you
explained, but I mis-read (sorry).
- virtnet_disable_queue_pair and virtnet_enable_queue_pair both
need to hold RTNL only when called via power management, but not
when called via ndo_open or ndo_close
Is my understanding correct and does it match your understanding?
If so, that means there are two issues:
1. Fixing the hardcoded bools in rx_pause, rx_resume, tx_pause,
tx_resume (all should be false, RTNL is not needed).
2. Handling the power management case which calls virtnet_open and
virtnet_close.
I made a small diff included below as an example of a possible
solution:
1. Modify virtnet_disable_queue_pair and virtnet_enable_queue_pair
to take a "bool need_rtnl" and pass it through to the helpers
they call.
2. Create two helpers, virtnet_do_open and virt_do_close both of
which take struct net_device *dev, bool need_rtnl. virtnet_open
and virtnet_close are modified to call the helpers and pass
false for need_rtnl. The power management paths call the
helpers and pass true for need_rtnl. (fixes issue 2 above)
3. Fix the bools for rx_pause, rx_resume, tx_pause, tx_resume to
pass false since all paths that I could find that lead to these
functions hold RTNL. (fixes issue 1 above)
See the diff below (which can be applied on top of patch 3) to see
what it looks like.
If you are OK with this approach, I will send a v5 where patch 3
includes the changes shown in this diff.
Please let me know what you think:
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 13bb4a563073..76ecb8f3ce9a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3098,14 +3098,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
return received;
}
-static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
+static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index,
+ bool need_rtnl)
{
- virtnet_napi_tx_disable(&vi->sq[qp_index], false);
- virtnet_napi_disable(&vi->rq[qp_index], false);
+ virtnet_napi_tx_disable(&vi->sq[qp_index], need_rtnl);
+ virtnet_napi_disable(&vi->rq[qp_index], need_rtnl);
xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
}
-static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
+static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index,
+ bool need_rtnl)
{
struct net_device *dev = vi->dev;
int err;
@@ -3120,8 +3122,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
if (err < 0)
goto err_xdp_reg_mem_model;
- virtnet_napi_enable(&vi->rq[qp_index], false);
- virtnet_napi_tx_enable(&vi->sq[qp_index], false);
+ virtnet_napi_enable(&vi->rq[qp_index], need_rtnl);
+ virtnet_napi_tx_enable(&vi->sq[qp_index], need_rtnl);
return 0;
@@ -3156,7 +3158,7 @@ static void virtnet_update_settings(struct virtnet_info *vi)
vi->duplex = duplex;
}
-static int virtnet_open(struct net_device *dev)
+static int virtnet_do_open(struct net_device *dev, bool need_rtnl)
{
struct virtnet_info *vi = netdev_priv(dev);
int i, err;
@@ -3169,7 +3171,7 @@ static int virtnet_open(struct net_device *dev)
if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
- err = virtnet_enable_queue_pair(vi, i);
+ err = virtnet_enable_queue_pair(vi, i, need_rtnl);
if (err < 0)
goto err_enable_qp;
}
@@ -3190,13 +3192,18 @@ static int virtnet_open(struct net_device *dev)
cancel_delayed_work_sync(&vi->refill);
for (i--; i >= 0; i--) {
- virtnet_disable_queue_pair(vi, i);
+ virtnet_disable_queue_pair(vi, i, need_rtnl);
virtnet_cancel_dim(vi, &vi->rq[i].dim);
}
return err;
}
+static int virtnet_open(struct net_device *dev)
+{
+ return virtnet_do_open(dev, false);
+}
+
static int virtnet_poll_tx(struct napi_struct *napi, int budget)
{
struct send_queue *sq = container_of(napi, struct send_queue, napi);
@@ -3373,7 +3380,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
bool running = netif_running(vi->dev);
if (running) {
- virtnet_napi_disable(rq, true);
+ virtnet_napi_disable(rq, false);
virtnet_cancel_dim(vi, &rq->dim);
}
}
@@ -3386,7 +3393,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
schedule_delayed_work(&vi->refill, 0);
if (running)
- virtnet_napi_enable(rq, true);
+ virtnet_napi_enable(rq, false);
}
static int virtnet_rx_resize(struct virtnet_info *vi,
@@ -3415,7 +3422,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
qindex = sq - vi->sq;
if (running)
- virtnet_napi_tx_disable(sq, true);
+ virtnet_napi_tx_disable(sq, false);
txq = netdev_get_tx_queue(vi->dev, qindex);
@@ -3449,7 +3456,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
__netif_tx_unlock_bh(txq);
if (running)
- virtnet_napi_tx_enable(sq, true);
+ virtnet_napi_tx_enable(sq, false);
}
static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
@@ -3708,7 +3715,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
return 0;
}
-static int virtnet_close(struct net_device *dev)
+static int virtnet_do_close(struct net_device *dev, bool need_rtnl)
{
struct virtnet_info *vi = netdev_priv(dev);
int i;
@@ -3727,7 +3734,7 @@ static int virtnet_close(struct net_device *dev)
cancel_work_sync(&vi->config_work);
for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_disable_queue_pair(vi, i);
+ virtnet_disable_queue_pair(vi, i, need_rtnl);
virtnet_cancel_dim(vi, &vi->rq[i].dim);
}
@@ -3736,6 +3743,11 @@ static int virtnet_close(struct net_device *dev)
return 0;
}
+static int virtnet_close(struct net_device *dev)
+{
+ return virtnet_do_close(dev, false);
+}
+
static void virtnet_rx_mode_work(struct work_struct *work)
{
struct virtnet_info *vi =
@@ -5682,7 +5694,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
netif_device_detach(vi->dev);
netif_tx_unlock_bh(vi->dev);
if (netif_running(vi->dev))
- virtnet_close(vi->dev);
+ virtnet_do_close(vi->dev, true);
}
static int init_vqs(struct virtnet_info *vi);
@@ -5702,7 +5714,7 @@ static int virtnet_restore_up(struct virtio_device *vdev)
enable_rx_mode_work(vi);
if (netif_running(vi->dev)) {
- err = virtnet_open(vi->dev);
+ err = virtnet_do_open(vi->dev, false);
if (err)
return err;
}
On Wed, Feb 26, 2025 at 03:27:42PM -0500, Joe Damato wrote:
> On Wed, Feb 26, 2025 at 01:08:49PM -0500, Joe Damato wrote:
> > On Wed, Feb 26, 2025 at 01:03:09PM -0500, Joe Damato wrote:
> > > On Wed, Feb 26, 2025 at 01:48:50PM +0800, Jason Wang wrote:
> > > > On Tue, Feb 25, 2025 at 10:05 AM Joe Damato <jdamato@fastly.com> wrote:
> > > > >
> > > > > Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
> > > > > can be accessed by user apps, taking care to hold RTNL as needed.
> > > >
> > > > I may miss something but I wonder whether letting the caller hold the
> > > > lock is better.
> > >
> > > Hmm...
> > >
> > > Double checking all the paths over again, here's what I see:
> > > - refill_work, delayed work that needs RTNL so this change seems
> > > right?
> > >
> > > - virtnet_disable_queue_pair, called from virtnet_open and
> > > virtnet_close. When called via NDO these are safe and hold RTNL,
> > > but they can be called from power management and need RTNL.
> > >
> > > - virtnet_enable_queue_pair called from virtnet_open, safe when
> > > used via NDO but needs RTNL when used via power management.
> > >
> > > - virtnet_rx_pause called in both paths as you mentioned, one
> > > which needs RTNL and one which doesn't.
> >
> > Sorry, I missed more paths:
> >
> > - virtnet_rx_resume
> > - virtnet_tx_pause and virtnet_tx_resume
> >
> > which are similar to path you mentioned (virtnet_rx_pause) and need
> > rtnl in one of two different paths.
> >
> > Let me know if I missed any paths and what your preferred way to fix
> > this would be?
> >
> > I think both options below are possible and I have no strong
> > preference.
>
> OK, my apologies. I read your message and the code wrong. Sorry for
> the back-to-back emails from me.
>
> Please ignore my message above... I think after re-reading the code,
> here's where I've arrived:
>
> - refill_work needs to hold RTNL (as in the existing patch)
>
> - virtnet_rx_pause, virtnet_rx_resume, virtnet_tx_pause,
> virtnet_tx_resume -- all do NOT need to hold RTNL because it is
> already held in the ethtool resize path and the XSK path, as you
> explained, but I mis-read (sorry).
>
> - virtnet_disable_queue_pair and virtnet_enable_queue_pair both
> need to hold RTNL only when called via power management, but not
> when called via ndo_open or ndo_close
>
> Is my understanding correct and does it match your understanding?
>
> If so, that means there are two issues:
>
> 1. Fixing the hardcoded bools in rx_pause, rx_resume, tx_pause,
> tx_resume (all should be false, RTNL is not needed).
>
> 2. Handling the power management case which calls virtnet_open and
> virtnet_close.
>
> I made a small diff included below as an example of a possible
> solution:
>
> 1. Modify virtnet_disable_queue_pair and virtnet_enable_queue_pair
> to take a "bool need_rtnl" and pass it through to the helpers
> they call.
>
> 2. Create two helpers, virtnet_do_open and virt_do_close both of
> which take struct net_device *dev, bool need_rtnl. virtnet_open
> and virtnet_close are modified to call the helpers and pass
> false for need_rtnl. The power management paths call the
> helpers and pass true for need_rtnl. (fixes issue 2 above)
>
> 3. Fix the bools for rx_pause, rx_resume, tx_pause, tx_resume to
> pass false since all paths that I could find that lead to these
> functions hold RTNL. (fixes issue 1 above)
>
> See the diff below (which can be applied on top of patch 3) to see
> what it looks like.
>
> If you are OK with this approach, I will send a v5 where patch 3
> includes the changes shown in this diff.
>
> Please let me know what you think:
Looks ok I think.
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 13bb4a563073..76ecb8f3ce9a 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -3098,14 +3098,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
> return received;
> }
>
> -static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
> +static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index,
> + bool need_rtnl)
> {
> - virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> - virtnet_napi_disable(&vi->rq[qp_index], false);
> + virtnet_napi_tx_disable(&vi->sq[qp_index], need_rtnl);
> + virtnet_napi_disable(&vi->rq[qp_index], need_rtnl);
> xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
> }
>
> -static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> +static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index,
> + bool need_rtnl)
> {
> struct net_device *dev = vi->dev;
> int err;
> @@ -3120,8 +3122,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> if (err < 0)
> goto err_xdp_reg_mem_model;
>
> - virtnet_napi_enable(&vi->rq[qp_index], false);
> - virtnet_napi_tx_enable(&vi->sq[qp_index], false);
> + virtnet_napi_enable(&vi->rq[qp_index], need_rtnl);
> + virtnet_napi_tx_enable(&vi->sq[qp_index], need_rtnl);
>
> return 0;
>
> @@ -3156,7 +3158,7 @@ static void virtnet_update_settings(struct virtnet_info *vi)
> vi->duplex = duplex;
> }
>
> -static int virtnet_open(struct net_device *dev)
> +static int virtnet_do_open(struct net_device *dev, bool need_rtnl)
> {
> struct virtnet_info *vi = netdev_priv(dev);
> int i, err;
> @@ -3169,7 +3171,7 @@ static int virtnet_open(struct net_device *dev)
> if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
> schedule_delayed_work(&vi->refill, 0);
>
> - err = virtnet_enable_queue_pair(vi, i);
> + err = virtnet_enable_queue_pair(vi, i, need_rtnl);
> if (err < 0)
> goto err_enable_qp;
> }
> @@ -3190,13 +3192,18 @@ static int virtnet_open(struct net_device *dev)
> cancel_delayed_work_sync(&vi->refill);
>
> for (i--; i >= 0; i--) {
> - virtnet_disable_queue_pair(vi, i);
> + virtnet_disable_queue_pair(vi, i, need_rtnl);
> virtnet_cancel_dim(vi, &vi->rq[i].dim);
> }
>
> return err;
> }
>
> +static int virtnet_open(struct net_device *dev)
> +{
> + return virtnet_do_open(dev, false);
> +}
> +
> static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> {
> struct send_queue *sq = container_of(napi, struct send_queue, napi);
> @@ -3373,7 +3380,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
> bool running = netif_running(vi->dev);
>
> if (running) {
> - virtnet_napi_disable(rq, true);
> + virtnet_napi_disable(rq, false);
> virtnet_cancel_dim(vi, &rq->dim);
> }
> }
> @@ -3386,7 +3393,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
> schedule_delayed_work(&vi->refill, 0);
>
> if (running)
> - virtnet_napi_enable(rq, true);
> + virtnet_napi_enable(rq, false);
> }
>
> static int virtnet_rx_resize(struct virtnet_info *vi,
> @@ -3415,7 +3422,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
> qindex = sq - vi->sq;
>
> if (running)
> - virtnet_napi_tx_disable(sq, true);
> + virtnet_napi_tx_disable(sq, false);
>
> txq = netdev_get_tx_queue(vi->dev, qindex);
>
> @@ -3449,7 +3456,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
> __netif_tx_unlock_bh(txq);
>
> if (running)
> - virtnet_napi_tx_enable(sq, true);
> + virtnet_napi_tx_enable(sq, false);
> }
>
> static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
> @@ -3708,7 +3715,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
> return 0;
> }
>
> -static int virtnet_close(struct net_device *dev)
> +static int virtnet_do_close(struct net_device *dev, bool need_rtnl)
> {
> struct virtnet_info *vi = netdev_priv(dev);
> int i;
> @@ -3727,7 +3734,7 @@ static int virtnet_close(struct net_device *dev)
> cancel_work_sync(&vi->config_work);
>
> for (i = 0; i < vi->max_queue_pairs; i++) {
> - virtnet_disable_queue_pair(vi, i);
> + virtnet_disable_queue_pair(vi, i, need_rtnl);
> virtnet_cancel_dim(vi, &vi->rq[i].dim);
> }
>
> @@ -3736,6 +3743,11 @@ static int virtnet_close(struct net_device *dev)
> return 0;
> }
>
> +static int virtnet_close(struct net_device *dev)
> +{
> + return virtnet_do_close(dev, false);
> +}
> +
> static void virtnet_rx_mode_work(struct work_struct *work)
> {
> struct virtnet_info *vi =
> @@ -5682,7 +5694,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
> netif_device_detach(vi->dev);
> netif_tx_unlock_bh(vi->dev);
> if (netif_running(vi->dev))
> - virtnet_close(vi->dev);
> + virtnet_do_close(vi->dev, true);
> }
>
> static int init_vqs(struct virtnet_info *vi);
> @@ -5702,7 +5714,7 @@ static int virtnet_restore_up(struct virtio_device *vdev)
> enable_rx_mode_work(vi);
>
> if (netif_running(vi->dev)) {
> - err = virtnet_open(vi->dev);
> + err = virtnet_do_open(vi->dev, false);
> if (err)
> return err;
> }
On Thu, Feb 27, 2025 at 6:13 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Feb 26, 2025 at 03:27:42PM -0500, Joe Damato wrote:
> > On Wed, Feb 26, 2025 at 01:08:49PM -0500, Joe Damato wrote:
> > > On Wed, Feb 26, 2025 at 01:03:09PM -0500, Joe Damato wrote:
> > > > On Wed, Feb 26, 2025 at 01:48:50PM +0800, Jason Wang wrote:
> > > > > On Tue, Feb 25, 2025 at 10:05 AM Joe Damato <jdamato@fastly.com> wrote:
> > > > > >
> > > > > > Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
> > > > > > can be accessed by user apps, taking care to hold RTNL as needed.
> > > > >
> > > > > I may miss something but I wonder whether letting the caller hold the
> > > > > lock is better.
> > > >
> > > > Hmm...
> > > >
> > > > Double checking all the paths over again, here's what I see:
> > > > - refill_work, delayed work that needs RTNL so this change seems
> > > > right?
> > > >
> > > > - virtnet_disable_queue_pair, called from virtnet_open and
> > > > virtnet_close. When called via NDO these are safe and hold RTNL,
> > > > but they can be called from power management and need RTNL.
> > > >
> > > > - virtnet_enable_queue_pair called from virtnet_open, safe when
> > > > used via NDO but needs RTNL when used via power management.
> > > >
> > > > - virtnet_rx_pause called in both paths as you mentioned, one
> > > > which needs RTNL and one which doesn't.
> > >
> > > Sorry, I missed more paths:
> > >
> > > - virtnet_rx_resume
> > > - virtnet_tx_pause and virtnet_tx_resume
> > >
> > > which are similar to path you mentioned (virtnet_rx_pause) and need
> > > rtnl in one of two different paths.
> > >
> > > Let me know if I missed any paths and what your preferred way to fix
> > > this would be?
> > >
> > > I think both options below are possible and I have no strong
> > > preference.
> >
> > OK, my apologies. I read your message and the code wrong. Sorry for
> > the back-to-back emails from me.
> >
> > Please ignore my message above... I think after re-reading the code,
> > here's where I've arrived:
> >
> > - refill_work needs to hold RTNL (as in the existing patch)
> >
> > - virtnet_rx_pause, virtnet_rx_resume, virtnet_tx_pause,
> > virtnet_tx_resume -- all do NOT need to hold RTNL because it is
> > already held in the ethtool resize path and the XSK path, as you
> > explained, but I mis-read (sorry).
> >
> > - virtnet_disable_queue_pair and virtnet_enable_queue_pair both
> > need to hold RTNL only when called via power management, but not
> > when called via ndo_open or ndo_close
> >
> > Is my understanding correct and does it match your understanding?
> >
> > If so, that means there are two issues:
> >
> > 1. Fixing the hardcoded bools in rx_pause, rx_resume, tx_pause,
> > tx_resume (all should be false, RTNL is not needed).
> >
> > 2. Handling the power management case which calls virtnet_open and
> > virtnet_close.
> >
> > I made a small diff included below as an example of a possible
> > solution:
> >
> > 1. Modify virtnet_disable_queue_pair and virtnet_enable_queue_pair
> > to take a "bool need_rtnl" and pass it through to the helpers
> > they call.
> >
> > 2. Create two helpers, virtnet_do_open and virt_do_close both of
> > which take struct net_device *dev, bool need_rtnl. virtnet_open
> > and virtnet_close are modified to call the helpers and pass
> > false for need_rtnl. The power management paths call the
> > helpers and pass true for need_rtnl. (fixes issue 2 above)
> >
> > 3. Fix the bools for rx_pause, rx_resume, tx_pause, tx_resume to
> > pass false since all paths that I could find that lead to these
> > functions hold RTNL. (fixes issue 1 above)
> >
> > See the diff below (which can be applied on top of patch 3) to see
> > what it looks like.
> >
> > If you are OK with this approach, I will send a v5 where patch 3
> > includes the changes shown in this diff.
> >
> > Please let me know what you think:
>
>
>
> Looks ok I think.
>
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 13bb4a563073..76ecb8f3ce9a 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -3098,14 +3098,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
> > return received;
> > }
> >
> > -static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
> > +static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index,
> > + bool need_rtnl)
> > {
> > - virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> > - virtnet_napi_disable(&vi->rq[qp_index], false);
> > + virtnet_napi_tx_disable(&vi->sq[qp_index], need_rtnl);
> > + virtnet_napi_disable(&vi->rq[qp_index], need_rtnl);
> > xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
> > }
> >
> > -static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> > +static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index,
> > + bool need_rtnl)
> > {
> > struct net_device *dev = vi->dev;
> > int err;
> > @@ -3120,8 +3122,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> > if (err < 0)
> > goto err_xdp_reg_mem_model;
> >
> > - virtnet_napi_enable(&vi->rq[qp_index], false);
> > - virtnet_napi_tx_enable(&vi->sq[qp_index], false);
> > + virtnet_napi_enable(&vi->rq[qp_index], need_rtnl);
> > + virtnet_napi_tx_enable(&vi->sq[qp_index], need_rtnl);
> >
> > return 0;
> >
> > @@ -3156,7 +3158,7 @@ static void virtnet_update_settings(struct virtnet_info *vi)
> > vi->duplex = duplex;
> > }
> >
> > -static int virtnet_open(struct net_device *dev)
> > +static int virtnet_do_open(struct net_device *dev, bool need_rtnl)
> > {
> > struct virtnet_info *vi = netdev_priv(dev);
> > int i, err;
> > @@ -3169,7 +3171,7 @@ static int virtnet_open(struct net_device *dev)
> > if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
> > schedule_delayed_work(&vi->refill, 0);
> >
> > - err = virtnet_enable_queue_pair(vi, i);
> > + err = virtnet_enable_queue_pair(vi, i, need_rtnl);
> > if (err < 0)
> > goto err_enable_qp;
> > }
> > @@ -3190,13 +3192,18 @@ static int virtnet_open(struct net_device *dev)
> > cancel_delayed_work_sync(&vi->refill);
> >
> > for (i--; i >= 0; i--) {
> > - virtnet_disable_queue_pair(vi, i);
> > + virtnet_disable_queue_pair(vi, i, need_rtnl);
> > virtnet_cancel_dim(vi, &vi->rq[i].dim);
> > }
> >
> > return err;
> > }
> >
> > +static int virtnet_open(struct net_device *dev)
> > +{
> > + return virtnet_do_open(dev, false);
> > +}
> > +
> > static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> > {
> > struct send_queue *sq = container_of(napi, struct send_queue, napi);
> > @@ -3373,7 +3380,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
> > bool running = netif_running(vi->dev);
> >
> > if (running) {
> > - virtnet_napi_disable(rq, true);
> > + virtnet_napi_disable(rq, false);
> > virtnet_cancel_dim(vi, &rq->dim);
> > }
> > }
> > @@ -3386,7 +3393,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
> > schedule_delayed_work(&vi->refill, 0);
> >
> > if (running)
> > - virtnet_napi_enable(rq, true);
> > + virtnet_napi_enable(rq, false);
> > }
> >
> > static int virtnet_rx_resize(struct virtnet_info *vi,
> > @@ -3415,7 +3422,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
> > qindex = sq - vi->sq;
> >
> > if (running)
> > - virtnet_napi_tx_disable(sq, true);
> > + virtnet_napi_tx_disable(sq, false);
> >
> > txq = netdev_get_tx_queue(vi->dev, qindex);
> >
> > @@ -3449,7 +3456,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
> > __netif_tx_unlock_bh(txq);
> >
> > if (running)
> > - virtnet_napi_tx_enable(sq, true);
> > + virtnet_napi_tx_enable(sq, false);
Instead of this, it looks to me it would be much simpler if we can
just hold the rtnl lock in freeze/restore.
Thanks
On Thu, Feb 27, 2025 at 12:18:33PM +0800, Jason Wang wrote:
> On Thu, Feb 27, 2025 at 6:13 AM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Feb 26, 2025 at 03:27:42PM -0500, Joe Damato wrote:
> > > On Wed, Feb 26, 2025 at 01:08:49PM -0500, Joe Damato wrote:
> > > > On Wed, Feb 26, 2025 at 01:03:09PM -0500, Joe Damato wrote:
> > > > > On Wed, Feb 26, 2025 at 01:48:50PM +0800, Jason Wang wrote:
> > > > > > On Tue, Feb 25, 2025 at 10:05 AM Joe Damato <jdamato@fastly.com> wrote:
> > > > > > >
> > > > > > > Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
> > > > > > > can be accessed by user apps, taking care to hold RTNL as needed.
> > > > > >
> > > > > > I may miss something but I wonder whether letting the caller hold the
> > > > > > lock is better.
> > > > >
> > > > > Hmm...
> > > > >
> > > > > Double checking all the paths over again, here's what I see:
> > > > > - refill_work, delayed work that needs RTNL so this change seems
> > > > > right?
> > > > >
> > > > > - virtnet_disable_queue_pair, called from virtnet_open and
> > > > > virtnet_close. When called via NDO these are safe and hold RTNL,
> > > > > but they can be called from power management and need RTNL.
> > > > >
> > > > > - virtnet_enable_queue_pair called from virtnet_open, safe when
> > > > > used via NDO but needs RTNL when used via power management.
> > > > >
> > > > > - virtnet_rx_pause called in both paths as you mentioned, one
> > > > > which needs RTNL and one which doesn't.
> > > >
> > > > Sorry, I missed more paths:
> > > >
> > > > - virtnet_rx_resume
> > > > - virtnet_tx_pause and virtnet_tx_resume
> > > >
> > > > which are similar to path you mentioned (virtnet_rx_pause) and need
> > > > rtnl in one of two different paths.
> > > >
> > > > Let me know if I missed any paths and what your preferred way to fix
> > > > this would be?
> > > >
> > > > I think both options below are possible and I have no strong
> > > > preference.
> > >
> > > OK, my apologies. I read your message and the code wrong. Sorry for
> > > the back-to-back emails from me.
> > >
> > > Please ignore my message above... I think after re-reading the code,
> > > here's where I've arrived:
> > >
> > > - refill_work needs to hold RTNL (as in the existing patch)
> > >
> > > - virtnet_rx_pause, virtnet_rx_resume, virtnet_tx_pause,
> > > virtnet_tx_resume -- all do NOT need to hold RTNL because it is
> > > already held in the ethtool resize path and the XSK path, as you
> > > explained, but I mis-read (sorry).
> > >
> > > - virtnet_disable_queue_pair and virtnet_enable_queue_pair both
> > > need to hold RTNL only when called via power management, but not
> > > when called via ndo_open or ndo_close
> > >
> > > Is my understanding correct and does it match your understanding?
> > >
> > > If so, that means there are two issues:
> > >
> > > 1. Fixing the hardcoded bools in rx_pause, rx_resume, tx_pause,
> > > tx_resume (all should be false, RTNL is not needed).
> > >
> > > 2. Handling the power management case which calls virtnet_open and
> > > virtnet_close.
> > >
> > > I made a small diff included below as an example of a possible
> > > solution:
> > >
> > > 1. Modify virtnet_disable_queue_pair and virtnet_enable_queue_pair
> > > to take a "bool need_rtnl" and pass it through to the helpers
> > > they call.
> > >
> > > 2. Create two helpers, virtnet_do_open and virt_do_close both of
> > > which take struct net_device *dev, bool need_rtnl. virtnet_open
> > > and virtnet_close are modified to call the helpers and pass
> > > false for need_rtnl. The power management paths call the
> > > helpers and pass true for need_rtnl. (fixes issue 2 above)
> > >
> > > 3. Fix the bools for rx_pause, rx_resume, tx_pause, tx_resume to
> > > pass false since all paths that I could find that lead to these
> > > functions hold RTNL. (fixes issue 1 above)
> > >
> > > See the diff below (which can be applied on top of patch 3) to see
> > > what it looks like.
> > >
> > > If you are OK with this approach, I will send a v5 where patch 3
> > > includes the changes shown in this diff.
> > >
> > > Please let me know what you think:
> >
> >
> >
> > Looks ok I think.
> >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index 13bb4a563073..76ecb8f3ce9a 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -3098,14 +3098,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
> > > return received;
> > > }
> > >
> > > -static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
> > > +static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index,
> > > + bool need_rtnl)
> > > {
> > > - virtnet_napi_tx_disable(&vi->sq[qp_index], false);
> > > - virtnet_napi_disable(&vi->rq[qp_index], false);
> > > + virtnet_napi_tx_disable(&vi->sq[qp_index], need_rtnl);
> > > + virtnet_napi_disable(&vi->rq[qp_index], need_rtnl);
> > > xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
> > > }
> > >
> > > -static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> > > +static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index,
> > > + bool need_rtnl)
> > > {
> > > struct net_device *dev = vi->dev;
> > > int err;
> > > @@ -3120,8 +3122,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
> > > if (err < 0)
> > > goto err_xdp_reg_mem_model;
> > >
> > > - virtnet_napi_enable(&vi->rq[qp_index], false);
> > > - virtnet_napi_tx_enable(&vi->sq[qp_index], false);
> > > + virtnet_napi_enable(&vi->rq[qp_index], need_rtnl);
> > > + virtnet_napi_tx_enable(&vi->sq[qp_index], need_rtnl);
> > >
> > > return 0;
> > >
> > > @@ -3156,7 +3158,7 @@ static void virtnet_update_settings(struct virtnet_info *vi)
> > > vi->duplex = duplex;
> > > }
> > >
> > > -static int virtnet_open(struct net_device *dev)
> > > +static int virtnet_do_open(struct net_device *dev, bool need_rtnl)
> > > {
> > > struct virtnet_info *vi = netdev_priv(dev);
> > > int i, err;
> > > @@ -3169,7 +3171,7 @@ static int virtnet_open(struct net_device *dev)
> > > if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
> > > schedule_delayed_work(&vi->refill, 0);
> > >
> > > - err = virtnet_enable_queue_pair(vi, i);
> > > + err = virtnet_enable_queue_pair(vi, i, need_rtnl);
> > > if (err < 0)
> > > goto err_enable_qp;
> > > }
> > > @@ -3190,13 +3192,18 @@ static int virtnet_open(struct net_device *dev)
> > > cancel_delayed_work_sync(&vi->refill);
> > >
> > > for (i--; i >= 0; i--) {
> > > - virtnet_disable_queue_pair(vi, i);
> > > + virtnet_disable_queue_pair(vi, i, need_rtnl);
> > > virtnet_cancel_dim(vi, &vi->rq[i].dim);
> > > }
> > >
> > > return err;
> > > }
> > >
> > > +static int virtnet_open(struct net_device *dev)
> > > +{
> > > + return virtnet_do_open(dev, false);
> > > +}
> > > +
> > > static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> > > {
> > > struct send_queue *sq = container_of(napi, struct send_queue, napi);
> > > @@ -3373,7 +3380,7 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
> > > bool running = netif_running(vi->dev);
> > >
> > > if (running) {
> > > - virtnet_napi_disable(rq, true);
> > > + virtnet_napi_disable(rq, false);
> > > virtnet_cancel_dim(vi, &rq->dim);
> > > }
> > > }
> > > @@ -3386,7 +3393,7 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
> > > schedule_delayed_work(&vi->refill, 0);
> > >
> > > if (running)
> > > - virtnet_napi_enable(rq, true);
> > > + virtnet_napi_enable(rq, false);
> > > }
> > >
> > > static int virtnet_rx_resize(struct virtnet_info *vi,
> > > @@ -3415,7 +3422,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
> > > qindex = sq - vi->sq;
> > >
> > > if (running)
> > > - virtnet_napi_tx_disable(sq, true);
> > > + virtnet_napi_tx_disable(sq, false);
> > >
> > > txq = netdev_get_tx_queue(vi->dev, qindex);
> > >
> > > @@ -3449,7 +3456,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
> > > __netif_tx_unlock_bh(txq);
> > >
> > > if (running)
> > > - virtnet_napi_tx_enable(sq, true);
> > > + virtnet_napi_tx_enable(sq, false);
>
> Instead of this, it looks to me it would be much simpler if we can
> just hold the rtnl lock in freeze/restore.
I disagree.
Holding RTNL for all of open and close instead of just the 1 API
call that needs it has the possibility of introducing other lock
ordering bugs now or in the future.
We only need RTNL for 1 API, why hold it for all of open or close?
© 2016 - 2026 Red Hat, Inc.