[PATCH net-next v8 2/4] vhost-net: wake queue of tun/tap after ptr_ring consume

Simon Schippers posted 4 patches 3 weeks, 5 days ago
[PATCH net-next v8 2/4] vhost-net: wake queue of tun/tap after ptr_ring consume
Posted by Simon Schippers 3 weeks, 5 days ago
Add tun_wake_queue() to tun.c and export it for use by vhost-net. The
function validates that the file belongs to a tun/tap device,
dereferences the tun_struct under RCU, and delegates to
__tun_wake_queue().

vhost_net_buf_produce() now calls tun_wake_queue() after a successful
batched consume of the ring to allow the netdev subqueue to be woken up.

Without the corresponding queue stopping (introduced in a subsequent
commit), this patch alone causes a slight throughput regression for a
tap+vhost-net setup sending to a qemu VM:
3.948 Mpps to 3.888 Mpps (-1.5%).

Details: AMD Ryzen 5 5600X at 4.3 GHz, 3200 MHz RAM, isolated QEMU
threads, XDP drop program active in VM, pktgen sender; Avg over
20 runs @ 100,000,000 packets. SRSO and spectre v2 mitigations disabled.

Co-developed-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
Signed-off-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
Signed-off-by: Simon Schippers <simon.schippers@tu-dortmund.de>
---
 drivers/net/tun.c      | 21 +++++++++++++++++++++
 drivers/vhost/net.c    | 15 +++++++++++----
 include/linux/if_tun.h |  3 +++
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a82d665dab5f..b86582cc6cb6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -3760,6 +3760,27 @@ struct ptr_ring *tun_get_tx_ring(struct file *file)
 }
 EXPORT_SYMBOL_GPL(tun_get_tx_ring);
 
+void tun_wake_queue(struct file *file)
+{
+	struct tun_file *tfile;
+	struct tun_struct *tun;
+
+	if (file->f_op != &tun_fops)
+		return;
+	tfile = file->private_data;
+	if (!tfile)
+		return;
+
+	rcu_read_lock();
+
+	tun = rcu_dereference(tfile->tun);
+	if (tun)
+		__tun_wake_queue(tun, tfile);
+
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tun_wake_queue);
+
 module_init(tun_init);
 module_exit(tun_cleanup);
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 80965181920c..c8ef804ef28c 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -176,13 +176,19 @@ static void *vhost_net_buf_consume(struct vhost_net_buf *rxq)
 	return ret;
 }
 
-static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
+static int vhost_net_buf_produce(struct sock *sk,
+				 struct vhost_net_virtqueue *nvq)
 {
+	struct file *file = sk->sk_socket->file;
 	struct vhost_net_buf *rxq = &nvq->rxq;
 
 	rxq->head = 0;
 	rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
 					      VHOST_NET_BATCH);
+
+	if (rxq->tail)
+		tun_wake_queue(file);
+
 	return rxq->tail;
 }
 
@@ -209,14 +215,15 @@ static int vhost_net_buf_peek_len(void *ptr)
 	return __skb_array_len_with_tag(ptr);
 }
 
-static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
+static int vhost_net_buf_peek(struct sock *sk,
+			      struct vhost_net_virtqueue *nvq)
 {
 	struct vhost_net_buf *rxq = &nvq->rxq;
 
 	if (!vhost_net_buf_is_empty(rxq))
 		goto out;
 
-	if (!vhost_net_buf_produce(nvq))
+	if (!vhost_net_buf_produce(sk, nvq))
 		return 0;
 
 out:
@@ -995,7 +1002,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
 	unsigned long flags;
 
 	if (rvq->rx_ring)
-		return vhost_net_buf_peek(rvq);
+		return vhost_net_buf_peek(sk, rvq);
 
 	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
 	head = skb_peek(&sk->sk_receive_queue);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 80166eb62f41..ab3b4ebca059 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -22,6 +22,7 @@ struct tun_msg_ctl {
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
+void tun_wake_queue(struct file *file);
 
 static inline bool tun_is_xdp_frame(void *ptr)
 {
@@ -55,6 +56,8 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
 	return ERR_PTR(-EINVAL);
 }
 
+static inline void tun_wake_queue(struct file *f) {}
+
 static inline bool tun_is_xdp_frame(void *ptr)
 {
 	return false;
-- 
2.43.0
Re: [PATCH net-next v8 2/4] vhost-net: wake queue of tun/tap after ptr_ring consume
Posted by Jason Wang 2 weeks ago
On Thu, Mar 12, 2026 at 9:07 PM Simon Schippers
<simon.schippers@tu-dortmund.de> wrote:
>
> Add tun_wake_queue() to tun.c and export it for use by vhost-net. The
> function validates that the file belongs to a tun/tap device,
> dereferences the tun_struct under RCU, and delegates to
> __tun_wake_queue().
>
> vhost_net_buf_produce() now calls tun_wake_queue() after a successful
> batched consume of the ring to allow the netdev subqueue to be woken up.
>
> Without the corresponding queue stopping (introduced in a subsequent
> commit), this patch alone causes a slight throughput regression for a
> tap+vhost-net setup sending to a qemu VM:
> 3.948 Mpps to 3.888 Mpps (-1.5%).
>
> Details: AMD Ryzen 5 5600X at 4.3 GHz, 3200 MHz RAM, isolated QEMU
> threads, XDP drop program active in VM, pktgen sender; Avg over
> 20 runs @ 100,000,000 packets. SRSO and spectre v2 mitigations disabled.
>
> Co-developed-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
> Signed-off-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
> Signed-off-by: Simon Schippers <simon.schippers@tu-dortmund.de>
> ---
>  drivers/net/tun.c      | 21 +++++++++++++++++++++
>  drivers/vhost/net.c    | 15 +++++++++++----
>  include/linux/if_tun.h |  3 +++
>  3 files changed, 35 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index a82d665dab5f..b86582cc6cb6 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -3760,6 +3760,27 @@ struct ptr_ring *tun_get_tx_ring(struct file *file)
>  }
>  EXPORT_SYMBOL_GPL(tun_get_tx_ring);
>
> +void tun_wake_queue(struct file *file)
> +{
> +       struct tun_file *tfile;
> +       struct tun_struct *tun;
> +
> +       if (file->f_op != &tun_fops)
> +               return;
> +       tfile = file->private_data;
> +       if (!tfile)
> +               return;
> +
> +       rcu_read_lock();
> +
> +       tun = rcu_dereference(tfile->tun);
> +       if (tun)
> +               __tun_wake_queue(tun, tfile);
> +
> +       rcu_read_unlock();
> +}
> +EXPORT_SYMBOL_GPL(tun_wake_queue);
> +
>  module_init(tun_init);
>  module_exit(tun_cleanup);
>  MODULE_DESCRIPTION(DRV_DESCRIPTION);
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 80965181920c..c8ef804ef28c 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -176,13 +176,19 @@ static void *vhost_net_buf_consume(struct vhost_net_buf *rxq)
>         return ret;
>  }
>
> -static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
> +static int vhost_net_buf_produce(struct sock *sk,
> +                                struct vhost_net_virtqueue *nvq)
>  {
> +       struct file *file = sk->sk_socket->file;
>         struct vhost_net_buf *rxq = &nvq->rxq;
>
>         rxq->head = 0;
>         rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
>                                               VHOST_NET_BATCH);
> +
> +       if (rxq->tail)
> +               tun_wake_queue(file);
> +
>         return rxq->tail;
>  }
>
> @@ -209,14 +215,15 @@ static int vhost_net_buf_peek_len(void *ptr)
>         return __skb_array_len_with_tag(ptr);
>  }
>
> -static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
> +static int vhost_net_buf_peek(struct sock *sk,
> +                             struct vhost_net_virtqueue *nvq)
>  {
>         struct vhost_net_buf *rxq = &nvq->rxq;
>
>         if (!vhost_net_buf_is_empty(rxq))
>                 goto out;
>
> -       if (!vhost_net_buf_produce(nvq))
> +       if (!vhost_net_buf_produce(sk, nvq))
>                 return 0;
>
>  out:
> @@ -995,7 +1002,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
>         unsigned long flags;
>
>         if (rvq->rx_ring)
> -               return vhost_net_buf_peek(rvq);
> +               return vhost_net_buf_peek(sk, rvq);
>
>         spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
>         head = skb_peek(&sk->sk_receive_queue);
> diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
> index 80166eb62f41..ab3b4ebca059 100644
> --- a/include/linux/if_tun.h
> +++ b/include/linux/if_tun.h
> @@ -22,6 +22,7 @@ struct tun_msg_ctl {
>  #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
>  struct socket *tun_get_socket(struct file *);
>  struct ptr_ring *tun_get_tx_ring(struct file *file);
> +void tun_wake_queue(struct file *file);
>
>  static inline bool tun_is_xdp_frame(void *ptr)
>  {
> @@ -55,6 +56,8 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
>         return ERR_PTR(-EINVAL);
>  }
>
> +static inline void tun_wake_queue(struct file *f) {}
> +
>  static inline bool tun_is_xdp_frame(void *ptr)
>  {
>         return false;
> --
> 2.43.0
>
Re: [PATCH net-next v8 2/4] vhost-net: wake queue of tun/tap after ptr_ring consume
Posted by Michael S. Tsirkin 3 weeks, 5 days ago
On Thu, Mar 12, 2026 at 02:06:37PM +0100, Simon Schippers wrote:
> Add tun_wake_queue() to tun.c and export it for use by vhost-net. The
> function validates that the file belongs to a tun/tap device,
> dereferences the tun_struct under RCU, and delegates to
> __tun_wake_queue().
> 
> vhost_net_buf_produce() now calls tun_wake_queue() after a successful
> batched consume of the ring to allow the netdev subqueue to be woken up.

A sentence is missing here: the point is to allow the queue to be
stopped when it gets full, which is required for traffic shaping;
that is implemented by the following patch,
"avoid ptr_ring tail-drop when a qdisc is present".




> Without the corresponding queue stopping (introduced in a subsequent
> commit), this patch alone causes a slight throughput regression for a
> tap+vhost-net setup sending to a qemu VM:
> 3.948 Mpps to 3.888 Mpps (-1.5%).
> 
> Details: AMD Ryzen 5 5600X at 4.3 GHz, 3200 MHz RAM, isolated QEMU
> threads, XDP drop program active in VM, pktgen sender; Avg over
> 20 runs @ 100,000,000 packets. SRSO and spectre v2 mitigations disabled.
>


 
> Co-developed-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
> Signed-off-by: Tim Gebauer <tim.gebauer@tu-dortmund.de>
> Signed-off-by: Simon Schippers <simon.schippers@tu-dortmund.de>
> ---
>  drivers/net/tun.c      | 21 +++++++++++++++++++++
>  drivers/vhost/net.c    | 15 +++++++++++----
>  include/linux/if_tun.h |  3 +++
>  3 files changed, 35 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index a82d665dab5f..b86582cc6cb6 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -3760,6 +3760,27 @@ struct ptr_ring *tun_get_tx_ring(struct file *file)
>  }
>  EXPORT_SYMBOL_GPL(tun_get_tx_ring);
>  
> +void tun_wake_queue(struct file *file)
> +{
> +	struct tun_file *tfile;
> +	struct tun_struct *tun;
> +
> +	if (file->f_op != &tun_fops)
> +		return;
> +	tfile = file->private_data;
> +	if (!tfile)
> +		return;
> +
> +	rcu_read_lock();
> +
> +	tun = rcu_dereference(tfile->tun);
> +	if (tun)
> +		__tun_wake_queue(tun, tfile);
> +
> +	rcu_read_unlock();
> +}
> +EXPORT_SYMBOL_GPL(tun_wake_queue);
> +
>  module_init(tun_init);
>  module_exit(tun_cleanup);
>  MODULE_DESCRIPTION(DRV_DESCRIPTION);
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 80965181920c..c8ef804ef28c 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -176,13 +176,19 @@ static void *vhost_net_buf_consume(struct vhost_net_buf *rxq)
>  	return ret;
>  }
>  
> -static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
> +static int vhost_net_buf_produce(struct sock *sk,
> +				 struct vhost_net_virtqueue *nvq)
>  {
> +	struct file *file = sk->sk_socket->file;
>  	struct vhost_net_buf *rxq = &nvq->rxq;
>  
>  	rxq->head = 0;
>  	rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
>  					      VHOST_NET_BATCH);
> +
> +	if (rxq->tail)
> +		tun_wake_queue(file);
> +
>  	return rxq->tail;
>  }
>  
> @@ -209,14 +215,15 @@ static int vhost_net_buf_peek_len(void *ptr)
>  	return __skb_array_len_with_tag(ptr);
>  }
>  
> -static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
> +static int vhost_net_buf_peek(struct sock *sk,
> +			      struct vhost_net_virtqueue *nvq)
>  {
>  	struct vhost_net_buf *rxq = &nvq->rxq;
>  
>  	if (!vhost_net_buf_is_empty(rxq))
>  		goto out;
>  
> -	if (!vhost_net_buf_produce(nvq))
> +	if (!vhost_net_buf_produce(sk, nvq))
>  		return 0;
>  
>  out:
> @@ -995,7 +1002,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
>  	unsigned long flags;
>  
>  	if (rvq->rx_ring)
> -		return vhost_net_buf_peek(rvq);
> +		return vhost_net_buf_peek(sk, rvq);
>  
>  	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
>  	head = skb_peek(&sk->sk_receive_queue);
> diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
> index 80166eb62f41..ab3b4ebca059 100644
> --- a/include/linux/if_tun.h
> +++ b/include/linux/if_tun.h
> @@ -22,6 +22,7 @@ struct tun_msg_ctl {
>  #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
>  struct socket *tun_get_socket(struct file *);
>  struct ptr_ring *tun_get_tx_ring(struct file *file);
> +void tun_wake_queue(struct file *file);
>  
>  static inline bool tun_is_xdp_frame(void *ptr)
>  {
> @@ -55,6 +56,8 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
>  	return ERR_PTR(-EINVAL);
>  }
>  
> +static inline void tun_wake_queue(struct file *f) {}
> +
>  static inline bool tun_is_xdp_frame(void *ptr)
>  {
>  	return false;
> -- 
> 2.43.0