From nobody Fri Oct 3 11:28:14 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D53B52E6CBF; Tue, 2 Sep 2025 08:10:17 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756800621; cv=none; b=CkW7v3E9+MawOEdxSOSMtXU+QxUuGBvwooO1/D1AhEU1B0zqJO50Rn5KXPL1d1N3dGUL8Wgx4wEUEa83xbweMTAlXzjOGa/9AbFRfnli51lUCirH/+WM3aWumkWWc6vAM3P8kqzwxGMeNKZbaiejadM1V/pYY3VUL1W+NvD5VCc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756800621; c=relaxed/simple; bh=DpASARpUZpf8wbH1Mg9rkEgTvOPc+bR0wuCWjvVMFAo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=KM9c31+/7DACqCtJCkhu7Lddgrq4oDfFFLS0D+JqGGWYBOrUQ7mxcv87CY3u6UUl2yh/DHDdPtkYqESBqq2gbchYDZfLFEsq+eYKizpaS77WT7iuWyIPncqDVmk/vjx8hc2qHMRwEFKByy9OuJTuPpu0V7Kr667UphyfF6OcAsE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=gwWE3P+e; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="gwWE3P+e" Received: from simon-Latitude-5450.tu-dortmund.de (rechenknecht2.kn.e-technik.tu-dortmund.de [129.217.186.41]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.9/8.18.1.10) with ESMTPSA id 58289x6T004012 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); 
Tue, 2 Sep 2025 10:10:08 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1756800608; bh=DpASARpUZpf8wbH1Mg9rkEgTvOPc+bR0wuCWjvVMFAo=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=gwWE3P+emkeYbl05g9UyV195MNP1AFg1B9w+jDvUt+fmmlS+GmQnfDr+knUVitQrC gePlbGJ0RhITE/gauFH3b81e/rpEgjno9l41wYFuVtrCdFcbR4/5FMHo0iuqdNMcmf S66HMVWQQ3Pd+qmRPOc90bndw7XyaCMx+Tm5E7no= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH 1/4] ptr_ring_spare: Helper to check if spare capacity of size cnt is available Date: Tue, 2 Sep 2025 10:09:54 +0200 Message-ID: <20250902080957.47265-2-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250902080957.47265-1-simon.schippers@tu-dortmund.de> References: <20250902080957.47265-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The implementation is inspired by ptr_ring_empty. Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- include/linux/ptr_ring.h | 71 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 551329220e4f..6b8cfaecf478 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -243,6 +243,77 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *= r) return ret; } =20 +/* + * Check if a spare capacity of cnt is available without taking any locks. + * + * If cnt=3D=3D0 or cnt > r->size it acts the same as __ptr_ring_empty. 
+ * + * The same requirements apply as described for __ptr_ring_empty. + */ +static inline bool __ptr_ring_spare(struct ptr_ring *r, int cnt) +{ + int size =3D r->size; + int to_check; + + if (unlikely(!size || cnt < 0)) + return true; + + if (cnt > size) + cnt =3D 0; + + to_check =3D READ_ONCE(r->consumer_head) - cnt; + + if (to_check < 0) + to_check +=3D size; + + return !r->queue[to_check]; +} + +static inline bool ptr_ring_spare(struct ptr_ring *r, int cnt) +{ + bool ret; + + spin_lock(&r->consumer_lock); + ret =3D __ptr_ring_spare(r, cnt); + spin_unlock(&r->consumer_lock); + + return ret; +} + +static inline bool ptr_ring_spare_irq(struct ptr_ring *r, int cnt) +{ + bool ret; + + spin_lock_irq(&r->consumer_lock); + ret =3D __ptr_ring_spare(r, cnt); + spin_unlock_irq(&r->consumer_lock); + + return ret; +} + +static inline bool ptr_ring_spare_any(struct ptr_ring *r, int cnt) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&r->consumer_lock, flags); + ret =3D __ptr_ring_spare(r, cnt); + spin_unlock_irqrestore(&r->consumer_lock, flags); + + return ret; +} + +static inline bool ptr_ring_spare_bh(struct ptr_ring *r, int cnt) +{ + bool ret; + + spin_lock_bh(&r->consumer_lock); + ret =3D __ptr_ring_spare(r, cnt); + spin_unlock_bh(&r->consumer_lock); + + return ret; +} + /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { --=20 2.43.0 From nobody Fri Oct 3 11:28:14 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D549A2E8B7B; Tue, 2 Sep 2025 08:10:17 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756800621; cv=none; 
b=qlnPAtd25UD8KRn4w+vBj1NVqcDAYgacGwr7+gzBWDG6hca9rWzLfX2GNVSikJghDxikWl5aTNiIvkaJa0BnbVV9JT0L5Hil6vTSbq9LGMsu88/ijnI97YNXIROIWY9BST9rX2Ek993k8v6cRHMa3DoNG/+sz6e0N5VRqJ2YcfM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756800621; c=relaxed/simple; bh=dwrG5IZYDXYoD9+oEiwigp/MhGkl8O8eBM7BT7aMgHA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=l6F3YSTpibv93YI2OyvEj1oedF1wVksF/fuC5P6g98rG94qDpEavidHGnTfKUxqVYXmP/tC279rpDPFoV/YKMasUbALmkDY81vQPGfGFLLRyLZeWVqCoy/lDnqkngvgfaIno+8FqtYR3fHIhgWP0VjC7MnpBnnJYYsMbmYZR9hQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=cUQ38nlx; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="cUQ38nlx" Received: from simon-Latitude-5450.tu-dortmund.de (rechenknecht2.kn.e-technik.tu-dortmund.de [129.217.186.41]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.9/8.18.1.10) with ESMTPSA id 58289x6V004012 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 2 Sep 2025 10:10:08 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1756800608; bh=dwrG5IZYDXYoD9+oEiwigp/MhGkl8O8eBM7BT7aMgHA=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=cUQ38nlxON2+dUHnC7YPhiqCUV/sG2ck069Eedl8d4RrPp9kBMATIx+oQNMkgL0ZN X69ABnd30SZFJdOOGEh6wGCfUG8fqiFGpYsf9LHH461wHj6Q48UBlnJL7UJAu7LS6m 0FZzPtvYOifQAHFf3F09bAJguTp4LcRE+EDFUCWg= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, 
eperezma@redhat.com, stephen@networkplumber.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH 2/4] netdev queue flow control for TUN Date: Tue, 2 Sep 2025 10:09:55 +0200 Message-ID: <20250902080957.47265-3-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250902080957.47265-1-simon.schippers@tu-dortmund.de> References: <20250902080957.47265-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The netdev queue is stopped in tun_net_xmit after inserting an SKB into the ring buffer if the ring buffer became full because of that. If the insertion into the ptr_ring fails, the netdev queue is also stopped and the SKB is dropped. However, this never happened in my testing. To ensure that the ptr_ring change is visible to the consumer before the netdev queue is stopped, an smp_wmb() is used. Then in tun_ring_recv, the new helper wake_netdev_queue is called inside the blocking wait loop, before each schedule(), and after consuming an SKB from the ptr_ring. This helper first checks if the netdev queue has stopped. Then with the paired smp_rmb() it is known that tun_net_xmit will not produce SKBs anymore. With that knowledge, the helper can then wake the netdev queue if there is at least a single spare slot in the ptr_ring by calling ptr_ring_spare with cnt=3D1. 
Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- drivers/net/tun.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index cc6c50180663..735498e221d8 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1060,13 +1060,21 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb= , struct net_device *dev) =20 nf_reset_ct(skb); =20 - if (ptr_ring_produce(&tfile->tx_ring, skb)) { + queue =3D netdev_get_tx_queue(dev, txq); + if (unlikely(ptr_ring_produce(&tfile->tx_ring, skb))) { + /* Paired with smp_rmb() in wake_netdev_queue. */ + smp_wmb(); + netif_tx_stop_queue(queue); drop_reason =3D SKB_DROP_REASON_FULL_RING; goto drop; } + if (ptr_ring_full(&tfile->tx_ring)) { + /* Paired with smp_rmb() in wake_netdev_queue. */ + smp_wmb(); + netif_tx_stop_queue(queue); + } =20 /* dev->lltx requires to do our own update of trans_start */ - queue =3D netdev_get_tx_queue(dev, txq); txq_trans_cond_update(queue); =20 /* Notify and wake up reader process */ @@ -2110,6 +2118,24 @@ static ssize_t tun_put_user(struct tun_struct *tun, return total; } =20 +static inline void wake_netdev_queue(struct tun_file *tfile) +{ + struct netdev_queue *txq; + struct net_device *dev; + + rcu_read_lock(); + dev =3D rcu_dereference(tfile->tun)->dev; + txq =3D netdev_get_tx_queue(dev, tfile->queue_index); + + if (netif_tx_queue_stopped(txq)) { + /* Paired with smp_wmb() in tun_net_xmit. 
*/ + smp_rmb(); + if (ptr_ring_spare(&tfile->tx_ring, 1)) + netif_tx_wake_queue(txq); + } + rcu_read_unlock(); +} + static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) { DECLARE_WAITQUEUE(wait, current); @@ -2139,7 +2165,7 @@ static void *tun_ring_recv(struct tun_file *tfile, in= t noblock, int *err) error =3D -EFAULT; break; } - + wake_netdev_queue(tfile); schedule(); } =20 @@ -2147,6 +2173,7 @@ static void *tun_ring_recv(struct tun_file *tfile, in= t noblock, int *err) remove_wait_queue(&tfile->socket.wq.wait, &wait); =20 out: + wake_netdev_queue(tfile); *err =3D error; return ptr; } --=20 2.43.0 From nobody Fri Oct 3 11:28:14 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7ECC22E6CC8; Tue, 2 Sep 2025 08:10:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756800621; cv=none; b=JrL0+vqBJxbMZFeDqLALkh6G9dIx9WYETzL6/rFhTjQpIsAl+n4iKaZ62SYdkeYWufZnm36cqgjIqhwUml2uwOZJCJHWshoBzzzH2oAB+OvN+EsU0CJDO/yYVc0iwAb3+CygBEcwUW7UN/rb2ys/EOKmfyxRZcWVGJ21q2h5AWk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756800621; c=relaxed/simple; bh=HBmY7vQEFqMz0J92IFcKEfGWzZs1VRvbCj8tHQAbbmA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=jEkskU+R1ORWBh7b5JwYVGMMvT5kuGiXP/VLGOYZgn+q3iOtZtLIYnimyyBv2BkkqW0P0L1EpBlQteVrW1DIpgtCCGtnvThiLOFIUOs5QlZfiXEL9ZTMVOv1ijl2H9n9sjBXxpok1RimmirUw/VOJ35Rc5RuSHCBXQKsZTnbOW4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=EegUHrcw; arc=none 
smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="EegUHrcw" Received: from simon-Latitude-5450.tu-dortmund.de (rechenknecht2.kn.e-technik.tu-dortmund.de [129.217.186.41]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.9/8.18.1.10) with ESMTPSA id 58289x6X004012 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 2 Sep 2025 10:10:08 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1756800609; bh=HBmY7vQEFqMz0J92IFcKEfGWzZs1VRvbCj8tHQAbbmA=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=EegUHrcwUh0aQhfQjkGSzOf+cHnbd2xM6k+4P+8WighQWlkQFGY6fQUY2GJxn1zjR T+i7Urfqo96REY/LIlJycWq0w1ethRU+ms4YaOX9t9R8ZvagrDhrp9hY+gtJxpEH/C B65rGm4xSs1AOOCuGyFxe/xHV5qRSSe3tm2QBNJo= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH 3/4] netdev queue flow control for TAP Date: Tue, 2 Sep 2025 10:09:56 +0200 Message-ID: <20250902080957.47265-4-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250902080957.47265-1-simon.schippers@tu-dortmund.de> References: <20250902080957.47265-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Stopping the netdev queue is done in tun_net_xmit, as TAP uses this method as its ndo_start_xmit. 
To wake the queue, the new helper wake_netdev_queue is called in tap_do_read. This is done inside the blocking wait loop, before each schedule(), and after consuming an SKB from the ptr_ring. This helper first checks if the netdev queue has stopped. Then with the smp_rmb(), which is paired with the smp_wmb() of tun_net_xmit, it is known that tun_net_xmit will not produce SKBs anymore. With that knowledge, the helper can then wake the netdev queue if there is at least a single spare slot. This check is done by calling the helper ptr_ring_spare with cnt=3D1. Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- drivers/net/tap.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 1197f245e873..4d874672bcd7 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -753,6 +753,24 @@ static ssize_t tap_put_user(struct tap_queue *q, return ret ? ret : total; } =20 +static inline void wake_netdev_queue(struct tap_queue *q) +{ + struct netdev_queue *txq; + struct net_device *dev; + + rcu_read_lock(); + dev =3D rcu_dereference(q->tap)->dev; + txq =3D netdev_get_tx_queue(dev, q->queue_index); + + if (netif_tx_queue_stopped(txq)) { + /* Paired with smp_wmb() in tun_net_xmit. 
*/ + smp_rmb(); + if (ptr_ring_spare(&q->ring, 1)) + netif_tx_wake_queue(txq); + } + rcu_read_unlock(); +} + static ssize_t tap_do_read(struct tap_queue *q, struct iov_iter *to, int noblock, struct sk_buff *skb) @@ -785,12 +803,16 @@ static ssize_t tap_do_read(struct tap_queue *q, ret =3D -ERESTARTSYS; break; } + wake_netdev_queue(q); + /* Nothing to read, let's sleep */ schedule(); } if (!noblock) finish_wait(sk_sleep(&q->sk), &wait); =20 + wake_netdev_queue(q); + put: if (skb) { ret =3D tap_put_user(q, skb, to); --=20 2.43.0 From nobody Fri Oct 3 11:28:14 2025 Received: from unimail.uni-dortmund.de (mx1.hrz.uni-dortmund.de [129.217.128.51]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 302672EAB84; Tue, 2 Sep 2025 08:10:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=129.217.128.51 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756800626; cv=none; b=hBGaX9fI1ga3K1Lw7Q519Z6CaKe4JMD9VYGpurMlUCOASu2tIcMadVjaWSvgyIpuEnW2KQx2xfhVR+NydOEeSL36MMySqigQ5goYxS+HUMANyhgEt8++HtIl3hcuuEUwLGDnjgi6RChV1Q2Evdf0NO2HuJl6AOTy3AyP7nYUx4w= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756800626; c=relaxed/simple; bh=PwRCoKvnKUcaF9XJd+7L46ezwLV5dAqjhUK2h2k/uog=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=nEGfX25QBQjwf2uW8eBhKTOjCOgiEoMVQnsSZo7qsgPJB3T25Tddijux2rnkfBW4HEi2zbDhbJhBoO+QUvtBnOYDIdJtBLELvlImreuDCNLiciJe4kANkpf/t0MPRGYkRrdl5jkf0VyQOb+Y+7VcpXzlV6xKih3SmYhP8rATmf8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) header.from=tu-dortmund.de; spf=pass smtp.mailfrom=tu-dortmund.de; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b=jZOJHZzI; arc=none smtp.client-ip=129.217.128.51 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=none dis=none) 
header.from=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=tu-dortmund.de Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=tu-dortmund.de header.i=@tu-dortmund.de header.b="jZOJHZzI" Received: from simon-Latitude-5450.tu-dortmund.de (rechenknecht2.kn.e-technik.tu-dortmund.de [129.217.186.41]) (authenticated bits=0) by unimail.uni-dortmund.de (8.18.1.9/8.18.1.10) with ESMTPSA id 58289x6Z004012 (version=TLSv1.3 cipher=TLS_AES_256_GCM_SHA384 bits=256 verify=NOT); Tue, 2 Sep 2025 10:10:12 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=tu-dortmund.de; s=unimail; t=1756800612; bh=PwRCoKvnKUcaF9XJd+7L46ezwLV5dAqjhUK2h2k/uog=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=jZOJHZzIQ9/CCVHUhlb+0zGeyZuYR13vZcTmAx3gkROU/2MefiSm1gPb4WysBWrbl eBsDI4jP+kfnPOiDQDBYKQSxjhPGUbV2ittAzazS401S1+P5s00SJtbBR6owGebLFT HR44w28paWJ1/WeiSdeCojDG8tPI4F2hBJDhnCZY= From: Simon Schippers To: willemdebruijn.kernel@gmail.com, jasowang@redhat.com, mst@redhat.com, eperezma@redhat.com, stephen@networkplumber.org, netdev@vger.kernel.org, linux-kernel@vger.kernel.org, virtualization@lists.linux.dev, kvm@vger.kernel.org Cc: Simon Schippers , Tim Gebauer Subject: [PATCH 4/4] netdev queue flow control for vhost_net Date: Tue, 2 Sep 2025 10:09:57 +0200 Message-ID: <20250902080957.47265-5-simon.schippers@tu-dortmund.de> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250902080957.47265-1-simon.schippers@tu-dortmund.de> References: <20250902080957.47265-1-simon.schippers@tu-dortmund.de> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Stopping the queue is done in tun_net_xmit. Waking the queue is done by calling one of the helpers, tun_wake_netdev_queue and tap_wake_netdev_queue. 
For that, in get_wake_netdev_queue, the correct method is determined and saved in the function pointer wake_netdev_queue of the vhost_net_virtqueue. Then, each time after consuming a batch in vhost_net_buf_produce, wake_netdev_queue is called. Co-developed-by: Tim Gebauer Signed-off-by: Tim Gebauer Signed-off-by: Simon Schippers --- drivers/net/tap.c | 6 ++++++ drivers/net/tun.c | 6 ++++++ drivers/vhost/net.c | 34 ++++++++++++++++++++++++++++------ include/linux/if_tap.h | 2 ++ include/linux/if_tun.h | 3 +++ 5 files changed, 45 insertions(+), 6 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 4d874672bcd7..0bad9e3d59af 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1198,6 +1198,12 @@ struct socket *tap_get_socket(struct file *file) } EXPORT_SYMBOL_GPL(tap_get_socket); =20 +void tap_wake_netdev_queue(struct file *file) +{ + wake_netdev_queue(file->private_data); +} +EXPORT_SYMBOL_GPL(tap_wake_netdev_queue); + struct ptr_ring *tap_get_ptr_ring(struct file *file) { struct tap_queue *q; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 735498e221d8..e85589b596ac 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3739,6 +3739,12 @@ struct socket *tun_get_socket(struct file *file) } EXPORT_SYMBOL_GPL(tun_get_socket); =20 +void tun_wake_netdev_queue(struct file *file) +{ + wake_netdev_queue(file->private_data); +} +EXPORT_SYMBOL_GPL(tun_wake_netdev_queue); + struct ptr_ring *tun_get_tx_ring(struct file *file) { struct tun_file *tfile; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 6edac0c1ba9b..e837d3a334f1 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -130,6 +130,7 @@ struct vhost_net_virtqueue { struct vhost_net_buf rxq; /* Batched XDP buffs */ struct xdp_buff *xdp; + void (*wake_netdev_queue)(struct file *f); }; =20 struct vhost_net { @@ -175,13 +176,16 @@ static void *vhost_net_buf_consume(struct vhost_net_b= uf *rxq) return ret; } =20 -static int vhost_net_buf_produce(struct 
vhost_net_virtqueue *nvq) +static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq, + struct sock *sk) { + struct file *file =3D sk->sk_socket->file; struct vhost_net_buf *rxq =3D &nvq->rxq; =20 rxq->head =3D 0; rxq->tail =3D ptr_ring_consume_batched(nvq->rx_ring, rxq->queue, VHOST_NET_BATCH); + nvq->wake_netdev_queue(file); return rxq->tail; } =20 @@ -208,14 +212,15 @@ static int vhost_net_buf_peek_len(void *ptr) return __skb_array_len_with_tag(ptr); } =20 -static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) +static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq, + struct sock *sk) { struct vhost_net_buf *rxq =3D &nvq->rxq; =20 if (!vhost_net_buf_is_empty(rxq)) goto out; =20 - if (!vhost_net_buf_produce(nvq)) + if (!vhost_net_buf_produce(nvq, sk)) return 0; =20 out: @@ -994,7 +999,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rv= q, struct sock *sk) unsigned long flags; =20 if (rvq->rx_ring) - return vhost_net_buf_peek(rvq); + return vhost_net_buf_peek(rvq, sk); =20 spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); head =3D skb_peek(&sk->sk_receive_queue); @@ -1499,6 +1504,19 @@ static struct socket *get_tap_socket(int fd) return sock; } =20 +static void (*get_wake_netdev_queue(struct file *file))(struct file *file) +{ + struct ptr_ring *ring; + + ring =3D tun_get_tx_ring(file); + if (!IS_ERR(ring)) + return tun_wake_netdev_queue; + ring =3D tap_get_ptr_ring(file); + if (!IS_ERR(ring)) + return tap_wake_netdev_queue; + return NULL; +} + static struct socket *get_socket(int fd) { struct socket *sock; @@ -1570,10 +1588,14 @@ static long vhost_net_set_backend(struct vhost_net = *n, unsigned index, int fd) if (r) goto err_used; if (index =3D=3D VHOST_NET_VQ_RX) { - if (sock) + if (sock) { nvq->rx_ring =3D get_tap_ptr_ring(sock->file); - else + nvq->wake_netdev_queue =3D + get_wake_netdev_queue(sock->file); + } else { nvq->rx_ring =3D NULL; + nvq->wake_netdev_queue =3D NULL; + } } =20 oldubufs =3D nvq->ubufs; diff --git 
a/include/linux/if_tap.h b/include/linux/if_tap.h index 553552fa635c..02b2809784b5 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -10,6 +10,7 @@ struct socket; =20 #if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); +void tap_wake_netdev_queue(struct file *file); struct ptr_ring *tap_get_ptr_ring(struct file *file); #else #include @@ -18,6 +19,7 @@ static inline struct socket *tap_get_socket(struct file *= f) { return ERR_PTR(-EINVAL); } +static inline void tap_wake_netdev_queue(struct file *f) {} static inline struct ptr_ring *tap_get_ptr_ring(struct file *f) { return ERR_PTR(-EINVAL); diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 80166eb62f41..04c504bb1954 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -21,6 +21,7 @@ struct tun_msg_ctl { =20 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); +void tun_wake_netdev_queue(struct file *file); struct ptr_ring *tun_get_tx_ring(struct file *file); =20 static inline bool tun_is_xdp_frame(void *ptr) @@ -50,6 +51,8 @@ static inline struct socket *tun_get_socket(struct file *= f) return ERR_PTR(-EINVAL); } =20 +static inline void tun_wake_netdev_queue(struct file *f) {} + static inline struct ptr_ring *tun_get_tx_ring(struct file *f) { return ERR_PTR(-EINVAL); --=20 2.43.0